From 396dbec24ed86fcb941abc20275e82c3e5029237 Mon Sep 17 00:00:00 2001 From: brian Date: Mon, 24 Apr 2023 18:09:11 +0200 Subject: [PATCH] Using @SuperBuilder for LayerConfigurations --- .../TupleStreamDataSetIteratorTest.java | 2 +- .../ModelTupleStreamIntegrationTest.java | 2 +- .../solr/handler/ModelTupleStreamTest.java | 4 +- .../solr/ltr/model/ScoringModelTest.java | 4 +- .../remote/JsonModelServerTest.java | 20 +- .../spark/parameterserver/BaseSparkTest.java | 4 +- .../train/GradientSharingTrainingTest.java | 10 +- .../deeplearning4j/spark/BaseSparkTest.java | 4 +- .../spark/TestEarlyStoppingSpark.java | 10 +- .../TestEarlyStoppingSparkCompGraph.java | 10 +- .../org/deeplearning4j/spark/TestKryo.java | 22 +- .../spark/datavec/TestPreProcessedData.java | 12 +- .../spark/impl/TestKryoWarning.java | 4 +- .../impl/customlayer/TestCustomLayer.java | 4 +- .../impl/graph/TestSparkComputationGraph.java | 20 +- .../spark/impl/misc/TestFrozenLayers.java | 16 +- .../impl/multilayer/TestMiscFunctions.java | 20 +- .../multilayer/TestSparkDl4jMultiLayer.java | 6 +- ...arameterAveragingSparkVsSingleMachine.java | 18 +- ...TestSparkMultiLayerParameterAveraging.java | 62 +- .../stats/TestTrainingStatsCollection.java | 4 +- .../spark/ui/TestListeners.java | 4 +- .../ActorCriticFactoryCompGraphStdConv.java | 16 +- .../ActorCriticFactoryCompGraphStdDense.java | 14 +- .../ActorCriticFactorySeparateStdDense.java | 20 +- .../rl4j/network/dqn/DQNFactoryStdConv.java | 8 +- .../rl4j/network/dqn/DQNFactoryStdDense.java | 6 +- .../org/deeplearning4j/rl4j/NStepRnn.java | 14 +- .../deeplearning4j/rl4j/RobotLakeExample.java | 10 +- .../org/deeplearning4j/rl4j/TMazeExample.java | 12 +- .../rl4j/policy/PolicyTest.java | 4 +- .../src/test/java/net/brutex/gan/App.java | 43 +- .../net/brutex/gan/MnistDCGANExample.java | 28 +- .../java/net/brutex/gan/MnistSimpleGAN.java | 34 +- .../test/java/net/brutex/spark/BrianTest.java | 6 +- .../java/net/brutex/spark/BrianTest2.java | 6 +- .../java/net/brutex/spark/TestServer.java | 14 +- .../java/net/brutex/spark/TestServer2.java | 14 +- .../integration/IntegrationTestRunner.java | 2 +- .../testcases/dl4j/CNN1DTestCases.java | 10 +- .../testcases/dl4j/CNN2DTestCases.java | 32 +- .../testcases/dl4j/CNN3DTestCases.java | 6 +- .../testcases/dl4j/MLPTestCases.java | 8 +- .../testcases/dl4j/RNNTestCases.java | 18 +- .../testcases/dl4j/UnsupervisedTestCases.java | 2 +- cavis-common-platform/build.gradle | 2 +- .../columns/NumericalColumnAnalysis.java | 2 +- .../samediff/internal/InferenceSession.java | 2 +- .../nd4j/linalg/activations/Activation.java | 44 +- .../java/org/deeplearning4j/RandomTests.java | 4 +- .../java/org/deeplearning4j/TestUtils.java | 6 +- .../RecordReaderDataSetiteratorTest.java | 4 +- .../RecordReaderMultiDataSetIteratorTest.java | 2 +- .../iterator/DataSetIteratorTest.java | 17 +- .../datasets/iterator/TestFileIterators.java | 7 +- .../earlystopping/TestEarlyStopping.java | 44 +- .../TestEarlyStoppingCompGraph.java | 26 +- .../org/deeplearning4j/eval/EvalTest.java | 48 +- .../eval/EvaluationToolsTests.java | 8 +- .../java/org/deeplearning4j/eval/ROCTest.java | 4 +- .../eval/RegressionEvalTest.java | 4 +- .../exceptions/TestInvalidConfigurations.java | 72 +- .../exceptions/TestInvalidInput.java | 44 +- .../gradientcheck/AttentionLayerTest.java | 63 +- .../gradientcheck/BNGradientCheckTest.java | 70 +- .../gradientcheck/CNN1DGradientCheckTest.java | 1074 +++--- .../gradientcheck/CNN3DGradientCheckTest.java | 52 +- .../gradientcheck/CNNGradientCheckTest.java | 2869 
++++++++++------- .../CapsnetGradientCheckTest.java | 10 +- .../gradientcheck/DropoutGradientCheck.java | 25 +- .../GlobalPoolingGradientCheckTests.java | 26 +- .../gradientcheck/GradientCheckTests.java | 46 +- .../GradientCheckTestsComputationGraph.java | 190 +- .../GradientCheckTestsMasking.java | 36 +- .../gradientcheck/LRNGradientCheckTests.java | 6 +- .../gradientcheck/LSTMGradientCheckTests.java | 40 +- .../LossFunctionGradientCheck.java | 14 +- .../NoBiasGradientCheckTests.java | 22 +- .../OutputLayerGradientChecks.java | 12 +- .../gradientcheck/RnnGradientChecks.java | 34 +- .../UtilLayerGradientChecks.java | 58 +- .../gradientcheck/VaeGradientCheckTests.java | 10 +- .../gradientcheck/YoloGradientCheckTests.java | 19 +- .../ComputationGraphConfigurationTest.java | 86 +- .../org/deeplearning4j/nn/conf/JsonTest.java | 4 +- .../MultiLayerNeuralNetConfigurationTest.java | 78 +- .../MultiNeuralNetConfLayerBuilderTest.java | 4 +- .../nn/conf/NeuralNetConfigurationTest.java | 20 +- .../nn/conf/constraints/TestConstraints.java | 36 +- .../nn/conf/dropout/TestDropout.java | 42 +- .../nn/conf/graph/ElementWiseVertexTest.java | 56 +- .../nn/conf/graph/ShiftVertexTest.java | 10 +- .../nn/conf/layers/LayerBuilderTest.java | 30 +- .../nn/conf/layers/LayerConfigTest.java | 66 +- .../layers/LayerConfigValidationTest.java | 40 +- .../conf/preprocessor/CNNProcessorTest.java | 10 +- .../preprocessor/CustomPreprocessorTest.java | 4 +- .../conf/preprocessor/TestPreProcessors.java | 40 +- .../nn/conf/weightnoise/TestWeightNoise.java | 26 +- .../deeplearning4j/nn/dtypes/DTypeTests.java | 268 +- .../nn/graph/ComputationGraphTestRNN.java | 58 +- .../nn/graph/TestCompGraphCNN.java | 20 +- .../nn/graph/TestCompGraphUnsupervised.java | 8 +- .../nn/graph/TestComputationGraphNetwork.java | 336 +- .../nn/graph/TestSetGetParameters.java | 14 +- .../nn/graph/TestVariableLengthTSCG.java | 24 +- .../nn/graph/graphnodes/TestGraphNodes.java | 18 +- .../nn/layers/ActivationLayerTest.java | 576 ++-- .../nn/layers/AutoEncoderTest.java | 16 +- .../nn/layers/BaseLayerConfigurationTest.java | 6 +- .../nn/layers/CacheModeTest.java | 22 +- .../nn/layers/CenterLossOutputLayerTest.java | 226 +- .../nn/layers/DropoutLayerTest.java | 38 +- .../nn/layers/FrozenLayerTest.java | 86 +- .../layers/FrozenLayerWithBackpropTest.java | 124 +- .../nn/layers/OutputLayerTest.java | 56 +- .../nn/layers/RepeatVectorTest.java | 2 +- .../deeplearning4j/nn/layers/SeedTest.java | 2 +- .../deeplearning4j/nn/layers/TestDropout.java | 2 +- .../nn/layers/capsule/CapsNetMNISTTest.java | 12 +- .../nn/layers/capsule/CapsuleLayerTest.java | 10 +- .../capsule/CapsuleStrengthLayerTest.java | 4 +- .../layers/capsule/PrimaryCapsulesTest.java | 22 +- .../convolution/ConvDataFormatTests.java | 92 +- .../layers/convolution/Convolution3DTest.java | 2 +- .../ConvolutionLayerSetupTest.java | 127 +- .../convolution/ConvolutionLayerTest.java | 1676 +++++----- .../LocallyConnectedLayerTest.java | 25 +- .../layers/convolution/SpaceToDepthTest.java | 2 +- .../convolution/SubsamplingLayerTest.java | 10 +- .../convolution/TestConvolutionModes.java | 54 +- .../layers/convolution/Upsampling1DTest.java | 2 +- .../layers/convolution/Upsampling2DTest.java | 2 +- .../layers/custom/TestCustomActivation.java | 47 +- .../nn/layers/custom/TestCustomLayers.java | 28 +- .../custom/testclasses/CustomOutputLayer.java | 19 +- .../layers/feedforward/dense/DenseTest.java | 8 +- .../embedding/EmbeddingLayerTest.java | 122 +- .../normalization/BatchNormalizationTest.java | 82 +- 
.../normalization/LocalResponseTest.java | 17 +- .../objdetect/TestYolo2OutputLayer.java | 28 +- .../nn/layers/ocnn/OCNNOutputLayerTest.java | 8 +- .../pooling/GlobalPoolingMaskingTests.java | 42 +- .../layers/recurrent/BidirectionalTest.java | 1382 ++++---- .../GravesBidirectionalLSTMTest.java | 26 +- .../nn/layers/recurrent/GravesLSTMTest.java | 18 +- .../layers/recurrent/MaskZeroLayerTest.java | 8 +- .../layers/recurrent/RnnDataFormatTests.java | 26 +- .../recurrent/TestLastTimeStepLayer.java | 8 +- .../recurrent/TestRecurrentWeightInit.java | 19 +- .../nn/layers/recurrent/TestRnnLayers.java | 36 +- .../nn/layers/recurrent/TestSimpleRnn.java | 4 +- .../layers/recurrent/TestTimeDistributed.java | 35 +- .../samediff/SameDiffCustomLayerTests.java | 4 +- .../nn/layers/samediff/TestSameDiffConv.java | 16 +- .../nn/layers/samediff/TestSameDiffDense.java | 44 +- .../samediff/TestSameDiffDenseVertex.java | 8 +- .../layers/samediff/TestSameDiffLambda.java | 20 +- .../layers/samediff/TestSameDiffOutput.java | 12 +- .../samediff/testlayers/SameDiffConv.java | 143 +- .../samediff/testlayers/SameDiffDense.java | 180 +- .../testlayers/SameDiffMSEOutputLayer.java | 98 +- .../nn/layers/variational/TestVAE.java | 32 +- .../nn/misc/CloseNetworkTests.java | 10 +- .../deeplearning4j/nn/misc/LargeNetTest.java | 8 +- .../deeplearning4j/nn/misc/TestLrChanges.java | 66 +- .../nn/misc/TestMemoryReports.java | 24 +- .../nn/misc/TestNetConversion.java | 14 +- .../nn/misc/WorkspaceTests.java | 110 +- .../nn/mkldnn/ValidateMKLDNN.java | 24 +- .../nn/multilayer/BackPropMLPTest.java | 4 +- .../nn/multilayer/MultiLayerTest.java | 230 +- .../nn/multilayer/MultiLayerTestRNN.java | 96 +- .../nn/multilayer/TestMasking.java | 24 +- .../nn/multilayer/TestSetGetParameters.java | 24 +- .../nn/multilayer/TestVariableLengthTS.java | 42 +- .../rl/TestMultiModelGradientApplication.java | 12 +- .../nn/transferlearning/TestFrozenLayers.java | 28 +- .../TestTransferLearningModelSerializer.java | 16 +- .../TransferLearningCompGraphTest.java | 134 +- .../TransferLearningComplex.java | 40 +- .../TransferLearningHelperTest.java | 73 +- .../TransferLearningMLNTest.java | 178 +- .../nn/updater/TestGradientNormalization.java | 10 +- .../nn/updater/TestUpdaters.java | 112 +- .../nn/updater/custom/TestCustomUpdater.java | 6 +- .../nn/weights/WeightInitIdentityTest.java | 12 +- .../solver/BackTrackLineSearchTest.java | 6 +- .../optimize/solver/TestOptimizers.java | 12 +- .../listener/TestCheckpointListener.java | 2 +- .../listener/TestFailureListener.java | 6 +- .../optimizer/listener/TestListeners.java | 12 +- .../parallelism/RandomTests.java | 18 +- .../listener/TestSystemInfoPrintListener.java | 2 +- .../regressiontest/MiscRegressionTests.java | 2 +- .../regressiontest/RegressionTest050.java | 10 +- .../regressiontest/RegressionTest060.java | 14 +- .../regressiontest/RegressionTest071.java | 14 +- .../regressiontest/RegressionTest080.java | 14 +- .../regressiontest/RegressionTest100b4.java | 4 +- .../regressiontest/RegressionTest100b6.java | 4 +- .../customlayer100a/CustomLayer.java | 231 +- .../CompareTrainingImplementations.java | 8 +- .../util/CrashReportingUtilTest.java | 6 +- .../deeplearning4j/util/ModelGuesserTest.java | 6 +- .../util/ModelSerializerTest.java | 38 +- .../util/ModelValidatorTests.java | 6 +- .../nn/modelimport/keras/KerasLayer.java | 19 +- .../nn/modelimport/keras/KerasModel.java | 44 +- .../keras/KerasSequentialModel.java | 14 +- .../modelimport/keras/layers/KerasInput.java | 2 +- 
.../modelimport/keras/layers/KerasLoss.java | 8 +- .../layers/advanced/activations/KerasELU.java | 2 +- .../advanced/activations/KerasLeakyReLU.java | 2 +- .../advanced/activations/KerasPReLU.java | 4 +- .../advanced/activations/KerasReLU.java | 2 +- .../advanced/activations/KerasSoftmax.java | 2 +- .../activations/KerasThresholdedReLU.java | 2 +- .../KerasAtrousConvolution1D.java | 3 +- .../KerasAtrousConvolution2D.java | 5 +- .../convolutional/KerasConvolution.java | 5 + .../convolutional/KerasConvolution1D.java | 9 +- .../convolutional/KerasConvolution2D.java | 5 +- .../convolutional/KerasConvolution3D.java | 2 +- .../layers/convolutional/KerasCropping1D.java | 4 +- .../layers/convolutional/KerasCropping2D.java | 4 +- .../layers/convolutional/KerasCropping3D.java | 4 +- .../convolutional/KerasDeconvolution2D.java | 4 +- .../KerasDepthwiseConvolution2D.java | 2 +- .../KerasSeparableConvolution2D.java | 2 +- .../convolutional/KerasSpaceToDepth.java | 8 +- .../convolutional/KerasUpsampling1D.java | 4 +- .../convolutional/KerasUpsampling2D.java | 4 +- .../convolutional/KerasUpsampling3D.java | 4 +- .../convolutional/KerasZeroPadding1D.java | 6 +- .../convolutional/KerasZeroPadding2D.java | 4 +- .../convolutional/KerasZeroPadding3D.java | 4 +- .../keras/layers/core/KerasActivation.java | 2 +- .../keras/layers/core/KerasDense.java | 2 +- .../keras/layers/core/KerasDropout.java | 2 +- .../keras/layers/core/KerasLambda.java | 4 +- .../keras/layers/core/KerasMasking.java | 8 +- .../keras/layers/core/KerasRepeatVector.java | 4 +- .../layers/core/KerasSpatialDropout.java | 2 +- .../keras/layers/custom/KerasLRN.java | 2 +- .../layers/embeddings/Keras2DEmbedding.java | 4 +- .../layers/embeddings/KerasEmbedding.java | 4 +- .../layers/local/KerasLocallyConnected1D.java | 8 +- .../layers/local/KerasLocallyConnected2D.java | 9 +- .../keras/layers/noise/KerasAlphaDropout.java | 2 +- .../layers/noise/KerasGaussianDropout.java | 2 +- .../layers/noise/KerasGaussianNoise.java | 2 +- .../KerasBatchNormalization.java | 8 +- .../layers/pooling/KerasGlobalPooling.java | 6 +- .../keras/layers/pooling/KerasPooling1D.java | 4 +- .../keras/layers/pooling/KerasPooling2D.java | 4 +- .../keras/layers/pooling/KerasPooling3D.java | 4 +- .../keras/layers/recurrent/KerasLSTM.java | 12 +- .../layers/recurrent/KerasSimpleRnn.java | 12 +- .../layers/wrappers/KerasBidirectional.java | 14 +- .../keras/utils/KerasModelUtils.java | 2 +- .../configurations/DeepCTRLambdaTest.java | 2 +- .../configurations/FullModelComparisons.java | 4 +- .../configurations/KerasModelImportTest.java | 6 +- .../keras/e2e/KerasModelEndToEndTest.java | 14 +- .../keras/e2e/KerasYolo9000PredictTest.java | 4 +- .../activation/KerasLeakyReLUTest.java | 2 +- .../advanced/activation/KerasPReLUTest.java | 2 +- .../activation/KerasThresholdedReLUTest.java | 2 +- .../KerasAtrousConvolution1DTest.java | 4 +- .../KerasAtrousConvolution2DTest.java | 4 +- .../convolution/KerasConvolution1DTest.java | 4 +- .../convolution/KerasConvolution2DTest.java | 4 +- .../convolution/KerasConvolution3DTest.java | 4 +- .../convolution/KerasCropping1DTest.java | 2 +- .../convolution/KerasCropping2DTest.java | 4 +- .../convolution/KerasCropping3DTest.java | 4 +- .../convolution/KerasDeconvolution2DTest.java | 4 +- .../KerasDepthwiseConvolution2DTest.java | 4 +- .../KerasSeparableConvolution2DTest.java | 4 +- .../convolution/KerasUpsampling1DTest.java | 2 +- .../convolution/KerasUpsampling2DTest.java | 2 +- .../convolution/KerasUpsampling3DTest.java | 2 +- 
.../convolution/KerasZeroPadding1DTest.java | 2 +- .../convolution/KerasZeroPadding2DTest.java | 4 +- .../convolution/KerasZeroPadding3DTest.java | 4 +- .../layers/core/KerasActivationLayer.java | 2 +- .../keras/layers/core/KerasDenseTest.java | 4 +- .../keras/layers/core/KerasDropoutTest.java | 4 +- .../layers/core/KerasRepeatVectorTest.java | 4 +- .../core/KerasSpatialDropout2DTest.java | 4 +- .../layers/embeddings/KerasEmbeddingTest.java | 2 +- .../local/KerasLocallyConnected1DTest.java | 8 +- .../local/KerasLocallyConnected2DTest.java | 179 +- .../layers/noise/KerasAlphaDropoutTest.java | 4 +- .../noise/KerasGaussianDropoutTest.java | 4 +- .../layers/noise/KerasGaussianNoiseTest.java | 4 +- .../KerasBatchNormalizationTest.java | 2 +- .../layers/pooling/KerasPooling1DTest.java | 2 +- .../layers/pooling/KerasPooling2DTest.java | 2 +- .../layers/pooling/KerasPooling3DTest.java | 2 +- .../keras/layers/recurrent/KerasLSTMTest.java | 4 +- .../layers/recurrent/KerasSimpleRnnTest.java | 4 +- .../wrappers/KerasBidirectionalTest.java | 2 +- .../models/word2vec/Word2VecTestsSmall.java | 6 +- cavis-dnn/cavis-dnn-nn/build.gradle | 3 + .../ai/dnn/api/ILayerConfiguration.java | 1 + .../java/net/brutex/ai/dnn/api/IModel.java | 53 +- .../dnn/networks/ArtificialNeuralNetwork.java | 11 +- .../gradientcheck/GradientCheckUtil.java | 1551 +++++---- .../api/ITraininableLayerConfiguration.java | 2 +- .../conf/ComputationGraphConfiguration.java | 11 +- .../NeuralNetBaseBuilderConfiguration.java | 120 +- .../nn/conf/NeuralNetConfiguration.java | 491 +-- .../nn/conf/graph/AttentionVertex.java | 4 +- .../nn/conf/layers/AbstractLSTM.java | 118 +- .../nn/conf/layers/ActivationLayer.java | 223 +- .../nn/conf/layers/AutoEncoder.java | 85 +- .../conf/layers/BaseLayerConfiguration.java | 836 +++-- .../nn/conf/layers/BaseOutputLayer.java | 97 +- .../nn/conf/layers/BasePretrainNetwork.java | 49 +- .../nn/conf/layers/BaseRecurrentLayer.java | 150 +- .../nn/conf/layers/BaseUpsamplingLayer.java | 55 +- .../nn/conf/layers/BatchNormalization.java | 433 +-- .../nn/conf/layers/CapsuleLayer.java | 235 +- .../nn/conf/layers/CapsuleStrengthLayer.java | 17 +- .../nn/conf/layers/CenterLossOutputLayer.java | 79 +- .../nn/conf/layers/Cnn3DLossLayer.java | 199 +- .../nn/conf/layers/CnnLossLayer.java | 83 +- .../nn/conf/layers/Convolution1D.java | 3 +- .../nn/conf/layers/Convolution1DLayer.java | 393 +-- .../nn/conf/layers/Convolution2D.java | 4 +- .../nn/conf/layers/Convolution3D.java | 462 ++- .../nn/conf/layers/ConvolutionLayer.java | 958 +++--- .../nn/conf/layers/Deconvolution2D.java | 167 +- .../nn/conf/layers/Deconvolution3D.java | 292 +- .../nn/conf/layers/DenseLayer.java | 200 +- .../conf/layers/DepthwiseConvolution2D.java | 299 +- .../nn/conf/layers/DropoutLayer.java | 224 +- .../nn/conf/layers/EmbeddingLayer.java | 144 +- .../conf/layers/EmbeddingSequenceLayer.java | 204 +- .../nn/conf/layers/FeedForwardLayer.java | 123 +- .../nn/conf/layers/GlobalPoolingLayer.java | 196 +- .../conf/layers/GravesBidirectionalLSTM.java | 135 +- .../nn/conf/layers/GravesLSTM.java | 136 +- .../deeplearning4j/nn/conf/layers/LSTM.java | 41 +- .../nn/conf/layers/LayerConfiguration.java | 657 ++-- .../nn/conf/layers/LayerValidation.java | 4 +- .../layers/LearnedSelfAttentionLayer.java | 321 +- .../layers/LocalResponseNormalization.java | 374 +-- .../nn/conf/layers/LocallyConnected1D.java | 517 ++- .../nn/conf/layers/LocallyConnected2D.java | 648 ++-- .../nn/conf/layers/LossLayer.java | 142 +- .../nn/conf/layers/NoParamLayer.java | 66 +- 
.../nn/conf/layers/OutputLayer.java | 122 +- .../nn/conf/layers/PReLULayer.java | 242 +- .../nn/conf/layers/Pooling1D.java | 5 +- .../nn/conf/layers/Pooling2D.java | 5 +- .../nn/conf/layers/PrimaryCapsules.java | 704 ++-- .../conf/layers/RecurrentAttentionLayer.java | 187 +- .../nn/conf/layers/RnnLossLayer.java | 216 +- .../nn/conf/layers/RnnOutputLayer.java | 90 +- .../nn/conf/layers/SelfAttentionLayer.java | 265 +- .../conf/layers/SeparableConvolution2D.java | 409 +-- .../nn/conf/layers/SpaceToBatchLayer.java | 327 +- .../nn/conf/layers/SpaceToDepthLayer.java | 104 +- .../nn/conf/layers/Subsampling1DLayer.java | 367 +-- .../nn/conf/layers/Subsampling3DLayer.java | 681 ++-- .../nn/conf/layers/SubsamplingLayer.java | 887 +++-- .../nn/conf/layers/Upsampling1D.java | 104 +- .../nn/conf/layers/Upsampling2D.java | 99 +- .../nn/conf/layers/Upsampling3D.java | 122 +- .../nn/conf/layers/ZeroPadding1DLayer.java | 239 +- .../nn/conf/layers/ZeroPadding3DLayer.java | 138 +- .../nn/conf/layers/ZeroPaddingLayer.java | 153 +- .../conf/layers/convolutional/Cropping1D.java | 200 +- .../conf/layers/convolutional/Cropping2D.java | 255 +- .../conf/layers/convolutional/Cropping3D.java | 95 +- .../misc/ElementWiseMultiplicationLayer.java | 22 +- .../nn/conf/layers/misc/FrozenLayer.java | 200 +- .../layers/misc/FrozenLayerWithBackprop.java | 33 +- .../nn/conf/layers/misc/RepeatVector.java | 80 +- .../layers/objdetect/Yolo2OutputLayer.java | 324 +- .../conf/layers/recurrent/Bidirectional.java | 98 +- .../conf/layers/recurrent/LastTimeStep.java | 22 +- .../nn/conf/layers/recurrent/SimpleRnn.java | 50 +- .../layers/recurrent/TimeDistributed.java | 111 +- .../samediff/AbstractSameDiffLayer.java | 635 ++-- .../layers/samediff/SameDiffLambdaLayer.java | 5 +- .../conf/layers/samediff/SameDiffLayer.java | 57 +- .../layers/samediff/SameDiffOutputLayer.java | 8 +- .../conf/layers/samediff/SameDiffVertex.java | 2 +- .../nn/conf/layers/util/MaskLayer.java | 3 +- .../nn/conf/layers/util/MaskZeroLayer.java | 51 +- .../variational/VariationalAutoencoder.java | 529 ++- .../BaseWrapperLayerConfiguration.java | 77 +- .../nn/conf/misc/DummyConfig.java | 2 +- .../nn/conf/ocnn/OCNNOutputLayer.java | 306 +- .../conf/serde/BaseNetConfigDeserializer.java | 10 +- ...utationGraphConfigurationDeserializer.java | 13 +- .../nn/conf/serde/JsonMappers.java | 1 + .../NeuralNetConfigurationDeserializer.java | 10 +- .../nn/gradient/DefaultGradient.java | 7 +- .../deeplearning4j/nn/gradient/Gradient.java | 3 +- .../nn/graph/ComputationGraph.java | 18 +- .../nn/graph/vertex/impl/LayerVertex.java | 2 +- .../nn/layers/AbstractLayer.java | 23 +- .../deeplearning4j/nn/layers/BaseLayer.java | 6 +- .../nn/layers/BaseOutputLayer.java | 10 +- .../nn/layers/BasePretrainNetwork.java | 5 +- .../nn/layers/DropoutLayer.java | 4 +- .../deeplearning4j/nn/layers/FrozenLayer.java | 4 +- .../nn/layers/FrozenLayerWithBackprop.java | 2 +- .../deeplearning4j/nn/layers/LossLayer.java | 6 +- .../nn/layers/RepeatVector.java | 2 +- .../nn/layers/convolution/Cnn3DLossLayer.java | 6 +- .../nn/layers/convolution/CnnLossLayer.java | 18 +- .../convolution/Convolution3DLayer.java | 4 +- .../layers/convolution/ConvolutionLayer.java | 26 +- .../convolution/Deconvolution2DLayer.java | 10 +- .../convolution/Deconvolution3DLayer.java | 2 +- .../DepthwiseConvolution2DLayer.java | 12 +- .../SeparableConvolution2DLayer.java | 16 +- .../nn/layers/convolution/SpaceToBatch.java | 10 +- .../subsampling/Subsampling3DLayer.java | 2 +- .../subsampling/SubsamplingLayer.java | 53 +- 
.../convolution/upsampling/Upsampling2D.java | 4 +- .../layers/feedforward/dense/DenseLayer.java | 4 +- .../feedforward/embedding/EmbeddingLayer.java | 2 +- .../embedding/EmbeddingSequenceLayer.java | 6 +- .../normalization/BatchNormalization.java | 12 +- .../nn/layers/ocnn/OCNNOutputLayer.java | 14 +- .../nn/layers/ocnn/OCNNParamInitializer.java | 6 +- .../nn/layers/pooling/GlobalPoolingLayer.java | 1 + .../layers/recurrent/BaseRecurrentLayer.java | 2 +- .../recurrent/GravesBidirectionalLSTM.java | 10 +- .../nn/layers/recurrent/GravesLSTM.java | 4 +- .../nn/layers/recurrent/LSTM.java | 439 +-- .../nn/layers/recurrent/LSTMHelpers.java | 2 +- .../nn/layers/recurrent/RnnLossLayer.java | 18 +- .../nn/layers/recurrent/RnnOutputLayer.java | 16 +- .../nn/layers/recurrent/SimpleRnn.java | 2 +- .../training/CenterLossOutputLayer.java | 6 +- .../nn/layers/util/IdentityLayer.java | 14 +- .../variational/VariationalAutoencoder.java | 4 +- .../nn/multilayer/MultiLayerNetwork.java | 64 +- .../params/BidirectionalParamInitializer.java | 28 +- .../nn/params/DefaultParamInitializer.java | 10 +- .../nn/params/GravesLSTMParamInitializer.java | 4 +- .../nn/params/LSTMParamInitializer.java | 4 +- .../nn/params/SameDiffParamInitializer.java | 4 +- .../nn/params/SimpleRnnParamInitializer.java | 6 +- .../FineTuneConfiguration.java | 14 +- .../nn/transferlearning/TransferLearning.java | 20 +- .../nn/updater/BaseMultiLayerUpdater.java | 7 +- .../nn/updater/LayerUpdater.java | 2 +- .../graph/ComputationGraphUpdater.java | 2 +- .../nn/weights/IWeightInit.java | 2 + .../deeplearning4j/nn/weights/WeightInit.java | 162 +- .../nn/weights/WeightInitConstant.java | 9 + .../nn/weights/WeightInitDistribution.java | 9 + .../nn/weights/WeightInitIdentity.java | 9 + .../nn/weights/WeightInitLecunUniform.java | 9 + .../nn/weights/WeightInitNormal.java | 9 + .../nn/weights/WeightInitRelu.java | 9 + .../nn/weights/WeightInitReluUniform.java | 9 + .../nn/weights/WeightInitSigmoidUniform.java | 9 + .../nn/weights/WeightInitUniform.java | 9 + .../WeightInitVarScalingNormalFanAvg.java | 9 + .../WeightInitVarScalingNormalFanIn.java | 9 + .../WeightInitVarScalingNormalFanOut.java | 9 + .../WeightInitVarScalingUniformFanAvg.java | 9 + .../WeightInitVarScalingUniformFanIn.java | 9 + .../WeightInitVarScalingUniformFanOut.java | 9 + .../nn/weights/WeightInitXavier.java | 9 + .../nn/weights/WeightInitXavierLegacy.java | 9 + .../nn/weights/WeightInitXavierUniform.java | 9 + .../embeddings/WeightInitEmbedding.java | 10 + .../optimize/solvers/BaseOptimizer.java | 2 +- .../util/Convolution1DUtils.java | 12 +- .../deeplearning4j/util/ConvolutionUtils.java | 17 +- .../util/CrashReportingUtil.java | 6 +- .../org/deeplearning4j/util/NetworkUtils.java | 2 +- .../deeplearning4j/util/OutputLayerUtil.java | 8 +- .../deeplearning4j/util/TimeSeriesUtils.java | 2 +- .../main/resources/simplelogger.properties | 5 +- .../java/net/brutex/ai/dnn/api/dnnTest.java | 28 +- .../ParameterServerParallelWrapperTest.java | 12 +- .../InplaceParallelInferenceTest.java | 18 +- .../parallelism/ParallelInferenceTest.java | 40 +- .../parallelism/ParallelWrapperTest.java | 12 +- .../parallelism/TestListeners.java | 8 +- .../TestParallelEarlyStopping.java | 6 +- .../TestParallelEarlyStoppingUI.java | 4 +- .../factory/DefaultTrainerContextTest.java | 12 +- .../factory/SymmetricTrainerContextTest.java | 12 +- .../main/ParallelWrapperMainTest.java | 12 +- .../deeplearning4j/spark/BaseSparkTest.java | 4 +- .../spark/TestEarlyStoppingSpark.java | 10 +- 
.../TestEarlyStoppingSparkCompGraph.java | 10 +- .../org/deeplearning4j/spark/TestKryo.java | 24 +- .../spark/datavec/TestPreProcessedData.java | 12 +- .../spark/impl/TestKryoWarning.java | 4 +- .../impl/customlayer/TestCustomLayer.java | 4 +- .../impl/graph/TestSparkComputationGraph.java | 20 +- .../spark/impl/misc/TestFrozenLayers.java | 19 +- .../impl/multilayer/TestMiscFunctions.java | 24 +- .../multilayer/TestSparkDl4jMultiLayer.java | 6 +- ...arameterAveragingSparkVsSingleMachine.java | 18 +- ...TestSparkMultiLayerParameterAveraging.java | 62 +- .../stats/TestTrainingStatsCollection.java | 4 +- .../spark/ui/TestListeners.java | 4 +- .../spark/parameterserver/BaseSparkTest.java | 4 +- .../train/GradientSharingTrainingTest.java | 10 +- cavis-native/cavis-native-lib/build.gradle | 8 +- .../ConvolutionalIterationListener.java | 2 +- .../org/deeplearning4j/ui/ManualTests.java | 38 +- .../ui/weights/TestConvolutionalListener.java | 12 +- .../ui/model/stats/BaseStatsListener.java | 2 +- .../ui/stats/TestStatsListener.java | 2 +- .../ui/stats/TestTransferStatsCollection.java | 4 +- .../ui/module/train/TrainModuleUtils.java | 13 +- .../deeplearning4j/ui/TestRemoteReceiver.java | 4 +- .../org/deeplearning4j/ui/TestVertxUI.java | 18 +- .../deeplearning4j/ui/TestVertxUIManual.java | 10 +- .../ui/TestVertxUIMultiSession.java | 8 +- .../org/deeplearning4j/zoo/model/AlexNet.java | 26 +- .../deeplearning4j/zoo/model/Darknet19.java | 8 +- .../zoo/model/FaceNetNN4Small2.java | 104 +- .../zoo/model/InceptionResNetV1.java | 80 +- .../org/deeplearning4j/zoo/model/LeNet.java | 12 +- .../org/deeplearning4j/zoo/model/NASNet.java | 12 +- .../deeplearning4j/zoo/model/ResNet50.java | 54 +- .../deeplearning4j/zoo/model/SimpleCNN.java | 66 +- .../deeplearning4j/zoo/model/SqueezeNet.java | 34 +- .../zoo/model/TextGenerationLSTM.java | 6 +- .../deeplearning4j/zoo/model/TinyYOLO.java | 6 +- .../org/deeplearning4j/zoo/model/UNet.java | 70 +- .../org/deeplearning4j/zoo/model/VGG16.java | 52 +- .../org/deeplearning4j/zoo/model/VGG19.java | 56 +- .../deeplearning4j/zoo/model/Xception.java | 116 +- .../org/deeplearning4j/zoo/model/YOLO2.java | 8 +- .../zoo/model/helper/DarknetHelper.java | 8 +- .../zoo/model/helper/FaceNetHelper.java | 34 +- .../model/helper/InceptionResNetHelper.java | 90 +- .../zoo/model/helper/NASNetHelper.java | 60 +- .../org/deeplearning4j/zoo/MiscTests.java | 2 +- .../deeplearning4j/zoo/TestInstantiation.java | 532 +-- vsconfig.gradle | 4 +- 549 files changed, 18468 insertions(+), 20124 deletions(-) diff --git a/.old/deeplearning4j/deeplearning4j-dataimport-solrj/src/test/java/org/deeplearning4j/nn/dataimport/solr/client/solrj/io/stream/TupleStreamDataSetIteratorTest.java b/.old/deeplearning4j/deeplearning4j-dataimport-solrj/src/test/java/org/deeplearning4j/nn/dataimport/solr/client/solrj/io/stream/TupleStreamDataSetIteratorTest.java index 67ad09bd1..cad9ae278 100644 --- a/.old/deeplearning4j/deeplearning4j-dataimport-solrj/src/test/java/org/deeplearning4j/nn/dataimport/solr/client/solrj/io/stream/TupleStreamDataSetIteratorTest.java +++ b/.old/deeplearning4j/deeplearning4j-dataimport-solrj/src/test/java/org/deeplearning4j/nn/dataimport/solr/client/solrj/io/stream/TupleStreamDataSetIteratorTest.java @@ -207,7 +207,7 @@ public class TupleStreamDataSetIteratorTest extends SolrCloudTestCase { final MultiLayerNetwork model = new MultiLayerNetwork( NeuralNetConfiguration.builder() .list( - new OutputLayer.Builder(LossFunction.MSE) + OutputLayer.builder(LossFunction.MSE) .nIn(3) .nOut(1) 
.weightInit(WeightInit.ONES) diff --git a/.old/deeplearning4j/deeplearning4j-modelexport-solr/src/test/java/org/deeplearning4j/nn/modelexport/solr/handler/ModelTupleStreamIntegrationTest.java b/.old/deeplearning4j/deeplearning4j-modelexport-solr/src/test/java/org/deeplearning4j/nn/modelexport/solr/handler/ModelTupleStreamIntegrationTest.java index c2c260fdd..c4e22ea56 100644 --- a/.old/deeplearning4j/deeplearning4j-modelexport-solr/src/test/java/org/deeplearning4j/nn/modelexport/solr/handler/ModelTupleStreamIntegrationTest.java +++ b/.old/deeplearning4j/deeplearning4j-modelexport-solr/src/test/java/org/deeplearning4j/nn/modelexport/solr/handler/ModelTupleStreamIntegrationTest.java @@ -155,7 +155,7 @@ public class ModelTupleStreamIntegrationTest extends SolrCloudTestCase { final NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list( - new OutputLayer.Builder() + OutputLayer.builder() .nIn(numInputs) .nOut(numOutputs) .activation(Activation.IDENTITY) diff --git a/.old/deeplearning4j/deeplearning4j-modelexport-solr/src/test/java/org/deeplearning4j/nn/modelexport/solr/handler/ModelTupleStreamTest.java b/.old/deeplearning4j/deeplearning4j-modelexport-solr/src/test/java/org/deeplearning4j/nn/modelexport/solr/handler/ModelTupleStreamTest.java index c6a05607b..a8cf3f3be 100644 --- a/.old/deeplearning4j/deeplearning4j-modelexport-solr/src/test/java/org/deeplearning4j/nn/modelexport/solr/handler/ModelTupleStreamTest.java +++ b/.old/deeplearning4j/deeplearning4j-modelexport-solr/src/test/java/org/deeplearning4j/nn/modelexport/solr/handler/ModelTupleStreamTest.java @@ -244,7 +244,7 @@ public class ModelTupleStreamTest { final NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list( - new OutputLayer.Builder() + OutputLayer.builder() .nIn(numInputs) .nOut(numOutputs) .activation(Activation.IDENTITY) @@ -278,7 +278,7 @@ public class ModelTupleStreamTest { .graphBuilder() .addInputs("inputLayer") .addLayer("outputLayer", - new OutputLayer.Builder() + OutputLayer.builder() .nIn(numInputs) .nOut(numOutputs) .activation(Activation.IDENTITY) diff --git a/.old/deeplearning4j/deeplearning4j-modelexport-solr/src/test/java/org/deeplearning4j/nn/modelexport/solr/ltr/model/ScoringModelTest.java b/.old/deeplearning4j/deeplearning4j-modelexport-solr/src/test/java/org/deeplearning4j/nn/modelexport/solr/ltr/model/ScoringModelTest.java index 1986511bb..ed4aa7e09 100644 --- a/.old/deeplearning4j/deeplearning4j-modelexport-solr/src/test/java/org/deeplearning4j/nn/modelexport/solr/ltr/model/ScoringModelTest.java +++ b/.old/deeplearning4j/deeplearning4j-modelexport-solr/src/test/java/org/deeplearning4j/nn/modelexport/solr/ltr/model/ScoringModelTest.java @@ -194,7 +194,7 @@ public class ScoringModelTest { final NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list( - new OutputLayer.Builder().nIn(numFeatures).nOut(1).lossFunction(LossFunctions.LossFunction.MSE).activation(Activation.IDENTITY).build() + OutputLayer.builder().nIn(numFeatures).nOut(1).lossFunction(LossFunctions.LossFunction.MSE).activation(Activation.IDENTITY).build() ) .build(); @@ -221,7 +221,7 @@ public class ScoringModelTest { .graphBuilder() .addInputs("inputLayer") .addLayer("outputLayer", - new OutputLayer.Builder().nIn(numFeatures).nOut(1).lossFunction(LossFunctions.LossFunction.MSE).activation(Activation.IDENTITY).build(), + OutputLayer.builder().nIn(numFeatures).nOut(1).lossFunction(LossFunctions.LossFunction.MSE).activation(Activation.IDENTITY).build(), "inputLayer") .setOutputs("outputLayer") .build(); 
diff --git a/.old/deeplearning4j/deeplearning4j-remote/deeplearning4j-json-server/src/test/java/org/deeplearning4j/remote/JsonModelServerTest.java b/.old/deeplearning4j/deeplearning4j-remote/deeplearning4j-json-server/src/test/java/org/deeplearning4j/remote/JsonModelServerTest.java index dd75472c6..087246af1 100644 --- a/.old/deeplearning4j/deeplearning4j-remote/deeplearning4j-json-server/src/test/java/org/deeplearning4j/remote/JsonModelServerTest.java +++ b/.old/deeplearning4j/deeplearning4j-remote/deeplearning4j-json-server/src/test/java/org/deeplearning4j/remote/JsonModelServerTest.java @@ -75,8 +75,8 @@ public class JsonModelServerTest extends BaseDL4JTest { .updater(new Adam(0.119f)) .weightInit(WeightInit.XAVIER) .list() - .layer(0, new DenseLayer.Builder().activation(Activation.TANH).nIn(4).nOut(10).build()) - .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.SQUARED_LOSS).activation(Activation.SIGMOID).nIn(10).nOut(1).build()) + .layer(0, DenseLayer.builder().activation(Activation.TANH).nIn(4).nOut(10).build()) + .layer(1, OutputLayer.builder(LossFunctions.LossFunction.SQUARED_LOSS).activation(Activation.SIGMOID).nIn(10).nOut(1).build()) .build(); model = new MultiLayerNetwork(conf); @@ -543,8 +543,8 @@ public class JsonModelServerTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() - .layer(new DenseLayer.Builder().nIn(784).nOut(10).build()) - .layer(new LossLayer.Builder().activation(Activation.SOFTMAX).build()) + .layer(DenseLayer.builder().nIn(784).nOut(10).build()) + .layer(LossLayer.builder().lossFunction().activation(Activation.SOFTMAX).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -600,10 +600,10 @@ public class JsonModelServerTest extends BaseDL4JTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("input1", "input2") - .addLayer("L1", new DenseLayer.Builder().nIn(3).nOut(4).build(), "input1") - .addLayer("L2", new DenseLayer.Builder().nIn(3).nOut(4).build(), "input2") + .addLayer("L1", DenseLayer.builder().nIn(3).nOut(4).build(), "input1") + .addLayer("L2", DenseLayer.builder().nIn(3).nOut(4).build(), "input2") .addVertex("merge", new MergeVertex(), "L1", "L2") - .addLayer("out", new OutputLayer.Builder().nIn(4+4).nOut(3).build(), "merge") + .addLayer("out", OutputLayer.builder().nIn(4+4).nOut(3).build(), "merge") .setOutputs("out") .build(); @@ -656,11 +656,11 @@ public class JsonModelServerTest extends BaseDL4JTest { .updater(new Sgd(0.01)) .graphBuilder() .addInputs("input") - .addLayer("L1", new DenseLayer.Builder().nIn(8).nOut(4).build(), "input") - .addLayer("out1", new OutputLayer.Builder() + .addLayer("L1", DenseLayer.builder().nIn(8).nOut(4).build(), "input") + .addLayer("out1", OutputLayer.builder() .lossFunction(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .nIn(4).nOut(3).build(), "L1") - .addLayer("out2", new OutputLayer.Builder() + .addLayer("out2", OutputLayer.builder() .lossFunction(LossFunctions.LossFunction.MSE) .nIn(4).nOut(2).build(), "L1") .setOutputs("out1","out2") diff --git a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-parameterserver/src/test/java/org/deeplearning4j/spark/parameterserver/BaseSparkTest.java b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-parameterserver/src/test/java/org/deeplearning4j/spark/parameterserver/BaseSparkTest.java index 12c6c97eb..9d547abdc 100644 --- 
a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-parameterserver/src/test/java/org/deeplearning4j/spark/parameterserver/BaseSparkTest.java +++ b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-parameterserver/src/test/java/org/deeplearning4j/spark/parameterserver/BaseSparkTest.java @@ -129,9 +129,9 @@ public abstract class BaseSparkTest extends BaseDL4JTest implements Serializable protected NeuralNetConfiguration getBasicConf() { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(123) .updater(new Nesterovs(0.1, 0.9)).list() - .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(nIn).nOut(3) + .layer(0, org.deeplearning4j.nn.conf.layers.DenseLayer.builder().nIn(nIn).nOut(3) .activation(Activation.TANH).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .layer(1, org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).nIn(3).nOut(nOut) .activation(Activation.SOFTMAX).build()) .build(); diff --git a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-parameterserver/src/test/java/org/deeplearning4j/spark/parameterserver/train/GradientSharingTrainingTest.java b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-parameterserver/src/test/java/org/deeplearning4j/spark/parameterserver/train/GradientSharingTrainingTest.java index 870510915..664e44d26 100644 --- a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-parameterserver/src/test/java/org/deeplearning4j/spark/parameterserver/train/GradientSharingTrainingTest.java +++ b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark-parameterserver/src/test/java/org/deeplearning4j/spark/parameterserver/train/GradientSharingTrainingTest.java @@ -137,7 +137,7 @@ public class GradientSharingTrainingTest extends BaseSparkTest { .updater(new AMSGrad(0.1)) .graphBuilder() .addInputs("in") - .layer("out", new OutputLayer.Builder().nIn(784).nOut(10).activation(Activation.SOFTMAX) + .layer("out", OutputLayer.builder().nIn(784).nOut(10).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build(), "in") .setOutputs("out") .build(); @@ -272,15 +272,15 @@ public class GradientSharingTrainingTest extends BaseSparkTest { .weightInit(WeightInit.XAVIER) .seed(12345) .list() - .layer(new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build()) + .layer(OutputLayer.builder().nIn(4).nOut(3).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); } else { conf = NeuralNetConfiguration.builder() .weightInit(WeightInit.XAVIER) .seed(12345) .list() - .layer(new DenseLayer.Builder().nIn(4).nOut(4).activation(Activation.TANH).build()) - .layer(new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build()) + .layer(DenseLayer.builder().nIn(4).nOut(4).activation(Activation.TANH).build()) + .layer(OutputLayer.builder().nIn(4).nOut(3).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); } MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -358,7 +358,7 @@ public class GradientSharingTrainingTest extends BaseSparkTest { .updater(new AMSGrad(0.001)) .graphBuilder() .addInputs("in") - .layer("out", new OutputLayer.Builder().nIn(784).nOut(10).activation(Activation.SOFTMAX) + .layer("out", OutputLayer.builder().nIn(784).nOut(10).activation(Activation.SOFTMAX) 
.lossFunction(LossFunctions.LossFunction.MCXENT).build(), "in") .setOutputs("out") .build(); diff --git a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/BaseSparkTest.java b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/BaseSparkTest.java index 686560ffc..4fdfba87a 100644 --- a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/BaseSparkTest.java +++ b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/BaseSparkTest.java @@ -132,9 +132,9 @@ public abstract class BaseSparkTest extends BaseDL4JTest implements Serializable protected NeuralNetConfiguration getBasicConf() { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(123) .updater(new Nesterovs(0.1, 0.9)).list() - .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(nIn).nOut(3) + .layer(0, org.deeplearning4j.nn.conf.layers.DenseLayer.builder().nIn(nIn).nOut(3) .activation(Activation.TANH).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .layer(1, org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).nIn(3).nOut(nOut) .activation(Activation.SOFTMAX).build()) .build(); diff --git a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSpark.java b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSpark.java index 7154808f6..e81e53360 100644 --- a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSpark.java +++ b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSpark.java @@ -71,7 +71,7 @@ public class TestEarlyStoppingSpark extends BaseSparkTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd()).weightInit(WeightInit.XAVIER).list() - .layer(0, new OutputLayer.Builder().nIn(4).nOut(3) + .layer(0, OutputLayer.builder().nIn(4).nOut(3) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -127,7 +127,7 @@ public class TestEarlyStoppingSpark extends BaseSparkTest { .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(10.0)) //Intentionally huge LR .weightInit(WeightInit.XAVIER).list() - .layer(0, new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.IDENTITY) + .layer(0, OutputLayer.builder().nIn(4).nOut(3).activation(Activation.IDENTITY) .lossFunction(LossFunctions.LossFunction.MSE).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -166,7 +166,7 @@ public class TestEarlyStoppingSpark extends BaseSparkTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(1e-6)).weightInit(WeightInit.XAVIER).list() - .layer(0, new OutputLayer.Builder().nIn(4).nOut(3) + .layer(0, OutputLayer.builder().nIn(4).nOut(3) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -212,7 +212,7 @@ public class TestEarlyStoppingSpark extends BaseSparkTest { NeuralNetConfiguration 
conf = NeuralNetConfiguration.builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(0.0)).weightInit(WeightInit.XAVIER).list() - .layer(0, new OutputLayer.Builder().nIn(4).nOut(3) + .layer(0, OutputLayer.builder().nIn(4).nOut(3) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -249,7 +249,7 @@ public class TestEarlyStoppingSpark extends BaseSparkTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd()).weightInit(WeightInit.XAVIER).list() - .layer(0, new OutputLayer.Builder().nIn(4).nOut(3) + .layer(0, OutputLayer.builder().nIn(4).nOut(3) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); diff --git a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSparkCompGraph.java b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSparkCompGraph.java index 76fa0e65b..d975dcdd7 100644 --- a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSparkCompGraph.java +++ b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSparkCompGraph.java @@ -74,7 +74,7 @@ public class TestEarlyStoppingSparkCompGraph extends BaseSparkTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd()).weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in") - .addLayer("0", new OutputLayer.Builder().nIn(4).nOut(3) + .addLayer("0", OutputLayer.builder().nIn(4).nOut(3) .lossFunction(LossFunctions.LossFunction.MCXENT).build(), "in") .setOutputs("0").build(); ComputationGraph net = new ComputationGraph(conf); @@ -128,7 +128,7 @@ public class TestEarlyStoppingSparkCompGraph extends BaseSparkTest { .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(2.0)) //Intentionally huge LR .weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in") - .addLayer("0", new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.IDENTITY) + .addLayer("0", OutputLayer.builder().nIn(4).nOut(3).activation(Activation.IDENTITY) .lossFunction(LossFunctions.LossFunction.MSE).build(), "in") .setOutputs("0").build(); ComputationGraph net = new ComputationGraph(conf); @@ -169,7 +169,7 @@ public class TestEarlyStoppingSparkCompGraph extends BaseSparkTest { .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(1e-6)).weightInit(WeightInit.XAVIER).graphBuilder() .addInputs("in") - .addLayer("0", new OutputLayer.Builder().nIn(4).nOut(3) + .addLayer("0", OutputLayer.builder().nIn(4).nOut(3) .lossFunction(LossFunctions.LossFunction.MCXENT).build(), "in") .setOutputs("0").build(); ComputationGraph net = new ComputationGraph(conf); @@ -217,7 +217,7 @@ public class TestEarlyStoppingSparkCompGraph extends BaseSparkTest { .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(0.0)).weightInit(WeightInit.XAVIER).graphBuilder() .addInputs("in") - .addLayer("0", new OutputLayer.Builder().nIn(4).nOut(3) + .addLayer("0", OutputLayer.builder().nIn(4).nOut(3) 
.lossFunction(LossFunctions.LossFunction.MCXENT).build(), "in") .setOutputs("0").build(); ComputationGraph net = new ComputationGraph(conf); @@ -256,7 +256,7 @@ public class TestEarlyStoppingSparkCompGraph extends BaseSparkTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd()).weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in") - .addLayer("0", new OutputLayer.Builder().nIn(4).nOut(3) + .addLayer("0", OutputLayer.builder().nIn(4).nOut(3) .lossFunction(LossFunctions.LossFunction.MCXENT).build(), "in") .setOutputs("0").build(); ComputationGraph net = new ComputationGraph(conf); diff --git a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestKryo.java b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestKryo.java index 47f1807d0..80146dc24 100644 --- a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestKryo.java +++ b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/TestKryo.java @@ -69,7 +69,7 @@ public class TestKryo extends BaseSparkKryoTest { m.put(0, 0.5); m.put(10, 0.1); NeuralNetConfiguration mlc = NeuralNetConfiguration.builder() - .updater(new Nadam(new MapSchedule(ScheduleType.ITERATION,m))).list().layer(0, new OutputLayer.Builder().nIn(10).nOut(10).build()) + .updater(new Nadam(new MapSchedule(ScheduleType.ITERATION,m))).list().layer(0, OutputLayer.builder().nIn(10).nOut(10).build()) .build(); testSerialization(mlc, si); @@ -79,23 +79,23 @@ public class TestKryo extends BaseSparkKryoTest { .dist(new UniformDistribution(-1, 1)) .updater(new Adam(new MapSchedule(ScheduleType.ITERATION,m))) .graphBuilder() - .addInputs("in").addLayer("out", new OutputLayer.Builder().nIn(10).nOut(10).build(), "in") + .addInputs("in").addLayer("out", OutputLayer.builder().nIn(10).nOut(10).build(), "in") .setOutputs("out").build(); testSerialization(cgc, si); //Check main layers: - Layer[] layers = new Layer[] {new OutputLayer.Builder().nIn(10).nOut(10).build(), - new RnnOutputLayer.Builder().nIn(10).nOut(10).build(), new LossLayer.Builder().build(), - new CenterLossOutputLayer.Builder().nIn(10).nOut(10).build(), - new DenseLayer.Builder().nIn(10).nOut(10).build(), - new ConvolutionLayer.Builder().nIn(10).nOut(10).build(), new SubsamplingLayer.Builder().build(), + Layer[] layers = new Layer[] {OutputLayer.builder().nIn(10).nOut(10).build(), + RnnOutputLayer.builder().nIn(10).nOut(10).build(), LossLayer.builder().lossFunction().build(), + CenterLossOutputLayer.builder().nIn(10).nOut(10).build(), + DenseLayer.builder().nIn(10).nOut(10).build(), + ConvolutionLayer.builder().nIn(10).nOut(10).build(), SubsamplingLayer.builder().build(), new Convolution1DLayer.Builder(2, 2).nIn(10).nOut(10).build(), - new ActivationLayer.Builder().activation(Activation.TANH).build(), - new GlobalPoolingLayer.Builder().build(), new GravesLSTM.Builder().nIn(10).nOut(10).build(), - new LSTM.Builder().nIn(10).nOut(10).build(), new DropoutLayer.Builder(0.5).build(), - new BatchNormalization.Builder().build(), new LocalResponseNormalization.Builder().build()}; + ActivationLayer.builder().activation(Activation.TANH).build(), + GlobalPoolingLayer.builder().build(), GravesLSTM.builder().nIn(10).nOut(10).build(), + LSTM.builder().nIn(10).nOut(10).build(), DropoutLayer.builder(0.5).build(), + 
BatchNormalization.builder().build(), LocalResponseNormalization.builder().build()}; for (Layer l : layers) { testSerialization(l, si); diff --git a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/datavec/TestPreProcessedData.java b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/datavec/TestPreProcessedData.java index 946f8816f..3f9150504 100644 --- a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/datavec/TestPreProcessedData.java +++ b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/datavec/TestPreProcessedData.java @@ -86,9 +86,9 @@ public class TestPreProcessedData extends BaseSparkTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(Updater.RMSPROP) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() - .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(4).nOut(3) + .layer(0, org.deeplearning4j.nn.conf.layers.DenseLayer.builder().nIn(4).nOut(3) .activation(Activation.TANH).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .layer(1, org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).nIn(3).nOut(3).activation(Activation.SOFTMAX) .build()) .build(); @@ -137,9 +137,9 @@ public class TestPreProcessedData extends BaseSparkTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().updater(Updater.RMSPROP) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .graphBuilder().addInputs("in") - .addLayer("0", new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(4).nOut(3) + .addLayer("0", org.deeplearning4j.nn.conf.layers.DenseLayer.builder().nIn(4).nOut(3) .activation(Activation.TANH).build(), "in") - .addLayer("1", new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .addLayer("1", org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).nIn(3).nOut(3).activation(Activation.SOFTMAX) .build(), "0") @@ -191,9 +191,9 @@ public class TestPreProcessedData extends BaseSparkTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().updater(Updater.RMSPROP) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .graphBuilder().addInputs("in") - .addLayer("0", new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(4).nOut(3) + .addLayer("0", org.deeplearning4j.nn.conf.layers.DenseLayer.builder().nIn(4).nOut(3) .activation(Activation.TANH).build(), "in") - .addLayer("1", new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .addLayer("1", org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).nIn(3).nOut(3).activation(Activation.SOFTMAX) .build(), "0") diff --git a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/TestKryoWarning.java b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/TestKryoWarning.java index 6aa102fb4..5c7841d23 100644 --- a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/TestKryoWarning.java +++ b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/TestKryoWarning.java @@ -41,7 +41,7 @@ public class TestKryoWarning 
{ try { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() - .layer(0, new OutputLayer.Builder().nIn(10).nOut(10).build()) + .layer(0, OutputLayer.builder().nIn(10).nOut(10).build()) .build(); TrainingMaster tm = new ParameterAveragingTrainingMaster.Builder(1).build(); @@ -58,7 +58,7 @@ public class TestKryoWarning { try { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().graphBuilder().addInputs("in") - .addLayer("0", new OutputLayer.Builder().nIn(10).nOut(10).build(), "in").setOutputs("0") + .addLayer("0", OutputLayer.builder().nIn(10).nOut(10).build(), "in").setOutputs("0") .build(); TrainingMaster tm = new ParameterAveragingTrainingMaster.Builder(1).build(); diff --git a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/customlayer/TestCustomLayer.java b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/customlayer/TestCustomLayer.java index 1b7bf1052..e2ac55f05 100644 --- a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/customlayer/TestCustomLayer.java +++ b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/customlayer/TestCustomLayer.java @@ -53,9 +53,9 @@ public class TestCustomLayer extends BaseSparkTest { //Custom layers are tested more extensively in dl4j core NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new Sgd(0.1)).list() - .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()) + .layer(0, DenseLayer.builder().nIn(10).nOut(10).build()) .layer(1, new CustomLayer(3.14159)).layer(2, - new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .nIn(10).nOut(10).build()) .build(); diff --git a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/graph/TestSparkComputationGraph.java b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/graph/TestSparkComputationGraph.java index 7a28146fb..22e6d5a5c 100644 --- a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/graph/TestSparkComputationGraph.java +++ b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/graph/TestSparkComputationGraph.java @@ -79,8 +79,8 @@ public class TestSparkComputationGraph extends BaseSparkTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER) .graphBuilder().addInputs("in") - .addLayer("l0", new DenseLayer.Builder().nIn(4).nOut(10).build(), "in") - .addLayer("l1", new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .addLayer("l0", DenseLayer.builder().nIn(4).nOut(10).build(), "in") + .addLayer("l1", OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(10).nOut(2).build(), "l0") .setOutputs("l1").build(); @@ -107,8 +107,8 @@ public class TestSparkComputationGraph extends BaseSparkTest { ComputationGraphConfiguration config = NeuralNetConfiguration.builder() .updater(new Sgd(0.1)) .graphBuilder().addInputs("in") - .addLayer("dense", new DenseLayer.Builder().nIn(4).nOut(2).build(), "in").addLayer("out", - new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(2).nOut(3) + .addLayer("dense", 
DenseLayer.builder().nIn(4).nOut(2).build(), "in").addLayer("out", + OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT).nIn(2).nOut(3) .build(), "dense") .setOutputs("out").build(); @@ -141,9 +141,9 @@ public class TestSparkComputationGraph extends BaseSparkTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().l1(0.1).l2(0.1) .seed(123).updater(new Nesterovs(0.1, 0.9)).graphBuilder() .addInputs("in") - .addLayer("0", new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(nIn).nOut(3) + .addLayer("0", org.deeplearning4j.nn.conf.layers.DenseLayer.builder().nIn(nIn).nOut(3) .activation(Activation.TANH).build(), "in") - .addLayer("1", new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .addLayer("1", org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).nIn(3).nOut(nOut) .activation(Activation.SOFTMAX).build(), "0") @@ -220,9 +220,9 @@ public class TestSparkComputationGraph extends BaseSparkTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345).updater(Updater.RMSPROP) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in") - .addLayer("0", new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(4).nOut(4) + .addLayer("0", org.deeplearning4j.nn.conf.layers.DenseLayer.builder().nIn(4).nOut(4) .activation(Activation.TANH).build(), "in") - .addLayer("1", new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .addLayer("1", org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).nIn(4).nOut(3).activation(Activation.SOFTMAX) .build(), "0") @@ -421,8 +421,8 @@ public class TestSparkComputationGraph extends BaseSparkTest { .graphBuilder() .addInputs("input1", "input2") .addVertex("avg",new ElementWiseVertex(ElementWiseVertex.Op.Average),"input1","input2") - .addLayer("dense",new DenseLayer.Builder().dropOut(0.9).nIn(featSize).nOut(featSize / 2).build(),"avg") - .addLayer("output",new OutputLayer.Builder().nIn(featSize / 2).nOut(2).lossFunction(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).hasBias(false).build(),"dense") + .addLayer("dense",DenseLayer.builder().dropOut(0.9).nIn(featSize).nOut(featSize / 2).build(),"avg") + .addLayer("output",OutputLayer.builder().nIn(featSize / 2).nOut(2).lossFunction(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).hasBias(false).build(),"dense") .setOutputs("output") .build(); diff --git a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/misc/TestFrozenLayers.java b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/misc/TestFrozenLayers.java index 87493404e..60f4e8c4c 100644 --- a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/misc/TestFrozenLayers.java +++ b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/misc/TestFrozenLayers.java @@ -62,10 +62,10 @@ public class TestFrozenLayers extends BaseSparkTest { int nOut = 3; MultiLayerNetwork origModel = new MultiLayerNetwork(overallConf.clone().list() - .layer(0, new DenseLayer.Builder().nIn(6).nOut(5).build()) - .layer(1, new DenseLayer.Builder().nIn(5).nOut(4).build()) - .layer(2, new DenseLayer.Builder().nIn(4).nOut(3).build()) - .layer(3, new 
org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .layer(0, DenseLayer.builder().nIn(6).nOut(5).build()) + .layer(1, DenseLayer.builder().nIn(5).nOut(4).build()) + .layer(2, DenseLayer.builder().nIn(4).nOut(3).build()) + .layer(3, org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(3).nOut(3) .build()) .build()); @@ -138,10 +138,10 @@ public class TestFrozenLayers extends BaseSparkTest { ComputationGraph origModel = new ComputationGraph(NeuralNetConfiguration.builder().updater(new Sgd(0.1)) .activation(Activation.TANH).graphBuilder().addInputs("in") - .addLayer("0", new DenseLayer.Builder().nIn(6).nOut(5).build(), "in") - .addLayer("1", new DenseLayer.Builder().nIn(5).nOut(4).build(), "0") - .addLayer("2", new DenseLayer.Builder().nIn(4).nOut(3).build(), "1") - .addLayer("3", new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .addLayer("0", DenseLayer.builder().nIn(6).nOut(5).build(), "in") + .addLayer("1", DenseLayer.builder().nIn(5).nOut(4).build(), "0") + .addLayer("2", DenseLayer.builder().nIn(4).nOut(3).build(), "1") + .addLayer("3", org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(3).nOut(3) .build(), "2") diff --git a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/multilayer/TestMiscFunctions.java b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/multilayer/TestMiscFunctions.java index adc3d5508..6f47c1412 100644 --- a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/multilayer/TestMiscFunctions.java +++ b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/multilayer/TestMiscFunctions.java @@ -58,8 +58,8 @@ public class TestMiscFunctions extends BaseSparkTest { public void testFeedForwardWithKey() { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER).list() - .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).build()) - .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(3).nOut(3) + .layer(0, DenseLayer.builder().nIn(4).nOut(3).build()) + .layer(1, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT).nIn(3).nOut(3) .activation(Activation.SOFTMAX).build()) .build(); @@ -109,9 +109,9 @@ public class TestMiscFunctions extends BaseSparkTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER) .list() - .layer( new LSTM.Builder().nIn(4).nOut(3).build()) - .layer(new GlobalPoolingLayer(PoolingType.AVG)) - .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(3).nOut(3) + .layer( LSTM.builder().nIn(4).nOut(3).build()) + .layer(GlobalPoolingLayer.builder(PoolingType.AVG)) + .layer(OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT).nIn(3).nOut(3) .activation(Activation.SOFTMAX).build()) .build(); @@ -164,9 +164,9 @@ public class TestMiscFunctions extends BaseSparkTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER) .graphBuilder().addInputs("in1", "in2") - .addLayer("0", new DenseLayer.Builder().nIn(4).nOut(3).build(), "in1") - .addLayer("1", new DenseLayer.Builder().nIn(4).nOut(3).build(), "in2").addLayer("2", - new 
OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(6).nOut(3) + .addLayer("0", DenseLayer.builder().nIn(4).nOut(3).build(), "in1") + .addLayer("1", DenseLayer.builder().nIn(4).nOut(3).build(), "in2").addLayer("2", + OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT).nIn(6).nOut(3) .activation(Activation.SOFTMAX).build(), "0", "1") .setOutputs("2").build(); @@ -221,7 +221,7 @@ public class TestMiscFunctions extends BaseSparkTest { int nIn = 10; NeuralNetConfiguration mlc = NeuralNetConfiguration.builder().list() - .layer(0, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder() + .layer(0, org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.builder() .reconstructionDistribution( new GaussianReconstructionDistribution(Activation.IDENTITY)) .nIn(nIn).nOut(5).encoderLayerSizes(12).decoderLayerSizes(13).build()) @@ -261,7 +261,7 @@ public class TestMiscFunctions extends BaseSparkTest { NeuralNetConfiguration mlc = NeuralNetConfiguration.builder() .list().layer(0, - new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder() + org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.builder() .reconstructionDistribution(new LossFunctionWrapper( Activation.IDENTITY, new LossMSE())) .nIn(nIn).nOut(5).encoderLayerSizes(12).decoderLayerSizes(13) diff --git a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/multilayer/TestSparkDl4jMultiLayer.java b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/multilayer/TestSparkDl4jMultiLayer.java index e66e8bb9d..440a70d14 100644 --- a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/multilayer/TestSparkDl4jMultiLayer.java +++ b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/multilayer/TestSparkDl4jMultiLayer.java @@ -111,9 +111,9 @@ public class TestSparkDl4jMultiLayer extends BaseSparkTest { .updater(new Adam(1e-3)) .l2(1e-5) .list() - .layer(0, new DenseLayer.Builder().nIn(28 * 28).nOut(500).build()) - .layer(1, new DenseLayer.Builder().nIn(500).nOut(100).build()) - .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) + .layer(0, DenseLayer.builder().nIn(28 * 28).nOut(500).build()) + .layer(1, DenseLayer.builder().nIn(500).nOut(100).build()) + .layer(2, OutputLayer.builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .activation(Activation.SOFTMAX).nIn(100).nOut(10).build()) .build(); diff --git a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestCompareParameterAveragingSparkVsSingleMachine.java b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestCompareParameterAveragingSparkVsSingleMachine.java index e5faa2884..92162dc75 100644 --- a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestCompareParameterAveragingSparkVsSingleMachine.java +++ b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestCompareParameterAveragingSparkVsSingleMachine.java @@ -68,7 +68,7 @@ public class TestCompareParameterAveragingSparkVsSingleMachine { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() 
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(WeightInit.XAVIER).updater(updater).seed(seed).list() - .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()).layer(1, new OutputLayer.Builder() + .layer(0, DenseLayer.builder().nIn(10).nOut(10).build()).layer(1, OutputLayer.builder() .lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(10).build()) .build(); return conf; @@ -79,11 +79,11 @@ public class TestCompareParameterAveragingSparkVsSingleMachine { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(WeightInit.XAVIER).updater(updater).seed(seed).list() - .layer(0, new ConvolutionLayer.Builder().nOut(3).kernelSize(2, 2).stride(1, 1).padding(0, 0) + .layer(0, ConvolutionLayer.builder().nOut(3).kernelSize(2, 2).stride(1, 1).padding(0, 0) .activation(Activation.TANH).build()) - .layer(1, new ConvolutionLayer.Builder().nOut(3).kernelSize(2, 2).stride(1, 1).padding(0, 0) + .layer(1, ConvolutionLayer.builder().nOut(3).kernelSize(2, 2).stride(1, 1).padding(0, 0) .activation(Activation.TANH).build()) - .layer(1, new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nOut(10) + .layer(1, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nOut(10) .build()) .inputType(InputType.convolutional(10, 10, 3)).build(); return conf; @@ -95,8 +95,8 @@ public class TestCompareParameterAveragingSparkVsSingleMachine { .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(WeightInit.XAVIER).updater(updater).seed(seed).graphBuilder() .addInputs("in") - .addLayer("0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in").addLayer("1", - new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(10) + .addLayer("0", DenseLayer.builder().nIn(10).nOut(10).build(), "in").addLayer("1", + OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(10) .nOut(10).build(), "0") .setOutputs("1").build(); @@ -109,11 +109,11 @@ public class TestCompareParameterAveragingSparkVsSingleMachine { .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(WeightInit.XAVIER).updater(updater).seed(seed).graphBuilder() .addInputs("in") - .addLayer("0", new ConvolutionLayer.Builder().nOut(3).kernelSize(2, 2).stride(1, 1) + .addLayer("0", ConvolutionLayer.builder().nOut(3).kernelSize(2, 2).stride(1, 1) .padding(0, 0).activation(Activation.TANH).build(), "in") - .addLayer("1", new ConvolutionLayer.Builder().nOut(3).kernelSize(2, 2).stride(1, 1) + .addLayer("1", ConvolutionLayer.builder().nOut(3).kernelSize(2, 2).stride(1, 1) .padding(0, 0).activation(Activation.TANH).build(), "0") - .addLayer("2", new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nOut(10) + .addLayer("2", OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nOut(10) .build(), "1") .setOutputs("2").setInputTypes(InputType.convolutional(10, 10, 3)) .build(); diff --git a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestSparkMultiLayerParameterAveraging.java b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestSparkMultiLayerParameterAveraging.java index 8907c2165..d8740af1a 100644 --- a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestSparkMultiLayerParameterAveraging.java +++ 
b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestSparkMultiLayerParameterAveraging.java @@ -129,9 +129,9 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { DataSet d = new IrisDataSetIterator(150, 150).next(); NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(123) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() - .layer(0, new DenseLayer.Builder().nIn(4).nOut(100).weightInit(WeightInit.XAVIER) + .layer(0, DenseLayer.builder().nIn(4).nOut(100).weightInit(WeightInit.XAVIER) .activation(Activation.RELU).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .layer(1, org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).nIn(100).nOut(3) .activation(Activation.SOFTMAX).weightInit(WeightInit.XAVIER) .build()) @@ -167,9 +167,9 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { .updater(new Adam(1e-6)) .weightInit(WeightInit.XAVIER) .list() - .layer(new BatchNormalization.Builder().nIn(4).nOut(4).build()) - .layer(new DenseLayer.Builder().nIn(4).nOut(32).activation(Activation.RELU).build()) - .layer(new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(32).nOut(3) + .layer(BatchNormalization.builder().nIn(4).nOut(4).build()) + .layer(DenseLayer.builder().nIn(4).nOut(32).activation(Activation.RELU).build()) + .layer(org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT).nIn(32).nOut(3) .activation(Activation.SOFTMAX).build()) .build(); @@ -277,9 +277,9 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { } NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new RmsProp()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() - .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(nIn).nOut(3) + .layer(0, org.deeplearning4j.nn.conf.layers.DenseLayer.builder().nIn(nIn).nOut(3) .activation(Activation.TANH).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .layer(1, org.deeplearning4j.nn.conf.layers.OutputLayer.builder( LossFunctions.LossFunction.MSE).nIn(3).nOut(nOut).activation(Activation.SOFTMAX) .build()) .build(); @@ -302,9 +302,9 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().l1(0.1).l2(0.1) .seed(123).updater(new Nesterovs(0.1, 0.9)).list() - .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(nIn).nOut(3) + .layer(0, org.deeplearning4j.nn.conf.layers.DenseLayer.builder().nIn(nIn).nOut(3) .activation(Activation.TANH).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .layer(1, org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).nIn(3).nOut(nOut) .activation(Activation.SOFTMAX).build()) .build(); @@ -391,9 +391,9 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new RmsProp()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() - .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(28 * 28).nOut(50) + .layer(0, org.deeplearning4j.nn.conf.layers.DenseLayer.builder().nIn(28 * 28).nOut(50) 
.activation(Activation.TANH).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .layer(1, org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).nIn(50).nOut(10) .activation(Activation.SOFTMAX).build()) .build(); @@ -455,9 +455,9 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new RmsProp()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() - .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(28 * 28).nOut(50) + .layer(0, org.deeplearning4j.nn.conf.layers.DenseLayer.builder().nIn(28 * 28).nOut(50) .activation(Activation.TANH).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .layer(1, org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).nIn(50).nOut(10) .activation(Activation.SOFTMAX).build()) .build(); @@ -525,9 +525,9 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new RmsProp()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() - .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(28 * 28).nOut(50) + .layer(0, org.deeplearning4j.nn.conf.layers.DenseLayer.builder().nIn(28 * 28).nOut(50) .activation(Activation.TANH).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .layer(1, org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).nIn(50).nOut(10) .activation(Activation.SOFTMAX).build()) .build(); @@ -614,9 +614,9 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().updater(new RmsProp()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .graphBuilder().addInputs("in") - .addLayer("0", new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(28 * 28).nOut(50) + .addLayer("0", org.deeplearning4j.nn.conf.layers.DenseLayer.builder().nIn(28 * 28).nOut(50) .activation(Activation.TANH).build(), "in") - .addLayer("1", new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .addLayer("1", org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).nIn(50).nOut(10) .activation(Activation.SOFTMAX).build(), "0") @@ -687,9 +687,9 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345).updater(new RmsProp()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(WeightInit.XAVIER).list() - .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(4).nOut(4) + .layer(0, org.deeplearning4j.nn.conf.layers.DenseLayer.builder().nIn(4).nOut(4) .activation(Activation.TANH).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .layer(1, org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).nIn(4).nOut(3).activation(Activation.SOFTMAX) .build()) .build(); @@ -771,9 +771,9 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new RmsProp()) 
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() - .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(28 * 28).nOut(50) + .layer(0, org.deeplearning4j.nn.conf.layers.DenseLayer.builder().nIn(28 * 28).nOut(50) .activation(Activation.TANH).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .layer(1, org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).nIn(50).nOut(10) .activation(Activation.SOFTMAX).build()) .build(); @@ -822,9 +822,9 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().updater(new RmsProp()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .graphBuilder().addInputs("in") - .addLayer("0", new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(28 * 28).nOut(50) + .addLayer("0", org.deeplearning4j.nn.conf.layers.DenseLayer.builder().nIn(28 * 28).nOut(50) .activation(Activation.TANH).build(), "in") - .addLayer("1", new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .addLayer("1", org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).nIn(50).nOut(10) .activation(Activation.SOFTMAX).build(), "0") @@ -862,7 +862,7 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { Nd4j.getRandom().setSeed(12345); NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345).updater(new RmsProp()) .weightInit(WeightInit.XAVIER).list() - .layer(0, new VariationalAutoencoder.Builder().nIn(8).nOut(10).encoderLayerSizes(12) + .layer(0, VariationalAutoencoder.builder().nIn(8).nOut(10).encoderLayerSizes(12) .decoderLayerSizes(13).reconstructionDistribution( new GaussianReconstructionDistribution(Activation.IDENTITY)) .build()) @@ -898,7 +898,7 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { Nd4j.getRandom().setSeed(12345); ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345).updater(new RmsProp()) .weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in") - .addLayer("0", new VariationalAutoencoder.Builder().nIn(8).nOut(10).encoderLayerSizes(12) + .addLayer("0", VariationalAutoencoder.builder().nIn(8).nOut(10).encoderLayerSizes(12) .decoderLayerSizes(13).reconstructionDistribution( new GaussianReconstructionDistribution(Activation.IDENTITY)) .build(), "in") @@ -938,8 +938,8 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER).list() - .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(layerSize).build()) - .layer(1, new OutputLayer.Builder().nIn(layerSize).nOut(nOut) + .layer(0, DenseLayer.builder().nIn(nIn).nOut(layerSize).build()) + .layer(1, OutputLayer.builder().nIn(layerSize).nOut(nOut) .activation(Activation.SOFTMAX).lossFunction( LossFunctions.LossFunction.MCXENT) .build()) @@ -993,8 +993,8 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER).list() - .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(layerSize).build()) - .layer(1, new OutputLayer.Builder().nIn(layerSize).nOut(nOut) + .layer(0, DenseLayer.builder().nIn(nIn).nOut(layerSize).build()) + .layer(1, OutputLayer.builder().nIn(layerSize).nOut(nOut) .activation(Activation.SOFTMAX).lossFunction( 
LossFunctions.LossFunction.MCXENT) .build()) @@ -1047,13 +1047,13 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { } NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() - .layer(new OutputLayer.Builder().nIn(4).nOut(3).build()) + .layer(OutputLayer.builder().nIn(4).nOut(3).build()) .build(); ComputationGraphConfiguration conf2 = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") - .addLayer("out", new OutputLayer.Builder().nIn(4).nOut(3).build(), "in") + .addLayer("out", OutputLayer.builder().nIn(4).nOut(3).build(), "in") .setOutputs("out") .build(); diff --git a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/stats/TestTrainingStatsCollection.java b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/stats/TestTrainingStatsCollection.java index fc446048f..1c7d845eb 100644 --- a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/stats/TestTrainingStatsCollection.java +++ b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/stats/TestTrainingStatsCollection.java @@ -69,8 +69,8 @@ public class TestTrainingStatsCollection extends BaseSparkTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() - .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()) - .layer(1, new OutputLayer.Builder().nIn(10).nOut(10).build()) + .layer(0, DenseLayer.builder().nIn(10).nOut(10).build()) + .layer(1, OutputLayer.builder().nIn(10).nOut(10).build()) .build(); int miniBatchSizePerWorker = 10; diff --git a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/ui/TestListeners.java b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/ui/TestListeners.java index 6d8a9e9bd..07ae7391a 100644 --- a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/ui/TestListeners.java +++ b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/ui/TestListeners.java @@ -62,9 +62,9 @@ public class TestListeners extends BaseSparkTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(123) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() - .layer(0, new DenseLayer.Builder().nIn(4).nOut(100).weightInit(WeightInit.XAVIER) + .layer(0, DenseLayer.builder().nIn(4).nOut(100).weightInit(WeightInit.XAVIER) .activation(Activation.RELU).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .layer(1, org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).nIn(100).nOut(3) .activation(Activation.SOFTMAX).weightInit(WeightInit.XAVIER) .build()) diff --git a/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/ac/ActorCriticFactoryCompGraphStdConv.java b/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/ac/ActorCriticFactoryCompGraphStdConv.java index ed8ceacda..27190ab8a 100644 --- a/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/ac/ActorCriticFactoryCompGraphStdConv.java +++ b/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/ac/ActorCriticFactoryCompGraphStdConv.java @@ -65,27 +65,27 @@ public class 
ActorCriticFactoryCompGraphStdConv implements ActorCriticFactoryCom .weightInit(WeightInit.XAVIER) .l2(conf.getL2()).graphBuilder() .addInputs("input").addLayer("0", - new ConvolutionLayer.Builder(8, 8).nIn(shapeInputs[0]).nOut(16) + ConvolutionLayer.builder(8, 8).nIn(shapeInputs[0]).nOut(16) .stride(4, 4).activation(Activation.RELU).build(), "input"); - confB.addLayer("1", new ConvolutionLayer.Builder(4, 4).nIn(16).nOut(32).stride(2, 2).activation(Activation.RELU).build(), "0"); + confB.addLayer("1", ConvolutionLayer.builder(4, 4).nIn(16).nOut(32).stride(2, 2).activation(Activation.RELU).build(), "0"); - confB.addLayer("2", new DenseLayer.Builder().nIn(w * h * 32).nOut(256).activation(Activation.RELU).build(), "1"); + confB.addLayer("2", DenseLayer.builder().nIn(w * h * 32).nOut(256).activation(Activation.RELU).build(), "1"); if (conf.isUseLSTM()) { - confB.addLayer("3", new LSTM.Builder().nIn(256).nOut(256).activation(Activation.TANH).build(), "2"); + confB.addLayer("3", LSTM.builder().nIn(256).nOut(256).activation(Activation.TANH).build(), "2"); - confB.addLayer("value", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MSE).activation(Activation.IDENTITY) + confB.addLayer("value", RnnOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).activation(Activation.IDENTITY) .nIn(256).nOut(1).build(), "3"); - confB.addLayer("softmax", new RnnOutputLayer.Builder(new ActorCriticLoss()).activation(Activation.SOFTMAX) + confB.addLayer("softmax", RnnOutputLayer.builder(new ActorCriticLoss()).activation(Activation.SOFTMAX) .nIn(256).nOut(numOutputs).build(), "3"); } else { - confB.addLayer("value", new OutputLayer.Builder(LossFunctions.LossFunction.MSE).activation(Activation.IDENTITY) + confB.addLayer("value", OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).activation(Activation.IDENTITY) .nIn(256).nOut(1).build(), "2"); - confB.addLayer("softmax", new OutputLayer.Builder(new ActorCriticLoss()).activation(Activation.SOFTMAX) + confB.addLayer("softmax", OutputLayer.builder(new ActorCriticLoss()).activation(Activation.SOFTMAX) .nIn(256).nOut(numOutputs).build(), "2"); } diff --git a/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/ac/ActorCriticFactoryCompGraphStdDense.java b/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/ac/ActorCriticFactoryCompGraphStdDense.java index f05d43f3b..6ec905cb6 100644 --- a/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/ac/ActorCriticFactoryCompGraphStdDense.java +++ b/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/ac/ActorCriticFactoryCompGraphStdDense.java @@ -56,31 +56,31 @@ public class ActorCriticFactoryCompGraphStdDense implements ActorCriticFactoryCo .l2(conf.getL2()).graphBuilder() .setInputTypes(conf.isUseLSTM() ? 
InputType.recurrent(nIn) : InputType.feedForward(nIn)).addInputs("input") - .addLayer("0", new DenseLayer.Builder().nIn(nIn) + .addLayer("0", DenseLayer.builder().nIn(nIn) .nOut(conf.getNumHiddenNodes()).activation(Activation.RELU).build(), "input"); for (int i = 1; i < conf.getNumLayers(); i++) { - confB.addLayer(i + "", new DenseLayer.Builder().nIn(conf.getNumHiddenNodes()).nOut(conf.getNumHiddenNodes()) + confB.addLayer(i + "", DenseLayer.builder().nIn(conf.getNumHiddenNodes()).nOut(conf.getNumHiddenNodes()) .activation(Activation.RELU).build(), (i - 1) + ""); } if (conf.isUseLSTM()) { - confB.addLayer(getConf().getNumLayers() + "", new LSTM.Builder().activation(Activation.TANH) + confB.addLayer(getConf().getNumLayers() + "", LSTM.builder().activation(Activation.TANH) .nOut(conf.getNumHiddenNodes()).build(), (getConf().getNumLayers() - 1) + ""); - confB.addLayer("value", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MSE).activation(Activation.IDENTITY) + confB.addLayer("value", RnnOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).activation(Activation.IDENTITY) .nOut(1).build(), getConf().getNumLayers() + ""); - confB.addLayer("softmax", new RnnOutputLayer.Builder(new ActorCriticLoss()).activation(Activation.SOFTMAX) + confB.addLayer("softmax", RnnOutputLayer.builder(new ActorCriticLoss()).activation(Activation.SOFTMAX) .nOut(numOutputs).build(), getConf().getNumLayers() + ""); } else { - confB.addLayer("value", new OutputLayer.Builder(LossFunctions.LossFunction.MSE).activation(Activation.IDENTITY) + confB.addLayer("value", OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).activation(Activation.IDENTITY) .nOut(1).build(), (getConf().getNumLayers() - 1) + ""); - confB.addLayer("softmax", new OutputLayer.Builder(new ActorCriticLoss()).activation(Activation.SOFTMAX) + confB.addLayer("softmax", OutputLayer.builder(new ActorCriticLoss()).activation(Activation.SOFTMAX) .nOut(numOutputs).build(), (getConf().getNumLayers() - 1) + ""); } diff --git a/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/ac/ActorCriticFactorySeparateStdDense.java b/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/ac/ActorCriticFactorySeparateStdDense.java index 80cb6384b..98c58e96f 100644 --- a/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/ac/ActorCriticFactorySeparateStdDense.java +++ b/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/ac/ActorCriticFactorySeparateStdDense.java @@ -61,22 +61,22 @@ public class ActorCriticFactorySeparateStdDense implements ActorCriticFactorySep .updater(conf.getUpdater() != null ? 
conf.getUpdater() : new Adam()) .weightInit(WeightInit.XAVIER) .l2(conf.getL2()) - .list().layer(0, new DenseLayer.Builder().nIn(nIn).nOut(conf.getNumHiddenNodes()) + .list().layer(0, DenseLayer.builder().nIn(nIn).nOut(conf.getNumHiddenNodes()) .activation(Activation.RELU).build()); for (int i = 1; i < conf.getNumLayers(); i++) { - confB.layer(i, new DenseLayer.Builder().nIn(conf.getNumHiddenNodes()).nOut(conf.getNumHiddenNodes()) + confB.layer(i, DenseLayer.builder().nIn(conf.getNumHiddenNodes()).nOut(conf.getNumHiddenNodes()) .activation(Activation.RELU).build()); } if (conf.isUseLSTM()) { - confB.layer(conf.getNumLayers(), new LSTM.Builder().nOut(conf.getNumHiddenNodes()).activation(Activation.TANH).build()); + confB.layer(conf.getNumLayers(), LSTM.builder().nOut(conf.getNumHiddenNodes()).activation(Activation.TANH).build()); - confB.layer(conf.getNumLayers() + 1, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MSE).activation(Activation.IDENTITY) + confB.layer(conf.getNumLayers() + 1, RnnOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).activation(Activation.IDENTITY) .nIn(conf.getNumHiddenNodes()).nOut(1).build()); } else { - confB.layer(conf.getNumLayers(), new OutputLayer.Builder(LossFunctions.LossFunction.MSE).activation(Activation.IDENTITY) + confB.layer(conf.getNumLayers(), OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).activation(Activation.IDENTITY) .nIn(conf.getNumHiddenNodes()).nOut(1).build()); } @@ -96,22 +96,22 @@ public class ActorCriticFactorySeparateStdDense implements ActorCriticFactorySep .weightInit(WeightInit.XAVIER) //.regularization(true) //.l2(conf.getL2()) - .list().layer(0, new DenseLayer.Builder().nIn(nIn).nOut(conf.getNumHiddenNodes()) + .list().layer(0, DenseLayer.builder().nIn(nIn).nOut(conf.getNumHiddenNodes()) .activation(Activation.RELU).build()); for (int i = 1; i < conf.getNumLayers(); i++) { - confB2.layer(i, new DenseLayer.Builder().nIn(conf.getNumHiddenNodes()).nOut(conf.getNumHiddenNodes()) + confB2.layer(i, DenseLayer.builder().nIn(conf.getNumHiddenNodes()).nOut(conf.getNumHiddenNodes()) .activation(Activation.RELU).build()); } if (conf.isUseLSTM()) { - confB2.layer(conf.getNumLayers(), new LSTM.Builder().nOut(conf.getNumHiddenNodes()).activation(Activation.TANH).build()); + confB2.layer(conf.getNumLayers(), LSTM.builder().nOut(conf.getNumHiddenNodes()).activation(Activation.TANH).build()); - confB2.layer(conf.getNumLayers() + 1, new RnnOutputLayer.Builder(new ActorCriticLoss()) + confB2.layer(conf.getNumLayers() + 1, RnnOutputLayer.builder(new ActorCriticLoss()) .activation(Activation.SOFTMAX).nIn(conf.getNumHiddenNodes()).nOut(numOutputs).build()); } else { - confB2.layer(conf.getNumLayers(), new OutputLayer.Builder(new ActorCriticLoss()) + confB2.layer(conf.getNumLayers(), OutputLayer.builder(new ActorCriticLoss()) .activation(Activation.SOFTMAX).nIn(conf.getNumHiddenNodes()).nOut(numOutputs).build()); } diff --git a/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/dqn/DQNFactoryStdConv.java b/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/dqn/DQNFactoryStdConv.java index bb64200bd..ff8027a78 100644 --- a/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/dqn/DQNFactoryStdConv.java +++ b/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/dqn/DQNFactoryStdConv.java @@ -60,15 +60,15 @@ public class DQNFactoryStdConv implements DQNFactory { .l2(conf.getL2()) .updater(conf.getUpdater() != null ? 
conf.getUpdater() : new Adam()) .weightInit(WeightInit.XAVIER).l2(conf.getL2()).list() - .layer(0, new ConvolutionLayer.Builder(8, 8).nIn(shapeInputs[0]).nOut(16).stride(4, 4) + .layer(0, ConvolutionLayer.builder(8, 8).nIn(shapeInputs[0]).nOut(16).stride(4, 4) .activation(Activation.RELU).build()); - confB.layer(1, new ConvolutionLayer.Builder(4, 4).nOut(32).stride(2, 2).activation(Activation.RELU).build()); + confB.layer(1, ConvolutionLayer.builder(4, 4).nOut(32).stride(2, 2).activation(Activation.RELU).build()); - confB.layer(2, new DenseLayer.Builder().nOut(256).activation(Activation.RELU).build()); + confB.layer(2, DenseLayer.builder().nOut(256).activation(Activation.RELU).build()); - confB.layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MSE).activation(Activation.IDENTITY).nOut(numOutputs) + confB.layer(3, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).activation(Activation.IDENTITY).nOut(numOutputs) .build()); confB.inputType(InputType.convolutional(shapeInputs[1], shapeInputs[2], shapeInputs[0])); diff --git a/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/dqn/DQNFactoryStdDense.java b/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/dqn/DQNFactoryStdDense.java index 15b33170a..9cd064989 100644 --- a/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/dqn/DQNFactoryStdDense.java +++ b/.old/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/network/dqn/DQNFactoryStdDense.java @@ -61,7 +61,7 @@ public class DQNFactoryStdDense implements DQNFactory { .l2(conf.getL2()) .list() .layer(0, - new DenseLayer.Builder() + DenseLayer.builder() .nIn(nIn) .nOut(conf.getNumHiddenNodes()) .activation(Activation.RELU).build() @@ -69,12 +69,12 @@ public class DQNFactoryStdDense implements DQNFactory { for (int i = 1; i < conf.getNumLayers(); i++) { - confB.layer(i, new DenseLayer.Builder().nIn(conf.getNumHiddenNodes()).nOut(conf.getNumHiddenNodes()) + confB.layer(i, DenseLayer.builder().nIn(conf.getNumHiddenNodes()).nOut(conf.getNumHiddenNodes()) .activation(Activation.RELU).build()); } confB.layer(conf.getNumLayers(), - new OutputLayer.Builder(LossFunctions.LossFunction.MSE) + OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE) .activation(Activation.IDENTITY) .nIn(conf.getNumHiddenNodes()) .nOut(numOutputs) diff --git a/.old/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/NStepRnn.java b/.old/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/NStepRnn.java index dc23edd6e..b9351ce6a 100644 --- a/.old/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/NStepRnn.java +++ b/.old/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/NStepRnn.java @@ -141,16 +141,16 @@ public class NStepRnn { .graphBuilder() .addInputs("input") .setInputTypes(InputType.recurrent(NUM_INPUTS)) - .addLayer("lstm", new LSTM.Builder().nOut(lstmLayerSize).activation(Activation.TANH).build(), "input") - .addLayer("dl", new DenseLayer.Builder().nOut(dl1Size).activation(Activation.RELU).build(), "input", "lstm") - .addLayer("dl-1", new DenseLayer.Builder().nOut(dl2Size).activation(Activation.RELU).build(), "dl") + .addLayer("lstm", LSTM.builder().nOut(lstmLayerSize).activation(Activation.TANH).build(), "input") + .addLayer("dl", DenseLayer.builder().nOut(dl1Size).activation(Activation.RELU).build(), "input", "lstm") + .addLayer("dl-1", DenseLayer.builder().nOut(dl2Size).activation(Activation.RELU).build(), "dl") .addVertex("dl-rnn", new PreprocessorVertex(new FeedForwardToRnnPreProcessor()), "dl-1"); } private static 
ITrainableNeuralNet buildActorCriticNetwork() { ComputationGraphConfiguration valueConfiguration = buildBaseNetworkConfiguration(COMBINED_LSTM_LAYER_SIZE, COMBINED_DL1_LAYER_SIZE, COMBINED_DL2_LAYER_SIZE) - .addLayer("value", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MSE).activation(Activation.IDENTITY).nOut(1).build(), "dl-rnn", "lstm") - .addLayer("softmax", new RnnOutputLayer.Builder(new ActorCriticLoss()).activation(Activation.SOFTMAX).nOut(NUM_ACTIONS).build(), "dl-rnn", "lstm") + .addLayer("value", RnnOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).activation(Activation.IDENTITY).nOut(1).build(), "dl-rnn", "lstm") + .addLayer("softmax", RnnOutputLayer.builder(new ActorCriticLoss()).activation(Activation.SOFTMAX).nOut(NUM_ACTIONS).build(), "dl-rnn", "lstm") .setOutputs("value", "softmax") .build(); @@ -164,12 +164,12 @@ public class NStepRnn { private static ITrainableNeuralNet buildSeparateActorCriticNetwork() { ComputationGraphConfiguration valueConfiguration = buildBaseNetworkConfiguration(SEPARATE_LSTM_LAYER_SIZE, SEPARATE_DL1_LAYER_SIZE, SEPARATE_DL2_LAYER_SIZE) - .addLayer("value", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MSE).activation(Activation.IDENTITY).nOut(1).build(), "dl-rnn", "lstm") + .addLayer("value", RnnOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).activation(Activation.IDENTITY).nOut(1).build(), "dl-rnn", "lstm") .setOutputs("value") .build(); ComputationGraphConfiguration policyConfiguration = buildBaseNetworkConfiguration(SEPARATE_LSTM_LAYER_SIZE, SEPARATE_DL1_LAYER_SIZE, SEPARATE_DL2_LAYER_SIZE) - .addLayer("softmax", new RnnOutputLayer.Builder(new ActorCriticLoss()).activation(Activation.SOFTMAX).nOut(NUM_ACTIONS).build(), "dl-rnn", "lstm") + .addLayer("softmax", RnnOutputLayer.builder(new ActorCriticLoss()).activation(Activation.SOFTMAX).nOut(NUM_ACTIONS).build(), "dl-rnn", "lstm") .setOutputs("softmax") .build(); diff --git a/.old/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/RobotLakeExample.java b/.old/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/RobotLakeExample.java index adbd6a3c5..628174f78 100644 --- a/.old/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/RobotLakeExample.java +++ b/.old/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/RobotLakeExample.java @@ -197,13 +197,13 @@ public class RobotLakeExample { InputType.feedForward(4)) // radar ) .addInputs("tracker-in", "radar-in") - .layer("dl_1", new DenseLayer.Builder().activation(Activation.RELU).nOut(40).build(), "tracker-in", "radar-in") - .layer("dl_out", new DenseLayer.Builder().activation(Activation.RELU).nOut(40).build(), "dl_1"); + .layer("dl_1", DenseLayer.builder().activation(Activation.RELU).nOut(40).build(), "tracker-in", "radar-in") + .layer("dl_out", DenseLayer.builder().activation(Activation.RELU).nOut(40).build(), "dl_1"); } private static ITrainableNeuralNet buildQNetwork() { ComputationGraphConfiguration conf = buildBaseNetworkConfiguration() - .addLayer("output", new OutputLayer.Builder(LossFunctions.LossFunction.MSE).activation(Activation.IDENTITY) + .addLayer("output", OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).activation(Activation.IDENTITY) .nOut(RobotLake.NUM_ACTIONS).build(), "dl_out") .setOutputs("output") @@ -220,9 +220,9 @@ public class RobotLakeExample { private static ITrainableNeuralNet buildActorCriticNetwork() { ComputationGraphConfiguration conf = buildBaseNetworkConfiguration() - .addLayer("value", new 
OutputLayer.Builder(LossFunctions.LossFunction.MSE).activation(Activation.IDENTITY) + .addLayer("value", OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).activation(Activation.IDENTITY) .nOut(1).build(), "dl_out") - .addLayer("softmax", new OutputLayer.Builder(new ActorCriticLoss()).activation(Activation.SOFTMAX) + .addLayer("softmax", OutputLayer.builder(new ActorCriticLoss()).activation(Activation.SOFTMAX) .nOut(RobotLake.NUM_ACTIONS).build(), "dl_out") .setOutputs("value", "softmax") .build(); diff --git a/.old/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/TMazeExample.java b/.old/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/TMazeExample.java index 64c971e00..4a8a4ed88 100644 --- a/.old/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/TMazeExample.java +++ b/.old/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/TMazeExample.java @@ -181,18 +181,18 @@ public class TMazeExample { .graphBuilder() .setInputTypes(InputType.recurrent(NUM_INPUTS)) .addInputs("input") - .addLayer("goal", new LSTM.Builder() + .addLayer("goal", LSTM.builder() .nOut(40) .activation(Activation.TANH) .build(), "input") - .addLayer("corridor", new DenseLayer.Builder().nOut(40).activation(Activation.RELU).build(), "input", "goal") - .addLayer("corridor-1", new DenseLayer.Builder().nOut(20).activation(Activation.RELU).build(), "corridor") + .addLayer("corridor", DenseLayer.builder().nOut(40).activation(Activation.RELU).build(), "input", "goal") + .addLayer("corridor-1", DenseLayer.builder().nOut(20).activation(Activation.RELU).build(), "corridor") .addVertex("corridor-rnn", new PreprocessorVertex(new FeedForwardToRnnPreProcessor()), "corridor-1"); } private static ITrainableNeuralNet buildQNetwork() { ComputationGraphConfiguration conf = buildBaseNetworkConfiguration() - .addLayer("output", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MSE).activation(Activation.IDENTITY) + .addLayer("output", RnnOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).activation(Activation.IDENTITY) .nOut(NUM_ACTIONS).build(), "goal", "corridor-rnn") .setOutputs("output") @@ -207,9 +207,9 @@ public class TMazeExample { private static ITrainableNeuralNet buildActorCriticNetwork() { ComputationGraphConfiguration conf = buildBaseNetworkConfiguration() - .addLayer("value", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MSE).activation(Activation.IDENTITY) + .addLayer("value", RnnOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).activation(Activation.IDENTITY) .nOut(1).build(), "goal", "corridor-rnn") - .addLayer("softmax", new RnnOutputLayer.Builder(new ActorCriticLoss()).activation(Activation.SOFTMAX) + .addLayer("softmax", RnnOutputLayer.builder(new ActorCriticLoss()).activation(Activation.SOFTMAX) .nOut(NUM_ACTIONS).build(), "goal", "corridor-rnn") .setOutputs("value", "softmax") .build(); diff --git a/.old/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/policy/PolicyTest.java b/.old/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/policy/PolicyTest.java index f0ff3f641..38ffefd08 100644 --- a/.old/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/policy/PolicyTest.java +++ b/.old/rl4j/rl4j-core/src/test/java/org/deeplearning4j/rl4j/policy/PolicyTest.java @@ -167,9 +167,9 @@ public class PolicyTest { @Test public void testACPolicy() throws Exception { ComputationGraph cg = new ComputationGraph(NeuralNetConfiguration.builder().seed(444).graphBuilder().addInputs("input") - .addLayer("output", new 
OutputLayer.Builder().nOut(1).lossFunction(LossFunctions.LossFunction.XENT).activation(Activation.SIGMOID).build(), "input").setOutputs("output").build()); + .addLayer("output", OutputLayer.builder().nOut(1).lossFunction(LossFunctions.LossFunction.XENT).activation(Activation.SIGMOID).build(), "input").setOutputs("output").build()); MultiLayerNetwork mln = new MultiLayerNetwork(NeuralNetConfiguration.builder().seed(555).list() - .layer(0, new OutputLayer.Builder().nOut(1).lossFunction(LossFunctions.LossFunction.XENT).activation(Activation.SIGMOID).build()).build()); + .layer(0, OutputLayer.builder().nOut(1).lossFunction(LossFunctions.LossFunction.XENT).activation(Activation.SIGMOID).build()).build()); ACPolicy policy = new ACPolicy(new DummyAC(mln), true, Nd4j.getRandom()); diff --git a/brutex-extended-tests/src/test/java/net/brutex/gan/App.java b/brutex-extended-tests/src/test/java/net/brutex/gan/App.java index 0287d32a9..18a0fc50d 100644 --- a/brutex-extended-tests/src/test/java/net/brutex/gan/App.java +++ b/brutex-extended-tests/src/test/java/net/brutex/gan/App.java @@ -95,13 +95,13 @@ public class App { private static LayerConfiguration[] genLayers() { return new LayerConfiguration[] { - new DenseLayer.Builder().nIn(INPUT).nOut(X_DIM*Y_DIM*CHANNELS).weightInit(WeightInit.NORMAL).build(), - new ActivationLayer.Builder(new ActivationLReLU(0.2)).build(), - new DenseLayer.Builder().nIn(X_DIM*Y_DIM*CHANNELS).nOut(X_DIM*Y_DIM).build(), - new ActivationLayer.Builder(new ActivationLReLU(0.2)).build(), - new DenseLayer.Builder().nIn(X_DIM*Y_DIM).nOut(X_DIM*Y_DIM).build(), - new ActivationLayer.Builder(new ActivationLReLU(0.2)).build(), - new DenseLayer.Builder().nIn(X_DIM*Y_DIM).nOut(X_DIM*Y_DIM*CHANNELS).activation(Activation.TANH) + DenseLayer.builder().nIn(INPUT).nOut(X_DIM*Y_DIM*CHANNELS).weightInit(WeightInit.NORMAL).build(), + ActivationLayer.builder(Activation.LEAKYRELU).build(), + DenseLayer.builder().nIn(X_DIM*Y_DIM*CHANNELS).nOut(X_DIM*Y_DIM).build(), + ActivationLayer.builder(new ActivationLReLU(0.2)).build(), + DenseLayer.builder().nIn(X_DIM*Y_DIM).nOut(X_DIM*Y_DIM).build(), + ActivationLayer.builder(new ActivationLReLU(0.2)).build(), + DenseLayer.builder().nIn(X_DIM*Y_DIM).nOut(X_DIM*Y_DIM*CHANNELS).activation(Activation.TANH) .build() }; } @@ -131,19 +131,19 @@ public class App { private static LayerConfiguration[] disLayers() { return new LayerConfiguration[]{ - new DenseLayer.Builder().nOut(X_DIM*Y_DIM*CHANNELS*2).build(), //input is set by setInputType on the network - new ActivationLayer.Builder(new ActivationLReLU(0.2)).build(), - new DropoutLayer.Builder(1 - 0.5).build(), - new DenseLayer.Builder().nIn(X_DIM * Y_DIM*CHANNELS*2).nOut(X_DIM*Y_DIM*CHANNELS*4).build(), //HxBxC - new ActivationLayer.Builder(new ActivationLReLU(0.2)).build(), - new DropoutLayer.Builder(1 - 0.5).build(), - new DenseLayer.Builder().nIn(X_DIM*Y_DIM*CHANNELS*4).nOut(X_DIM*Y_DIM*CHANNELS).build(), - new ActivationLayer.Builder(new ActivationLReLU(0.2)).build(), - new DropoutLayer.Builder(1 - 0.5).build(), - new DenseLayer.Builder().nIn(X_DIM*Y_DIM*CHANNELS).nOut(X_DIM*Y_DIM).build(), - new ActivationLayer.Builder(new ActivationLReLU(0.2)).build(), - new DropoutLayer.Builder(1 - 0.5).build(), - new OutputLayer.Builder(LossFunction.XENT).nIn(X_DIM*Y_DIM).nOut(1).activation(Activation.SIGMOID).build() + DenseLayer.builder().nOut(X_DIM*Y_DIM*CHANNELS*2).build(), //input is set by setInputType on the network + ActivationLayer.builder(new ActivationLReLU(0.2)).build(), + DropoutLayer.builder(1 - 0.5).build(), 
+ DenseLayer.builder().nIn(X_DIM * Y_DIM*CHANNELS*2).nOut(X_DIM*Y_DIM*CHANNELS*4).build(), //HxBxC + ActivationLayer.builder(new ActivationLReLU(0.2)).build(), + DropoutLayer.builder(1 - 0.5).build(), + DenseLayer.builder().nIn(X_DIM*Y_DIM*CHANNELS*4).nOut(X_DIM*Y_DIM*CHANNELS).build(), + ActivationLayer.builder(new ActivationLReLU(0.2)).build(), + DropoutLayer.builder(1 - 0.5).build(), + DenseLayer.builder().nIn(X_DIM*Y_DIM*CHANNELS).nOut(X_DIM*Y_DIM).build(), + ActivationLayer.builder(new ActivationLReLU(0.2)).build(), + DropoutLayer.builder(1 - 0.5).build(), + OutputLayer.builder().lossFunction(LossFunction.XENT).nIn(X_DIM*Y_DIM).nOut(1).activation(Activation.SIGMOID).build() }; } @@ -171,7 +171,7 @@ public class App { LayerConfiguration[] disLayers = Arrays.stream(disLayers()) .map((layer) -> { if (layer instanceof DenseLayer || layer instanceof OutputLayer) { - return new FrozenLayerWithBackprop(layer); + return FrozenLayerWithBackprop.builder(layer); } else { return layer; } @@ -242,6 +242,7 @@ public class App { gan.addTrainingListeners(new ScoreToChartListener("gan")); //dis.setListeners(new ScoreToChartListener("dis")); + System.out.println(gan.toString()); gan.fit(Nd4j.rand(batchSize, CHANNELS, X_DIM, Y_DIM), Nd4j.zeros(batchSize, 1)); //gan.fit(new DataSet(trainData.next().getFeatures(), Nd4j.zeros(batchSize, 1))); diff --git a/brutex-extended-tests/src/test/java/net/brutex/gan/MnistDCGANExample.java b/brutex-extended-tests/src/test/java/net/brutex/gan/MnistDCGANExample.java index 4dd171fea..7f2af46ae 100644 --- a/brutex-extended-tests/src/test/java/net/brutex/gan/MnistDCGANExample.java +++ b/brutex-extended-tests/src/test/java/net/brutex/gan/MnistDCGANExample.java @@ -101,21 +101,21 @@ public class MnistDCGANExample { public static void main(String[] args) throws Exception { Supplier genSupplier = () -> { return new MultiLayerNetwork(NeuralNetConfiguration.builder() - .layer(0, new DenseLayer.Builder().nIn(latentDim).nOut(width / 2 * height / 2 * 128) + .layer(0, DenseLayer.builder().nIn(latentDim).nOut(width / 2 * height / 2 * 128) .activation(Activation.LEAKYRELU).weightInit(WeightInit.NORMAL).build()) - .layer(1, new Convolution2D.Builder().nIn(128).nOut(128).kernelSize(5, 5) + .layer(1, Convolution2D.builder().nIn(128).nOut(128).kernelSize(5, 5) .convolutionMode(ConvolutionMode.Same).activation(Activation.LEAKYRELU).build()) // Up-sampling to 28x28x256 - .layer(2, new Deconvolution2D.Builder().nIn(128).nOut(128).stride(2, 2) + .layer(2, Deconvolution2D.builder().nIn(128).nOut(128).stride(2, 2) .kernelSize(5, 5).convolutionMode(ConvolutionMode.Same) .activation(Activation.LEAKYRELU).build()) - .layer(3, new Convolution2D.Builder().nIn(128).nOut(128).kernelSize(5, 5) + .layer(3, Convolution2D.builder().nIn(128).nOut(128).kernelSize(5, 5) .convolutionMode(ConvolutionMode.Same).activation(Activation.LEAKYRELU).build()) - .layer(4, new Convolution2D.Builder().nIn(128).nOut(128).kernelSize(5, 5) + .layer(4, Convolution2D.builder().nIn(128).nOut(128).kernelSize(5, 5) .convolutionMode(ConvolutionMode.Same).activation(Activation.LEAKYRELU).build()) - .layer(5, new Convolution2D.Builder().nIn(128).nOut(channels).kernelSize(7, 7) + .layer(5, Convolution2D.builder().nIn(128).nOut(channels).kernelSize(7, 7) .convolutionMode(ConvolutionMode.Same).activation(Activation.LEAKYRELU).build()) - .layer(6, new ActivationLayer.Builder().activation(Activation.TANH).build()) + .layer(6, ActivationLayer.builder().activation(Activation.TANH).build()) .inputPreProcessor(1, new 
FeedForwardToCnnPreProcessor(height / 2, width / 2, 128)) .inputPreProcessor(6, new CnnToFeedForwardPreProcessor(height, width, channels)) @@ -129,17 +129,17 @@ public class MnistDCGANExample { //.gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue) //.gradientNormalizationThreshold(100.0) - .layer(0, new Convolution2D.Builder().nIn(channels).nOut(64).kernelSize(3, 3) + .layer(0, Convolution2D.builder().nIn(channels).nOut(64).kernelSize(3, 3) .activation(Activation.LEAKYRELU).build()) - .layer(1, new Convolution2D.Builder().nIn(64).nOut(64).kernelSize(3, 3).stride(2, 2) + .layer(1, Convolution2D.builder().nIn(64).nOut(64).kernelSize(3, 3).stride(2, 2) .activation(Activation.LEAKYRELU).build()) - .layer(2, new Convolution2D.Builder().nIn(64).nOut(64).kernelSize(3, 3).stride(2, 2) + .layer(2, Convolution2D.builder().nIn(64).nOut(64).kernelSize(3, 3).stride(2, 2) .activation(Activation.LEAKYRELU).build()) - .layer(3, new Convolution2D.Builder().nIn(64).nOut(64).kernelSize(3, 3).stride(2, 2) + .layer(3, Convolution2D.builder().nIn(64).nOut(64).kernelSize(3, 3).stride(2, 2) .activation(Activation.LEAKYRELU).build()) - .layer(4, new DropoutLayer.Builder().dropOut(0.5).build()) - .layer(5, new DenseLayer.Builder().nIn(64 * 2 * 2).nOut(1).activation(Activation.SIGMOID).build()) - .layer(6, new LossLayer.Builder().lossFunction(LossFunctions.LossFunction.XENT).build()) + .layer(4, DropoutLayer.builder().dropOut(0.5).build()) + .layer(5, DenseLayer.builder().nIn(64 * 2 * 2).nOut(1).activation(Activation.SIGMOID).build()) + .layer(6, LossLayer.builder().lossFunction(LossFunctions.LossFunction.XENT.getILossFunction()).build()) .inputPreProcessor(0, new FeedForwardToCnnPreProcessor(height, width, channels)) .inputPreProcessor(4, new CnnToFeedForwardPreProcessor(2, 2, 64)) .inputType(InputType.convolutionalFlat(height, width, channels)) diff --git a/brutex-extended-tests/src/test/java/net/brutex/gan/MnistSimpleGAN.java b/brutex-extended-tests/src/test/java/net/brutex/gan/MnistSimpleGAN.java index be3014f3c..d467f46a4 100644 --- a/brutex-extended-tests/src/test/java/net/brutex/gan/MnistSimpleGAN.java +++ b/brutex-extended-tests/src/test/java/net/brutex/gan/MnistSimpleGAN.java @@ -62,13 +62,13 @@ public class MnistSimpleGAN { .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer) .gradientNormalizationThreshold(100) - .layer(new DenseLayer.Builder().nIn(100).nOut(256).weightInit(WeightInit.NORMAL).build()) - .layer(new ActivationLayer.Builder(new ActivationLReLU(0.2)).build()) - .layer(new DenseLayer.Builder().nIn(256).nOut(512).build()) - .layer(new ActivationLayer.Builder(new ActivationLReLU(0.2)).build()) - .layer(new DenseLayer.Builder().nIn(512).nOut(1024).build()) - .layer(new ActivationLayer.Builder(new ActivationLReLU(0.2)).build()) - .layer(new DenseLayer.Builder().nIn(1024).nOut(784).activation(Activation.TANH).build()) + .layer(DenseLayer.builder().nIn(100).nOut(256).weightInit(WeightInit.NORMAL).build()) + .layer(ActivationLayer.builder(new ActivationLReLU(0.2)).build()) + .layer(DenseLayer.builder().nIn(256).nOut(512).build()) + .layer(ActivationLayer.builder(new ActivationLReLU(0.2)).build()) + .layer(DenseLayer.builder().nIn(512).nOut(1024).build()) + .layer(ActivationLayer.builder(new ActivationLReLU(0.2)).build()) + .layer(DenseLayer.builder().nIn(1024).nOut(784).activation(Activation.TANH).build()) .build(); return new MultiLayerNetwork(genConf); } @@ -83,16 +83,16 @@ public class MnistSimpleGAN { 
.gradientNormalization(GradientNormalization.RenormalizeL2PerLayer) .gradientNormalizationThreshold(100) - .layer(new DenseLayer.Builder().nIn(784).nOut(1024).updater(updater).build()) - .layer(new ActivationLayer.Builder(new ActivationLReLU(0.2)).build()) - .layer(new DropoutLayer.Builder(1 - 0.5).build()) - .layer(new DenseLayer.Builder().nIn(1024).nOut(512).updater(updater).build()) - .layer(new ActivationLayer.Builder(new ActivationLReLU(0.2)).build()) - .layer(new DropoutLayer.Builder(1 - 0.5).build()) - .layer(new DenseLayer.Builder().nIn(512).nOut(256).updater(updater).build()) - .layer(new ActivationLayer.Builder(new ActivationLReLU(0.2)).build()) - .layer(new DropoutLayer.Builder(1 - 0.5).build()) - .layer(new OutputLayer.Builder(LossFunctions.LossFunction.XENT).nIn(256).nOut(1) + .layer(DenseLayer.builder().nIn(784).nOut(1024).updater(updater).build()) + .layer(ActivationLayer.builder(new ActivationLReLU(0.2)).build()) + .layer(DropoutLayer.builder(1 - 0.5).build()) + .layer(DenseLayer.builder().nIn(1024).nOut(512).updater(updater).build()) + .layer(ActivationLayer.builder(new ActivationLReLU(0.2)).build()) + .layer(DropoutLayer.builder(1 - 0.5).build()) + .layer(DenseLayer.builder().nIn(512).nOut(256).updater(updater).build()) + .layer(ActivationLayer.builder(new ActivationLReLU(0.2)).build()) + .layer(DropoutLayer.builder(1 - 0.5).build()) + .layer(OutputLayer.builder(LossFunctions.LossFunction.XENT).nIn(256).nOut(1) .activation(Activation.SIGMOID).updater(updater).build()) .build(); diff --git a/brutex-extended-tests/src/test/java/net/brutex/spark/BrianTest.java b/brutex-extended-tests/src/test/java/net/brutex/spark/BrianTest.java index 75965d7b5..ae1127869 100644 --- a/brutex-extended-tests/src/test/java/net/brutex/spark/BrianTest.java +++ b/brutex-extended-tests/src/test/java/net/brutex/spark/BrianTest.java @@ -288,12 +288,12 @@ public class BrianTest extends BaseSparkSessionTest { .seed(123) .updater(new Nesterovs(0.1, 0.9)) - .layer(0, new DenseLayer.Builder().nIn(5).nOut(20).weightInit(WeightInit.XAVIER) + .layer(0, DenseLayer.builder().nIn(5).nOut(20).weightInit(WeightInit.XAVIER) .activation(Activation.RELU).l2(0.001).build()) - .layer(1, new DenseLayer.Builder().nIn(20).nOut(20).weightInit(WeightInit.XAVIER) + .layer(1, DenseLayer.builder().nIn(20).nOut(20).weightInit(WeightInit.XAVIER) .activation(Activation.RELU).build()) //.layer(2, new DenseLayerConfiguration.Builder().nIn(9).nOut(9).weightInit(WeightInit.XAVIER).activation(Activation.RELU).build()) - .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.XENT).nIn(20).nOut(4) + .layer(2, OutputLayer.builder(LossFunctions.LossFunction.XENT).nIn(20).nOut(4) .weightInit(WeightInit.XAVIER).activation(Activation.SIGMOID).build()) .build(); diff --git a/brutex-extended-tests/src/test/java/net/brutex/spark/BrianTest2.java b/brutex-extended-tests/src/test/java/net/brutex/spark/BrianTest2.java index 9195933ff..da4436a50 100644 --- a/brutex-extended-tests/src/test/java/net/brutex/spark/BrianTest2.java +++ b/brutex-extended-tests/src/test/java/net/brutex/spark/BrianTest2.java @@ -298,10 +298,10 @@ public class BrianTest2 /*extends BaseDL4JTest*/ { .seed(123) .updater(new Nesterovs(0.1, 0.9)) - .layer(0, new DenseLayer.Builder().nIn(5).nOut(20).weightInit(WeightInit.XAVIER).activation(Activation.RELU).l2(0.001).build()) - .layer(1, new DenseLayer.Builder().nIn(20).nOut(20).weightInit(WeightInit.XAVIER).activation(Activation.RELU).build()) + .layer(0, 
DenseLayer.builder().nIn(5).nOut(20).weightInit(WeightInit.XAVIER).activation(Activation.RELU).l2(0.001).build()) + .layer(1, DenseLayer.builder().nIn(20).nOut(20).weightInit(WeightInit.XAVIER).activation(Activation.RELU).build()) //.layer(2, new DenseLayerConfiguration.Builder().nIn(9).nOut(9).weightInit(WeightInit.XAVIER).activation(Activation.RELU).build()) - .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.XENT).nIn(20).nOut(4).weightInit(WeightInit.XAVIER).activation(Activation.SIGMOID).build()) + .layer(2, OutputLayer.builder(LossFunctions.LossFunction.XENT).nIn(20).nOut(4).weightInit(WeightInit.XAVIER).activation(Activation.SIGMOID).build()) .build(); //Define SparkNet diff --git a/brutex-extended-tests/src/test/java/net/brutex/spark/TestServer.java b/brutex-extended-tests/src/test/java/net/brutex/spark/TestServer.java index 0cf2e5676..5e1dee122 100644 --- a/brutex-extended-tests/src/test/java/net/brutex/spark/TestServer.java +++ b/brutex-extended-tests/src/test/java/net/brutex/spark/TestServer.java @@ -87,15 +87,15 @@ public class TestServer { .activation(Activation.RELU) .l2(0) - //.layer(0, new ConvolutionLayer.Builder().nIn(1).kernelSize(1, 5).stride(1,1).padding(0,2).nOut(1).name("1st Filter").updater(new Adam.Builder().learningRate(0.2).build()).build()) - //.layer(1, new ConvolutionLayer.Builder().nIn(1).kernelSize(1, 2).stride(1,2).padding(0,0).nOut(1).name("2nd Filter").updater(new Adam.Builder().learningRate(0.1).build()).build()) + //.layer(0, ConvolutionLayer.builder().nIn(1).kernelSize(1, 5).stride(1,1).padding(0,2).nOut(1).name("1st Filter").updater(new Adam.Builder().learningRate(0.2).build()).build()) + //.layer(1, ConvolutionLayer.builder().nIn(1).kernelSize(1, 2).stride(1,2).padding(0,0).nOut(1).name("2nd Filter").updater(new Adam.Builder().learningRate(0.1).build()).build()) // .layer(1, new DenseLayerConfiguration.Builder().nIn(10).nOut(64).activation(Activation.RELU).build()) - .layer(0, new DenseLayer.Builder().nIn(10).nOut(100).activation(Activation.RELU).l2(0.003).build()) - .layer(1, new LSTM.Builder().nIn(100).nOut(100).activation(Activation.TANH).build()) - .layer(2, new LSTM.Builder().nIn(100).nOut(100).activation(Activation.TANH).build()) - .layer(3, new DenseLayer.Builder().nIn(100).nOut(16).activation(Activation.RELU).l2(0.001).build()) + .layer(0, DenseLayer.builder().nIn(10).nOut(100).activation(Activation.RELU).l2(0.003).build()) + .layer(1, LSTM.builder().nIn(100).nOut(100).activation(Activation.TANH).build()) + .layer(2, LSTM.builder().nIn(100).nOut(100).activation(Activation.TANH).build()) + .layer(3, DenseLayer.builder().nIn(100).nOut(16).activation(Activation.RELU).l2(0.001).build()) - .layer(4, new OutputLayer.Builder().nIn(16).nOut(numClasses) + .layer(4, OutputLayer.builder().nIn(16).nOut(numClasses) .activation(Activation.SOFTMAX) .lossFunction(new LossMCXENT()) .build() diff --git a/brutex-extended-tests/src/test/java/net/brutex/spark/TestServer2.java b/brutex-extended-tests/src/test/java/net/brutex/spark/TestServer2.java index db8a74ae7..1ba977178 100644 --- a/brutex-extended-tests/src/test/java/net/brutex/spark/TestServer2.java +++ b/brutex-extended-tests/src/test/java/net/brutex/spark/TestServer2.java @@ -127,15 +127,15 @@ public class TestServer2 { .activation(Activation.RELU) .l2(0) - //.layer(0, new ConvolutionLayer.Builder().nIn(1).kernelSize(1, 5).stride(1,1).padding(0,2).nOut(1).name("1st Filter").updater(new Adam.Builder().learningRate(0.2).build()).build()) - //.layer(1, new 
ConvolutionLayer.Builder().nIn(1).kernelSize(1, 2).stride(1,2).padding(0,0).nOut(1).name("2nd Filter").updater(new Adam.Builder().learningRate(0.1).build()).build()) + //.layer(0, ConvolutionLayer.builder().nIn(1).kernelSize(1, 5).stride(1,1).padding(0,2).nOut(1).name("1st Filter").updater(new Adam.Builder().learningRate(0.2).build()).build()) + //.layer(1, ConvolutionLayer.builder().nIn(1).kernelSize(1, 2).stride(1,2).padding(0,0).nOut(1).name("2nd Filter").updater(new Adam.Builder().learningRate(0.1).build()).build()) // .layer(1, new DenseLayerConfiguration.Builder().nIn(10).nOut(64).activation(Activation.RELU).build()) - .layer(0, new DenseLayer.Builder().nIn(10).nOut(100).activation(Activation.RELU).l2(0.003).build()) - .layer(1, new LSTM.Builder().nIn(100).nOut(100).activation(Activation.TANH).build()) - .layer(2, new LSTM.Builder().nIn(100).nOut(100).activation(Activation.TANH).build()) - .layer(3, new DenseLayer.Builder().nIn(100).nOut(16).activation(Activation.RELU).l2(0.001).build()) + .layer(0, DenseLayer.builder().nIn(10).nOut(100).activation(Activation.RELU).l2(0.003).build()) + .layer(1, LSTM.builder().nIn(100).nOut(100).activation(Activation.TANH).build()) + .layer(2, LSTM.builder().nIn(100).nOut(100).activation(Activation.TANH).build()) + .layer(3, DenseLayer.builder().nIn(100).nOut(16).activation(Activation.RELU).l2(0.001).build()) - .layer(4, new OutputLayer.Builder().nIn(16).nOut(numClasses) + .layer(4, OutputLayer.builder().nIn(16).nOut(numClasses) .activation(Activation.SOFTMAX) .lossFunction(new LossMCXENT()) .build() diff --git a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/IntegrationTestRunner.java b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/IntegrationTestRunner.java index 786c6d6b9..a46908a1b 100644 --- a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/IntegrationTestRunner.java +++ b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/IntegrationTestRunner.java @@ -832,7 +832,7 @@ public class IntegrationTestRunner { if(m instanceof MultiLayerNetwork){ paramPrefix = l.getIndex() + "_"; } else { - paramPrefix = l.getLayerConfiguration().getLayerName() + "_"; + paramPrefix = l.getLayerConfiguration().getName() + "_"; } Map paramTable = l.getParamTable(); for(Map.Entry e : paramTable.entrySet()){ diff --git a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/CNN1DTestCases.java b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/CNN1DTestCases.java index ec116ca31..bd4b122f1 100644 --- a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/CNN1DTestCases.java +++ b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/CNN1DTestCases.java @@ -88,11 +88,11 @@ public class CNN1DTestCases { .convolutionMode(ConvolutionMode.Same)) .graphBuilder() .addInputs("in") - .layer("0", new Convolution1DLayer.Builder().nOut(32).activation(Activation.TANH).kernelSize(3).stride(1).build(), "in") - .layer("1", new Subsampling1DLayer.Builder().kernelSize(2).stride(1).poolingType(SubsamplingLayer.PoolingType.MAX).build(), "0") - .layer("2", new Cropping1D(1), "1") - .layer("3", new ZeroPadding1DLayer(1), "2") - .layer("out", new RnnOutputLayer.Builder().activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).nOut(nOut).build(), "3") + .layer("0", Convolution1DLayer.builder().nOut(32).activation(Activation.TANH).kernelSize(3).stride(1).build(), "in") + .layer("1", 
Subsampling1DLayer.builder().kernelSize(2).stride(1).poolingType(SubsamplingLayer.PoolingType.MAX.toPoolingType()).build(), "0") + .layer("2", Cropping1D.builder(1).build(), "1") + .layer("3", ZeroPadding1DLayer.builder(1).build(), "2") + .layer("out", RnnOutputLayer.builder().activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).nOut(nOut).build(), "3") .setInputTypes(InputType.recurrent(nOut)) .setOutputs("out") .build(); diff --git a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/CNN2DTestCases.java b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/CNN2DTestCases.java index 4b7b3f7a3..779414c70 100644 --- a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/CNN2DTestCases.java +++ b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/CNN2DTestCases.java @@ -105,30 +105,30 @@ public class CNN2DTestCases { .weightInit(WeightInit.XAVIER) .updater(new Nesterovs(0.01, 0.9)) - .layer(0, new ConvolutionLayer.Builder(5, 5) + .layer(0, ConvolutionLayer.builder(5, 5) //nIn and nOut specify depth. nIn here is the nChannels and nOut is the number of filters to be applied .nIn(nChannels) .stride(1, 1) .nOut(20) .activation(Activation.IDENTITY) .build()) - .layer(1, new SubsamplingLayer.Builder(PoolingType.MAX) + .layer(1, SubsamplingLayer.builder(PoolingType.MAX) .kernelSize(2, 2) .stride(2, 2) .build()) - .layer(2, new ConvolutionLayer.Builder(5, 5) + .layer(2, ConvolutionLayer.builder(5, 5) //Note that nIn need not be specified in later layers .stride(1, 1) .nOut(50) .activation(Activation.IDENTITY) .build()) - .layer(3, new SubsamplingLayer.Builder(PoolingType.MAX) + .layer(3, SubsamplingLayer.builder(PoolingType.MAX) .kernelSize(2, 2) .stride(2, 2) .build()) - .layer(4, new DenseLayer.Builder().activation(Activation.RELU) + .layer(4, DenseLayer.builder().activation(Activation.RELU) .nOut(500).build()) - .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) + .layer(5, OutputLayer.builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .nOut(outputNum) .activation(Activation.SOFTMAX) .build()) @@ -221,7 +221,7 @@ public class CNN2DTestCases { .seed(12345) .build()) .removeVertexKeepConnections("predictions") - .addLayer("predictions", new OutputLayer.Builder() + .addLayer("predictions", OutputLayer.builder() .nIn(4096) .nOut(200) //Tiny imagenet .build(), "fc2") @@ -321,7 +321,7 @@ public class CNN2DTestCases { .removeVertexKeepConnections("conv2d_9") .removeVertexAndConnections("outputs") .addLayer("convolution2d_9", - new ConvolutionLayer.Builder(1,1) + ConvolutionLayer.builder(1,1) .nIn(1024) .nOut(nBoxes * (5 + nClasses)) .stride(1,1) @@ -331,10 +331,10 @@ public class CNN2DTestCases { .build(), "leaky_re_lu_8") .addLayer("outputs", - new Yolo2OutputLayer.Builder() + Yolo2OutputLayer.builder() .lambdaNoObj(lambdaNoObj) .lambdaCoord(lambdaCoord) - .boundingBoxPriors(priors) + .boundingBoxes(priors) .build(), "convolution2d_9") .setOutputs("outputs") @@ -417,32 +417,32 @@ public class CNN2DTestCases { .weightInit(WeightInit.XAVIER) .updater(new Nesterovs(0.01, 0.9)) - .layer(0, new ConvolutionLayer.Builder(5, 5) + .layer(0, ConvolutionLayer.builder(5, 5) //nIn and nOut specify depth. 
nIn here is the nChannels and nOut is the number of filters to be applied .nIn(1) .stride(1, 1) .nOut(20) .activation(Activation.IDENTITY) .build()) - .layer(1, new SubsamplingLayer.Builder(PoolingType.MAX) + .layer(1, SubsamplingLayer.builder(PoolingType.MAX) .kernelSize(2, 2) .stride(2, 2) .build()) - .layer(2, new ConvolutionLayer.Builder(5, 5) + .layer(2, ConvolutionLayer.builder(5, 5) //Note that nIn need not be specified in later layers .stride(1, 1) .nOut(50) .activation(Activation.IDENTITY) .dropOut(0.5) //**** Dropout on conv layer .build()) - .layer(3, new SubsamplingLayer.Builder(PoolingType.MAX) + .layer(3, SubsamplingLayer.builder(PoolingType.MAX) .kernelSize(2, 2) .stride(2, 2) .build()) - .layer(4, new DenseLayer.Builder().activation(Activation.RELU) + .layer(4, DenseLayer.builder().activation(Activation.RELU) .dropOut(0.5) //**** Dropout on dense layer .nOut(500).build()) - .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) + .layer(5, OutputLayer.builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .nOut(10) .activation(Activation.SOFTMAX) .build()) diff --git a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/CNN3DTestCases.java b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/CNN3DTestCases.java index 157116ba9..5e8b0bf34 100644 --- a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/CNN3DTestCases.java +++ b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/CNN3DTestCases.java @@ -82,18 +82,18 @@ public class CNN3DTestCases { .updater(new Nesterovs(0.01, 0.9)) .convolutionMode(ConvolutionMode.Same) - .layer(new Convolution3D.Builder(3,3,3) + .layer(Convolution3D.builder(3,3,3) .dataFormat(Convolution3D.DataFormat.NCDHW) .nIn(nChannels) .stride(2, 2, 2) .nOut(8) .activation(Activation.IDENTITY) .build()) - .layer(new Subsampling3DLayer.Builder(PoolingType.MAX) + .layer(Subsampling3DLayer.builder(PoolingType.MAX) .kernelSize(2, 2, 2) .stride(2, 2, 2) .build()) - .layer(new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) + .layer(OutputLayer.builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .nOut(outputNum) .activation(Activation.SOFTMAX) .build()) diff --git a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/MLPTestCases.java b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/MLPTestCases.java index 69e9fa4cd..559fb3e6d 100644 --- a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/MLPTestCases.java +++ b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/MLPTestCases.java @@ -104,8 +104,8 @@ public class MLPTestCases { .build())) .l1(1e-3).l2(1e-3) - .layer(new DenseLayer.Builder().activation(Activation.TANH).nOut(64).build()) - .layer(new OutputLayer.Builder().nOut(10) + .layer(DenseLayer.builder().activation(Activation.TANH).nOut(64).build()) + .layer(OutputLayer.builder().nOut(10) .lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX) .build()) @@ -202,11 +202,11 @@ public class MLPTestCases { .seed(seed) .updater(new Nesterovs(learningRate, 0.9)) - .layer(0, new DenseLayer.Builder().nIn(numInputs).nOut(numHiddenNodes) + .layer(0, DenseLayer.builder().nIn(numInputs).nOut(numHiddenNodes) .weightInit(WeightInit.XAVIER) .activation(Activation.RELU) .build()) - .layer(1, new 
OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) + .layer(1, OutputLayer.builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .weightInit(WeightInit.XAVIER) .activation(Activation.SOFTMAX) .nIn(numHiddenNodes).nOut(numOutputs).build()) diff --git a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/RNNTestCases.java b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/RNNTestCases.java index edb312c0f..ceabef0b0 100644 --- a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/RNNTestCases.java +++ b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/RNNTestCases.java @@ -119,11 +119,11 @@ public class RNNTestCases { .weightInit(WeightInit.XAVIER) .updater(new Adam(1e-3)) - .layer(0, new LSTM.Builder().nIn(iter.inputColumns()).nOut(lstmLayerSize) + .layer(0, LSTM.builder().nIn(iter.inputColumns()).nOut(lstmLayerSize) .activation(Activation.TANH).build()) - .layer(1, new LSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize) + .layer(1, LSTM.builder().nIn(lstmLayerSize).nOut(lstmLayerSize) .activation(Activation.TANH).build()) - .layer(2, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX) //MCXENT + softmax for classification + .layer(2, RnnOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX) //MCXENT + softmax for classification .nIn(lstmLayerSize).nOut(nOut).build()) .backpropType(BackpropType.TruncatedBPTT).tbpttFwdLength(tbpttLength).tbpttBackLength(tbpttLength) @@ -201,9 +201,9 @@ public class RNNTestCases { .updater(new Adam(5e-2)) .l1(1e-3).l2(1e-3) - .layer(0, new LSTM.Builder().activation(Activation.TANH).nOut(10).build()) - .layer(new GlobalPoolingLayer.Builder().poolingType(PoolingType.AVG).build()) - .layer(new OutputLayer.Builder().nOut(6) + .layer(0, LSTM.builder().activation(Activation.TANH).nOut(10).build()) + .layer(GlobalPoolingLayer.builder().poolingType(PoolingType.AVG).build()) + .layer(OutputLayer.builder().nOut(6) .lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX) .build()) @@ -322,9 +322,9 @@ public class RNNTestCases { .updater(new Adam(5e-2)) .l1(1e-3).l2(1e-3) - .layer(0, new Bidirectional(new LSTM.Builder().activation(Activation.TANH).nOut(10).build())) - .layer(new GlobalPoolingLayer.Builder().poolingType(PoolingType.AVG).build()) - .layer(new OutputLayer.Builder().nOut(6) + .layer(0, Bidirectional.builder(LSTM.builder().activation(Activation.TANH).nOut(10).build()).build()) + .layer(GlobalPoolingLayer.builder().poolingType(PoolingType.AVG).build()) + .layer(OutputLayer.builder().nOut(6) .lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX) .build()) diff --git a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/UnsupervisedTestCases.java b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/UnsupervisedTestCases.java index 84b60ffd6..8aa50a231 100644 --- a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/UnsupervisedTestCases.java +++ b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/UnsupervisedTestCases.java @@ -79,7 +79,7 @@ public class UnsupervisedTestCases { .weightInit(WeightInit.XAVIER) .l2(1e-4) - .layer(0, new VariationalAutoencoder.Builder() + .layer(0, VariationalAutoencoder.builder() .activation(Activation.TANH) 
.encoderLayerSizes(256, 256) //2 encoder layers, each of size 256 .decoderLayerSizes(256, 256) //2 decoder layers, each of size 256 diff --git a/cavis-common-platform/build.gradle b/cavis-common-platform/build.gradle index 946e5df48..df72daf30 100644 --- a/cavis-common-platform/build.gradle +++ b/cavis-common-platform/build.gradle @@ -71,7 +71,7 @@ dependencies { // api "com.fasterxml.jackson.module:jackson-module-scala_${scalaVersion}" - api "org.projectlombok:lombok:1.18.24" + api "org.projectlombok:lombok:1.18.26" /*Logging*/ api 'org.slf4j:slf4j-api:2.0.3' diff --git a/cavis-datavec/cavis-datavec-api/src/main/java/org/datavec/api/transform/analysis/columns/NumericalColumnAnalysis.java b/cavis-datavec/cavis-datavec-api/src/main/java/org/datavec/api/transform/analysis/columns/NumericalColumnAnalysis.java index 14a727274..c814d2b73 100644 --- a/cavis-datavec/cavis-datavec-api/src/main/java/org/datavec/api/transform/analysis/columns/NumericalColumnAnalysis.java +++ b/cavis-datavec/cavis-datavec-api/src/main/java/org/datavec/api/transform/analysis/columns/NumericalColumnAnalysis.java @@ -90,7 +90,7 @@ public abstract class NumericalColumnAnalysis implements ColumnAnalysis { public abstract double getMaxDouble(); @SuppressWarnings("unchecked") - public abstract static class Builder> { + public static abstract class Builder> { protected double mean; protected double sampleStdev; protected double sampleVariance; diff --git a/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/autodiff/samediff/internal/InferenceSession.java b/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/autodiff/samediff/internal/InferenceSession.java index 8d3b414ab..f7abdc755 100644 --- a/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/autodiff/samediff/internal/InferenceSession.java +++ b/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/autodiff/samediff/internal/InferenceSession.java @@ -970,7 +970,7 @@ public class InferenceSession extends AbstractSession + * Returns 2 INDArrays:
+ * (a) The gradient dL/dz, calculated from dL/da, and
+ * (b) The parameter gradients dL/dW, where W denotes the weights of the activation function. For activation functions + * with no trainable weights, this will be null. + * + * @param in Input, before applying the activation function (z, or 'preOut') + * @param epsilon Gradient to be backpropagated: dL/da, where L is the loss function + * @return dL/dz and dL/dW, for weights W (null if activation function has no weights) + */ + @Override + public Pair<INDArray, INDArray> backprop(INDArray in, INDArray epsilon) { + return getActivationFunction().backprop(in, epsilon); + } + + /** + * Returns the number of trainable parameters of the underlying activation function for the given input size. + * @param inputSize size of the input to the activation function + * @return the number of trainable parameters + */ + @Override + public int numParams(int inputSize) { + return getActivationFunction().numParams(inputSize); + } } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/RandomTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/RandomTests.java index d939dab81..ac94c2310 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/RandomTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/RandomTests.java @@ -42,9 +42,9 @@ public class RandomTests extends BaseDL4JTest { final NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new RmsProp()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(28 * 28).nOut(10) + .layer(0, org.deeplearning4j.nn.conf.layers.DenseLayer.builder().nIn(28 * 28).nOut(10) .activation(Activation.TANH).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .layer(1, org.deeplearning4j.nn.conf.layers.OutputLayer.builder( LossFunctions.LossFunction.MCXENT).nIn(10).nOut(10) .activation(Activation.SOFTMAX).build()) .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/TestUtils.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/TestUtils.java index 495b21e18..9bc749b70 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/TestUtils.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/TestUtils.java @@ -73,7 +73,7 @@ public class TestUtils { throw new RuntimeException(e); } - //Also check the NeuralNetConfiguration is serializable (required by Spark etc) + //Also check the NeuralNetConfiguration is serializable (required by Spark etc.)
NeuralNetConfiguration conf = net.getNetConfiguration(); serializeDeserializeJava(conf); @@ -317,14 +317,14 @@ public class TestUtils { for(Layer l : layers){ //Don't use instanceof here - there are sub conv subclasses if(l.getClass() == ConvolutionLayer.class || l instanceof SubsamplingLayer || l instanceof BatchNormalization || l instanceof LSTM){ - Preconditions.checkNotNull(l.getHelper(), l.getLayerConfiguration().getLayerName()); + Preconditions.checkNotNull(l.getHelper(), l.getLayerConfiguration().getName()); } } } public static void assertHelpersAbsent(Layer[] layers) throws Exception { for(Layer l : layers){ - Preconditions.checkState(l.getHelper() == null, l.getLayerConfiguration().getLayerName()); + Preconditions.checkState(l.getHelper() == null, l.getLayerConfiguration().getName()); } } } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/datasets/datavec/RecordReaderDataSetiteratorTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/datasets/datavec/RecordReaderDataSetiteratorTest.java index b5a139779..9eb411db1 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/datasets/datavec/RecordReaderDataSetiteratorTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/datasets/datavec/RecordReaderDataSetiteratorTest.java @@ -473,9 +473,7 @@ public class RecordReaderDataSetiteratorTest extends BaseDL4JTest { public Pair makeRandomCSV(String tempFile, int nLines, int nFeatures) throws IOException { - File temp = temporaryFolder; - temp.mkdirs(); - temp.deleteOnExit(); + File temp = new File(temporaryFolder, "makeRandomCSV.csv"); Random rand = new Random(12345); double[][] dArr = new double[nLines][nFeatures + 1]; diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/datasets/datavec/RecordReaderMultiDataSetIteratorTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/datasets/datavec/RecordReaderMultiDataSetIteratorTest.java index e3c4c38fa..9292f90c9 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/datasets/datavec/RecordReaderMultiDataSetIteratorTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/datasets/datavec/RecordReaderMultiDataSetIteratorTest.java @@ -774,7 +774,7 @@ public class RecordReaderMultiDataSetIteratorTest extends BaseDL4JTest { @Test public void testExcludeStringColCSV() throws Exception { - File csvFile = temporaryFolder; + File csvFile = new File(temporaryFolder, "test.csv"); StringBuilder sb = new StringBuilder(); for(int i=1; i<=10; i++ ){ diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/datasets/iterator/DataSetIteratorTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/datasets/iterator/DataSetIteratorTest.java index be740689b..cf3aff480 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/datasets/iterator/DataSetIteratorTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/datasets/iterator/DataSetIteratorTest.java @@ -41,7 +41,9 @@ import org.deeplearning4j.nn.weights.WeightInit; import org.deeplearning4j.optimize.listeners.CollectScoresIterationListener; import org.deeplearning4j.optimize.listeners.ScoreIterationListener; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.DataSet; @@ -170,11 +172,11 @@ public class DataSetIteratorTest extends 
BaseDL4JTest { NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().seed(seed) .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .layer(0, new ConvolutionLayer.Builder(5, 5).nIn(numChannels).nOut(6) + .layer(0, ConvolutionLayer.builder(5, 5).nIn(numChannels).nOut(6) .weightInit(WeightInit.XAVIER).activation(Activation.RELU).build()) - .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] {2, 2}) + .layer(1, SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX, new int[] {2, 2}) .stride(1, 1).build()) - .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) + .layer(2, OutputLayer.builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .nOut(outputNum).weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX) .build()) .inputType(InputType.convolutionalFlat(numRows, numColumns, numChannels)); @@ -207,7 +209,8 @@ public class DataSetIteratorTest extends BaseDL4JTest { } - @Test //@Ignore //Ignored for now - CIFAR iterator needs work - https://github.com/eclipse/deeplearning4j/issues/4673 + @Test @Timeout(1200) @Disabled("Runs quite some time.") +//Ignored for now - CIFAR iterator needs work - https://github.com/eclipse/deeplearning4j/issues/4673 public void testCifarModel() throws Exception { // Streaming runCifar(false); @@ -230,11 +233,11 @@ public class DataSetIteratorTest extends BaseDL4JTest { NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().seed(seed) .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .layer(0, new ConvolutionLayer.Builder(5, 5).nIn(channels).nOut(6).weightInit(WeightInit.XAVIER) + .layer(0, ConvolutionLayer.builder(5, 5).nIn(channels).nOut(6).weightInit(WeightInit.XAVIER) .activation(Activation.RELU).build()) - .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] {2, 2}) + .layer(1, SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX, new int[] {2, 2}) .build()) - .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) + .layer(2, OutputLayer.builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .nOut(outputNum).weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX) .build()) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/datasets/iterator/TestFileIterators.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/datasets/iterator/TestFileIterators.java index c2c8ca166..264e3cb06 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/datasets/iterator/TestFileIterators.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/datasets/iterator/TestFileIterators.java @@ -76,10 +76,12 @@ public class TestFileIterators extends BaseDL4JTest { assertEquals(exp, act); //Test multiple directories - File f2a = new File(folder2, "f2a"); + f2a.mkdirs(); File f2b = new File(folder2, "f2b"); + f2b.mkdirs(); File f2c = new File(folder2, "f2c"); + f2c.mkdirs(); d1.save(new File(f2a, "d1.bin")); d2.save(new File(f2a, "d2.bin")); d3.save(new File(f2b, "d3.bin")); @@ -188,8 +190,11 @@ public class TestFileIterators extends BaseDL4JTest { //Test multiple directories File f2a = new File(folder2, "2-f2a"); + f2a.mkdirs(); File f2b = new File(folder2, "2-f2b"); + f2b.mkdirs(); File f2c = new File(folder2, "2-f2C"); + 
f2c.mkdirs(); d1.save(new File(f2a, "d1.bin")); d2.save(new File(f2a, "d2.bin")); d3.save(new File(f2b, "d3.bin")); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStopping.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStopping.java index 0923ba407..a4923e5da 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStopping.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStopping.java @@ -135,8 +135,8 @@ public class TestEarlyStopping extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .seed(12345) .updater(new Sgd(0.5)).weightInit(WeightInit.XAVIER) - .layer(new DenseLayer.Builder().nIn(4).nOut(4).activation(Activation.TANH).build()) - .layer(new OutputLayer.Builder().nIn(4).nOut(3) + .layer(DenseLayer.builder().nIn(4).nOut(4).activation(Activation.TANH).build()) + .layer(OutputLayer.builder().nIn(4).nOut(3) .activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); @@ -221,7 +221,7 @@ public class TestEarlyStopping extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(0.01)).weightInit(WeightInit.XAVIER) - .layer(0, new OutputLayer.Builder().nIn(4).nOut(3) + .layer(0, OutputLayer.builder().nIn(4).nOut(3) .activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); @@ -250,7 +250,7 @@ public class TestEarlyStopping extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(0.001)).weightInit(WeightInit.XAVIER) - .layer(0, new OutputLayer.Builder().nIn(4).nOut(3) + .layer(0, OutputLayer.builder().nIn(4).nOut(3) .activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); @@ -300,7 +300,7 @@ public class TestEarlyStopping extends BaseDL4JTest { .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(5.0)) //Intentionally huge LR .weightInit(WeightInit.XAVIER) - .layer(0, new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX) + .layer(0, OutputLayer.builder().nIn(4).nOut(3).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -338,7 +338,7 @@ public class TestEarlyStopping extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(1e-6)).weightInit(WeightInit.XAVIER) - .layer(0, new OutputLayer.Builder().nIn(4).nOut(3) + .layer(0, OutputLayer.builder().nIn(4).nOut(3) .activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); @@ -381,7 +381,7 @@ public class TestEarlyStopping extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(0.0)).weightInit(WeightInit.XAVIER) - .layer(0, new OutputLayer.Builder().nIn(4).nOut(3) + .layer(0, OutputLayer.builder().nIn(4).nOut(3) .activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); @@ -421,11 +421,11 @@ public class TestEarlyStopping 
extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(123) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Nesterovs(0.0,0.9)) - .layer(0, new DenseLayer.Builder().nIn(1).nOut(20) + .layer(0, DenseLayer.builder().nIn(1).nOut(20) .weightInit(WeightInit.XAVIER).activation( Activation.TANH) .build()) - .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MSE).weightInit(WeightInit.XAVIER) + .layer(1, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).weightInit(WeightInit.XAVIER) .activation(Activation.IDENTITY).weightInit(WeightInit.XAVIER).nIn(20).nOut(1) .build()) .build(); @@ -468,7 +468,7 @@ public class TestEarlyStopping extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(0.001)).weightInit(WeightInit.XAVIER) - .layer(0, new OutputLayer.Builder().nIn(4).nOut(3) + .layer(0, OutputLayer.builder().nIn(4).nOut(3) .activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); @@ -506,7 +506,7 @@ public class TestEarlyStopping extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(0.001)).weightInit(WeightInit.XAVIER) - .layer(0, new OutputLayer.Builder().nIn(4).nOut(3) + .layer(0, OutputLayer.builder().nIn(4).nOut(3) .activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); @@ -570,8 +570,8 @@ public class TestEarlyStopping extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() - .layer(new DenseLayer.Builder().nIn(784).nOut(32).build()) - .layer(new OutputLayer.Builder().nIn(32).nOut(784).activation(Activation.SIGMOID).lossFunction(LossFunctions.LossFunction.MSE).build()) + .layer(DenseLayer.builder().nIn(784).nOut(32).build()) + .layer(OutputLayer.builder().nIn(32).nOut(784).activation(Activation.SIGMOID).lossFunction(LossFunctions.LossFunction.MSE).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -613,7 +613,7 @@ public class TestEarlyStopping extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() - .layer(new AutoEncoder.Builder().nIn(784).nOut(32).build()) + .layer(AutoEncoder.builder().nIn(784).nOut(32).build()) .build(); @@ -656,7 +656,7 @@ public class TestEarlyStopping extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() - .layer(new VariationalAutoencoder.Builder() + .layer(VariationalAutoencoder.builder() .nIn(784).nOut(32) .encoderLayerSizes(64) .decoderLayerSizes(64) @@ -701,7 +701,7 @@ public class TestEarlyStopping extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() - .layer(new VariationalAutoencoder.Builder() + .layer(VariationalAutoencoder.builder() .nIn(784).nOut(32) .encoderLayerSizes(64) .decoderLayerSizes(64) @@ -748,8 +748,8 @@ public class TestEarlyStopping extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() - .layer(new DenseLayer.Builder().nIn(784).nOut(32).build()) - .layer(new OutputLayer.Builder().nIn(32).nOut(10).activation(Activation.SOFTMAX).build()) + .layer(DenseLayer.builder().nIn(784).nOut(32).build()) + .layer(OutputLayer.builder().nIn(32).nOut(10).activation(Activation.SOFTMAX).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -785,7 +785,7 
@@ public class TestEarlyStopping extends BaseDL4JTest { public void testEarlyStoppingListeners() { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .updater(new Sgd(0.001)).weightInit(WeightInit.XAVIER) - .layer(0, new OutputLayer.Builder().nIn(4).nOut(3) + .layer(0, OutputLayer.builder().nIn(4).nOut(3) .activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); @@ -868,14 +868,14 @@ public class TestEarlyStopping extends BaseDL4JTest { .ClipElementWiseAbsoluteValue) .gradientNormalizationThreshold(1.0) - .layer(0, new LSTM.Builder() + .layer(0, LSTM.builder() .nIn(10) .nOut(10) .activation(Activation.TANH) - .gateActivationFunction(Activation.SIGMOID) + .gateActivationFunction(Activation.SIGMOID.getActivationFunction()) .dropOut(0.5) .build()) - .layer(1, new RnnOutputLayer.Builder() + .layer(1, RnnOutputLayer.builder() .nIn(10) .nOut(outputs) .activation(Activation.SOFTMAX) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStoppingCompGraph.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStoppingCompGraph.java index 22b739f89..d12dc27ab 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStoppingCompGraph.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStoppingCompGraph.java @@ -79,7 +79,7 @@ public class TestEarlyStoppingCompGraph extends BaseDL4JTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(0.001)).weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in") - .addLayer("0", new OutputLayer.Builder().nIn(4).nOut(3) + .addLayer("0", OutputLayer.builder().nIn(4).nOut(3) .activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build(), "in") .setOutputs("0").build(); @@ -124,7 +124,7 @@ public class TestEarlyStoppingCompGraph extends BaseDL4JTest { .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(5.0)) //Intentionally huge LR .weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in") - .addLayer("0", new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX) + .addLayer("0", OutputLayer.builder().nIn(4).nOut(3).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build(), "in") .setOutputs("0").build(); ComputationGraph net = new ComputationGraph(conf); @@ -160,7 +160,7 @@ public class TestEarlyStoppingCompGraph extends BaseDL4JTest { .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(1e-6)).weightInit(WeightInit.XAVIER).graphBuilder() .addInputs("in") - .addLayer("0", new OutputLayer.Builder().nIn(4).nOut(3) + .addLayer("0", OutputLayer.builder().nIn(4).nOut(3) .activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build(), "in") .setOutputs("0").build(); @@ -202,7 +202,7 @@ public class TestEarlyStoppingCompGraph extends BaseDL4JTest { .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(0.0)).weightInit(WeightInit.XAVIER).graphBuilder() .addInputs("in") - .addLayer("0", new OutputLayer.Builder().nIn(4).nOut(3) + .addLayer("0", OutputLayer.builder().nIn(4).nOut(3) .activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build(), "in") .setOutputs("0").build(); @@ -236,7 +236,7 @@ public class TestEarlyStoppingCompGraph extends 
BaseDL4JTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(0.001)).weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in") - .addLayer("0", new OutputLayer.Builder().nIn(4).nOut(3) + .addLayer("0", OutputLayer.builder().nIn(4).nOut(3) .activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build(), "in") .setOutputs("0").build(); @@ -300,8 +300,8 @@ public class TestEarlyStoppingCompGraph extends BaseDL4JTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") - .layer("0", new DenseLayer.Builder().nIn(784).nOut(32).build(), "in") - .layer("1", new OutputLayer.Builder().nIn(32).nOut(784).activation(Activation.SIGMOID).lossFunction(LossFunctions.LossFunction.MSE).build(), "0") + .layer("0", DenseLayer.builder().nIn(784).nOut(32).build(), "in") + .layer("1", OutputLayer.builder().nIn(32).nOut(784).activation(Activation.SIGMOID).lossFunction(LossFunctions.LossFunction.MSE).build(), "0") .setOutputs("1") .build(); @@ -346,7 +346,7 @@ public class TestEarlyStoppingCompGraph extends BaseDL4JTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") - .layer("0", new AutoEncoder.Builder().nIn(784).nOut(32).build(), "in") + .layer("0", AutoEncoder.builder().nIn(784).nOut(32).build(), "in") .setOutputs("0") .build(); @@ -391,7 +391,7 @@ public class TestEarlyStoppingCompGraph extends BaseDL4JTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") - .layer("0", new VariationalAutoencoder.Builder() + .layer("0", VariationalAutoencoder.builder() .nIn(784).nOut(32) .encoderLayerSizes(64) .decoderLayerSizes(64) @@ -439,7 +439,7 @@ public class TestEarlyStoppingCompGraph extends BaseDL4JTest { .updater(new Adam(1e-5)) .graphBuilder() .addInputs("in") - .layer("0", new VariationalAutoencoder.Builder() + .layer("0", VariationalAutoencoder.builder() .nIn(784).nOut(32) .encoderLayerSizes(64) .decoderLayerSizes(64) @@ -489,8 +489,8 @@ public class TestEarlyStoppingCompGraph extends BaseDL4JTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") - .layer("0", new DenseLayer.Builder().nIn(784).nOut(32).build(), "in") - .layer("1", new OutputLayer.Builder().nIn(32).nOut(10).activation(Activation.SOFTMAX).build(), "0") + .layer("0", DenseLayer.builder().nIn(784).nOut(32).build(), "in") + .layer("1", OutputLayer.builder().nIn(32).nOut(10).activation(Activation.SOFTMAX).build(), "0") .setOutputs("1") .build(); @@ -530,7 +530,7 @@ public class TestEarlyStoppingCompGraph extends BaseDL4JTest { .updater(new Sgd(0.001)).weightInit(WeightInit.XAVIER) .graphBuilder() .addInputs("in") - .layer("0", new OutputLayer.Builder().nIn(4).nOut(3) + .layer("0", OutputLayer.builder().nIn(4).nOut(3) .activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build(), "in") .setOutputs("0") diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/EvalTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/EvalTest.java index 04d6f440f..255ac4ec2 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/EvalTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/EvalTest.java @@ -73,9 +73,9 @@ public class EvalTest extends BaseDL4JTest { 
.optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT).seed(42) .updater(new Sgd(1e-6)).list() - .layer(0, new DenseLayer.Builder().nIn(4).nOut(2).activation(Activation.TANH) + .layer(0, DenseLayer.builder().nIn(4).nOut(2).activation(Activation.TANH) .weightInit(WeightInit.XAVIER).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .layer(1, org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).nIn(2).nOut(3).weightInit(WeightInit.XAVIER) .activation(Activation.SOFTMAX).build()) @@ -180,7 +180,7 @@ public class EvalTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(new Sgd(0.1)) .list() - .layer(0, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(0, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(4).nOut(3).build()) .build(); @@ -300,8 +300,8 @@ public class EvalTest extends BaseDL4JTest { .trainingWorkspaceMode(ws) .inferenceWorkspaceMode(ws) .list() - .layer(new LSTM.Builder().nIn(nIn).nOut(layerSize).build()) - .layer(new RnnOutputLayer.Builder().nIn(layerSize).nOut(nOut) + .layer(LSTM.builder().nIn(nIn).nOut(layerSize).build()) + .layer(RnnOutputLayer.builder().nIn(layerSize).nOut(nOut) .activation(Activation.SOFTMAX) .build()) .build(); @@ -311,8 +311,8 @@ public class EvalTest extends BaseDL4JTest { .trainingWorkspaceMode(ws) .inferenceWorkspaceMode(ws) .list() - .layer(new LSTM.Builder().nIn(nIn).nOut(layerSize).build()) - .layer(new RnnOutputLayer.Builder().nIn(layerSize).nOut(nOut) + .layer(LSTM.builder().nIn(nIn).nOut(layerSize).build()) + .layer(RnnOutputLayer.builder().nIn(layerSize).nOut(nOut) .activation(Activation.SOFTMAX).build()) .tbpttFwdLength(10).tbpttBackLength(10) .backpropType(BackpropType.TruncatedBPTT) @@ -377,8 +377,8 @@ public class EvalTest extends BaseDL4JTest { .inferenceWorkspaceMode(ws) .graphBuilder() .addInputs("in") - .addLayer("0", new LSTM.Builder().nIn(nIn).nOut(layerSize).build(), "in") - .addLayer("1", new RnnOutputLayer.Builder().nIn(layerSize).nOut(nOut) + .addLayer("0", LSTM.builder().nIn(nIn).nOut(layerSize).build(), "in") + .addLayer("1", RnnOutputLayer.builder().nIn(layerSize).nOut(nOut) .activation(Activation.SOFTMAX) .build(), "0") .setOutputs("1") @@ -390,8 +390,8 @@ public class EvalTest extends BaseDL4JTest { .inferenceWorkspaceMode(ws) .graphBuilder() .addInputs("in") - .addLayer("0", new LSTM.Builder().nIn(nIn).nOut(layerSize).build(), "in") - .addLayer("1", new RnnOutputLayer.Builder().nIn(layerSize).nOut(nOut) + .addLayer("0", LSTM.builder().nIn(nIn).nOut(layerSize).build(), "in") + .addLayer("1", RnnOutputLayer.builder().nIn(layerSize).nOut(nOut) .activation(Activation.SOFTMAX) .build(), "0") .setOutputs("1") @@ -457,8 +457,8 @@ public class EvalTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(123) .list() - .layer(0, new LSTM.Builder().activation(Activation.TANH).nIn(3).nOut(3).build()) - .layer(1, new RnnOutputLayer.Builder().activation(Activation.SIGMOID).lossFunction(LossFunctions.LossFunction.XENT) + .layer(0, LSTM.builder().activation(Activation.TANH).nIn(3).nOut(3).build()) + .layer(1, RnnOutputLayer.builder().activation(Activation.SIGMOID).lossFunction(LossFunctions.LossFunction.XENT) .nIn(3).nOut(1).build()) .backpropType(BackpropType.TruncatedBPTT).tbpttFwdLength(10).tbpttBackLength(10) 
.build(); @@ -477,9 +477,9 @@ public class EvalTest extends BaseDL4JTest { .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT).seed(42) .updater(new Sgd(1e-6)).list() - .layer(0, new DenseLayer.Builder().nIn(4).nOut(2).activation(Activation.TANH) + .layer(0, DenseLayer.builder().nIn(4).nOut(2).activation(Activation.TANH) .weightInit(WeightInit.XAVIER).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .layer(1, org.deeplearning4j.nn.conf.layers.OutputLayer.builder( LossFunctions.LossFunction.MCXENT).nIn(2).nOut(3).weightInit(WeightInit.XAVIER) .activation(Activation.SOFTMAX).build()) .build(); @@ -507,8 +507,8 @@ public class EvalTest extends BaseDL4JTest { .seed(12345) .graphBuilder() .addInputs("in") - .addLayer("out1", new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX).build(), "in") - .addLayer("out2", new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX).build(), "in") + .addLayer("out1", OutputLayer.builder().nIn(4).nOut(3).activation(Activation.SOFTMAX).build(), "in") + .addLayer("out2", OutputLayer.builder().nIn(4).nOut(3).activation(Activation.SOFTMAX).build(), "in") .setOutputs("out1", "out2") .build(); @@ -541,11 +541,11 @@ public class EvalTest extends BaseDL4JTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") - .layer("0", new EmbeddingSequenceLayer.Builder().nIn(10).nOut(10).build(), "in") - .layer("1", new LSTM.Builder().nIn(10).nOut(10).build(), "0") - .layer("2", new LSTM.Builder().nIn(10).nOut(10).build(), "0") - .layer("out1", new RnnOutputLayer.Builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build(), "1") - .layer("out2", new RnnOutputLayer.Builder().nIn(10).nOut(20).activation(Activation.SOFTMAX).build(), "2") + .layer("0", EmbeddingSequenceLayer.builder().nIn(10).nOut(10).build(), "in") + .layer("1", LSTM.builder().nIn(10).nOut(10).build(), "0") + .layer("2", LSTM.builder().nIn(10).nOut(10).build(), "0") + .layer("out1", RnnOutputLayer.builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build(), "1") + .layer("out2", RnnOutputLayer.builder().nIn(10).nOut(20).activation(Activation.SOFTMAX).build(), "2") .setOutputs("out1", "out2") .build(); @@ -569,8 +569,8 @@ public class EvalTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() - .layer(new DenseLayer.Builder().nIn(4).nOut(10).build()) - .layer(new OutputLayer.Builder().nIn(10).nOut(3).lossFunction(LossFunctions.LossFunction.MSE).activation(Activation.RELU).build()) + .layer(DenseLayer.builder().nIn(4).nOut(10).build()) + .layer(OutputLayer.builder().nIn(10).nOut(3).lossFunction(LossFunctions.LossFunction.MSE).activation(Activation.RELU).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/EvaluationToolsTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/EvaluationToolsTests.java index 7cf2431f9..fb4b797fb 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/EvaluationToolsTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/EvaluationToolsTests.java @@ -48,8 +48,8 @@ public class EvaluationToolsTests extends BaseDL4JTest { DataSetIterator iter = new IrisDataSetIterator(150, 150); NeuralNetConfiguration conf = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER).list() - .layer(0, new 
DenseLayer.Builder().nIn(4).nOut(4).activation(Activation.TANH).build()).layer(1, - new OutputLayer.Builder().nIn(4).nOut(2).activation(Activation.SOFTMAX) + .layer(0, DenseLayer.builder().nIn(4).nOut(4).activation(Activation.TANH).build()).layer(1, + OutputLayer.builder().nIn(4).nOut(2).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -90,8 +90,8 @@ public class EvaluationToolsTests extends BaseDL4JTest { DataSetIterator iter = new IrisDataSetIterator(150, 150); NeuralNetConfiguration conf = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER).list() - .layer(0, new DenseLayer.Builder().nIn(4).nOut(4).activation(Activation.TANH).build()).layer(1, - new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX) + .layer(0, DenseLayer.builder().nIn(4).nOut(4).activation(Activation.TANH).build()).layer(1, + OutputLayer.builder().nIn(4).nOut(3).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/ROCTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/ROCTest.java index ca3ad1b54..ff7e4107a 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/ROCTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/ROCTest.java @@ -84,8 +84,8 @@ public class ROCTest extends BaseDL4JTest { Nd4j.getRandom().setSeed(12345); NeuralNetConfiguration conf = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER).seed(12345) .list() - .layer(0, new DenseLayer.Builder().nIn(4).nOut(4).activation(Activation.TANH).build()).layer(1, - new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX) + .layer(0, DenseLayer.builder().nIn(4).nOut(4).activation(Activation.TANH).build()).layer(1, + OutputLayer.builder().nIn(4).nOut(3).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/RegressionEvalTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/RegressionEvalTest.java index 92991d1cc..01e5eaeda 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/RegressionEvalTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/RegressionEvalTest.java @@ -49,7 +49,7 @@ public class RegressionEvalTest extends BaseDL4JTest { //Basic sanity check NeuralNetConfiguration conf = NeuralNetConfiguration.builder().weightInit(WeightInit.ZERO).list() - .layer(0, new OutputLayer.Builder().activation(Activation.TANH) + .layer(0, OutputLayer.builder().activation(Activation.TANH) .lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(5).build()) .build(); @@ -71,7 +71,7 @@ public class RegressionEvalTest extends BaseDL4JTest { ComputationGraphConfiguration graphConf = NeuralNetConfiguration.builder().weightInit(WeightInit.ZERO).graphBuilder() - .addInputs("in").addLayer("0", new OutputLayer.Builder() + .addInputs("in").addLayer("0", OutputLayer.builder() .lossFunction(LossFunctions.LossFunction.MSE) .activation(Activation.TANH).nIn(10).nOut(5).build(), "in") .setOutputs("0").build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/exceptions/TestInvalidConfigurations.java 
b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/exceptions/TestInvalidConfigurations.java index be9568f89..0fc03569c 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/exceptions/TestInvalidConfigurations.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/exceptions/TestInvalidConfigurations.java @@ -41,8 +41,8 @@ public class TestInvalidConfigurations extends BaseDL4JTest { public static MultiLayerNetwork getDensePlusOutput(int nIn, int nOut) { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() - .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(10).build()) - .layer(1, new OutputLayer.Builder().nIn(10).nOut(nOut).build()).build(); + .layer(0, DenseLayer.builder().nIn(nIn).nOut(10).build()) + .layer(1, OutputLayer.builder().nIn(10).nOut(nOut).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -52,8 +52,8 @@ public class TestInvalidConfigurations extends BaseDL4JTest { public static MultiLayerNetwork getLSTMPlusRnnOutput(int nIn, int nOut) { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() - .layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(10).build()) - .layer(1, new RnnOutputLayer.Builder().nIn(10).nOut(nOut).build()).build(); + .layer(0, GravesLSTM.builder().nIn(nIn).nOut(10).build()) + .layer(1, RnnOutputLayer.builder().nIn(10).nOut(nOut).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -63,8 +63,8 @@ public class TestInvalidConfigurations extends BaseDL4JTest { public static MultiLayerNetwork getCnnPlusOutputLayer(int depthIn, int inH, int inW, int nOut) { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() - .layer(0, new ConvolutionLayer.Builder().nIn(depthIn).nOut(5).build()) - .layer(1, new OutputLayer.Builder().nOut(nOut).build()) + .layer(0, ConvolutionLayer.builder().nIn(depthIn).nOut(5).build()) + .layer(1, OutputLayer.builder().nOut(nOut).build()) .inputType(InputType.convolutional(inH, inW, depthIn)).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -90,8 +90,8 @@ public class TestInvalidConfigurations extends BaseDL4JTest { public void testDenseNout0() { try { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() - .layer(0, new DenseLayer.Builder().nIn(10).nOut(0).build()) - .layer(1, new OutputLayer.Builder().nIn(10).nOut(10).build()).build(); + .layer(0, DenseLayer.builder().nIn(10).nOut(0).build()) + .layer(1, OutputLayer.builder().nIn(10).nOut(10).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -147,8 +147,8 @@ public class TestInvalidConfigurations extends BaseDL4JTest { public void testLSTMNOut0() { try { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() - .layer(0, new GravesLSTM.Builder().nIn(10).nOut(0).build()) - .layer(1, new RnnOutputLayer.Builder().nIn(10).nOut(10).build()).build(); + .layer(0, GravesLSTM.builder().nIn(10).nOut(0).build()) + .layer(1, RnnOutputLayer.builder().nIn(10).nOut(10).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -178,8 +178,8 @@ public class TestInvalidConfigurations extends BaseDL4JTest { public void testConvolutionalNOut0() { try { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() - .layer(0, new ConvolutionLayer.Builder().nIn(5).nOut(0).build()) - .layer(1, new OutputLayer.Builder().nOut(10).build()) + .layer(0, ConvolutionLayer.builder().nIn(5).nOut(0).build()) + .layer(1, 
OutputLayer.builder().nOut(10).build()) .inputType(InputType.convolutional(10, 10, 5)).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -208,9 +208,9 @@ public class TestInvalidConfigurations extends BaseDL4JTest { try { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().convolutionMode(ConvolutionMode.Strict) .list() - .layer(0, new ConvolutionLayer.Builder().kernelSize(3, 2).stride(2, 2).padding(0, 0).nOut(5) + .layer(0, ConvolutionLayer.builder().kernelSize(3, 2).stride(2, 2).padding(0, 0).nOut(5) .build()) - .layer(1, new OutputLayer.Builder().nOut(10).build()) + .layer(1, OutputLayer.builder().nOut(10).build()) .inputType(InputType.convolutional(hIn, wIn, depthIn)).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -234,9 +234,9 @@ public class TestInvalidConfigurations extends BaseDL4JTest { int wIn = 10; NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() - .layer(0, new ConvolutionLayer.Builder().kernelSize(7, 7).stride(1, 1).padding(0, 0).nOut(5) + .layer(0, ConvolutionLayer.builder().kernelSize(7, 7).stride(1, 1).padding(0, 0).nOut(5) .build()) - .layer(1, new OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).build()) + .layer(1, OutputLayer.builder().nOut(10).activation(Activation.SOFTMAX).build()) .inputType(InputType.convolutional(hIn, wIn, depthIn)).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -266,9 +266,9 @@ public class TestInvalidConfigurations extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().convolutionMode(ConvolutionMode.Strict).list() - .layer(0, new ConvolutionLayer.Builder().kernelSize(3, 3).stride(2, 2) + .layer(0, ConvolutionLayer.builder().kernelSize(3, 3).stride(2, 2) .padding(0, 0).nIn(depthIn).nOut(5).build()) - .layer(1, new OutputLayer.Builder().nIn(5 * 4 * 4).nOut(10).activation(Activation.SOFTMAX).build()) + .layer(1, OutputLayer.builder().nIn(5 * 4 * 4).nOut(10).activation(Activation.SOFTMAX).build()) .inputPreProcessor(1, new CnnToFeedForwardPreProcessor()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -299,9 +299,9 @@ public class TestInvalidConfigurations extends BaseDL4JTest { try { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() - .layer(0, new ConvolutionLayer.Builder().kernelSize(2, 3).stride(2, 2).padding(0, 0).nOut(5) + .layer(0, ConvolutionLayer.builder().kernelSize(2, 3).stride(2, 2).padding(0, 0).nOut(5) .build()) - .layer(1, new OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).build()) + .layer(1, OutputLayer.builder().nOut(10).activation(Activation.SOFTMAX).build()) .inputType(InputType.convolutional(hIn, wIn, depthIn)).build(); } catch (Exception e) { fail("Did not expect exception with default (truncate)"); @@ -310,9 +310,9 @@ public class TestInvalidConfigurations extends BaseDL4JTest { try { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().convolutionMode(ConvolutionMode.Strict) .list() - .layer(0, new ConvolutionLayer.Builder().kernelSize(2, 3).stride(2, 2).padding(0, 0).nOut(5) + .layer(0, ConvolutionLayer.builder().kernelSize(2, 3).stride(2, 2).padding(0, 0).nOut(5) .build()) - .layer(1, new OutputLayer.Builder().nOut(10).build()) + .layer(1, OutputLayer.builder().nOut(10).build()) .inputType(InputType.convolutional(hIn, wIn, depthIn)).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -339,9 +339,9 @@ public class TestInvalidConfigurations extends BaseDL4JTest { try { NeuralNetConfiguration conf = 
NeuralNetConfiguration.builder().convolutionMode(ConvolutionMode.Strict) .list() - .layer(0, new SubsamplingLayer.Builder().kernelSize(2, 3).stride(2, 2).padding(0, 0) + .layer(0, SubsamplingLayer.builder().kernelSize(2, 3).stride(2, 2).padding(0, 0) .build()) - .layer(1, new OutputLayer.Builder().nOut(10).build()) + .layer(1, OutputLayer.builder().nOut(10).build()) .inputType(InputType.convolutional(hIn, wIn, depthIn)).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -358,84 +358,84 @@ public class TestInvalidConfigurations extends BaseDL4JTest { @Test public void testCnnInvalidKernel() { assertThrows(IllegalStateException.class, () -> { - new ConvolutionLayer.Builder().kernelSize(3, 0).build(); + ConvolutionLayer.builder().kernelSize(3, 0).build(); }); } @Test public void testCnnInvalidKernel2() { assertThrows(IllegalStateException.class, () -> { - new ConvolutionLayer.Builder().kernelSize(2, 2, 2).build(); + ConvolutionLayer.builder().kernelSize(2, 2, 2).build(); }); } @Test public void testCnnInvalidStride() { assertThrows(IllegalStateException.class, () -> { - new ConvolutionLayer.Builder().kernelSize(3, 3).stride(0, 1).build(); + ConvolutionLayer.builder().kernelSize(3, 3).stride(0, 1).build(); }); } @Test public void testCnnInvalidStride2() { assertThrows(IllegalArgumentException.class, () -> { - new ConvolutionLayer.Builder().kernelSize(3, 3).stride(1).build(); + ConvolutionLayer.builder().kernelSize(3, 3).stride(1).build(); }); } @Test public void testCnnInvalidPadding() { assertThrows(IllegalArgumentException.class, () -> { - new ConvolutionLayer.Builder().kernelSize(3, 3).stride(1, 1).padding(-1, 0).build(); + ConvolutionLayer.builder().kernelSize(3, 3).stride(1, 1).padding(-1, 0).build(); }); } @Test public void testCnnInvalidPadding2() { assertThrows(IllegalArgumentException.class, () -> { - new ConvolutionLayer.Builder().kernelSize(3, 3).stride(1, 1).padding(0, 0, 0).build(); + ConvolutionLayer.builder().kernelSize(3, 3).stride(1, 1).padding(0, 0, 0).build(); }); } @Test public void testSubsamplingInvalidKernel() { assertThrows(IllegalStateException.class, () -> { - new SubsamplingLayer.Builder().kernelSize(3, 0).build(); + SubsamplingLayer.builder().kernelSize(3, 0).build(); }); } @Test public void testSubsamplingInvalidKernel2() { assertThrows(IllegalArgumentException.class, () -> { - new SubsamplingLayer.Builder().kernelSize(2).build(); + SubsamplingLayer.builder().kernelSize(2).build(); }); } @Test public void testSubsamplingInvalidStride() { assertThrows(IllegalStateException.class, () -> { - new SubsamplingLayer.Builder().kernelSize(3, 3).stride(0, 1).build(); + SubsamplingLayer.builder().kernelSize(3, 3).stride(0, 1).build(); }); } @Test public void testSubsamplingInvalidStride2() { assertThrows(RuntimeException.class, () -> { - new SubsamplingLayer.Builder().kernelSize(3, 3).stride(1, 1, 1).build(); + SubsamplingLayer.builder().kernelSize(3, 3).stride(1, 1, 1).build(); }); } @Test public void testSubsamplingInvalidPadding() { assertThrows(IllegalArgumentException.class, () -> { - new SubsamplingLayer.Builder().kernelSize(3, 3).stride(1, 1).padding(-1, 0).build(); + SubsamplingLayer.builder().kernelSize(3, 3).stride(1, 1).padding(-1, 0).build(); }); } @Test public void testSubsamplingInvalidPadding2() { assertThrows(RuntimeException.class, () -> { - new SubsamplingLayer.Builder().kernelSize(3, 3).stride(1, 1).padding(0).build(); + SubsamplingLayer.builder().kernelSize(3, 3).stride(1, 1).padding(0).build(); }); } diff --git 
a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/exceptions/TestInvalidInput.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/exceptions/TestInvalidInput.java index 4e35f44eb..70ab96855 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/exceptions/TestInvalidInput.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/exceptions/TestInvalidInput.java @@ -43,8 +43,8 @@ public class TestInvalidInput extends BaseDL4JTest { @Test public void testInputNinMismatchDense() { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() - .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()) - .layer(1, new OutputLayer.Builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build()).build(); + .layer(0, DenseLayer.builder().nIn(10).nOut(10).build()) + .layer(1, OutputLayer.builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -64,8 +64,8 @@ public class TestInvalidInput extends BaseDL4JTest { @Test public void testLabelsNOutMismatchOutputLayer() { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() - .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()) - .layer(1, new OutputLayer.Builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build()).build(); + .layer(0, DenseLayer.builder().nIn(10).nOut(10).build()) + .layer(1, OutputLayer.builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -85,8 +85,8 @@ public class TestInvalidInput extends BaseDL4JTest { @Test public void testLabelsNOutMismatchRnnOutputLayer() { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() - .layer(0, new LSTM.Builder().nIn(5).nOut(5).build()) - .layer(1, new RnnOutputLayer.Builder().nIn(5).nOut(5).activation(Activation.SOFTMAX).build()).build(); + .layer(0, LSTM.builder().nIn(5).nOut(5).build()) + .layer(1, RnnOutputLayer.builder().nIn(5).nOut(5).activation(Activation.SOFTMAX).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -112,8 +112,8 @@ public class TestInvalidInput extends BaseDL4JTest { int d = 3; NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() - .layer(0, new ConvolutionLayer.Builder().nIn(d).nOut(5).build()) - .layer(1, new OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).build()) + .layer(0, ConvolutionLayer.builder().nIn(d).nOut(5).build()) + .layer(1, OutputLayer.builder().nOut(10).activation(Activation.SOFTMAX).build()) .inputType(InputType.convolutional(h, w, d)).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -139,8 +139,8 @@ public class TestInvalidInput extends BaseDL4JTest { int d = 3; NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() - .layer(0, new ConvolutionLayer.Builder().nIn(d).nOut(5).build()) - .layer(1, new OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).build()) + .layer(0, ConvolutionLayer.builder().nIn(d).nOut(5).build()) + .layer(1, OutputLayer.builder().nOut(10).activation(Activation.SOFTMAX).build()) .inputType(InputType.convolutional(h, w, d)).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -165,8 +165,8 @@ public class TestInvalidInput extends BaseDL4JTest { int d = 3; NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() - .layer(0, new SubsamplingLayer.Builder().kernelSize(2, 2).build()) - .layer(1, new 
OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).build()) + .layer(0, SubsamplingLayer.builder().kernelSize(2, 2).build()) + .layer(1, OutputLayer.builder().nOut(10).activation(Activation.SOFTMAX).build()) .inputType(InputType.convolutional(h, w, d)).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -188,8 +188,8 @@ public class TestInvalidInput extends BaseDL4JTest { public void testInputNinMismatchLSTM() { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() - .layer(0, new GravesLSTM.Builder().nIn(5).nOut(5).build()) - .layer(1, new RnnOutputLayer.Builder().nIn(5).nOut(5).activation(Activation.SOFTMAX).build()).build(); + .layer(0, GravesLSTM.builder().nIn(5).nOut(5).build()) + .layer(1, RnnOutputLayer.builder().nIn(5).nOut(5).activation(Activation.SOFTMAX).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -209,8 +209,8 @@ public class TestInvalidInput extends BaseDL4JTest { public void testInputNinMismatchBidirectionalLSTM() { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() - .layer(0, new GravesBidirectionalLSTM.Builder().nIn(5).nOut(5).build()) - .layer(1, new RnnOutputLayer.Builder().nIn(5).nOut(5).activation(Activation.SOFTMAX).build()).build(); + .layer(0, GravesBidirectionalLSTM.builder().nIn(5).nOut(5).build()) + .layer(1, RnnOutputLayer.builder().nIn(5).nOut(5).activation(Activation.SOFTMAX).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -231,8 +231,8 @@ public class TestInvalidInput extends BaseDL4JTest { public void testInputNinMismatchEmbeddingLayer() { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() - .layer(0, new EmbeddingLayer.Builder().nIn(10).nOut(10).build()) - .layer(1, new OutputLayer.Builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build()).build(); + .layer(0, EmbeddingLayer.builder().nIn(10).nOut(10).build()) + .layer(1, OutputLayer.builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -259,13 +259,13 @@ public class TestInvalidInput extends BaseDL4JTest { LayerConfiguration l; switch (layerType){ case "simple": - l = new SimpleRnn.Builder().nIn(5).nOut(5).build(); + l = SimpleRnn.builder().nIn(5).nOut(5).build(); break; case "lstm": - l = new LSTM.Builder().nIn(5).nOut(5).build(); + l = LSTM.builder().nIn(5).nOut(5).build(); break; case "graves": - l = new GravesLSTM.Builder().nIn(5).nOut(5).build(); + l = GravesLSTM.builder().nIn(5).nOut(5).build(); break; default: throw new RuntimeException(); @@ -273,7 +273,7 @@ public class TestInvalidInput extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() .layer(l) - .layer(new RnnOutputLayer.Builder().nIn(5).nOut(5).activation(Activation.SOFTMAX).build()).build(); + .layer(RnnOutputLayer.builder().nIn(5).nOut(5).activation(Activation.SOFTMAX).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/AttentionLayerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/AttentionLayerTest.java index b83cc07c4..286f93992 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/AttentionLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/AttentionLayerTest.java @@ -88,14 +88,13 @@ public class AttentionLayerTest extends 
BaseDL4JTest { .activation(Activation.TANH) .updater(new NoOp()) .weightInit(WeightInit.XAVIER) - .list() - .layer(new LSTM.Builder().nOut(layerSize).build()) + .layer(LSTM.builder().nOut(layerSize).build()) .layer( projectInput ? - new SelfAttentionLayer.Builder().nOut(4).nHeads(2).projectInput(true).build() - : new SelfAttentionLayer.Builder().nHeads(1).projectInput(false).build() + SelfAttentionLayer.builder().nOut(4).nHeads(2).projectInput(true).build() + : SelfAttentionLayer.builder().nHeads(1).projectInput(false).build() ) - .layer(new GlobalPoolingLayer.Builder().poolingType(PoolingType.MAX).build()) - .layer(new OutputLayer.Builder().nOut(nOut).activation(Activation.SOFTMAX) + .layer(GlobalPoolingLayer.builder().poolingType(PoolingType.MAX).build()) + .layer(OutputLayer.builder().nOut(nOut).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .inputType(InputType.recurrent(nIn)) .build(); @@ -150,13 +149,13 @@ public class AttentionLayerTest extends BaseDL4JTest { .updater(new NoOp()) .weightInit(WeightInit.XAVIER) .list() - .layer(new LSTM.Builder().nOut(layerSize).build()) + .layer(LSTM.builder().nOut(layerSize).build()) .layer( projectInput ? - new LearnedSelfAttentionLayer.Builder().nOut(4).nHeads(2).nQueries(numQueries).projectInput(true).build() - : new LearnedSelfAttentionLayer.Builder().nHeads(1).nQueries(numQueries).projectInput(false).build() + LearnedSelfAttentionLayer.builder().nOut(4).nHeads(2).nQueries(numQueries).projectInput(true).build() + : LearnedSelfAttentionLayer.builder().nHeads(1).nQueries(numQueries).projectInput(false).build() ) - .layer(new GlobalPoolingLayer.Builder().poolingType(PoolingType.MAX).build()) - .layer(new OutputLayer.Builder().nOut(nOut).activation(Activation.SOFTMAX) + .layer(GlobalPoolingLayer.builder().poolingType(PoolingType.MAX).build()) + .layer(OutputLayer.builder().nOut(nOut).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .inputType(InputType.recurrent(nIn)) .build(); @@ -190,13 +189,13 @@ public class AttentionLayerTest extends BaseDL4JTest { .updater(new NoOp()) .weightInit(WeightInit.XAVIER) .list() - .layer(new LSTM.Builder().nOut(layerSize).build()) + .layer(LSTM.builder().nOut(layerSize).build()) .layer( projectInput ? 
- new LearnedSelfAttentionLayer.Builder().nOut(4).nHeads(2).nQueries(numQueries).projectInput(true).build() - : new LearnedSelfAttentionLayer.Builder().nHeads(1).nQueries(numQueries).projectInput(false).build() + LearnedSelfAttentionLayer.builder().nOut(4).nHeads(2).nQueries(numQueries).projectInput(true).build() + : LearnedSelfAttentionLayer.builder().nHeads(1).nQueries(numQueries).projectInput(false).build() ) - .layer(new GlobalPoolingLayer.Builder().poolingType(PoolingType.MAX).build()) - .layer(new OutputLayer.Builder().nOut(nOut).activation(Activation.SOFTMAX) + .layer(GlobalPoolingLayer.builder().poolingType(PoolingType.MAX).build()) + .layer(OutputLayer.builder().nOut(nOut).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .inputType(InputType.recurrent(nIn)) .build(); @@ -245,10 +244,10 @@ public class AttentionLayerTest extends BaseDL4JTest { .updater(new NoOp()) .weightInit(WeightInit.XAVIER) .list() - .layer(new LSTM.Builder().nOut(layerSize).build()) - .layer(new RecurrentAttentionLayer.Builder().nIn(layerSize).nOut(layerSize).nHeads(1).projectInput(false).hasBias(false).build()) - .layer(new GlobalPoolingLayer.Builder().poolingType(PoolingType.AVG).build()) - .layer(new OutputLayer.Builder().nOut(nOut).activation(Activation.SOFTMAX) + .layer(LSTM.builder().nOut(layerSize).build()) + .layer(RecurrentAttentionLayer.builder().nIn(layerSize).nOut(layerSize).nHeads(1).projectInput(false).hasBias(false).build()) + .layer(GlobalPoolingLayer.builder().poolingType(PoolingType.AVG).build()) + .layer(OutputLayer.builder().nOut(nOut).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .inputType(InputType.recurrent(nIn)) .build(); @@ -308,10 +307,10 @@ public class AttentionLayerTest extends BaseDL4JTest { .updater(new NoOp()) .weightInit(WeightInit.XAVIER) .list() - .layer(new LSTM.Builder().nOut(layerSize).build()) - .layer(new RecurrentAttentionLayer.Builder().nIn(layerSize).nOut(layerSize).nHeads(1).projectInput(false).hasBias(false).build()) - .layer(new GlobalPoolingLayer.Builder().poolingType(PoolingType.AVG).build()) - .layer(new OutputLayer.Builder().nOut(nOut).activation(Activation.SOFTMAX) + .layer(LSTM.builder().nOut(layerSize).build()) + .layer(RecurrentAttentionLayer.builder().nIn(layerSize).nOut(layerSize).nHeads(1).projectInput(false).hasBias(false).build()) + .layer(GlobalPoolingLayer.builder().poolingType(PoolingType.AVG).build()) + .layer(OutputLayer.builder().nOut(nOut).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .inputType(InputType.recurrent(nIn)) .build(); @@ -367,15 +366,15 @@ public class AttentionLayerTest extends BaseDL4JTest { .weightInit(WeightInit.XAVIER) .graphBuilder() .addInputs("input") - .addLayer("rnnKeys", new SimpleRnn.Builder().nOut(layerSize).build(), "input") - .addLayer("rnnQueries", new SimpleRnn.Builder().nOut(layerSize).build(), "input") - .addLayer("rnnValues", new SimpleRnn.Builder().nOut(layerSize).build(), "input") + .addLayer("rnnKeys", SimpleRnn.builder().nOut(layerSize).build(), "input") + .addLayer("rnnQueries", SimpleRnn.builder().nOut(layerSize).build(), "input") + .addLayer("rnnValues", SimpleRnn.builder().nOut(layerSize).build(), "input") .addVertex("attention", projectInput ? 
new AttentionVertex.Builder().nOut(4).nHeads(2).projectInput(true).nInQueries(layerSize).nInKeys(layerSize).nInValues(layerSize).build() : new AttentionVertex.Builder().nOut(3).nHeads(1).projectInput(false).nInQueries(layerSize).nInKeys(layerSize).nInValues(layerSize).build(), "rnnQueries", "rnnKeys", "rnnValues") - .addLayer("pooling", new GlobalPoolingLayer.Builder().poolingType(PoolingType.MAX).build(), "attention") - .addLayer("output", new OutputLayer.Builder().nOut(nOut).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build(), "pooling") + .addLayer("pooling", GlobalPoolingLayer.builder().poolingType(PoolingType.MAX).build(), "attention") + .addLayer("output", OutputLayer.builder().nOut(nOut).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build(), "pooling") .setOutputs("output") .setInputTypes(InputType.recurrent(nIn)) .build(); @@ -431,13 +430,13 @@ public class AttentionLayerTest extends BaseDL4JTest { .weightInit(WeightInit.XAVIER) .graphBuilder() .addInputs("input") - .addLayer("rnn", new SimpleRnn.Builder().activation(Activation.TANH).nOut(layerSize).build(), "input") + .addLayer("rnn", SimpleRnn.builder().activation(Activation.TANH).nOut(layerSize).build(), "input") .addVertex("attention", projectInput ? new AttentionVertex.Builder().nOut(4).nHeads(2).projectInput(true).nInQueries(layerSize).nInKeys(layerSize).nInValues(layerSize).build() : new AttentionVertex.Builder().nOut(4).nHeads(1).projectInput(false).nInQueries(layerSize).nInKeys(layerSize).nInValues(layerSize).build(), "rnn", "rnn", "rnn") - .addLayer("pooling", new GlobalPoolingLayer.Builder().poolingType(PoolingType.MAX).build(), "attention") - .addLayer("output", new OutputLayer.Builder().nOut(nOut).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build(), "pooling") + .addLayer("pooling", GlobalPoolingLayer.builder().poolingType(PoolingType.MAX).build(), "attention") + .addLayer("output", OutputLayer.builder().nOut(nOut).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build(), "pooling") .setOutputs("output") .setInputTypes(InputType.recurrent(nIn)) .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/BNGradientCheckTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/BNGradientCheckTest.java index 0380ed2a0..54c7e7763 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/BNGradientCheckTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/BNGradientCheckTest.java @@ -78,11 +78,11 @@ public class BNGradientCheckTest extends BaseDL4JTest { .dataType(DataType.DOUBLE) .seed(12345L) .dist(new NormalDistribution(0, 1)).list() - .layer(0, new DenseLayer.Builder().nIn(4).nOut(3) + .layer(0, DenseLayer.builder().nIn(4).nOut(3) .activation(Activation.IDENTITY).build()) - .layer(1, new BatchNormalization.Builder().useLogStd(useLogStd).nOut(3).build()) - .layer(2, new ActivationLayer.Builder().activation(Activation.TANH).build()) - .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(1,BatchNormalization.builder().useLogStd(useLogStd).nOut(3).build()) + .layer(2, ActivationLayer.builder().activation(Activation.TANH).build()) + .layer(3, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(3).nOut(3).build()); MultiLayerNetwork mln = new MultiLayerNetwork(builder.build()); @@ -122,11 +122,11 
@@ public class BNGradientCheckTest extends BaseDL4JTest { .dataType(DataType.DOUBLE) .updater(new NoOp()).seed(12345L) .dist(new NormalDistribution(0, 2)).list() - .layer(0, new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).nIn(depth).nOut(2) + .layer(0, ConvolutionLayer.builder().kernelSize(2, 2).stride(1, 1).nIn(depth).nOut(2) .activation(Activation.IDENTITY).build()) - .layer(1, new BatchNormalization.Builder().useLogStd(useLogStd).build()) - .layer(2, new ActivationLayer.Builder().activation(Activation.TANH).build()) - .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(1,BatchNormalization.builder().useLogStd(useLogStd).build()) + .layer(2, ActivationLayer.builder().activation(Activation.TANH).build()) + .layer(3, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(nOut).build()) .inputType(InputType.convolutional(hw, hw, depth)); @@ -193,14 +193,14 @@ public class BNGradientCheckTest extends BaseDL4JTest { .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT) .updater(new NoOp()) .dist(new UniformDistribution(-2, 2)).seed(12345L).list() - .layer(0, new ConvolutionLayer.Builder(2, 2).stride(1, 1).nOut(3) + .layer(0, ConvolutionLayer.builder(2, 2).stride(1, 1).nOut(3) .activation(afn).build()) - .layer(1, new BatchNormalization.Builder().useLogStd(useLogStd).build()) - .layer(2, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX) + .layer(1,BatchNormalization.builder().useLogStd(useLogStd).build()) + .layer(2, SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX) .kernelSize(2, 2).stride(1, 1).build()) - .layer(3, new BatchNormalization()) - .layer(4, new ActivationLayer.Builder().activation(afn).build()) - .layer(5, new OutputLayer.Builder(lf).activation(outputActivation).nOut(nOut) + .layer(3, BatchNormalization.builder().build()) + .layer(4, ActivationLayer.builder().activation(afn).build()) + .layer(5, OutputLayer.builder(lf).activation(outputActivation).nOut(nOut) .build()) .inputType(InputType.convolutional(hw, hw, depth)); @@ -300,12 +300,12 @@ public class BNGradientCheckTest extends BaseDL4JTest { .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT) .updater(new NoOp()) .dist(new UniformDistribution(-2, 2)).seed(12345L).list() - .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(4) + .layer(0, DenseLayer.builder().nIn(nIn).nOut(4) .activation(afn).build()) - .layer(1, new BatchNormalization.Builder().useLogStd(useLogStd).build()) - .layer(2, new DenseLayer.Builder().nIn(4).nOut(4).build()) - .layer(3, new BatchNormalization.Builder().useLogStd(useLogStd).build()) - .layer(4, new OutputLayer.Builder(lf) + .layer(1,BatchNormalization.builder().useLogStd(useLogStd).build()) + .layer(2, DenseLayer.builder().nIn(4).nOut(4).build()) + .layer(3,BatchNormalization.builder().useLogStd(useLogStd).build()) + .layer(4, OutputLayer.builder(lf) .activation(outputActivation).nOut(nOut) .build()); @@ -373,11 +373,11 @@ public class BNGradientCheckTest extends BaseDL4JTest { .dataType(DataType.DOUBLE) .seed(12345L) .dist(new NormalDistribution(0, 1)).list() - .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).activation(Activation.IDENTITY).build()) - .layer(1, new BatchNormalization.Builder().useLogStd(useLogStd).lockGammaBeta(true).gamma(2.0).beta(0.5).nOut(3) + .layer(0, DenseLayer.builder().nIn(4).nOut(3).activation(Activation.IDENTITY).build()) + .layer(1,BatchNormalization.builder().useLogStd(useLogStd).lockGammaBeta(true).gamma(2.0).beta(0.5).nOut(3) .build()) - 
.layer(2, new ActivationLayer.Builder().activation(Activation.TANH).build()) - .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(2, ActivationLayer.builder().activation(Activation.TANH).build()) + .layer(3, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(3).nOut(3).build()); MultiLayerNetwork mln = new MultiLayerNetwork(builder.build()); @@ -417,11 +417,11 @@ public class BNGradientCheckTest extends BaseDL4JTest { .dataType(DataType.DOUBLE) .seed(12345L) .dist(new NormalDistribution(0, 2)).list() - .layer(0, new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).nIn(depth).nOut(2) + .layer(0, ConvolutionLayer.builder().kernelSize(2, 2).stride(1, 1).nIn(depth).nOut(2) .activation(Activation.IDENTITY).build()) - .layer(1, new BatchNormalization.Builder().useLogStd(useLogStd).lockGammaBeta(true).gamma(2.0).beta(0.5).build()) - .layer(2, new ActivationLayer.Builder().activation(Activation.TANH).build()) - .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(1,BatchNormalization.builder().useLogStd(useLogStd).lockGammaBeta(true).gamma(2.0).beta(0.5).build()) + .layer(2, ActivationLayer.builder().activation(Activation.TANH).build()) + .layer(3, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(nOut).build()) .inputType(InputType.convolutional(hw, hw, depth)); @@ -460,8 +460,8 @@ public class BNGradientCheckTest extends BaseDL4JTest { .dataType(DataType.DOUBLE) .weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in") .setInputTypes(InputType.convolutional(height, width, channels)) - .addLayer("bn", new BatchNormalization.Builder().useLogStd(useLogStd).build(), "in") - .addLayer("out", new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MCXENT) + .addLayer("bn",BatchNormalization.builder().useLogStd(useLogStd).build(), "in") + .addLayer("out", OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(numClasses).build(), "bn") .setOutputs("out").build(); @@ -531,14 +531,14 @@ public class BNGradientCheckTest extends BaseDL4JTest { .updater(new NoOp()) .dist(new UniformDistribution(-2, 2)).seed(12345L).graphBuilder() .addInputs("in") - .addLayer("0", new ConvolutionLayer.Builder(2, 2).stride(1, 1).nOut(3) + .addLayer("0", ConvolutionLayer.builder(2, 2).stride(1, 1).nOut(3) .activation(afn).build(), "in") - .addLayer("1", new BatchNormalization.Builder().useLogStd(useLogStd).build(), "0") - .addLayer("2", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX) + .addLayer("1",BatchNormalization.builder().useLogStd(useLogStd).build(), "0") + .addLayer("2", SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX) .kernelSize(2, 2).stride(1, 1).build(), "1") - .addLayer("3", new BatchNormalization.Builder().useLogStd(useLogStd).build(), "2") - .addLayer("4", new ActivationLayer.Builder().activation(afn).build(), "3") - .addLayer("5", new OutputLayer.Builder(lf).activation(outputActivation) + .addLayer("3",BatchNormalization.builder().useLogStd(useLogStd).build(), "2") + .addLayer("4", ActivationLayer.builder().activation(afn).build(), "3") + .addLayer("5", OutputLayer.builder(lf).activation(outputActivation) .nOut(nOut).build(), "4") .setOutputs("5").setInputTypes(InputType.convolutional(hw, hw, depth)) .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN1DGradientCheckTest.java 
b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN1DGradientCheckTest.java index 0f474bb16..b957fde47 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN1DGradientCheckTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN1DGradientCheckTest.java @@ -20,6 +20,9 @@ package org.deeplearning4j.gradientcheck; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.TestUtils; @@ -42,483 +45,664 @@ import org.nd4j.linalg.indexing.NDArrayIndex; import org.nd4j.linalg.learning.config.NoOp; import org.nd4j.linalg.lossfunctions.LossFunctions; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - @Slf4j public class CNN1DGradientCheckTest extends BaseDL4JTest { - private static final boolean PRINT_RESULTS = true; - private static final boolean RETURN_ON_FIRST_FAILURE = false; - private static final double DEFAULT_EPS = 1e-6; - private static final double DEFAULT_MAX_REL_ERROR = 1e-3; - private static final double DEFAULT_MIN_ABS_ERROR = 1e-8; + private static final boolean PRINT_RESULTS = true; + private static final boolean RETURN_ON_FIRST_FAILURE = false; + private static final double DEFAULT_EPS = 1e-6; + private static final double DEFAULT_MAX_REL_ERROR = 1e-3; + private static final double DEFAULT_MIN_ABS_ERROR = 1e-8; - static { - Nd4j.setDataType(DataType.DOUBLE); - } + static { + Nd4j.setDataType(DataType.DOUBLE); + } - @Test - public void testCnn1DWithLocallyConnected1D() { - Nd4j.getRandom().setSeed(1337); + @Test + public void testCnn1DWithLocallyConnected1D() { + Nd4j.getRandom().setSeed(1337); - int[] minibatchSizes = {2, 3}; - int length = 7; - int convNIn = 2; - int convNOut1 = 3; - int convNOut2 = 4; - int finalNOut = 4; + int[] minibatchSizes = {2, 3}; + int length = 7; + int convNIn = 2; + int convNOut1 = 3; + int convNOut2 = 4; + int finalNOut = 4; - int[] kernels = {1}; - int stride = 1; - int padding = 0; + int[] kernels = {1}; + int stride = 1; + int padding = 0; - Activation[] activations = {Activation.SIGMOID}; - - for (Activation afn : activations) { - for (int minibatchSize : minibatchSizes) { - for (int kernel : kernels) { - INDArray input = Nd4j.rand(minibatchSize, convNIn, length); - INDArray labels = Nd4j.zeros(minibatchSize, finalNOut, length); - for (int i = 0; i < minibatchSize; i++) { - for (int j = 0; j < length; j++) { - labels.putScalar(new int[]{i, i % finalNOut, j}, 1.0); - } - } - - NeuralNetConfiguration conf = NeuralNetConfiguration.builder() - .dataType(DataType.DOUBLE) - .updater(new NoOp()) - .dist(new NormalDistribution(0, 1)).convolutionMode(ConvolutionMode.Same).list() - .layer(new Convolution1DLayer.Builder().activation(afn).kernelSize(kernel) - .stride(stride).padding(padding).nIn(convNIn).nOut(convNOut1) - .rnnDataFormat(RNNFormat.NCW) - .build()) - .layer(new LocallyConnected1D.Builder().activation(afn).kernelSize(kernel) - .stride(stride).padding(padding).nIn(convNOut1).nOut(convNOut2).hasBias(false) - .build()) - .layer(new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) - .activation(Activation.SOFTMAX).nOut(finalNOut).build()) - .inputType(InputType.recurrent(convNIn, length)).build(); - - String json = conf.toJson(); - NeuralNetConfiguration c2 = NeuralNetConfiguration.fromJson(json); - 
assertEquals(conf, c2); - - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - - String msg = "Minibatch=" + minibatchSize + ", activationFn=" - + afn + ", kernel = " + kernel; - - if (PRINT_RESULTS) { - System.out.println(msg); -// for (int j = 0; j < net.getnLayers(); j++) -// System.out.println("ILayer " + j + " # params: " + net.getLayer(j).numParams()); - } - - boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, - DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); - - assertTrue(gradOK, msg); - - TestUtils.testModelSerialization(net); - } + Activation[] activations = {Activation.SIGMOID}; + for (Activation afn : activations) { + for (int minibatchSize : minibatchSizes) { + for (int kernel : kernels) { + INDArray input = Nd4j.rand(minibatchSize, convNIn, length); + INDArray labels = Nd4j.zeros(minibatchSize, finalNOut, length); + for (int i = 0; i < minibatchSize; i++) { + for (int j = 0; j < length; j++) { + labels.putScalar(new int[] {i, i % finalNOut, j}, 1.0); } + } + + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() + .dataType(DataType.DOUBLE) + .updater(new NoOp()) + .dist(new NormalDistribution(0, 1)) + .convolutionMode(ConvolutionMode.Same) + .list() + .layer( + Convolution1DLayer.builder() + .activation(afn) + .kernelSize(kernel) + .stride(stride) + .padding(padding) + .nIn(convNIn) + .nOut(convNOut1) + .rnnDataFormat(RNNFormat.NCW) + .build()) + .layer( + LocallyConnected1D.builder() + .activation(afn) + .kernelSize(kernel) + .stride(stride) + .padding(padding) + .nIn(convNOut1) + .nOut(convNOut2) + .hasBias(false) + .build()) + .layer( + RnnOutputLayer.builder() + .lossFunction(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX) + .nOut(finalNOut) + .build()) + .inputType(InputType.recurrent(convNIn, length)) + .build(); + + String json = conf.toJson(); + NeuralNetConfiguration c2 = NeuralNetConfiguration.fromJson(json); + assertEquals(conf, c2); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + String msg = + "Minibatch=" + minibatchSize + ", activationFn=" + afn + ", kernel = " + kernel; + + if (PRINT_RESULTS) { + System.out.println(msg); + // for (int j = 0; j < net.getnLayers(); j++) + // System.out.println("ILayer " + j + " # params: " + + // net.getLayer(j).numParams()); + } + + boolean gradOK = + GradientCheckUtil.checkGradients( + net, + DEFAULT_EPS, + DEFAULT_MAX_REL_ERROR, + DEFAULT_MIN_ABS_ERROR, + PRINT_RESULTS, + RETURN_ON_FIRST_FAILURE, + input, + labels); + + assertTrue(gradOK, msg); + + TestUtils.testModelSerialization(net); } + } } + } + @Test + public void testCnn1DWithCropping1D() { + Nd4j.getRandom().setSeed(1337); - @Test - public void testCnn1DWithCropping1D() { - Nd4j.getRandom().setSeed(1337); + int[] minibatchSizes = {1, 3}; + int length = 7; + int convNIn = 2; + int convNOut1 = 3; + int convNOut2 = 4; + int finalNOut = 4; - int[] minibatchSizes = {1, 3}; - int length = 7; - int convNIn = 2; - int convNOut1 = 3; - int convNOut2 = 4; - int finalNOut = 4; + int[] kernels = {1, 2, 4}; + int stride = 1; + int padding = 0; + int cropping = 1; + int croppedLength = length - 2 * cropping; - int[] kernels = {1, 2, 4}; - int stride = 1; + Activation[] activations = {Activation.SIGMOID}; + SubsamplingLayer.PoolingType[] poolingTypes = + new SubsamplingLayer.PoolingType[] { + SubsamplingLayer.PoolingType.MAX, + SubsamplingLayer.PoolingType.AVG, + SubsamplingLayer.PoolingType.PNORM + }; - int padding = 0; - int cropping 
= 1; - int croppedLength = length - 2 * cropping; - - Activation[] activations = {Activation.SIGMOID}; - SubsamplingLayer.PoolingType[] poolingTypes = - new SubsamplingLayer.PoolingType[]{SubsamplingLayer.PoolingType.MAX, - SubsamplingLayer.PoolingType.AVG, SubsamplingLayer.PoolingType.PNORM}; - - for (Activation afn : activations) { - for (SubsamplingLayer.PoolingType poolingType : poolingTypes) { - for (int minibatchSize : minibatchSizes) { - for (int kernel : kernels) { - INDArray input = Nd4j.rand(minibatchSize, convNIn, length); - INDArray labels = Nd4j.zeros(minibatchSize, finalNOut, croppedLength); - for (int i = 0; i < minibatchSize; i++) { - for (int j = 0; j < croppedLength; j++) { - labels.putScalar(new int[]{i, i % finalNOut, j}, 1.0); - } - } - - NeuralNetConfiguration conf = NeuralNetConfiguration.builder() - .dataType(DataType.DOUBLE) - .updater(new NoOp()) - .dist(new NormalDistribution(0, 1)).convolutionMode(ConvolutionMode.Same).list() - .layer(new Convolution1DLayer.Builder().activation(afn).kernelSize(kernel) - .stride(stride).padding(padding).nOut(convNOut1) - .build()) - .layer(new Cropping1D.Builder(cropping).build()) - .layer(new Convolution1DLayer.Builder().activation(afn).kernelSize(kernel) - .stride(stride).padding(padding).nOut(convNOut2) - .build()) - .layer(new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) - .activation(Activation.SOFTMAX).nOut(finalNOut).build()) - .inputType(InputType.recurrent(convNIn, length,RNNFormat.NCW)).build(); - - String json = conf.toJson(); - NeuralNetConfiguration c2 = NeuralNetConfiguration.fromJson(json); - assertEquals(conf, c2); - - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - - String msg = "PoolingType=" + poolingType + ", minibatch=" + minibatchSize + ", activationFn=" - + afn + ", kernel = " + kernel; - - if (PRINT_RESULTS) { - System.out.println(msg); -// for (int j = 0; j < net.getnLayers(); j++) -// System.out.println("ILayer " + j + " # params: " + net.getLayer(j).numParams()); - } - - boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, - DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); - - assertTrue(gradOK, msg); - - TestUtils.testModelSerialization(net); - } - } + for (Activation afn : activations) { + for (SubsamplingLayer.PoolingType poolingType : poolingTypes) { + for (int minibatchSize : minibatchSizes) { + for (int kernel : kernels) { + INDArray input = Nd4j.rand(minibatchSize, convNIn, length); + INDArray labels = Nd4j.zeros(minibatchSize, finalNOut, croppedLength); + for (int i = 0; i < minibatchSize; i++) { + for (int j = 0; j < croppedLength; j++) { + labels.putScalar(new int[] {i, i % finalNOut, j}, 1.0); + } } - } - } - - @Test - public void testCnn1DWithZeroPadding1D() { - Nd4j.getRandom().setSeed(1337); - - int[] minibatchSizes = {1, 3}; - int length = 7; - int convNIn = 2; - int convNOut1 = 3; - int convNOut2 = 4; - int finalNOut = 4; - - - int[] kernels = {1, 2, 4}; - int stride = 1; - int pnorm = 2; - - int padding = 0; - int zeroPadding = 2; - int paddedLength = length + 2 * zeroPadding; - - Activation[] activations = {Activation.SIGMOID}; - SubsamplingLayer.PoolingType[] poolingTypes = - new SubsamplingLayer.PoolingType[]{SubsamplingLayer.PoolingType.MAX, - SubsamplingLayer.PoolingType.AVG, SubsamplingLayer.PoolingType.PNORM}; - - for (Activation afn : activations) { - for (SubsamplingLayer.PoolingType poolingType : poolingTypes) { - for (int minibatchSize : minibatchSizes) { - for (int kernel : 
kernels) { - INDArray input = Nd4j.rand(minibatchSize, convNIn, length); - INDArray labels = Nd4j.zeros(minibatchSize, finalNOut, paddedLength); - for (int i = 0; i < minibatchSize; i++) { - for (int j = 0; j < paddedLength; j++) { - labels.putScalar(new int[]{i, i % finalNOut, j}, 1.0); - } - } - - NeuralNetConfiguration conf = NeuralNetConfiguration.builder() - .dataType(DataType.DOUBLE) - .updater(new NoOp()) - .dist(new NormalDistribution(0, 1)).convolutionMode(ConvolutionMode.Same).list() - .layer(new Convolution1DLayer.Builder().activation(afn).kernelSize(kernel) - .stride(stride).padding(padding).nOut(convNOut1) - .build()) - .layer(new ZeroPadding1DLayer.Builder(zeroPadding).build()) - .layer(new Convolution1DLayer.Builder().activation(afn).kernelSize(kernel) - .stride(stride).padding(padding).nOut(convNOut2) - .build()) - .layer(new ZeroPadding1DLayer.Builder(0).build()) - .layer(new Subsampling1DLayer.Builder(poolingType).kernelSize(kernel) - .stride(stride).padding(padding).pnorm(pnorm).build()) - .layer(new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) - .activation(Activation.SOFTMAX).nOut(finalNOut).build()) - .inputType(InputType.recurrent(convNIn, length,RNNFormat.NCW)).build(); - - String json = conf.toJson(); - NeuralNetConfiguration c2 = NeuralNetConfiguration.fromJson(json); - assertEquals(conf, c2); - - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - - String msg = "PoolingType=" + poolingType + ", minibatch=" + minibatchSize + ", activationFn=" - + afn + ", kernel = " + kernel; - - if (PRINT_RESULTS) { - System.out.println(msg); -// for (int j = 0; j < net.getnLayers(); j++) -// System.out.println("ILayer " + j + " # params: " + net.getLayer(j).numParams()); - } - - boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, - DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); - - assertTrue(gradOK, msg); - TestUtils.testModelSerialization(net); - } - } - } - } - } - - - @Test - public void testCnn1DWithSubsampling1D() { - Nd4j.getRandom().setSeed(12345); - - int[] minibatchSizes = {1, 3}; - int length = 7; - int convNIn = 2; - int convNOut1 = 3; - int convNOut2 = 4; - int finalNOut = 4; - - int[] kernels = {1, 2, 4}; - int stride = 1; - int padding = 0; - int pnorm = 2; - - Activation[] activations = {Activation.SIGMOID, Activation.TANH}; - SubsamplingLayer.PoolingType[] poolingTypes = - new SubsamplingLayer.PoolingType[]{SubsamplingLayer.PoolingType.MAX, - SubsamplingLayer.PoolingType.AVG, SubsamplingLayer.PoolingType.PNORM}; - - for (Activation afn : activations) { - for (SubsamplingLayer.PoolingType poolingType : poolingTypes) { - for (int minibatchSize : minibatchSizes) { - for (int kernel : kernels) { - INDArray input = Nd4j.rand(minibatchSize, convNIn, length); - INDArray labels = Nd4j.zeros(minibatchSize, finalNOut, length); - for (int i = 0; i < minibatchSize; i++) { - for (int j = 0; j < length; j++) { - labels.putScalar(new int[]{i, i % finalNOut, j}, 1.0); - } - } - - NeuralNetConfiguration conf = NeuralNetConfiguration.builder() - .dataType(DataType.DOUBLE) - .updater(new NoOp()) - .dist(new NormalDistribution(0, 1)).convolutionMode(ConvolutionMode.Same).list() - .layer(0, new Convolution1DLayer.Builder().activation(afn).kernelSize(kernel) - .stride(stride).padding(padding).nOut(convNOut1) - .build()) - .layer(1, new Convolution1DLayer.Builder().activation(afn).kernelSize(kernel) - .stride(stride).padding(padding).nOut(convNOut2) - .build()) - .layer(2, new 
Subsampling1DLayer.Builder(poolingType).kernelSize(kernel) - .stride(stride).padding(padding).pnorm(pnorm).build()) - .layer(3, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) - .activation(Activation.SOFTMAX).nOut(finalNOut).build()) - .inputType(InputType.recurrent(convNIn, length,RNNFormat.NCW)).build(); - - String json = conf.toJson(); - NeuralNetConfiguration c2 = NeuralNetConfiguration.fromJson(json); - assertEquals(conf, c2); - - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - - String msg = "PoolingType=" + poolingType + ", minibatch=" + minibatchSize + ", activationFn=" - + afn + ", kernel = " + kernel; - - if (PRINT_RESULTS) { - System.out.println(msg); -// for (int j = 0; j < net.getnLayers(); j++) -// System.out.println("ILayer " + j + " # params: " + net.getLayer(j).numParams()); - } - - boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, - DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); - - assertTrue(gradOK, msg); - TestUtils.testModelSerialization(net); - } - } - } - } - } - - @Test - public void testCnn1dWithMasking(){ - int length = 12; - int convNIn = 2; - int convNOut1 = 3; - int convNOut2 = 4; - int finalNOut = 3; - - int pnorm = 2; - - SubsamplingLayer.PoolingType[] poolingTypes = - new SubsamplingLayer.PoolingType[] {SubsamplingLayer.PoolingType.MAX, SubsamplingLayer.PoolingType.AVG}; - - for (SubsamplingLayer.PoolingType poolingType : poolingTypes) { - for(ConvolutionMode cm : new ConvolutionMode[]{ConvolutionMode.Same, ConvolutionMode.Truncate}) { - for( int stride : new int[]{1, 2}){ - String s = cm + ", stride=" + stride + ", pooling=" + poolingType; - log.info("Starting test: " + s); - Nd4j.getRandom().setSeed(12345); - - NeuralNetConfiguration conf = NeuralNetConfiguration.builder() - .dataType(DataType.DOUBLE) - .updater(new NoOp()) - .activation(Activation.TANH) - .dist(new NormalDistribution(0, 1)).convolutionMode(cm) - .seed(12345) - .list() - .layer(new Convolution1DLayer.Builder().kernelSize(2) - .rnnDataFormat(RNNFormat.NCW) - .stride(stride).nIn(convNIn).nOut(convNOut1) - .build()) - .layer(new Subsampling1DLayer.Builder(poolingType).kernelSize(2) - .stride(stride).pnorm(pnorm).build()) - .layer(new Convolution1DLayer.Builder().kernelSize(2) - .rnnDataFormat(RNNFormat.NCW) - .stride(stride).nIn(convNOut1).nOut(convNOut2) - .build()) - .layer(new GlobalPoolingLayer(PoolingType.AVG)) - .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) - .activation(Activation.SOFTMAX).nOut(finalNOut).build()) - .inputType(InputType.recurrent(convNIn, length)).build(); - - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - - INDArray f = Nd4j.rand(2, convNIn, length); - INDArray fm = Nd4j.create(2, length); - fm.get(NDArrayIndex.point(0), NDArrayIndex.all()).assign(1); - fm.get(NDArrayIndex.point(1), NDArrayIndex.interval(0,6)).assign(1); - - INDArray label = TestUtils.randomOneHot(2, finalNOut); - - boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.MLNConfig().net(net).input(f) - .labels(label).inputMask(fm)); - - assertTrue(gradOK, s); - TestUtils.testModelSerialization(net); - - //TODO also check that masked step values don't impact forward pass, score or gradients - - DataSet ds = new DataSet(f,label,fm,null); - double scoreBefore = net.score(ds); - net.setInput(f); - net.setLabels(label); - net.setLayerMaskArrays(fm, null); - net.computeGradientAndScore(); - INDArray gradBefore = 
net.getFlattenedGradients().dup(); - f.putScalar(1, 0, 10, 10.0); - f.putScalar(1, 1, 11, 20.0); - double scoreAfter = net.score(ds); - net.setInput(f); - net.setLabels(label); - net.setLayerMaskArrays(fm, null); - net.computeGradientAndScore(); - INDArray gradAfter = net.getFlattenedGradients().dup(); - - assertEquals(scoreBefore, scoreAfter, 1e-6); - assertEquals(gradBefore, gradAfter); - } - } - } - } - - @Test - public void testCnn1Causal() throws Exception { - int convNIn = 2; - int convNOut1 = 3; - int convNOut2 = 4; - int finalNOut = 3; - - int[] lengths = {11, 12, 13, 9, 10, 11}; - int[] kernels = {2, 3, 2, 4, 2, 3}; - int[] dilations = {1, 1, 2, 1, 2, 1}; - int[] strides = {1, 2, 1, 2, 1, 1}; - boolean[] masks = {false, true, false, true, false, true}; - boolean[] hasB = {true, false, true, false, true, true}; - for (int i = 0; i < lengths.length; i++) { - int length = lengths[i]; - int k = kernels[i]; - int d = dilations[i]; - int st = strides[i]; - boolean mask = masks[i]; - boolean hasBias = hasB[i]; - //TODO has bias - String s = "k=" + k + ", s=" + st + " d=" + d + ", seqLen=" + length; - log.info("Starting test: " + s); - Nd4j.getRandom().setSeed(12345); - - NeuralNetConfiguration conf = NeuralNetConfiguration.builder() + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .updater(new NoOp()) - .activation(Activation.TANH) - .weightInit(new NormalDistribution(0, 1)) - .seed(12345) - .list() - .layer(new Convolution1DLayer.Builder().kernelSize(k) - .dilation(d) - .hasBias(hasBias) - .convolutionMode(ConvolutionMode.Causal) - .stride(st).nOut(convNOut1) + .dist(new NormalDistribution(0, 1)) + .convolutionMode(ConvolutionMode.Same) + .layer( + Convolution1DLayer.builder() + .activation(afn) + .kernelSize(kernel) + .stride(stride) + .padding(padding) + .nOut(convNOut1) .build()) - .layer(new Convolution1DLayer.Builder().kernelSize(k) - .dilation(d) - .convolutionMode(ConvolutionMode.Causal) - .stride(st).nOut(convNOut2) + .layer(Cropping1D.builder(cropping).build()) + .layer( + Convolution1DLayer.builder() + .activation(afn) + .kernelSize(kernel) + .stride(stride) + .padding(padding) + .nOut(convNOut2) .build()) - .layer(new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) - .activation(Activation.SOFTMAX).nOut(finalNOut).build()) - .inputType(InputType.recurrent(convNIn, length,RNNFormat.NCW)).build(); + .layer( + RnnOutputLayer.builder() + .lossFunction(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX) + .nOut(finalNOut) + .build()) + .inputType(InputType.recurrent(convNIn, length, RNNFormat.NCW)) + .build(); + + String json = conf.toJson(); + NeuralNetConfiguration c2 = NeuralNetConfiguration.fromJson(json); + assertEquals(conf, c2); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); - INDArray f = Nd4j.rand(DataType.DOUBLE, 2, convNIn, length); - INDArray fm = null; - if (mask) { - fm = Nd4j.create(2, length); - fm.get(NDArrayIndex.point(0), NDArrayIndex.all()).assign(1); - fm.get(NDArrayIndex.point(1), NDArrayIndex.interval(0, length - 2)).assign(1); + String msg = + "PoolingType=" + + poolingType + + ", minibatch=" + + minibatchSize + + ", activationFn=" + + afn + + ", kernel = " + + kernel; + + if (PRINT_RESULTS) { + System.out.println(msg); + // for (int j = 0; j < net.getnLayers(); j++) + // System.out.println("ILayer " + j + " # params: " + + // net.getLayer(j).numParams()); } - long outSize1 = Convolution1DUtils.getOutputSize(length, k, st, 0, ConvolutionMode.Causal, d); - long outSize2 
= Convolution1DUtils.getOutputSize(outSize1, k, st, 0, ConvolutionMode.Causal, d); + boolean gradOK = + GradientCheckUtil.checkGradients( + net, + DEFAULT_EPS, + DEFAULT_MAX_REL_ERROR, + DEFAULT_MIN_ABS_ERROR, + PRINT_RESULTS, + RETURN_ON_FIRST_FAILURE, + input, + labels); - INDArray label = TestUtils.randomOneHotTimeSeries(2, finalNOut, (int)outSize2); + assertTrue(gradOK, msg); - boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.MLNConfig().net(net).input(f) - .labels(label).inputMask(fm)); - - assertTrue(gradOK, s); TestUtils.testModelSerialization(net); + } } + } } + } + + @Test + public void testCnn1DWithZeroPadding1D() { + Nd4j.getRandom().setSeed(1337); + + int[] minibatchSizes = {1, 3}; + int length = 7; + int convNIn = 2; + int convNOut1 = 3; + int convNOut2 = 4; + int finalNOut = 4; + + int[] kernels = {1, 2, 4}; + int stride = 1; + int pnorm = 2; + + int padding = 0; + int zeroPadding = 2; + int paddedLength = length + 2 * zeroPadding; + + Activation[] activations = {Activation.SIGMOID}; + SubsamplingLayer.PoolingType[] poolingTypes = + new SubsamplingLayer.PoolingType[] { + SubsamplingLayer.PoolingType.MAX, + SubsamplingLayer.PoolingType.AVG, + SubsamplingLayer.PoolingType.PNORM + }; + + for (Activation afn : activations) { + for (SubsamplingLayer.PoolingType poolingType : poolingTypes) { + for (int minibatchSize : minibatchSizes) { + for (int kernel : kernels) { + INDArray input = Nd4j.rand(minibatchSize, convNIn, length); + INDArray labels = Nd4j.zeros(minibatchSize, finalNOut, paddedLength); + for (int i = 0; i < minibatchSize; i++) { + for (int j = 0; j < paddedLength; j++) { + labels.putScalar(new int[] {i, i % finalNOut, j}, 1.0); + } + } + + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() + .dataType(DataType.DOUBLE) + .updater(new NoOp()) + .dist(new NormalDistribution(0, 1)) + .convolutionMode(ConvolutionMode.Same) + .layer( + Convolution1DLayer.builder() + .activation(afn) + .kernelSize(kernel) + .stride(stride) + .padding(padding) + .nOut(convNOut1) + .build()) + .layer(ZeroPadding1DLayer.builder(zeroPadding).build()) + .layer( + Convolution1DLayer.builder() + .activation(afn) + .kernelSize(kernel) + .stride(stride) + .padding(padding) + .nOut(convNOut2) + .build()) + .layer(ZeroPadding1DLayer.builder(0).build()) + .layer( + Subsampling1DLayer.builder(poolingType) + .kernelSize(kernel) + .stride(stride) + .padding(padding) + .pnorm(pnorm) + .build()) + .layer( + RnnOutputLayer.builder() + .lossFunction(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX) + .nOut(finalNOut) + .build()) + .inputType(InputType.recurrent(convNIn, length, RNNFormat.NCW)) + .build(); + + String json = conf.toJson(); + NeuralNetConfiguration c2 = NeuralNetConfiguration.fromJson(json); + assertEquals(conf, c2); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + String msg = + "PoolingType=" + + poolingType + + ", minibatch=" + + minibatchSize + + ", activationFn=" + + afn + + ", kernel = " + + kernel; + + if (PRINT_RESULTS) { + System.out.println(msg); + // for (int j = 0; j < net.getnLayers(); j++) + // System.out.println("ILayer " + j + " # params: " + + // net.getLayer(j).numParams()); + } + + boolean gradOK = + GradientCheckUtil.checkGradients( + net, + DEFAULT_EPS, + DEFAULT_MAX_REL_ERROR, + DEFAULT_MIN_ABS_ERROR, + PRINT_RESULTS, + RETURN_ON_FIRST_FAILURE, + input, + labels); + + assertTrue(gradOK, msg); + TestUtils.testModelSerialization(net); + } + } + } + } + } + + @Test + public void 
testCnn1DWithSubsampling1D() { + Nd4j.getRandom().setSeed(12345); + + int[] minibatchSizes = {1, 3}; + int length = 7; + int convNIn = 2; + int convNOut1 = 3; + int convNOut2 = 4; + int finalNOut = 4; + + int[] kernels = {1, 2, 4}; + int stride = 1; + int padding = 0; + int pnorm = 2; + + Activation[] activations = {Activation.SIGMOID, Activation.TANH}; + SubsamplingLayer.PoolingType[] poolingTypes = + new SubsamplingLayer.PoolingType[] { + SubsamplingLayer.PoolingType.MAX, + SubsamplingLayer.PoolingType.AVG, + SubsamplingLayer.PoolingType.PNORM + }; + + for (Activation afn : activations) { + for (SubsamplingLayer.PoolingType poolingType : poolingTypes) { + for (int minibatchSize : minibatchSizes) { + for (int kernel : kernels) { + INDArray input = Nd4j.rand(minibatchSize, convNIn, length); + INDArray labels = Nd4j.zeros(minibatchSize, finalNOut, length); + for (int i = 0; i < minibatchSize; i++) { + for (int j = 0; j < length; j++) { + labels.putScalar(new int[] {i, i % finalNOut, j}, 1.0); + } + } + + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() + .dataType(DataType.DOUBLE) + .updater(new NoOp()) + .dist(new NormalDistribution(0, 1)) + .convolutionMode(ConvolutionMode.Same) + .list() + .layer( + 0, + Convolution1DLayer.builder() + .activation(afn) + .kernelSize(kernel) + .stride(stride) + .padding(padding) + .nOut(convNOut1) + .build()) + .layer( + 1, + Convolution1DLayer.builder() + .activation(afn) + .kernelSize(kernel) + .stride(stride) + .padding(padding) + .nOut(convNOut2) + .build()) + .layer( + 2, + Subsampling1DLayer.builder(poolingType) + .kernelSize(kernel) + .stride(stride) + .padding(padding) + .pnorm(pnorm) + .build()) + .layer( + 3, + RnnOutputLayer.builder() + .lossFunction(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX) + .nOut(finalNOut) + .build()) + .inputType(InputType.recurrent(convNIn, length, RNNFormat.NCW)) + .build(); + + String json = conf.toJson(); + NeuralNetConfiguration c2 = NeuralNetConfiguration.fromJson(json); + assertEquals(conf, c2); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + String msg = + "PoolingType=" + + poolingType + + ", minibatch=" + + minibatchSize + + ", activationFn=" + + afn + + ", kernel = " + + kernel; + + if (PRINT_RESULTS) { + System.out.println(msg); + // for (int j = 0; j < net.getnLayers(); j++) + // System.out.println("ILayer " + j + " # params: " + + // net.getLayer(j).numParams()); + } + + boolean gradOK = + GradientCheckUtil.checkGradients( + net, + DEFAULT_EPS, + DEFAULT_MAX_REL_ERROR, + DEFAULT_MIN_ABS_ERROR, + PRINT_RESULTS, + RETURN_ON_FIRST_FAILURE, + input, + labels); + + assertTrue(gradOK, msg); + TestUtils.testModelSerialization(net); + } + } + } + } + } + + @Test + public void testCnn1dWithMasking() { + int length = 12; + int convNIn = 2; + int convNOut1 = 3; + int convNOut2 = 4; + int finalNOut = 3; + + int pnorm = 2; + + SubsamplingLayer.PoolingType[] poolingTypes = + new SubsamplingLayer.PoolingType[] { + SubsamplingLayer.PoolingType.MAX, SubsamplingLayer.PoolingType.AVG + }; + + for (SubsamplingLayer.PoolingType poolingType : poolingTypes) { + for (ConvolutionMode cm : + new ConvolutionMode[] {ConvolutionMode.Same, ConvolutionMode.Truncate}) { + for (int stride : new int[] {1, 2}) { + String s = cm + ", stride=" + stride + ", pooling=" + poolingType; + log.info("Starting test: " + s); + Nd4j.getRandom().setSeed(12345); + + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() + .dataType(DataType.DOUBLE) + .updater(new NoOp()) + 
.activation(Activation.TANH) + .dist(new NormalDistribution(0, 1)) + .convolutionMode(cm) + .seed(12345) + .list() + .layer( + Convolution1DLayer.builder() + .kernelSize(2) + .rnnDataFormat(RNNFormat.NCW) + .stride(stride) + .nIn(convNIn) + .nOut(convNOut1) + .build()) + .layer( + Subsampling1DLayer.builder(poolingType) + .kernelSize(2) + .stride(stride) + .pnorm(pnorm) + .build()) + .layer( + Convolution1DLayer.builder() + .kernelSize(2) + .rnnDataFormat(RNNFormat.NCW) + .stride(stride) + .nIn(convNOut1) + .nOut(convNOut2) + .build()) + .layer( GlobalPoolingLayer.builder().poolingType(PoolingType.AVG).build()) + .layer( + OutputLayer.builder() + .lossFunction(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX) + .nOut(finalNOut) + .build()) + .inputType(InputType.recurrent(convNIn, length)) + .build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + INDArray f = Nd4j.rand(2, convNIn, length); + INDArray fm = Nd4j.create(2, length); + fm.get(NDArrayIndex.point(0), NDArrayIndex.all()).assign(1); + fm.get(NDArrayIndex.point(1), NDArrayIndex.interval(0, 6)).assign(1); + + INDArray label = TestUtils.randomOneHot(2, finalNOut); + + boolean gradOK = + GradientCheckUtil.checkGradients( + new GradientCheckUtil.MLNConfig().net(net).input(f).labels(label).inputMask(fm)); + + assertTrue(gradOK, s); + TestUtils.testModelSerialization(net); + + // TODO also check that masked step values don't impact forward pass, score or gradients + + DataSet ds = new DataSet(f, label, fm, null); + double scoreBefore = net.score(ds); + net.setInput(f); + net.setLabels(label); + net.setLayerMaskArrays(fm, null); + net.computeGradientAndScore(); + INDArray gradBefore = net.getFlattenedGradients().dup(); + f.putScalar(1, 0, 10, 10.0); + f.putScalar(1, 1, 11, 20.0); + double scoreAfter = net.score(ds); + net.setInput(f); + net.setLabels(label); + net.setLayerMaskArrays(fm, null); + net.computeGradientAndScore(); + INDArray gradAfter = net.getFlattenedGradients().dup(); + + assertEquals(scoreBefore, scoreAfter, 1e-6); + assertEquals(gradBefore, gradAfter); + } + } + } + } + + @Test + public void testCnn1Causal() throws Exception { + int convNIn = 2; + int convNOut1 = 3; + int convNOut2 = 4; + int finalNOut = 3; + + int[] lengths = {11, 12, 13, 9, 10, 11}; + int[] kernels = {2, 3, 2, 4, 2, 3}; + int[] dilations = {1, 1, 2, 1, 2, 1}; + int[] strides = {1, 2, 1, 2, 1, 1}; + boolean[] masks = {false, true, false, true, false, true}; + boolean[] hasB = {true, false, true, false, true, true}; + for (int i = 0; i < lengths.length; i++) { + int length = lengths[i]; + int k = kernels[i]; + int d = dilations[i]; + int st = strides[i]; + boolean mask = masks[i]; + boolean hasBias = hasB[i]; + // TODO has bias + String s = "k=" + k + ", s=" + st + " d=" + d + ", seqLen=" + length; + log.info("Starting test: " + s); + Nd4j.getRandom().setSeed(12345); + + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() + .dataType(DataType.DOUBLE) + .updater(new NoOp()) + .activation(Activation.TANH) + .weightInit(new NormalDistribution(0, 1)) + .seed(12345) + .list() + .layer( + Convolution1DLayer.builder() + .kernelSize(k) + .dilation(d) + .hasBias(hasBias) + .convolutionMode(ConvolutionMode.Causal) + .stride(st) + .nOut(convNOut1) + .build()) + .layer( + Convolution1DLayer.builder() + .kernelSize(k) + .dilation(d) + .convolutionMode(ConvolutionMode.Causal) + .stride(st) + .nOut(convNOut2) + .build()) + .layer( + RnnOutputLayer.builder() + .lossFunction(LossFunctions.LossFunction.MCXENT) + 
.activation(Activation.SOFTMAX) + .nOut(finalNOut) + .build()) + .inputType(InputType.recurrent(convNIn, length, RNNFormat.NCW)) + .build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + INDArray f = Nd4j.rand(DataType.DOUBLE, 2, convNIn, length); + INDArray fm = null; + if (mask) { + fm = Nd4j.create(2, length); + fm.get(NDArrayIndex.point(0), NDArrayIndex.all()).assign(1); + fm.get(NDArrayIndex.point(1), NDArrayIndex.interval(0, length - 2)).assign(1); + } + + long outSize1 = Convolution1DUtils.getOutputSize(length, k, st, 0, ConvolutionMode.Causal, d); + long outSize2 = + Convolution1DUtils.getOutputSize(outSize1, k, st, 0, ConvolutionMode.Causal, d); + + INDArray label = TestUtils.randomOneHotTimeSeries(2, finalNOut, (int) outSize2); + + boolean gradOK = + GradientCheckUtil.checkGradients( + new GradientCheckUtil.MLNConfig().net(net).input(f).labels(label).inputMask(fm)); + + assertTrue(gradOK, s); + TestUtils.testModelSerialization(net); + } + } } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN3DGradientCheckTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN3DGradientCheckTest.java index ba60ca557..154716fb3 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN3DGradientCheckTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN3DGradientCheckTest.java @@ -115,16 +115,16 @@ public class CNN3DGradientCheckTest extends BaseDL4JTest { .updater(new NoOp()).weightInit(WeightInit.LECUN_NORMAL) .dist(new NormalDistribution(0, 1)) .list() - .layer(0, new Convolution3D.Builder().activation(afn).kernelSize(kernel) + .layer(0, Convolution3D.builder().activation(afn).kernelSize(kernel) .stride(stride).nIn(convNIn).nOut(convNOut1).hasBias(false) .convolutionMode(mode).dataFormat(df) .build()) - .layer(1, new Convolution3D.Builder().activation(afn).kernelSize(1, 1, 1) + .layer(1, Convolution3D.builder().activation(afn).kernelSize(1, 1, 1) .nIn(convNOut1).nOut(convNOut2).hasBias(false) .convolutionMode(mode).dataFormat(df) .build()) - .layer(2, new DenseLayer.Builder().nOut(denseNOut).build()) - .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(2, DenseLayer.builder().nOut(denseNOut).build()) + .layer(OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(finalNOut).build()) .inputPreProcessor(2, new Cnn3DToFeedForwardPreProcessor(outDepth, outHeight, outWidth, @@ -218,17 +218,17 @@ public class CNN3DGradientCheckTest extends BaseDL4JTest { .updater(new NoOp()).weightInit(WeightInit.LECUN_NORMAL) .dist(new NormalDistribution(0, 1)) .list() - .layer(0, new Convolution3D.Builder().activation(afn).kernelSize(kernel) + .layer(0, Convolution3D.builder().activation(afn).kernelSize(kernel) .nIn(convNIn).nOut(convNOut1).hasBias(false) .convolutionMode(mode).dataFormat(Convolution3D.DataFormat.NCDHW) .build()) - .layer(1, new Convolution3D.Builder().activation(afn).kernelSize(1, 1, 1) + .layer(1, Convolution3D.builder().activation(afn).kernelSize(1, 1, 1) .nIn(convNOut1).nOut(convNOut2).hasBias(false) .convolutionMode(mode).dataFormat(Convolution3D.DataFormat.NCDHW) .build()) - .layer(2, new ZeroPadding3DLayer.Builder(zeroPadding).build()) - .layer(3, new DenseLayer.Builder().nOut(denseNOut).build()) - .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(2, ZeroPadding3DLayer.builder(zeroPadding).build()) + .layer(3, 
DenseLayer.builder().nOut(denseNOut).build()) + .layer(OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(finalNOut).build()) .inputPreProcessor(3, new Cnn3DToFeedForwardPreProcessor(outDepth, outHeight, outWidth, @@ -314,14 +314,14 @@ public class CNN3DGradientCheckTest extends BaseDL4JTest { .weightInit(WeightInit.XAVIER) .dist(new NormalDistribution(0, 1)) .list() - .layer(0, new Convolution3D.Builder().activation(afn).kernelSize(1, 1, 1) + .layer(0, Convolution3D.builder().activation(afn).kernelSize(1, 1, 1) .nIn(convNIn).nOut(convNOut).hasBias(false) .convolutionMode(mode).dataFormat(df) .build()) - .layer(1, new Subsampling3DLayer.Builder(kernel) - .poolingType(pool).convolutionMode(mode).dataFormat(df).build()) - .layer(2, new DenseLayer.Builder().nOut(denseNOut).build()) - .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(1,Subsampling3DLayer.builder(kernel) + .poolingType(pool.toPoolingType()).convolutionMode(mode).dataFormat(df).build()) + .layer(2, DenseLayer.builder().nOut(denseNOut).build()) + .layer(OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(finalNOut).build()) .inputPreProcessor(2, new Cnn3DToFeedForwardPreProcessor(outDepth, outHeight, outWidth,convNOut, df)) @@ -401,13 +401,13 @@ public class CNN3DGradientCheckTest extends BaseDL4JTest { .dist(new NormalDistribution(0, 1)) .seed(12345) .list() - .layer(0, new Convolution3D.Builder().activation(afn).kernelSize(1, 1, 1) + .layer(0, Convolution3D.builder().activation(afn).kernelSize(1, 1, 1) .nIn(convNIn).nOut(convNOut).hasBias(false) .convolutionMode(mode).dataFormat(df) .build()) - .layer(1, new Upsampling3D.Builder(upsamplingSize[0]).dataFormat(df).build()) - .layer(2, new DenseLayer.Builder().nOut(denseNOut).build()) - .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(1, Upsampling3D.builder(upsamplingSize[0]).dataFormat(df).build()) + .layer(2, DenseLayer.builder().nOut(denseNOut).build()) + .layer(OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(finalNOut).build()) .inputPreProcessor(2, new Cnn3DToFeedForwardPreProcessor(outDepth, outHeight, outWidth, @@ -496,17 +496,17 @@ public class CNN3DGradientCheckTest extends BaseDL4JTest { .updater(new NoOp()).weightInit(WeightInit.LECUN_NORMAL) .dist(new NormalDistribution(0, 1)) .list() - .layer(0, new Convolution3D.Builder().activation(afn).kernelSize(kernel) + .layer(0, Convolution3D.builder().activation(afn).kernelSize(kernel) .nIn(convNIn).nOut(convNOut1).hasBias(false) .convolutionMode(mode).dataFormat(Convolution3D.DataFormat.NCDHW) .build()) - .layer(1, new Convolution3D.Builder().activation(afn).kernelSize(1, 1, 1) + .layer(1, Convolution3D.builder().activation(afn).kernelSize(1, 1, 1) .nIn(convNOut1).nOut(convNOut2).hasBias(false) .convolutionMode(mode).dataFormat(Convolution3D.DataFormat.NCDHW) .build()) - .layer(2, new Cropping3D.Builder(cropping).build()) - .layer(3, new DenseLayer.Builder().nOut(denseNOut).build()) - .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(2, Cropping3D.builder(cropping).build()) + .layer(3, DenseLayer.builder().nOut(denseNOut).build()) + .layer(OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(finalNOut).build()) .inputPreProcessor(3, new Cnn3DToFeedForwardPreProcessor(outDepth, outHeight, outWidth, @@ -595,15 +595,15 @@ public 
class CNN3DGradientCheckTest extends BaseDL4JTest { .updater(new NoOp()) .weightInit(new NormalDistribution(0, 0.1)) .list() - .layer(0, new Convolution3D.Builder().activation(afn).kernelSize(kernel) + .layer(0, Convolution3D.builder().activation(afn).kernelSize(kernel) .stride(stride).nIn(convNIn).nOut(dOut).hasBias(false) .convolutionMode(mode).dataFormat(df) .build()) - .layer(1, new Deconvolution3D.Builder().activation(afn).kernelSize(kernel) + .layer(1, Deconvolution3D.builder().activation(afn).kernelSize(kernel) .stride(stride).nOut(dOut).hasBias(false) .convolutionMode(mode).dataFormat(df) .build()) - .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(finalNOut).build()) .inputType(InputType.convolutional3D(df, depth, height, width, convNIn)).build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNNGradientCheckTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNNGradientCheckTest.java index d11bd33c6..a3ef0c082 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNNGradientCheckTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNNGradientCheckTest.java @@ -20,6 +20,11 @@ package org.deeplearning4j.gradientcheck; +import static org.deeplearning4j.nn.conf.ConvolutionMode.Same; +import static org.deeplearning4j.nn.conf.ConvolutionMode.Truncate; +import static org.junit.jupiter.api.Assertions.*; + +import java.util.Arrays; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.TestUtils; import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator; @@ -35,7 +40,6 @@ import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.weights.WeightInit; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; - import org.junit.jupiter.api.Test; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.api.buffer.DataType; @@ -45,1272 +49,1871 @@ import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.learning.config.NoOp; import org.nd4j.linalg.lossfunctions.LossFunctions; -import java.util.Arrays; - -import static org.deeplearning4j.nn.conf.ConvolutionMode.Same; -import static org.deeplearning4j.nn.conf.ConvolutionMode.Truncate; -import static org.junit.jupiter.api.Assertions.*; - public class CNNGradientCheckTest extends BaseDL4JTest { - private static final boolean PRINT_RESULTS = true; - private static final boolean RETURN_ON_FIRST_FAILURE = false; - private static final double DEFAULT_EPS = 1e-6; - private static final double DEFAULT_MAX_REL_ERROR = 1e-3; - private static final double DEFAULT_MIN_ABS_ERROR = 1e-8; + private static final boolean PRINT_RESULTS = true; + private static final boolean RETURN_ON_FIRST_FAILURE = false; + private static final double DEFAULT_EPS = 1e-6; + private static final double DEFAULT_MAX_REL_ERROR = 1e-3; + private static final double DEFAULT_MIN_ABS_ERROR = 1e-8; - static { - Nd4j.setDataType(DataType.DOUBLE); - } + static { + Nd4j.setDataType(DataType.DOUBLE); + } - private final CNN2DFormat format; + private final CNN2DFormat format; - public CNNGradientCheckTest(CNN2DFormat format){ - this.format = format; - } + public CNNGradientCheckTest(CNN2DFormat format) { + this.format = format; + } - public static Object[] params(){ - return CNN2DFormat.values(); - } + public 
static Object[] params() { + return CNN2DFormat.values(); + } - @Test - public void testGradientCNNMLN() { - if(this.format != CNN2DFormat.NCHW) //Only test NCHW due to flat input format... - return; + @Test + public void testGradientCNNMLN() { + if (this.format != CNN2DFormat.NCHW) // Only test NCHW due to flat input format... + return; - //Parameterized test, testing combinations of: - // (a) activation function - // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation') - // (c) Loss function (with specified output activations) - Activation[] activFns = {Activation.SIGMOID, Activation.TANH}; - boolean[] characteristic = {false, true}; //If true: run some backprop steps first + // Parameterized test, testing combinations of: + // (a) activation function + // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic + // mode of operation') + // (c) Loss function (with specified output activations) + Activation[] activFns = {Activation.SIGMOID, Activation.TANH}; + boolean[] characteristic = {false, true}; // If true: run some backprop steps first - LossFunctions.LossFunction[] lossFunctions = - {LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE}; - Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; //i.e., lossFunctions[i] used with outputActivations[i] here + LossFunctions.LossFunction[] lossFunctions = { + LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE + }; + Activation[] outputActivations = { + Activation.SOFTMAX, Activation.TANH + }; // i.e., lossFunctions[i] used with outputActivations[i] here - DataSet ds = new IrisDataSetIterator(150, 150).next(); - ds.normalizeZeroMeanZeroUnitVariance(); - INDArray input = ds.getFeatures(); - INDArray labels = ds.getLabels(); + DataSet ds = new IrisDataSetIterator(150, 150).next(); + ds.normalizeZeroMeanZeroUnitVariance(); + INDArray input = ds.getFeatures(); + INDArray labels = ds.getLabels(); - for (Activation afn : activFns) { - for (boolean doLearningFirst : characteristic) { - for (int i = 0; i < lossFunctions.length; i++) { - LossFunctions.LossFunction lf = lossFunctions[i]; - Activation outputActivation = outputActivations[i]; + for (Activation afn : activFns) { + for (boolean doLearningFirst : characteristic) { + for (int i = 0; i < lossFunctions.length; i++) { + LossFunctions.LossFunction lf = lossFunctions[i]; + Activation outputActivation = outputActivations[i]; - NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder() - .dataType(DataType.DOUBLE) - .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).updater(new NoOp()) - .weightInit(WeightInit.XAVIER).seed(12345L).list() - .layer(0, new ConvolutionLayer.Builder(1, 1).nOut(6).activation(afn).build()) - .layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nOut(3).build()) - .inputType(InputType.convolutionalFlat(1, 4, 1)); + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = + NeuralNetConfiguration.builder() + .dataType(DataType.DOUBLE) + .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT) + .updater(new NoOp()) + .weightInit(WeightInit.XAVIER) + .seed(12345L) + .list() + .layer(0, ConvolutionLayer.builder(1, 1).nOut(6).activation(afn).build()) + .layer(1, OutputLayer.builder(lf).activation(outputActivation).nOut(3).build()) + .inputType(InputType.convolutionalFlat(1, 4, 1)); - NeuralNetConfiguration conf = builder.build(); + 
NeuralNetConfiguration conf = builder.build(); - MultiLayerNetwork mln = new MultiLayerNetwork(conf); - mln.init(); - String name = new Object() { - }.getClass().getEnclosingMethod().getName(); + MultiLayerNetwork mln = new MultiLayerNetwork(conf); + mln.init(); + String name = new Object() {}.getClass().getEnclosingMethod().getName(); - if (doLearningFirst) { - //Run a number of iterations of learning - mln.setInput(ds.getFeatures()); - mln.setLabels(ds.getLabels()); - mln.computeGradientAndScore(); - double scoreBefore = mln.getScore(); - for (int j = 0; j < 10; j++) - mln.fit(ds); - mln.computeGradientAndScore(); - double scoreAfter = mln.getScore(); - //Can't test in 'characteristic mode of operation' if not learning - String msg = name + " - score did not (sufficiently) decrease during learning - activationFn=" - + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation - + ", doLearningFirst= " + doLearningFirst + " (before=" + scoreBefore - + ", scoreAfter=" + scoreAfter + ")"; - assertTrue(scoreAfter < 0.9 * scoreBefore, msg); - } + if (doLearningFirst) { + // Run a number of iterations of learning + mln.setInput(ds.getFeatures()); + mln.setLabels(ds.getLabels()); + mln.computeGradientAndScore(); + double scoreBefore = mln.getScore(); + for (int j = 0; j < 10; j++) mln.fit(ds); + mln.computeGradientAndScore(); + double scoreAfter = mln.getScore(); + // Can't test in 'characteristic mode of operation' if not learning + String msg = + name + + " - score did not (sufficiently) decrease during learning - activationFn=" + + afn + + ", lossFn=" + + lf + + ", outputActivation=" + + outputActivation + + ", doLearningFirst= " + + doLearningFirst + + " (before=" + + scoreBefore + + ", scoreAfter=" + + scoreAfter + + ")"; + assertTrue(scoreAfter < 0.9 * scoreBefore, msg); + } - if (PRINT_RESULTS) { - System.out.println(name + " - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" - + outputActivation + ", doLearningFirst=" + doLearningFirst); -// for (int j = 0; j < mln.getnLayers(); j++) -// System.out.println("ILayer " + j + " # params: " + mln.getLayer(j).numParams()); - } + if (PRINT_RESULTS) { + System.out.println( + name + + " - activationFn=" + + afn + + ", lossFn=" + + lf + + ", outputActivation=" + + outputActivation + + ", doLearningFirst=" + + doLearningFirst); + // for (int j = 0; j < mln.getnLayers(); j++) + // System.out.println("ILayer " + j + " # params: " + + // mln.getLayer(j).numParams()); + } - boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, - DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); + boolean gradOK = + GradientCheckUtil.checkGradients( + mln, + DEFAULT_EPS, + DEFAULT_MAX_REL_ERROR, + DEFAULT_MIN_ABS_ERROR, + PRINT_RESULTS, + RETURN_ON_FIRST_FAILURE, + input, + labels); - assertTrue(gradOK); - TestUtils.testModelSerialization(mln); - } - } + assertTrue(gradOK); + TestUtils.testModelSerialization(mln); } + } } + } + @Test + public void testGradientCNNL1L2MLN() { + if (this.format != CNN2DFormat.NCHW) // Only test NCHW due to flat input format... + return; - @Test - public void testGradientCNNL1L2MLN() { - if(this.format != CNN2DFormat.NCHW) //Only test NCHW due to flat input format... 
- return; + // Parameterized test, testing combinations of: + // (a) activation function + // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic + // mode of operation') + // (c) Loss function (with specified output activations) - //Parameterized test, testing combinations of: - // (a) activation function - // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation') - // (c) Loss function (with specified output activations) + DataSet ds = new IrisDataSetIterator(150, 150).next(); + ds.normalizeZeroMeanZeroUnitVariance(); + INDArray input = ds.getFeatures(); + INDArray labels = ds.getLabels(); - DataSet ds = new IrisDataSetIterator(150, 150).next(); - ds.normalizeZeroMeanZeroUnitVariance(); - INDArray input = ds.getFeatures(); - INDArray labels = ds.getLabels(); + // use l2vals[i] with l1vals[i] + double[] l2vals = {0.4, 0.0, 0.4, 0.4}; + double[] l1vals = {0.0, 0.0, 0.5, 0.0}; + double[] biasL2 = {0.0, 0.0, 0.0, 0.2}; + double[] biasL1 = {0.0, 0.0, 0.6, 0.0}; + Activation[] activFns = { + Activation.SIGMOID, Activation.TANH, Activation.ELU, Activation.SOFTPLUS + }; + boolean[] characteristic = {false, true, false, true}; // If true: run some backprop steps first - //use l2vals[i] with l1vals[i] - double[] l2vals = {0.4, 0.0, 0.4, 0.4}; - double[] l1vals = {0.0, 0.0, 0.5, 0.0}; - double[] biasL2 = {0.0, 0.0, 0.0, 0.2}; - double[] biasL1 = {0.0, 0.0, 0.6, 0.0}; - Activation[] activFns = {Activation.SIGMOID, Activation.TANH, Activation.ELU, Activation.SOFTPLUS}; - boolean[] characteristic = {false, true, false, true}; //If true: run some backprop steps first + LossFunctions.LossFunction[] lossFunctions = { + LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, + LossFunctions.LossFunction.MSE, + LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, + LossFunctions.LossFunction.MSE + }; + Activation[] outputActivations = { + Activation.SOFTMAX, Activation.TANH, Activation.SOFTMAX, Activation.IDENTITY + }; // i.e., lossFunctions[i] used with outputActivations[i] here - LossFunctions.LossFunction[] lossFunctions = - {LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE, LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE}; - Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH, Activation.SOFTMAX, Activation.IDENTITY}; //i.e., lossFunctions[i] used with outputActivations[i] here + for (int i = 0; i < l2vals.length; i++) { + Activation afn = activFns[i]; + boolean doLearningFirst = characteristic[i]; + LossFunctions.LossFunction lf = lossFunctions[i]; + Activation outputActivation = outputActivations[i]; + double l2 = l2vals[i]; + double l1 = l1vals[i]; - for( int i=0; i (mb,4,2,2) + .layer( + OutputLayer.builder() + .lossFunction(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX) + .nIn(2 * 2 * 4) + .nOut(nOut) + .build()) + .inputType(InputType.convolutionalFlat(height, width, inputDepth)) + .build(); - int width = 5; - int height = 5; - int inputDepth = 1; + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); - int[] kernel = {2, 2}; - int blocks = 2; + String msg = + "PoolingType=" + poolingType + ", minibatch=" + minibatchSize + ", activationFn=" + afn; - String[] activations = {"sigmoid"}; - SubsamplingLayer.PoolingType[] poolingTypes = - new SubsamplingLayer.PoolingType[]{SubsamplingLayer.PoolingType.MAX, - SubsamplingLayer.PoolingType.AVG, SubsamplingLayer.PoolingType.PNORM}; - - for (String afn 
: activations) { - for (SubsamplingLayer.PoolingType poolingType : poolingTypes) { - INDArray input = Nd4j.rand(minibatchSize, width * height * inputDepth); - INDArray labels = Nd4j.zeros(minibatchSize, nOut); - for (int i = 0; i < minibatchSize; i++) { - labels.putScalar(new int[]{i, i % nOut}, 1.0); - } - - NeuralNetConfiguration conf = - NeuralNetConfiguration.builder() - .dataType(DataType.DOUBLE) - .updater(new NoOp()) - .dist(new NormalDistribution(0, 1)) - .list().layer(new ConvolutionLayer.Builder(kernel).nIn(inputDepth).hasBias(false) - .nOut(1).build()) //output: (5-2+0)/1+1 = 4 - .layer(new SpaceToDepthLayer.Builder(blocks, SpaceToDepthLayer.DataFormat.NCHW) - .build()) // (mb,1,4,4) -> (mb,4,2,2) - .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) - .activation(Activation.SOFTMAX).nIn(2 * 2 * 4) - .nOut(nOut).build()) - .inputType(InputType.convolutionalFlat(height, width, inputDepth)) - .build(); - - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - - String msg = "PoolingType=" + poolingType + ", minibatch=" + minibatchSize + ", activationFn=" - + afn; - - if (PRINT_RESULTS) { - System.out.println(msg); -// for (int j = 0; j < net.getnLayers(); j++) -// System.out.println("ILayer " + j + " # params: " + net.getLayer(j).numParams()); - } - boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, - DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); - - assertTrue(gradOK, msg); - - TestUtils.testModelSerialization(net); - } + if (PRINT_RESULTS) { + System.out.println(msg); + // for (int j = 0; j < net.getnLayers(); j++) + // System.out.println("ILayer " + j + " # params: " + + // net.getLayer(j).numParams()); } + boolean gradOK = + GradientCheckUtil.checkGradients( + net, + DEFAULT_EPS, + DEFAULT_MAX_REL_ERROR, + DEFAULT_MIN_ABS_ERROR, + PRINT_RESULTS, + RETURN_ON_FIRST_FAILURE, + input, + labels); + + assertTrue(gradOK, msg); + + TestUtils.testModelSerialization(net); + } } + } - @Test - public void testCnnWithSpaceToBatch() { - Nd4j.getRandom().setSeed(12345); - int nOut = 4; + @Test + public void testCnnWithSpaceToBatch() { + Nd4j.getRandom().setSeed(12345); + int nOut = 4; - int[] minibatchSizes = {2, 4}; - int width = 5; - int height = 5; - int inputDepth = 1; + int[] minibatchSizes = {2, 4}; + int width = 5; + int height = 5; + int inputDepth = 1; - int[] kernel = {2, 2}; - int[] blocks = {2, 2}; + int[] kernel = {2, 2}; + int[] blocks = {2, 2}; - String[] activations = {"sigmoid", "tanh"}; - SubsamplingLayer.PoolingType[] poolingTypes = - new SubsamplingLayer.PoolingType[]{SubsamplingLayer.PoolingType.MAX, - SubsamplingLayer.PoolingType.AVG, SubsamplingLayer.PoolingType.PNORM}; - - boolean nchw = format == CNN2DFormat.NCHW; - for (String afn : activations) { - for (SubsamplingLayer.PoolingType poolingType : poolingTypes) { - for (int minibatchSize : minibatchSizes) { - long[] inShape = nchw ? 
new long[]{minibatchSize, inputDepth, height, width} : new long[]{minibatchSize, height, width, inputDepth}; - INDArray input = Nd4j.rand(DataType.DOUBLE, inShape); - INDArray labels = Nd4j.zeros(4 * minibatchSize, nOut); - for (int i = 0; i < 4 * minibatchSize; i++) { - labels.putScalar(new int[]{i, i % nOut}, 1.0); - } - - NeuralNetConfiguration conf = - NeuralNetConfiguration.builder() - .dataType(DataType.DOUBLE) - .updater(new NoOp()).weightInit(new NormalDistribution(0, 1)) - .list() - .layer(new ConvolutionLayer.Builder(kernel) - .nIn(inputDepth).nOut(3) - .dataFormat(format) - .build()) - .layer(new SpaceToBatchLayer.Builder(blocks) - .dataFormat(format) - .build()) //trivial space to batch - .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) - .activation(Activation.SOFTMAX) - .nOut(nOut).build()) - .inputType(InputType.convolutional(height, width, inputDepth, format)) - .build(); - - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - - String msg = format + " - poolingType=" + poolingType + ", minibatch=" + minibatchSize + ", activationFn=" - + afn; - - if (PRINT_RESULTS) { - System.out.println(msg); -// for (int j = 0; j < net.getnLayers(); j++) -// System.out.println("ILayer " + j + " # params: " + net.getLayer(j).numParams()); - } - boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, - DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); - - assertTrue(gradOK, msg); - - //Also check compgraph: - ComputationGraph cg = net.toComputationGraph(); - gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.GraphConfig().net(cg).inputs(new INDArray[]{input}) - .labels(new INDArray[]{labels})); - assertTrue(gradOK, msg + " - compgraph"); - - TestUtils.testModelSerialization(net); - } - } - } - } - - - @Test - public void testCnnWithUpsampling() { - Nd4j.getRandom().setSeed(12345); - int nOut = 4; - - int[] minibatchSizes = {1, 3}; - int width = 5; - int height = 5; - int inputDepth = 1; - - int[] kernel = {2, 2}; - int[] stride = {1, 1}; - int[] padding = {0, 0}; - int size = 2; - - boolean nchw = format == CNN2DFormat.NCHW; + String[] activations = {"sigmoid", "tanh"}; + SubsamplingLayer.PoolingType[] poolingTypes = + new SubsamplingLayer.PoolingType[] { + SubsamplingLayer.PoolingType.MAX, + SubsamplingLayer.PoolingType.AVG, + SubsamplingLayer.PoolingType.PNORM + }; + boolean nchw = format == CNN2DFormat.NCHW; + for (String afn : activations) { + for (SubsamplingLayer.PoolingType poolingType : poolingTypes) { for (int minibatchSize : minibatchSizes) { - long[] inShape = nchw ? new long[]{minibatchSize, inputDepth, height, width} : new long[]{minibatchSize, height, width, inputDepth}; + long[] inShape = + nchw + ? 
new long[] {minibatchSize, inputDepth, height, width} + : new long[] {minibatchSize, height, width, inputDepth}; + INDArray input = Nd4j.rand(DataType.DOUBLE, inShape); + INDArray labels = Nd4j.zeros(4 * minibatchSize, nOut); + for (int i = 0; i < 4 * minibatchSize; i++) { + labels.putScalar(new int[] {i, i % nOut}, 1.0); + } + + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() + .dataType(DataType.DOUBLE) + .updater(new NoOp()) + .weightInit(new NormalDistribution(0, 1)) + .list() + .layer( + ConvolutionLayer.builder(kernel) + .nIn(inputDepth) + .nOut(3) + .convFormat(format) + .build()) + .layer( + SpaceToBatchLayer.builder(blocks) + .dataFormat(format) + .build()) // trivial space to batch + .layer( + OutputLayer.builder() + .lossFunction(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX) + .nOut(nOut) + .build()) + .inputType(InputType.convolutional(height, width, inputDepth, format)) + .build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + String msg = + format + + " - poolingType=" + + poolingType + + ", minibatch=" + + minibatchSize + + ", activationFn=" + + afn; + + if (PRINT_RESULTS) { + System.out.println(msg); + // for (int j = 0; j < net.getnLayers(); j++) + // System.out.println("ILayer " + j + " # params: " + + // net.getLayer(j).numParams()); + } + boolean gradOK = + GradientCheckUtil.checkGradients( + net, + DEFAULT_EPS, + DEFAULT_MAX_REL_ERROR, + DEFAULT_MIN_ABS_ERROR, + PRINT_RESULTS, + RETURN_ON_FIRST_FAILURE, + input, + labels); + + assertTrue(gradOK, msg); + + // Also check compgraph: + ComputationGraph cg = net.toComputationGraph(); + gradOK = + GradientCheckUtil.checkGradients( + new GradientCheckUtil.GraphConfig() + .net(cg) + .inputs(new INDArray[] {input}) + .labels(new INDArray[] {labels})); + assertTrue(gradOK, msg + " - compgraph"); + + TestUtils.testModelSerialization(net); + } + } + } + } + + @Test + public void testCnnWithUpsampling() { + Nd4j.getRandom().setSeed(12345); + int nOut = 4; + + int[] minibatchSizes = {1, 3}; + int width = 5; + int height = 5; + int inputDepth = 1; + + int[] kernel = {2, 2}; + int[] stride = {1, 1}; + int[] padding = {0, 0}; + int size = 2; + + boolean nchw = format == CNN2DFormat.NCHW; + + for (int minibatchSize : minibatchSizes) { + long[] inShape = + nchw + ? 
new long[] {minibatchSize, inputDepth, height, width} + : new long[] {minibatchSize, height, width, inputDepth}; + INDArray input = Nd4j.rand(DataType.DOUBLE, inShape); + INDArray labels = TestUtils.randomOneHot(minibatchSize, nOut); + + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() + .dataType(DataType.DOUBLE) + .updater(new NoOp()) + .dist(new NormalDistribution(0, 1)) + .list() + .layer( + ConvolutionLayer.builder(kernel, stride, padding) + .nIn(inputDepth) + .convFormat(format) + .nOut(3) + .build()) // output: (5-2+0)/1+1 = 4 + .layer( + Upsampling2D.builder() + .size(size) + .dataFormat(format) + .build()) // output: 4*2 =8 -> 8x8x3 + .layer( + OutputLayer.builder() + .lossFunction(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX) + .nIn(8 * 8 * 3) + .nOut(4) + .build()) + .inputType(InputType.convolutional(height, width, inputDepth, format)) + .build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + String msg = "Upsampling - minibatch=" + minibatchSize; + + if (PRINT_RESULTS) { + System.out.println(msg); + // for (int j = 0; j < net.getnLayers(); j++) + // System.out.println("ILayer " + j + " # params: " + + // net.getLayer(j).numParams()); + } + + boolean gradOK = + GradientCheckUtil.checkGradients( + net, + DEFAULT_EPS, + DEFAULT_MAX_REL_ERROR, + DEFAULT_MIN_ABS_ERROR, + PRINT_RESULTS, + RETURN_ON_FIRST_FAILURE, + input, + labels); + + assertTrue(gradOK, msg); + + TestUtils.testModelSerialization(net); + } + } + + @Test + public void testCnnWithSubsampling() { + Nd4j.getRandom().setSeed(12345); + int nOut = 4; + + int[] minibatchSizes = {1, 3}; + int width = 5; + int height = 5; + int inputDepth = 1; + + int[] kernel = {2, 2}; + int[] stride = {1, 1}; + int[] padding = {0, 0}; + int pnorm = 2; + + Activation[] activations = {Activation.SIGMOID, Activation.TANH}; + SubsamplingLayer.PoolingType[] poolingTypes = + new SubsamplingLayer.PoolingType[] { + SubsamplingLayer.PoolingType.MAX, + SubsamplingLayer.PoolingType.AVG, + SubsamplingLayer.PoolingType.PNORM + }; + + boolean nchw = format == CNN2DFormat.NCHW; + + for (Activation afn : activations) { + for (SubsamplingLayer.PoolingType poolingType : poolingTypes) { + for (int minibatchSize : minibatchSizes) { + long[] inShape = + nchw + ? 
new long[] {minibatchSize, inputDepth, height, width} + : new long[] {minibatchSize, height, width, inputDepth}; + INDArray input = Nd4j.rand(DataType.DOUBLE, inShape); + INDArray labels = Nd4j.zeros(minibatchSize, nOut); + for (int i = 0; i < minibatchSize; i++) { + labels.putScalar(new int[] {i, i % nOut}, 1.0); + } + + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() + .updater(new NoOp()) + .dataType(DataType.DOUBLE) + .dist(new NormalDistribution(0, 1)) + .list() + .layer( + 0, + ConvolutionLayer.builder(kernel, stride, padding) + .nIn(inputDepth) + .convFormat(format) + .nOut(3) + .build()) // output: (5-2+0)/1+1 = 4 + .layer( + 1, + SubsamplingLayer.builder(poolingType) + .dataFormat(format) + .kernelSize(kernel) + .stride(stride) + .padding(padding) + .pnorm(pnorm) + .build()) // output: (4-2+0)/1+1 =3 -> 3x3x3 + .layer( + 2, + OutputLayer.builder() + .lossFunction(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX) + .nIn(3 * 3 * 3) + .nOut(4) + .build()) + .inputType(InputType.convolutional(height, width, inputDepth, format)) + .build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + String msg = + format + + " - poolingType=" + + poolingType + + ", minibatch=" + + minibatchSize + + ", activationFn=" + + afn; + + if (PRINT_RESULTS) { + System.out.println(msg); + // for (int j = 0; j < net.getnLayers(); j++) + // System.out.println("ILayer " + j + " # params: " + + // net.getLayer(j).numParams()); + } + + boolean gradOK = + GradientCheckUtil.checkGradients( + net, + DEFAULT_EPS, + DEFAULT_MAX_REL_ERROR, + DEFAULT_MIN_ABS_ERROR, + PRINT_RESULTS, + RETURN_ON_FIRST_FAILURE, + input, + labels); + + assertTrue(gradOK, msg); + + TestUtils.testModelSerialization(net); + } + } + } + } + + @Test + public void testCnnWithSubsamplingV2() { + Nd4j.getRandom().setSeed(12345); + int nOut = 4; + + int[] minibatchSizes = {1, 3}; + int width = 5; + int height = 5; + int inputDepth = 1; + + int[] kernel = {2, 2}; + int[] stride = {1, 1}; + int[] padding = {0, 0}; + int pNorm = 3; + + Activation[] activations = {Activation.SIGMOID, Activation.TANH}; + SubsamplingLayer.PoolingType[] poolingTypes = + new SubsamplingLayer.PoolingType[] { + SubsamplingLayer.PoolingType.MAX, + SubsamplingLayer.PoolingType.AVG, + SubsamplingLayer.PoolingType.PNORM + }; + + boolean nchw = format == CNN2DFormat.NCHW; + + for (Activation afn : activations) { + for (SubsamplingLayer.PoolingType poolingType : poolingTypes) { + for (int minibatchSize : minibatchSizes) { + long[] inShape = + nchw + ? 
new long[] {minibatchSize, inputDepth, height, width} + : new long[] {minibatchSize, height, width, inputDepth}; + INDArray input = Nd4j.rand(DataType.DOUBLE, inShape); + INDArray labels = Nd4j.zeros(minibatchSize, nOut); + for (int i = 0; i < minibatchSize; i++) { + labels.putScalar(new int[] {i, i % nOut}, 1.0); + } + + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() + .updater(new NoOp()) + .dataType(DataType.DOUBLE) + .dist(new NormalDistribution(0, 1)) + .list() + .layer( + 0, + ConvolutionLayer.builder(kernel, stride, padding) + .nIn(inputDepth) + .convFormat(format) + .nOut(3) + .build()) // output: (5-2+0)/1+1 = 4 + .layer( + 1, + SubsamplingLayer.builder(poolingType) + .dataFormat(format) + .kernelSize(kernel) + .stride(stride) + .padding(padding) + .pnorm(pNorm) + .build()) // output: (4-2+0)/1+1 =3 -> 3x3x3 + .layer( + 2, + ConvolutionLayer.builder(kernel, stride, padding) + .convFormat(format) + .nIn(3) + .nOut(2) + .build()) // Output: (3-2+0)/1+1 = 2 + .layer( + 3, + OutputLayer.builder() + .lossFunction(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX) + .nIn(2 * 2 * 2) + .nOut(4) + .build()) + .inputType(InputType.convolutional(height, width, inputDepth, format)) + .build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + String msg = + "PoolingType=" + + poolingType + + ", minibatch=" + + minibatchSize + + ", activationFn=" + + afn; + System.out.println(msg); + + boolean gradOK = + GradientCheckUtil.checkGradients( + net, + DEFAULT_EPS, + DEFAULT_MAX_REL_ERROR, + DEFAULT_MIN_ABS_ERROR, + PRINT_RESULTS, + RETURN_ON_FIRST_FAILURE, + input, + labels); + + assertTrue(gradOK, msg); + + TestUtils.testModelSerialization(net); + } + } + } + } + + @Test + public void testCnnLocallyConnected2D() { + int nOut = 3; + int width = 5; + int height = 5; + + Nd4j.getRandom().setSeed(12345); + + int[] inputDepths = new int[] {1, 2, 4}; + Activation[] activations = {Activation.SIGMOID, Activation.TANH, Activation.SOFTPLUS}; + int[] minibatch = {2, 1, 3}; + + boolean nchw = format == CNN2DFormat.NCHW; + + for (int i = 0; i < inputDepths.length; i++) { + int inputDepth = inputDepths[i]; + Activation afn = activations[i]; + int minibatchSize = minibatch[i]; + + long[] inShape = + nchw + ? 
new long[] {minibatchSize, inputDepth, height, width} + : new long[] {minibatchSize, height, width, inputDepth}; + INDArray input = Nd4j.rand(DataType.DOUBLE, inShape); + INDArray labels = TestUtils.randomOneHot(minibatchSize, nOut); + + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() + .seed(12345) + .updater(new NoOp()) + .dataType(DataType.DOUBLE) + .activation(afn) + .list() + .layer( + 0, + ConvolutionLayer.builder() + .kernelSize(2, 2) + .stride(1, 1) + .convFormat(format) + .padding(0, 0) + .nIn(inputDepth) + .nOut(2) + .build()) // output: (5-2+0)/1+1 = 4 + .layer( + 1, + LocallyConnected2D.builder() + .nIn(2) + .nOut(7) + .kernelSize(2, 2) + .dataFormat(format) + .inputSize(new int[]{4, 4}) + .convolutionMode(ConvolutionMode.Strict) + .hasBias(false) + .stride(1, 1) + .padding(0, 0) + .build()) // (4-2+0)/1+1 = 3 + .layer( + 2, + ConvolutionLayer.builder() + .nIn(7) + .nOut(2) + .kernelSize(2, 2) + .convFormat(format) + .stride(1, 1) + .padding(0, 0) + .build()) // (3-2+0)/1+1 = 2 + .layer( + 3, + OutputLayer.builder() + .lossFunction(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX) + .nIn(2 * 2 * 2) + .nOut(nOut) + .build()) + .inputType(InputType.convolutional(height, width, inputDepth, format)) + .build(); + + assertEquals(ConvolutionMode.Truncate, conf.getConvolutionMode()); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + String msg = "Minibatch=" + minibatchSize + ", activationFn=" + afn; + System.out.println(msg); + + boolean gradOK = + GradientCheckUtil.checkGradients( + net, + DEFAULT_EPS, + DEFAULT_MAX_REL_ERROR, + DEFAULT_MIN_ABS_ERROR, + PRINT_RESULTS, + RETURN_ON_FIRST_FAILURE, + input, + labels); + + assertTrue(gradOK, msg); + + TestUtils.testModelSerialization(net); + } + } + + @Test + public void testCnnMultiLayer() { + int nOut = 2; + + int[] minibatchSizes = {1, 2, 5}; + int width = 5; + int height = 5; + int[] inputDepths = {1, 2, 4}; + + Activation[] activations = {Activation.SIGMOID, Activation.TANH}; + SubsamplingLayer.PoolingType[] poolingTypes = + new SubsamplingLayer.PoolingType[] { + SubsamplingLayer.PoolingType.MAX, SubsamplingLayer.PoolingType.AVG + }; + + Nd4j.getRandom().setSeed(12345); + + boolean nchw = format == CNN2DFormat.NCHW; + + for (int inputDepth : inputDepths) { + for (Activation afn : activations) { + for (SubsamplingLayer.PoolingType poolingType : poolingTypes) { + for (int minibatchSize : minibatchSizes) { + long[] inShape = + nchw + ? 
new long[] {minibatchSize, inputDepth, height, width} + : new long[] {minibatchSize, height, width, inputDepth}; INDArray input = Nd4j.rand(DataType.DOUBLE, inShape); - INDArray labels = TestUtils.randomOneHot(minibatchSize, nOut); + + INDArray labels = Nd4j.zeros(minibatchSize, nOut); + for (int i = 0; i < minibatchSize; i++) { + labels.putScalar(new int[] {i, i % nOut}, 1.0); + } NeuralNetConfiguration conf = - NeuralNetConfiguration.builder() - .dataType(DataType.DOUBLE) - .updater(new NoOp()) - .dist(new NormalDistribution(0, 1)) - .list().layer(new ConvolutionLayer.Builder(kernel, - stride, padding).nIn(inputDepth) - .dataFormat(format) - .nOut(3).build())//output: (5-2+0)/1+1 = 4 - .layer(new Upsampling2D.Builder().size(size).dataFormat(format).build()) //output: 4*2 =8 -> 8x8x3 - .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) - .activation(Activation.SOFTMAX).nIn(8 * 8 * 3) - .nOut(4).build()) - .inputType(InputType.convolutional(height, width, inputDepth, format)) - .build(); - - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - - String msg = "Upsampling - minibatch=" + minibatchSize; - - if (PRINT_RESULTS) { - System.out.println(msg); -// for (int j = 0; j < net.getnLayers(); j++) -// System.out.println("ILayer " + j + " # params: " + net.getLayer(j).numParams()); - } - - boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, - DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); - - assertTrue(gradOK, msg); - - TestUtils.testModelSerialization(net); - } - } - - - @Test - public void testCnnWithSubsampling() { - Nd4j.getRandom().setSeed(12345); - int nOut = 4; - - int[] minibatchSizes = {1, 3}; - int width = 5; - int height = 5; - int inputDepth = 1; - - int[] kernel = {2, 2}; - int[] stride = {1, 1}; - int[] padding = {0, 0}; - int pnorm = 2; - - Activation[] activations = {Activation.SIGMOID, Activation.TANH}; - SubsamplingLayer.PoolingType[] poolingTypes = - new SubsamplingLayer.PoolingType[]{SubsamplingLayer.PoolingType.MAX, - SubsamplingLayer.PoolingType.AVG, SubsamplingLayer.PoolingType.PNORM}; - - boolean nchw = format == CNN2DFormat.NCHW; - - for (Activation afn : activations) { - for (SubsamplingLayer.PoolingType poolingType : poolingTypes) { - for (int minibatchSize : minibatchSizes) { - long[] inShape = nchw ? 
new long[]{minibatchSize, inputDepth, height, width} : new long[]{minibatchSize, height, width, inputDepth}; - INDArray input = Nd4j.rand(DataType.DOUBLE, inShape); - INDArray labels = Nd4j.zeros(minibatchSize, nOut); - for (int i = 0; i < minibatchSize; i++) { - labels.putScalar(new int[]{i, i % nOut}, 1.0); - } - - NeuralNetConfiguration conf = - NeuralNetConfiguration.builder().updater(new NoOp()) - .dataType(DataType.DOUBLE) - .dist(new NormalDistribution(0, 1)) - .list().layer(0, - new ConvolutionLayer.Builder(kernel, - stride, padding).nIn(inputDepth) - .dataFormat(format) - .nOut(3).build())//output: (5-2+0)/1+1 = 4 - .layer(1, new SubsamplingLayer.Builder(poolingType) - .dataFormat(format) - .kernelSize(kernel).stride(stride).padding(padding) - .pnorm(pnorm).build()) //output: (4-2+0)/1+1 =3 -> 3x3x3 - .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) - .activation(Activation.SOFTMAX).nIn(3 * 3 * 3) - .nOut(4).build()) - .inputType(InputType.convolutional(height, width, inputDepth, format)) - .build(); - - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - - String msg = format + " - poolingType=" + poolingType + ", minibatch=" + minibatchSize + ", activationFn=" - + afn; - - if (PRINT_RESULTS) { - System.out.println(msg); -// for (int j = 0; j < net.getnLayers(); j++) -// System.out.println("ILayer " + j + " # params: " + net.getLayer(j).numParams()); - } - - boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, - DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); - - assertTrue(gradOK, msg); - - TestUtils.testModelSerialization(net); - } - } - } - } - - @Test - public void testCnnWithSubsamplingV2() { - Nd4j.getRandom().setSeed(12345); - int nOut = 4; - - int[] minibatchSizes = {1, 3}; - int width = 5; - int height = 5; - int inputDepth = 1; - - int[] kernel = {2, 2}; - int[] stride = {1, 1}; - int[] padding = {0, 0}; - int pNorm = 3; - - Activation[] activations = {Activation.SIGMOID, Activation.TANH}; - SubsamplingLayer.PoolingType[] poolingTypes = - new SubsamplingLayer.PoolingType[]{SubsamplingLayer.PoolingType.MAX, - SubsamplingLayer.PoolingType.AVG, SubsamplingLayer.PoolingType.PNORM}; - - boolean nchw = format == CNN2DFormat.NCHW; - - for (Activation afn : activations) { - for (SubsamplingLayer.PoolingType poolingType : poolingTypes) { - for (int minibatchSize : minibatchSizes) { - long[] inShape = nchw ? 
new long[]{minibatchSize, inputDepth, height, width} : new long[]{minibatchSize, height, width, inputDepth}; - INDArray input = Nd4j.rand(DataType.DOUBLE, inShape); - INDArray labels = Nd4j.zeros(minibatchSize, nOut); - for (int i = 0; i < minibatchSize; i++) { - labels.putScalar(new int[]{i, i % nOut}, 1.0); - } - - NeuralNetConfiguration conf = - NeuralNetConfiguration.builder().updater(new NoOp()) - .dataType(DataType.DOUBLE) - .dist(new NormalDistribution(0, 1)) - .list().layer(0, - new ConvolutionLayer.Builder(kernel, - stride, padding).nIn(inputDepth).dataFormat(format) - .nOut(3).build())//output: (5-2+0)/1+1 = 4 - .layer(1, new SubsamplingLayer.Builder(poolingType).dataFormat(format) - .kernelSize(kernel).stride(stride).padding(padding) - .pnorm(pNorm).build()) //output: (4-2+0)/1+1 =3 -> 3x3x3 - .layer(2, new ConvolutionLayer.Builder(kernel, stride, padding).dataFormat(format) - .nIn(3).nOut(2).build()) //Output: (3-2+0)/1+1 = 2 - .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) - .activation(Activation.SOFTMAX).nIn(2 * 2 * 2) - .nOut(4).build()) - .inputType(InputType.convolutional(height, width, inputDepth, format)) - .build(); - - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - - String msg = "PoolingType=" + poolingType + ", minibatch=" + minibatchSize + ", activationFn=" - + afn; - System.out.println(msg); - - boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, - DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); - - assertTrue(gradOK, msg); - - TestUtils.testModelSerialization(net); - } - } - } - } - - @Test - public void testCnnLocallyConnected2D() { - int nOut = 3; - int width = 5; - int height = 5; - - Nd4j.getRandom().setSeed(12345); - - int[] inputDepths = new int[]{1, 2, 4}; - Activation[] activations = {Activation.SIGMOID, Activation.TANH, Activation.SOFTPLUS}; - int[] minibatch = {2, 1, 3}; - - boolean nchw = format == CNN2DFormat.NCHW; - - for( int i=0; i 6x6x5 - .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2) + .layer(1, SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2) .stride(1, 1).build()) //Out: (6-2)/1+1 = 5 -> 5x5x5 - .layer(2, new DenseLayer.Builder().nIn(27).nOut(4).activation(Activation.TANH).build()) - .layer(3, new GravesLSTM.Builder().nIn(4).nOut(3).activation(Activation.TANH).build()) - .layer(4, new RnnOutputLayer.Builder().lossFunction(LossFunction.MCXENT).nIn(3).nOut(nClasses) + .layer(2, DenseLayer.builder().nIn(27).nOut(4).activation(Activation.TANH).build()) + .layer(3, GravesLSTM.builder().nIn(4).nOut(3).activation(Activation.TANH).build()) + .layer(4, RnnOutputLayer.builder().lossFunction(LossFunction.MCXENT).nIn(3).nOut(nClasses) .activation(Activation.SOFTMAX).build()) .inputType(InputType.convolutional(6, 6, 2)).build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/LossFunctionGradientCheck.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/LossFunctionGradientCheck.java index 6197f73d3..345c0251f 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/LossFunctionGradientCheck.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/LossFunctionGradientCheck.java @@ -187,8 +187,8 @@ public class LossFunctionGradientCheck extends BaseDL4JTest { .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(12345) .updater(new NoOp()) 
.dist(new UniformDistribution(-2, 2)).list() - .layer(0, new DenseLayer.Builder().nIn(4).nOut(4).activation(Activation.TANH).build()) - .layer(1, new OutputLayer.Builder().lossFunction(lossFunctions[i]) + .layer(0, DenseLayer.builder().nIn(4).nOut(4).activation(Activation.TANH).build()) + .layer(1, OutputLayer.builder().lossFunction(lossFunctions[i]) .activation(outputActivationFn[i]).nIn(4).nOut(nOut[i]).build()) .validateOutputLayerConfig(false) .build(); @@ -351,9 +351,9 @@ public class LossFunctionGradientCheck extends BaseDL4JTest { .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(12345) .updater(new NoOp()) .dist(new UniformDistribution(-2, 2)).list() - .layer(0, new DenseLayer.Builder().nIn(4).nOut(nOut[i]).activation(Activation.TANH) + .layer(0, DenseLayer.builder().nIn(4).nOut(nOut[i]).activation(Activation.TANH) .build()) - .layer(1, new LossLayer.Builder().lossFunction(lossFunctions[i]) + .layer(1, LossLayer.builder().lossFunction(lossFunctions[i]) .activation(outputActivationFn[i]).build()) .validateOutputLayerConfig(false) .build(); @@ -361,7 +361,7 @@ public class LossFunctionGradientCheck extends BaseDL4JTest { MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); - assertSame(((LossLayer) net.getLayer(1).getLayerConfiguration()).getLossFn().getClass(), lossFunctions[i] + assertSame(((LossLayer) net.getLayer(1).getLayerConfiguration()).getLossFunction().getClass(), lossFunctions[i] .getClass()); INDArray[] inOut = getFeaturesAndLabels(lossFunctions[i], minibatchSizes[j], 4, nOut[i], 12345); @@ -655,9 +655,9 @@ public class LossFunctionGradientCheck extends BaseDL4JTest { // .dist(new UniformDistribution(-3, 3)) .dist(new NormalDistribution(0, 1)) .list() - .layer(0, new DenseLayer.Builder().nIn(4).nOut(4).activation(Activation.TANH) + .layer(0, DenseLayer.builder().nIn(4).nOut(4).activation(Activation.TANH) .build()) - .layer(1, new OutputLayer.Builder().lossFunction(lossFunctions[i]) + .layer(1, OutputLayer.builder().lossFunction(lossFunctions[i]) .activation(outputActivationFn[i]).nIn(4).nOut(3).build()) .validateOutputLayerConfig(false) .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/NoBiasGradientCheckTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/NoBiasGradientCheckTests.java index f47a4ee0e..e609e8871 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/NoBiasGradientCheckTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/NoBiasGradientCheckTests.java @@ -73,19 +73,19 @@ public class NoBiasGradientCheckTests extends BaseDL4JTest { .updater(new NoOp()) .seed(12345L) .list() - .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(layerSize) + .layer(0, DenseLayer.builder().nIn(nIn).nOut(layerSize) .dist(new NormalDistribution(0, 1)) .activation(Activation.TANH) .hasBias(true) //ILayer 0: Always have a bias .build()) - .layer(1, new DenseLayer.Builder().nIn(layerSize).nOut(layerSize) + .layer(1, DenseLayer.builder().nIn(layerSize).nOut(layerSize) .dist(new NormalDistribution(0, 1)) .activation(Activation.TANH) .hasBias(denseHasBias) .build()) - .layer(2, new OutputLayer.Builder(LossFunction.MCXENT) + .layer(2, OutputLayer.builder().lossFunction(LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(layerSize).nOut(nOut) .dist(new NormalDistribution(0, 1)) @@ -144,12 +144,12 @@ public class NoBiasGradientCheckTests extends BaseDL4JTest { .updater(new NoOp()) .seed(12345L) .list() - 
.layer(0, new LSTM.Builder().nIn(nIn).nOut(layerSize) + .layer(0, LSTM.builder().nIn(nIn).nOut(layerSize) .dist(new NormalDistribution(0, 1)) .activation(Activation.TANH) .build()) - .layer(1, new RnnOutputLayer.Builder(LossFunction.MCXENT) + .layer(1, RnnOutputLayer.builder().lossFunction(LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(layerSize).nOut(nOut) .dist(new NormalDistribution(0, 1)) @@ -205,13 +205,13 @@ public class NoBiasGradientCheckTests extends BaseDL4JTest { .updater(new NoOp()) .seed(12345L) .list() - .layer(0, new EmbeddingLayer.Builder().nIn(nIn).nOut(layerSize) + .layer(0, EmbeddingLayer.builder().nIn(nIn).nOut(layerSize) .dist(new NormalDistribution(0, 1)) .activation(Activation.TANH) .hasBias(embeddingHasBias) .build()) - .layer(1, new OutputLayer.Builder(LossFunction.MCXENT) + .layer(1, OutputLayer.builder().lossFunction(LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(layerSize).nOut(nOut) .dist(new NormalDistribution(0, 1)) @@ -271,17 +271,17 @@ public class NoBiasGradientCheckTests extends BaseDL4JTest { .dataType(DataType.DOUBLE) .dist(new NormalDistribution(0, 1)) .list() - .layer(new ConvolutionLayer.Builder(kernel, + .layer(ConvolutionLayer.builder(kernel, stride, padding).nIn(inputDepth) .hasBias(false) .nOut(3).build())//output: (5-2+0)/1+1 = 4 - .layer(new SubsamplingLayer.Builder(PoolingType.MAX) + .layer(SubsamplingLayer.builder(PoolingType.MAX) .kernelSize(kernel).stride(stride).padding(padding) .pnorm(pNorm).build()) //output: (4-2+0)/1+1 =3 -> 3x3x3 - .layer(new ConvolutionLayer.Builder(kernel, stride, padding) + .layer(ConvolutionLayer.builder(kernel, stride, padding) .hasBias(cnnHasBias) .nOut(2).build()) //Output: (3-2+0)/1+1 = 2 - .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX) .nOut(4).build()) .inputType(InputType.convolutionalFlat(height, width, inputDepth)) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/OutputLayerGradientChecks.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/OutputLayerGradientChecks.java index 7556178b9..d49eaf535 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/OutputLayerGradientChecks.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/OutputLayerGradientChecks.java @@ -121,10 +121,10 @@ public class OutputLayerGradientChecks extends BaseDL4JTest { .dataType(DataType.DOUBLE) .updater(new NoOp()) .list() - .layer(new LSTM.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH) + .layer(LSTM.builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH) .dist(new NormalDistribution(0, 1.0)) .updater(new NoOp()).build()) - .layer(new RnnLossLayer.Builder(lf) + .layer(RnnLossLayer.builder().lossFunction(lf) .activation(oa) .build()) .validateOutputLayerConfig(false).build(); @@ -228,10 +228,10 @@ public class OutputLayerGradientChecks extends BaseDL4JTest { .updater(new NoOp()) .convolutionMode(ConvolutionMode.Same) .list() - .layer(new ConvolutionLayer.Builder().nIn(dIn).nOut(dOut).activation(Activation.TANH) + .layer(ConvolutionLayer.builder().nIn(dIn).nOut(dOut).activation(Activation.TANH) .dist(new NormalDistribution(0, 1.0)) .updater(new NoOp()).build()) - .layer(new CnnLossLayer.Builder(lf) + .layer(CnnLossLayer.builder().lossFunction(lf) .activation(oa) .build()) .validateOutputLayerConfig(false).build(); @@ -375,11 +375,11 @@ public 
class OutputLayerGradientChecks extends BaseDL4JTest { .updater(new NoOp()) .convolutionMode(ConvolutionMode.Same) .list() - .layer(new Convolution3D.Builder().nIn(chIn).nOut(chOut).activation(Activation.TANH) + .layer(Convolution3D.builder().nIn(chIn).nOut(chOut).activation(Activation.TANH) .dist(new NormalDistribution(0, 1.0)) .dataFormat(dataFormat) .updater(new NoOp()).build()) - .layer(new Cnn3DLossLayer.Builder(dataFormat) + .layer(Cnn3DLossLayer.builder().dataFormat(dataFormat) .lossFunction(lf) .activation(oa) .build()) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/RnnGradientChecks.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/RnnGradientChecks.java index 44e904d7e..08dcc008f 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/RnnGradientChecks.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/RnnGradientChecks.java @@ -112,12 +112,12 @@ public class RnnGradientChecks extends BaseDL4JTest { .updater(new NoOp()) .weightInit(WeightInit.XAVIER) .list() - .layer(new LSTM.Builder().nIn(nIn).nOut(3).build()) - .layer(new Bidirectional(m, + .layer(LSTM.builder().nIn(nIn).nOut(3).build()) + .layer(Bidirectional.builder(m, (simple ? - new SimpleRnn.Builder().nIn(3).nOut(3).hasLayerNorm(hasLayerNorm).build() : - new LSTM.Builder().nIn(3).nOut(3).build()))) - .layer(new RnnOutputLayer.Builder().nOut(nOut).activation(Activation.SOFTMAX).build()) + SimpleRnn.builder().nIn(3).nOut(3).hasLayerNorm(hasLayerNorm).build() : + LSTM.builder().nIn(3).nOut(3).build())).build()) + .layer(RnnOutputLayer.builder().nOut(nOut).activation(Activation.SOFTMAX).build()) .build(); @@ -194,9 +194,9 @@ public class RnnGradientChecks extends BaseDL4JTest { .l1(l1s[l]) .l2(l2s[l]) .list() - .layer(new SimpleRnn.Builder().nIn(nIn).nOut(layerSize).hasLayerNorm(hasLayerNorm).build()) - .layer(new SimpleRnn.Builder().nIn(layerSize).nOut(layerSize).hasLayerNorm(hasLayerNorm).build()) - .layer(new RnnOutputLayer.Builder().nIn(layerSize).nOut(nOut) + .layer(SimpleRnn.builder().nIn(nIn).nOut(layerSize).hasLayerNorm(hasLayerNorm).build()) + .layer(SimpleRnn.builder().nIn(layerSize).nOut(layerSize).hasLayerNorm(hasLayerNorm).build()) + .layer(RnnOutputLayer.builder().nIn(layerSize).nOut(nOut) .activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT) .build()) .build(); @@ -267,12 +267,12 @@ public class RnnGradientChecks extends BaseDL4JTest { .activation(Activation.TANH) .updater(new NoOp()) .weightInit(WeightInit.XAVIER) - .list() - .layer(simple ? new SimpleRnn.Builder().nOut(layerSize).hasLayerNorm(hasLayerNorm).build() : - new LSTM.Builder().nOut(layerSize).build()) - .layer(new LastTimeStep(simple ? new SimpleRnn.Builder().nOut(layerSize).hasLayerNorm(hasLayerNorm).build() : - new LSTM.Builder().nOut(layerSize).build())) - .layer(new OutputLayer.Builder().nOut(nOut).activation(Activation.SOFTMAX) + + .layer(simple ? SimpleRnn.builder().nOut(layerSize).hasLayerNorm(hasLayerNorm).build() : + LSTM.builder().nOut(layerSize).build()) + .layer(LastTimeStep.builder().underlying(simple ? 
SimpleRnn.builder().nOut(layerSize).hasLayerNorm(hasLayerNorm).build() : + LSTM.builder().nOut(layerSize).build()).build()) + .layer(OutputLayer.builder().nOut(nOut).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .inputType(InputType.recurrent(nIn)) .build(); @@ -334,9 +334,9 @@ public class RnnGradientChecks extends BaseDL4JTest { .updater(new NoOp()) .weightInit(WeightInit.XAVIER) .list() - .layer(new LSTM.Builder().nOut(layerSize).build()) - .layer(new TimeDistributed(new DenseLayer.Builder().nOut(layerSize).activation(Activation.SOFTMAX).build())) - .layer(new RnnOutputLayer.Builder().nOut(nOut).activation(Activation.SOFTMAX) + .layer(LSTM.builder().nOut(layerSize).build()) + .layer(TimeDistributed.builder().underlying(DenseLayer.builder().nOut(layerSize).activation(Activation.SOFTMAX).build()).build()) + .layer(RnnOutputLayer.builder().nOut(nOut).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .inputType(InputType.recurrent(nIn)) .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/UtilLayerGradientChecks.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/UtilLayerGradientChecks.java index 212bd29da..7b883e689 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/UtilLayerGradientChecks.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/UtilLayerGradientChecks.java @@ -132,27 +132,27 @@ public class UtilLayerGradientChecks extends BaseDL4JTest { InputType it; switch (inputRank){ case 2: - l1 = new DenseLayer.Builder().nOut(3).build(); - l2 = new DenseLayer.Builder().nOut(3).build(); - l3 = new OutputLayer.Builder().nOut(3).lossFunction(LossFunctions.LossFunction.MSE) + l1 = DenseLayer.builder().nOut(3).build(); + l2 = DenseLayer.builder().nOut(3).build(); + l3 = OutputLayer.builder().nOut(3).lossFunction(LossFunctions.LossFunction.MSE) .activation(Activation.TANH).build(); it = InputType.feedForward(3); break; case 3: - l1 = new SimpleRnn.Builder().nIn(3).nOut(3).activation(Activation.TANH).build(); - l2 = new SimpleRnn.Builder().nIn(3).nOut(3).activation(Activation.TANH).build(); - l3 = new RnnOutputLayer.Builder().nIn(3).nOut(3).lossFunction(LossFunctions.LossFunction.SQUARED_LOSS) + l1 = SimpleRnn.builder().nIn(3).nOut(3).activation(Activation.TANH).build(); + l2 = SimpleRnn.builder().nIn(3).nOut(3).activation(Activation.TANH).build(); + l3 = RnnOutputLayer.builder().nIn(3).nOut(3).lossFunction(LossFunctions.LossFunction.SQUARED_LOSS) .activation(Activation.IDENTITY).build(); it = InputType.recurrent(3); break; case 4: - l1 = new ConvolutionLayer.Builder().nOut(5).convolutionMode(ConvolutionMode.Truncate) + l1 = ConvolutionLayer.builder().nOut(5).convolutionMode(ConvolutionMode.Truncate) .stride(1,1).kernelSize(2,2).padding(0,0) .build(); - l2 = new ConvolutionLayer.Builder().nOut(5).convolutionMode(ConvolutionMode.Truncate) + l2 = ConvolutionLayer.builder().nOut(5).convolutionMode(ConvolutionMode.Truncate) .stride(1,1).kernelSize(2,2).padding(0,0) .build(); - l3 = new OutputLayer.Builder().nOut(5).lossFunction(LossFunctions.LossFunction.SQUARED_LOSS) + l3 = OutputLayer.builder().nOut(5).lossFunction(LossFunctions.LossFunction.SQUARED_LOSS) .activation(Activation.IDENTITY) .build(); it = InputType.convolutional(5,5,1); @@ -162,19 +162,19 @@ public class UtilLayerGradientChecks extends BaseDL4JTest { } - NeuralNetConfiguration conf = NeuralNetConfiguration.builder() - 
.updater(new NoOp()) - .activation(Activation.TANH) - .dataType(DataType.DOUBLE) - .dist(new NormalDistribution(0,2)) - .list() - .layer(l1) - .layer(new MaskLayer()) - .layer(l2) - .layer(l3) - .inputType(it) - .build(); - + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() + .updater(new NoOp()) + .activation(Activation.TANH) + .dataType(DataType.DOUBLE) + .dist(new NormalDistribution(0, 2)) + .list() + .layer(l1) + .layer(MaskLayer.builder().build()) + .layer(l2) + .layer(l3) + .inputType(it) + .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -201,14 +201,14 @@ public class UtilLayerGradientChecks extends BaseDL4JTest { .seed(12345) .updater(Updater.NONE.getIUpdaterWithDefaultConfig()) .list() - .layer(new DenseLayer.Builder().nIn(10).nOut(10) + .layer(DenseLayer.builder().nIn(10).nOut(10) .activation(Activation.TANH).weightInit(WeightInit.XAVIER).build()) - .layer(new FrozenLayerWithBackprop(new DenseLayer.Builder().nIn(10).nOut(10) - .activation(Activation.TANH).weightInit(WeightInit.XAVIER).build())) - .layer(new FrozenLayerWithBackprop( - new DenseLayer.Builder().nIn(10).nOut(10).activation(Activation.TANH) - .weightInit(WeightInit.XAVIER).build())) - .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(FrozenLayerWithBackprop.builder().underlying(DenseLayer.builder().nIn(10).nOut(10) + .activation(Activation.TANH).weightInit(WeightInit.XAVIER).build()).build()) + .layer(FrozenLayerWithBackprop.builder().underlying( + DenseLayer.builder().nIn(10).nOut(10).activation(Activation.TANH) + .weightInit(WeightInit.XAVIER).build()).build()) + .layer(OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(10).nOut(10).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf2); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/VaeGradientCheckTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/VaeGradientCheckTests.java index 233836066..f02d9e986 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/VaeGradientCheckTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/VaeGradientCheckTests.java @@ -99,14 +99,14 @@ public class VaeGradientCheckTests extends BaseDL4JTest { .updater(new NoOp()) .l2Bias(biasL2[i]).l1Bias(biasL1[i]) .updater(new NoOp()).seed(12345L).list() - .layer(0, new VariationalAutoencoder.Builder().nIn(4) + .layer(0, VariationalAutoencoder.builder().nIn(4) .nOut(3).encoderLayerSizes(encoderSizes) .decoderLayerSizes(decoderSizes) .dist(new NormalDistribution(0, 1)) .activation(afn) .build()) - .layer(1, new OutputLayer.Builder(lf) + .layer(1, OutputLayer.builder(lf) .activation(outputActivation).nIn(3).nOut(3) .dist(new NormalDistribution(0, 1)) @@ -173,7 +173,7 @@ public class VaeGradientCheckTests extends BaseDL4JTest { .dataType(DataType.DOUBLE) .l1(l1).l2Bias(biasL2[i]).l1Bias(biasL1[i]).updater(new NoOp()) .seed(12345L).weightInit(WeightInit.XAVIER).list() - .layer(0, new VariationalAutoencoder.Builder().nIn(4).nOut(3) + .layer(0, VariationalAutoencoder.builder().nIn(4).nOut(3) .encoderLayerSizes(encoderSizes).decoderLayerSizes(decoderSizes) .pzxActivationFunction(pzxAfn) .reconstructionDistribution( @@ -263,7 +263,7 @@ public class VaeGradientCheckTests extends BaseDL4JTest { .updater(new NoOp()) .seed(12345L).dist(new NormalDistribution(0, 1)) .list().layer(0, - new 
VariationalAutoencoder.Builder().nIn(inOutSize).nOut(3) + VariationalAutoencoder.builder().nIn(inOutSize).nOut(3) .encoderLayerSizes(4).decoderLayerSizes(3) .pzxActivationFunction(Activation.TANH) .reconstructionDistribution( @@ -306,7 +306,7 @@ public class VaeGradientCheckTests extends BaseDL4JTest { .dataType(DataType.DOUBLE) .updater(new NoOp()) .seed(12345L).weightInit(WeightInit.XAVIER).list() - .layer(0, new VariationalAutoencoder.Builder().nIn(4).nOut(3).encoderLayerSizes(2, 3) + .layer(0, VariationalAutoencoder.builder().nIn(4).nOut(3).encoderLayerSizes(2, 3) .decoderLayerSizes(4, 3).pzxActivationFunction(Activation.TANH) .reconstructionDistribution( new GaussianReconstructionDistribution(Activation.TANH)) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/YoloGradientCheckTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/YoloGradientCheckTests.java index 1eb72b1bd..0ea61a969 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/YoloGradientCheckTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/YoloGradientCheckTests.java @@ -115,12 +115,11 @@ public class YoloGradientCheckTests extends BaseDL4JTest { .activation(a) .l1(l1[i]).l2(l2[i]) .convolutionMode(ConvolutionMode.Same) - .list() - .layer(new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1) - .dataFormat(format) + .layer(ConvolutionLayer.builder().kernelSize(2, 2).stride(1, 1) + .convFormat(format) .nIn(depthIn).nOut(yoloDepth).build())//output: (5-2+0)/1+1 = 4 - .layer(new Yolo2OutputLayer.Builder() - .boundingBoxPriors(bbPrior) + .layer(Yolo2OutputLayer.builder() + .boundingBoxes(bbPrior) .build()) .inputType(InputType.convolutional(h, w, depthIn, format)) .build(); @@ -234,11 +233,11 @@ public class YoloGradientCheckTests extends BaseDL4JTest { .dist(new GaussianDistribution(0,0.1)) .seed(12345) .list() - .layer(new ConvolutionLayer.Builder().kernelSize(3,3).stride(1,1).nOut(4).build()) - .layer(new SubsamplingLayer.Builder().kernelSize(2,2).stride(2,2).build()) - .layer(new ConvolutionLayer.Builder().activation(Activation.IDENTITY).kernelSize(3,3).stride(1,1).nOut(depthOut).build()) - .layer(new Yolo2OutputLayer.Builder() - .boundingBoxPriors(bbPriors) + .layer(ConvolutionLayer.builder().kernelSize(3,3).stride(1,1).nOut(4).build()) + .layer(SubsamplingLayer.builder().kernelSize(2,2).stride(2,2).build()) + .layer(ConvolutionLayer.builder().activation(Activation.IDENTITY).kernelSize(3,3).stride(1,1).nOut(depthOut).build()) + .layer(Yolo2OutputLayer.builder() + .boundingBoxes(bbPriors) .build()) .inputType(InputType.convolutional(h,w,c)) .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/ComputationGraphConfigurationTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/ComputationGraphConfigurationTest.java index 6e0cbd770..1efecf6a1 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/ComputationGraphConfigurationTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/ComputationGraphConfigurationTest.java @@ -62,9 +62,9 @@ public class ComputationGraphConfigurationTest extends BaseDL4JTest { .dist(new NormalDistribution(0, 1)).updater(new NoOp()) .graphBuilder().addInputs("input") .appendLayer("firstLayer", - new DenseLayer.Builder().nIn(4).nOut(5).activation(Activation.TANH).build()) + DenseLayer.builder().nIn(4).nOut(5).activation(Activation.TANH).build()) 
.addLayer("outputLayer", - new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MCXENT) + OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(5).nOut(3).build(), "firstLayer") .setOutputs("outputLayer").build(); @@ -83,20 +83,20 @@ public class ComputationGraphConfigurationTest extends BaseDL4JTest { .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .graphBuilder().addInputs("input") .addLayer("cnn1", - new ConvolutionLayer.Builder(2, 2).stride(2, 2).nIn(1).nOut(5) + ConvolutionLayer.builder(2, 2).stride(2, 2).nIn(1).nOut(5) .build(), "input") .addLayer("cnn2", - new ConvolutionLayer.Builder(2, 2).stride(2, 2).nIn(1).nOut(5) + ConvolutionLayer.builder(2, 2).stride(2, 2).nIn(1).nOut(5) .build(), "input") .addLayer("max1", - new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX) + SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX) .kernelSize(2, 2).build(), "cnn1", "cnn2") - .addLayer("dnn1", new DenseLayer.Builder().nOut(7).build(), "max1") - .addLayer("max2", new SubsamplingLayer.Builder().build(), "max1") - .addLayer("output", new OutputLayer.Builder().nIn(7).nOut(10).activation(Activation.SOFTMAX).build(), "dnn1", + .addLayer("dnn1", DenseLayer.builder().nOut(7).build(), "max1") + .addLayer("max2", SubsamplingLayer.builder().build(), "max1") + .addLayer("output", OutputLayer.builder().nIn(7).nOut(10).activation(Activation.SOFTMAX).build(), "dnn1", "max2") .setOutputs("output") .inputPreProcessor("cnn1", new FeedForwardToCnnPreProcessor(32, 32, 3)) @@ -119,20 +119,20 @@ public class ComputationGraphConfigurationTest extends BaseDL4JTest { .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .graphBuilder().addInputs("input1", "input2") .addLayer("cnn1", - new ConvolutionLayer.Builder(2, 2).stride(2, 2).nIn(1).nOut(5) + ConvolutionLayer.builder(2, 2).stride(2, 2).nIn(1).nOut(5) .build(), "input1") .addLayer("cnn2", - new ConvolutionLayer.Builder(2, 2).stride(2, 2).nIn(1).nOut(5) + ConvolutionLayer.builder(2, 2).stride(2, 2).nIn(1).nOut(5) .build(), "input2") .addVertex("merge1", new MergeVertex(), "cnn1", "cnn2") .addVertex("subset1", new SubsetVertex(0, 1), "merge1") - .addLayer("dense1", new DenseLayer.Builder().nIn(20).nOut(5).build(), "subset1") - .addLayer("dense2", new DenseLayer.Builder().nIn(20).nOut(5).build(), "subset1") + .addLayer("dense1", DenseLayer.builder().nIn(20).nOut(5).build(), "subset1") + .addLayer("dense2", DenseLayer.builder().nIn(20).nOut(5).build(), "subset1") .addVertex("add", new ElementWiseVertex(ElementWiseVertex.Op.Add), "dense1", "dense2") - .addLayer("out", new OutputLayer.Builder().nIn(1).nOut(1).activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE).build(), "add") + .addLayer("out", OutputLayer.builder().nIn(1).nOut(1).activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE).build(), "add") .setOutputs("out").build(); String json = conf.toJson(); @@ -150,8 +150,8 @@ public class ComputationGraphConfigurationTest extends BaseDL4JTest { //Test no inputs for a layer: try { NeuralNetConfiguration.builder().graphBuilder().addInputs("input1") - .addLayer("dense1", new DenseLayer.Builder().nIn(2).nOut(2).build(), "input1") - .addLayer("out", new OutputLayer.Builder().nIn(2).nOut(2).build()).setOutputs("out") + .addLayer("dense1", DenseLayer.builder().nIn(2).nOut(2).build(), "input1") + .addLayer("out", OutputLayer.builder().nIn(2).nOut(2).build()).setOutputs("out") .build(); fail("No exception thrown for invalid 
configuration"); } catch (IllegalStateException e) { @@ -162,8 +162,8 @@ public class ComputationGraphConfigurationTest extends BaseDL4JTest { // Use appendLayer on first layer try { NeuralNetConfiguration.builder().graphBuilder() - .appendLayer("dense1", new DenseLayer.Builder().nIn(2).nOut(2).build()) - .addLayer("out", new OutputLayer.Builder().nIn(2).nOut(2).build()).setOutputs("out") + .appendLayer("dense1", DenseLayer.builder().nIn(2).nOut(2).build()) + .addLayer("out", OutputLayer.builder().nIn(2).nOut(2).build()).setOutputs("out") .build(); fail("No exception thrown for invalid configuration"); } catch (IllegalStateException e) { @@ -174,8 +174,8 @@ public class ComputationGraphConfigurationTest extends BaseDL4JTest { //Test no network inputs try { NeuralNetConfiguration.builder().graphBuilder() - .addLayer("dense1", new DenseLayer.Builder().nIn(2).nOut(2).build(), "input1") - .addLayer("out", new OutputLayer.Builder().nIn(2).nOut(2).build(), "dense1") + .addLayer("dense1", DenseLayer.builder().nIn(2).nOut(2).build(), "input1") + .addLayer("out", OutputLayer.builder().nIn(2).nOut(2).build(), "dense1") .setOutputs("out").build(); fail("No exception thrown for invalid configuration"); } catch (IllegalStateException e) { @@ -186,8 +186,8 @@ public class ComputationGraphConfigurationTest extends BaseDL4JTest { //Test no network outputs try { NeuralNetConfiguration.builder().graphBuilder().addInputs("input1") - .addLayer("dense1", new DenseLayer.Builder().nIn(2).nOut(2).build(), "input1") - .addLayer("out", new OutputLayer.Builder().nIn(2).nOut(2).build(), "dense1").build(); + .addLayer("dense1", DenseLayer.builder().nIn(2).nOut(2).build(), "input1") + .addLayer("out", OutputLayer.builder().nIn(2).nOut(2).build(), "dense1").build(); fail("No exception thrown for invalid configuration"); } catch (IllegalStateException e) { //OK - exception is good @@ -197,8 +197,8 @@ public class ComputationGraphConfigurationTest extends BaseDL4JTest { //Test: invalid input try { NeuralNetConfiguration.builder().graphBuilder().addInputs("input1") - .addLayer("dense1", new DenseLayer.Builder().nIn(2).nOut(2).build(), "input1") - .addLayer("out", new OutputLayer.Builder().nIn(2).nOut(2).build(), "thisDoesntExist") + .addLayer("dense1", DenseLayer.builder().nIn(2).nOut(2).build(), "input1") + .addLayer("out", OutputLayer.builder().nIn(2).nOut(2).build(), "thisDoesntExist") .setOutputs("out").build(); fail("No exception thrown for invalid configuration"); } catch (IllegalStateException e) { @@ -209,10 +209,10 @@ public class ComputationGraphConfigurationTest extends BaseDL4JTest { //Test: graph with cycles try { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().graphBuilder().addInputs("input1") - .addLayer("dense1", new DenseLayer.Builder().nIn(2).nOut(2).build(), "input1", "dense3") - .addLayer("dense2", new DenseLayer.Builder().nIn(2).nOut(2).build(), "dense1") - .addLayer("dense3", new DenseLayer.Builder().nIn(2).nOut(2).build(), "dense2") - .addLayer("out", new OutputLayer.Builder().nIn(2).nOut(2).lossFunction(LossFunctions.LossFunction.MSE).build(), "dense1") + .addLayer("dense1", DenseLayer.builder().nIn(2).nOut(2).build(), "input1", "dense3") + .addLayer("dense2", DenseLayer.builder().nIn(2).nOut(2).build(), "dense1") + .addLayer("dense3", DenseLayer.builder().nIn(2).nOut(2).build(), "dense2") + .addLayer("out", OutputLayer.builder().nIn(2).nOut(2).lossFunction(LossFunctions.LossFunction.MSE).build(), "dense1") .setOutputs("out").build(); //Cycle detection happens in 
ComputationGraph.init() ComputationGraph graph = new ComputationGraph(conf); @@ -229,20 +229,20 @@ public class ComputationGraphConfigurationTest extends BaseDL4JTest { NeuralNetConfiguration.builder().graphBuilder().addInputs("input1", "input2") .setInputTypes(new InputType.InputTypeRecurrent(10, 12)) .addLayer("cnn1", - new ConvolutionLayer.Builder(2, 2).stride(2, 2).nIn(1).nOut(5) + ConvolutionLayer.builder(2, 2).stride(2, 2).nIn(1).nOut(5) .build(), "input1") .addLayer("cnn2", - new ConvolutionLayer.Builder(2, 2).stride(2, 2).nIn(1).nOut(5) + ConvolutionLayer.builder(2, 2).stride(2, 2).nIn(1).nOut(5) .build(), "input2") .addVertex("merge1", new MergeVertex(), "cnn1", "cnn2") .addVertex("subset1", new SubsetVertex(0, 1), "merge1") - .addLayer("dense1", new DenseLayer.Builder().nIn(20).nOut(5).build(), "subset1") - .addLayer("dense2", new DenseLayer.Builder().nIn(20).nOut(5).build(), "subset1") + .addLayer("dense1", DenseLayer.builder().nIn(20).nOut(5).build(), "subset1") + .addLayer("dense2", DenseLayer.builder().nIn(20).nOut(5).build(), "subset1") .addVertex("add", new ElementWiseVertex(ElementWiseVertex.Op.Add), "dense1", "dense2") - .addLayer("out", new OutputLayer.Builder().nIn(1).nOut(1).activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE).build(), "add") + .addLayer("out", OutputLayer.builder().nIn(1).nOut(1).activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE).build(), "add") .setOutputs("out").build(); fail("No exception thrown for invalid configuration"); } catch (IllegalArgumentException e) { @@ -283,9 +283,9 @@ public class ComputationGraphConfigurationTest extends BaseDL4JTest { @Test public void testOutputOrderDoesntChangeWhenCloning() { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().graphBuilder().addInputs("in") - .addLayer("out1", new OutputLayer.Builder().nIn(1).nOut(1).build(), "in") - .addLayer("out2", new OutputLayer.Builder().nIn(1).nOut(1).build(), "in") - .addLayer("out3", new OutputLayer.Builder().nIn(1).nOut(1).build(), "in") + .addLayer("out1", OutputLayer.builder().nIn(1).nOut(1).build(), "in") + .addLayer("out2", OutputLayer.builder().nIn(1).nOut(1).build(), "in") + .addLayer("out3", OutputLayer.builder().nIn(1).nOut(1).build(), "in") .validateOutputLayerConfig(false) .setOutputs("out1", "out2", "out3").build(); @@ -301,14 +301,14 @@ public class ComputationGraphConfigurationTest extends BaseDL4JTest { public void testAllowDisconnectedLayers() { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().graphBuilder().addInputs("in") .addLayer("bidirectional", - new Bidirectional(new LSTM.Builder().activation(Activation.TANH).nOut(10).build()), + Bidirectional.builder(LSTM.builder().activation(Activation.TANH).nOut(10).build()).build(), "in") - .addLayer("out", new RnnOutputLayer.Builder().nOut(6) + .addLayer("out", RnnOutputLayer.builder().nOut(6) .lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX) .build(), "bidirectional") .addLayer("disconnected_layer", - new Bidirectional(new LSTM.Builder().activation(Activation.TANH).nOut(10).build()), + Bidirectional.builder(LSTM.builder().activation(Activation.TANH).nOut(10).build()).build(), "in") .setOutputs("out") .setInputTypes(new InputType.InputTypeRecurrent(10, 12)) @@ -323,9 +323,9 @@ public class ComputationGraphConfigurationTest extends BaseDL4JTest { public void testBidirectionalGraphSummary() { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().graphBuilder().addInputs("in") 
.addLayer("bidirectional", - new Bidirectional(new LSTM.Builder().activation(Activation.TANH).nOut(10).build()), + Bidirectional.builder(LSTM.builder().activation(Activation.TANH).nOut(10).build()).build(), "in") - .addLayer("out", new RnnOutputLayer.Builder().nOut(6) + .addLayer("out", RnnOutputLayer.builder().nOut(6) .lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX) .build(), "bidirectional") @@ -411,10 +411,10 @@ public class ComputationGraphConfigurationTest extends BaseDL4JTest { NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") - .layer("0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in") + .layer("0", DenseLayer.builder().nIn(10).nOut(10).build(), "in") .layer("1", - !lossLayer ? new OutputLayer.Builder().nIn(10).nOut(nOut[i]).activation(activations[i]).lossFunction(lf[i]).build() - : new LossLayer.Builder().activation(activations[i]).lossFunction(lf[i]).build(), "0") + !lossLayer ? OutputLayer.builder().nIn(10).nOut(nOut[i]).activation(activations[i]).lossFunction(lf[i]).build() + : LossLayer.builder().activation(activations[i]).lossFunction(lf[i].getILossFunction()).build(), "0") .setOutputs("1") .validateOutputLayerConfig(validate) .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/JsonTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/JsonTest.java index 3e43bfdbe..cee6652d2 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/JsonTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/JsonTest.java @@ -99,8 +99,8 @@ public class JsonTest extends BaseDL4JTest { for (int i = 0; i < lossFunctions.length; i++) { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345).updater(Updater.ADAM.getIUpdaterWithDefaultConfig()) - .layer(0, new DenseLayer.Builder().nIn(4).nOut(nOut[i]).activation(Activation.TANH).build()) - .layer(1, new LossLayer.Builder().lossFunction(lossFunctions[i]) + .layer(0, DenseLayer.builder().nIn(4).nOut(nOut[i]).activation(Activation.TANH).build()) + .layer(1, LossLayer.builder().lossFunction(lossFunctions[i]) .activation(outputActivationFn[i]).build()) .validateOutputLayerConfig(false).build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/MultiLayerNeuralNetConfigurationTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/MultiLayerNeuralNetConfigurationTest.java index eead1511f..9f1ebfd77 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/MultiLayerNeuralNetConfigurationTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/MultiLayerNeuralNetConfigurationTest.java @@ -69,9 +69,9 @@ public class MultiLayerNeuralNetConfigurationTest extends BaseDL4JTest { private static NeuralNetConfiguration getConf() { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345L) - .layer(0, new DenseLayer.Builder().nIn(2).nOut(2) + .layer(0, DenseLayer.builder().nIn(2).nOut(2) .dist(new NormalDistribution(0, 1)).build()) - .layer(1, new OutputLayer.Builder().nIn(2).nOut(1) + .layer(1, OutputLayer.builder().nIn(2).nOut(1) .activation(Activation.TANH) .dist(new NormalDistribution(0, 1)).lossFunction(LossFunctions.LossFunction.MSE) .build()) @@ -82,7 +82,7 @@ public class MultiLayerNeuralNetConfigurationTest extends BaseDL4JTest { @Test public void testJson() throws Exception { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() - .layer(0, new 
DenseLayer.Builder().dist(new NormalDistribution(1, 1e-1)).build()) + .layer(0, DenseLayer.builder().dist(new NormalDistribution(1, 1e-1)).build()) .inputPreProcessor(0, new CnnToFeedForwardPreProcessor()).build(); String json = conf.toJson(); @@ -123,17 +123,17 @@ public class MultiLayerNeuralNetConfigurationTest extends BaseDL4JTest { .l1(1e-1).l2(2e-4).weightNoise(new DropConnect(0.5)).miniBatch(true) .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT) .layer(0, - new ConvolutionLayer.Builder(5, 5).nOut(5).dropOut(0.5).weightInit(WeightInit.XAVIER) + ConvolutionLayer.builder(5, 5).nOut(5).dropOut(0.5).weightInit(WeightInit.XAVIER) .activation(Activation.RELU).build()) - .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[]{2, 2}) + .layer(1, SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX, new int[]{2, 2}) .build()) .layer(2, - new ConvolutionLayer.Builder(3, 3).nOut(10).dropOut(0.5).weightInit(WeightInit.XAVIER) + ConvolutionLayer.builder(3, 3).nOut(10).dropOut(0.5).weightInit(WeightInit.XAVIER) .activation(Activation.RELU).build()) - .layer(3, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[]{2, 2}) + .layer(3, SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX, new int[]{2, 2}) .build()) - .layer(4, new DenseLayer.Builder().nOut(100).activation(Activation.RELU).build()) - .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) + .layer(4, DenseLayer.builder().nOut(100).activation(Activation.RELU).build()) + .layer(5, OutputLayer.builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .nOut(outputNum).weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX) .build()) @@ -157,15 +157,15 @@ public class MultiLayerNeuralNetConfigurationTest extends BaseDL4JTest { NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().seed(seed) .l1(1e-1).l2(2e-4).dropOut(0.5).miniBatch(true) .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT) - .layer(new ConvolutionLayer.Builder(5, 5).nOut(5).dropOut(0.5).weightInit(WeightInit.XAVIER) + .layer(ConvolutionLayer.builder(5, 5).nOut(5).dropOut(0.5).weightInit(WeightInit.XAVIER) .activation(Activation.RELU).build()) - .layer(new Upsampling2D.Builder().size(2).build()) + .layer(Upsampling2D.builder().size(2).build()) .layer(2, - new ConvolutionLayer.Builder(3, 3).nOut(10).dropOut(0.5).weightInit(WeightInit.XAVIER) + ConvolutionLayer.builder(3, 3).nOut(10).dropOut(0.5).weightInit(WeightInit.XAVIER) .activation(Activation.RELU).build()) - .layer(new Upsampling2D.Builder().size(2).build()) - .layer(4, new DenseLayer.Builder().nOut(100).activation(Activation.RELU).build()) - .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) + .layer(Upsampling2D.builder().size(2).build()) + .layer(4, DenseLayer.builder().nOut(100).activation(Activation.RELU).build()) + .layer(5, OutputLayer.builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .nOut(outputNum).weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX) .build()) @@ -181,9 +181,9 @@ public class MultiLayerNeuralNetConfigurationTest extends BaseDL4JTest { public void testGlobalPoolingJson() { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new NoOp()) .dist(new NormalDistribution(0, 1.0)).seed(12345L) - .layer(0, new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).nOut(5).build()) - .layer(1, new GlobalPoolingLayer.Builder().poolingType(PoolingType.PNORM).pnorm(3).build()) - .layer(2, new 
OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(0, ConvolutionLayer.builder().kernelSize(2, 2).stride(1, 1).nOut(5).build()) + .layer(1, GlobalPoolingLayer.builder().poolingType(PoolingType.PNORM).pnorm(3).build()) + .layer(2, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(3).build()) .inputType(InputType.convolutional(32, 32, 1)).build(); @@ -196,7 +196,7 @@ public class MultiLayerNeuralNetConfigurationTest extends BaseDL4JTest { @Test public void testYaml() throws Exception { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() - .layer(0, new DenseLayer.Builder().dist(new NormalDistribution(1, 1e-1)).build()) + .layer(0, DenseLayer.builder().dist(new NormalDistribution(1, 1e-1)).build()) .inputPreProcessor(0, new CnnToFeedForwardPreProcessor()).build(); String json = conf.toYaml(); NeuralNetConfiguration from = NeuralNetConfiguration.fromYaml(json); @@ -226,8 +226,8 @@ public class MultiLayerNeuralNetConfigurationTest extends BaseDL4JTest { @Test public void testClone() { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() - .layer(0, new DenseLayer.Builder().build()) - .layer(1, new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).build()) + .layer(0, DenseLayer.builder().build()) + .layer(1, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).build()) .inputPreProcessor(1, new CnnToFeedForwardPreProcessor()).build(); NeuralNetConfiguration conf2 = conf.clone(); @@ -301,8 +301,8 @@ public class MultiLayerNeuralNetConfigurationTest extends BaseDL4JTest { try { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345) - .layer(1, new DenseLayer.Builder().nIn(3).nOut(4).build()) - .layer(2, new OutputLayer.Builder().nIn(4).nOut(5).build()) + .layer(1, DenseLayer.builder().nIn(3).nOut(4).build()) + .layer(2, OutputLayer.builder().nIn(4).nOut(5).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -317,8 +317,8 @@ public class MultiLayerNeuralNetConfigurationTest extends BaseDL4JTest { try { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345) - .layer(0, new DenseLayer.Builder().nIn(3).nOut(4).build()) - .layer(2, new OutputLayer.Builder().nIn(4).nOut(5).build()) + .layer(0, DenseLayer.builder().nIn(3).nOut(4).build()) + .layer(2, OutputLayer.builder().nIn(4).nOut(5).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -336,8 +336,8 @@ public class MultiLayerNeuralNetConfigurationTest extends BaseDL4JTest { public void testListOverloads() { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345) - .layer(0, new DenseLayer.Builder().nIn(3).nOut(4).build()) - .layer(1, new OutputLayer.Builder().nIn(4).nOut(5).activation(Activation.SOFTMAX).build()) + .layer(0, DenseLayer.builder().nIn(3).nOut(4).build()) + .layer(1, OutputLayer.builder().nIn(4).nOut(5).activation(Activation.SOFTMAX).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -350,16 +350,16 @@ public class MultiLayerNeuralNetConfigurationTest extends BaseDL4JTest { assertEquals(5, ol.getNOut()); NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder().seed(12345) - .layer(0, new DenseLayer.Builder().nIn(3).nOut(4).build()) - .layer(1, new OutputLayer.Builder().nIn(4).nOut(5).activation(Activation.SOFTMAX).build()) + .layer(0, DenseLayer.builder().nIn(3).nOut(4).build()) + .layer(1, 
OutputLayer.builder().nIn(4).nOut(5).activation(Activation.SOFTMAX).build()) .build(); MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); net2.init(); NeuralNetConfiguration conf3 = NeuralNetConfiguration.builder().seed(12345) - .layer(new DenseLayer.Builder().nIn(3).nOut(4).build()) + .layer(DenseLayer.builder().nIn(3).nOut(4).build()) .layer( - new OutputLayer.Builder().nIn(4).nOut(5).activation(Activation.SOFTMAX).build()) + OutputLayer.builder().nIn(4).nOut(5).activation(Activation.SOFTMAX).build()) .build(); MultiLayerNetwork net3 = new MultiLayerNetwork(conf3); net3.init(); @@ -375,14 +375,16 @@ public class MultiLayerNeuralNetConfigurationTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .updater(new Adam(1e-2)) .biasUpdater(new Adam(0.5)) - .layer(0, new ConvolutionLayer.Builder(5, 5).nOut(5).weightInit(WeightInit.XAVIER) + .layer(0, ConvolutionLayer.builder(5, 5).nOut(5).weightInit(WeightInit.XAVIER) .activation(Activation.RELU).build()) - .layer(1, new DenseLayer.Builder().nOut(100).activation(Activation.RELU).build()) - .layer(2, new DenseLayer.Builder().nOut(100).activation(Activation.RELU).build()) - .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).nOut(10) + .layer(1, DenseLayer.builder().nOut(100).activation(Activation.RELU).build()) + .layer(2, DenseLayer.builder().nOut(100).activation(Activation.RELU).build()) + .layer(3, OutputLayer.builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).nOut(10) .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).build()) .inputType(InputType.convolutional(28, 28, 1)).build(); + conf.init(); + BaseLayerConfiguration l0 = (BaseLayerConfiguration) conf.getConf(0).getLayer(); BaseLayerConfiguration l1 = (BaseLayerConfiguration) conf.getConf(1).getLayer(); BaseLayerConfiguration l2 = (BaseLayerConfiguration) conf.getConf(2).getLayer(); @@ -432,10 +434,10 @@ public class MultiLayerNeuralNetConfigurationTest extends BaseDL4JTest { try { NeuralNetConfiguration.builder() - .layer(new DenseLayer.Builder().nIn(10).nOut(10).build()) - .layer(!lossLayer ? new OutputLayer.Builder().nIn(10).nOut(nOut[i]) + .layer(DenseLayer.builder().nIn(10).nOut(10).build()) + .layer(!lossLayer ? 
OutputLayer.builder().nIn(10).nOut(nOut[i]) .activation(activations[i]).lossFunction(lf[i]).build() - : new LossLayer.Builder().activation(activations[i]).lossFunction(lf[i]) + : LossLayer.builder().activation(activations[i]).lossFunction(lf[i].getILossFunction()) .build()) .validateOutputLayerConfig(validate) .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/MultiNeuralNetConfLayerBuilderTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/MultiNeuralNetConfLayerBuilderTest.java index 4282505a7..73018534a 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/MultiNeuralNetConfLayerBuilderTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/MultiNeuralNetConfLayerBuilderTest.java @@ -67,9 +67,9 @@ public class MultiNeuralNetConfLayerBuilderTest extends BaseDL4JTest { NeuralNetConfiguration multiConf1 = NeuralNetConfiguration.builder() - .layer(0, new DenseLayer.Builder().nIn(newNumIn).nOut(newNumOut).activation(act) + .layer(0, DenseLayer.builder().nIn(newNumIn).nOut(newNumOut).activation(act) .build()) - .layer(1, new DenseLayer.Builder().nIn(newNumIn + 1).nOut(newNumOut + 1) + .layer(1, DenseLayer.builder().nIn(newNumIn + 1).nOut(newNumOut + 1) .activation(act).build()) .build(); NeuralNetConfiguration firstLayer = multiConf1.getConf(0).getNetConfiguration(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/NeuralNetConfigurationTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/NeuralNetConfigurationTest.java index 0ef220c25..69a5fa797 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/NeuralNetConfigurationTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/NeuralNetConfigurationTest.java @@ -113,7 +113,7 @@ public class NeuralNetConfigurationTest extends BaseDL4JTest { @Test public void testRNG() { - DenseLayer layer = new DenseLayer.Builder().nIn(trainingSet.numInputs()).nOut(trainingSet.numOutcomes()) + DenseLayer layer = DenseLayer.builder().nIn(trainingSet.numInputs()).nOut(trainingSet.numOutcomes()) .weightInit(WeightInit.UNIFORM).activation(Activation.TANH).build(); NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(123) @@ -125,7 +125,7 @@ public class NeuralNetConfigurationTest extends BaseDL4JTest { INDArray modelWeights = model.getParam(DefaultParamInitializer.WEIGHT_KEY); - DenseLayer layer2 = new DenseLayer.Builder().nIn(trainingSet.numInputs()).nOut(trainingSet.numOutcomes()) + DenseLayer layer2 = DenseLayer.builder().nIn(trainingSet.numInputs()).nOut(trainingSet.numOutcomes()) .weightInit(WeightInit.UNIFORM).activation(Activation.TANH).build(); NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder().seed(123) .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).layer(layer2).build(); @@ -197,7 +197,7 @@ public class NeuralNetConfigurationTest extends BaseDL4JTest { private static NeuralNetConfiguration getConfig(int nIn, int nOut, IWeightInit weightInit, boolean pretrain) { - DenseLayer layer = new DenseLayer.Builder().nIn(nIn).nOut(nOut).weightInit(weightInit) + DenseLayer layer = DenseLayer.builder().nIn(nIn).nOut(nOut).weightInit(weightInit) .activation(Activation.TANH).build(); NeuralNetConfiguration conf = NeuralNetConfiguration.builder() @@ -226,10 +226,10 @@ public class NeuralNetConfigurationTest extends BaseDL4JTest { INDArray gradientW = Nd4j.ones(nIns[0], nOuts[0]); NeuralNetConfiguration conf = 
NeuralNetConfiguration.builder().updater(new Sgd(0.3)) - .layer(0, new DenseLayer.Builder().nIn(nIns[0]).nOut(nOuts[0]) + .layer(0, DenseLayer.builder().nIn(nIns[0]).nOut(nOuts[0]) .updater(new Sgd(lr)).biasUpdater(new Sgd(biasLr)).build()) - .layer(1, new BatchNormalization.Builder().nIn(nIns[1]).nOut(nOuts[1]).updater(new Sgd(0.7)).build()) - .layer(2, new OutputLayer.Builder().nIn(nIns[2]).nOut(nOuts[2]).lossFunction(LossFunctions.LossFunction.MSE).build()) + .layer(1,BatchNormalization.builder().nIn(nIns[1]).nOut(nOuts[1]).updater(new Sgd(0.7)).build()) + .layer(2, OutputLayer.builder().nIn(nIns[2]).nOut(nOuts[2]).lossFunction(LossFunctions.LossFunction.MSE).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -287,9 +287,9 @@ public class NeuralNetConfigurationTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().l1(l1) .l2(l2) - .layer(0, new DenseLayer.Builder().nIn(nIns[0]).nOut(nOuts[0]).build()) - .layer(1, new BatchNormalization.Builder().nIn(nIns[1]).nOut(nOuts[1]).l2(0.5).build()) - .layer(2, new OutputLayer.Builder().nIn(nIns[2]).nOut(nOuts[2]).lossFunction(LossFunctions.LossFunction.MSE).build()) + .layer(0, DenseLayer.builder().nIn(nIns[0]).nOut(nOuts[0]).build()) + .layer(1,BatchNormalization.builder().nIn(nIns[1]).nOut(nOuts[1]).l2(0.5).build()) + .layer(2, OutputLayer.builder().nIn(nIns[2]).nOut(nOuts[2]).lossFunction(LossFunctions.LossFunction.MSE).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -318,7 +318,7 @@ public class NeuralNetConfigurationTest extends BaseDL4JTest { public void testLayerPretrainConfig() { boolean pretrain = true; - VariationalAutoencoder layer = new VariationalAutoencoder.Builder() + VariationalAutoencoder layer = VariationalAutoencoder.builder() .nIn(10).nOut(5).updater(new Sgd(1e-1)) .lossFunction(LossFunctions.LossFunction.KL_DIVERGENCE).build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/constraints/TestConstraints.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/constraints/TestConstraints.java index d1aae72e9..616052892 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/constraints/TestConstraints.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/constraints/TestConstraints.java @@ -48,6 +48,7 @@ import org.nd4j.linalg.learning.config.RmsProp; import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; +import java.util.List; import java.util.Map; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -71,9 +72,9 @@ public class TestConstraints extends BaseDL4JTest { .updater(new Sgd(0.0)) .dist(new NormalDistribution(0, 5)) - .layer(new LSTM.Builder().nIn(12).nOut(10) + .layer(LSTM.builder().nIn(12).nOut(10) .constrainRecurrent(lc).build()) - .layer(new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(8).build()) + .layer(OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(8).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -124,9 +125,9 @@ public class TestConstraints extends BaseDL4JTest { .dist(new NormalDistribution(0, 5)) .biasInit(10.0) - .layer(new DenseLayer.Builder().nIn(12).nOut(10) + .layer(DenseLayer.builder().nIn(12).nOut(10) .constrainBias(lc).build()) - .layer(new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(8).build()) + 
.layer(OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(8).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -176,9 +177,9 @@ public class TestConstraints extends BaseDL4JTest { .updater(new Sgd(0.0)) .dist(new NormalDistribution(0, 5)) - .layer(new DenseLayer.Builder().nIn(12).nOut(10) + .layer(DenseLayer.builder().nIn(12).nOut(10) .constrainWeights(lc).build()) - .layer(new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(8).build()) + .layer(OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(8).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -229,9 +230,10 @@ public class TestConstraints extends BaseDL4JTest { .dist(new NormalDistribution(0, 5)) .biasInit(0.2) - .layer(new DenseLayer.Builder().nIn(12).nOut(10) - .constrainAllParameters(lc).build()) - .layer(new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(8).build()) + .layer(DenseLayer.builder().nIn(12).nOut(10) + .allParamConstraints(List.of(lc)) + .build()) + .layer(OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(8).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -290,9 +292,9 @@ public class TestConstraints extends BaseDL4JTest { .dist(new NormalDistribution(0, 5)) .biasInit(0.2) - .layer(new DenseLayer.Builder().nIn(12).nOut(10) + .layer(DenseLayer.builder().nIn(12).nOut(10) .constrainWeights(lc).constrainBias(lc).build()) - .layer(new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(8).build()) + .layer(OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(8).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -351,8 +353,8 @@ public class TestConstraints extends BaseDL4JTest { .dist(new NormalDistribution(0,5)) .biasInit(1) - .layer(new DenseLayer.Builder().nIn(12).nOut(10).build()) - .layer(new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(8).build()) + .layer(DenseLayer.builder().nIn(12).nOut(10).build()) + .layer(OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(8).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -406,7 +408,7 @@ public class TestConstraints extends BaseDL4JTest { .graphBuilder() .addInputs("input_lstm", "input_cpc") .addLayer("first_lstm_layer", - new LSTM.Builder() + LSTM.builder() .nIn(nIn) .nOut(lstmLayerSize) .activation(Activation.RELU) @@ -417,7 +419,7 @@ public class TestConstraints extends BaseDL4JTest { .addVertex("merge", new MergeVertex(), "lastTimeStep", "input_cpc") .addLayer("dense", - new DenseLayer.Builder() + DenseLayer.builder() .constrainWeights(new NonNegativeConstraint()) .nIn(lstmLayerSize + 1) .nOut(lstmLayerSize/2) @@ -425,7 +427,7 @@ public class TestConstraints extends BaseDL4JTest { .build(), "merge") .addLayer("second_dense", - new DenseLayer.Builder() + DenseLayer.builder() .constrainWeights(new NonNegativeConstraint()) .nIn(lstmLayerSize/2) .nOut(lstmLayerSize/8) @@ -433,7 +435,7 @@ public class TestConstraints extends BaseDL4JTest { .build(), "dense") .addLayer("output_layer", - new OutputLayer.Builder(LossFunctions.LossFunction.MSE) + OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE) .constrainWeights(new NonNegativeConstraint()) .nIn(lstmLayerSize/8) .nOut(1) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/dropout/TestDropout.java 
b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/dropout/TestDropout.java index f574ae089..453d1bf3a 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/dropout/TestDropout.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/dropout/TestDropout.java @@ -62,29 +62,29 @@ public class TestDropout extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dropOut(0.6) - .layer(new DenseLayer.Builder().nIn(10).nOut(10).build()) - .layer(new DenseLayer.Builder().nIn(10).nOut(10).dropOut(0.7).build()) - .layer(new DenseLayer.Builder().nIn(10).nOut(10).dropOut(new AlphaDropout(0.5)).build()) + .layer(DenseLayer.builder().nIn(10).nOut(10).build()) + .layer(DenseLayer.builder().nIn(10).nOut(10).dropOut(0.7).build()) + .layer(DenseLayer.builder().nIn(10).nOut(10).dropOut(new AlphaDropout(0.5)).build()) .build(); - assertEquals(new Dropout(0.6), conf.getFlattenedLayerConfigurations().get(0).getIDropout()); - assertEquals(new Dropout(0.7), conf.getFlattenedLayerConfigurations().get(1).getIDropout()); - assertEquals(new AlphaDropout(0.5), conf.getFlattenedLayerConfigurations().get(2).getIDropout()); + assertEquals(new Dropout(0.6), conf.getFlattenedLayerConfigurations().get(0).getDropOut()); + assertEquals(new Dropout(0.7), conf.getFlattenedLayerConfigurations().get(1).getDropOut()); + assertEquals(new AlphaDropout(0.5), conf.getFlattenedLayerConfigurations().get(2).getDropOut()); ComputationGraphConfiguration conf2 = NeuralNetConfiguration.builder() .dropOut( new Dropout(0.6)) .graphBuilder() .addInputs("in") - .addLayer("0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in") - .addLayer("1", new DenseLayer.Builder().nIn(10).nOut(10).dropOut(0.7).build(), "0") - .addLayer("2", new DenseLayer.Builder().nIn(10).nOut(10).dropOut(new AlphaDropout(0.5)).build(), "1") + .addLayer("0", DenseLayer.builder().nIn(10).nOut(10).build(), "in") + .addLayer("1", DenseLayer.builder().nIn(10).nOut(10).dropOut(0.7).build(), "0") + .addLayer("2", DenseLayer.builder().nIn(10).nOut(10).dropOut(new AlphaDropout(0.5)).build(), "1") .setOutputs("2") .build(); - assertEquals(new Dropout(0.6), ((LayerVertex)conf2.getVertices().get("0")).getLayerConfiguration().getIDropout()); - assertEquals(new Dropout(0.7), ((LayerVertex)conf2.getVertices().get("1")).getLayerConfiguration().getIDropout()); - assertEquals(new AlphaDropout(0.5), ((LayerVertex)conf2.getVertices().get("2")).getLayerConfiguration().getIDropout()); + assertEquals(new Dropout(0.6), ((LayerVertex)conf2.getVertices().get("0")).getLayerConfiguration().getDropOut()); + assertEquals(new Dropout(0.7), ((LayerVertex)conf2.getVertices().get("1")).getLayerConfiguration().getDropOut()); + assertEquals(new AlphaDropout(0.5), ((LayerVertex)conf2.getVertices().get("2")).getLayerConfiguration().getDropOut()); } @Test @@ -95,8 +95,8 @@ public class TestDropout extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() - .layer(new DenseLayer.Builder().nIn(4).nOut(3).dropOut(d1).build()) - .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MSE).dropOut(d2).nIn(3).nOut(3).build()) + .layer(DenseLayer.builder().nIn(4).nOut(3).dropOut(d1).build()) + .layer(OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).dropOut(d2).nIn(3).nOut(3).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -131,8 +131,8 @@ public class TestDropout extends BaseDL4JTest { ComputationGraphConfiguration conf2 = 
NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") - .addLayer("0", new DenseLayer.Builder().nIn(4).nOut(3).dropOut(d1).build(), "in") - .addLayer("1", new OutputLayer.Builder(LossFunctions.LossFunction.MSE).dropOut(d2).nIn(3).nOut(3).build(), "0") + .addLayer("0", DenseLayer.builder().nIn(4).nOut(3).dropOut(d1).build(), "in") + .addLayer("1", OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).dropOut(d2).nIn(3).nOut(3).build(), "0") .setOutputs("1") .build(); @@ -188,8 +188,8 @@ public class TestDropout extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dropOut(id) - .layer(new DenseLayer.Builder().nIn(4).nOut(3).build()) - .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MSE).nIn(3).nOut(3).build()) + .layer(DenseLayer.builder().nIn(4).nOut(3).build()) + .layer(OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(3).nOut(3).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -200,8 +200,8 @@ public class TestDropout extends BaseDL4JTest { .dropOut(id) .graphBuilder() .addInputs("in") - .addLayer("0", new DenseLayer.Builder().nIn(4).nOut(3).build(), "in") - .addLayer("1", new OutputLayer.Builder(LossFunctions.LossFunction.MSE).nIn(3).nOut(3).build(), "0") + .addLayer("0", DenseLayer.builder().nIn(4).nOut(3).build(), "in") + .addLayer("1", OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(3).nOut(3).build(), "0") .setOutputs("1") .build(); @@ -602,7 +602,7 @@ public class TestDropout extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() - .layer(new DropoutLayer.Builder(new SpatialDropout(0.5)).build()) + .layer(DropoutLayer.builder(new SpatialDropout(0.5)).build()) .build(); String asJson = conf.toJson(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/graph/ElementWiseVertexTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/graph/ElementWiseVertexTest.java index c3ec4a87c..ca990ac05 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/graph/ElementWiseVertexTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/graph/ElementWiseVertexTest.java @@ -73,7 +73,7 @@ public class ElementWiseVertexTest extends BaseDL4JTest { ComputationGraphConfiguration cgc = NeuralNetConfiguration.builder().graphBuilder() .addInputs("input1", "input2", "input3") .addLayer("denselayer", - new DenseLayer.Builder().nIn(featuresz).nOut(1).activation(Activation.IDENTITY) + DenseLayer.builder().nIn(featuresz).nOut(1).activation(Activation.IDENTITY) .build(), "input1") /* denselayer is not actually used, but it seems that you _need_ to have trainable parameters, otherwise, you get @@ -87,7 +87,7 @@ public class ElementWiseVertexTest extends BaseDL4JTest { */ .addVertex("elementwiseAdd", new ElementWiseVertex(ElementWiseVertex.Op.Add), "input1", "input2", "input3") - .addLayer("Add", new ActivationLayer.Builder().activation(Activation.IDENTITY).build(), + .addLayer("Add", ActivationLayer.builder().activation(Activation.IDENTITY).build(), "elementwiseAdd") .setOutputs("Add", "denselayer").build(); @@ -114,7 +114,7 @@ public class ElementWiseVertexTest extends BaseDL4JTest { ComputationGraphConfiguration cgc = NeuralNetConfiguration.builder().graphBuilder() .addInputs("input1", "input2", "input3") .addLayer("denselayer", - new DenseLayer.Builder().nIn(featuresz).nOut(1).activation(Activation.IDENTITY) + 
DenseLayer.builder().nIn(featuresz).nOut(1).activation(Activation.IDENTITY) .build(), "input1") /* denselayer is not actually used, but it seems that you _need_ to have trainable parameters, otherwise, you get @@ -128,7 +128,7 @@ public class ElementWiseVertexTest extends BaseDL4JTest { */ .addVertex("elementwiseProduct", new ElementWiseVertex(ElementWiseVertex.Op.Product), "input1", "input2", "input3") - .addLayer("Product", new ActivationLayer.Builder().activation(Activation.IDENTITY).build(), + .addLayer("Product", ActivationLayer.builder().activation(Activation.IDENTITY).build(), "elementwiseProduct") .setOutputs("Product", "denselayer").build(); @@ -155,7 +155,7 @@ public class ElementWiseVertexTest extends BaseDL4JTest { ComputationGraphConfiguration cgc = NeuralNetConfiguration.builder().graphBuilder() .addInputs("input1", "input2") .addLayer("denselayer", - new DenseLayer.Builder().nIn(featuresz).nOut(1).activation(Activation.IDENTITY) + DenseLayer.builder().nIn(featuresz).nOut(1).activation(Activation.IDENTITY) .build(), "input1") /* denselayer is not actually used, but it seems that you _need_ to have trainable parameters, otherwise, you get @@ -169,7 +169,7 @@ public class ElementWiseVertexTest extends BaseDL4JTest { */ .addVertex("elementwiseSubtract", new ElementWiseVertex(ElementWiseVertex.Op.Subtract), "input1", "input2") - .addLayer("Subtract", new ActivationLayer.Builder().activation(Activation.IDENTITY).build(), + .addLayer("Subtract", ActivationLayer.builder().activation(Activation.IDENTITY).build(), "elementwiseSubtract") .setOutputs("Subtract", "denselayer").build(); @@ -200,22 +200,22 @@ public class ElementWiseVertexTest extends BaseDL4JTest { .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).graphBuilder() .addInputs("input1", "input2", "input3") .addLayer("dense1", - new DenseLayer.Builder().nIn(featuresz).nOut(midsz) - .activation(new ActivationTanH()).build(), + DenseLayer.builder().nIn(featuresz).nOut(midsz) + .activation(Activation.TANH).build(), "input1") .addLayer("dense2", - new DenseLayer.Builder().nIn(featuresz).nOut(midsz) - .activation(new ActivationTanH()).build(), + DenseLayer.builder().nIn(featuresz).nOut(midsz) + .activation(Activation.TANH).build(), "input2") .addLayer("dense3", - new DenseLayer.Builder().nIn(featuresz).nOut(midsz) - .activation(new ActivationTanH()).build(), + DenseLayer.builder().nIn(featuresz).nOut(midsz) + .activation(Activation.TANH).build(), "input3") .addVertex("elementwiseAdd", new ElementWiseVertex(ElementWiseVertex.Op.Add), "dense1", "dense2", "dense3") .addLayer("output", - new OutputLayer.Builder().nIn(midsz).nOut(outputsz) - .activation(new ActivationSigmoid()) + OutputLayer.builder().nIn(midsz).nOut(outputsz) + .activation(Activation.SIGMOID) .lossFunction(LossFunction.MSE).build(), "elementwiseAdd") .setOutputs("output").build(); @@ -376,22 +376,22 @@ public class ElementWiseVertexTest extends BaseDL4JTest { .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).graphBuilder() .addInputs("input1", "input2", "input3") .addLayer("dense1", - new DenseLayer.Builder().nIn(featuresz).nOut(midsz) - .activation(new ActivationTanH()).build(), + DenseLayer.builder().nIn(featuresz).nOut(midsz) + .activation(Activation.TANH).build(), "input1") .addLayer("dense2", - new DenseLayer.Builder().nIn(featuresz).nOut(midsz) - .activation(new ActivationTanH()).build(), + DenseLayer.builder().nIn(featuresz).nOut(midsz) + .activation(Activation.TANH).build(), "input2") .addLayer("dense3", - new 
DenseLayer.Builder().nIn(featuresz).nOut(midsz) - .activation(new ActivationTanH()).build(), + DenseLayer.builder().nIn(featuresz).nOut(midsz) + .activation(Activation.TANH).build(), "input3") .addVertex("elementwiseProduct", new ElementWiseVertex(ElementWiseVertex.Op.Product), "dense1", "dense2", "dense3") .addLayer("output", - new OutputLayer.Builder().nIn(midsz).nOut(outputsz) - .activation(new ActivationSigmoid()) + OutputLayer.builder().nIn(midsz).nOut(outputsz) + .activation(Activation.SIGMOID) .lossFunction(LossFunction.MSE).build(), "elementwiseProduct") .setOutputs("output").build(); @@ -551,18 +551,18 @@ public class ElementWiseVertexTest extends BaseDL4JTest { .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).graphBuilder() .addInputs("input1", "input2") .addLayer("dense1", - new DenseLayer.Builder().nIn(featuresz).nOut(midsz) - .activation(new ActivationTanH()).build(), + DenseLayer.builder().nIn(featuresz).nOut(midsz) + .activation(Activation.TANH).build(), "input1") .addLayer("dense2", - new DenseLayer.Builder().nIn(featuresz).nOut(midsz) - .activation(new ActivationTanH()).build(), + DenseLayer.builder().nIn(featuresz).nOut(midsz) + .activation(Activation.TANH).build(), "input2") .addVertex("elementwiseSubtract", new ElementWiseVertex(ElementWiseVertex.Op.Subtract), "dense1", "dense2") .addLayer("output", - new OutputLayer.Builder().nIn(midsz).nOut(outputsz) - .activation(new ActivationSigmoid()) + OutputLayer.builder().nIn(midsz).nOut(outputsz) + .activation(Activation.SIGMOID) .lossFunction(LossFunction.MSE).build(), "elementwiseSubtract") .setOutputs("output").build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/graph/ShiftVertexTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/graph/ShiftVertexTest.java index be78b1ecf..72175f813 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/graph/ShiftVertexTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/graph/ShiftVertexTest.java @@ -86,7 +86,7 @@ public class ShiftVertexTest extends BaseDL4JTest { double sf = 4.1; ComputationGraphConfiguration cgc = NeuralNetConfiguration.builder().graphBuilder().addInputs("input") .addLayer("denselayer", - new DenseLayer.Builder().nIn(input.columns()).nOut(1) + DenseLayer.builder().nIn(input.columns()).nOut(1) .activation(Activation.IDENTITY).build(), "input") /* denselayer is not actually used, but it seems that you _need_ to have trainable parameters, otherwise, you get @@ -99,10 +99,10 @@ public class ShiftVertexTest extends BaseDL4JTest { * at org.deeplearning4j.nn.graph.ComputationGraph.init(ComputationGraph.java:341) */ .addLayer("identityinputactivation", - new ActivationLayer.Builder().activation(Activation.IDENTITY).build(), "input") + ActivationLayer.builder().activation(Activation.IDENTITY).build(), "input") .addVertex("shiftvertex", new ShiftVertex(sf), "identityinputactivation") .addLayer("identityshiftvertex", - new ActivationLayer.Builder().activation(Activation.IDENTITY).build(), + ActivationLayer.builder().activation(Activation.IDENTITY).build(), "shiftvertex") .setOutputs("identityshiftvertex", "denselayer").build(); @@ -144,12 +144,12 @@ public class ShiftVertexTest extends BaseDL4JTest { .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).graphBuilder() .addInputs("input") .addLayer("denselayer", - new DenseLayer.Builder().nIn(input.columns()).nOut(input.columns()) + 
DenseLayer.builder().nIn(input.columns()).nOut(input.columns()) .activation(a1).build(), "input") .addVertex("shiftvertex", new ShiftVertex(sf), "denselayer") .addLayer("output", - new OutputLayer.Builder().nIn(input.columns()).nOut(target.columns()) + OutputLayer.builder().nIn(input.columns()).nOut(target.columns()) .activation(a2).lossFunction(LossFunction.MSE).build(), "shiftvertex") .setOutputs("output").build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerBuilderTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerBuilderTest.java index 680681920..b8e8909e5 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerBuilderTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerBuilderTest.java @@ -67,7 +67,7 @@ public class LayerBuilderTest extends BaseDL4JTest { @Test public void testLayer() throws Exception { - DenseLayer layer = new DenseLayer.Builder().activation(act).weightInit(weight).dropOut(dropOut) + DenseLayer layer = DenseLayer.builder().activation(act).weightInit(weight).dropOut(dropOut) .updater(updater).gradientNormalization(gradNorm) .gradientNormalizationThreshold(gradNormThreshold).build(); @@ -75,7 +75,7 @@ public class LayerBuilderTest extends BaseDL4JTest { assertEquals(act, layer.getActivationFn()); assertEquals(weight.getWeightInitFunction(), layer.getWeightInit()); - assertEquals(new Dropout(dropOut), layer.getIDropout()); + assertEquals(new Dropout(dropOut), layer.getDropOut()); assertEquals(updater, layer.getIUpdater()); assertEquals(gradNorm, layer.getGradientNormalization()); assertEquals(gradNormThreshold, layer.getGradientNormalizationThreshold(), 0.0); @@ -83,7 +83,7 @@ public class LayerBuilderTest extends BaseDL4JTest { @Test public void testFeedForwardLayer() throws Exception { - DenseLayer ff = new DenseLayer.Builder().nIn(numIn).nOut(numOut).build(); + DenseLayer ff = DenseLayer.builder().nIn(numIn).nOut(numOut).build(); checkSerialization(ff); @@ -93,7 +93,7 @@ public class LayerBuilderTest extends BaseDL4JTest { @Test public void testConvolutionLayer() throws Exception { - ConvolutionLayer conv = new ConvolutionLayer.Builder(kernelSize, stride, padding).build(); + ConvolutionLayer conv = ConvolutionLayer.builder(kernelSize, stride, padding).build(); checkSerialization(conv); @@ -106,7 +106,7 @@ public class LayerBuilderTest extends BaseDL4JTest { @Test public void testSubsamplingLayer() throws Exception { SubsamplingLayer sample = - new SubsamplingLayer.Builder(poolType, stride).kernelSize(kernelSize).padding(padding).build(); + SubsamplingLayer.builder(poolType, stride).kernelSize(kernelSize).padding(padding).build(); checkSerialization(sample); @@ -118,21 +118,21 @@ public class LayerBuilderTest extends BaseDL4JTest { @Test public void testOutputLayer() throws Exception { - OutputLayer out = new OutputLayer.Builder(loss).build(); + OutputLayer out = OutputLayer.builder(loss).build(); checkSerialization(out); } @Test public void testRnnOutputLayer() throws Exception { - RnnOutputLayer out = new RnnOutputLayer.Builder(loss).build(); + RnnOutputLayer out = RnnOutputLayer.builder(loss).build(); checkSerialization(out); } @Test public void testAutoEncoder() throws Exception { - AutoEncoder enc = new AutoEncoder.Builder().corruptionLevel(corruptionLevel).sparsity(sparsity).build(); + AutoEncoder enc = AutoEncoder.builder().corruptionLevel(corruptionLevel).sparsity(sparsity).build(); 
checkSerialization(enc); @@ -142,7 +142,7 @@ public class LayerBuilderTest extends BaseDL4JTest { @Test public void testGravesLSTM() throws Exception { - GravesLSTM glstm = new GravesLSTM.Builder().forgetGateBiasInit(1.5).activation(Activation.TANH).nIn(numIn) + GravesLSTM glstm = GravesLSTM.builder().forgetGateBiasInit(1.5).activation(Activation.TANH).nIn(numIn) .nOut(numOut).build(); checkSerialization(glstm); @@ -155,7 +155,7 @@ public class LayerBuilderTest extends BaseDL4JTest { @Test public void testGravesBidirectionalLSTM() throws Exception { - final GravesBidirectionalLSTM glstm = new GravesBidirectionalLSTM.Builder().forgetGateBiasInit(1.5) + final GravesBidirectionalLSTM glstm = GravesBidirectionalLSTM.builder().forgetGateBiasInit(1.5) .activation(Activation.TANH).nIn(numIn).nOut(numOut).build(); checkSerialization(glstm); @@ -168,7 +168,7 @@ public class LayerBuilderTest extends BaseDL4JTest { @Test public void testEmbeddingLayer() throws Exception { - EmbeddingLayer el = new EmbeddingLayer.Builder().nIn(10).nOut(5).build(); + EmbeddingLayer el = EmbeddingLayer.builder().nIn(10).nOut(5).build(); checkSerialization(el); assertEquals(10, el.getNIn()); @@ -177,7 +177,7 @@ public class LayerBuilderTest extends BaseDL4JTest { @Test public void testBatchNormLayer() throws Exception { - BatchNormalization bN = new BatchNormalization.Builder().nIn(numIn).nOut(numOut).gamma(2).beta(1).decay(0.5) + BatchNormalization bN =BatchNormalization.builder().nIn(numIn).nOut(numOut).gamma(2).beta(1).decay(0.5) .lockGammaBeta(true).build(); checkSerialization(bN); @@ -192,11 +192,11 @@ public class LayerBuilderTest extends BaseDL4JTest { @Test public void testActivationLayer() throws Exception { - ActivationLayer activationLayer = new ActivationLayer.Builder().activation(act).build(); + ActivationLayer activationLayer = ActivationLayer.builder().activation(act).build(); checkSerialization(activationLayer); - assertEquals(act, activationLayer.activationFn); + assertEquals(act, activationLayer.getActivation()); } private void checkSerialization(LayerConfiguration layer) throws Exception { @@ -225,7 +225,7 @@ public class LayerBuilderTest extends BaseDL4JTest { assertEquals(confExpected.getFlattenedLayerConfigurations().get(0), confActual.getFlattenedLayerConfigurations().get(0), "unequal YAML serialization"); // check the layer's use of callSuper on equals method - confActual.getFlattenedLayerConfigurations().get(0).setIDropout(new Dropout(new java.util.Random().nextDouble())); + confActual.getFlattenedLayerConfigurations().get(0).setDropOut(new Dropout(new java.util.Random().nextDouble())); assertNotEquals( confExpected, confActual, "broken equals method (missing callSuper?)"); } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigTest.java index 7777475e6..bce68f0c9 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigTest.java @@ -53,13 +53,13 @@ public class LayerConfigTest extends BaseDL4JTest { String name2 = "bill"; NeuralNetConfiguration conf = NeuralNetConfiguration.builder() - .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).name(name1).build()) - .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).name(name2).build()).build(); + .layer(0, DenseLayer.builder().nIn(2).nOut(2).name(name1).build()) + .layer(1, 
DenseLayer.builder().nIn(2).nOut(2).name(name2).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); - assertEquals(name1, conf.getConf(0).getLayer().getLayerName()); - assertEquals(name2, conf.getConf(1).getLayer().getLayerName()); + assertEquals(name1, conf.getConf(0).getLayer().getName()); + assertEquals(name2, conf.getConf(1).getLayer().getName()); } @@ -67,8 +67,8 @@ public class LayerConfigTest extends BaseDL4JTest { public void testActivationLayerwiseOverride() { //Without layerwise override: NeuralNetConfiguration conf = NeuralNetConfiguration.builder().activation(Activation.RELU) - .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) - .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); + .layer(0, DenseLayer.builder().nIn(2).nOut(2).build()) + .layer(1, DenseLayer.builder().nIn(2).nOut(2).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -77,8 +77,8 @@ public class LayerConfigTest extends BaseDL4JTest { //With conf = NeuralNetConfiguration.builder().activation(Activation.RELU) - .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) - .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).activation(Activation.TANH).build()).build(); + .layer(0, DenseLayer.builder().nIn(2).nOut(2).build()) + .layer(1, DenseLayer.builder().nIn(2).nOut(2).activation(Activation.TANH).build()).build(); net = new MultiLayerNetwork(conf); net.init(); @@ -94,8 +94,8 @@ public class LayerConfigTest extends BaseDL4JTest { final Distribution defaultDistribution = new NormalDistribution(0, 1.0); NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dist(defaultDistribution).biasInit(1) - .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) - .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); + .layer(0, DenseLayer.builder().nIn(2).nOut(2).build()) + .layer(1, DenseLayer.builder().nIn(2).nOut(2).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -109,8 +109,8 @@ public class LayerConfigTest extends BaseDL4JTest { final Distribution overriddenDistribution = new UniformDistribution(0, 1); conf = NeuralNetConfiguration.builder() .dist(defaultDistribution).biasInit(1) - .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()).layer(1, - new DenseLayer.Builder().nIn(2).nOut(2) + .layer(0, DenseLayer.builder().nIn(2).nOut(2).build()).layer(1, + DenseLayer.builder().nIn(2).nOut(2) .dist(overriddenDistribution).biasInit(0).build()) .build(); @@ -181,23 +181,23 @@ public class LayerConfigTest extends BaseDL4JTest { @Test public void testDropoutLayerwiseOverride() { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().dropOut(1.0) - .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) - .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); + .layer(0, DenseLayer.builder().nIn(2).nOut(2).build()) + .layer(1, DenseLayer.builder().nIn(2).nOut(2).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); - assertEquals(new Dropout(1.0), conf.getConf(0).getLayer().getIDropout()); - assertEquals(new Dropout(1.0), conf.getConf(1).getLayer().getIDropout()); + assertEquals(new Dropout(1.0), conf.getConf(0).getLayer().getDropOut()); + assertEquals(new Dropout(1.0), conf.getConf(1).getLayer().getDropOut()); conf = NeuralNetConfiguration.builder().dropOut(1.0) - .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) - .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).dropOut(2.0).build()).build(); + .layer(0, 
DenseLayer.builder().nIn(2).nOut(2).build()) + .layer(1, DenseLayer.builder().nIn(2).nOut(2).dropOut(2.0).build()).build(); net = new MultiLayerNetwork(conf); net.init(); - assertEquals(new Dropout(1.0), conf.getConf(0).getLayer().getIDropout()); - assertEquals(new Dropout(2.0), conf.getConf(1).getLayer().getIDropout()); + assertEquals(new Dropout(1.0), conf.getConf(0).getLayer().getDropOut()); + assertEquals(new Dropout(2.0), conf.getConf(1).getLayer().getDropOut()); } @Test @@ -208,8 +208,8 @@ public class LayerConfigTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .updater(new Nesterovs(1.0, new MapSchedule(ScheduleType.ITERATION, testMomentumAfter))) - .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) - .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); + .layer(0, DenseLayer.builder().nIn(2).nOut(2).build()) + .layer(1, DenseLayer.builder().nIn(2).nOut(2).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -221,7 +221,7 @@ public class LayerConfigTest extends BaseDL4JTest { conf = NeuralNetConfiguration.builder().updater(new Nesterovs(1.0, new MapSchedule(ScheduleType.ITERATION, testMomentumAfter) )) - .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()).layer(1, new DenseLayer.Builder() + .layer(0, DenseLayer.builder().nIn(2).nOut(2).build()).layer(1, DenseLayer.builder() .nIn(2).nOut(2).updater(new Nesterovs(1.0, new MapSchedule(ScheduleType.ITERATION, testMomentumAfter2))).build()) .build(); @@ -234,8 +234,8 @@ public class LayerConfigTest extends BaseDL4JTest { @Test public void testUpdaterRhoRmsDecayLayerwiseOverride() { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new AdaDelta(0.5, 0.9)) - .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) - .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).updater(new AdaDelta(0.01,0.9)).build()).build(); + .layer(0, DenseLayer.builder().nIn(2).nOut(2).build()) + .layer(1, DenseLayer.builder().nIn(2).nOut(2).updater(new AdaDelta(0.01,0.9)).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -245,8 +245,8 @@ public class LayerConfigTest extends BaseDL4JTest { assertEquals(0.01, ((AdaDelta)((BaseLayerConfiguration) conf.getConf(1).getLayer()).getIUpdater()).getRho(), 0.0); conf = NeuralNetConfiguration.builder().updater(new RmsProp(1.0, 2.0, RmsProp.DEFAULT_RMSPROP_EPSILON)) - .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).updater(new RmsProp(1.0, 1.0, RmsProp.DEFAULT_RMSPROP_EPSILON)).build()) - .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).updater(new AdaDelta(0.5,AdaDelta.DEFAULT_ADADELTA_EPSILON)).build()) + .layer(0, DenseLayer.builder().nIn(2).nOut(2).updater(new RmsProp(1.0, 1.0, RmsProp.DEFAULT_RMSPROP_EPSILON)).build()) + .layer(1, DenseLayer.builder().nIn(2).nOut(2).updater(new AdaDelta(0.5,AdaDelta.DEFAULT_ADADELTA_EPSILON)).build()) .build(); net = new MultiLayerNetwork(conf); @@ -264,8 +264,8 @@ public class LayerConfigTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .updater(new Adam(1.0, 0.5, 0.5, 1e-8)) - .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) - .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).updater(new Adam(1.0, 0.6, 0.7, 1e-8)).build()) + .layer(0, DenseLayer.builder().nIn(2).nOut(2).build()) + .layer(1, DenseLayer.builder().nIn(2).nOut(2).updater(new Adam(1.0, 0.6, 0.7, 1e-8)).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -283,8 +283,8 @@ 
public class LayerConfigTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue) .gradientNormalizationThreshold(10) - .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) - .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); + .layer(0, DenseLayer.builder().nIn(2).nOut(2).build()) + .layer(1, DenseLayer.builder().nIn(2).nOut(2).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); BaseLayerConfiguration bconf = (BaseLayerConfiguration) conf.getConf(0).getLayer(); @@ -297,8 +297,8 @@ public class LayerConfigTest extends BaseDL4JTest { conf = NeuralNetConfiguration.builder() .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue) .gradientNormalizationThreshold(10) - .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) - .layer(1, new DenseLayer.Builder().nIn(2).nOut(2) + .layer(0, DenseLayer.builder().nIn(2).nOut(2).build()) + .layer(1, DenseLayer.builder().nIn(2).nOut(2) .gradientNormalization(GradientNormalization.None) .gradientNormalizationThreshold(2.5).build()) .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigValidationTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigValidationTest.java index b813b2b5f..f15342535 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigValidationTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/layers/LayerConfigValidationTest.java @@ -56,8 +56,8 @@ public class LayerConfigValidationTest extends BaseDL4JTest { public void testDropConnect() { // Warning thrown only since some layers may not have l1 or l2 NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new Sgd(0.1)).weightNoise(new DropConnect(0.5)) - .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) - .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); + .layer(0, DenseLayer.builder().nIn(2).nOut(2).build()) + .layer(1, DenseLayer.builder().nIn(2).nOut(2).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); } @@ -67,8 +67,8 @@ public class LayerConfigValidationTest extends BaseDL4JTest { public void testL1L2NotSet() { // Warning thrown only since some layers may not have l1 or l2 NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new Sgd(0.3)) - .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) - .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); + .layer(0, DenseLayer.builder().nIn(2).nOut(2).build()) + .layer(1, DenseLayer.builder().nIn(2).nOut(2).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); } @@ -78,8 +78,8 @@ public class LayerConfigValidationTest extends BaseDL4JTest { public void testRegNotSetL1Global() { assertThrows(IllegalStateException.class, () -> { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new Sgd(0.3)).l1(0.5) - .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) - .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); + .layer(0, DenseLayer.builder().nIn(2).nOut(2).build()) + .layer(1, DenseLayer.builder().nIn(2).nOut(2).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); }); @@ -90,8 +90,8 @@ public class LayerConfigValidationTest extends BaseDL4JTest { public void testRegNotSetL2Local() { 
assertThrows(IllegalStateException.class, () -> { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new Sgd(0.3)) - .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).l2(0.5).build()) - .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); + .layer(0, DenseLayer.builder().nIn(2).nOut(2).l2(0.5).build()) + .layer(1, DenseLayer.builder().nIn(2).nOut(2).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); }); @@ -102,8 +102,8 @@ public class LayerConfigValidationTest extends BaseDL4JTest { // Warning thrown only since global dist can be set with a different weight init locally NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new Sgd(0.3)).dist(new GaussianDistribution(1e-3, 2)) - .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) - .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); + .layer(0, DenseLayer.builder().nIn(2).nOut(2).build()) + .layer(1, DenseLayer.builder().nIn(2).nOut(2).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); } @@ -116,8 +116,8 @@ public class LayerConfigValidationTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new Nesterovs(1.0, new MapSchedule(ScheduleType.ITERATION, testMomentumAfter))) - .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) - .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); + .layer(0, DenseLayer.builder().nIn(2).nOut(2).build()) + .layer(1, DenseLayer.builder().nIn(2).nOut(2).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); } @@ -130,12 +130,12 @@ public class LayerConfigValidationTest extends BaseDL4JTest { /* Graph Builder */ .updater(Updater.RMSPROP.getIUpdaterWithDefaultConfig()).graphBuilder().addInputs("in") .addLayer("L" + 1, - new GravesLSTM.Builder().nIn(20).updater(Updater.RMSPROP).nOut(10) + GravesLSTM.builder().nIn(20).updater(Updater.RMSPROP).nOut(10) .weightInit(WeightInit.XAVIER) .dropOut(0.4).l1(0.3).activation(Activation.SIGMOID).build(), "in") .addLayer("output", - new RnnOutputLayer.Builder().nIn(20).nOut(10).activation(Activation.SOFTMAX) + RnnOutputLayer.builder().nIn(20).nOut(10).activation(Activation.SOFTMAX) .weightInit(WeightInit.RELU_UNIFORM).build(), "L" + 1) .setOutputs("output"); @@ -157,8 +157,8 @@ public class LayerConfigValidationTest extends BaseDL4JTest { // Nesterovs Updater NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new Nesterovs(0.9)) - .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).l2(0.5).build()) - .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).updater(new Nesterovs(0.3, 0.4)).build()).build(); + .layer(0, DenseLayer.builder().nIn(2).nOut(2).l2(0.5).build()) + .layer(1, DenseLayer.builder().nIn(2).nOut(2).updater(new Nesterovs(0.3, 0.4)).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -173,8 +173,8 @@ public class LayerConfigValidationTest extends BaseDL4JTest { // Adam Updater conf = NeuralNetConfiguration.builder().updater(new Adam(0.3)) .weightInit(expectedDist) - .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).l2(0.5).l1(0.3).build()) - .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build(); + .layer(0, DenseLayer.builder().nIn(2).nOut(2).l2(0.5).l1(0.3).build()) + .layer(1, DenseLayer.builder().nIn(2).nOut(2).build()).build(); net = new MultiLayerNetwork(conf); net.init(); @@ -191,8 +191,8 @@ public class LayerConfigValidationTest extends BaseDL4JTest { //RMSProp 
Updater conf = NeuralNetConfiguration.builder().updater(new RmsProp(0.3)) - .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build()) - .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).updater(new RmsProp(0.3, 0.4, RmsProp.DEFAULT_RMSPROP_EPSILON)).build()).build(); + .layer(0, DenseLayer.builder().nIn(2).nOut(2).build()) + .layer(1, DenseLayer.builder().nIn(2).nOut(2).updater(new RmsProp(0.3, 0.4, RmsProp.DEFAULT_RMSPROP_EPSILON)).build()).build(); net = new MultiLayerNetwork(conf); net.init(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/CNNProcessorTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/CNNProcessorTest.java index d530e416d..b6e4fb2d0 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/CNNProcessorTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/CNNProcessorTest.java @@ -249,7 +249,7 @@ public class CNNProcessorTest extends BaseDL4JTest { .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) // Building the DL4J network - .layer(0, new ConvolutionLayer.Builder(kernelArray, strideArray, zeroPaddingArray) + .layer(0, ConvolutionLayer.builder(kernelArray, strideArray, zeroPaddingArray) .name("cnn1") .convolutionMode(ConvolutionMode.Strict) .nIn(2) // 2 input channels @@ -258,7 +258,7 @@ public class CNNProcessorTest extends BaseDL4JTest { .activation(Activation.RELU) .biasInit(1e-2).build()) - .layer(1, new ConvolutionLayer.Builder(kernelArray, strideArray, zeroPaddingArray) + .layer(1, ConvolutionLayer.builder(kernelArray, strideArray, zeroPaddingArray) .name("cnn2") .convolutionMode(ConvolutionMode.Strict) .nOut(processWidth) @@ -267,21 +267,21 @@ public class CNNProcessorTest extends BaseDL4JTest { .biasInit(1e-2) .build()) - .layer(2, new ConvolutionLayer.Builder(kernelArray, strideArray, zeroPaddingArray) + .layer(2, ConvolutionLayer.builder(kernelArray, strideArray, zeroPaddingArray) .name("cnn3") .convolutionMode(ConvolutionMode.Strict) .nOut(processWidth) .weightInit(WeightInit.XAVIER_UNIFORM) .activation(Activation.RELU).build()) - .layer(3, new ConvolutionLayer.Builder(kernelArray, strideArray, zeroPaddingArray) + .layer(3, ConvolutionLayer.builder(kernelArray, strideArray, zeroPaddingArray) .name("cnn4") .convolutionMode(ConvolutionMode.Strict) .nOut(processWidth) .weightInit(WeightInit.XAVIER_UNIFORM) .activation(Activation.RELU).build()) - .layer(4, new OutputLayer.Builder(LossFunctions.LossFunction.MSE) + .layer(4, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE) .name("output") .nOut(1) .activation(Activation.TANH) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/CustomPreprocessorTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/CustomPreprocessorTest.java index c5755753a..1c554bdd4 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/CustomPreprocessorTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/CustomPreprocessorTest.java @@ -39,8 +39,8 @@ public class CustomPreprocessorTest extends BaseDL4JTest { //Second: let's create a MultiLayerCofiguration with one, and check JSON and YAML config actually works... 
NeuralNetConfiguration conf = NeuralNetConfiguration.builder() - .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()) - .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(10) + .layer(0, DenseLayer.builder().nIn(10).nOut(10).build()) + .layer(1, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT).nIn(10) .activation(Activation.SOFTMAX).nOut(10).build()) .inputPreProcessor(0, new MyCustomPreprocessor()) .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/TestPreProcessors.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/TestPreProcessors.java index 798762556..a6c391ec4 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/TestPreProcessors.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/preprocessor/TestPreProcessors.java @@ -58,7 +58,7 @@ public class TestPreProcessors extends BaseDL4JTest { RnnToFeedForwardPreProcessor proc = new RnnToFeedForwardPreProcessor(); NeuralNetConfiguration nnc = NeuralNetConfiguration.builder() - .layer(new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(layerSize) + .layer(org.deeplearning4j.nn.conf.layers.DenseLayer.builder().nIn(layerSize) .nOut(layerSize).build()) .build(); @@ -143,7 +143,7 @@ public class TestPreProcessors extends BaseDL4JTest { FeedForwardToRnnPreProcessor proc = new FeedForwardToRnnPreProcessor(); NeuralNetConfiguration nnc = NeuralNetConfiguration.builder() - .layer(new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(layerSize) + .layer(org.deeplearning4j.nn.conf.layers.DenseLayer.builder().nIn(layerSize) .nOut(layerSize).build()) .build(); @@ -227,7 +227,7 @@ public class TestPreProcessors extends BaseDL4JTest { NeuralNetConfiguration nnc = NeuralNetConfiguration.builder() - .layer(new org.deeplearning4j.nn.conf.layers.ConvolutionLayer.Builder( + .layer(org.deeplearning4j.nn.conf.layers.ConvolutionLayer.builder( inputWidth, inputHeight).nIn(cnnNChannelsIn) .nOut(nChannels).build()) .build(); @@ -309,7 +309,7 @@ public class TestPreProcessors extends BaseDL4JTest { NeuralNetConfiguration nnc = NeuralNetConfiguration.builder() - .layer(new org.deeplearning4j.nn.conf.layers.ConvolutionLayer.Builder( + .layer(org.deeplearning4j.nn.conf.layers.ConvolutionLayer.builder( inputWidth, inputHeight).nIn(cnnNChannelsIn) .nOut(nChannels).build()) .build(); @@ -397,12 +397,12 @@ public class TestPreProcessors extends BaseDL4JTest { //FF->RNN and RNN->FF NeuralNetConfiguration conf1 = NeuralNetConfiguration.builder() - .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(5) + .layer(0, org.deeplearning4j.nn.conf.layers.DenseLayer.builder().nIn(5) .nOut(6).build()) - .layer(1, new GravesLSTM.Builder().nIn(6).nOut(7).build()) - .layer(2, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(7) + .layer(1, GravesLSTM.builder().nIn(6).nOut(7).build()) + .layer(2, org.deeplearning4j.nn.conf.layers.DenseLayer.builder().nIn(7) .nOut(8).build()) - .layer(3, new RnnOutputLayer.Builder().nIn(8).nOut(9).activation(Activation.SOFTMAX).build()).build(); + .layer(3, RnnOutputLayer.builder().nIn(8).nOut(9).activation(Activation.SOFTMAX).build()).build(); //Expect preprocessors: layer1: FF->RNN; 2: RNN->FF; 3: FF->RNN assertEquals(3, conf1.getInputPreProcessors().size()); assertTrue(conf1.getInputPreProcess(1) instanceof FeedForwardToRnnPreProcessor); @@ -412,10 +412,10 @@ public class TestPreProcessors extends 
BaseDL4JTest { //FF-> CNN, CNN-> FF, FF->RNN NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder() - .layer(0, new org.deeplearning4j.nn.conf.layers.ConvolutionLayer.Builder().nOut(10) + .layer(0, org.deeplearning4j.nn.conf.layers.ConvolutionLayer.builder().nOut(10) .kernelSize(5, 5).stride(1, 1).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nOut(6).build()) - .layer(2, new RnnOutputLayer.Builder().nIn(6).nOut(5).activation(Activation.SOFTMAX).build()) + .layer(1, org.deeplearning4j.nn.conf.layers.DenseLayer.builder().nOut(6).build()) + .layer(2, RnnOutputLayer.builder().nIn(6).nOut(5).activation(Activation.SOFTMAX).build()) .inputType(InputType.convolutionalFlat(28, 28, 1)).build(); //Expect preprocessors: 0: FF->CNN; 1: CNN->FF; 2: FF->RNN assertEquals(3, conf2.getInputPreProcessors().size()); @@ -425,10 +425,10 @@ public class TestPreProcessors extends BaseDL4JTest { //CNN-> FF, FF->RNN - InputType.convolutional instead of convolutionalFlat NeuralNetConfiguration conf2a = NeuralNetConfiguration.builder() - .layer(0, new org.deeplearning4j.nn.conf.layers.ConvolutionLayer.Builder().nOut(10) + .layer(0, org.deeplearning4j.nn.conf.layers.ConvolutionLayer.builder().nOut(10) .kernelSize(5, 5).stride(1, 1).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nOut(6).build()) - .layer(2, new RnnOutputLayer.Builder().nIn(6).nOut(5).activation(Activation.SOFTMAX).build()) + .layer(1, org.deeplearning4j.nn.conf.layers.DenseLayer.builder().nOut(6).build()) + .layer(2, RnnOutputLayer.builder().nIn(6).nOut(5).activation(Activation.SOFTMAX).build()) .inputType(InputType.convolutional(28, 28, 1)).build(); //Expect preprocessors: 1: CNN->FF; 2: FF->RNN assertEquals(2, conf2a.getInputPreProcessors().size()); @@ -438,10 +438,10 @@ public class TestPreProcessors extends BaseDL4JTest { //FF->CNN and CNN->RNN: NeuralNetConfiguration conf3 = NeuralNetConfiguration.builder().list() - .layer(0, new org.deeplearning4j.nn.conf.layers.ConvolutionLayer.Builder().nOut(10) + .layer(0, org.deeplearning4j.nn.conf.layers.ConvolutionLayer.builder().nOut(10) .kernelSize(5, 5).stride(1, 1).build()) - .layer(1, new GravesLSTM.Builder().nOut(6).build()) - .layer(2, new RnnOutputLayer.Builder().nIn(6).nOut(5).activation(Activation.SOFTMAX).build()) + .layer(1, GravesLSTM.builder().nOut(6).build()) + .layer(2, RnnOutputLayer.builder().nIn(6).nOut(5).activation(Activation.SOFTMAX).build()) .inputType(InputType.convolutionalFlat(28, 28, 1)).build(); //Expect preprocessors: 0: FF->CNN, 1: CNN->RNN; assertEquals(2, conf3.getInputPreProcessors().size()); @@ -454,16 +454,16 @@ public class TestPreProcessors extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list().layer(0, - new org.deeplearning4j.nn.conf.layers.ConvolutionLayer.Builder( + org.deeplearning4j.nn.conf.layers.ConvolutionLayer.builder( 4, 4) // 28*28*1 => 15*15*10 .nIn(1).nOut(10).padding(2, 2) .stride(2, 2) .weightInit(WeightInit.RELU) .activation(Activation.RELU) .build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder() + .layer(1, org.deeplearning4j.nn.conf.layers.DenseLayer.builder() .activation(Activation.RELU).nOut(200).build()) - .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(200) + .layer(2, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT).nIn(200) .nOut(5).weightInit(WeightInit.RELU) .activation(Activation.SOFTMAX).build()) .inputType(InputType.convolutionalFlat(28, 28, 1)) diff --git 
a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/weightnoise/TestWeightNoise.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/weightnoise/TestWeightNoise.java index 8977d1b3f..6fce9023e 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/weightnoise/TestWeightNoise.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/weightnoise/TestWeightNoise.java @@ -67,9 +67,9 @@ public class TestWeightNoise extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .weightNoise(wn) - .layer(new DenseLayer.Builder().nIn(10).nOut(10).build()) - .layer(new DenseLayer.Builder().nIn(10).nOut(10).weightNoise(new DropConnect(0.25)).build()) - .layer(new OutputLayer.Builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build()) + .layer(DenseLayer.builder().nIn(10).nOut(10).build()) + .layer(DenseLayer.builder().nIn(10).nOut(10).weightNoise(new DropConnect(0.25)).build()) + .layer(OutputLayer.builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -86,9 +86,9 @@ public class TestWeightNoise extends BaseDL4JTest { .weightNoise(wn) .graphBuilder() .addInputs("in") - .layer("0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in") - .layer("1", new DenseLayer.Builder().nIn(10).nOut(10).weightNoise(new DropConnect(0.25)).build(), "0") - .layer("2", new OutputLayer.Builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build(), "1") + .layer("0", DenseLayer.builder().nIn(10).nOut(10).build(), "in") + .layer("1", DenseLayer.builder().nIn(10).nOut(10).weightNoise(new DropConnect(0.25)).build(), "0") + .layer("2", OutputLayer.builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build(), "1") .setOutputs("2") .build(); @@ -145,9 +145,9 @@ public class TestWeightNoise extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() - .layer(new DenseLayer.Builder().nIn(10).nOut(10).weightNoise(wn1).build()) - .layer(new DenseLayer.Builder().nIn(10).nOut(10).weightNoise(wn2).build()) - .layer(new OutputLayer.Builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).weightNoise(wn3).build()) + .layer(DenseLayer.builder().nIn(10).nOut(10).weightNoise(wn1).build()) + .layer(DenseLayer.builder().nIn(10).nOut(10).weightNoise(wn2).build()) + .layer(OutputLayer.builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).weightNoise(wn3).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -170,9 +170,9 @@ public class TestWeightNoise extends BaseDL4JTest { ComputationGraphConfiguration conf2 = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") - .layer("0", new DenseLayer.Builder().nIn(10).nOut(10).weightNoise(wn1).build(), "in") - .layer("1", new DenseLayer.Builder().nIn(10).nOut(10).weightNoise(wn2).build(), "0") - .layer("2", new OutputLayer.Builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).weightNoise(wn3).build(), "1") + .layer("0", DenseLayer.builder().nIn(10).nOut(10).weightNoise(wn1).build(), "in") + .layer("1", DenseLayer.builder().nIn(10).nOut(10).weightNoise(wn2).build(), "0") + .layer("2", OutputLayer.builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).weightNoise(wn3).build(), "1") .setOutputs("2") .build(); @@ -249,7 +249,7 @@ public class TestWeightNoise extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .weightInit(WeightInit.ONES) - .layer(new 
OutputLayer.Builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build()) + .layer(OutputLayer.builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/dtypes/DTypeTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/dtypes/DTypeTests.java index 2f2a316dd..1aabd53e2 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/dtypes/DTypeTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/dtypes/DTypeTests.java @@ -305,9 +305,9 @@ public class DTypeTests extends BaseDL4JTest { .updater(new Adam(0.01)) .dataType(DataType.DOUBLE) .list() - .layer(new DenseLayer.Builder().activation(Activation.TANH).nIn(10).nOut(10).build()) - .layer(new DenseLayer.Builder().activation(Activation.TANH).nIn(10).nOut(10).build()) - .layer(new OutputLayer.Builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build()) + .layer(DenseLayer.builder().activation(Activation.TANH).nIn(10).nOut(10).build()) + .layer(DenseLayer.builder().activation(Activation.TANH).nIn(10).nOut(10).build()) + .layer(OutputLayer.builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -389,9 +389,9 @@ public class DTypeTests extends BaseDL4JTest { .dataType(DataType.DOUBLE) .graphBuilder() .addInputs("in") - .layer("l0", new DenseLayer.Builder().activation(Activation.TANH).nIn(10).nOut(10).build(), "in") - .layer("l1", new DenseLayer.Builder().activation(Activation.TANH).nIn(10).nOut(10).build(), "l0") - .layer("out", new OutputLayer.Builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build(), "l1") + .layer("l0", DenseLayer.builder().activation(Activation.TANH).nIn(10).nOut(10).build(), "in") + .layer("l1", DenseLayer.builder().activation(Activation.TANH).nIn(10).nOut(10).build(), "l0") + .layer("out", OutputLayer.builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build(), "l1") .setOutputs("out") .build(); @@ -477,24 +477,24 @@ public class DTypeTests extends BaseDL4JTest { LayerConfiguration secondLast; switch (outputLayer) { case 0: - ol = new OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build(); - secondLast = new GlobalPoolingLayer(PoolingType.MAX); + ol = OutputLayer.builder().nOut(10).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build(); + secondLast = GlobalPoolingLayer.builder(PoolingType.MAX).build(); break; case 1: - ol = new LossLayer.Builder().activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build(); - secondLast = new FrozenLayerWithBackprop(new DenseLayer.Builder().nOut(10).activation(Activation.SIGMOID).build()); + ol = LossLayer.builder().activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT.getILossFunction()).build(); + secondLast = FrozenLayerWithBackprop.builder().underlying(DenseLayer.builder().nOut(10).activation(Activation.SIGMOID).build()).build(); break; case 2: - ol = new CenterLossOutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build(); - secondLast = new 
VariationalAutoencoder.Builder().encoderLayerSizes(10).decoderLayerSizes(10).nOut(10).activation(Activation.SIGMOID).build(); + ol =CenterLossOutputLayer.builder().nOut(10).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build(); + secondLast = VariationalAutoencoder.builder().encoderLayerSizes(10).decoderLayerSizes(10).nOut(10).activation(Activation.SIGMOID).build(); break; case 3: - ol = new CnnLossLayer.Builder().activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build(); - secondLast = new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).nOut(3).activation(Activation.TANH).build(); + ol = CnnLossLayer.builder().activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT.getILossFunction()).build(); + secondLast = ConvolutionLayer.builder().kernelSize(2, 2).stride(1, 1).nOut(3).activation(Activation.TANH).build(); break; case 4: - ol = new Yolo2OutputLayer.Builder().boundingBoxPriors(Nd4j.create(new double[][]{{1.0, 1.0}, {2.0, 2.0}}).castTo(networkDtype)).build(); - secondLast = new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).nOut(14).activation(Activation.TANH).build(); + ol = Yolo2OutputLayer.builder().boundingBoxes(Nd4j.create(new double[][]{{1.0, 1.0}, {2.0, 2.0}}).castTo(networkDtype)).build(); + secondLast = ConvolutionLayer.builder().kernelSize(2, 2).stride(1, 1).nOut(14).activation(Activation.TANH).build(); break; default: throw new RuntimeException(); @@ -505,28 +505,28 @@ public class DTypeTests extends BaseDL4JTest { .dataType(networkDtype) .convolutionMode(ConvolutionMode.Same) .updater(new Adam(1e-2)) - .list() - .layer(new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).nOut(3).activation(Activation.TANH).build()) - .layer(new LocalResponseNormalization()) - .layer(new DropoutLayer(0.5)) - .layer(new DropoutLayer(new AlphaDropout(0.5))) - .layer(new DropoutLayer(new GaussianDropout(0.5))) - .layer(new DropoutLayer(new GaussianNoise(0.1))) - .layer(new DropoutLayer(new SpatialDropout(0.5))) - .layer(new SubsamplingLayer.Builder().poolingType(SubsamplingLayer.PoolingType.AVG).kernelSize(3, 3).stride(2, 2).build()) - .layer(new Pooling2D.Builder().poolingType(SubsamplingLayer.PoolingType.AVG).kernelSize(2, 2).stride(1, 1).build()) - .layer(new Deconvolution2D.Builder().kernelSize(2, 2).stride(2, 2).nOut(3).activation(Activation.TANH).build()) -// .layer(new LocallyConnected2D.Builder().nOut(3).kernelSize(2,2).stride(1,1).activation(Activation.SIGMOID).build()) //EXCEPTION - .layer(new ZeroPaddingLayer(1, 1)) - .layer(new Cropping2D(1, 1)) - .layer(new IdentityLayer()) - .layer(new Upsampling2D.Builder().size(2).build()) - .layer(new SubsamplingLayer.Builder().kernelSize(2, 2).stride(2, 2).build()) - .layer(new DepthwiseConvolution2D.Builder().nOut(3).activation(Activation.RELU).build()) - .layer(new SeparableConvolution2D.Builder().nOut(3).activation(Activation.HARDTANH).build()) - .layer(new MaskLayer()) - .layer(new BatchNormalization.Builder().build()) - .layer(new ActivationLayer(Activation.LEAKYRELU)) + + .layer(ConvolutionLayer.builder().kernelSize(2, 2).stride(1, 1).nOut(3).activation(Activation.TANH).build()) + .layer(LocalResponseNormalization.builder()) + .layer(DropoutLayer.builder(0.5).build()) + .layer(DropoutLayer.builder(new AlphaDropout(0.5)).build()) + .layer(DropoutLayer.builder(new GaussianDropout(0.5)).build()) + .layer(DropoutLayer.builder(new GaussianNoise(0.1)).build()) + .layer(DropoutLayer.builder(new SpatialDropout(0.5)).build()) + 
.layer(SubsamplingLayer.builder().poolingType(SubsamplingLayer.PoolingType.AVG.toPoolingType()).kernelSize(3, 3).stride(2, 2).build()) + .layer(Pooling2D.builder().poolingType(SubsamplingLayer.PoolingType.AVG.toPoolingType()).kernelSize(2, 2).stride(1, 1).build()) + .layer(Deconvolution2D.builder().kernelSize(2, 2).stride(2, 2).nOut(3).activation(Activation.TANH).build()) +// .layer(LocallyConnected2D.builder().nOut(3).kernelSize(2,2).stride(1,1).activation(Activation.SIGMOID).build()) //EXCEPTION + .layer(ZeroPaddingLayer.builder(1, 1).build()) + .layer(Cropping2D.builder(1, 1).build()) + .layer(IdentityLayer.builder()) + .layer(Upsampling2D.builder().size(2).build()) + .layer(SubsamplingLayer.builder().kernelSize(2, 2).stride(2, 2).build()) + .layer(DepthwiseConvolution2D.builder().nOut(3).activation(Activation.RELU).build()) + .layer(SeparableConvolution2D.builder().nOut(3).activation(Activation.HARDTANH).build()) + .layer(MaskLayer.builder()) + .layer(BatchNormalization.builder().build()) + .layer(ActivationLayer.builder().activation(Activation.LEAKYRELU).build()) .layer(secondLast) .layer(ol) .inputType(InputType.convolutionalFlat(8, 8, 1)) @@ -603,16 +603,16 @@ public class DTypeTests extends BaseDL4JTest { LayerConfiguration secondLast; switch (outputLayer) { case 0: - ol = new OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build(); - secondLast = new GlobalPoolingLayer(PoolingType.AVG); + ol = OutputLayer.builder().nOut(10).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build(); + secondLast = GlobalPoolingLayer.builder(PoolingType.AVG).build(); break; case 1: - ol = new Cnn3DLossLayer.Builder(Convolution3D.DataFormat.NCDHW).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build(); - secondLast = new Convolution3D.Builder().nOut(3).activation(Activation.ELU).build(); + ol = Cnn3DLossLayer.builder().dataFormat(Convolution3D.DataFormat.NCDHW).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT.getILossFunction()).build(); + secondLast = Convolution3D.builder().nOut(3).activation(Activation.ELU).build(); break; case 2: - ol = new OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build(); - secondLast = new Convolution3D.Builder().nOut(3).activation(Activation.ELU).build(); + ol = OutputLayer.builder().nOut(10).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build(); + secondLast = Convolution3D.builder().nOut(3).activation(Activation.ELU).build(); break; default: throw new RuntimeException(); @@ -623,15 +623,15 @@ public class DTypeTests extends BaseDL4JTest { .dataType(networkDtype) .convolutionMode(ConvolutionMode.Same) .updater(new Nesterovs(1e-2, 0.9)) - .list() - .layer(new Convolution3D.Builder().kernelSize(2, 2, 2).stride(1, 1, 1).nOut(3).activation(Activation.TANH).build()) - .layer(new Convolution3D.Builder().kernelSize(2, 2, 2).stride(1, 1, 1).nOut(3).activation(Activation.TANH).build()) - .layer(new Subsampling3DLayer.Builder().poolingType(PoolingType.AVG).kernelSize(2, 2, 2).stride(2, 2, 2).build()) - .layer(new Deconvolution3D.Builder().kernelSize(2,2,2).stride(1,1,1).nIn(3).nOut(3).activation(Activation.TANH).build()) - .layer(new Cropping3D.Builder(1, 1, 1, 1, 1, 1).build()) - .layer(new ZeroPadding3DLayer.Builder(1, 1, 1, 1, 1, 1).build()) - .layer(new ActivationLayer(Activation.LEAKYRELU)) - .layer(new 
Upsampling3D.Builder().size(2).build()) + + .layer(Convolution3D.builder().kernelSize(2, 2, 2).stride(1, 1, 1).nOut(3).activation(Activation.TANH).build()) + .layer(Convolution3D.builder().kernelSize(2, 2, 2).stride(1, 1, 1).nOut(3).activation(Activation.TANH).build()) + .layer(Subsampling3DLayer.builder().poolingType(PoolingType.AVG).kernelSize(2, 2, 2).stride(2, 2, 2).build()) + .layer(Deconvolution3D.builder().kernelSize(2,2,2).stride(1,1,1).nIn(3).nOut(3).activation(Activation.TANH).build()) + .layer(Cropping3D.builder(1, 1, 1, 1, 1, 1).build()) + .layer(ZeroPadding3DLayer.builder(1, 1, 1, 1, 1, 1).build()) + .layer(ActivationLayer.builder(Activation.LEAKYRELU).build()) + .layer(Upsampling3D.builder().size(2).build()) .layer(secondLast) .layer(ol) .inputType(InputType.convolutional3D(Convolution3D.DataFormat.NCDHW, 8, 8, 8, 1)) @@ -714,16 +714,16 @@ public class DTypeTests extends BaseDL4JTest { LayerConfiguration secondLast; switch (outputLayer) { case 0: - ol = new OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build(); - secondLast = new GlobalPoolingLayer(PoolingType.MAX); + ol = OutputLayer.builder().nOut(10).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build(); + secondLast = GlobalPoolingLayer.builder(PoolingType.MAX).build(); break; case 1: - ol = new RnnOutputLayer.Builder().activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).nOut(5).build(); - secondLast = new Convolution1D.Builder().kernelSize(2).nOut(5).build(); + ol = RnnOutputLayer.builder().activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).nOut(5).build(); + secondLast = Convolution1D.builder().kernelSize(2).nOut(5).build(); break; case 2: - ol = new RnnLossLayer.Builder().activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build(); - secondLast = new Convolution1D.Builder().kernelSize(2).nOut(5).build(); + ol = RnnLossLayer.builder().activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT.getILossFunction()).build(); + secondLast = Convolution1D.builder().kernelSize(2).nOut(5).build(); break; default: throw new RuntimeException(); @@ -737,14 +737,14 @@ public class DTypeTests extends BaseDL4JTest { .convolutionMode(ConvolutionMode.Same) .updater(new Adam(1e-2)) .list() - .layer(new Convolution1D.Builder() + .layer(Convolution1D.builder() .kernelSize(2) .stride(1).nOut(3). 
activation(Activation.TANH).build()) - .layer(new Subsampling1DLayer.Builder().poolingType(PoolingType.MAX).kernelSize(5).stride(1).build()) - .layer(new Cropping1D.Builder(1).build()) - .layer(new ZeroPadding1DLayer(1)) - .layer(new Upsampling1D.Builder(2).build()) + .layer(Subsampling1DLayer.builder().poolingType(PoolingType.MAX).kernelSize(5).stride(1).build()) + .layer(Cropping1D.builder(1).build()) + .layer(ZeroPadding1DLayer.builder(1).build()) + .layer(Upsampling1D.builder(2).build()) .layer(secondLast) .layer(ol) .inputType(InputType.recurrent(5, 10,RNNFormat.NCW)) @@ -817,9 +817,9 @@ public class DTypeTests extends BaseDL4JTest { .convolutionMode(ConvolutionMode.Same) .updater(new Adam(1e-2)) .list() - .layer(new SpaceToBatchLayer.Builder().blocks(1, 1).build()) - .layer(new SpaceToDepthLayer.Builder().blocks(2).build()) - .layer(new OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build()) + .layer(SpaceToBatchLayer.builder().blockSize(1, 1).build()) + .layer(SpaceToDepthLayer.builder().blockSize(2).build()) + .layer(OutputLayer.builder().nOut(10).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build()) .inputType(InputType.convolutional(28, 28, 5)) .build(); @@ -880,16 +880,16 @@ public class DTypeTests extends BaseDL4JTest { LayerConfiguration secondLast; switch (outputLayer) { case 0: - ol = new RnnOutputLayer.Builder().nOut(5).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build(); - secondLast = new SimpleRnn.Builder().nOut(5).activation(Activation.TANH).build(); + ol = RnnOutputLayer.builder().nOut(5).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build(); + secondLast = SimpleRnn.builder().nOut(5).activation(Activation.TANH).build(); break; case 1: - ol = new RnnLossLayer.Builder().activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build(); - secondLast = new SimpleRnn.Builder().nOut(5).activation(Activation.TANH).build(); + ol = RnnLossLayer.builder().activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build(); + secondLast = SimpleRnn.builder().nOut(5).activation(Activation.TANH).build(); break; case 2: - ol = new OutputLayer.Builder().nOut(5).build(); - secondLast = new LastTimeStep(new SimpleRnn.Builder().nOut(5).activation(Activation.TANH).build()); + ol = OutputLayer.builder().nOut(5).build(); + secondLast = LastTimeStep.builder().underlying(SimpleRnn.builder().nOut(5).activation(Activation.TANH).build()).build(); break; default: throw new RuntimeException(); @@ -899,15 +899,15 @@ public class DTypeTests extends BaseDL4JTest { .dataType(networkDtype) .convolutionMode(ConvolutionMode.Same) .updater(new Adam(1e-2)) - .list() - .layer(new LSTM.Builder().nIn(5).nOut(5).activation(Activation.TANH).build()) - .layer(new GravesLSTM.Builder().nIn(5).nOut(5).activation(Activation.TANH).build()) - .layer(new DenseLayer.Builder().nOut(5).build()) - .layer(new GravesBidirectionalLSTM.Builder().nIn(5).nOut(5).activation(Activation.TANH).build()) - .layer(new Bidirectional(new LSTM.Builder().nIn(5).nOut(5).activation(Activation.TANH).build())) - .layer(new TimeDistributed(new DenseLayer.Builder().nIn(10).nOut(5).activation(Activation.TANH).build())) - .layer(new SimpleRnn.Builder().nIn(5).nOut(5).build()) - .layer(new MaskZeroLayer.Builder().underlying(new SimpleRnn.Builder().nIn(5).nOut(5).build()).maskValue(0.0).build()) + + 
.layer(LSTM.builder().nIn(5).nOut(5).activation(Activation.TANH).build()) + .layer(GravesLSTM.builder().nIn(5).nOut(5).activation(Activation.TANH).build()) + .layer(DenseLayer.builder().nOut(5).build()) + .layer(GravesBidirectionalLSTM.builder().nIn(5).nOut(5).activation(Activation.TANH).build()) + .layer(Bidirectional.builder(LSTM.builder().nIn(5).nOut(5).activation(Activation.TANH).build()).build()) + .layer(TimeDistributed.builder().underlying(DenseLayer.builder().nIn(10).nOut(5).activation(Activation.TANH).build()).build()) + .layer(SimpleRnn.builder().nIn(5).nOut(5).build()) + .layer(MaskZeroLayer.builder().underlying(SimpleRnn.builder().nIn(5).nOut(5).build()).maskingValue(0.0).build()) .layer(secondLast) .layer(ol) .build(); @@ -986,14 +986,14 @@ public class DTypeTests extends BaseDL4JTest { .updater(new NoOp()) .dist(new UniformDistribution(-6, 6)) - .layer(new PrimaryCapsules.Builder(primaryCapsDim, primarpCapsChannel) + .layer(PrimaryCapsules.builder(primaryCapsDim, primarpCapsChannel) .kernelSize(3, 3) .stride(2, 2) .build()) - .layer(new CapsuleLayer.Builder(capsule, capsuleDim, routing).build()) - .layer(new CapsuleStrengthLayer.Builder().build()) - .layer(new ActivationLayer.Builder(new ActivationSoftmax()).build()) - .layer(new LossLayer.Builder(new LossNegativeLogLikelihood()).build()) + .layer(CapsuleLayer.builder(capsule, capsuleDim, routing).build()) + .layer(CapsuleStrengthLayer.builder().build()) + .layer(ActivationLayer.builder(new ActivationSoftmax()).build()) + .layer(LossLayer.builder().lossFunction(new LossNegativeLogLikelihood()).build()) .inputType(InputType.convolutional(height, width, inputDepth)) .build(); @@ -1062,33 +1062,33 @@ public class DTypeTests extends BaseDL4JTest { INDArray input; if (test == 0) { if (frozen) { - conf.layer("0", new FrozenLayer(new EmbeddingLayer.Builder().nIn(5).nOut(5).build()), "in"); + conf.layer("0", FrozenLayer.builder(EmbeddingLayer.builder().nIn(5).nOut(5).build()).build(), "in"); } else { - conf.layer("0", new EmbeddingLayer.Builder().nIn(5).nOut(5).build(), "in"); + conf.layer("0", EmbeddingLayer.builder().nIn(5).nOut(5).build(), "in"); } input = Nd4j.zeros(networkDtype, 10, 1).muli(5).castTo(DataType.INT); conf.setInputTypes(InputType.feedForward(1)); } else if (test == 1) { if (frozen) { - conf.layer("0", new FrozenLayer(new EmbeddingSequenceLayer.Builder().nIn(5).nOut(5).build()), "in"); + conf.layer("0", FrozenLayer.builder(EmbeddingSequenceLayer.builder().nIn(5).nOut(5).build()).build(), "in"); } else { - conf.layer("0", new EmbeddingSequenceLayer.Builder().nIn(5).nOut(5).build(), "in"); + conf.layer("0", EmbeddingSequenceLayer.builder().nIn(5).nOut(5).build(), "in"); } - conf.layer("gp", new GlobalPoolingLayer.Builder(PoolingType.PNORM).pnorm(2).poolingDimensions(2).build(), "0"); + conf.layer("gp", GlobalPoolingLayer.builder(PoolingType.PNORM).pnorm(2).poolingDimensions(2).build(), "0"); input = Nd4j.zeros(networkDtype, 10, 1, 5).muli(5).castTo(DataType.INT); conf.setInputTypes(InputType.recurrent(1)); } else { - conf.layer("0", new RepeatVector.Builder().repetitionFactor(5).nOut(5).build(), "in"); - conf.layer("gp", new GlobalPoolingLayer.Builder(PoolingType.SUM).build(), "0"); + conf.layer("0", RepeatVector.builder().repetitionFactor(5).nOut(5).build(), "in"); + conf.layer("gp", GlobalPoolingLayer.builder(PoolingType.SUM).build(), "0"); input = Nd4j.zeros(networkDtype, 10, 5); conf.setInputTypes(InputType.feedForward(5)); } - conf.appendLayer("el", new ElementWiseMultiplicationLayer.Builder().nOut(5).build()) - 
.appendLayer("ae", new AutoEncoder.Builder().nOut(5).build()) - .appendLayer("prelu", new PReLULayer.Builder().nOut(5).inputShape(5).build()) - .appendLayer("out", new OutputLayer.Builder().nOut(10).build()); + conf.appendLayer("el", ElementWiseMultiplicationLayer.builder().nOut(5).build()) + .appendLayer("ae", AutoEncoder.builder().nOut(5).build()) + .appendLayer("prelu", PReLULayer.builder().nOut(5).inputShape(5).build()) + .appendLayer("out", OutputLayer.builder().nOut(10).build()); ComputationGraph net = new ComputationGraph(conf.build()); net.init(); @@ -1153,34 +1153,34 @@ public class DTypeTests extends BaseDL4JTest { switch (test) { case 0: b.addInputs("in") - .addLayer("l", new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).nOut(1).build(), "in") + .addLayer("l", ConvolutionLayer.builder().kernelSize(2, 2).stride(1, 1).nOut(1).build(), "in") .addVertex("preproc", new PreprocessorVertex(new CnnToRnnPreProcessor(28, 28, 1)), "l") - .addLayer("out", new OutputLayer.Builder().nOut(10).build(), "preproc") + .addLayer("out", OutputLayer.builder().nOut(10).build(), "preproc") .setInputTypes(InputType.convolutional(28, 28, 1)) .setOutputs("out"); in = new INDArray[]{Nd4j.rand(networkDtype, 2, 1, 28, 28)}; break; case 1: b.addInputs("in") - .addLayer("l", new DenseLayer.Builder().nOut(16).build(), "in") + .addLayer("l", DenseLayer.builder().nOut(16).build(), "in") .addVertex("preproc", new PreprocessorVertex(new FeedForwardToCnn3DPreProcessor(2, 2, 2, 2, true)), "l") .addVertex("preproc2", new PreprocessorVertex(new PermutePreprocessor(0, 2, 3, 4, 1)), "preproc") .addVertex("preproc3", new PreprocessorVertex(new ReshapePreprocessor(new long[]{2, 2, 2, 2}, new long[]{16}, false)), "preproc2") - .addLayer("out", new OutputLayer.Builder().nIn(16).nOut(10).build(), "preproc3") + .addLayer("out", OutputLayer.builder().nIn(16).nOut(10).build(), "preproc3") .setInputTypes(InputType.feedForward(5)) .setOutputs("out"); in = new INDArray[]{Nd4j.rand(networkDtype, 2, 5)}; break; case 2: b.addInputs("in") - .addLayer("1", new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).nOut(1).build(), "in") + .addLayer("1", ConvolutionLayer.builder().kernelSize(2, 2).stride(1, 1).nOut(1).build(), "in") .addVertex("1a", new PoolHelperVertex(), "1") .addVertex("2", new ShiftVertex(1), "1a") .addVertex("3", new ScaleVertex(2), "2") .addVertex("4", new ReshapeVertex(2, -1), "3") .addVertex("5", new SubsetVertex(0, 99), "4") .addVertex("6", new L2NormalizeVertex(), "5") - .addLayer("out", new OCNNOutputLayer.Builder().hiddenLayerSize(10).nIn(100).build(), "6") + .addLayer("out",OCNNOutputLayer.builder().hiddenLayerSize(10).nIn(100).build(), "6") .setInputTypes(InputType.convolutional(28, 28, 1)) .setOutputs("out"); in = new INDArray[]{Nd4j.rand(networkDtype, 2, 1, 28, 28)}; @@ -1193,23 +1193,23 @@ public class DTypeTests extends BaseDL4JTest { .addVertex("3", new StackVertex(), "2a", "2b") .addVertex("4", new DuplicateToTimeSeriesVertex("in3"), "3") .addVertex("5", new ReverseTimeSeriesVertex(), "4") - .addLayer("6", new GlobalPoolingLayer(PoolingType.AVG), "5") + .addLayer("6", GlobalPoolingLayer.builder(PoolingType.AVG).build(), "5") .addVertex("7", new LastTimeStepVertex("in3"), "in3") .addVertex("8", new MergeVertex(), "6", "7") .addVertex("9", new PreprocessorVertex(new ComposableInputPreProcessor()), "8") - .addLayer("out", new OutputLayer.Builder().nOut(10).build(), "9") + .addLayer("out", OutputLayer.builder().nOut(10).build(), "9") .setInputTypes(InputType.feedForward(8), 
InputType.feedForward(8), InputType.recurrent(8)) .setOutputs("out"); in = new INDArray[]{Nd4j.rand(networkDtype, 2, 8), Nd4j.rand(networkDtype, 2, 8), Nd4j.rand(networkDtype, 2, 8, 5)}; break; case 4: b.addInputs("in1", "in2") - .addLayer("1", new LSTM.Builder().nOut(8).build(), "in1") + .addLayer("1", LSTM.builder().nOut(8).build(), "in1") .addVertex("preproc1", new PreprocessorVertex(new RnnToCnnPreProcessor(2, 2, 2)), "1") .addVertex("preproc2", new PreprocessorVertex(new CnnToRnnPreProcessor(2, 2, 2)), "preproc1") - .addLayer("pool", new GlobalPoolingLayer(), "preproc2") - .addLayer("pool2", new GlobalPoolingLayer(), "in2") - .addLayer("out", new OutputLayer.Builder().nOut(10).build(), "pool", "pool2") + .addLayer("pool", GlobalPoolingLayer.builder().build(), "preproc2") + .addLayer("pool2", GlobalPoolingLayer.builder().build(), "in2") + .addLayer("out", OutputLayer.builder().nOut(10).build(), "pool", "pool2") .setInputTypes(InputType.recurrent(8), InputType.convolutional(28, 28, 1)) .setOutputs("out"); in = new INDArray[]{Nd4j.rand(networkDtype, 2, 8, 5), Nd4j.rand(networkDtype, 2, 1, 28, 28)}; @@ -1217,28 +1217,28 @@ public class DTypeTests extends BaseDL4JTest { case 5: b.addInputs("in1", "in2") .addVertex("fv", new FrozenVertex(new ScaleVertex(2.0)), "in1") - .addLayer("1", new DenseLayer.Builder().nOut(5).build(), "fv") - .addLayer("2", new DenseLayer.Builder().nOut(5).build(), "in2") + .addLayer("1", DenseLayer.builder().nOut(5).build(), "fv") + .addLayer("2", DenseLayer.builder().nOut(5).build(), "in2") .addVertex("v", new L2Vertex(), "1", "2") - .addLayer("out", new OutputLayer.Builder().nOut(10).build(), "v") + .addLayer("out", OutputLayer.builder().nOut(10).build(), "v") .setInputTypes(InputType.feedForward(5), InputType.feedForward(5)) .setOutputs("out"); in = new INDArray[]{Nd4j.rand(networkDtype, 2, 5), Nd4j.rand(networkDtype, 2, 5)}; break; case 6: b.addInputs("in") - .addLayer("1", new LSTM.Builder().nOut(5).build(), "in") + .addLayer("1", LSTM.builder().nOut(5).build(), "in") .addVertex("2", new PreprocessorVertex(new KerasFlattenRnnPreprocessor(5, 4)), "1") - .addLayer("out", new OutputLayer.Builder().nOut(10).build(), "2") + .addLayer("out", OutputLayer.builder().nOut(10).build(), "2") .setOutputs("out") .setInputTypes(InputType.recurrent(5, 4)); in = new INDArray[]{Nd4j.rand(networkDtype, 2, 5, 4)}; break; case 7: b.addInputs("in") - .addLayer("1", new ConvolutionLayer.Builder().kernelSize(2, 2).nOut(5).convolutionMode(ConvolutionMode.Same).build(), "in") + .addLayer("1", ConvolutionLayer.builder().kernelSize(2, 2).nOut(5).convolutionMode(ConvolutionMode.Same).build(), "in") .addVertex("2", new PreprocessorVertex(new CnnToFeedForwardPreProcessor(28, 28, 5)), "1") - .addLayer("out", new OutputLayer.Builder().nOut(10).build(), "2") + .addLayer("out", OutputLayer.builder().nOut(10).build(), "2") .setOutputs("out") .setInputTypes(InputType.convolutional(28, 28, 1)); in = new INDArray[]{Nd4j.rand(networkDtype, 2, 1, 28, 28)}; @@ -1311,9 +1311,9 @@ public class DTypeTests extends BaseDL4JTest { switch (test) { case 0: b.addInputs("in") - .addLayer("1", new LSTM.Builder().nOut(5).build(), "in") - .addLayer("2", new LocallyConnected1D.Builder().kernelSize(2).nOut(4).build(), "1") - .addLayer("out", new RnnOutputLayer.Builder().nOut(10).build(), "2") + .addLayer("1", LSTM.builder().nOut(5).build(), "in") + .addLayer("2", LocallyConnected1D.builder().kernelSize(2).nOut(4).build(), "1") + .addLayer("out", RnnOutputLayer.builder().nOut(10).build(), "2") .setOutputs("out") 
.setInputTypes(InputType.recurrent(5, 2)); in = new INDArray[]{Nd4j.rand(networkDtype, 2, 5, 2)}; @@ -1321,9 +1321,9 @@ public class DTypeTests extends BaseDL4JTest { break; case 1: b.addInputs("in") - .addLayer("1", new ConvolutionLayer.Builder().kernelSize(2, 2).nOut(5).convolutionMode(ConvolutionMode.Same).build(), "in") - .addLayer("2", new LocallyConnected2D.Builder().kernelSize(2, 2).nOut(5).build(), "1") - .addLayer("out", new OutputLayer.Builder().nOut(10).build(), "2") + .addLayer("1", ConvolutionLayer.builder().kernelSize(2, 2).nOut(5).convolutionMode(ConvolutionMode.Same).build(), "in") + .addLayer("2", LocallyConnected2D.builder().kernelSize(2, 2).nOut(5).build(), "1") + .addLayer("out", OutputLayer.builder().nOut(10).build(), "2") .setOutputs("out") .setInputTypes(InputType.convolutional(8, 8, 1)); in = new INDArray[]{Nd4j.rand(networkDtype, 2, 1, 8, 8)}; @@ -1399,12 +1399,12 @@ public class DTypeTests extends BaseDL4JTest { .updater(new NoOp()) .weightInit(WeightInit.XAVIER) .list() - .layer(new LSTM.Builder().nOut(layerSize).build()) - .layer(new SelfAttentionLayer.Builder().nOut(8).nHeads(2).projectInput(true).build()) - .layer(new LearnedSelfAttentionLayer.Builder().nOut(8).nHeads(2).nQueries(numQueries).projectInput(true).build()) - .layer(new RecurrentAttentionLayer.Builder().nIn(layerSize).nOut(layerSize).nHeads(1).projectInput(false).hasBias(false).build()) - .layer(new GlobalPoolingLayer.Builder().poolingType(PoolingType.MAX).build()) - .layer(new OutputLayer.Builder().nOut(nOut).activation(Activation.SOFTMAX) + .layer(LSTM.builder().nOut(layerSize).build()) + .layer(SelfAttentionLayer.builder().nOut(8).nHeads(2).projectInput(true).build()) + .layer(LearnedSelfAttentionLayer.builder().nOut(8).nHeads(2).nQueries(numQueries).projectInput(true).build()) + .layer(RecurrentAttentionLayer.builder().nIn(layerSize).nOut(layerSize).nHeads(1).projectInput(false).hasBias(false).build()) + .layer(GlobalPoolingLayer.builder().poolingType(PoolingType.MAX).build()) + .layer(OutputLayer.builder().nOut(nOut).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .inputType(InputType.recurrent(nIn)) .build(); @@ -1487,12 +1487,12 @@ public class DTypeTests extends BaseDL4JTest { .weightInit(WeightInit.XAVIER) .graphBuilder() .addInputs("input") - .addLayer("lstmKeys", new LSTM.Builder().nOut(layerSize).build(), "input") - .addLayer("lstmQueries", new LSTM.Builder().nOut(layerSize).build(), "input") - .addLayer("lstmValues", new LSTM.Builder().nOut(layerSize).build(), "input") + .addLayer("lstmKeys", LSTM.builder().nOut(layerSize).build(), "input") + .addLayer("lstmQueries", LSTM.builder().nOut(layerSize).build(), "input") + .addLayer("lstmValues", LSTM.builder().nOut(layerSize).build(), "input") .addVertex("attention", new AttentionVertex.Builder().nOut(8).nHeads(2).projectInput(true).nInQueries(layerSize).nInKeys(layerSize).nInValues(layerSize).build(), "lstmQueries", "lstmKeys", "lstmValues") - .addLayer("pooling", new GlobalPoolingLayer.Builder().poolingType(PoolingType.MAX).build(), "attention") - .addLayer("output", new OutputLayer.Builder().nOut(nOut).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build(), "pooling") + .addLayer("pooling", GlobalPoolingLayer.builder().poolingType(PoolingType.MAX).build(), "attention") + .addLayer("output", OutputLayer.builder().nOut(nOut).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build(), "pooling") .setOutputs("output") 
.setInputTypes(InputType.recurrent(nIn)) .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/ComputationGraphTestRNN.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/ComputationGraphTestRNN.java index 4197263b6..e0891ee77 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/ComputationGraphTestRNN.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/ComputationGraphTestRNN.java @@ -68,18 +68,18 @@ public class ComputationGraphTestRNN extends BaseDL4JTest { //4 layer network: 2 GravesLSTM + DenseLayerConfiguration + RnnOutputLayer. Hence also tests preprocessors. ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345).graphBuilder() .addInputs("in") - .addLayer("0", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(5).nOut(7) + .addLayer("0", org.deeplearning4j.nn.conf.layers.GravesLSTM.builder().nIn(5).nOut(7) .activation(Activation.TANH) .dist(new NormalDistribution(0, 0.5)).build(), "in") - .addLayer("1", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(7).nOut(8) + .addLayer("1", org.deeplearning4j.nn.conf.layers.GravesLSTM.builder().nIn(7).nOut(8) .activation(Activation.TANH) .dist(new NormalDistribution(0, 0.5)).build(), "0") - .addLayer("2", new DenseLayer.Builder().nIn(8).nOut(9).activation(Activation.TANH) + .addLayer("2", DenseLayer.builder().nIn(8).nOut(9).activation(Activation.TANH) .dist(new NormalDistribution(0, 0.5)) .build(), "1") - .addLayer("3", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .addLayer("3", RnnOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .nIn(9).nOut(4) .activation(Activation.SOFTMAX) .dist(new NormalDistribution(0, 0.5)).build(), "2") @@ -157,15 +157,15 @@ public class ComputationGraphTestRNN extends BaseDL4JTest { int timeSeriesLength = 6; ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().graphBuilder().addInputs("in") - .addLayer("0", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(5).nOut(7) + .addLayer("0", org.deeplearning4j.nn.conf.layers.GravesLSTM.builder().nIn(5).nOut(7) .activation(Activation.TANH) .dist(new NormalDistribution(0, 0.5)).build(), "in") - .addLayer("1", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(7).nOut(8) + .addLayer("1", org.deeplearning4j.nn.conf.layers.GravesLSTM.builder().nIn(7).nOut(8) .activation(Activation.TANH) .dist(new NormalDistribution(0, 0.5)) .build(), "0") - .addLayer("2", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .addLayer("2", RnnOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .nIn(8).nOut(4) .activation(Activation.SOFTMAX) .dist(new NormalDistribution(0, 0.5)).build(), "1") @@ -214,27 +214,27 @@ public class ComputationGraphTestRNN extends BaseDL4JTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345).graphBuilder() .addInputs("in0", "in1") .addLayer("lstm0", - new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(5).nOut(6) + org.deeplearning4j.nn.conf.layers.GravesLSTM.builder().nIn(5).nOut(6) .activation(Activation.TANH) .dist(new NormalDistribution(0, 0.5)).build(), "in0") .addLayer("lstm1", - new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(4).nOut(5) + org.deeplearning4j.nn.conf.layers.GravesLSTM.builder().nIn(4).nOut(5) .activation(Activation.TANH) .dist(new NormalDistribution(0, 0.5)).build(), "in1") - .addLayer("dense", new 
DenseLayer.Builder().nIn(6 + 5).nOut(9).activation(Activation.TANH) + .addLayer("dense", DenseLayer.builder().nIn(6 + 5).nOut(9).activation(Activation.TANH) .dist(new NormalDistribution(0, 0.5)) .build(), "lstm0", "lstm1") - .addLayer("out0", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .addLayer("out0", RnnOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .nIn(9).nOut(3) .activation(Activation.SOFTMAX) .dist(new NormalDistribution(0, 0.5)) .build(), "dense") - .addLayer("out1", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .addLayer("out1", RnnOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .nIn(9).nOut(4) .activation(Activation.SOFTMAX) .dist(new NormalDistribution(0, 0.5)).build(), "dense") @@ -344,15 +344,15 @@ public class ComputationGraphTestRNN extends BaseDL4JTest { .trainingWorkspaceMode(WorkspaceMode.NONE).inferenceWorkspaceMode(WorkspaceMode.NONE) .graphBuilder() .addInputs("in") - .addLayer("0", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(nIn).nOut(7) + .addLayer("0", org.deeplearning4j.nn.conf.layers.GravesLSTM.builder().nIn(nIn).nOut(7) .activation(Activation.TANH) .dist(new NormalDistribution(0, 0.5)).build(), "in") - .addLayer("1", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(7).nOut(8) + .addLayer("1", org.deeplearning4j.nn.conf.layers.GravesLSTM.builder().nIn(7).nOut(8) .activation(Activation.TANH) .dist(new NormalDistribution(0, 0.5)) .build(), "0") - .addLayer("out", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .addLayer("out", RnnOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .nIn(8).nOut(nOut) .activation(Activation.SOFTMAX) .dist(new NormalDistribution(0, 0.5)).build(), "1") @@ -364,15 +364,15 @@ public class ComputationGraphTestRNN extends BaseDL4JTest { .trainingWorkspaceMode(WorkspaceMode.NONE).inferenceWorkspaceMode(WorkspaceMode.NONE) .graphBuilder() .addInputs("in") - .addLayer("0", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(nIn).nOut(7) + .addLayer("0", org.deeplearning4j.nn.conf.layers.GravesLSTM.builder().nIn(nIn).nOut(7) .activation(Activation.TANH) .dist(new NormalDistribution(0, 0.5)).build(), "in") - .addLayer("1", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(7).nOut(8) + .addLayer("1", org.deeplearning4j.nn.conf.layers.GravesLSTM.builder().nIn(7).nOut(8) .activation(Activation.TANH) .dist(new NormalDistribution(0, 0.5)) .build(), "0") - .addLayer("out", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .addLayer("out", RnnOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .nIn(8).nOut(nOut) .activation(Activation.SOFTMAX) .dist(new NormalDistribution(0, 0.5)).build(), "1") @@ -459,15 +459,15 @@ public class ComputationGraphTestRNN extends BaseDL4JTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).graphBuilder() .addInputs("in") - .addLayer("0", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(nIn).nOut(7) + .addLayer("0", org.deeplearning4j.nn.conf.layers.GravesLSTM.builder().nIn(nIn).nOut(7) .activation(Activation.TANH) .dist(new NormalDistribution(0, 0.5)).build(), "in") - .addLayer("1", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(7).nOut(8) + .addLayer("1", org.deeplearning4j.nn.conf.layers.GravesLSTM.builder().nIn(7).nOut(8) .activation(Activation.TANH) .dist(new NormalDistribution(0, 
0.5)) .build(), "0") - .addLayer("out", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .addLayer("out", RnnOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .nIn(8).nOut(nOut) .activation(Activation.SOFTMAX) .dist(new NormalDistribution(0, 0.5)).build(), "1") @@ -496,15 +496,15 @@ public class ComputationGraphTestRNN extends BaseDL4JTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).graphBuilder() .addInputs("in") - .addLayer("0", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(nIn).nOut(7) + .addLayer("0", org.deeplearning4j.nn.conf.layers.GravesLSTM.builder().nIn(nIn).nOut(7) .activation(Activation.TANH) .dist(new NormalDistribution(0, 0.5)).build(), "in") - .addLayer("1", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(7).nOut(8) + .addLayer("1", org.deeplearning4j.nn.conf.layers.GravesLSTM.builder().nIn(7).nOut(8) .activation(Activation.TANH) .dist(new NormalDistribution(0, 0.5)) .build(), "0") - .addLayer("out", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .addLayer("out", RnnOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .nIn(8).nOut(nOut) .activation(Activation.SOFTMAX) .dist(new NormalDistribution(0, 0.5)).build(), "1") @@ -532,7 +532,7 @@ public class ComputationGraphTestRNN extends BaseDL4JTest { //Simple "does it throw an exception" type test... ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .graphBuilder().addInputs("in") - .addLayer("out", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MSE) + .addLayer("out", RnnOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE) .activation(Activation.IDENTITY).nIn(1).nOut(1).build(), "in") .setOutputs("out").backpropType(BackpropType.TruncatedBPTT).tbpttFwdLength(8) .setInputTypes(InputType.recurrent(1,1,RNNFormat.NCW)) @@ -555,7 +555,7 @@ public class ComputationGraphTestRNN extends BaseDL4JTest { //Simple "does it throw an exception" type test... ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .graphBuilder().addInputs("in") - .addLayer("out", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MSE) + .addLayer("out", RnnOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE) .activation(Activation.IDENTITY).nIn(1).nOut(1).build(), "in") .setOutputs("out").backpropType(tbptt ? 
BackpropType.TruncatedBPTT : BackpropType.Standard) .tbpttFwdLength(8).tbpttBackLength(8).build(); @@ -619,9 +619,9 @@ public class ComputationGraphTestRNN extends BaseDL4JTest { NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") - .layer("0", new org.deeplearning4j.nn.conf.layers.LSTM.Builder().nIn(nIn).nOut(nHiddenUnits).build(), "in") - .layer("1", new GlobalPoolingLayer(), "0") - .layer("2", new OutputLayer.Builder(LossFunctions.LossFunction.MSE).nIn(nHiddenUnits) + .layer("0", org.deeplearning4j.nn.conf.layers.LSTM.builder().nIn(nIn).nOut(nHiddenUnits).build(), "in") + .layer("1", GlobalPoolingLayer.builder().build(), "0") + .layer("2", OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(nHiddenUnits) .nOut(nOut) .activation(Activation.TANH).build(), "1") .setOutputs("2") diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestCompGraphCNN.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestCompGraphCNN.java index 4129592b6..c3d030e45 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestCompGraphCNN.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestCompGraphCNN.java @@ -62,19 +62,19 @@ public class TestCompGraphCNN extends BaseDL4JTest { .graphBuilder().addInputs("input") .setInputTypes(InputType.convolutional(32, 32, 3)) .addLayer("cnn1", - new ConvolutionLayer.Builder(4, 4).stride(2, 2).nIn(3).nOut(3) + ConvolutionLayer.builder(4, 4).stride(2, 2).nIn(3).nOut(3) .build(), "input") .addLayer("cnn2", - new ConvolutionLayer.Builder(4, 4).stride(2, 2).nIn(3).nOut(3) + ConvolutionLayer.builder(4, 4).stride(2, 2).nIn(3).nOut(3) .build(), "input") .addLayer("max1", - new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX) + SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX) .stride(1, 1).kernelSize(2, 2).build(), "cnn1", "cnn2") - .addLayer("dnn1", new DenseLayer.Builder().nOut(7).build(), "max1") - .addLayer("output", new OutputLayer.Builder().nIn(7).nOut(10).activation(Activation.SOFTMAX).build(), "dnn1") + .addLayer("dnn1", DenseLayer.builder().nOut(7).build(), "max1") + .addLayer("output", OutputLayer.builder().nIn(7).nOut(10).activation(Activation.SOFTMAX).build(), "dnn1") .setOutputs("output").build(); return conf; @@ -159,19 +159,19 @@ public class TestCompGraphCNN extends BaseDL4JTest { .seed(123).graphBuilder().addInputs("input") .setInputTypes(InputType.convolutional(nChannels, imageWidth, imageHeight)) - .addLayer("conv1", new ConvolutionLayer.Builder() + .addLayer("conv1", ConvolutionLayer.builder() .kernelSize(kernelHeight, kernelWidth).stride(1, 1) - .dataFormat(CNN2DFormat.NCHW) + .convFormat(CNN2DFormat.NCHW) .nIn(nChannels).nOut(2).weightInit(WeightInit.XAVIER) .activation(Activation.RELU).build(), "input") .addLayer("pool1", - new SubsamplingLayer.Builder() + SubsamplingLayer.builder() .dataFormat(CNN2DFormat.NCHW) - .poolingType(SubsamplingLayer.PoolingType.MAX) + .poolingType(SubsamplingLayer.PoolingType.MAX.toPoolingType()) .kernelSize(imageHeight - kernelHeight + 1, 1) .stride(1, 1).build(), "conv1") - .addLayer("output", new OutputLayer.Builder().nOut(classes).activation(Activation.SOFTMAX).build(), "pool1") + .addLayer("output", OutputLayer.builder().nOut(classes).activation(Activation.SOFTMAX).build(), "pool1") .setOutputs("output").build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestCompGraphUnsupervised.java 
b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestCompGraphUnsupervised.java index 2cf9e0db4..51a7d55e8 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestCompGraphUnsupervised.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestCompGraphUnsupervised.java @@ -67,7 +67,7 @@ public class TestCompGraphUnsupervised extends BaseDL4JTest { .trainingWorkspaceMode(wsm) .graphBuilder() .addInputs("in") - .addLayer("vae1", new VariationalAutoencoder.Builder() + .addLayer("vae1", VariationalAutoencoder.builder() .nIn(784) .nOut(32) .encoderLayerSizes(16) @@ -76,7 +76,7 @@ public class TestCompGraphUnsupervised extends BaseDL4JTest { .pzxActivationFunction(Activation.SIGMOID) .reconstructionDistribution(new BernoulliReconstructionDistribution(Activation.SIGMOID)) .build(), "in") - .addLayer("vae2", new VariationalAutoencoder.Builder() + .addLayer("vae2", VariationalAutoencoder.builder() .nIn(32) .nOut(8) .encoderLayerSizes(16) @@ -142,7 +142,7 @@ public class TestCompGraphUnsupervised extends BaseDL4JTest { .inferenceWorkspaceMode(wsm) .trainingWorkspaceMode(wsm) - .layer(new VariationalAutoencoder.Builder() + .layer(VariationalAutoencoder.builder() .nIn(784) .nOut(32) .encoderLayerSizes(16) @@ -151,7 +151,7 @@ public class TestCompGraphUnsupervised extends BaseDL4JTest { .pzxActivationFunction(Activation.SIGMOID) .reconstructionDistribution(new BernoulliReconstructionDistribution(Activation.SIGMOID)) .build()) - .layer(new VariationalAutoencoder.Builder() + .layer(VariationalAutoencoder.builder() .nIn(32) .nOut(8) .encoderLayerSizes(16) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestComputationGraphNetwork.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestComputationGraphNetwork.java index 46180da6d..01295d921 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestComputationGraphNetwork.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestComputationGraphNetwork.java @@ -101,16 +101,16 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { return NeuralNetConfiguration.builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).graphBuilder() .addInputs("input") - .addLayer("firstLayer", new DenseLayer.Builder().nIn(4).nOut(5).build(), "input") - .addLayer("outputLayer", new OutputLayer.Builder().nIn(5).nOut(3).activation(Activation.SOFTMAX).build(), "firstLayer") + .addLayer("firstLayer", DenseLayer.builder().nIn(4).nOut(5).build(), "input") + .addLayer("outputLayer", OutputLayer.builder().nIn(5).nOut(3).activation(Activation.SOFTMAX).build(), "firstLayer") .setOutputs("outputLayer").build(); } private static NeuralNetConfiguration getIrisMLNConfiguration() { return NeuralNetConfiguration.builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .layer(0, new DenseLayer.Builder().nIn(4).nOut(5).build()) - .layer(1, new OutputLayer.Builder().nIn(5).nOut(3).activation(Activation.SOFTMAX).build()).build(); + .layer(0, DenseLayer.builder().nIn(4).nOut(5).build()) + .layer(1, OutputLayer.builder().nIn(5).nOut(3).activation(Activation.SOFTMAX).build()).build(); } private static int getNumParams() { @@ -335,8 +335,8 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { ComputationGraphConfiguration config = NeuralNetConfiguration.builder() .updater(new Sgd(0.1)) .graphBuilder().addInputs("in") - 
.addLayer("dense", new DenseLayer.Builder().nIn(4).nOut(2).build(), "in").addLayer("out", - new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(2).nOut(3) + .addLayer("dense", DenseLayer.builder().nIn(4).nOut(2).build(), "in").addLayer("out", + OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(2).nOut(3) .build(), "dense") .setOutputs("out").build(); @@ -403,8 +403,8 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { //First: check FF -> RNN ComputationGraphConfiguration conf1 = NeuralNetConfiguration.builder().graphBuilder().addInputs("in") .setInputTypes(InputType.feedForward(5)) - .addLayer("rnn", new GravesLSTM.Builder().nOut(5).build(), "in") - .addLayer("out", new RnnOutputLayer.Builder().nOut(5).activation(Activation.SOFTMAX).build(), "rnn").setOutputs("out").build(); + .addLayer("rnn", GravesLSTM.builder().nOut(5).build(), "in") + .addLayer("out", RnnOutputLayer.builder().nOut(5).activation(Activation.SOFTMAX).build(), "rnn").setOutputs("out").build(); assertEquals(5, ((FeedForwardLayer) ((LayerVertex) conf1.getVertices().get("rnn")).getNetConfiguration().getFlattenedLayerConfigurations().get(0)) .getNIn()); @@ -419,8 +419,8 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { //Check RNN -> FF -> RNN ComputationGraphConfiguration conf2 = NeuralNetConfiguration.builder().graphBuilder().addInputs("in") .setInputTypes(InputType.recurrent(5)) - .addLayer("ff", new DenseLayer.Builder().nOut(5).build(), "in") - .addLayer("out", new RnnOutputLayer.Builder().nOut(5).activation(Activation.SOFTMAX).build(), "ff") + .addLayer("ff", DenseLayer.builder().nOut(5).build(), "in") + .addLayer("out", RnnOutputLayer.builder().nOut(5).activation(Activation.SOFTMAX).build(), "ff") .setOutputs("out").build(); assertEquals(5, ((FeedForwardLayer) ((LayerVertex) conf2.getVertices().get("ff")).getNetConfiguration().getFlattenedLayerConfigurations().get(0)) @@ -436,14 +436,14 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { //CNN -> Dense ComputationGraphConfiguration conf3 = NeuralNetConfiguration.builder().graphBuilder().addInputs("in") .setInputTypes(InputType.convolutional(28, 28, 1)) - .addLayer("cnn", new ConvolutionLayer.Builder().kernelSize(2, 2).padding(0, 0).stride(2, 2) + .addLayer("cnn", ConvolutionLayer.builder().kernelSize(2, 2).padding(0, 0).stride(2, 2) .nOut(3).build(), "in") //(28-2+0)/2+1 = 14 .addLayer("pool", - new SubsamplingLayer.Builder().kernelSize(2, 2).padding(0, 0).stride(2, 2) + SubsamplingLayer.builder().kernelSize(2, 2).padding(0, 0).stride(2, 2) .build(), "cnn") //(14-2+0)/2+1=7 - .addLayer("dense", new DenseLayer.Builder().nOut(10).build(), "pool") - .addLayer("out", new OutputLayer.Builder().nIn(10).nOut(5).activation(Activation.SOFTMAX).build(), "dense").setOutputs("out") + .addLayer("dense", DenseLayer.builder().nOut(10).build(), "pool") + .addLayer("out", OutputLayer.builder().nIn(10).nOut(5).activation(Activation.SOFTMAX).build(), "dense").setOutputs("out") .build(); //Check preprocessors: lv1 = (LayerVertex) conf3.getVertices().get("cnn"); @@ -466,16 +466,16 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { ComputationGraphConfiguration conf4 = NeuralNetConfiguration.builder().graphBuilder().addInputs("inCNN", "inRNN") .setInputTypes(InputType.convolutional(28, 28, 1), InputType.recurrent(5)) - .addLayer("cnn", new ConvolutionLayer.Builder().kernelSize(2, 2).padding(0, 0) + .addLayer("cnn", 
ConvolutionLayer.builder().kernelSize(2, 2).padding(0, 0) .stride(2, 2).nOut(3).build(), "inCNN") //(28-2+0)/2+1 = 14 .addLayer("pool", - new SubsamplingLayer.Builder().kernelSize(2, 2).padding(0, 0) + SubsamplingLayer.builder().kernelSize(2, 2).padding(0, 0) .stride(2, 2).build(), "cnn") //(14-2+0)/2+1=7 - .addLayer("dense", new DenseLayer.Builder().nOut(10).build(), "pool") - .addLayer("dense2", new DenseLayer.Builder().nOut(10).build(), "inRNN") + .addLayer("dense", DenseLayer.builder().nOut(10).build(), "pool") + .addLayer("dense2", DenseLayer.builder().nOut(10).build(), "inRNN") .addVertex("merge", new MergeVertex(), "dense", "dense2") - .addLayer("out", new RnnOutputLayer.Builder().nOut(5).activation(Activation.SOFTMAX).build(), "merge") + .addLayer("out", RnnOutputLayer.builder().nOut(5).activation(Activation.SOFTMAX).build(), "merge") .setOutputs("out").build(); //Check preprocessors: @@ -507,18 +507,18 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { .graphBuilder().addInputs("input") .setInputTypes(InputType.convolutional(28, 28, 1)) .addLayer("cnn_1", - new ConvolutionLayer.Builder(2, 2).stride(2, 2).nIn(1).nOut(3) + ConvolutionLayer.builder(2, 2).stride(2, 2).nIn(1).nOut(3) .build(), "input") .addLayer("cnn_2", - new ConvolutionLayer.Builder(4, 4).stride(2, 2).padding(1, 1) + ConvolutionLayer.builder(4, 4).stride(2, 2).padding(1, 1) .nIn(1).nOut(3).build(), "input") .addLayer("max_1", - new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX) + SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX) .kernelSize(2, 2).build(), "cnn_1", "cnn_2") - .addLayer("output", new OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).build(), "max_1") //.nIn(7 * 7 * 6) + .addLayer("output", OutputLayer.builder().nOut(10).activation(Activation.SOFTMAX).build(), "max_1") //.nIn(7 * 7 * 6) .setOutputs("output").build(); lv1 = (LayerVertex) conf5.getVertices().get("cnn_1"); assertNull(lv1.getPreProcessor()); //Expect no preprocessor: cnn data -> cnn layer @@ -578,8 +578,8 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).graphBuilder() .addInputs("input") - .addLayer("first_layer", new DenseLayer.Builder().nIn(4).nOut(5).build(), "input") - .addLayer("output_layer", new OutputLayer.Builder().nIn(5).nOut(3).activation(Activation.SOFTMAX).build(), "first_layer") + .addLayer("first_layer", DenseLayer.builder().nIn(4).nOut(5).build(), "input") + .addLayer("output_layer", OutputLayer.builder().nIn(5).nOut(3).activation(Activation.SOFTMAX).build(), "first_layer") .setOutputs("output_layer").build(); ComputationGraph net = new ComputationGraph(conf); @@ -599,7 +599,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { .updater(new Sgd(1e-6)) .l2(2e-4).graphBuilder().addInputs("in") .addLayer("layer0", - new VariationalAutoencoder.Builder().nIn(4).nOut(3) + VariationalAutoencoder.builder().nIn(4).nOut(3) .dist(new UniformDistribution(0, 1)) @@ -608,7 +608,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { .build(), "in") .addLayer("layer1", - new VariationalAutoencoder.Builder().nIn(4).nOut(3) + VariationalAutoencoder.builder().nIn(4).nOut(3) .dist(new UniformDistribution(0, 1)) @@ -617,7 +617,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { .build(), "in") .addLayer("layer2", - new VariationalAutoencoder.Builder().nIn(3).nOut(3) + 
VariationalAutoencoder.builder().nIn(3).nOut(3) .dist(new UniformDistribution(0, 1)) @@ -625,7 +625,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { .lossFunction(LossFunctions.LossFunction.KL_DIVERGENCE) .build(), "layer1") - .addLayer("out", new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .addLayer("out", org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).nIn(3 + 3).nOut(3) .dist(new UniformDistribution(0, 1)) @@ -652,9 +652,9 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { .updater(new Sgd(0.1)) .activation(Activation.TANH).weightInit(WeightInit.XAVIER) .graphBuilder().addInputs("in") - .addLayer("0", new DenseLayer.Builder().nIn(nIn).nOut(20).build(), "in") - .addLayer("1", new DenseLayer.Builder().nIn(20).nOut(30).build(), "0") - .addLayer("2", new OutputLayer.Builder() + .addLayer("0", DenseLayer.builder().nIn(nIn).nOut(20).build(), "in") + .addLayer("1", DenseLayer.builder().nIn(20).nOut(30).build(), "0") + .addLayer("2", OutputLayer.builder() .lossFunction(LossFunctions.LossFunction.MSE).nIn(30).nOut(nOut) .build(), "1") .setOutputs("2").build(); @@ -662,9 +662,9 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { ComputationGraphConfiguration confNoReg = NeuralNetConfiguration.builder().seed(12345).updater(new Sgd(0.1)).activation(Activation.TANH) .weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in") - .addLayer("0", new DenseLayer.Builder().nIn(nIn).nOut(20).build(), "in") - .addLayer("1", new DenseLayer.Builder().nIn(20).nOut(30).build(), "0") - .addLayer("2", new OutputLayer.Builder() + .addLayer("0", DenseLayer.builder().nIn(nIn).nOut(20).build(), "in") + .addLayer("1", DenseLayer.builder().nIn(20).nOut(30).build(), "0") + .addLayer("2", OutputLayer.builder() .lossFunction(LossFunctions.LossFunction.MSE).nIn(30).nOut(nOut) .build(), "1") .setOutputs("2").build(); @@ -720,8 +720,8 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { ComputationGraphConfiguration standard = NeuralNetConfiguration.builder().updater(new Sgd(0.1)) .trainingWorkspaceMode(ws).inferenceWorkspaceMode(ws) .seed(12345).graphBuilder().addInputs("in") - .addLayer("l0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in") - .addLayer("out", new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(10) + .addLayer("l0", DenseLayer.builder().nIn(10).nOut(10).build(), "in") + .addLayer("out", OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(10) .nOut(10).build(), "l0") .setOutputs("out").build(); ComputationGraph s = new ComputationGraph(standard); @@ -732,7 +732,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { ComputationGraphConfiguration external = NeuralNetConfiguration.builder().updater(new Sgd(0.1)) .trainingWorkspaceMode(ws).inferenceWorkspaceMode(ws) .seed(12345).graphBuilder().addInputs("in") - .addLayer("l0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in").setOutputs("l0") + .addLayer("l0", DenseLayer.builder().nIn(10).nOut(10).build(), "in").setOutputs("l0") .build(); ComputationGraph e = new ComputationGraph(external); @@ -778,9 +778,9 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { .graphBuilder() .addInputs("features") .addVertex("rnn2ffn", new PreprocessorVertex(new RnnToFeedForwardPreProcessor()), "features") - .addLayer("predict", new DenseLayer.Builder().nIn(nIn).nOut(nOut).activation(Activation.RELU).build(), "rnn2ffn") + .addLayer("predict", 
DenseLayer.builder().nIn(nIn).nOut(nOut).activation(Activation.RELU).build(), "rnn2ffn") .addVertex("ffn2rnn", new PreprocessorVertex(new FeedForwardToRnnPreProcessor()), "predict") - .addLayer("output", new ActivationLayer.Builder().activation(Activation.IDENTITY).build(), "ffn2rnn") + .addLayer("output", ActivationLayer.builder().activation(Activation.IDENTITY).build(), "ffn2rnn") .setOutputs("output") .build(); @@ -822,9 +822,9 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") - .addLayer("0", new DenseLayer.Builder().nIn(nIn).nOut(4).activation(Activation.RELU).build(), "in") - .addLayer("1", new DenseLayer.Builder().nIn(4).nOut(4).activation(Activation.RELU).build(), "0") - .addLayer("out", new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nOut(nOut).build(), "1") + .addLayer("0", DenseLayer.builder().nIn(nIn).nOut(4).activation(Activation.RELU).build(), "in") + .addLayer("1", DenseLayer.builder().nIn(4).nOut(4).activation(Activation.RELU).build(), "0") + .addLayer("out", OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nOut(nOut).build(), "1") .setOutputs("out") .setInputTypes(InputType.feedForward(nIn)) .build(); @@ -859,8 +859,8 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).graphBuilder() - .addInputs("input").addLayer("first", new DenseLayer.Builder().nIn(4).nOut(5).build(), "input") - .addLayer("output", new OutputLayer.Builder().nIn(5).nOut(3).activation(Activation.SOFTMAX).build(), "first") + .addInputs("input").addLayer("first", DenseLayer.builder().nIn(4).nOut(5).build(), "input") + .addLayer("output", OutputLayer.builder().nIn(5).nOut(3).activation(Activation.SOFTMAX).build(), "first") .setOutputs("output").build(); ComputationGraph net = new ComputationGraph(conf); @@ -896,10 +896,10 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().graphBuilder().addInputs("in") .setInputTypes(InputType.convolutional(10, 8, 3)) .addLayer("layer", - new ConvolutionLayer.Builder().kernelSize(2, 2).padding(0, 0).stride(1, 1) + ConvolutionLayer.builder().kernelSize(2, 2).padding(0, 0).stride(1, 1) .build(), "in") - .addLayer("out", new OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).build(), "layer").setOutputs("out") + .addLayer("out", OutputLayer.builder().nOut(10).activation(Activation.SOFTMAX).build(), "layer").setOutputs("out") .build(); LayerVertex lv = (LayerVertex) conf.getVertices().get("layer"); @@ -913,10 +913,10 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { conf = NeuralNetConfiguration.builder().graphBuilder().addInputs("in") .setInputTypes(InputType.convolutionalFlat(10, 8, 3)) .addLayer("layer", - new ConvolutionLayer.Builder().kernelSize(2, 2).padding(0, 0).stride(1, 1) + ConvolutionLayer.builder().kernelSize(2, 2).padding(0, 0).stride(1, 1) .build(), "in") - .addLayer("out", new OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).build(), "layer").setOutputs("out") + .addLayer("out", OutputLayer.builder().nOut(10).activation(Activation.SOFTMAX).build(), "layer").setOutputs("out") .build(); lv = (LayerVertex) conf.getVertices().get("layer"); @@ -934,13 +934,13 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { 
//Finally, check configuration with a subsampling layer conf = NeuralNetConfiguration.builder().graphBuilder().addInputs("in") .setInputTypes(InputType.convolutionalFlat(10, 8, 3)) - .addLayer("l0", new SubsamplingLayer.Builder().kernelSize(2, 2).stride(1, 1).padding(0, 0) + .addLayer("l0", SubsamplingLayer.builder().kernelSize(2, 2).stride(1, 1).padding(0, 0) .build(), "in") .addLayer("layer", - new ConvolutionLayer.Builder().kernelSize(2, 2).padding(0, 0).stride(1, 1) + ConvolutionLayer.builder().kernelSize(2, 2).padding(0, 0).stride(1, 1) .build(), "l0") - .addLayer("out", new OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).build(), "layer").setOutputs("out") + .addLayer("out", OutputLayer.builder().nOut(10).activation(Activation.SOFTMAX).build(), "layer").setOutputs("out") .build(); //Check subsampling layer: @@ -1001,8 +1001,8 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().optimizationAlgo(oa).graphBuilder() .addInputs("input") - .addLayer("first", new DenseLayer.Builder().nIn(4).nOut(5).build(), "input") - .addLayer("output", new OutputLayer.Builder().nIn(5).nOut(3).activation(Activation.SOFTMAX).build(), + .addLayer("first", DenseLayer.builder().nIn(4).nOut(5).build(), "input") + .addLayer("output", OutputLayer.builder().nIn(5).nOut(3).activation(Activation.SOFTMAX).build(), "first") .setOutputs("output").build(); @@ -1019,9 +1019,9 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(123) .graphBuilder().addInputs("in") - .addLayer("0", new DenseLayer.Builder().nIn(4).nOut(3).weightInit(WeightInit.XAVIER) + .addLayer("0", DenseLayer.builder().nIn(4).nOut(3).weightInit(WeightInit.XAVIER) .activation(Activation.TANH).build(), "in") - .addLayer("1", new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .addLayer("1", org.deeplearning4j.nn.conf.layers.OutputLayer.builder( LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(3).nOut(3) .build(), "0") @@ -1058,24 +1058,24 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { .activation(Activation.IDENTITY); ComputationGraphConfiguration conf = overallConf.graphBuilder().addInputs("inCentre", "inRight") - .addLayer("denseCentre0", new DenseLayer.Builder().nIn(10).nOut(9).build(), "inCentre") - .addLayer("denseCentre1", new DenseLayer.Builder().nIn(9).nOut(8).build(), "denseCentre0") - .addLayer("denseCentre2", new DenseLayer.Builder().nIn(8).nOut(7).build(), "denseCentre1") - .addLayer("denseCentre3", new DenseLayer.Builder().nIn(7).nOut(7).build(), "denseCentre2") + .addLayer("denseCentre0", DenseLayer.builder().nIn(10).nOut(9).build(), "inCentre") + .addLayer("denseCentre1", DenseLayer.builder().nIn(9).nOut(8).build(), "denseCentre0") + .addLayer("denseCentre2", DenseLayer.builder().nIn(8).nOut(7).build(), "denseCentre1") + .addLayer("denseCentre3", DenseLayer.builder().nIn(7).nOut(7).build(), "denseCentre2") .addLayer("outCentre", - new OutputLayer.Builder(LossFunctions.LossFunction.MSE).nIn(7).nOut(4).build(), + OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(7).nOut(4).build(), "denseCentre3") .addVertex("subsetLeft", new SubsetVertex(0, 3), "denseCentre1") - .addLayer("denseLeft0", new DenseLayer.Builder().nIn(4).nOut(5).build(), "subsetLeft") + .addLayer("denseLeft0", DenseLayer.builder().nIn(4).nOut(5).build(), 
"subsetLeft") .addLayer("outLeft", - new OutputLayer.Builder(LossFunctions.LossFunction.MSE).nIn(5).nOut(6).build(), + OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(5).nOut(6).build(), "denseLeft0") - .addLayer("denseRight", new DenseLayer.Builder().nIn(7).nOut(7).build(), "denseCentre2") - .addLayer("denseRight0", new DenseLayer.Builder().nIn(2).nOut(3).build(), "inRight") + .addLayer("denseRight", DenseLayer.builder().nIn(7).nOut(7).build(), "denseCentre2") + .addLayer("denseRight0", DenseLayer.builder().nIn(2).nOut(3).build(), "inRight") .addVertex("mergeRight", new MergeVertex(), "denseRight", "denseRight0") - .addLayer("denseRight1", new DenseLayer.Builder().nIn(10).nOut(5).build(), "mergeRight") + .addLayer("denseRight1", DenseLayer.builder().nIn(10).nOut(5).build(), "mergeRight") .addLayer("outRight", - new OutputLayer.Builder(LossFunctions.LossFunction.MSE).nIn(5).nOut(5).build(), + OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(5).nOut(5).build(), "denseRight1") .setOutputs("outLeft", "outCentre", "outRight").build(); @@ -1096,10 +1096,10 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { public void testFeedForwardIncludeNonLayerVertices() { ComputationGraphConfiguration c = NeuralNetConfiguration.builder().graphBuilder().addInputs("in") - .addLayer("0", new DenseLayer.Builder().nIn(5).nOut(5).build(), "in") - .addLayer("1", new DenseLayer.Builder().nIn(5).nOut(5).build(), "in") + .addLayer("0", DenseLayer.builder().nIn(5).nOut(5).build(), "in") + .addLayer("1", DenseLayer.builder().nIn(5).nOut(5).build(), "in") .addVertex("merge", new MergeVertex(), "0", "1") - .addLayer("out", new OutputLayer.Builder().nIn(10).nOut(5).activation(Activation.SOFTMAX).build(), "merge").setOutputs("out") + .addLayer("out", OutputLayer.builder().nIn(10).nOut(5).activation(Activation.SOFTMAX).build(), "merge").setOutputs("out") .build(); ComputationGraph cg = new ComputationGraph(c); @@ -1124,7 +1124,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { //Users generally shouldn't do this, but multiple setOutputs calls should *replace* not *add* outputs ComputationGraphConfiguration c = NeuralNetConfiguration.builder().graphBuilder().addInputs("in") - .addLayer("out", new OutputLayer.Builder().nIn(10).nOut(5).activation(Activation.SOFTMAX).build(), "in").setOutputs("out") + .addLayer("out", OutputLayer.builder().nIn(10).nOut(5).activation(Activation.SOFTMAX).build(), "in").setOutputs("out") .setOutputs("out").build(); List l = c.getNetworkOutputs(); @@ -1138,7 +1138,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { NeuralNetConfiguration.builder().weightNoise(new DropConnect(0.5)) .graphBuilder().setInputTypes(InputType.feedForward(1)).addInputs("input1") .addLayer("output", - new OutputLayer.Builder(LossFunctions.LossFunction.MSE).nIn(1).nOut(1) + OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(1).nOut(1) .activation(Activation.SIGMOID).build(), "input1") .setOutputs("output").backpropType(BackpropType.Standard) @@ -1153,17 +1153,17 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { ComputationGraphConfiguration c = NeuralNetConfiguration.builder().l1(0.5).l2(0.6).graphBuilder() .addInputs("in") - .addLayer("sub1", new SubsamplingLayer.Builder(2, 2).build(), "in") - .addLayer("sub2", new Subsampling1DLayer.Builder(2).build(), "sub1") - .addLayer("act", new ActivationLayer.Builder().activation(Activation.TANH) + .addLayer("sub1", SubsamplingLayer.builder(2, 
2).build(), "in") + .addLayer("sub2", Subsampling1DLayer.builder(2).build(), "sub1") + .addLayer("act", ActivationLayer.builder().activation(Activation.TANH) .build(), "sub2") - .addLayer("pad", new ZeroPaddingLayer.Builder(2, 3).build(), "act") - .addLayer("lrn", new LocalResponseNormalization.Builder().build(), "pad") - .addLayer("pool", new GlobalPoolingLayer.Builder(PoolingType.AVG).build(), + .addLayer("pad", ZeroPaddingLayer.builder(2, 3).build(), "act") + .addLayer("lrn", LocalResponseNormalization.builder().build(), "pad") + .addLayer("pool", GlobalPoolingLayer.builder(PoolingType.AVG).build(), "act") - .addLayer("drop", new DropoutLayer.Builder(0.5).build(), "pool") - .addLayer("dense", new DenseLayer.Builder().nIn(1).nOut(1).build(), "drop") - .addLayer("loss", new LossLayer.Builder(LossFunctions.LossFunction.MCXENT) + .addLayer("drop", DropoutLayer.builder(0.5).build(), "pool") + .addLayer("dense", DenseLayer.builder().nIn(1).nOut(1).build(), "drop") + .addLayer("loss", LossLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT.getILossFunction()) .build(), "dense") .allowDisconnected(true) .setOutputs("loss").build(); @@ -1179,7 +1179,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { public void testErrorNoOutputLayer() { ComputationGraphConfiguration c = NeuralNetConfiguration.builder().graphBuilder().addInputs("in") - .addLayer("dense", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in").setOutputs("dense") + .addLayer("dense", DenseLayer.builder().nIn(10).nOut(10).build(), "in").setOutputs("dense") .build(); ComputationGraph cg = new ComputationGraph(c); @@ -1203,7 +1203,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { //vertex NeuralNetConfiguration nnc = NeuralNetConfiguration.builder().build(); - nnc.setLayer(new DenseLayer.Builder().build()); + nnc.setLayer(DenseLayer.builder().build()); GraphVertex[] singleInputVertices = new GraphVertex[]{new L2NormalizeVertex(), new LayerVertex(nnc, null), new PoolHelperVertex(), new PreprocessorVertex(), new ReshapeVertex(1, 1), new ScaleVertex(1.0), new ShiftVertex(1.0), new SubsetVertex(1, 1), new UnstackVertex(0, 2), @@ -1241,7 +1241,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("input") - .addLayer("L1", new ConvolutionLayer.Builder(new int[]{1, 1}, new int[]{1, 1}, new int[]{0, 0}).nIn(depth).nOut(depth) + .addLayer("L1", ConvolutionLayer.builder(new int[]{1, 1}, new int[]{1, 1}, new int[]{0, 0}).nIn(depth).nOut(depth) .build(), "input") .addVertex("L2", new ReshapeVertex(minibatch, 1, 36, 48), "L1") .setOutputs("L2") @@ -1265,7 +1265,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") - .addLayer("out", new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX).build(), "in") + .addLayer("out", OutputLayer.builder().nIn(4).nOut(3).activation(Activation.SOFTMAX).build(), "in") .setOutputs("out") .build(); @@ -1305,23 +1305,23 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { NeuralNetConfiguration.builder().seed(12345).l2(0.001) //l2 regularization on all layers .updater(new AdaGrad(0.4)).graphBuilder() .addInputs("in") - .addLayer("layer0", new ConvolutionLayer.Builder(10, 10).nIn(3) //3 channels: RGB + .addLayer("layer0", ConvolutionLayer.builder(10, 10).nIn(3) //3 channels: RGB .nOut(30).stride(4, 
4).activation(Activation.RELU).weightInit( WeightInit.RELU).build(),"in") //Output: (130-10+0)/4+1 = 31 -> 31*31*30 - .addLayer("layer1", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX) + .addLayer("layer1", SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX) .kernelSize(3, 3).stride(2, 2).build(),"layer0") //(31-3+0)/2+1 = 15 - .addLayer("layer2", new ConvolutionLayer.Builder(3, 3).nIn(30).nOut(10).stride(2, 2) + .addLayer("layer2", ConvolutionLayer.builder(3, 3).nIn(30).nOut(10).stride(2, 2) .activation(Activation.RELU).weightInit(WeightInit.RELU) .updater(Updater.ADAGRAD).build(), "layer1") //Output: (15-3+0)/2+1 = 7 -> 7*7*10 = 490 - .addLayer("layer3", new DenseLayer.Builder().activation(Activation.RELU).nIn(490).nOut(50) + .addLayer("layer3", DenseLayer.builder().activation(Activation.RELU).nIn(490).nOut(50) .weightInit(WeightInit.RELU).gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue) .gradientNormalizationThreshold(10).build(), "layer2") - .addLayer("layer4", new GravesLSTM.Builder().activation(Activation.SOFTSIGN).nIn(50) + .addLayer("layer4", GravesLSTM.builder().activation(Activation.SOFTSIGN).nIn(50) .nOut(50).weightInit(WeightInit.XAVIER).updater(Updater.ADAGRAD) .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue) .gradientNormalizationThreshold(10) .build(), "layer3") - .addLayer("layer5", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .addLayer("layer5", RnnOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(50).nOut(4) //4 possible shapes: circle, square, arc, line .weightInit(WeightInit.XAVIER) .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue) @@ -1351,10 +1351,10 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { .convolutionMode(ConvolutionMode.Same) .graphBuilder() .addInputs("in") - .addLayer("0", new ConvolutionLayer.Builder().kernelSize(2,2).stride(1,1).nIn(1).nOut(1).build(), "in") - .addLayer("1", new SubsamplingLayer.Builder().kernelSize(2,2).stride(1,1).build(), "0") - .addLayer("2", new DenseLayer.Builder().nOut(10).build(), "1") - .addLayer("3", new OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).build(), "2") + .addLayer("0", ConvolutionLayer.builder().kernelSize(2,2).stride(1,1).nIn(1).nOut(1).build(), "in") + .addLayer("1", SubsamplingLayer.builder().kernelSize(2,2).stride(1,1).build(), "0") + .addLayer("2", DenseLayer.builder().nOut(10).build(), "1") + .addLayer("3", OutputLayer.builder().nOut(10).activation(Activation.SOFTMAX).build(), "2") .setOutputs("3") .setInputTypes(InputType.convolutional(28,28,1)) .build(); @@ -1386,9 +1386,9 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { ComputationGraphConfiguration.GraphBuilder b = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") - .addLayer("0", new DenseLayer.Builder().activation(Activation.SIGMOID).nOut(8).build(), "in") - .addLayer("1", new DenseLayer.Builder().activation(Activation.SIGMOID).nOut(8).build(), "in") //Disconnected - .addLayer("O", new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nOut(10).build(), "0") + .addLayer("0", DenseLayer.builder().activation(Activation.SIGMOID).nOut(8).build(), "in") + .addLayer("1", DenseLayer.builder().activation(Activation.SIGMOID).nOut(8).build(), "in") //Disconnected + .addLayer("O", 
OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nOut(10).build(), "0") .setOutputs("O") .setInputTypes(InputType.feedForward(8)); @@ -1418,10 +1418,10 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { .graphBuilder() .addInputs("in") - .layer("0", new ConvolutionLayer.Builder().kernelSize(2,2).nOut(6).build(), "in") - .layer("1", new SubsamplingLayer.Builder().kernelSize(2,2).build(), "0") - .layer("2", new DenseLayer.Builder().nOut(30).build(), "1") - .layer("3", new OutputLayer.Builder().nOut(13).activation(Activation.SOFTMAX).build(), "2") + .layer("0", ConvolutionLayer.builder().kernelSize(2,2).nOut(6).build(), "in") + .layer("1", SubsamplingLayer.builder().kernelSize(2,2).build(), "0") + .layer("2", DenseLayer.builder().nOut(30).build(), "1") + .layer("3", OutputLayer.builder().nOut(13).activation(Activation.SOFTMAX).build(), "2") .setOutputs("3") .setInputTypes(InputType.convolutional(28,28,3)) .build(); @@ -1456,8 +1456,8 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") - .layer("0", new SubsamplingLayer.Builder().kernelSize(2,2).stride(2,2).build(), "in") - .layer("1", new LossLayer.Builder().activation(Activation.SIGMOID).lossFunction(LossFunctions.LossFunction.MSE).build(), "0") + .layer("0", SubsamplingLayer.builder().kernelSize(2,2).stride(2,2).build(), "in") + .layer("1", LossLayer.builder().activation(Activation.SIGMOID).lossFunction(LossFunctions.LossFunction.MSE.getILossFunction()).build(), "0") .setOutputs("1") .setInputTypes(InputType.convolutionalFlat(28,28,1)) .build(); @@ -1501,7 +1501,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { .setOutputs(outputName) .setInputTypes(InputType.inferInputType(input)) .addVertex(scaleName, new ScaleVertex(scaleFactor), inputName) - .addLayer(outputName, new OutputLayer.Builder() + .addLayer(outputName, OutputLayer.builder() .activation(new ActivationIdentity()) .lossFunction(LossFunctions.LossFunction.MSE) .nOut(input.length()) @@ -1539,7 +1539,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { .seed(12345) .graphBuilder() .addInputs("in") - .layer("layer", new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX).build(), "in") + .layer("layer", OutputLayer.builder().nIn(4).nOut(3).activation(Activation.SOFTMAX).build(), "in") .setOutputs("layer") .build(); ComputationGraph cg = new ComputationGraph(conf); @@ -1561,11 +1561,11 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { ComputationGraphConfiguration.GraphBuilder builder = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in1", "in2") - .layer("0", new DenseLayer.Builder().nOut(10).build(), "in1") - .layer("1", new DenseLayer.Builder().nOut(9).build(), "in1", "in2") - .layer("2", new DenseLayer.Builder().nOut(8).build(), "in2") - .layer("3", new DenseLayer.Builder().nOut(7).build(), "0") - .layer("4", new DenseLayer.Builder().nOut(6).build(), "1", "2") + .layer("0", DenseLayer.builder().nOut(10).build(), "in1") + .layer("1", DenseLayer.builder().nOut(9).build(), "in1", "in2") + .layer("2", DenseLayer.builder().nOut(8).build(), "in2") + .layer("3", DenseLayer.builder().nOut(7).build(), "0") + .layer("4", DenseLayer.builder().nOut(6).build(), "1", "2") .setInputTypes(InputType.feedForward(5), InputType.feedForward(6)) .allowNoOutput(true); @@ -1598,14 +1598,14 @@ public class TestComputationGraphNetwork extends 
BaseDL4JTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in1", "in2") - .addLayer("l0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in1") - .addLayer("l1", new DenseLayer.Builder().nIn(20).nOut(10).build(), "in1", "in2") - .addLayer("l2", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in2") - .addLayer("l3", new DenseLayer.Builder().nIn(10).nOut(10).build(), "l0") - .addLayer("l4", new DenseLayer.Builder().nIn(10).nOut(10).build(), "l1") - .addLayer("l5", new DenseLayer.Builder().nIn(10).nOut(10).build(), "l2") - .addLayer("l6", new OutputLayer.Builder().nIn(20).nOut(10).activation(Activation.SOFTMAX).build(), "l3", "l5") - .addLayer("l7", new OutputLayer.Builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build(), "l4") + .addLayer("l0", DenseLayer.builder().nIn(10).nOut(10).build(), "in1") + .addLayer("l1", DenseLayer.builder().nIn(20).nOut(10).build(), "in1", "in2") + .addLayer("l2", DenseLayer.builder().nIn(10).nOut(10).build(), "in2") + .addLayer("l3", DenseLayer.builder().nIn(10).nOut(10).build(), "l0") + .addLayer("l4", DenseLayer.builder().nIn(10).nOut(10).build(), "l1") + .addLayer("l5", DenseLayer.builder().nIn(10).nOut(10).build(), "l2") + .addLayer("l6", OutputLayer.builder().nIn(20).nOut(10).activation(Activation.SOFTMAX).build(), "l3", "l5") + .addLayer("l7", OutputLayer.builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build(), "l4") .setOutputs("l6", "l7") .build(); @@ -1698,9 +1698,9 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { .graphBuilder() .addInputs("in") - .layer("0", new VariationalAutoencoder.Builder() + .layer("0", VariationalAutoencoder.builder() .nIn(10).nOut(10).encoderLayerSizes(10).decoderLayerSizes(10).build(), "in") - .layer("1", new OutputLayer.Builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build(), "0") + .layer("1", OutputLayer.builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build(), "0") .setOutputs("1") .build(); @@ -1746,13 +1746,13 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { .graphBuilder() .addInputs("in1", "in2") - .layer("0", new DenseLayer.Builder().nOut(10).build(), "in1") //Modification should not be allowed on input - .layer("1", new DenseLayer.Builder().nOut(10).build(), "in2") //Modification should not be allowed on input - .layer("2", new DenseLayer.Builder().nOut(10).build(), "0") //Modification SHOULD be allowed - .layer("3", new DenseLayer.Builder().nOut(10).build(), "1") //First in topo sort for using this input - not allowed - .layer("4", new DenseLayer.Builder().nOut(10).build(), "1") //Second in topo sort - not allowed - .layer("5", new DenseLayer.Builder().nOut(10).build(), "1") //Last in topo sort - allowed - .layer("6", new DenseLayer.Builder().nOut(10).build(), "2", "3", "4", "5") //Input from merge vertex - allowed + .layer("0", DenseLayer.builder().nOut(10).build(), "in1") //Modification should not be allowed on input + .layer("1", DenseLayer.builder().nOut(10).build(), "in2") //Modification should not be allowed on input + .layer("2", DenseLayer.builder().nOut(10).build(), "0") //Modification SHOULD be allowed + .layer("3", DenseLayer.builder().nOut(10).build(), "1") //First in topo sort for using this input - not allowed + .layer("4", DenseLayer.builder().nOut(10).build(), "1") //Second in topo sort - not allowed + .layer("5", DenseLayer.builder().nOut(10).build(), "1") //Last in topo sort - allowed + .layer("6", DenseLayer.builder().nOut(10).build(), "2", "3", "4", "5") //Input 
from merge vertex - allowed .setOutputs("6") .setInputTypes(InputType.feedForward(10), InputType.feedForward(10)) .build(); @@ -1787,19 +1787,19 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { .addInputs("in1", "in2") .addVertex("merge", new MergeVertex(), "in1", "in2") .addLayer("lstm", - new Bidirectional(Bidirectional.Mode.CONCAT, new LSTM.Builder() + Bidirectional.builder(Bidirectional.Mode.CONCAT, LSTM.builder() .nIn(10).nOut(5) .activation(Activation.TANH) .dropOut(new GaussianNoise(0.05)) - .build()) + .build()).build() ,"merge") .addLayer("out1", - new RnnOutputLayer.Builder().activation(Activation.SOFTMAX) + RnnOutputLayer.builder().activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).nIn(10) .nOut(6).build(), "lstm") .addLayer("out2", - new RnnOutputLayer.Builder().activation(Activation.SOFTMAX) + RnnOutputLayer.builder().activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).nIn(10) .nOut(4).build(), "lstm") @@ -1825,18 +1825,18 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { .addInputs("in1", "in2") .addVertex("merge", new MergeVertex(), "in1", "in2") .addLayer("dense", - new DenseLayer.Builder() + DenseLayer.builder() .nIn(10).nOut(5) .activation(Activation.TANH) .dropOut(new GaussianNoise(0.05)) .build(),"merge") .addLayer("out1", - new OutputLayer.Builder().activation(Activation.SOFTMAX) + OutputLayer.builder().activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).nIn(5) .nOut(6).build(), "dense") .addLayer("out2", - new OutputLayer.Builder().activation(Activation.SOFTMAX) + OutputLayer.builder().activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).nIn(5) .nOut(4).build(), "dense") @@ -1867,8 +1867,8 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") - .layer("layer_zero", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in") - .layer("layer_one", new OutputLayer.Builder().nIn(10).nOut(10).build(), "layer_zero") + .layer("layer_zero", DenseLayer.builder().nIn(10).nOut(10).build(), "in") + .layer("layer_one", OutputLayer.builder().nIn(10).nOut(10).build(), "layer_zero") .setOutputs("layer_one") .build(); @@ -1894,10 +1894,10 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { .seed(12345) .graphBuilder() .addInputs("in") - .layer("0", new DenseLayer.Builder().nIn(10).nOut(9).build(), "in") - .layer("1", new DenseLayer.Builder().nIn(9).nOut(8).build(), "0") - .layer("2", new DenseLayer.Builder().nIn(8).nOut(7).build(), "1") - .layer("3", new OutputLayer.Builder().nIn(7).nOut(6).build(), "2") + .layer("0", DenseLayer.builder().nIn(10).nOut(9).build(), "in") + .layer("1", DenseLayer.builder().nIn(9).nOut(8).build(), "0") + .layer("2", DenseLayer.builder().nIn(8).nOut(7).build(), "1") + .layer("3", OutputLayer.builder().nIn(7).nOut(6).build(), "2") .setOutputs("3") .build(); @@ -1923,9 +1923,9 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { .setInputTypes(inputType) .addInputs("input") .setOutputs("output") - .addLayer("0", new ConvolutionLayer.Builder().nOut(5).convolutionMode(ConvolutionMode.Same).build(),"input" ) + .addLayer("0", ConvolutionLayer.builder().nOut(5).convolutionMode(ConvolutionMode.Same).build(),"input" ) .addVertex("dummyAdd", new ElementWiseVertex(ElementWiseVertex.Op.Add), "0") - .addLayer("output", new CnnLossLayer(), "dummyAdd") + .addLayer("output", 
CnnLossLayer.builder().build(), "dummyAdd") .build()); graph.init(); graph.outputSingle(Nd4j.randn(1, 2, 10, 10)); @@ -1943,7 +1943,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { .addInputs("input") .addLayer( "dense", - new DenseLayer.Builder() + DenseLayer.builder() .nIn(10) .nOut(10) .activation(Activation.RELU) @@ -1952,7 +1952,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { .build(), "input") .addLayer("output", - new OutputLayer.Builder() + OutputLayer.builder() .nIn(10) .nOut(1) .lossFunction(LossFunctions.LossFunction.XENT) @@ -1968,8 +1968,8 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { ComputationGraph cg2 = model.clone(); - IDropout d1 = model.getLayer(0).getLayerConfiguration().getIDropout(); - IDropout d2 = cg2.getLayer(0).getLayerConfiguration().getIDropout(); + IDropout d1 = model.getLayer(0).getLayerConfiguration().getDropOut(); + IDropout d2 = cg2.getLayer(0).getLayerConfiguration().getDropOut(); assertNotSame(d1, d2); //Should not be same object! assertEquals(d1, d2); //But should be equal @@ -1986,15 +1986,15 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { .updater(new Adam()) .graphBuilder() .addInputs("x_emb") - .addLayer("agg_lstm", new Bidirectional(CONCAT, new LSTM.Builder().nOut(hiddenSize/2).build()), "x_emb") - .addLayer("agg_att", new DenseLayer.Builder().nIn(100).nOut(1).activation(Activation.SOFTMAX).build(), "agg_lstm") + .addLayer("agg_lstm", Bidirectional.builder(CONCAT, LSTM.builder().nOut(hiddenSize/2).build()).build(), "x_emb") + .addLayer("agg_att", DenseLayer.builder().nIn(100).nOut(1).activation(Activation.SOFTMAX).build(), "agg_lstm") .addVertex("att", new PreprocessorVertex(new ComposableInputPreProcessor(new FeedForwardToRnnPreProcessor(), new PermutePreprocessor(0,2,1), new RnnToFeedForwardPreProcessor())), "agg_att") - .addLayer("att_repeat", new RepeatVector.Builder(hiddenSize).build(),"att") + .addLayer("att_repeat", RepeatVector.builder().repetitionFactor(hiddenSize).build(),"att") .addVertex("att_trans", new PreprocessorVertex(new PermutePreprocessor(0, 2, 1)), "att_repeat") .addVertex("mult", new ElementWiseVertex(ElementWiseVertex.Op.Product), "agg_lstm", "att_trans") - .addLayer("sum", new GlobalPoolingLayer.Builder().build(), "mult") - .addLayer("agg_out", new DenseLayer.Builder().nIn(100).nOut(6).activation(Activation.TANH).build(), "sum") - .addLayer("output", new OutputLayer.Builder().nIn(6).nOut(6).lossFunction(LossFunctions.LossFunction.RECONSTRUCTION_CROSSENTROPY).build(), "agg_out") + .addLayer("sum", GlobalPoolingLayer.builder().build(), "mult") + .addLayer("agg_out", DenseLayer.builder().nIn(100).nOut(6).activation(Activation.TANH).build(), "sum") + .addLayer("output", OutputLayer.builder().nIn(6).nOut(6).lossFunction(LossFunctions.LossFunction.RECONSTRUCTION_CROSSENTROPY).build(), "agg_out") .setOutputs("output") .setInputTypes(InputType.recurrent(inputSize,seqLen,RNNFormat.NCW)) .build(); @@ -2029,9 +2029,9 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { .backpropType(BackpropType.Standard) .addInputs("in") .setOutputs("out") - .addLayer("0",new DenseLayer.Builder().nIn(5).nOut(3).build(),"in") - .addLayer("1",new DenseLayer.Builder().nIn(3).nOut(2).build(),"0") + 
.addLayer("out",OutputLayer.builder(LossFunctions.LossFunction.XENT).nIn(2).nOut(1) .activation(Activation.SIGMOID).build(),"1") .build(); @@ -2129,9 +2129,9 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { .graphBuilder() .addInputs("in") .setOutputs("out") - .addLayer("0",new DenseLayer.Builder().nIn(inputSize).nOut(layerSize).build(),"in") + .addLayer("0",DenseLayer.builder().nIn(inputSize).nOut(layerSize).build(),"in") .addVertex("combine", new MergeVertex(), "0", "0", "0") - .addLayer("out",new OutputLayer.Builder(LossFunctions.LossFunction.XENT).nIn(3*layerSize).nOut(outputSize) + .addLayer("out",OutputLayer.builder(LossFunctions.LossFunction.XENT).nIn(3*layerSize).nOut(outputSize) .activation(Activation.SIGMOID).build(),"combine") .build(); @@ -2155,8 +2155,8 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .graphBuilder() - .addLayer("l0", new Convolution3D.Builder().kernelSize(2,2,2).stride(1,1,1).nIn(3).nOut(3).dataFormat(Convolution3D.DataFormat.NCDHW).build(), "in") - .addLayer("l1", new Convolution3D.Builder().kernelSize(2,2,2).stride(1,1,1).nIn(3).nOut(3).dataFormat(Convolution3D.DataFormat.NCDHW).build(), "in") + .addLayer("l0", Convolution3D.builder().kernelSize(2,2,2).stride(1,1,1).nIn(3).nOut(3).dataFormat(Convolution3D.DataFormat.NCDHW).build(), "in") + .addLayer("l1", Convolution3D.builder().kernelSize(2,2,2).stride(1,1,1).nIn(3).nOut(3).dataFormat(Convolution3D.DataFormat.NCDHW).build(), "in") .addVertex("out", new MergeVertex(), "l0", "l1") .setInputTypes(InputType.convolutional3D(Convolution3D.DataFormat.NCDHW, 16, 16, 16, 3)) .addInputs("in") @@ -2175,9 +2175,9 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") - .addLayer("e1", new EmbeddingLayer.Builder().nIn(10).nOut(5).build(), "in") - .addLayer("e2", new EmbeddingLayer.Builder().nIn(10).nOut(5).build(), "in") - .addLayer("out", new OutputLayer.Builder().nIn(10).nOut(2).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build(), "e1", "e2") + .addLayer("e1", EmbeddingLayer.builder().nIn(10).nOut(5).build(), "in") + .addLayer("e2", EmbeddingLayer.builder().nIn(10).nOut(5).build(), "in") + .addLayer("out", OutputLayer.builder().nIn(10).nOut(2).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build(), "e1", "e2") .setOutputs("out") .build(); @@ -2195,18 +2195,18 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { .convolutionMode(ConvolutionMode.Same) .graphBuilder() .addInputs("in") - .layer("l0", new ConvolutionLayer.Builder() + .layer("l0", ConvolutionLayer.builder() .nOut(16) - .dataFormat(CNN2DFormat.NHWC) + .convFormat(CNN2DFormat.NHWC) .kernelSize(2,2).stride(1,1) .build(), "in") - .layer("l1", new ConvolutionLayer.Builder() + .layer("l1", ConvolutionLayer.builder() .nOut(8) - .dataFormat(CNN2DFormat.NHWC) + .convFormat(CNN2DFormat.NHWC) .kernelSize(2,2).stride(1,1) .build(), "in") .addVertex("merge", new MergeVertex(), "l0", "l1") - .layer("out", new CnnLossLayer.Builder().activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE).build(), "merge") + .layer("out", CnnLossLayer.builder().activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE.getILossFunction()).build(), "merge") .setOutputs("out") .setInputTypes(InputType.convolutional(32, 32, 3, CNN2DFormat.NHWC)) .build(); diff --git 
a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestSetGetParameters.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestSetGetParameters.java index 685920d10..e8e20065e 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestSetGetParameters.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestSetGetParameters.java @@ -43,14 +43,14 @@ public class TestSetGetParameters extends BaseDL4JTest { //Create configuration. Doesn't matter if this doesn't actually work for forward/backward pass here ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345).graphBuilder() - .addInputs("in").addLayer("0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in") - .addLayer("1", new GravesLSTM.Builder().nIn(10).nOut(10).build(), "in") - .addLayer("2", new GravesBidirectionalLSTM.Builder().nIn(10).nOut(10).build(), "in") - .addLayer("3", new ConvolutionLayer.Builder().nIn(10).nOut(10).kernelSize(2, 2).stride(2, 2) + .addInputs("in").addLayer("0", DenseLayer.builder().nIn(10).nOut(10).build(), "in") + .addLayer("1", GravesLSTM.builder().nIn(10).nOut(10).build(), "in") + .addLayer("2", GravesBidirectionalLSTM.builder().nIn(10).nOut(10).build(), "in") + .addLayer("3", ConvolutionLayer.builder().nIn(10).nOut(10).kernelSize(2, 2).stride(2, 2) .padding(2, 2).build(), "in") - .addLayer("4", new OutputLayer.Builder(LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(10).nOut(10).build(), "3") - .addLayer("5", new OutputLayer.Builder(LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(10).nOut(10).build(), "0") - .addLayer("6", new RnnOutputLayer.Builder(LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(10).nOut(10).build(), "1", + .addLayer("4", OutputLayer.builder().lossFunction(LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(10).nOut(10).build(), "3") + .addLayer("5", OutputLayer.builder().lossFunction(LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(10).nOut(10).build(), "0") + .addLayer("6", RnnOutputLayer.builder().lossFunction(LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(10).nOut(10).build(), "1", "2") .setOutputs("4", "5", "6").build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestVariableLengthTSCG.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestVariableLengthTSCG.java index 7023e0039..039ede713 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestVariableLengthTSCG.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestVariableLengthTSCG.java @@ -71,9 +71,9 @@ public class TestVariableLengthTSCG extends BaseDL4JTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(0.1)).seed(12345).graphBuilder().addInputs("in") - .addLayer("0", new GravesLSTM.Builder().activation(Activation.TANH).nIn(2).nOut(2).build(), + .addLayer("0", GravesLSTM.builder().activation(Activation.TANH).nIn(2).nOut(2).build(), "in") - .addLayer("1", new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE) + .addLayer("1", RnnOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE) .nIn(2).nOut(1).activation(Activation.TANH).build(), "0") .setInputTypes(InputType.recurrent(2,5,RNNFormat.NCW)) .setOutputs("1").build(); @@ -162,13 +162,13 @@ public class TestVariableLengthTSCG extends BaseDL4JTest { 
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(new NormalDistribution(0,2)) .updater(new Sgd(0.1)).seed(12345).graphBuilder().addInputs("in") - .addLayer("0", new DenseLayer.Builder().activation(Activation.TANH).nIn(2).nOut(2).build(), + .addLayer("0", DenseLayer.builder().activation(Activation.TANH).nIn(2).nOut(2).build(), "in") - .addLayer("1", new DenseLayer.Builder().activation(Activation.TANH).nIn(2).nOut(2).build(), + .addLayer("1", DenseLayer.builder().activation(Activation.TANH).nIn(2).nOut(2).build(), "0") - .addLayer("2", new GravesLSTM.Builder().activation(Activation.TANH).nIn(2).nOut(2).build(), + .addLayer("2", GravesLSTM.builder().activation(Activation.TANH).nIn(2).nOut(2).build(), "1") - .addLayer("3", new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE) + .addLayer("3", RnnOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE) .nIn(2).nOut(1).activation(Activation.TANH).build(), "2") .setOutputs("3").inputPreProcessor("0", new RnnToFeedForwardPreProcessor()) .inputPreProcessor("2", new FeedForwardToRnnPreProcessor()) @@ -303,13 +303,13 @@ public class TestVariableLengthTSCG extends BaseDL4JTest { NeuralNetConfiguration.builder().seed(12345L) .graphBuilder() .addInputs("in").addLayer("0", - new GravesLSTM.Builder().nIn(nIn).nOut(5) + GravesLSTM.builder().nIn(nIn).nOut(5) .dist(new NormalDistribution(0, 1)) .updater(new NoOp()).build(), "in") - .addLayer("1", new RnnOutputLayer.Builder( + .addLayer("1", RnnOutputLayer.builder( LossFunctions.LossFunction.MSE) .activation(Activation.IDENTITY) .nIn(5).nOut(nOut) @@ -373,13 +373,13 @@ public class TestVariableLengthTSCG extends BaseDL4JTest { NeuralNetConfiguration.builder().seed(12345L) .graphBuilder() .addInputs("in").addLayer("0", - new GravesLSTM.Builder().nIn(nIn).nOut(5) + GravesLSTM.builder().nIn(nIn).nOut(5) .dist(new NormalDistribution(0, 1)) .updater(new NoOp()).build(), "in") - .addLayer("1", new RnnOutputLayer.Builder( + .addLayer("1", RnnOutputLayer.builder( LossFunctions.LossFunction.MSE) .activation(Activation.IDENTITY) .nIn(5).nOut(nOut) @@ -394,13 +394,13 @@ public class TestVariableLengthTSCG extends BaseDL4JTest { NeuralNetConfiguration.builder().seed(12345L) .graphBuilder() .addInputs("in").addLayer("0", - new GravesLSTM.Builder().nIn(nIn).nOut(5) + GravesLSTM.builder().nIn(nIn).nOut(5) .dist(new NormalDistribution(0, 1)) .updater(new NoOp()).build(), "in") - .addLayer("1", new RnnOutputLayer.Builder( + .addLayer("1", RnnOutputLayer.builder( LossFunctions.LossFunction.XENT) .activation(Activation.SIGMOID) .nIn(5).nOut(nOut) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/graphnodes/TestGraphNodes.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/graphnodes/TestGraphNodes.java index 3ca1aa8bd..90e2de64f 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/graphnodes/TestGraphNodes.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/graphnodes/TestGraphNodes.java @@ -190,7 +190,7 @@ public class TestGraphNodes extends BaseDL4JTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().graphBuilder().addInputs("in") .addVertex("lastTS", new LastTimeStepVertex("in"), "in") - .addLayer("out", new OutputLayer.Builder().nIn(1).nOut(1).activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE).build(), "lastTS").setOutputs("out") + .addLayer("out", 
OutputLayer.builder().nIn(1).nOut(1).activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE).build(), "lastTS").setOutputs("out") .build(); ComputationGraph graph = new ComputationGraph(conf); @@ -242,8 +242,8 @@ public class TestGraphNodes extends BaseDL4JTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().graphBuilder() .addInputs("in2d", "in3d") .addVertex("duplicateTS", new DuplicateToTimeSeriesVertex("in3d"), "in2d") - .addLayer("out", new OutputLayer.Builder().nIn(1).nOut(1).activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE).build(), "duplicateTS") - .addLayer("out3d", new RnnOutputLayer.Builder().nIn(1).nOut(1).activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE).build(), "in3d") + .addLayer("out", OutputLayer.builder().nIn(1).nOut(1).activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE).build(), "duplicateTS") + .addLayer("out3d", RnnOutputLayer.builder().nIn(1).nOut(1).activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE).build(), "in3d") .setOutputs("out", "out3d").build(); ComputationGraph graph = new ComputationGraph(conf); @@ -315,12 +315,12 @@ public class TestGraphNodes extends BaseDL4JTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().graphBuilder().addInputs("in1", "in2") .addVertex("stack", new org.deeplearning4j.nn.conf.graph.StackVertex(), "in1", "in2") - .addLayer("1", new EmbeddingLayer.Builder().nIn(5).nOut(5).build(), "stack") + .addLayer("1", EmbeddingLayer.builder().nIn(5).nOut(5).build(), "stack") .addVertex("unstack1", new org.deeplearning4j.nn.conf.graph.UnstackVertex(0, 2), "1") .addVertex("unstack2", new org.deeplearning4j.nn.conf.graph.UnstackVertex(0, 2), "1") - .addLayer("out1", new OutputLayer.Builder().activation(Activation.TANH) + .addLayer("out1", OutputLayer.builder().activation(Activation.TANH) .lossFunction(LossFunctions.LossFunction.L2).nIn(5).nOut(5).build(), "unstack1") - .addLayer("out2", new OutputLayer.Builder().activation(Activation.TANH) + .addLayer("out2", OutputLayer.builder().activation(Activation.TANH) .lossFunction(LossFunctions.LossFunction.L2).nIn(5).nOut(5).build(), "unstack2") .setOutputs("out1", "out2").build(); @@ -550,7 +550,7 @@ public class TestGraphNodes extends BaseDL4JTest { .addVertex("v6", new LastTimeStepVertex("in"), "in") .addVertex("v7", new org.deeplearning4j.nn.conf.graph.StackVertex(), "in") .addVertex("v8", new org.deeplearning4j.nn.conf.graph.UnstackVertex(0, 1), "in") - .addLayer("out", new OutputLayer.Builder().nIn(1).nOut(1).activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE).build(), "in") + .addLayer("out", OutputLayer.builder().nIn(1).nOut(1).activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE).build(), "in") .setOutputs("out", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8").build(); String json = conf.toJson(); @@ -573,8 +573,8 @@ public class TestGraphNodes extends BaseDL4JTest { .weightInit(WeightInit.XAVIER) .graphBuilder() .addInputs("rr") - .addLayer("1", new LSTM.Builder().activation(Activation.TANH).nIn(numInputs).nOut(lstmLayerSize).dropOut(0.9).build(), "rr") - .addLayer("2", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .addLayer("1", LSTM.builder().activation(Activation.TANH).nIn(numInputs).nOut(lstmLayerSize).dropOut(0.9).build(), "rr") + .addLayer("2", RnnOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(numLabelClasses).build(), "1") 
.setOutputs("2") diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/ActivationLayerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/ActivationLayerTest.java index 629fd7069..ae4d3520b 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/ActivationLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/ActivationLayerTest.java @@ -20,6 +20,9 @@ package org.deeplearning4j.nn.layers; +import static org.junit.jupiter.api.Assertions.*; + +import java.util.List; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; import org.deeplearning4j.nn.api.OptimizationAlgorithm; @@ -46,285 +49,396 @@ import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.lossfunctions.LossFunctions; -import java.util.List; - -import static org.junit.jupiter.api.Assertions.*; - -/** - */ - +/** */ public class ActivationLayerTest extends BaseDL4JTest { - @Override - public DataType getDataType(){ - return DataType.FLOAT; - } + @Override + public DataType getDataType() { + return DataType.FLOAT; + } - @Test - public void testInputTypes() { - org.deeplearning4j.nn.conf.layers.ActivationLayer l = - new org.deeplearning4j.nn.conf.layers.ActivationLayer.Builder().activation(Activation.RELU) - .build(); + @Test + public void testInputTypes() { + org.deeplearning4j.nn.conf.layers.ActivationLayer l = + org.deeplearning4j.nn.conf.layers.ActivationLayer.builder() + .activation(Activation.RELU) + .build(); + InputType in1 = InputType.feedForward(20); + InputType in2 = InputType.convolutional(28, 28, 1); - InputType in1 = InputType.feedForward(20); - InputType in2 = InputType.convolutional(28, 28, 1); + assertEquals(in1, l.getOutputType(0, in1)); + assertEquals(in2, l.getOutputType(0, in2)); + assertNull(l.getPreProcessorForInputType(in1)); + assertNull(l.getPreProcessorForInputType(in2)); + } - assertEquals(in1, l.getOutputType(0, in1)); - assertEquals(in2, l.getOutputType(0, in2)); - assertNull(l.getPreProcessorForInputType(in1)); - assertNull(l.getPreProcessorForInputType(in2)); - } + @Test + public void testDenseActivationLayer() throws Exception { + DataSetIterator iter = new MnistDataSetIterator(2, 2); + DataSet next = iter.next(); - @Test - public void testDenseActivationLayer() throws Exception { - DataSetIterator iter = new MnistDataSetIterator(2, 2); - DataSet next = iter.next(); - - // Run without separate activation layer - NeuralNetConfiguration conf = NeuralNetConfiguration.builder() + // Run without separate activation layer + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.LBFGS) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .seed(123) - .layer(0, new DenseLayer.Builder().nIn(28 * 28).nOut(10).activation(Activation.RELU) - .weightInit(WeightInit.XAVIER).build()) - - .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( - LossFunctions.LossFunction.MCXENT).weightInit(WeightInit.XAVIER) - .activation(Activation.SOFTMAX).nIn(10).nOut(10).build()) + .layer( + 0, + DenseLayer.builder() + .nIn(28 * 28) + .nOut(10) + .activation(Activation.RELU) + .weightInit(WeightInit.XAVIER) + .build()) + .layer( + 1, + OutputLayer.builder() + .lossFunction(LossFunctions.LossFunction.MCXENT) + .weightInit(WeightInit.XAVIER) + .activation(Activation.SOFTMAX) + .nIn(10) + .nOut(10) + .build()) .build(); - 
MultiLayerNetwork network = new MultiLayerNetwork(conf); - network.init(); - network.fit(next); + MultiLayerNetwork network = new MultiLayerNetwork(conf); + network.init(); + network.fit(next); + // Run with separate activation layer + NeuralNetConfiguration conf2 = + NeuralNetConfiguration.builder() + .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) + .seed(123) + .list() + .layer( + 0, + DenseLayer.builder() + .nIn(28 * 28) + .nOut(10) + .activation(Activation.IDENTITY) + .weightInit(WeightInit.XAVIER) + .build()) + .layer( + 1, + org.deeplearning4j.nn.conf.layers.ActivationLayer.builder() + .activation(Activation.RELU) + .build()) + .layer( + 2, + OutputLayer.builder() + .lossFunction(LossFunctions.LossFunction.MCXENT) + .weightInit(WeightInit.XAVIER) + .activation(Activation.SOFTMAX) + .nIn(10) + .nOut(10) + .build()) + .build(); - // Run with separate activation layer - NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder() - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(123) - .list() - .layer(0, new DenseLayer.Builder().nIn(28 * 28).nOut(10).activation(Activation.IDENTITY) - .weightInit(WeightInit.XAVIER).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.ActivationLayer.Builder() - .activation(Activation.RELU).build()) - .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) - .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).nIn(10).nOut(10) - .build()) - .build(); + MultiLayerNetwork network2 = new MultiLayerNetwork(conf2); + network2.init(); + network2.fit(next); - MultiLayerNetwork network2 = new MultiLayerNetwork(conf2); - network2.init(); - network2.fit(next); + // check parameters + assertEquals(network.getLayer(0).getParam("W"), network2.getLayer(0).getParam("W")); + assertEquals(network.getLayer(1).getParam("W"), network2.getLayer(2).getParam("W")); + assertEquals(network.getLayer(0).getParam("b"), network2.getLayer(0).getParam("b")); + assertEquals(network.getLayer(1).getParam("b"), network2.getLayer(2).getParam("b")); - // check parameters - assertEquals(network.getLayer(0).getParam("W"), network2.getLayer(0).getParam("W")); - assertEquals(network.getLayer(1).getParam("W"), network2.getLayer(2).getParam("W")); - assertEquals(network.getLayer(0).getParam("b"), network2.getLayer(0).getParam("b")); - assertEquals(network.getLayer(1).getParam("b"), network2.getLayer(2).getParam("b")); + // check activations + network.init(); + network.setInput(next.getFeatures()); + List activations = network.feedForward(true); - // check activations - network.init(); - network.setInput(next.getFeatures()); - List activations = network.feedForward(true); + network2.init(); + network2.setInput(next.getFeatures()); + List activations2 = network2.feedForward(true); - network2.init(); - network2.setInput(next.getFeatures()); - List activations2 = network2.feedForward(true); + assertEquals(activations.get(1).reshape(activations2.get(2).shape()), activations2.get(2)); + assertEquals(activations.get(2), activations2.get(3)); + } - assertEquals(activations.get(1).reshape(activations2.get(2).shape()), activations2.get(2)); - assertEquals(activations.get(2), activations2.get(3)); + @Test + public void testAutoEncoderActivationLayer() throws Exception { + int minibatch = 3; + int nIn = 5; + int layerSize = 5; + int nOut = 3; + INDArray next = Nd4j.rand(minibatch, nIn); + INDArray labels = Nd4j.zeros(minibatch, nOut); + for (int i = 0; i < minibatch; i++) { + labels.putScalar(i, i % nOut, 1.0); } - @Test - public 
void testAutoEncoderActivationLayer() throws Exception { + // Run without separate activation layer + Nd4j.getRandom().setSeed(12345); + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() + .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) + .seed(123) + .list() + .layer( + 0, + AutoEncoder.builder() + .nIn(nIn) + .nOut(layerSize) + .corruptionLevel(0.0) + .activation(Activation.SIGMOID) + .build()) + .layer( + 1, + OutputLayer.builder().lossFunction( + LossFunctions.LossFunction.RECONSTRUCTION_CROSSENTROPY) + .activation(Activation.SOFTMAX) + .nIn(layerSize) + .nOut(nOut) + .build()) + .build(); - int minibatch = 3; - int nIn = 5; - int layerSize = 5; - int nOut = 3; + MultiLayerNetwork network = new MultiLayerNetwork(conf); + network.init(); + network.fit( + next, + labels); // Labels are necessary for this test: layer activation function affect pretraining + // results, otherwise - INDArray next = Nd4j.rand(minibatch, nIn); - INDArray labels = Nd4j.zeros(minibatch, nOut); - for (int i = 0; i < minibatch; i++) { - labels.putScalar(i, i % nOut, 1.0); - } + // Run with separate activation layer + Nd4j.getRandom().setSeed(12345); + NeuralNetConfiguration conf2 = + NeuralNetConfiguration.builder() + .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) + .seed(123) + .list() + .layer( + 0, + AutoEncoder.builder() + .nIn(nIn) + .nOut(layerSize) + .corruptionLevel(0.0) + .activation(Activation.IDENTITY) + .build()) + .layer( + 1, + ActivationLayer.builder() + .activation(Activation.SIGMOID) + .build()) + .layer( + 2, + OutputLayer.builder().lossFunction( + LossFunctions.LossFunction.RECONSTRUCTION_CROSSENTROPY) + .activation(Activation.SOFTMAX) + .nIn(layerSize) + .nOut(nOut) + .build()) + .build(); - // Run without separate activation layer - Nd4j.getRandom().setSeed(12345); - NeuralNetConfiguration conf = NeuralNetConfiguration.builder() - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(123) - .list() - .layer(0, new AutoEncoder.Builder().nIn(nIn).nOut(layerSize).corruptionLevel(0.0) - .activation(Activation.SIGMOID).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( - LossFunctions.LossFunction.RECONSTRUCTION_CROSSENTROPY) - .activation(Activation.SOFTMAX).nIn(layerSize).nOut(nOut) - .build()) - .build(); + MultiLayerNetwork network2 = new MultiLayerNetwork(conf2); + network2.init(); + network2.fit(next, labels); - MultiLayerNetwork network = new MultiLayerNetwork(conf); - network.init(); - network.fit(next, labels); //Labels are necessary for this test: layer activation function affect pretraining results, otherwise + // check parameters + assertEquals(network.getLayer(0).getParam("W"), network2.getLayer(0).getParam("W")); + assertEquals(network.getLayer(1).getParam("W"), network2.getLayer(2).getParam("W")); + assertEquals(network.getLayer(0).getParam("b"), network2.getLayer(0).getParam("b")); + assertEquals(network.getLayer(1).getParam("b"), network2.getLayer(2).getParam("b")); + // check activations + network.init(); + network.setInput(next); + List activations = network.feedForward(true); - // Run with separate activation layer - Nd4j.getRandom().setSeed(12345); - NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder() - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(123) - .list() - .layer(0, new AutoEncoder.Builder().nIn(nIn).nOut(layerSize).corruptionLevel(0.0) - .activation(Activation.IDENTITY).build()) - .layer(1, new 
org.deeplearning4j.nn.conf.layers.ActivationLayer.Builder() - .activation(Activation.SIGMOID).build()) - .layer(2, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( - LossFunctions.LossFunction.RECONSTRUCTION_CROSSENTROPY) - .activation(Activation.SOFTMAX).nIn(layerSize).nOut(nOut) - .build()) - .build(); + network2.init(); + network2.setInput(next); + List activations2 = network2.feedForward(true); - MultiLayerNetwork network2 = new MultiLayerNetwork(conf2); - network2.init(); - network2.fit(next, labels); + assertEquals(activations.get(1).reshape(activations2.get(2).shape()), activations2.get(2)); + assertEquals(activations.get(2), activations2.get(3)); + } - // check parameters - assertEquals(network.getLayer(0).getParam("W"), network2.getLayer(0).getParam("W")); - assertEquals(network.getLayer(1).getParam("W"), network2.getLayer(2).getParam("W")); - assertEquals(network.getLayer(0).getParam("b"), network2.getLayer(0).getParam("b")); - assertEquals(network.getLayer(1).getParam("b"), network2.getLayer(2).getParam("b")); + @Test + public void testCNNActivationLayer() throws Exception { + DataSetIterator iter = new MnistDataSetIterator(2, 2); + DataSet next = iter.next(); - // check activations - network.init(); - network.setInput(next); - List activations = network.feedForward(true); + // Run without separate activation layer + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() + .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) + .seed(123) + .list() + .layer( + 0, + ConvolutionLayer.builder(4, 4) + .stride(2, 2) + .nIn(1) + .nOut(20) + .activation(Activation.RELU) + .weightInit(WeightInit.XAVIER) + .build()) + .layer( + 1, + OutputLayer.builder().lossFunction( + LossFunctions.LossFunction.MCXENT) + .weightInit(WeightInit.XAVIER) + .activation(Activation.SOFTMAX) + .nOut(10) + .build()) + .inputType(InputType.convolutionalFlat(28, 28, 1)) + .build(); - network2.init(); - network2.setInput(next); - List activations2 = network2.feedForward(true); + MultiLayerNetwork network = new MultiLayerNetwork(conf); + network.init(); + network.fit(next); - assertEquals(activations.get(1).reshape(activations2.get(2).shape()), activations2.get(2)); - assertEquals(activations.get(2), activations2.get(3)); + // Run with separate activation layer + NeuralNetConfiguration conf2 = + NeuralNetConfiguration.builder() + .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) + .seed(123) + .list() + .layer( + 0, + ConvolutionLayer.builder(4, 4) + .stride(2, 2) + .nIn(1) + .nOut(20) + .activation(Activation.IDENTITY) + .weightInit(WeightInit.XAVIER) + .build()) + .layer( + 1, + org.deeplearning4j.nn.conf.layers.ActivationLayer.builder() + .activation(Activation.RELU) + .build()) + .layer( + 2, + OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) + .weightInit(WeightInit.XAVIER) + .activation(Activation.SOFTMAX) + .nOut(10) + .build()) + .inputType(InputType.convolutionalFlat(28, 28, 1)) + .build(); + MultiLayerNetwork network2 = new MultiLayerNetwork(conf2); + network2.init(); + network2.fit(next); - } + // check parameters + assertEquals(network.getLayer(0).getParam("W"), network2.getLayer(0).getParam("W")); + assertEquals(network.getLayer(1).getParam("W"), network2.getLayer(2).getParam("W")); + assertEquals(network.getLayer(0).getParam("b"), network2.getLayer(0).getParam("b")); - @Test - public void testCNNActivationLayer() throws Exception { - DataSetIterator iter = new MnistDataSetIterator(2, 2); - DataSet next = iter.next(); + // 
check activations + network.init(); + network.setInput(next.getFeatures()); + List activations = network.feedForward(true); - // Run without separate activation layer - NeuralNetConfiguration conf = NeuralNetConfiguration.builder() - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(123) - .list() - .layer(0, new ConvolutionLayer.Builder(4, 4).stride(2, 2).nIn(1).nOut(20) - .activation(Activation.RELU).weightInit(WeightInit.XAVIER).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( - LossFunctions.LossFunction.MCXENT).weightInit(WeightInit.XAVIER) - .activation(Activation.SOFTMAX).nOut(10).build()) - .inputType(InputType.convolutionalFlat(28, 28, 1)).build(); + network2.init(); + network2.setInput(next.getFeatures()); + List activations2 = network2.feedForward(true); - MultiLayerNetwork network = new MultiLayerNetwork(conf); - network.init(); - network.fit(next); + assertEquals(activations.get(1).reshape(activations2.get(2).shape()), activations2.get(2)); + assertEquals(activations.get(2), activations2.get(3)); + } + @Test + public void testActivationInheritance() { - // Run with separate activation layer - NeuralNetConfiguration conf2 = - NeuralNetConfiguration.builder() - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .seed(123).list() - .layer(0, new ConvolutionLayer.Builder(4, 4).stride(2, 2).nIn(1).nOut(20) - .activation(Activation.IDENTITY).weightInit(WeightInit.XAVIER) - .build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.ActivationLayer.Builder() - .activation(Activation.RELU).build()) - .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) - .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX) - .nOut(10).build()) + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() + .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) + .seed(123) + .weightInit(WeightInit.XAVIER) + .activation(Activation.RATIONALTANH) - .inputType(InputType.convolutionalFlat(28, 28, 1)).build(); + .layer(DenseLayer.builder().nIn(10).nOut(10).build()) + .layer(ActivationLayer.builder().build()) + .layer(ActivationLayer.builder().build()) + .layer(ActivationLayer.builder().activation(Activation.ELU).build()) + .layer( + org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( + LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX) + .nIn(10) + .nOut(10) + .build()) + .build(); - MultiLayerNetwork network2 = new MultiLayerNetwork(conf2); - network2.init(); - network2.fit(next); + MultiLayerNetwork network = new MultiLayerNetwork(conf); + network.init(); - // check parameters - assertEquals(network.getLayer(0).getParam("W"), network2.getLayer(0).getParam("W")); - assertEquals(network.getLayer(1).getParam("W"), network2.getLayer(2).getParam("W")); - assertEquals(network.getLayer(0).getParam("b"), network2.getLayer(0).getParam("b")); + assertNotNull( + ((ActivationLayer) network.getLayer(1).getLayerConfiguration()).getActivationFn()); - // check activations - network.init(); - network.setInput(next.getFeatures()); - List activations = network.feedForward(true); + assertTrue( + ((DenseLayer) network.getLayer(0).getLayerConfiguration()).getActivationFn() + instanceof ActivationRationalTanh); + assertTrue( + ((ActivationLayer) network.getLayer(1).getLayerConfiguration()).getActivationFn() + instanceof ActivationRationalTanh); + assertTrue( + ((ActivationLayer) network.getLayer(2).getLayerConfiguration()).getActivationFn() + instanceof 
ActivationRationalTanh); + assertTrue( + ((ActivationLayer) network.getLayer(3).getLayerConfiguration()).getActivationFn() + instanceof ActivationELU); + assertTrue( + ((OutputLayer) network.getLayer(4).getLayerConfiguration()).getActivationFn() + instanceof ActivationSoftmax); + } - network2.init(); - network2.setInput(next.getFeatures()); - List activations2 = network2.feedForward(true); + @Test + public void testActivationInheritanceCG() { - assertEquals(activations.get(1).reshape(activations2.get(2).shape()), activations2.get(2)); - assertEquals(activations.get(2), activations2.get(3)); - } + ComputationGraphConfiguration conf = + NeuralNetConfiguration.builder() + .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) + .seed(123) + .weightInit(WeightInit.XAVIER) + .activation(Activation.RATIONALTANH) + .graphBuilder() + .addInputs("in") + .addLayer("0", DenseLayer.builder().nIn(10).nOut(10).build(), "in") + .addLayer("1", ActivationLayer.builder().build(), "0") + .addLayer("2", ActivationLayer.builder().build(), "1") + .addLayer("3", ActivationLayer.builder().activation(Activation.ELU).build(), "2") + .addLayer( + "4", + OutputLayer.builder().lossFunction( + LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX) + .nIn(10) + .nOut(10) + .build(), + "3") + .setOutputs("4") + .build(); + ComputationGraph network = new ComputationGraph(conf); + network.init(); - @Test - public void testActivationInheritance() { - - NeuralNetConfiguration conf = NeuralNetConfiguration.builder() - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(123) - .weightInit(WeightInit.XAVIER) - .activation(Activation.RATIONALTANH) - .list() - .layer(new DenseLayer.Builder().nIn(10).nOut(10).build()) - .layer(new ActivationLayer()) - .layer(new ActivationLayer.Builder().build()) - .layer(new ActivationLayer.Builder().activation(Activation.ELU).build()) - .layer(new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) - .activation(Activation.SOFTMAX).nIn(10).nOut(10).build()) - .build(); - - MultiLayerNetwork network = new MultiLayerNetwork(conf); - network.init(); - - assertNotNull(((ActivationLayer)network.getLayer(1).getLayerConfiguration()).getActivationFn()); - - assertTrue(((DenseLayer)network.getLayer(0).getLayerConfiguration()).getActivationFn() instanceof ActivationRationalTanh); - assertTrue(((ActivationLayer)network.getLayer(1).getLayerConfiguration()).getActivationFn() instanceof ActivationRationalTanh); - assertTrue(((ActivationLayer)network.getLayer(2).getLayerConfiguration()).getActivationFn() instanceof ActivationRationalTanh); - assertTrue(((ActivationLayer)network.getLayer(3).getLayerConfiguration()).getActivationFn() instanceof ActivationELU); - assertTrue(((OutputLayer)network.getLayer(4).getLayerConfiguration()).getActivationFn() instanceof ActivationSoftmax); - } - - @Test - public void testActivationInheritanceCG() { - - ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(123) - .weightInit(WeightInit.XAVIER) - .activation(Activation.RATIONALTANH) - .graphBuilder() - .addInputs("in") - .addLayer("0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in") - .addLayer("1", new ActivationLayer(), "0") - .addLayer("2", new ActivationLayer.Builder().build(), "1") - .addLayer("3", new ActivationLayer.Builder().activation(Activation.ELU).build(), "2") - .addLayer("4", new 
org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) - .activation(Activation.SOFTMAX).nIn(10).nOut(10).build(), "3") - .setOutputs("4") - .build(); - - ComputationGraph network = new ComputationGraph(conf); - network.init(); - - assertNotNull(((ActivationLayer)network.getLayer("1").getLayerConfiguration()).getActivationFn()); - - assertTrue(((DenseLayer)network.getLayer("0").getLayerConfiguration()).getActivationFn() instanceof ActivationRationalTanh); - assertTrue(((ActivationLayer)network.getLayer("1").getLayerConfiguration()).getActivationFn() instanceof ActivationRationalTanh); - assertTrue(((ActivationLayer)network.getLayer("2").getLayerConfiguration()).getActivationFn() instanceof ActivationRationalTanh); - assertTrue(((ActivationLayer)network.getLayer("3").getLayerConfiguration()).getActivationFn() instanceof ActivationELU); - assertTrue(((OutputLayer)network.getLayer("4").getLayerConfiguration()).getActivationFn() instanceof ActivationSoftmax); - } + assertNotNull( + ((ActivationLayer) network.getLayer("1").getLayerConfiguration()).getActivationFn()); + assertTrue( + ((DenseLayer) network.getLayer("0").getLayerConfiguration()).getActivationFn() + instanceof ActivationRationalTanh); + assertTrue( + ((ActivationLayer) network.getLayer("1").getLayerConfiguration()).getActivationFn() + instanceof ActivationRationalTanh); + assertTrue( + ((ActivationLayer) network.getLayer("2").getLayerConfiguration()).getActivationFn() + instanceof ActivationRationalTanh); + assertTrue( + ((ActivationLayer) network.getLayer("3").getLayerConfiguration()).getActivationFn() + instanceof ActivationELU); + assertTrue( + ((OutputLayer) network.getLayer("4").getLayerConfiguration()).getActivationFn() + instanceof ActivationSoftmax); + } } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/AutoEncoderTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/AutoEncoderTest.java index 8b63b88b4..117416f45 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/AutoEncoderTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/AutoEncoderTest.java @@ -51,15 +51,15 @@ public class AutoEncoderTest extends BaseDL4JTest { .weightInit(WeightInit.XAVIER) .graphBuilder() .addInputs("in1", "in2") - .addLayer("1", new DenseLayer.Builder().nOut(mergeSize).build(), "in1") - .addLayer("2", new DenseLayer.Builder().nOut(mergeSize).build(), "in2") + .addLayer("1", DenseLayer.builder().nOut(mergeSize).build(), "in1") + .addLayer("2", DenseLayer.builder().nOut(mergeSize).build(), "in2") .addVertex("merge", new MergeVertex(), "1", "2") - .addLayer("e",new AutoEncoder.Builder().nOut(encdecSize).corruptionLevel(0.2).build(),"merge") - .addLayer("hidden",new AutoEncoder.Builder().nOut(hiddenSize).build(),"e") - .addLayer("decoder",new AutoEncoder.Builder().nOut(encdecSize).corruptionLevel(0.2).build(),"hidden") - .addLayer("L4", new DenseLayer.Builder().nOut(mergeSize).build(), "decoder") - .addLayer("out1", new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nOut(in1Size).build(),"L4") - .addLayer("out2",new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nOut(in2Size).build(),"L4") + .addLayer("e",AutoEncoder.builder().nOut(encdecSize).corruptionLevel(0.2).build(),"merge") + .addLayer("hidden",AutoEncoder.builder().nOut(hiddenSize).build(),"e") + .addLayer("decoder",AutoEncoder.builder().nOut(encdecSize).corruptionLevel(0.2).build(),"hidden") + 
.addLayer("L4", DenseLayer.builder().nOut(mergeSize).build(), "decoder") + .addLayer("out1", OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nOut(in1Size).build(),"L4") + .addLayer("out2",OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nOut(in2Size).build(),"L4") .setOutputs("out1","out2") .setInputTypes(InputType.feedForward(in1Size), InputType.feedForward(in2Size)) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/BaseLayerConfigurationTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/BaseLayerConfigurationTest.java index c481d20df..977cb264a 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/BaseLayerConfigurationTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/BaseLayerConfigurationTest.java @@ -81,7 +81,7 @@ public class BaseLayerConfigurationTest extends BaseDL4JTest { int nOut = 2; NeuralNetConfiguration conf = NeuralNetConfiguration.builder() - .layer(new ConvolutionLayer.Builder().nIn(nIn).nOut(nOut).build()).build(); + .layer(ConvolutionLayer.builder().nIn(nIn).nOut(nOut).build()).build(); val numParams = conf.getFlattenedLayerConfigurations().get(0).initializer().numParams(conf); INDArray params = Nd4j.create(1, numParams); @@ -94,8 +94,8 @@ public class BaseLayerConfigurationTest extends BaseDL4JTest { int nOut = 2; NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() - .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(nOut).build()) - .layer(1, new OutputLayer.Builder().nIn(nIn).nOut(nOut).activation(Activation.SOFTMAX).build()).build(); + .layer(0, DenseLayer.builder().nIn(nIn).nOut(nOut).build()) + .layer(1, OutputLayer.builder().nIn(nIn).nOut(nOut).activation(Activation.SOFTMAX).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/CacheModeTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/CacheModeTest.java index 7898d35ad..924b3aa18 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/CacheModeTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/CacheModeTest.java @@ -70,9 +70,9 @@ public class CacheModeTest extends BaseDL4JTest { .seed(12345) .cacheMode(cacheMode) .list() - .layer(new ConvolutionLayer.Builder().nOut(3).build()) - .layer(new ConvolutionLayer.Builder().nOut(3).build()) - .layer(new OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).build()) + .layer(ConvolutionLayer.builder().nOut(3).build()) + .layer(ConvolutionLayer.builder().nOut(3).build()) + .layer(OutputLayer.builder().nOut(10).activation(Activation.SOFTMAX).build()) .inputType(InputType.convolutionalFlat(28, 28, 1)) .build(); @@ -115,12 +115,12 @@ public class CacheModeTest extends BaseDL4JTest { .cacheMode(cacheMode) .list() .layer(graves ? - new GravesLSTM.Builder().nIn(3).nOut(3).build() : - new LSTM.Builder().nIn(3).nOut(3).build()) + GravesLSTM.builder().nIn(3).nOut(3).build() : + LSTM.builder().nIn(3).nOut(3).build()) .layer(graves ? 
- new GravesLSTM.Builder().nIn(3).nOut(3).build() : - new LSTM.Builder().nIn(3).nOut(3).build()) - .layer(new RnnOutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).build()) + GravesLSTM.builder().nIn(3).nOut(3).build() : + LSTM.builder().nIn(3).nOut(3).build()) + .layer(RnnOutputLayer.builder().nOut(10).activation(Activation.SOFTMAX).build()) .build(); return conf; @@ -160,9 +160,9 @@ public class CacheModeTest extends BaseDL4JTest { .cacheMode(cacheMode) .graphBuilder() .addInputs("in") - .layer("0", new ConvolutionLayer.Builder().nOut(3).build(), "in") - .layer("1", new ConvolutionLayer.Builder().nOut(3).build(), "0") - .layer("2", new OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).build(), "1") + .layer("0", ConvolutionLayer.builder().nOut(3).build(), "in") + .layer("1", ConvolutionLayer.builder().nOut(3).build(), "0") + .layer("2", OutputLayer.builder().nOut(10).activation(Activation.SOFTMAX).build(), "1") .setOutputs("2") .setInputTypes(InputType.convolutionalFlat(28, 28, 1)) .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/CenterLossOutputLayerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/CenterLossOutputLayerTest.java index 84f94928b..8201610ba 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/CenterLossOutputLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/CenterLossOutputLayerTest.java @@ -20,6 +20,9 @@ package org.deeplearning4j.nn.layers; +import static org.junit.jupiter.api.Assertions.assertNotEquals; + +import java.util.Random; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; import org.deeplearning4j.nn.api.OptimizationAlgorithm; @@ -44,106 +47,145 @@ import org.nd4j.linalg.learning.config.NoOp; import org.nd4j.linalg.lossfunctions.LossFunctions; import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction; -import java.util.Random; - -import static org.junit.jupiter.api.Assertions.assertNotEquals; - public class CenterLossOutputLayerTest extends BaseDL4JTest { - private ComputationGraph getGraph(int numLabels, double lambda) { - Nd4j.getRandom().setSeed(12345); - ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .dist(new NormalDistribution(0, 1)).updater(new NoOp()) - .graphBuilder().addInputs("input1") - .addLayer("l1", new DenseLayer.Builder().nIn(4).nOut(5).activation(Activation.RELU).build(), - "input1") - .addLayer("lossLayer", new CenterLossOutputLayer.Builder() - .lossFunction(LossFunctions.LossFunction.MCXENT).nIn(5).nOut(numLabels) - .lambda(lambda).activation(Activation.SOFTMAX).build(), "l1") - .setOutputs("lossLayer").build(); + private ComputationGraph getGraph(int numLabels, double lambda) { + Nd4j.getRandom().setSeed(12345); + ComputationGraphConfiguration conf = + NeuralNetConfiguration.builder() + .seed(12345) + .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) + .dist(new NormalDistribution(0, 1)) + .updater(new NoOp()) + .graphBuilder() + .addInputs("input1") + .addLayer( + "l1", + DenseLayer.builder().nIn(4).nOut(5).activation(Activation.RELU).build(), + "input1") + .addLayer( + "lossLayer", + CenterLossOutputLayer.builder() + .lossFunction(LossFunctions.LossFunction.MCXENT) + .nIn(5) + .nOut(numLabels) + .lambda(lambda) + .activation(Activation.SOFTMAX) + .build(), + "l1") + 
.setOutputs("lossLayer") + .build(); - ComputationGraph graph = new ComputationGraph(conf); - graph.init(); - return graph; + ComputationGraph graph = new ComputationGraph(conf); + graph.init(); + return graph; + } + + public ComputationGraph getCNNMnistConfig() { + + int nChannels = 1; // Number of input channels + int outputNum = 10; // The number of possible outcomes + + ComputationGraphConfiguration conf = + NeuralNetConfiguration.builder() + .seed(12345) // Training iterations as above + .l2(0.0005) + .weightInit(WeightInit.XAVIER) + .updater(new Nesterovs(0.01, 0.9)) + .graphBuilder() + .addInputs("input") + .setInputTypes(InputType.convolutionalFlat(28, 28, 1)) + .addLayer( + "0", + ConvolutionLayer.builder(5, 5) + // nIn and nOut specify channels. nIn here is the nChannels and nOut is the + // number of filters to be applied + .nIn(nChannels) + .stride(1, 1) + .nOut(20) + .activation(Activation.IDENTITY) + .build(), + "input") + .addLayer( + "1", + SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX) + .kernelSize(2, 2) + .stride(2, 2) + .build(), + "0") + .addLayer( + "2", + ConvolutionLayer.builder(5, 5) + // Note that nIn need not be specified in later layers + .stride(1, 1) + .nOut(50) + .activation(Activation.IDENTITY) + .build(), + "1") + .addLayer( + "3", + SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX) + .kernelSize(2, 2) + .stride(2, 2) + .build(), + "2") + .addLayer("4", DenseLayer.builder().activation(Activation.RELU).nOut(500).build(), "3") + .addLayer( + "output", + CenterLossOutputLayer.builder() + .lossFunction(LossFunction.MCXENT) + .nOut(outputNum) + .activation(Activation.SOFTMAX) + .build(), + "4") + .setOutputs("output") + .build(); + + ComputationGraph graph = new ComputationGraph(conf); + graph.init(); + return graph; + } + + @Test + public void testLambdaConf() { + double[] lambdas = new double[] {0.1, 0.01}; + double[] results = new double[2]; + int numClasses = 2; + + INDArray input = Nd4j.rand(150, 4); + INDArray labels = Nd4j.zeros(150, numClasses); + Random r = new Random(12345); + for (int i = 0; i < 150; i++) { + labels.putScalar(i, r.nextInt(numClasses), 1.0); + } + ComputationGraph graph; + + for (int i = 0; i < lambdas.length; i++) { + graph = getGraph(numClasses, lambdas[i]); + graph.setInput(0, input); + graph.setLabel(0, labels); + graph.computeGradientAndScore(); + results[i] = graph.getScore(); } - public ComputationGraph getCNNMnistConfig() { + assertNotEquals(results[0], results[1]); + } - int nChannels = 1; // Number of input channels - int outputNum = 10; // The number of possible outcomes + @Test + //// @Ignore //Should be run manually + public void testMNISTConfig() throws Exception { + int batchSize = 64; // Test batch size + DataSetIterator mnistTrain = new MnistDataSetIterator(batchSize, true, 12345); - ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345) // Training iterations as above - .l2(0.0005).weightInit(WeightInit.XAVIER) - .updater(new Nesterovs(0.01, 0.9)) - .graphBuilder().addInputs("input") - .setInputTypes(InputType.convolutionalFlat(28, 28, 1)) - .addLayer("0", new ConvolutionLayer.Builder(5, 5) - //nIn and nOut specify channels. 
nIn here is the nChannels and nOut is the number of filters to be applied - .nIn(nChannels).stride(1, 1).nOut(20).activation(Activation.IDENTITY).build(), - "input") - .addLayer("1", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2) - .stride(2, 2).build(), "0") - .addLayer("2", new ConvolutionLayer.Builder(5, 5) - //Note that nIn need not be specified in later layers - .stride(1, 1).nOut(50).activation(Activation.IDENTITY).build(), "1") - .addLayer("3", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2) - .stride(2, 2).build(), "2") - .addLayer("4", new DenseLayer.Builder().activation(Activation.RELU).nOut(500).build(), "3") - .addLayer("output", - new org.deeplearning4j.nn.conf.layers.CenterLossOutputLayer.Builder( - LossFunction.MCXENT).nOut(outputNum) - .activation(Activation.SOFTMAX).build(), - "4") - .setOutputs("output").build(); + ComputationGraph net = getCNNMnistConfig(); + net.init(); + net.addTrainingListeners(new ScoreIterationListener(1)); - ComputationGraph graph = new ComputationGraph(conf); - graph.init(); - return graph; + for (int i = 0; i < 50; i++) { + net.fit(mnistTrain.next()); + Thread.sleep(1000); } - @Test - public void testLambdaConf() { - double[] lambdas = new double[] {0.1, 0.01}; - double[] results = new double[2]; - int numClasses = 2; - - INDArray input = Nd4j.rand(150, 4); - INDArray labels = Nd4j.zeros(150, numClasses); - Random r = new Random(12345); - for (int i = 0; i < 150; i++) { - labels.putScalar(i, r.nextInt(numClasses), 1.0); - } - ComputationGraph graph; - - for (int i = 0; i < lambdas.length; i++) { - graph = getGraph(numClasses, lambdas[i]); - graph.setInput(0, input); - graph.setLabel(0, labels); - graph.computeGradientAndScore(); - results[i] = graph.getScore(); - } - - assertNotEquals(results[0], results[1]); - } - - - - @Test - ////@Ignore //Should be run manually - public void testMNISTConfig() throws Exception { - int batchSize = 64; // Test batch size - DataSetIterator mnistTrain = new MnistDataSetIterator(batchSize, true, 12345); - - ComputationGraph net = getCNNMnistConfig(); - net.init(); - net.addTrainingListeners(new ScoreIterationListener(1)); - - for (int i = 0; i < 50; i++) { - net.fit(mnistTrain.next()); - Thread.sleep(1000); - } - - Thread.sleep(100000); - } + Thread.sleep(100000); + } } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/DropoutLayerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/DropoutLayerTest.java index 80cf35543..9b9af8919 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/DropoutLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/DropoutLayerTest.java @@ -62,7 +62,7 @@ public class DropoutLayerTest extends BaseDL4JTest { @Test public void testInputTypes() { - DropoutLayer config = new DropoutLayer.Builder(0.5).build(); + DropoutLayer config = DropoutLayer.builder(0.5).build(); InputType in1 = InputType.feedForward(20); InputType in2 = InputType.convolutional(28, 28, 1); @@ -77,10 +77,10 @@ public class DropoutLayerTest extends BaseDL4JTest { public void testDropoutLayerWithoutTraining() throws Exception { NeuralNetConfiguration confIntegrated = NeuralNetConfiguration.builder().seed(3648) .list().layer(0, - new ConvolutionLayer.Builder(1, 1).stride(1, 1).nIn(1).nOut(1).dropOut(0.25) + ConvolutionLayer.builder(1, 1).stride(1, 1).nIn(1).nOut(1).dropOut(0.25) .activation(Activation.IDENTITY).weightInit(WeightInit.XAVIER) 
.build()) - .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(1, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX) .weightInit(WeightInit.XAVIER).dropOut(0.25) .nOut(4).build()) @@ -98,13 +98,13 @@ public class DropoutLayerTest extends BaseDL4JTest { .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .seed(3648) .list().layer(0, - new DropoutLayer.Builder(0.25) + DropoutLayer.builder(0.25) .build()) - .layer(1, new ConvolutionLayer.Builder(1, 1).stride(1, 1).nIn(1).nOut(1) + .layer(1, ConvolutionLayer.builder(1, 1).stride(1, 1).nIn(1).nOut(1) .activation(Activation.IDENTITY).weightInit(WeightInit.XAVIER) .build()) - .layer(2, new DropoutLayer.Builder(0.25).build()) - .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(2, DropoutLayer.builder(0.25).build()) + .layer(3, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX) .nOut(4).build()) @@ -136,8 +136,8 @@ public class DropoutLayerTest extends BaseDL4JTest { List actTestSeparate = netSeparate.feedForward(in.dup(), false); //Check masks: - INDArray maskIntegrated = ((Dropout)netIntegrated.getLayer(0).getLayerConfiguration().getIDropout()).getMask(); - INDArray maskSeparate = ((Dropout)netSeparate.getLayer(0).getLayerConfiguration().getIDropout()).getMask(); + INDArray maskIntegrated = ((Dropout)netIntegrated.getLayer(0).getLayerConfiguration().getDropOut()).getMask(); + INDArray maskSeparate = ((Dropout)netSeparate.getLayer(0).getLayerConfiguration().getDropOut()).getMask(); assertEquals(maskIntegrated, maskSeparate); @@ -158,11 +158,11 @@ public class DropoutLayerTest extends BaseDL4JTest { NeuralNetConfiguration confIntegrated = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(123) .list() - .layer(0, new DenseLayer.Builder().nIn(28 * 28).nOut(10) + .layer(0, DenseLayer.builder().nIn(28 * 28).nOut(10) .activation(Activation.RELU).weightInit( WeightInit.XAVIER) .build()) - .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(1, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).dropOut(0.25) .nIn(10).nOut(10).build()) .build(); @@ -175,10 +175,10 @@ public class DropoutLayerTest extends BaseDL4JTest { NeuralNetConfiguration confSeparate = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(123) .list() - .layer(0, new DenseLayer.Builder().nIn(28 * 28).nOut(10).activation(Activation.RELU) + .layer(0, DenseLayer.builder().nIn(28 * 28).nOut(10).activation(Activation.RELU) .weightInit(WeightInit.XAVIER).build()) - .layer(1, new DropoutLayer.Builder(0.25).build()) - .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(1, DropoutLayer.builder(0.25).build()) + .layer(2, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).nIn(10).nOut(10) .build()) .build(); @@ -230,10 +230,10 @@ public class DropoutLayerTest extends BaseDL4JTest { Nd4j.getRandom().setSeed(12345); NeuralNetConfiguration confIntegrated = NeuralNetConfiguration.builder().seed(123) .list().layer(0, - new ConvolutionLayer.Builder(4, 4).stride(2, 2).nIn(1).nOut(20) + ConvolutionLayer.builder(4, 4).stride(2, 2).nIn(1).nOut(20) 
.activation(Activation.TANH).weightInit(WeightInit.XAVIER) .build()) - .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(1, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).dropOut(0.5) .nOut(10).build()) .inputType(InputType.convolutionalFlat(28, 28, 1)).build(); @@ -248,10 +248,10 @@ public class DropoutLayerTest extends BaseDL4JTest { preProcessorMap.put(1, new CnnToFeedForwardPreProcessor(13, 13, 20)); NeuralNetConfiguration confSeparate = NeuralNetConfiguration.builder().seed(123).list() - .layer(0, new ConvolutionLayer.Builder(4, 4).stride(2, 2).nIn(1).nOut(20) + .layer(0, ConvolutionLayer.builder(4, 4).stride(2, 2).nIn(1).nOut(20) .activation(Activation.TANH).weightInit(WeightInit.XAVIER).build()) - .layer(1, new DropoutLayer.Builder(0.5).build()) - .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(1, DropoutLayer.builder(0.5).build()) + .layer(2, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).nOut(10).build()) .inputPreProcessors(preProcessorMap) .inputType(InputType.convolutionalFlat(28, 28, 1)).build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/FrozenLayerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/FrozenLayerTest.java index 20880d71a..bcbf3b89b 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/FrozenLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/FrozenLayerTest.java @@ -26,7 +26,6 @@ import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration.NeuralNetConfigurationBuilder; import org.deeplearning4j.nn.conf.layers.DenseLayer; -import org.deeplearning4j.nn.conf.layers.DenseLayer.Builder; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; @@ -64,10 +63,10 @@ public class FrozenLayerTest extends BaseDL4JTest { MultiLayerNetwork modelToFineTune = new MultiLayerNetwork( (NeuralNetConfiguration) ((NeuralNetConfigurationBuilder)overallConf).clone().list() - .layer(0, new Builder().nIn(4).nOut(3).build()) - .layer(1, new Builder().nIn(3).nOut(2).build()) - .layer(2, new Builder().nIn(2).nOut(3).build()) - .layer(3, new OutputLayer.Builder( + .layer(0, DenseLayer.builder().nIn(4).nOut(3).build()) + .layer(1, DenseLayer.builder().nIn(3).nOut(2).build()) + .layer(2, DenseLayer.builder().nIn(2).nOut(3).build()) + .layer(3, OutputLayer.builder( LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(3).nOut(3) .build()) .build()); @@ -83,8 +82,8 @@ public class FrozenLayerTest extends BaseDL4JTest { Nd4j.hstack(modelToFineTune.getLayer(2).getParams(), modelToFineTune.getLayer(3).getParams()); MultiLayerNetwork notFrozen = new MultiLayerNetwork( (NeuralNetConfiguration) overallConf.clone() - .layer(0, new Builder().nIn(2).nOut(3).build()) - .layer(1, new OutputLayer.Builder( + .layer(0, DenseLayer.builder().nIn(2).nOut(3).build()) + .layer(1, OutputLayer.builder( LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(3).nOut(3) .build()) .build(), paramsLastTwoLayers); @@ -118,10 +117,10 @@ public class FrozenLayerTest extends BaseDL4JTest { .activation(Activation.IDENTITY); MultiLayerNetwork 
modelToFineTune = new MultiLayerNetwork( (NeuralNetConfiguration) overallConf - .layer(0, new Builder().nIn(4).nOut(3).build()) - .layer(1, new Builder().nIn(3).nOut(2).build()) - .layer(2, new Builder().nIn(2).nOut(3).build()) - .layer(3, new OutputLayer.Builder( + .layer(0, DenseLayer.builder().nIn(4).nOut(3).build()) + .layer(1, DenseLayer.builder().nIn(3).nOut(2).build()) + .layer(2, DenseLayer.builder().nIn(2).nOut(3).build()) + .layer(3, OutputLayer.builder( LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(3).nOut(3) .build()) .build()); @@ -139,8 +138,8 @@ public class FrozenLayerTest extends BaseDL4JTest { assertEquals(modelNow.getModelParams(), clonedModel.getModelParams()); MultiLayerNetwork notFrozen = new MultiLayerNetwork( - (NeuralNetConfiguration) overallConf.layer(0, new Builder().nIn(2).nOut(3).build()) - .layer(1, new OutputLayer.Builder( + (NeuralNetConfiguration) overallConf.layer(0, DenseLayer.builder().nIn(2).nOut(3).build()) + .layer(1, OutputLayer.builder( LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(3).nOut(3) .build()) @@ -171,11 +170,11 @@ public class FrozenLayerTest extends BaseDL4JTest { .activation(Activation.IDENTITY); ComputationGraph modelToFineTune = new ComputationGraph(overallConf.graphBuilder().addInputs("layer0In") - .addLayer("layer0", new DenseLayer.Builder().nIn(4).nOut(3).build(), "layer0In") - .addLayer("layer1", new DenseLayer.Builder().nIn(3).nOut(2).build(), "layer0") - .addLayer("layer2", new DenseLayer.Builder().nIn(2).nOut(3).build(), "layer1") + .addLayer("layer0", DenseLayer.builder().nIn(4).nOut(3).build(), "layer0In") + .addLayer("layer1", DenseLayer.builder().nIn(3).nOut(2).build(), "layer0") + .addLayer("layer2", DenseLayer.builder().nIn(2).nOut(3).build(), "layer1") .addLayer("layer3", - new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + org.deeplearning4j.nn.conf.layers.OutputLayer.builder( LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(3).nOut(3) .build(), @@ -189,9 +188,9 @@ public class FrozenLayerTest extends BaseDL4JTest { new TransferLearning.GraphBuilder(modelToFineTune).setFeatureExtractor("layer1").build(); ComputationGraph notFrozen = new ComputationGraph(overallConf.graphBuilder().addInputs("layer0In") - .addLayer("layer0", new DenseLayer.Builder().nIn(2).nOut(3).build(), "layer0In") + .addLayer("layer0", DenseLayer.builder().nIn(2).nOut(3).build(), "layer0In") .addLayer("layer1", - new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + org.deeplearning4j.nn.conf.layers.OutputLayer.builder( LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(3).nOut(3) .build(), @@ -222,11 +221,11 @@ public class FrozenLayerTest extends BaseDL4JTest { .activation(Activation.IDENTITY); ComputationGraph modelToFineTune = new ComputationGraph(overallConf.graphBuilder().addInputs("layer0In") - .addLayer("layer0", new DenseLayer.Builder().nIn(4).nOut(3).build(), "layer0In") - .addLayer("layer1", new DenseLayer.Builder().nIn(3).nOut(2).build(), "layer0") - .addLayer("layer2", new DenseLayer.Builder().nIn(2).nOut(3).build(), "layer1") + .addLayer("layer0", DenseLayer.builder().nIn(4).nOut(3).build(), "layer0In") + .addLayer("layer1", DenseLayer.builder().nIn(3).nOut(2).build(), "layer0") + .addLayer("layer2", DenseLayer.builder().nIn(2).nOut(3).build(), "layer1") .addLayer("layer3", - new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + org.deeplearning4j.nn.conf.layers.OutputLayer.builder( LossFunctions.LossFunction.MCXENT) 
.activation(Activation.SOFTMAX).nIn(3).nOut(3) .build(), @@ -247,9 +246,9 @@ public class FrozenLayerTest extends BaseDL4JTest { assertEquals(modelNow.getModelParams(), clonedModel.getModelParams()); ComputationGraph notFrozen = new ComputationGraph(overallConf.graphBuilder().addInputs("layer0In") - .addLayer("layer0", new DenseLayer.Builder().nIn(2).nOut(3).build(), "layer0In") + .addLayer("layer0", DenseLayer.builder().nIn(2).nOut(3).build(), "layer0In") .addLayer("layer1", - new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + org.deeplearning4j.nn.conf.layers.OutputLayer.builder( LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(3).nOut(3) .build(), @@ -280,22 +279,22 @@ public class FrozenLayerTest extends BaseDL4JTest { //We need to be able to instantitate frozen layers from JSON etc, and have them be the same as if // they were initialized via the builder NeuralNetConfiguration conf1 = NeuralNetConfiguration.builder().seed(12345).list() - .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).activation(Activation.TANH) + .layer(0, DenseLayer.builder().nIn(10).nOut(10).activation(Activation.TANH) .weightInit(WeightInit.XAVIER).build()) - .layer(1, new DenseLayer.Builder().nIn(10).nOut(10).activation(Activation.TANH) + .layer(1, DenseLayer.builder().nIn(10).nOut(10).activation(Activation.TANH) .weightInit(WeightInit.XAVIER).build()) - .layer(2, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .layer(2, org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(10) .nOut(10).build()) .build(); NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder().seed(12345).list().layer(0, - new org.deeplearning4j.nn.conf.layers.misc.FrozenLayer(new DenseLayer.Builder().nIn(10).nOut(10) - .activation(Activation.TANH).weightInit(WeightInit.XAVIER).build())) - .layer(1, new org.deeplearning4j.nn.conf.layers.misc.FrozenLayer( - new DenseLayer.Builder().nIn(10).nOut(10).activation(Activation.TANH) - .weightInit(WeightInit.XAVIER).build())) - .layer(2, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + org.deeplearning4j.nn.conf.layers.misc.FrozenLayer.builder(DenseLayer.builder().nIn(10).nOut(10) + .activation(Activation.TANH).weightInit(WeightInit.XAVIER).build()).build()) + .layer(1, org.deeplearning4j.nn.conf.layers.misc.FrozenLayer.builder( + DenseLayer.builder().nIn(10).nOut(10).activation(Activation.TANH) + .weightInit(WeightInit.XAVIER).build()).build()) + .layer(2, org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(10) .nOut(10).build()) .build(); @@ -331,11 +330,11 @@ public class FrozenLayerTest extends BaseDL4JTest { // they were initialized via the builder ComputationGraphConfiguration conf1 = NeuralNetConfiguration.builder().seed(12345).graphBuilder() .addInputs("in") - .addLayer("0", new DenseLayer.Builder().nIn(10).nOut(10).activation(Activation.TANH) + .addLayer("0", DenseLayer.builder().nIn(10).nOut(10).activation(Activation.TANH) .weightInit(WeightInit.XAVIER).build(), "in") - .addLayer("1", new DenseLayer.Builder().nIn(10).nOut(10).activation(Activation.TANH) + .addLayer("1", DenseLayer.builder().nIn(10).nOut(10).activation(Activation.TANH) .weightInit(WeightInit.XAVIER).build(), "0") - .addLayer("2", new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .addLayer("2", org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( 
LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(10) .nOut(10).build(), "1") @@ -343,15 +342,16 @@ public class FrozenLayerTest extends BaseDL4JTest { ComputationGraphConfiguration conf2 = NeuralNetConfiguration.builder().seed(12345).graphBuilder() .addInputs("in") - .addLayer("0", new org.deeplearning4j.nn.conf.layers.misc.FrozenLayer.Builder() - .layer(new DenseLayer.Builder().nIn(10).nOut(10).activation(Activation.TANH) - .weightInit(WeightInit.XAVIER).build()) + .addLayer("0", org.deeplearning4j.nn.conf.layers.misc.FrozenLayer.builder() + .innerConfiguration( + DenseLayer.builder().nIn(10).nOut(10).activation(Activation.TANH) + .weightInit(WeightInit.XAVIER).build() ) .build(), "in") - .addLayer("1", new org.deeplearning4j.nn.conf.layers.misc.FrozenLayer.Builder() - .layer(new DenseLayer.Builder().nIn(10).nOut(10).activation(Activation.TANH) + .addLayer("1", org.deeplearning4j.nn.conf.layers.misc.FrozenLayer.builder() + .innerConfiguration(DenseLayer.builder().nIn(10).nOut(10).activation(Activation.TANH) .weightInit(WeightInit.XAVIER).build()) .build(), "0") - .addLayer("2", new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .addLayer("2", org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(10) .nOut(10).build(), "1") diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/FrozenLayerWithBackpropTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/FrozenLayerWithBackpropTest.java index d47973a89..3fc79af09 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/FrozenLayerWithBackpropTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/FrozenLayerWithBackpropTest.java @@ -50,22 +50,22 @@ public class FrozenLayerWithBackpropTest extends BaseDL4JTest { //We need to be able to instantitate frozen layers from JSON etc, and have them be the same as if // they were initialized via the builder NeuralNetConfiguration conf1 = NeuralNetConfiguration.builder().seed(12345).list() - .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).activation(Activation.TANH) + .layer(0, DenseLayer.builder().nIn(10).nOut(10).activation(Activation.TANH) .weightInit(WeightInit.XAVIER).build()) - .layer(1, new DenseLayer.Builder().nIn(10).nOut(10).activation(Activation.TANH) + .layer(1, DenseLayer.builder().nIn(10).nOut(10).activation(Activation.TANH) .weightInit(WeightInit.XAVIER).build()) - .layer(2, new OutputLayer.Builder( + .layer(2, OutputLayer.builder( LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(10) .nOut(10).build()) .build(); NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder().seed(12345).list().layer(0, - new org.deeplearning4j.nn.conf.layers.misc.FrozenLayerWithBackprop(new DenseLayer.Builder().nIn(10).nOut(10) - .activation(Activation.TANH).weightInit(WeightInit.XAVIER).build())) - .layer(1, new org.deeplearning4j.nn.conf.layers.misc.FrozenLayerWithBackprop( - new DenseLayer.Builder().nIn(10).nOut(10).activation(Activation.TANH) - .weightInit(WeightInit.XAVIER).build())) - .layer(2, new OutputLayer.Builder( + org.deeplearning4j.nn.conf.layers.misc.FrozenLayerWithBackprop.builder(DenseLayer.builder().nIn(10).nOut(10) + .activation(Activation.TANH).weightInit(WeightInit.XAVIER).build()).build()) + .layer(1, org.deeplearning4j.nn.conf.layers.misc.FrozenLayerWithBackprop.builder( + DenseLayer.builder().nIn(10).nOut(10).activation(Activation.TANH) + 
.weightInit(WeightInit.XAVIER).build()).build()) + .layer(2, OutputLayer.builder( LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(10) .nOut(10).build()) .build(); @@ -101,11 +101,11 @@ public class FrozenLayerWithBackpropTest extends BaseDL4JTest { // they were initialized via the builder ComputationGraphConfiguration conf1 = NeuralNetConfiguration.builder().seed(12345).graphBuilder() .addInputs("in") - .addLayer("0", new DenseLayer.Builder().nIn(10).nOut(10).activation(Activation.TANH) + .addLayer("0", DenseLayer.builder().nIn(10).nOut(10).activation(Activation.TANH) .weightInit(WeightInit.XAVIER).build(), "in") - .addLayer("1", new DenseLayer.Builder().nIn(10).nOut(10).activation(Activation.TANH) + .addLayer("1", DenseLayer.builder().nIn(10).nOut(10).activation(Activation.TANH) .weightInit(WeightInit.XAVIER).build(), "0") - .addLayer("2", new OutputLayer.Builder( + .addLayer("2", OutputLayer.builder( LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(10) .nOut(10).build(), "1") @@ -113,13 +113,13 @@ public class FrozenLayerWithBackpropTest extends BaseDL4JTest { ComputationGraphConfiguration conf2 = NeuralNetConfiguration.builder().seed(12345).graphBuilder() .addInputs("in") - .addLayer("0", new org.deeplearning4j.nn.conf.layers.misc.FrozenLayerWithBackprop( - new DenseLayer.Builder().nIn(10).nOut(10).activation(Activation.TANH) + .addLayer("0", org.deeplearning4j.nn.conf.layers.misc.FrozenLayerWithBackprop.builder( + DenseLayer.builder().nIn(10).nOut(10).activation(Activation.TANH) .weightInit(WeightInit.XAVIER).build()), "in") - .addLayer("1", new org.deeplearning4j.nn.conf.layers.misc.FrozenLayerWithBackprop( - new DenseLayer.Builder().nIn(10).nOut(10).activation(Activation.TANH) + .addLayer("1", org.deeplearning4j.nn.conf.layers.misc.FrozenLayerWithBackprop.builder( + DenseLayer.builder().nIn(10).nOut(10).activation(Activation.TANH) .weightInit(WeightInit.XAVIER).build()), "0") - .addLayer("2", new OutputLayer.Builder( + .addLayer("2", OutputLayer.builder( LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(10) .nOut(10).build(), "1") @@ -159,13 +159,13 @@ public class FrozenLayerWithBackpropTest extends BaseDL4JTest { .weightInit(WeightInit.XAVIER) .updater(new Sgd(2)) .list() - .layer(new DenseLayer.Builder().nIn(4).nOut(3).build()) - .layer(new org.deeplearning4j.nn.conf.layers.misc.FrozenLayerWithBackprop( - new DenseLayer.Builder().nIn(3).nOut(4).build())) - .layer(new org.deeplearning4j.nn.conf.layers.misc.FrozenLayerWithBackprop( - new DenseLayer.Builder().nIn(4).nOut(2).build())) - .layer(new org.deeplearning4j.nn.conf.layers.misc.FrozenLayerWithBackprop( - new OutputLayer.Builder(LossFunctions.LossFunction.MSE).activation(Activation.TANH).nIn(2).nOut(1).build())) + .layer(DenseLayer.builder().nIn(4).nOut(3).build()) + .layer(org.deeplearning4j.nn.conf.layers.misc.FrozenLayerWithBackprop.builder( + DenseLayer.builder().nIn(3).nOut(4).build())) + .layer(org.deeplearning4j.nn.conf.layers.misc.FrozenLayerWithBackprop.builder( + DenseLayer.builder().nIn(4).nOut(2).build())) + .layer(org.deeplearning4j.nn.conf.layers.misc.FrozenLayerWithBackprop.builder( + OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).activation(Activation.TANH).nIn(2).nOut(1).build())) .build(); MultiLayerNetwork network = new MultiLayerNetwork(conf1); @@ -211,18 +211,18 @@ public class FrozenLayerWithBackpropTest extends BaseDL4JTest { .seed(12345) .graphBuilder() .addInputs("input") - .addLayer(initialLayer, new 
DenseLayer.Builder().nIn(4).nOut(4).build(),"input") - .addLayer(frozenBranchUnfrozenLayer0, new DenseLayer.Builder().nIn(4).nOut(3).build(),initialLayer) - .addLayer(frozenBranchFrozenLayer1, new org.deeplearning4j.nn.conf.layers.misc.FrozenLayerWithBackprop( - new DenseLayer.Builder().nIn(3).nOut(4).build()),frozenBranchUnfrozenLayer0) - .addLayer(frozenBranchFrozenLayer2, new org.deeplearning4j.nn.conf.layers.misc.FrozenLayerWithBackprop( - new DenseLayer.Builder().nIn(4).nOut(2).build()),frozenBranchFrozenLayer1) - .addLayer(unfrozenLayer0, new DenseLayer.Builder().nIn(4).nOut(4).build(),initialLayer) - .addLayer(unfrozenLayer1, new DenseLayer.Builder().nIn(4).nOut(2).build(),unfrozenLayer0) - .addLayer(unfrozenBranch2, new DenseLayer.Builder().nIn(2).nOut(1).build(),unfrozenLayer1) + .addLayer(initialLayer, DenseLayer.builder().nIn(4).nOut(4).build(),"input") + .addLayer(frozenBranchUnfrozenLayer0, DenseLayer.builder().nIn(4).nOut(3).build(),initialLayer) + .addLayer(frozenBranchFrozenLayer1, org.deeplearning4j.nn.conf.layers.misc.FrozenLayerWithBackprop.builder( + DenseLayer.builder().nIn(3).nOut(4).build()),frozenBranchUnfrozenLayer0) + .addLayer(frozenBranchFrozenLayer2, org.deeplearning4j.nn.conf.layers.misc.FrozenLayerWithBackprop.builder( + DenseLayer.builder().nIn(4).nOut(2).build()),frozenBranchFrozenLayer1) + .addLayer(unfrozenLayer0, DenseLayer.builder().nIn(4).nOut(4).build(),initialLayer) + .addLayer(unfrozenLayer1, DenseLayer.builder().nIn(4).nOut(2).build(),unfrozenLayer0) + .addLayer(unfrozenBranch2, DenseLayer.builder().nIn(2).nOut(1).build(),unfrozenLayer1) .addVertex("merge", new MergeVertex(), frozenBranchFrozenLayer2, unfrozenBranch2) - .addLayer(frozenBranchOutput,new org.deeplearning4j.nn.conf.layers.misc.FrozenLayerWithBackprop( - new OutputLayer.Builder(LossFunctions.LossFunction.MSE).activation(Activation.TANH).nIn(3).nOut(1).build()),"merge") + .addLayer(frozenBranchOutput,org.deeplearning4j.nn.conf.layers.misc.FrozenLayerWithBackprop.builder( + OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).activation(Activation.TANH).nIn(3).nOut(1).build()),"merge") .setOutputs(frozenBranchOutput) .build(); @@ -257,10 +257,10 @@ public class FrozenLayerWithBackpropTest extends BaseDL4JTest { .weightInit(WeightInit.XAVIER) .updater(new Sgd(2)) .list() - .layer(0,new DenseLayer.Builder().nIn(4).nOut(3).build()) - .layer(1,new DenseLayer.Builder().updater(new Sgd(0.0)).biasUpdater(new Sgd(0.0)).nIn(3).nOut(4).build()) - .layer(2,new DenseLayer.Builder().updater(new Sgd(0.0)).biasUpdater(new Sgd(0.0)).nIn(4).nOut(2).build()) - .layer(3,new OutputLayer.Builder(LossFunctions.LossFunction.MSE).updater(new Sgd(0.0)).biasUpdater(new Sgd(0.0)).activation(Activation.TANH).nIn(2).nOut(1).build()) + .layer(0,DenseLayer.builder().nIn(4).nOut(3).build()) + .layer(1,DenseLayer.builder().updater(new Sgd(0.0)).biasUpdater(new Sgd(0.0)).nIn(3).nOut(4).build()) + .layer(2,DenseLayer.builder().updater(new Sgd(0.0)).biasUpdater(new Sgd(0.0)).nIn(4).nOut(2).build()) + .layer(3,OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).updater(new Sgd(0.0)).biasUpdater(new Sgd(0.0)).activation(Activation.TANH).nIn(2).nOut(1).build()) .build(); NeuralNetConfiguration confFrozen = NeuralNetConfiguration.builder() @@ -268,10 +268,10 @@ public class FrozenLayerWithBackpropTest extends BaseDL4JTest { .weightInit(WeightInit.XAVIER) .updater(new Sgd(2)) .list() - .layer(0,new DenseLayer.Builder().nIn(4).nOut(3).build()) - .layer(1,new 
org.deeplearning4j.nn.conf.layers.misc.FrozenLayerWithBackprop(new DenseLayer.Builder().nIn(3).nOut(4).build())) - .layer(2,new org.deeplearning4j.nn.conf.layers.misc.FrozenLayerWithBackprop(new DenseLayer.Builder().nIn(4).nOut(2).build())) - .layer(3,new org.deeplearning4j.nn.conf.layers.misc.FrozenLayerWithBackprop(new OutputLayer.Builder(LossFunctions.LossFunction.MSE).activation(Activation.TANH).nIn(2).nOut(1).build())) + .layer(0,DenseLayer.builder().nIn(4).nOut(3).build()) + .layer(1,org.deeplearning4j.nn.conf.layers.misc.FrozenLayerWithBackprop.builder(DenseLayer.builder().nIn(3).nOut(4).build())) + .layer(2,org.deeplearning4j.nn.conf.layers.misc.FrozenLayerWithBackprop.builder(DenseLayer.builder().nIn(4).nOut(2).build())) + .layer(3,org.deeplearning4j.nn.conf.layers.misc.FrozenLayerWithBackprop.builder(OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).activation(Activation.TANH).nIn(2).nOut(1).build())) .build(); MultiLayerNetwork frozenNetwork = new MultiLayerNetwork(confFrozen); frozenNetwork.init(); @@ -325,19 +325,19 @@ public class FrozenLayerWithBackpropTest extends BaseDL4JTest { .seed(12345) .graphBuilder() .addInputs("input") - .addLayer(initialLayer,new DenseLayer.Builder().nIn(4).nOut(4).build(),"input") - .addLayer(frozenBranchUnfrozenLayer0,new DenseLayer.Builder().nIn(4).nOut(3).build(), initialLayer) - .addLayer(frozenBranchFrozenLayer1,new org.deeplearning4j.nn.conf.layers.misc.FrozenLayerWithBackprop( - new DenseLayer.Builder().nIn(3).nOut(4).build()),frozenBranchUnfrozenLayer0) + .addLayer(initialLayer,DenseLayer.builder().nIn(4).nOut(4).build(),"input") + .addLayer(frozenBranchUnfrozenLayer0,DenseLayer.builder().nIn(4).nOut(3).build(), initialLayer) + .addLayer(frozenBranchFrozenLayer1,org.deeplearning4j.nn.conf.layers.misc.FrozenLayerWithBackprop.builder( + DenseLayer.builder().nIn(3).nOut(4).build()),frozenBranchUnfrozenLayer0) .addLayer(frozenBranchFrozenLayer2, - new org.deeplearning4j.nn.conf.layers.misc.FrozenLayerWithBackprop( - new DenseLayer.Builder().nIn(4).nOut(2).build()),frozenBranchFrozenLayer1) - .addLayer(unfrozenLayer0,new DenseLayer.Builder().nIn(4).nOut(4).build(),initialLayer) - .addLayer(unfrozenLayer1,new DenseLayer.Builder().nIn(4).nOut(2).build(),unfrozenLayer0) - .addLayer(unfrozenBranch2,new DenseLayer.Builder().nIn(2).nOut(1).build(),unfrozenLayer1) + org.deeplearning4j.nn.conf.layers.misc.FrozenLayerWithBackprop.builder( + DenseLayer.builder().nIn(4).nOut(2).build()),frozenBranchFrozenLayer1) + .addLayer(unfrozenLayer0,DenseLayer.builder().nIn(4).nOut(4).build(),initialLayer) + .addLayer(unfrozenLayer1,DenseLayer.builder().nIn(4).nOut(2).build(),unfrozenLayer0) + .addLayer(unfrozenBranch2,DenseLayer.builder().nIn(2).nOut(1).build(),unfrozenLayer1) .addVertex("merge",new MergeVertex(), frozenBranchFrozenLayer2, unfrozenBranch2) - .addLayer(frozenBranchOutput, new org.deeplearning4j.nn.conf.layers.misc.FrozenLayerWithBackprop( - new OutputLayer.Builder(LossFunctions.LossFunction.MSE).activation(Activation.TANH).nIn(3).nOut(1).build()),"merge") + .addLayer(frozenBranchOutput, org.deeplearning4j.nn.conf.layers.misc.FrozenLayerWithBackprop.builder( + OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).activation(Activation.TANH).nIn(3).nOut(1).build()),"merge") .setOutputs(frozenBranchOutput) .build(); @@ -346,15 +346,15 @@ public class FrozenLayerWithBackpropTest extends BaseDL4JTest { .seed(12345) .graphBuilder() .addInputs("input") - .addLayer(initialLayer, new 
DenseLayer.Builder().nIn(4).nOut(4).build(),"input") - .addLayer(frozenBranchUnfrozenLayer0,new DenseLayer.Builder().nIn(4).nOut(3).build(),initialLayer) - .addLayer(frozenBranchFrozenLayer1,new DenseLayer.Builder().updater(new Sgd(0.0)).biasUpdater(new Sgd(0.0)).nIn(3).nOut(4).build(),frozenBranchUnfrozenLayer0) - .addLayer(frozenBranchFrozenLayer2,new DenseLayer.Builder().updater(new Sgd(0.0)).biasUpdater(new Sgd(0.0)).nIn(4).nOut(2).build(),frozenBranchFrozenLayer1) - .addLayer(unfrozenLayer0,new DenseLayer.Builder().nIn(4).nOut(4).build(),initialLayer) - .addLayer(unfrozenLayer1,new DenseLayer.Builder().nIn(4).nOut(2).build(),unfrozenLayer0) - .addLayer(unfrozenBranch2,new DenseLayer.Builder().nIn(2).nOut(1).build(),unfrozenLayer1) + .addLayer(initialLayer, DenseLayer.builder().nIn(4).nOut(4).build(),"input") + .addLayer(frozenBranchUnfrozenLayer0,DenseLayer.builder().nIn(4).nOut(3).build(),initialLayer) + .addLayer(frozenBranchFrozenLayer1,DenseLayer.builder().updater(new Sgd(0.0)).biasUpdater(new Sgd(0.0)).nIn(3).nOut(4).build(),frozenBranchUnfrozenLayer0) + .addLayer(frozenBranchFrozenLayer2,DenseLayer.builder().updater(new Sgd(0.0)).biasUpdater(new Sgd(0.0)).nIn(4).nOut(2).build(),frozenBranchFrozenLayer1) + .addLayer(unfrozenLayer0,DenseLayer.builder().nIn(4).nOut(4).build(),initialLayer) + .addLayer(unfrozenLayer1,DenseLayer.builder().nIn(4).nOut(2).build(),unfrozenLayer0) + .addLayer(unfrozenBranch2,DenseLayer.builder().nIn(2).nOut(1).build(),unfrozenLayer1) .addVertex("merge",new MergeVertex(), frozenBranchFrozenLayer2, unfrozenBranch2) - .addLayer(frozenBranchOutput,new OutputLayer.Builder(LossFunctions.LossFunction.MSE).updater(new Sgd(0.0)).biasUpdater(new Sgd(0.0)).activation(Activation.TANH).nIn(3).nOut(1).build(),"merge") + .addLayer(frozenBranchOutput,OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).updater(new Sgd(0.0)).biasUpdater(new Sgd(0.0)).activation(Activation.TANH).nIn(3).nOut(1).build(),"merge") .setOutputs(frozenBranchOutput) .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/OutputLayerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/OutputLayerTest.java index 6e2132d92..15ae4219c 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/OutputLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/OutputLayerTest.java @@ -59,7 +59,7 @@ public class OutputLayerTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT) .updater(new Sgd(1e-1)) - .layer(new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder().nIn(4).nOut(3) + .layer(org.deeplearning4j.nn.conf.layers.OutputLayer.builder().nIn(4).nOut(3) .weightInit(WeightInit.ZERO).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); @@ -94,10 +94,10 @@ public class OutputLayerTest extends BaseDL4JTest { } NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345L).list() - .layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize) + .layer(0, GravesLSTM.builder().nIn(nIn).nOut(layerSize) .dist(new NormalDistribution(0, 1)).activation(Activation.TANH) .updater(new NoOp()).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(LossFunction.MCXENT) + .layer(1, org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction(LossFunction.MCXENT) 
.activation(Activation.SOFTMAX).nIn(layerSize).nOut(nOut) .dist(new NormalDistribution(0, 1)) .updater(new NoOp()).build()) @@ -118,10 +118,10 @@ public class OutputLayerTest extends BaseDL4JTest { //As above, but for RnnOutputLayer. Expect all activations etc. to be 3d NeuralNetConfiguration confRnn = NeuralNetConfiguration.builder().seed(12345L).list() - .layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize) + .layer(0, GravesLSTM.builder().nIn(nIn).nOut(layerSize) .dist(new NormalDistribution(0, 1)).activation(Activation.TANH) .updater(new NoOp()).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.RnnOutputLayer.Builder(LossFunction.MCXENT) + .layer(1, org.deeplearning4j.nn.conf.layers.RnnOutputLayer.builder().lossFunction(LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(layerSize).nOut(nOut) .dist(new NormalDistribution(0, 1)) .updater(new NoOp()).build()) @@ -175,10 +175,10 @@ public class OutputLayerTest extends BaseDL4JTest { INDArray labels2d = proc.backprop(labels3d, miniBatchSize, LayerWorkspaceMgr.noWorkspaces()); NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345L).list() - .layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize) + .layer(0, GravesLSTM.builder().nIn(nIn).nOut(layerSize) .dist(new NormalDistribution(0, 1)) .activation(Activation.TANH).updater(new NoOp()).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(LossFunction.MCXENT) + .layer(1, org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction(LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(layerSize).nOut(nOut) .dist(new NormalDistribution(0, 1)) .updater(new NoOp()).build()) @@ -192,10 +192,10 @@ public class OutputLayerTest extends BaseDL4JTest { INDArray out3d = proc.preProcess(out2d, miniBatchSize, LayerWorkspaceMgr.noWorkspaces()); NeuralNetConfiguration confRnn = NeuralNetConfiguration.builder().seed(12345L).list() - .layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize) + .layer(0, GravesLSTM.builder().nIn(nIn).nOut(layerSize) .dist(new NormalDistribution(0, 1)) .activation(Activation.TANH).updater(new NoOp()).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.RnnOutputLayer.Builder(LossFunction.MCXENT) + .layer(1, org.deeplearning4j.nn.conf.layers.RnnOutputLayer.builder().lossFunction(LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(layerSize).nOut(nOut) .dist(new NormalDistribution(0, 1)) .updater(new NoOp()).build()) @@ -274,11 +274,11 @@ public class OutputLayerTest extends BaseDL4JTest { NeuralNetConfiguration.builder().seed(12345L) .updater(new NoOp()) .list() - .layer(new LSTM.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH) + .layer(LSTM.builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH) .dist(new NormalDistribution(0, 1.0)) .updater(new NoOp()).build()) - .layer(new DenseLayer.Builder().nIn(layerSize).nOut(nOut).activation(Activation.IDENTITY).build()) - .layer(new RnnLossLayer.Builder(LossFunction.MCXENT) + .layer(DenseLayer.builder().nIn(layerSize).nOut(nOut).activation(Activation.IDENTITY).build()) + .layer(RnnLossLayer.builder().lossFunction(LossFunction.MCXENT) .activation(Activation.SOFTMAX) .build()) .build(); @@ -291,10 +291,10 @@ public class OutputLayerTest extends BaseDL4JTest { NeuralNetConfiguration.builder().seed(12345L) .updater(new NoOp()) .list() - .layer(new LSTM.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH) + .layer(LSTM.builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH) .dist(new NormalDistribution(0, 1.0)) 
.updater(new NoOp()).build()) - .layer(new org.deeplearning4j.nn.conf.layers.RnnOutputLayer.Builder(LossFunction.MCXENT) + .layer(org.deeplearning4j.nn.conf.layers.RnnOutputLayer.builder().lossFunction(LossFunction.MCXENT) .activation(Activation.SOFTMAX) .nIn(layerSize).nOut(nOut) .build()) @@ -354,11 +354,11 @@ public class OutputLayerTest extends BaseDL4JTest { .inferenceWorkspaceMode(ws) .trainingWorkspaceMode(ws) .list() - .layer(new ConvolutionLayer.Builder().nIn(3).nOut(4).activation(Activation.IDENTITY) + .layer(ConvolutionLayer.builder().nIn(3).nOut(4).activation(Activation.IDENTITY) .kernelSize(2, 2).stride(1, 1) .dist(new NormalDistribution(0, 1.0)) .updater(new NoOp()).build()) - .layer(new CnnLossLayer.Builder(LossFunction.MSE) + .layer(CnnLossLayer.builder().lossFunction(LossFunction.MSE.getILossFunction()) .activation(a) .build()) .build(); @@ -370,11 +370,11 @@ public class OutputLayerTest extends BaseDL4JTest { .inferenceWorkspaceMode(ws) .trainingWorkspaceMode(ws) .list() - .layer(new ConvolutionLayer.Builder().nIn(3).nOut(4).activation(a) + .layer(ConvolutionLayer.builder().nIn(3).nOut(4).activation(a) .kernelSize(2, 2).stride(1, 1) .dist(new NormalDistribution(0, 1.0)) .updater(new NoOp()).build()) - .layer(new CnnLossLayer.Builder(LossFunction.MSE) + .layer(CnnLossLayer.builder().lossFunction(LossFunction.MSE.getILossFunction()) .activation(Activation.IDENTITY) .build()) .build(); @@ -444,11 +444,11 @@ public class OutputLayerTest extends BaseDL4JTest { .trainingWorkspaceMode(ws) .graphBuilder() .addInputs("in") - .addLayer("0", new ConvolutionLayer.Builder().nIn(3).nOut(4).activation(Activation.IDENTITY) + .addLayer("0", ConvolutionLayer.builder().nIn(3).nOut(4).activation(Activation.IDENTITY) .kernelSize(2, 2).stride(1, 1) .dist(new NormalDistribution(0, 1.0)) .updater(new NoOp()).build(), "in") - .addLayer("1", new CnnLossLayer.Builder(LossFunction.MSE) + .addLayer("1", CnnLossLayer.builder().lossFunction(LossFunction.MSE.getILossFunction()) .activation(a) .build(), "0") .setOutputs("1") @@ -462,11 +462,11 @@ public class OutputLayerTest extends BaseDL4JTest { .trainingWorkspaceMode(ws) .graphBuilder() .addInputs("in") - .addLayer("0", new ConvolutionLayer.Builder().nIn(3).nOut(4).activation(a) + .addLayer("0", ConvolutionLayer.builder().nIn(3).nOut(4).activation(a) .kernelSize(2, 2).stride(1, 1) .dist(new NormalDistribution(0, 1.0)) .updater(new NoOp()).build(), "in") - .addLayer("1", new CnnLossLayer.Builder(LossFunction.MSE) + .addLayer("1", CnnLossLayer.builder().lossFunction(LossFunction.MSE.getILossFunction()) .activation(Activation.IDENTITY) .build(), "0") .setOutputs("1") @@ -528,10 +528,10 @@ public class OutputLayerTest extends BaseDL4JTest { .updater(new NoOp()) .convolutionMode(ConvolutionMode.Same) .list() - .layer(new ConvolutionLayer.Builder().nIn(3).nOut(4).activation(Activation.IDENTITY) + .layer(ConvolutionLayer.builder().nIn(3).nOut(4).activation(Activation.IDENTITY) .dist(new NormalDistribution(0, 1.0)) .updater(new NoOp()).build()) - .layer(new CnnLossLayer.Builder(LossFunction.MSE) + .layer(CnnLossLayer.builder().lossFunction(LossFunction.MSE.getILossFunction()) .activation(Activation.SOFTMAX) .build()) .build(); @@ -555,19 +555,19 @@ public class OutputLayerTest extends BaseDL4JTest { public void testOutputLayerDefaults(){ NeuralNetConfiguration.builder().list() - .layer(new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder().nIn(10).nOut(10).build()) + .layer(org.deeplearning4j.nn.conf.layers.OutputLayer.builder().nIn(10).nOut(10).build()) 
.build(); NeuralNetConfiguration.builder().list() - .layer(new org.deeplearning4j.nn.conf.layers.LossLayer.Builder().build()) + .layer(org.deeplearning4j.nn.conf.layers.LossLayer.builder().build()) .build(); NeuralNetConfiguration.builder().list() - .layer(new org.deeplearning4j.nn.conf.layers.CnnLossLayer.Builder().build()) + .layer(org.deeplearning4j.nn.conf.layers.CnnLossLayer.builder().build()) .build(); NeuralNetConfiguration.builder().list() - .layer(new org.deeplearning4j.nn.conf.layers.CenterLossOutputLayer.Builder().build()) + .layer(org.deeplearning4j.nn.conf.layers.CenterLossOutputLayer.builder().build()) .build(); } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/RepeatVectorTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/RepeatVectorTest.java index a62ccdcf0..abd5df7c7 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/RepeatVectorTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/RepeatVectorTest.java @@ -42,7 +42,7 @@ public class RepeatVectorTest extends BaseDL4JTest { private Layer getRepeatVectorLayer() { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(123) .dataType(DataType.DOUBLE) - .layer(new RepeatVector.Builder(REPEAT).build()).build(); + .layer(RepeatVector.builder().repetitionFactor(REPEAT).build()).build(); return conf.getFlattenedLayerConfigurations().get(0).instantiate(conf, null, 0, null, false, DataType.DOUBLE); } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/SeedTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/SeedTest.java index f1e64f204..7d01cdfdd 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/SeedTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/SeedTest.java @@ -46,7 +46,7 @@ public class SeedTest extends BaseDL4JTest { @Test public void testAutoEncoderSeed() { - AutoEncoder layerType = new AutoEncoder.Builder().nIn(4).nOut(3).corruptionLevel(0.0) + AutoEncoder layerType = AutoEncoder.builder().nIn(4).nOut(3).corruptionLevel(0.0) .activation(Activation.SIGMOID).build(); NeuralNetConfiguration conf = diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/TestDropout.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/TestDropout.java index e17653219..7ca3839a5 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/TestDropout.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/TestDropout.java @@ -52,7 +52,7 @@ public class TestDropout extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .updater(new Sgd()) .dropOut(0.5).list() - .layer(0, new OutputLayer.Builder().activation(Activation.IDENTITY) + .layer(0, OutputLayer.builder().activation(Activation.IDENTITY) .lossFunction(LossFunctions.LossFunction.MSE).nIn(nIn).nOut(nOut) .weightInit(WeightInit.XAVIER).build()) .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/capsule/CapsNetMNISTTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/capsule/CapsNetMNISTTest.java index 6b307a68c..fc92a8c18 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/capsule/CapsNetMNISTTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/capsule/CapsNetMNISTTest.java @@ -58,19 +58,19 @@ public 
class CapsNetMNISTTest extends BaseDL4JTest { .seed(123) .updater(new Adam()) .list() - .layer(new ConvolutionLayer.Builder() + .layer(ConvolutionLayer.builder() .nOut(16) .kernelSize(9, 9) .stride(3, 3) .build()) - .layer(new PrimaryCapsules.Builder(8, 8) + .layer(PrimaryCapsules.builder(8, 8) .kernelSize(7, 7) .stride(2, 2) .build()) - .layer(new CapsuleLayer.Builder(10, 16, 3).build()) - .layer(new CapsuleStrengthLayer.Builder().build()) - .layer(new ActivationLayer.Builder(new ActivationSoftmax()).build()) - .layer(new LossLayer.Builder(new LossNegativeLogLikelihood()).build()) + .layer(CapsuleLayer.builder(10, 16, 3).build()) + .layer(CapsuleStrengthLayer.builder().build()) + .layer(ActivationLayer.builder(new ActivationSoftmax()).build()) + .layer(LossLayer.builder().lossFunction(new LossNegativeLogLikelihood()).build()) .inputType(InputType.convolutionalFlat(28, 28, 1)) .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/capsule/CapsuleLayerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/capsule/CapsuleLayerTest.java index 4536b915b..c72daa81b 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/capsule/CapsuleLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/capsule/CapsuleLayerTest.java @@ -44,7 +44,7 @@ public class CapsuleLayerTest extends BaseDL4JTest { @Test public void testOutputType(){ - CapsuleLayer layer = new CapsuleLayer.Builder(10, 16, 5).build(); + CapsuleLayer layer = CapsuleLayer.builder(10, 16, 5).build(); InputType in1 = InputType.recurrent(5, 8); @@ -53,7 +53,7 @@ public class CapsuleLayerTest extends BaseDL4JTest { @Test public void testInputType(){ - CapsuleLayer layer = new CapsuleLayer.Builder(10, 16, 5).build(); + CapsuleLayer layer = CapsuleLayer.builder(10, 16, 5).build(); InputType in1 = InputType.recurrent(5, 8); @@ -65,14 +65,14 @@ public class CapsuleLayerTest extends BaseDL4JTest { @Test public void testConfig(){ - CapsuleLayer layer1 = new CapsuleLayer.Builder(10, 16, 5).build(); + CapsuleLayer layer1 = CapsuleLayer.builder(10, 16, 5).build(); assertEquals(10, layer1.getCapsules()); assertEquals(16, layer1.getCapsuleDimensions()); assertEquals(5, layer1.getRoutings()); assertFalse(layer1.isHasBias()); - CapsuleLayer layer2 = new CapsuleLayer.Builder(10, 16, 5).hasBias(true).build(); + CapsuleLayer layer2 = CapsuleLayer.builder(10, 16, 5).hasBias(true).build(); assertTrue(layer2.isHasBias()); @@ -83,7 +83,7 @@ public class CapsuleLayerTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .seed(123) .list() - .layer(new CapsuleLayer.Builder(10, 16, 3).build()) + .layer(CapsuleLayer.builder(10, 16, 3).build()) .inputType(InputType.recurrent(10, 8)) .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/capsule/CapsuleStrengthLayerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/capsule/CapsuleStrengthLayerTest.java index 388d380dc..1c2f3c5f7 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/capsule/CapsuleStrengthLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/capsule/CapsuleStrengthLayerTest.java @@ -42,7 +42,7 @@ public class CapsuleStrengthLayerTest extends BaseDL4JTest { @Test public void testOutputType(){ - CapsuleStrengthLayer layer = new CapsuleStrengthLayer.Builder().build(); + CapsuleStrengthLayer layer = 
CapsuleStrengthLayer.builder().build(); InputType in1 = InputType.recurrent(5, 8); @@ -54,7 +54,7 @@ public class CapsuleStrengthLayerTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .seed(123) .list() - .layer(new CapsuleStrengthLayer.Builder().build()) + .layer(CapsuleStrengthLayer.builder().build()) .inputType(InputType.recurrent(5, 8)) .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/capsule/PrimaryCapsulesTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/capsule/PrimaryCapsulesTest.java index 12f63e7ec..d8bbe5ae5 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/capsule/PrimaryCapsulesTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/capsule/PrimaryCapsulesTest.java @@ -44,7 +44,7 @@ public class PrimaryCapsulesTest extends BaseDL4JTest { @Test public void testOutputType(){ - PrimaryCapsules layer = new PrimaryCapsules.Builder(8, 8) + PrimaryCapsules layer = PrimaryCapsules.builder(8, 8) .kernelSize(7, 7) .stride(2, 2) .build(); @@ -57,7 +57,7 @@ public class PrimaryCapsulesTest extends BaseDL4JTest { @Test public void testInputType(){ - PrimaryCapsules layer = new PrimaryCapsules.Builder(8, 8) + PrimaryCapsules layer = PrimaryCapsules.builder(8, 8) .kernelSize(7, 7) .stride(2, 2) .build(); @@ -72,7 +72,7 @@ public class PrimaryCapsulesTest extends BaseDL4JTest { @Test public void testConfig(){ - PrimaryCapsules layer1 = new PrimaryCapsules.Builder(8, 10) + PrimaryCapsules layer1 = PrimaryCapsules.builder(8, 10) .kernelSize(5, 5) .stride(4, 4) .useLeakyReLU(0.5) @@ -84,22 +84,22 @@ public class PrimaryCapsulesTest extends BaseDL4JTest { assertArrayEquals(new int[]{4, 4}, layer1.getStride()); assertArrayEquals(new int[]{0, 0}, layer1.getPadding()); assertArrayEquals(new int[]{1, 1}, layer1.getDilation()); - assertTrue(layer1.isUseRelu()); - assertEquals(0.5, layer1.getLeak(), 0.001); + assertTrue(layer1.isUseRelU()); + assertEquals(0.5, layer1.getUseLeakyReLU(), 0.001); - PrimaryCapsules layer2 = new PrimaryCapsules.Builder(8, 10) + PrimaryCapsules layer2 = PrimaryCapsules.builder(8, 10) .kernelSize(5, 5) .stride(4, 4) .build(); - assertFalse(layer2.isUseRelu()); + assertFalse(layer2.isUseRelU()); - PrimaryCapsules layer3 = new PrimaryCapsules.Builder(8, 10) + PrimaryCapsules layer3 = PrimaryCapsules.builder(8, 10) .kernelSize(5, 5) .stride(4, 4) .useReLU() .build(); - assertTrue(layer3.isUseRelu()); - assertEquals(0, layer3.getLeak(), 0.001); + assertTrue(layer3.isUseRelU()); + assertEquals(0, layer3.getUseLeakyReLU(), 0.001); } @@ -108,7 +108,7 @@ public class PrimaryCapsulesTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .seed(123) .list() - .layer(new PrimaryCapsules.Builder(8, 10) + .layer(PrimaryCapsules.builder(8, 10) .kernelSize(5, 5) .stride(4, 4) .useLeakyReLU(0.5) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvDataFormatTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvDataFormatTests.java index 6f24ff226..44d7380fd 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvDataFormatTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvDataFormatTests.java @@ -557,16 +557,16 @@ public class ConvDataFormatTests extends BaseDL4JTest { private MultiLayerNetwork 
getConv2dNet(CNN2DFormat format, boolean setOnLayerAlso, ConvolutionMode cm) { if (setOnLayerAlso) { - return getNetWithLayer(new ConvolutionLayer.Builder() + return getNetWithLayer(ConvolutionLayer.builder() .kernelSize(3, 3) .stride(2, 2) .activation(Activation.TANH) - .dataFormat(format) + .convFormat(format) .nOut(3) .helperAllowFallback(false) .build(), format, cm, null); } else { - return getNetWithLayer(new ConvolutionLayer.Builder() + return getNetWithLayer(ConvolutionLayer.builder() .kernelSize(3, 3) .stride(2, 2) .activation(Activation.TANH) @@ -578,14 +578,14 @@ public class ConvDataFormatTests extends BaseDL4JTest { private MultiLayerNetwork getSubsampling2dNet(CNN2DFormat format, boolean setOnLayerAlso, ConvolutionMode cm) { if (setOnLayerAlso) { - return getNetWithLayer(new SubsamplingLayer.Builder() + return getNetWithLayer(SubsamplingLayer.builder() .kernelSize(2, 2) .stride(1, 1) .dataFormat(format) .helperAllowFallback(false) .build(), format, cm, null); } else { - return getNetWithLayer(new SubsamplingLayer.Builder() + return getNetWithLayer(SubsamplingLayer.builder() .kernelSize(2, 2) .stride(1, 1) .helperAllowFallback(false) @@ -595,7 +595,7 @@ public class ConvDataFormatTests extends BaseDL4JTest { private MultiLayerNetwork getSeparableConv2dNet(CNN2DFormat format, boolean setOnLayerAlso, ConvolutionMode cm) { if (setOnLayerAlso) { - return getNetWithLayer(new SeparableConvolution2D.Builder() + return getNetWithLayer(SeparableConvolution2D.builder() .kernelSize(3, 3) .stride(2, 2) .activation(Activation.TANH) @@ -604,7 +604,7 @@ public class ConvDataFormatTests extends BaseDL4JTest { .helperAllowFallback(false) .build(), format, cm, null); } else { - return getNetWithLayer(new SeparableConvolution2D.Builder() + return getNetWithLayer(SeparableConvolution2D.builder() .kernelSize(3, 3) .stride(2, 2) .activation(Activation.TANH) @@ -616,7 +616,7 @@ public class ConvDataFormatTests extends BaseDL4JTest { private MultiLayerNetwork getDepthwiseConv2dNet(CNN2DFormat format, boolean setOnLayerAlso, ConvolutionMode cm) { if (setOnLayerAlso) { - return getNetWithLayer(new DepthwiseConvolution2D.Builder() + return getNetWithLayer(DepthwiseConvolution2D.builder() .depthMultiplier(2) .kernelSize(3, 3) .stride(2, 2) @@ -626,7 +626,7 @@ public class ConvDataFormatTests extends BaseDL4JTest { .helperAllowFallback(false) .build(), format, cm, null); } else { - return getNetWithLayer(new DepthwiseConvolution2D.Builder() + return getNetWithLayer(DepthwiseConvolution2D.builder() .depthMultiplier(2) .kernelSize(3, 3) .stride(2, 2) @@ -639,12 +639,12 @@ public class ConvDataFormatTests extends BaseDL4JTest { private MultiLayerNetwork getLrnLayer(CNN2DFormat format, boolean setOnLayerAlso, ConvolutionMode cm) { if (setOnLayerAlso) { - return getNetWithLayer(new LocalResponseNormalization.Builder() + return getNetWithLayer(LocalResponseNormalization.builder() .dataFormat(format) .helperAllowFallback(false) .build(), format, cm, null); } else { - return getNetWithLayer(new LocalResponseNormalization.Builder() + return getNetWithLayer(LocalResponseNormalization.builder() .helperAllowFallback(false) .build(), format, cm, null); } @@ -652,47 +652,47 @@ public class ConvDataFormatTests extends BaseDL4JTest { private MultiLayerNetwork getZeroPaddingNet(CNN2DFormat format, boolean setOnLayerAlso) { if (setOnLayerAlso) { - return getNetWithLayer(new ZeroPaddingLayer.Builder(2,2) + return getNetWithLayer(ZeroPaddingLayer.builder(2,2) .dataFormat(format).build(), format, ConvolutionMode.Same, null); } 
else { - return getNetWithLayer(new ZeroPaddingLayer.Builder(2,2).build(), + return getNetWithLayer(ZeroPaddingLayer.builder(2,2).build(), format, ConvolutionMode.Same, null); } } private MultiLayerNetwork getCropping2dNet(CNN2DFormat format, boolean setOnLayerAlso) { if (setOnLayerAlso) { - return getNetWithLayer(new Cropping2D.Builder(2,2) + return getNetWithLayer(Cropping2D.builder(2,2) .dataFormat(format).build(), format, ConvolutionMode.Same, null); } else { - return getNetWithLayer(new Cropping2D.Builder(2,2) + return getNetWithLayer(Cropping2D.builder(2,2) .build(), format, ConvolutionMode.Same, null); } } private MultiLayerNetwork getUpsamplingNet(CNN2DFormat format, boolean setOnLayerAlso) { if (setOnLayerAlso) { - return getNetWithLayer(new Upsampling2D.Builder(2) + return getNetWithLayer(Upsampling2D.builder(2) .dataFormat(format).build(), format, ConvolutionMode.Same, null); } else { - return getNetWithLayer(new Upsampling2D.Builder(2) + return getNetWithLayer(Upsampling2D.builder(2) .build(), format, ConvolutionMode.Same, null); } } private MultiLayerNetwork getDeconv2DNet2dNet(CNN2DFormat format, boolean setOnLayerAlso, ConvolutionMode cm) { if (setOnLayerAlso) { - return getNetWithLayer(new Deconvolution2D.Builder().nOut(2) + return getNetWithLayer(Deconvolution2D.builder().nOut(2) .activation(Activation.TANH) .kernelSize(2,2) - .dataFormat(format) + .convFormat(format) .stride(2,2) .build(), format, cm, null); } else { - return getNetWithLayer(new Deconvolution2D.Builder().nOut(2) + return getNetWithLayer(Deconvolution2D.builder().nOut(2) .activation(Activation.TANH) .kernelSize(2,2) - .dataFormat(format) + .convFormat(format) .stride(2,2) .build(), format, cm, null); } @@ -700,13 +700,13 @@ public class ConvDataFormatTests extends BaseDL4JTest { private MultiLayerNetwork getBatchNormNet(boolean logStdev, CNN2DFormat format, boolean setOnLayerAlso) { if (setOnLayerAlso) { - return getNetWithLayer(new BatchNormalization.Builder() + return getNetWithLayer(BatchNormalization.builder() .useLogStd(logStdev) .dataFormat(format) .helperAllowFallback(false) .nOut(3).build(), format, ConvolutionMode.Same, null); } else { - return getNetWithLayer(new BatchNormalization.Builder() + return getNetWithLayer(BatchNormalization.builder() .useLogStd(logStdev) .helperAllowFallback(false) .nOut(3).build(), format, ConvolutionMode.Same, null); @@ -715,33 +715,33 @@ public class ConvDataFormatTests extends BaseDL4JTest { private MultiLayerNetwork getSpaceToDepthNet(CNN2DFormat format, boolean setOnLayerAlso) { if (setOnLayerAlso) { - return getNetWithLayer(new SpaceToDepthLayer.Builder() - .blocks(2) + return getNetWithLayer(SpaceToDepthLayer.builder() + .blockSize(2) .dataFormat(format) .build(), format, ConvolutionMode.Same, null); } else { - return getNetWithLayer(new SpaceToDepthLayer.Builder() - .blocks(2) + return getNetWithLayer(SpaceToDepthLayer.builder() + .blockSize(2) .build(), format, ConvolutionMode.Same, null); } } private MultiLayerNetwork getSpaceToBatchNet(CNN2DFormat format, boolean setOnLayerAlso) { if (setOnLayerAlso) { - return getNetWithLayer(new SpaceToBatchLayer.Builder() - .blocks(2, 2) + return getNetWithLayer(SpaceToBatchLayer.builder() + .blockSize(2, 2) .dataFormat(format) .build(), format, ConvolutionMode.Same, InputType.convolutional(16, 16, 3, format)); } else { - return getNetWithLayer(new SpaceToBatchLayer.Builder() - .blocks(2, 2) + return getNetWithLayer(SpaceToBatchLayer.builder() + .blockSize(2, 2) .build(), format, ConvolutionMode.Same, 
InputType.convolutional(16, 16, 3, format)); } } private MultiLayerNetwork getLocallyConnectedNet(CNN2DFormat format, boolean setOnLayerAlso, ConvolutionMode cm) { if (setOnLayerAlso) { - return getNetWithLayer(new LocallyConnected2D.Builder() + return getNetWithLayer(LocallyConnected2D.builder() .kernelSize(3, 3) .stride(2, 2) .activation(Activation.TANH) @@ -749,7 +749,7 @@ public class ConvDataFormatTests extends BaseDL4JTest { .nOut(3) .build(), format, cm, null); } else { - return getNetWithLayer(new LocallyConnected2D.Builder() + return getNetWithLayer(LocallyConnected2D.builder() .kernelSize(3, 3) .stride(2, 2) .activation(Activation.TANH) @@ -764,7 +764,7 @@ public class ConvDataFormatTests extends BaseDL4JTest { .seed(12345) .convolutionMode(cm) .list() - .layer(new ConvolutionLayer.Builder() + .layer(ConvolutionLayer.builder() .kernelSize(3, 3) .stride(2, 2) .activation(Activation.TANH) @@ -772,7 +772,7 @@ public class ConvDataFormatTests extends BaseDL4JTest { .helperAllowFallback(false) .build()) .layer(layer) - .layer(new OutputLayer.Builder().nOut(10) + .layer(OutputLayer.builder().nOut(10) .activation(Activation.SOFTMAX).build()) .inputType(inputType != null ? inputType : InputType.convolutional(12, 12, 3, format)); @@ -789,11 +789,11 @@ public class ConvDataFormatTests extends BaseDL4JTest { private MultiLayerNetwork getGlobalPoolingNet(CNN2DFormat format, PoolingType pt, boolean setOnLayerAlso) { if (setOnLayerAlso) { - return getNetWithLayer(new GlobalPoolingLayer.Builder(pt) + return getNetWithLayer(GlobalPoolingLayer.builder(pt) .poolingDimensions(format == CNN2DFormat.NCHW ? new int[]{2,3} : new int[]{1,2}) .build(), format, ConvolutionMode.Same, null); } else { - return getNetWithLayer(new GlobalPoolingLayer.Builder(pt) + return getNetWithLayer(GlobalPoolingLayer.builder(pt) .build(), format, ConvolutionMode.Same, null); } } @@ -803,19 +803,19 @@ public class ConvDataFormatTests extends BaseDL4JTest { .seed(12345) .convolutionMode(cm) .list() - .layer(new ConvolutionLayer.Builder() + .layer(ConvolutionLayer.builder() .kernelSize(3, 3) .stride(2, 2) .activation(Activation.TANH) - .dataFormat(format) + .convFormat(format) .nOut(3) .helperAllowFallback(false) .build()); if(setOnLayerAlso){ - builder.layer(new CnnLossLayer.Builder() - .format(format).activation(Activation.SOFTMAX).build()); + builder.layer(CnnLossLayer.builder() + .dataFormat(format).activation(Activation.SOFTMAX).build()); } else { - builder.layer(new CnnLossLayer.Builder() + builder.layer(CnnLossLayer.builder() .activation(Activation.SOFTMAX).build()); } @@ -988,19 +988,19 @@ public class ConvDataFormatTests extends BaseDL4JTest { switch (i){ case 0: - b.layer(new ConvolutionLayer.Builder().kernelSize(2,2).nIn(3).nOut(3).dataFormat(df).build()); + b.layer(ConvolutionLayer.builder().kernelSize(2,2).nIn(3).nOut(3).convFormat(df).build()); b.inputType(InputType.convolutional(12,12,3,df)); break; case 1: - b.layer(new DepthwiseConvolution2D.Builder().kernelSize(2,2).nIn(3).nOut(3).dataFormat(df).build()); + b.layer(DepthwiseConvolution2D.builder().kernelSize(2,2).nIn(3).nOut(3).dataFormat(df).build()); b.inputType(InputType.convolutional(12,12,3,df)); break; case 2: - b.layer(new Deconvolution2D.Builder().dataFormat(df).kernelSize(2,2).nIn(3).nOut(3).build()); + b.layer(Deconvolution2D.builder().convFormat(df).kernelSize(2,2).nIn(3).nOut(3).build()); b.inputType(InputType.convolutional(12,12,3,df)); break; case 3: - b.layer(new 
SeparableConvolution2D.Builder().dataFormat(df).kernelSize(2,2).nIn(3).nOut(3).build()); + b.layer(SeparableConvolution2D.builder().dataFormat(df).kernelSize(2,2).nIn(3).nOut(3).build()); b.inputType(InputType.convolutional(12,12,3,df)); break; } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/Convolution3DTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/Convolution3DTest.java index c8137f4a6..30aafe526 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/Convolution3DTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/Convolution3DTest.java @@ -86,7 +86,7 @@ public class Convolution3DTest extends BaseDL4JTest { private Layer getConvolution3DLayer(ConvolutionMode mode) { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer).seed(123) - .layer(new Convolution3D.Builder().kernelSize(kernelSize).nIn(nChannelsIn).nOut(nChannelsOut) + .layer(Convolution3D.builder().kernelSize(kernelSize).nIn(nChannelsIn).nOut(nChannelsOut) .dataFormat(Convolution3D.DataFormat.NCDHW).convolutionMode(mode).hasBias(false) .build()) .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerSetupTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerSetupTest.java index 1af476e5e..f608d9341 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerSetupTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerSetupTest.java @@ -27,6 +27,7 @@ import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator; import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; import org.deeplearning4j.nn.api.OptimizationAlgorithm; +import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration.NeuralNetConfigurationBuilder; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -93,16 +94,16 @@ public class ConvolutionLayerSetupTest extends BaseDL4JTest { NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().seed(seed) .l1(1e-1).l2(2e-4).dropOut(0.5).miniBatch(true) .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).list() - .layer(0, new ConvolutionLayer.Builder(5, 5).nOut(5).dropOut(0.5).weightInit(WeightInit.XAVIER) + .layer(0, ConvolutionLayer.builder(5, 5).nOut(5).dropOut(0.5).weightInit(WeightInit.XAVIER) .activation(Activation.RELU).build()) - .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] {2, 2}) + .layer(1, SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX, new int[] {2, 2}) .build()) - .layer(2, new ConvolutionLayer.Builder(3, 3).nOut(10).dropOut(0.5).weightInit(WeightInit.XAVIER) + .layer(2, ConvolutionLayer.builder(3, 3).nOut(10).dropOut(0.5).weightInit(WeightInit.XAVIER) .activation(Activation.RELU).build()) - .layer(3, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] {2, 2}) + .layer(3, SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX, new int[] {2, 2}) .build()) - .layer(4, new DenseLayer.Builder().nOut(100).activation(Activation.RELU).build()) - .layer(5, new 
OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) + .layer(4, DenseLayer.builder().nOut(100).activation(Activation.RELU).build()) + .layer(5, OutputLayer.builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .nOut(outputNum).weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX) .build()) @@ -178,16 +179,16 @@ public class ConvolutionLayerSetupTest extends BaseDL4JTest { NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().seed(3) .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).list() - .layer(0, new ConvolutionLayer.Builder( + .layer(0, ConvolutionLayer.builder( new int[] {5, 5}).nOut(6).build()) - .layer(1, new SubsamplingLayer.Builder( + .layer(1, SubsamplingLayer.builder( new int[] {2, 2}).build()) - .layer(2, new LocalResponseNormalization.Builder().build()) - .layer(3, new ConvolutionLayer.Builder( + .layer(2, LocalResponseNormalization.builder().build()) + .layer(3, ConvolutionLayer.builder( new int[] {5, 5}).nOut(6).build()) - .layer(4, new SubsamplingLayer.Builder( + .layer(4, SubsamplingLayer.builder( new int[] {2, 2}).build()) - .layer(5, new OutputLayer.Builder( + .layer(5, OutputLayer.builder( LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).nOut(2) .activation(Activation.SOFTMAX).build()); return builder; @@ -198,15 +199,15 @@ public class ConvolutionLayerSetupTest extends BaseDL4JTest { NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().seed(3) .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).list() - .layer(0, new ConvolutionLayer.Builder( + .layer(0, ConvolutionLayer.builder( new int[] {5, 5}).nOut(6).build()) - .layer(1, new SubsamplingLayer.Builder( + .layer(1, SubsamplingLayer.builder( new int[] {2, 2}).build()) - .layer(2, new ConvolutionLayer.Builder( + .layer(2, ConvolutionLayer.builder( new int[] {5, 5}).nOut(6).build()) - .layer(3, new SubsamplingLayer.Builder( + .layer(3, SubsamplingLayer.builder( new int[] {2, 2}).build()) - .layer(4, new OutputLayer.Builder( + .layer(4, OutputLayer.builder( LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).activation(Activation.SOFTMAX) .nOut(2).build()); return builder; @@ -218,17 +219,17 @@ public class ConvolutionLayerSetupTest extends BaseDL4JTest { NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().seed(3) .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).list() - .layer(0, new ConvolutionLayer.Builder( + .layer(0, ConvolutionLayer.builder( new int[] {5, 5}).nIn(1).nOut(20).build()) - .layer(1, new SubsamplingLayer.Builder( + .layer(1, SubsamplingLayer.builder( new int[] {2, 2}, new int[] {2, 2}).build()) - .layer(2, new ConvolutionLayer.Builder( + .layer(2, ConvolutionLayer.builder( new int[] {5, 5}).nIn(20).nOut(50).build()) - .layer(3, new SubsamplingLayer.Builder( + .layer(3, SubsamplingLayer.builder( new int[] {2, 2}, new int[] {2, 2}).build()) - .layer(4, new DenseLayer.Builder().nOut(500) + .layer(4, DenseLayer.builder().nOut(500) .build()) - .layer(5, new OutputLayer.Builder( + .layer(5, OutputLayer.builder( LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .activation(Activation.SOFTMAX).nOut(10) .build()); @@ -239,15 +240,15 @@ public class ConvolutionLayerSetupTest extends BaseDL4JTest { NeuralNetConfiguration builder = NeuralNetConfiguration.builder().seed(3) .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).list() - .layer(0, new org.deeplearning4j.nn.conf.layers.ConvolutionLayer.Builder( + .layer(0, 
org.deeplearning4j.nn.conf.layers.ConvolutionLayer.builder( new int[] {5, 5}).nIn(1).nOut(6).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.SubsamplingLayer.Builder( + .layer(1, org.deeplearning4j.nn.conf.layers.SubsamplingLayer.builder( new int[] {5, 5}, new int[] {2, 2}).build()) - .layer(2, new org.deeplearning4j.nn.conf.layers.ConvolutionLayer.Builder( + .layer(2, org.deeplearning4j.nn.conf.layers.ConvolutionLayer.builder( new int[] {5, 5}).nIn(1).nOut(6).build()) - .layer(3, new org.deeplearning4j.nn.conf.layers.SubsamplingLayer.Builder( + .layer(3, org.deeplearning4j.nn.conf.layers.SubsamplingLayer.builder( new int[] {5, 5}, new int[] {2, 2}).build()) - .layer(4, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .layer(4, org.deeplearning4j.nn.conf.layers.OutputLayer.builder( LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).nIn(150) .nOut(10).build()) .build(); @@ -261,11 +262,11 @@ public class ConvolutionLayerSetupTest extends BaseDL4JTest { NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().seed(seed) .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT).list() - .layer(0, new org.deeplearning4j.nn.conf.layers.ConvolutionLayer.Builder(new int[] {10, 10}, + .layer(0, org.deeplearning4j.nn.conf.layers.ConvolutionLayer.builder(new int[] {10, 10}, new int[] {2, 2}).nIn(nChannels).nOut(6).build()) - .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] {2, 2}) + .layer(1, SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX, new int[] {2, 2}) .build()) - .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) + .layer(2, OutputLayer.builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .nOut(outputNum).weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX) .build()) ; @@ -283,11 +284,11 @@ public class ConvolutionLayerSetupTest extends BaseDL4JTest { NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().seed(seed) .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT).list() - .layer(0, new org.deeplearning4j.nn.conf.layers.ConvolutionLayer.Builder(new int[] {10, 10}, + .layer(0, org.deeplearning4j.nn.conf.layers.ConvolutionLayer.builder(new int[] {10, 10}, new int[] {2, 2}).nIn(nChannels).nOut(6).build()) - .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] {2, 2}) + .layer(1, SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX, new int[] {2, 2}) .build()) - .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) + .layer(2, OutputLayer.builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .nIn(5 * 5 * 6) //216 .nOut(outputNum).weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX) .build()) @@ -303,10 +304,10 @@ public class ConvolutionLayerSetupTest extends BaseDL4JTest { NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().list() //out = stride * (in-1) + filter - 2*pad -> 2 * (28-1) + 2 - 0 = 56 -> 56x56x3 - .layer(0, new Deconvolution2D.Builder(2, 2).padding(0, 0).stride(2, 2).nIn(1).nOut(3).build()) + .layer(0, Deconvolution2D.builder(2, 2).padding(0, 0).stride(2, 2).nIn(1).nOut(3).build()) //(56-2+2*1)/2+1 = 29 -> 29x29x3 - .layer(1, new SubsamplingLayer.Builder().kernelSize(2, 2).padding(1, 1).stride(2, 2).build()) - .layer(2, new OutputLayer.Builder().nOut(3).activation(Activation.SOFTMAX).build()) + .layer(1, SubsamplingLayer.builder().kernelSize(2, 
2).padding(1, 1).stride(2, 2).build()) + .layer(2, OutputLayer.builder().nOut(3).activation(Activation.SOFTMAX).build()) .inputType(InputType.convolutional(28, 28, 1)); NeuralNetConfiguration conf = builder.build(); @@ -325,9 +326,9 @@ public class ConvolutionLayerSetupTest extends BaseDL4JTest { public void testSubSamplingWithPadding() { NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().list() - .layer(0, new ConvolutionLayer.Builder(2, 2).padding(0, 0).stride(2, 2).nIn(1).nOut(3).build()) //(28-2+0)/2+1 = 14 - .layer(1, new SubsamplingLayer.Builder().kernelSize(2, 2).padding(1, 1).stride(2, 2).build()) //(14-2+2)/2+1 = 8 -> 8x8x3 - .layer(2, new OutputLayer.Builder().nOut(3).activation(Activation.SOFTMAX).build()) + .layer(0, ConvolutionLayer.builder(2, 2).padding(0, 0).stride(2, 2).nIn(1).nOut(3).build()) //(28-2+0)/2+1 = 14 + .layer(1, SubsamplingLayer.builder().kernelSize(2, 2).padding(1, 1).stride(2, 2).build()) //(14-2+2)/2+1 = 8 -> 8x8x3 + .layer(2, OutputLayer.builder().nOut(3).activation(Activation.SOFTMAX).build()) .inputType(InputType.convolutional(28, 28, 1)); NeuralNetConfiguration conf = builder.build(); @@ -346,9 +347,9 @@ public class ConvolutionLayerSetupTest extends BaseDL4JTest { public void testUpsampling() { NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().list() - .layer(new ConvolutionLayer.Builder(2, 2).padding(0, 0).stride(2, 2).nIn(1).nOut(3).build()) //(28-2+0)/2+1 = 14 - .layer(new Upsampling2D.Builder().size(3).build()) // 14 * 3 = 42! - .layer(new OutputLayer.Builder().nOut(3).activation(Activation.SOFTMAX).build()) + .layer(ConvolutionLayer.builder(2, 2).padding(0, 0).stride(2, 2).nIn(1).nOut(3).build()) //(28-2+0)/2+1 = 14 + .layer(Upsampling2D.builder().size(3).build()) // 14 * 3 = 42! + .layer(OutputLayer.builder().nOut(3).activation(Activation.SOFTMAX).build()) .inputType(InputType.convolutional(28, 28, 1)); NeuralNetConfiguration conf = builder.build(); @@ -369,9 +370,9 @@ public class ConvolutionLayerSetupTest extends BaseDL4JTest { int[] blocks = new int[] {2, 2}; NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().list() - .layer(new ConvolutionLayer.Builder(2, 2).padding(0, 0).stride(2, 2).nIn(1).nOut(3).build()) //(28-2+0)/2+1 = 14 - .layer(new SpaceToBatchLayer.Builder(blocks).build()) // Divide space dimensions by blocks, i.e. 14/2 = 7 - .layer(new OutputLayer.Builder().nOut(3).activation(Activation.SOFTMAX).build()) + .layer(ConvolutionLayer.builder(2, 2).padding(0, 0).stride(2, 2).nIn(1).nOut(3).build()) //(28-2+0)/2+1 = 14 + .layer(SpaceToBatchLayer.builder(blocks).build()) // Divide space dimensions by blocks, i.e. 14/2 = 7 + .layer(OutputLayer.builder().nOut(3).activation(Activation.SOFTMAX).build()) .inputType(InputType.convolutional(28, 28, 1)); NeuralNetConfiguration conf = builder.build(); @@ -389,12 +390,12 @@ public class ConvolutionLayerSetupTest extends BaseDL4JTest { int blocks = 2; - NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().list() + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder() //(28-2+0)/2+1 = 14 -> 14x14x3 out - .layer(new ConvolutionLayer.Builder(2, 2).padding(0, 0).stride(2, 2).nIn(1).nOut(3).build()) + .layer(ConvolutionLayer.builder(2, 2).padding(0, 0).stride(2, 2).nIn(1).nOut(3).build()) // Divide space dimensions by blocks, i.e. 
14/2 = 7 -> 7x7x12 out (3x2x2 depth) - .layer(new SpaceToDepthLayer.Builder(blocks, SpaceToDepthLayer.DataFormat.NCHW).build()) - .layer(new OutputLayer.Builder().nIn(3 * 2 * 2).nOut(3).activation(Activation.SOFTMAX).build()) // nIn of the next layer gets multiplied by 2*2. + .layer(SpaceToDepthLayer.builder().blockSize(blocks).dataFormat(CNN2DFormat.NCHW).build()) + .layer(OutputLayer.builder().nIn(3 * 2 * 2).nOut(3).activation(Activation.SOFTMAX).build()) // nIn of the next layer gets multiplied by 2*2. .inputType(InputType.convolutional(28, 28, 1)); NeuralNetConfiguration conf = builder.build(); @@ -418,15 +419,15 @@ public class ConvolutionLayerSetupTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(123) .weightInit(WeightInit.XAVIER).list() - .layer(0, new ConvolutionLayer.Builder(new int[] {1, 1}, new int[] {1, 1}).nIn(1).nOut(6) + .layer(0, ConvolutionLayer.builder(new int[] {1, 1}, new int[] {1, 1}).nIn(1).nOut(6) .activation(Activation.IDENTITY).build()) - .layer(1, new BatchNormalization.Builder().build()) - .layer(2, new ActivationLayer.Builder().activation(Activation.RELU).build()) - .layer(3, new DenseLayer.Builder().nIn(28 * 28 * 6).nOut(10).activation(Activation.IDENTITY) + .layer(1,BatchNormalization.builder().build()) + .layer(2, ActivationLayer.builder().activation(Activation.RELU).build()) + .layer(3, DenseLayer.builder().nIn(28 * 28 * 6).nOut(10).activation(Activation.IDENTITY) .build()) - .layer(4, new BatchNormalization.Builder().nOut(10).build()) - .layer(5, new ActivationLayer.Builder().activation(Activation.RELU).build()) - .layer(6, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(4,BatchNormalization.builder().nOut(10).build()) + .layer(5, ActivationLayer.builder().activation(Activation.RELU).build()) + .layer(6, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(10).build()) .inputType(InputType.convolutionalFlat(28, 28, 1)).build(); @@ -448,12 +449,12 @@ public class ConvolutionLayerSetupTest extends BaseDL4JTest { public void testSeparableConv2D() { NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().list() - .layer( new SeparableConvolution2D.Builder(2, 2) + .layer( SeparableConvolution2D.builder(2, 2) .depthMultiplier(2) .padding(0, 0) .stride(2, 2).nIn(1).nOut(3).build()) //(28-2+0)/2+1 = 14 - .layer( new SubsamplingLayer.Builder().kernelSize(2, 2).padding(1, 1).stride(2, 2).build()) //(14-2+2)/2+1 = 8 -> 8x8x3 - .layer(2, new OutputLayer.Builder().nOut(3).activation(Activation.SOFTMAX).build()) + .layer( SubsamplingLayer.builder().kernelSize(2, 2).padding(1, 1).stride(2, 2).build()) //(14-2+2)/2+1 = 8 -> 8x8x3 + .layer(2, OutputLayer.builder().nOut(3).activation(Activation.SOFTMAX).build()) .inputType(InputType.convolutional(28, 28, 1)); NeuralNetConfiguration conf = builder.build(); @@ -473,12 +474,12 @@ public class ConvolutionLayerSetupTest extends BaseDL4JTest { NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().list() //out = stride * (in-1) + filter - 2*pad -> 2 * (28-1) + 2 - 0 = 56 -> 56x56x3 - .layer( new Deconvolution2D.Builder(2, 2) + .layer( Deconvolution2D.builder(2, 2) .padding(0, 0) .stride(2, 2).nIn(1).nOut(3).build()) //(56-2+2*1)/2+1 = 29 -> 29x29x3 - .layer( new SubsamplingLayer.Builder().kernelSize(2, 2).padding(1, 1).stride(2, 2).build()) - .layer(2, new 
OutputLayer.Builder().nOut(3).activation(Activation.SOFTMAX).build()) + .layer( SubsamplingLayer.builder().kernelSize(2, 2).padding(1, 1).stride(2, 2).build()) + .layer(2, OutputLayer.builder().nOut(3).activation(Activation.SOFTMAX).build()) .inputType(InputType.convolutional(28, 28, 1)); NeuralNetConfiguration conf = builder.build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerTest.java index 4b5458b15..87814e038 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerTest.java @@ -20,6 +20,9 @@ package org.deeplearning4j.nn.layers.convolution; +import static org.junit.jupiter.api.Assertions.*; + +import java.util.List; import lombok.val; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; @@ -32,9 +35,9 @@ import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.RNNFormat; import org.deeplearning4j.nn.conf.inputs.InputType; +import org.deeplearning4j.nn.conf.layers.*; import org.deeplearning4j.nn.conf.layers.Convolution1DLayer; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; -import org.deeplearning4j.nn.conf.layers.*; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.weights.WeightInit; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; @@ -55,778 +58,987 @@ import org.nd4j.linalg.learning.config.Nesterovs; import org.nd4j.linalg.lossfunctions.LossFunctions; import org.nd4j.linalg.lossfunctions.impl.LossMCXENT; -import java.util.List; - -import static org.junit.jupiter.api.Assertions.*; - /** * @author Adam Gibson */ public class ConvolutionLayerTest extends BaseDL4JTest { - @Override - public DataType getDataType(){ - return DataType.FLOAT; + private static final int kH = 2; + private static final int kW = 2; + private static final int[] strides = {1, 1}; + private static final int[] pad = {0, 0}; + private static final int miniBatch = 2; + private static final int inDepth = 2; + private static final int height = 3; + private static final int width = 3; + private static final int outW = 2; + private static final int outH = 2; + + ////////////////////////////////////////////////////////////////////////////////// + + private static Layer getCNNConfig( + int nIn, int nOut, int[] kernelSize, int[] stride, int[] padding) { + + ConvolutionLayer layer = + ConvolutionLayer.builder(kernelSize, stride, padding) + .nIn(nIn) + .nOut(nOut) + .activation(Activation.SIGMOID) + .build(); + + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().layer(layer).build(); + + val numParams = conf.getFlattenedLayerConfigurations().get(0).initializer().numParams(conf); + INDArray params = Nd4j.create(1, numParams); + return conf.getFlattenedLayerConfigurations() + .get(0) + .instantiate(conf, null, 0, params, true, params.dataType()); + } + + private static INDArray getInput() { + + /* + ----- Input images ----- + example 0: + channels 0 channels 1 + [ 0 1 2 [ 9 10 11 + 3 4 5 12 13 14 + 6 7 8] 15 16 17] + example 1: + [18 19 20 [27 28 29 + 21 22 23 30 31 32 + 24 25 26] 33 34 35] + */ + + INDArray input = Nd4j.create(new int[] {miniBatch, inDepth, height, width}, 'c'); + 
input.put( + new INDArrayIndex[] { + NDArrayIndex.point(0), NDArrayIndex.point(0), NDArrayIndex.all(), NDArrayIndex.all() + }, + Nd4j.create(new double[][] {{0, 1, 2}, {3, 4, 5}, {6, 7, 8}})); + input.put( + new INDArrayIndex[] { + NDArrayIndex.point(0), NDArrayIndex.point(1), NDArrayIndex.all(), NDArrayIndex.all() + }, + Nd4j.create(new double[][] {{9, 10, 11}, {12, 13, 14}, {15, 16, 17}})); + input.put( + new INDArrayIndex[] { + NDArrayIndex.point(1), NDArrayIndex.point(0), NDArrayIndex.all(), NDArrayIndex.all() + }, + Nd4j.create(new double[][] {{18, 19, 20}, {21, 22, 23}, {24, 25, 26}})); + input.put( + new INDArrayIndex[] { + NDArrayIndex.point(1), NDArrayIndex.point(1), NDArrayIndex.all(), NDArrayIndex.all() + }, + Nd4j.create(new double[][] {{27, 28, 29}, {30, 31, 32}, {33, 34, 35}})); + + return input; + } + + private static MultiLayerNetwork getCNNMLNConfig(boolean backprop, boolean pretrain) { + int outputNum = 10; + int seed = 123; + + NeuralNetConfiguration.NeuralNetConfigurationBuilder conf = + NeuralNetConfiguration.builder() + .seed(seed) + .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT) + .list() + .layer(0, ConvolutionLayer.builder(new int[] {10, 10}).nOut(6).build()) + .layer( + 1, + SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX, new int[] {2, 2}) + .stride(1, 1) + .build()) + .layer( + 2, + OutputLayer.builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) + .nOut(outputNum) + .weightInit(WeightInit.XAVIER) + .activation(Activation.SOFTMAX) + .build()) + .inputType(InputType.convolutionalFlat(28, 28, 1)); + + MultiLayerNetwork model = new MultiLayerNetwork(conf.build()); + model.init(); + + return model; + } + + @Override + public DataType getDataType() { + return DataType.FLOAT; + } + + @Test + public void testTwdFirstLayer() throws Exception { + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = + NeuralNetConfiguration.builder() + .seed(123) + .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) + .l2(2e-4) + .updater(new Nesterovs(0.9)) + .dropOut(0.5) + .list() + .layer( + 0, + ConvolutionLayer.builder(8, 8) // 16 filters kernel size 8 stride 4 + .stride(4, 4) + .nOut(16) + .dropOut(0.5) + .activation(Activation.RELU) + .weightInit(WeightInit.XAVIER) + .build()) + .layer( + 1, + ConvolutionLayer.builder(4, 4) // 32 filters kernel size 4 stride 2 + .stride(2, 2) + .nOut(32) + .dropOut(0.5) + .activation(Activation.RELU) + .weightInit(WeightInit.XAVIER) + .build()) + .layer( + 2, + DenseLayer.builder() // fully connected with 256 rectified units + .nOut(256) + .activation(Activation.RELU) + .weightInit(WeightInit.XAVIER) + .dropOut(0.5) + .build()) + .layer( + 3, + OutputLayer.builder(LossFunctions.LossFunction.SQUARED_LOSS) // output layer + .nOut(10) + .weightInit(WeightInit.XAVIER) + .activation(Activation.SOFTMAX) + .build()) + .inputType(InputType.convolutionalFlat(28, 28, 1)); + + DataSetIterator iter = new MnistDataSetIterator(10, 10); + NeuralNetConfiguration conf = builder.build(); + MultiLayerNetwork network = new MultiLayerNetwork(conf); + network.init(); + DataSet ds = iter.next(); + for (int i = 0; i < 5; i++) { + network.fit(ds); } + } - @Test - public void testTwdFirstLayer() throws Exception { - NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().seed(123) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).l2(2e-4) - .updater(new Nesterovs(0.9)).dropOut(0.5) - .list().layer(0, - new ConvolutionLayer.Builder(8, 8) //16 filters kernel 
size 8 stride 4 - .stride(4, 4).nOut(16).dropOut(0.5) - .activation(Activation.RELU).weightInit( - WeightInit.XAVIER) - .build()) - .layer(1, new ConvolutionLayer.Builder(4, 4) //32 filters kernel size 4 stride 2 - .stride(2, 2).nOut(32).dropOut(0.5).activation(Activation.RELU) - .weightInit(WeightInit.XAVIER).build()) - .layer(2, new DenseLayer.Builder() //fully connected with 256 rectified units - .nOut(256).activation(Activation.RELU).weightInit(WeightInit.XAVIER) - .dropOut(0.5).build()) - .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.SQUARED_LOSS) //output layer - .nOut(10).weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).build()) - .inputType(InputType.convolutionalFlat(28, 28, 1)); + @Test + public void testCNNSubComboWithMixedHW() { + int imageHeight = 20; + int imageWidth = 23; + int nChannels = 1; + int classes = 2; + int numSamples = 200; - DataSetIterator iter = new MnistDataSetIterator(10, 10); - NeuralNetConfiguration conf = builder.build(); - MultiLayerNetwork network = new MultiLayerNetwork(conf); - network.init(); - DataSet ds = iter.next(); - for( int i=0; i<5; i++ ) { - network.fit(ds); - } - } + int kernelHeight = 3; + int kernelWidth = 3; - @Test - public void testCNNSubComboWithMixedHW() { - int imageHeight = 20; - int imageWidth = 23; - int nChannels = 1; - int classes = 2; - int numSamples = 200; + DataSet trainInput; + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = + NeuralNetConfiguration.builder() + .seed(123) + .list() + .layer( + 0, + ConvolutionLayer.builder(kernelHeight, kernelWidth) + .stride(1, 1) + .nOut(2) + .activation(Activation.RELU) + .weightInit(WeightInit.XAVIER) + .build()) + .layer( + 1, + SubsamplingLayer.builder() + .poolingType(SubsamplingLayer.PoolingType.MAX.toPoolingType()) + .kernelSize(imageHeight - kernelHeight, 1) + .stride(1, 1) + .build()) + .layer( + 2, + OutputLayer.builder() + .nOut(classes) + .weightInit(WeightInit.XAVIER) + .activation(Activation.SOFTMAX) + .build()) + .inputType(InputType.convolutionalFlat(imageHeight, imageWidth, nChannels)); - int kernelHeight = 3; - int kernelWidth = 3; + NeuralNetConfiguration conf = builder.build(); + MultiLayerNetwork model = new MultiLayerNetwork(conf); + model.init(); - DataSet trainInput; - NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = - NeuralNetConfiguration.builder() - .seed(123) - .list() - .layer(0, new ConvolutionLayer.Builder(kernelHeight, kernelWidth).stride(1, 1) - .nOut(2).activation(Activation.RELU) - .weightInit(WeightInit.XAVIER).build()) - .layer(1, new SubsamplingLayer.Builder() - .poolingType(SubsamplingLayer.PoolingType.MAX) - .kernelSize(imageHeight - kernelHeight, 1).stride(1, 1).build()) - .layer(2, new OutputLayer.Builder().nOut(classes).weightInit(WeightInit.XAVIER) - .activation(Activation.SOFTMAX).build()) - .inputType(InputType.convolutionalFlat(imageHeight, imageWidth, nChannels)); + INDArray emptyFeatures = Nd4j.zeros(numSamples, imageWidth * imageHeight * nChannels); + INDArray emptyLables = Nd4j.zeros(numSamples, classes); - NeuralNetConfiguration conf = builder.build(); - MultiLayerNetwork model = new MultiLayerNetwork(conf); - model.init(); + trainInput = new DataSet(emptyFeatures, emptyLables); + model.fit(trainInput); + } - INDArray emptyFeatures = Nd4j.zeros(numSamples, imageWidth * imageHeight * nChannels); - INDArray emptyLables = Nd4j.zeros(numSamples, classes); + ////////////////////////////////////////////////////////////////////////////////// - trainInput = new DataSet(emptyFeatures, 
emptyLables); - model.fit(trainInput); - } + @Test + public void testCausal1d() { + Nd4j.getEnvironment().setVerbose(true); + Nd4j.getEnvironment().setDebug(true); + // See: Fixes: https://github.com/eclipse/deeplearning4j/issues/9060 + double learningRate = 1e-3; + long seed = 123; + long timeSteps = 72; + long vectorLength = 64; + long batchSize = 1; + INDArray arr = Nd4j.randn(batchSize, vectorLength, timeSteps); - @Test - public void testCausal1d() { - Nd4j.getEnvironment().setVerbose(true); - Nd4j.getEnvironment().setDebug(true); - //See: Fixes: https://github.com/eclipse/deeplearning4j/issues/9060 - double learningRate = 1e-3; - long seed = 123; - long timeSteps = 72; - long vectorLength = 64; - long batchSize = 1; - INDArray arr = Nd4j.randn(batchSize,vectorLength,timeSteps); + NeuralNetConfiguration build = + NeuralNetConfiguration.builder() + .seed(seed) + .activation(Activation.RELU) + .weightInit(WeightInit.NORMAL) // better init + .updater(new Adam(learningRate)) + .list() + // block 1 + .layer( + Convolution1D.builder() + .kernelSize(2) + .rnnDataFormat(RNNFormat.NCW) + .stride(1) + .nOut(14) + .convolutionMode(ConvolutionMode.Causal) + .dilation(4) + .build()) + .layer( + RnnLossLayer.builder() + .dataFormat(RNNFormat.NCW) + .activation(new ActivationSoftmax()) + .lossFunction(new LossMCXENT()) + .build()) + .inputType(InputType.recurrent(vectorLength, timeSteps, RNNFormat.NCW)) + .build(); - NeuralNetConfiguration build = NeuralNetConfiguration.builder().seed(seed) - .activation(Activation.RELU) - .weightInit(WeightInit.NORMAL) // better init - .updater(new Adam(learningRate)) - .list() - // block 1 - .layer(new Convolution1D.Builder() - .kernelSize(2) - .rnnDataFormat(RNNFormat.NCW) - .stride(1) - .nOut(14) - .convolutionMode(ConvolutionMode.Causal) - .dilation(4) - .build()) - .layer(new RnnLossLayer.Builder().dataFormat(RNNFormat.NCW) - .activation(new ActivationSoftmax()) - .lossFunction(new LossMCXENT()).build()) - .inputType(InputType.recurrent(vectorLength,timeSteps,RNNFormat.NCW)) - .build(); + MultiLayerNetwork network = new MultiLayerNetwork(build); + network.init(); + INDArray output = network.output(arr); + assertArrayEquals(new long[] {1, 14, 72}, output.shape()); + System.out.println(output); + } - MultiLayerNetwork network = new MultiLayerNetwork(build); - network.init(); - INDArray output = network.output(arr); - assertArrayEquals(new long[]{1,14,72},output.shape()); - System.out.println(output); - } + @Test + public void testCNNTooLargeKernel() { + assertThrows( + DL4JException.class, + () -> { + int imageHeight = 20; - @Test - public void testCNNTooLargeKernel() { - assertThrows(DL4JException.class, () -> { - int imageHeight = 20; + int imageWidth = 23; + int nChannels = 1; + int classes = 2; + int numSamples = 200; - int imageWidth = 23; - int nChannels = 1; - int classes = 2; - int numSamples = 200; + int kernelHeight = imageHeight; + int kernelWidth = imageWidth + 1; - int kernelHeight = imageHeight; - int kernelWidth = imageWidth + 1; + DataSet trainInput; + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = + NeuralNetConfiguration.builder() + .seed(123) + .list() + .layer( + 0, + ConvolutionLayer.builder( + kernelHeight, + kernelWidth) // (img-kernel+2*padding)/stride + 1: must be >= 1. 
+ // Therefore: with p=0, kernel <= img size + .stride(1, 1) + .nOut(2) + .activation(Activation.RELU) + .weightInit(WeightInit.XAVIER) + .build()) + .layer( + 1, + OutputLayer.builder() + .nOut(classes) + .weightInit(WeightInit.XAVIER) + .activation(Activation.SOFTMAX) + .build()) + .inputType(InputType.convolutionalFlat(imageHeight, imageWidth, nChannels)); - DataSet trainInput; - NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = - NeuralNetConfiguration.builder() - .seed(123) - .list() - .layer(0, new ConvolutionLayer.Builder(kernelHeight, kernelWidth) //(img-kernel+2*padding)/stride + 1: must be >= 1. Therefore: with p=0, kernel <= img size - .stride(1, 1).nOut(2).activation(Activation.RELU) - .weightInit(WeightInit.XAVIER).build()) - .layer(1, new OutputLayer.Builder().nOut(classes).weightInit(WeightInit.XAVIER) - .activation(Activation.SOFTMAX).build()) - .inputType(InputType.convolutionalFlat(imageHeight, imageWidth, nChannels)); + NeuralNetConfiguration conf = builder.build(); + MultiLayerNetwork model = new MultiLayerNetwork(conf); + model.init(); - NeuralNetConfiguration conf = builder.build(); - MultiLayerNetwork model = new MultiLayerNetwork(conf); - model.init(); + INDArray emptyFeatures = Nd4j.zeros(numSamples, imageWidth * imageHeight * nChannels); + INDArray emptyLables = Nd4j.zeros(numSamples, classes); - INDArray emptyFeatures = Nd4j.zeros(numSamples, imageWidth * imageHeight * nChannels); - INDArray emptyLables = Nd4j.zeros(numSamples, classes); - - trainInput = new DataSet(emptyFeatures, emptyLables); - model.fit(trainInput); + trainInput = new DataSet(emptyFeatures, emptyLables); + model.fit(trainInput); }); - } + } - @Test - public void testCNNZeroStride() { - assertThrows(Exception.class, () -> { - int imageHeight = 20; - int imageWidth = 23; - int nChannels = 1; - int classes = 2; - int numSamples = 200; + @Test + public void testCNNZeroStride() { + assertThrows( + Exception.class, + () -> { + int imageHeight = 20; + int imageWidth = 23; + int nChannels = 1; + int classes = 2; + int numSamples = 200; - int kernelHeight = imageHeight; - int kernelWidth = imageWidth; + int kernelHeight = imageHeight; + int kernelWidth = imageWidth; - DataSet trainInput; - NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = - NeuralNetConfiguration.builder() - .seed(123) - .list() - .layer(0, new ConvolutionLayer.Builder(kernelHeight, kernelWidth).stride(1, 0) - .nOut(2).activation(Activation.RELU) - .weightInit(WeightInit.XAVIER).build()) - .layer(1, new OutputLayer.Builder().nOut(classes).weightInit(WeightInit.XAVIER) - .activation(Activation.SOFTMAX).build()) + DataSet trainInput; + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = + NeuralNetConfiguration.builder() + .seed(123) + .list() + .layer( + 0, + ConvolutionLayer.builder(kernelHeight, kernelWidth) + .stride(1, 0) + .nOut(2) + .activation(Activation.RELU) + .weightInit(WeightInit.XAVIER) + .build()) + .layer( + 1, + OutputLayer.builder() + .nOut(classes) + .weightInit(WeightInit.XAVIER) + .activation(Activation.SOFTMAX) + .build()) + .inputType(InputType.convolutional(imageHeight, imageWidth, nChannels)); - .inputType(InputType.convolutional(imageHeight, imageWidth, nChannels)); + NeuralNetConfiguration conf = builder.build(); + MultiLayerNetwork model = new MultiLayerNetwork(conf); + model.init(); - NeuralNetConfiguration conf = builder.build(); - MultiLayerNetwork model = new MultiLayerNetwork(conf); - model.init(); + INDArray emptyFeatures = Nd4j.zeros(numSamples, imageWidth * 
imageHeight * nChannels); + INDArray emptyLables = Nd4j.zeros(numSamples, classes); - INDArray emptyFeatures = Nd4j.zeros(numSamples, imageWidth * imageHeight * nChannels); - INDArray emptyLables = Nd4j.zeros(numSamples, classes); - - trainInput = new DataSet(emptyFeatures, emptyLables); - model.fit(trainInput); + trainInput = new DataSet(emptyFeatures, emptyLables); + model.fit(trainInput); }); + } + + @Test + public void testCNNBiasInit() { + ConvolutionLayer cnn = ConvolutionLayer.builder().nIn(1).nOut(3).biasInit(1).build(); + + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().layer(cnn).build(); + + val numParams = conf.getFlattenedLayerConfigurations().get(0).initializer().numParams(conf); + INDArray params = Nd4j.create(1, numParams); + Layer layer = + conf.getFlattenedLayerConfigurations() + .get(0) + .instantiate(conf, null, 0, params, true, params.dataType()); + + assertEquals(1, layer.getParam("b").size(0)); + } + + @Test + public void testCNNInputSetupMNIST() throws Exception { + INDArray input = getMnistData(); + Layer layer = getMNISTConfig(); + layer.activate(input, false, LayerWorkspaceMgr.noWorkspaces()); + + assertEquals(input, layer.input()); + assertArrayEquals(input.shape(), layer.input().shape()); + } + + @Test + public void testFeatureMapShapeMNIST() throws Exception { + int inputWidth = 28; + int[] stride = new int[] {1, 1}; + int[] padding = new int[] {0, 0}; + int[] kernelSize = new int[] {9, 9}; + int nChannelsIn = 1; + int depth = 20; + int featureMapWidth = (inputWidth + padding[1] * 2 - kernelSize[1]) / stride[1] + 1; + + INDArray input = getMnistData(); + + Layer layer = getCNNConfig(nChannelsIn, depth, kernelSize, stride, padding); + INDArray convActivations = layer.activate(input, false, LayerWorkspaceMgr.noWorkspaces()); + + assertEquals(featureMapWidth, convActivations.size(2)); + assertEquals(depth, convActivations.size(1)); + } + + @Test + public void testActivateResultsContained() { + Layer layer = getContainedConfig(); + INDArray input = getContainedData(); + INDArray expectedOutput = + Nd4j.create( + new float[] { + 0.98201379f, + 0.98201379f, + 0.98201379f, + 0.98201379f, + 0.99966465f, + 0.99966465f, + 0.99966465f, + 0.99966465f, + 0.98201379f, + 0.98201379f, + 0.98201379f, + 0.98201379f, + 0.99966465f, + 0.99966465f, + 0.99966465f, + 0.99966465f, + 0.98201379f, + 0.98201379f, + 0.98201379f, + 0.98201379f, + 0.99966465f, + 0.99966465f, + 0.99966465f, + 0.99966465f, + 0.98201379f, + 0.98201379f, + 0.98201379f, + 0.98201379f, + 0.99966465f, + 0.99966465f, + 0.99966465f, + 0.99966465f + }, + new int[] {1, 2, 4, 4}); + + INDArray convActivations = layer.activate(input, false, LayerWorkspaceMgr.noWorkspaces()); + + assertArrayEquals(expectedOutput.shape(), convActivations.shape()); + assertEquals(expectedOutput, convActivations); + } + + public Layer getMNISTConfig() { + int[] kernelSize = new int[] {9, 9}; + int[] stride = new int[] {1, 1}; + int[] padding = new int[] {1, 1}; + int nChannelsIn = 1; + int depth = 20; + + return getCNNConfig(nChannelsIn, depth, kernelSize, stride, padding); + } + + public INDArray getMnistData() throws Exception { + int inputWidth = 28; + int inputHeight = 28; + int nChannelsIn = 1; + int nExamples = 5; + + DataSetIterator data = new MnistDataSetIterator(nExamples, nExamples); + DataSet mnist = data.next(); + nExamples = mnist.numExamples(); + return mnist.getFeatures().reshape(nExamples, nChannelsIn, inputHeight, inputWidth); + } + + public Layer getContainedConfig() { + int[] kernelSize = new int[] {2, 
2}; + int[] stride = new int[] {2, 2}; + int[] padding = new int[] {0, 0}; + int nChannelsIn = 1; + int depth = 2; + + INDArray W = + Nd4j.create(new double[] {0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5}, new int[] {2, 1, 2, 2}); + INDArray b = Nd4j.create(new double[] {1, 1}); + Layer layer = getCNNConfig(nChannelsIn, depth, kernelSize, stride, padding); + layer.setParam("W", W); + layer.setParam("b", b); + + return layer; + } + + public INDArray getContainedData() { + INDArray ret = + Nd4j.create( + new float[] { + 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, + 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, + 4, 4, 4, 4, 4, 4 + }, + new int[] {1, 1, 8, 8}); + return ret; + } + + public INDArray getContainedCol() { + return Nd4j.create( + new float[] { + 1, 1, 1, 1, 3, 3, 3, 3, 1, 1, 1, 1, 3, 3, 3, 3, 1, 1, 1, 1, 3, 3, 3, 3, 1, 1, 1, 1, 3, 3, + 3, 3, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, + 4, 4, 4, 4 + }, + new int[] {1, 1, 2, 2, 4, 4}); + } + + @Test + public void testCNNMLNPretrain() throws Exception { + // Note CNN does not do pretrain + int numSamples = 10; + int batchSize = 10; + DataSetIterator mnistIter = new MnistDataSetIterator(batchSize, numSamples, true); + + MultiLayerNetwork model = getCNNMLNConfig(false, true); + model.fit(mnistIter); + + mnistIter.reset(); + + MultiLayerNetwork model2 = getCNNMLNConfig(false, true); + model2.fit(mnistIter); + mnistIter.reset(); + + DataSet test = mnistIter.next(); + + Evaluation eval = new Evaluation(); + INDArray output = model.output(test.getFeatures()); + eval.eval(test.getLabels(), output); + double f1Score = eval.f1(); + + Evaluation eval2 = new Evaluation(); + INDArray output2 = model2.output(test.getFeatures()); + eval2.eval(test.getLabels(), output2); + double f1Score2 = eval2.f1(); + + assertEquals(f1Score, f1Score2, 1e-4); + } + + @Test + public void testCNNMLNBackprop() throws Exception { + int numSamples = 10; + int batchSize = 10; + DataSetIterator mnistIter = new MnistDataSetIterator(batchSize, numSamples, true); + + MultiLayerNetwork model = getCNNMLNConfig(true, false); + model.fit(mnistIter); + + MultiLayerNetwork model2 = getCNNMLNConfig(true, false); + model2.fit(mnistIter); + + mnistIter.reset(); + DataSet test = mnistIter.next(); + + Evaluation eval = new Evaluation(); + INDArray output = model.output(test.getFeatures()); + eval.eval(test.getLabels(), output); + double f1Score = eval.f1(); + + Evaluation eval2 = new Evaluation(); + INDArray output2 = model2.output(test.getFeatures()); + eval2.eval(test.getLabels(), output2); + double f1Score2 = eval2.f1(); + + assertEquals(f1Score, f1Score2, 1e-4); + } + + @Test + public void testGetSetParams() { + + MultiLayerNetwork net = getCNNMLNConfig(true, false); + + INDArray paramsOrig = net.getModelParams().dup(); + net.setParams(paramsOrig); + + INDArray params2 = net.getModelParams(); + + assertEquals(paramsOrig, params2); + } + + @Test + public void testCnnIm2ColReshaping() { + // This test: a bit unusual in that it tests the *assumptions* of the CNN implementation rather + // than the implementation itself + // Specifically, it tests the row and column orders after reshaping on im2col is reshaped (both + // forward and backward pass) + INDArray input = getInput(); + + // im2col in the required order: want [outW,outH,miniBatch,depthIn,kH,kW], but need to input + // [miniBatch,channels,kH,kW,outH,outW] + // given the current im2col implementation + // To 
get this: create an array of the order we want, permute it to the order required by im2col + // implementation, and then do im2col on that + // to get old order from required order: permute(2,3,4,5,1,2) + INDArray col = Nd4j.create(new int[] {miniBatch, outH, outW, inDepth, kH, kW}, 'c'); + INDArray col2 = col.permute(0, 3, 4, 5, 1, 2); + Convolution.im2col(input, kH, kW, strides[0], strides[1], pad[0], pad[1], false, col2); + + /* + Expected Output, im2col + - example 0 - + channels 0 channels 1 + h0,w0 h0,w1 h0,w0 h0,w1 + 0 1 1 2 9 10 10 11 + 3 4 4 5 12 13 13 14 + + h1,w0 h1,w1 h1,w0 h1,w1 + 3 4 4 5 12 13 13 14 + 6 7 7 8 15 16 16 17 + + - example 1 - + channels 0 channels 1 + h0,w0 h0,w1 h0,w0 h0,w1 + 18 19 19 20 27 28 28 29 + 21 22 22 23 30 31 31 32 + + h1,w0 h1,w1 h1,w0 h1,w1 + 21 22 22 23 30 31 31 32 + 24 25 25 26 33 34 34 35 + */ + + // Now, after reshaping im2col to 2d, we expect: + // Rows with order (wOut0,hOut0,mb0), (wOut1,hOut0,mb0), (wOut0,hOut1,mb0), (wOut1,hOut1,mb0), + // (wOut0,hOut0,mb1), ... + // Columns with order (d0,kh0,kw0), (d0,kh0,kw1), (d0,kh1,kw0), (d0,kh1,kw1), (d1,kh0,kw0), ... + + INDArray reshapedCol = + Shape.newShapeNoCopy(col, new int[] {miniBatch * outH * outW, inDepth * kH * kW}, false); + + INDArray exp2d = Nd4j.create(outW * outH * miniBatch, inDepth * kH * kW); + exp2d.putRow( + 0, + Nd4j.create( + new double[] { + 0, 1, 3, 4, 9, 10, 12, 13 + })); // wOut0,hOut0,mb0 -> both depths, in order (d0,kh0,kw0), (d0,kh0,kw1), + // (d0,kh1,kw0), (d0,kh1,kw1), (d1,kh0,kw0), (d1,kh0,kw1), (d1,kh1,kw0), + // (d1,kh1,kw1) + exp2d.putRow(1, Nd4j.create(new double[] {1, 2, 4, 5, 10, 11, 13, 14})); // wOut1,hOut0,mb0 + exp2d.putRow(2, Nd4j.create(new double[] {3, 4, 6, 7, 12, 13, 15, 16})); // wOut0,hOut1,mb0 + exp2d.putRow(3, Nd4j.create(new double[] {4, 5, 7, 8, 13, 14, 16, 17})); // wOut1,hOut1,mb0 + exp2d.putRow(4, Nd4j.create(new double[] {18, 19, 21, 22, 27, 28, 30, 31})); // wOut0,hOut0,mb1 + exp2d.putRow(5, Nd4j.create(new double[] {19, 20, 22, 23, 28, 29, 31, 32})); // wOut1,hOut0,mb1 + exp2d.putRow(6, Nd4j.create(new double[] {21, 22, 24, 25, 30, 31, 33, 34})); // wOut0,hOut1,mb1 + exp2d.putRow(7, Nd4j.create(new double[] {22, 23, 25, 26, 31, 32, 34, 35})); // wOut1,hOut1,mb1 + + assertEquals(exp2d, reshapedCol); + + // Check the same thing for the backprop im2col (different order) + INDArray colBackprop = Nd4j.create(new int[] {miniBatch, outH, outW, inDepth, kH, kW}, 'c'); + INDArray colBackprop2 = colBackprop.permute(0, 3, 4, 5, 1, 2); + + Convolution.im2col(input, kH, kW, strides[0], strides[1], pad[0], pad[1], false, colBackprop2); + + INDArray reshapedColBackprop = + Shape.newShapeNoCopy( + colBackprop, new int[] {miniBatch * outH * outW, inDepth * kH * kW}, false); + + // Rows with order (mb0,h0,w0), (mb0,h0,w1), (mb0,h1,w0), (mb0,h1,w1), (mb1,h0,w0), (mb1,h0,w1), + // (mb1,h1,w0), (mb1,h1,w1) + // Columns with order (d0,kh0,kw0), (d0,kh0,kw1), (d0,kh1,kw0), (d0,kh1,kw1), (d1,kh0,kw0), ... 
+ + INDArray exp2dv2 = Nd4j.create(outW * outH * miniBatch, inDepth * kH * kW); + exp2dv2.putRow( + 0, + Nd4j.create( + new double[] { + 0, 1, 3, 4, 9, 10, 12, 13 + })); // wOut0,hOut0,mb0 -> both depths, in order (d0,kh0,kw0), (d0,kh0,kw1), + // (d0,kh1,kw0), (d0,kh1,kw1), (d1,kh0,kw0), (d1,kh0,kw1), (d1,kh1,kw0), + // (d1,kh1,kw1) + exp2dv2.putRow(1, Nd4j.create(new double[] {1, 2, 4, 5, 10, 11, 13, 14})); // wOut1,hOut0,mb0 + exp2dv2.putRow(2, Nd4j.create(new double[] {3, 4, 6, 7, 12, 13, 15, 16})); // wOut0,hOut1,mb0 + exp2dv2.putRow(3, Nd4j.create(new double[] {4, 5, 7, 8, 13, 14, 16, 17})); // wOut1,hOut1,mb0 + exp2dv2.putRow( + 4, Nd4j.create(new double[] {18, 19, 21, 22, 27, 28, 30, 31})); // wOut0,hOut0,mb1 + exp2dv2.putRow( + 5, Nd4j.create(new double[] {19, 20, 22, 23, 28, 29, 31, 32})); // wOut1,hOut0,mb1 + exp2dv2.putRow( + 6, Nd4j.create(new double[] {21, 22, 24, 25, 30, 31, 33, 34})); // wOut0,hOut1,mb1 + exp2dv2.putRow( + 7, Nd4j.create(new double[] {22, 23, 25, 26, 31, 32, 34, 35})); // wOut1,hOut1,mb1 + + assertEquals(exp2dv2, reshapedColBackprop); + } + + @Test + public void testDeltaReshaping() { + // As per above test: testing assumptions of cnn implementation... + + // Delta: initially shape [miniBatch,dOut,outH,outW] + // permute to [dOut,miniB,outH,outW] + // then reshape to [dOut,miniB*outH*outW] + // Expect columns of delta2d to be like: (mb0,h0,w0), (mb0,h0,w1), (mb1,h0,w2), (mb0,h1,w0), ... + // (mb1,...), ..., (mb2,...) + int miniBatch = 3; + int depth = 2; + int outW = 3; + int outH = 3; + + /* + ----- Input delta ----- + example 0: + channels 0 channels 1 + [ 0 1 2 [ 9 10 11 + 3 4 5 12 13 14 + 6 7 8] 15 16 17] + example 1: + [18 19 20 [27 28 29 + 21 22 23 30 31 32 + 24 25 26] 33 34 35] + example 2: + [36 37 38 [45 46 47 + 39 40 41 48 49 50 + 42 43 44] 51 52 53] + */ + + INDArray deltaOrig = Nd4j.create(new int[] {miniBatch, depth, outH, outW}, 'c'); + deltaOrig.put( + new INDArrayIndex[] { + NDArrayIndex.point(0), NDArrayIndex.point(0), NDArrayIndex.all(), NDArrayIndex.all() + }, + Nd4j.create(new double[][] {{0, 1, 2}, {3, 4, 5}, {6, 7, 8}})); + deltaOrig.put( + new INDArrayIndex[] { + NDArrayIndex.point(0), NDArrayIndex.point(1), NDArrayIndex.all(), NDArrayIndex.all() + }, + Nd4j.create(new double[][] {{9, 10, 11}, {12, 13, 14}, {15, 16, 17}})); + deltaOrig.put( + new INDArrayIndex[] { + NDArrayIndex.point(1), NDArrayIndex.point(0), NDArrayIndex.all(), NDArrayIndex.all() + }, + Nd4j.create(new double[][] {{18, 19, 20}, {21, 22, 23}, {24, 25, 26}})); + deltaOrig.put( + new INDArrayIndex[] { + NDArrayIndex.point(1), NDArrayIndex.point(1), NDArrayIndex.all(), NDArrayIndex.all() + }, + Nd4j.create(new double[][] {{27, 28, 29}, {30, 31, 32}, {33, 34, 35}})); + deltaOrig.put( + new INDArrayIndex[] { + NDArrayIndex.point(2), NDArrayIndex.point(0), NDArrayIndex.all(), NDArrayIndex.all() + }, + Nd4j.create(new double[][] {{36, 37, 38}, {39, 40, 41}, {42, 43, 44}})); + deltaOrig.put( + new INDArrayIndex[] { + NDArrayIndex.point(2), NDArrayIndex.point(1), NDArrayIndex.all(), NDArrayIndex.all() + }, + Nd4j.create(new double[][] {{45, 46, 47}, {48, 49, 50}, {51, 52, 53}})); + + INDArray deltaPermute = deltaOrig.permute(1, 0, 2, 3).dup('c'); + INDArray delta2d = + Shape.newShapeNoCopy(deltaPermute, new int[] {depth, miniBatch * outW * outH}, false); + + INDArray exp = + Nd4j.create( + new double[][] { + { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 18, 19, 20, 21, 22, 23, 24, 25, 26, 36, 37, 38, 39, + 40, 41, 42, 43, 44 + }, // depth0 + { + 9, 10, 11, 12, 13, 14, 15, 16, 17, 27, 28, 
29, 30, 31, 32, 33, 34, 35, 45, 46, + 47, 48, 49, 50, 51, 52, 53 + } // depth1 + }) + .castTo(delta2d.dataType()); + + assertEquals(exp, delta2d); + } + + ////////////////////////////////////////////////////////////////////////////////// + + @Test + public void testWeightReshaping() { + // Test assumptions of weight reshaping + // Weights: originally c order, shape [outDepth, inDepth, kH, kw] + // permute (3,2,1,0) + + int depthOut = 2; + int depthIn = 3; + int kH = 2; + int kW = 2; + + /* + ----- Weights ----- + - dOut 0 - + dIn 0 dIn 1 dIn 2 + [ 0 1 [ 4 5 [ 8 9 + 2 3] 6 7] 10 11] + - dOut 1 - + [12 13 [16 17 [20 21 + 14 15] 18 19] 22 23] + */ + + INDArray weightOrig = Nd4j.create(new int[] {depthOut, depthIn, kH, kW}, 'c'); + weightOrig.put( + new INDArrayIndex[] { + NDArrayIndex.point(0), NDArrayIndex.point(0), NDArrayIndex.all(), NDArrayIndex.all() + }, + Nd4j.create(new double[][] {{0, 1}, {2, 3}})); + weightOrig.put( + new INDArrayIndex[] { + NDArrayIndex.point(0), NDArrayIndex.point(1), NDArrayIndex.all(), NDArrayIndex.all() + }, + Nd4j.create(new double[][] {{4, 5}, {6, 7}})); + weightOrig.put( + new INDArrayIndex[] { + NDArrayIndex.point(0), NDArrayIndex.point(2), NDArrayIndex.all(), NDArrayIndex.all() + }, + Nd4j.create(new double[][] {{8, 9}, {10, 11}})); + weightOrig.put( + new INDArrayIndex[] { + NDArrayIndex.point(1), NDArrayIndex.point(0), NDArrayIndex.all(), NDArrayIndex.all() + }, + Nd4j.create(new double[][] {{12, 13}, {14, 15}})); + weightOrig.put( + new INDArrayIndex[] { + NDArrayIndex.point(1), NDArrayIndex.point(1), NDArrayIndex.all(), NDArrayIndex.all() + }, + Nd4j.create(new double[][] {{16, 17}, {18, 19}})); + weightOrig.put( + new INDArrayIndex[] { + NDArrayIndex.point(1), NDArrayIndex.point(2), NDArrayIndex.all(), NDArrayIndex.all() + }, + Nd4j.create(new double[][] {{20, 21}, {22, 23}})); + + INDArray weightPermute = weightOrig.permute(3, 2, 1, 0); + INDArray w2d = + Shape.newShapeNoCopy(weightPermute, new int[] {depthIn * kH * kW, depthOut}, true); + + assertNotNull(w2d); + + // Expected order of weight rows, after reshaping: (kw0,kh0,din0), (kw1,kh0,din0), + // (kw0,kh1,din0), (kw1,kh1,din0), (kw0,kh0,din1), ... 
+ INDArray wExp = + Nd4j.create( + new double[][] { + {0, 12}, {1, 13}, {2, 14}, {3, 15}, {4, 16}, {5, 17}, {6, 18}, {7, 19}, {8, 20}, + {9, 21}, {10, 22}, {11, 23} + }) + .castTo(DataType.FLOAT); + + assertEquals(wExp, w2d); + } + + @Test + public void test1dInputType() { + + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() + .convolutionMode(ConvolutionMode.Same) + .layer( + Convolution1DLayer.builder() + .nOut(3) + .kernelSize(2) + .activation(Activation.TANH) + .build()) + .layer(Subsampling1DLayer.builder().kernelSize(2).stride(2).build()) + .layer(Upsampling1D.builder().size(2).build()) + .layer(RnnOutputLayer.builder().nOut(7).activation(Activation.SOFTMAX).build()) + .inputType(InputType.recurrent(10)) + .build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + List l = conf.getLayerActivationTypes(InputType.recurrent(10)); + assertEquals(InputType.recurrent(3, -1), l.get(0)); + assertEquals(InputType.recurrent(3, -1), l.get(1)); + assertEquals(InputType.recurrent(3, -1), l.get(2)); + assertEquals(InputType.recurrent(7, -1), l.get(3)); + + List l2 = conf.getLayerActivationTypes(InputType.recurrent(10, 6)); + assertEquals(InputType.recurrent(3, 6), l2.get(0)); + assertEquals(InputType.recurrent(3, 3), l2.get(1)); + assertEquals(InputType.recurrent(3, 6), l2.get(2)); + assertEquals(InputType.recurrent(7, 6), l2.get(3)); + + INDArray in = Nd4j.create(2, 10, 6); + INDArray out = net.output(in); + assertArrayEquals(new long[] {2, 7, 6}, out.shape()); + } + + @Test + public void testDeconvBadInput() { + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() + .list() + .layer(Deconvolution2D.builder().nIn(5).nOut(3).build()) + .build(); + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + INDArray badInput = Nd4j.create(DataType.FLOAT, 1, 10, 5, 5); + try { + net.output(badInput); + } catch (DL4JInvalidInputException e) { + String msg = e.getMessage(); + assertTrue( + msg.contains("Deconvolution2D") && msg.contains("input") && msg.contains("channels"), + msg); + } + } + + @Test + public void testConv1dCausalAllowed() { + Convolution1DLayer.builder().convolutionMode(ConvolutionMode.Causal).kernelSize(2).build(); + Subsampling1DLayer.builder().convolutionMode(ConvolutionMode.Causal).kernelSize(2).build(); + } + + @Test + public void testConv2dNoCausalAllowed() { + + try { + ConvolutionLayer.builder().convolutionMode(ConvolutionMode.Causal).build(); + fail("Expected exception"); + } catch (Throwable t) { + String m = t.getMessage().toLowerCase(); + assertTrue(m.contains("causal") && m.contains("1d"), m); } - @Test - public void testCNNBiasInit() { - ConvolutionLayer cnn = new ConvolutionLayer.Builder().nIn(1).nOut(3).biasInit(1).build(); - - NeuralNetConfiguration conf = NeuralNetConfiguration.builder().layer(cnn).build(); - - val numParams = conf.getFlattenedLayerConfigurations().get(0).initializer().numParams(conf); - INDArray params = Nd4j.create(1, numParams); - Layer layer = conf.getFlattenedLayerConfigurations().get(0).instantiate(conf, null, 0, params, true, params.dataType()); - - assertEquals(1, layer.getParam("b").size(0)); + try { + Deconvolution2D.builder().convolutionMode(ConvolutionMode.Causal).build(); + fail("Expected exception"); + } catch (Throwable t) { + String m = t.getMessage().toLowerCase(); + assertTrue(m.contains("causal") && m.contains("1d"), m); } - @Test - public void testCNNInputSetupMNIST() throws Exception { - INDArray input = getMnistData(); - Layer layer = getMNISTConfig(); - 
layer.activate(input, false, LayerWorkspaceMgr.noWorkspaces()); - - assertEquals(input, layer.input()); - assertArrayEquals(input.shape(), layer.input().shape()); + try { + DepthwiseConvolution2D.builder().convolutionMode(ConvolutionMode.Causal).build(); + fail("Expected exception"); + } catch (Throwable t) { + String m = t.getMessage().toLowerCase(); + assertTrue(m.contains("causal") && m.contains("1d"), m); } - @Test - public void testFeatureMapShapeMNIST() throws Exception { - int inputWidth = 28; - int[] stride = new int[] {1, 1}; - int[] padding = new int[] {0, 0}; - int[] kernelSize = new int[] {9, 9}; - int nChannelsIn = 1; - int depth = 20; - int featureMapWidth = (inputWidth + padding[1] * 2 - kernelSize[1]) / stride[1] + 1; - - INDArray input = getMnistData(); - - Layer layer = getCNNConfig(nChannelsIn, depth, kernelSize, stride, padding); - INDArray convActivations = layer.activate(input, false, LayerWorkspaceMgr.noWorkspaces()); - - assertEquals(featureMapWidth, convActivations.size(2)); - assertEquals(depth, convActivations.size(1)); + try { + SeparableConvolution2D.builder().convolutionMode(ConvolutionMode.Causal).build(); + fail("Expected exception"); + } catch (Throwable t) { + String m = t.getMessage().toLowerCase(); + assertTrue(m.contains("causal") && m.contains("1d"), m); } - @Test - public void testActivateResultsContained() { - Layer layer = getContainedConfig(); - INDArray input = getContainedData(); - INDArray expectedOutput = Nd4j.create(new float[] {0.98201379f, 0.98201379f, 0.98201379f, 0.98201379f, 0.99966465f, - 0.99966465f, 0.99966465f, 0.99966465f, 0.98201379f, 0.98201379f, 0.98201379f, 0.98201379f, 0.99966465f, - 0.99966465f, 0.99966465f, 0.99966465f, 0.98201379f, 0.98201379f, 0.98201379f, 0.98201379f, 0.99966465f, - 0.99966465f, 0.99966465f, 0.99966465f, 0.98201379f, 0.98201379f, 0.98201379f, 0.98201379f, 0.99966465f, - 0.99966465f, 0.99966465f, 0.99966465f}, new int[] {1, 2, 4, 4}); + try { + SubsamplingLayer.builder().convolutionMode(ConvolutionMode.Causal).build(); + fail("Expected exception"); + } catch (Throwable t) { + String m = t.getMessage().toLowerCase(); + assertTrue(m.contains("causal") && m.contains("1d"), m); + } + } - INDArray convActivations = layer.activate(input, false, LayerWorkspaceMgr.noWorkspaces()); - - assertArrayEquals(expectedOutput.shape(), convActivations.shape()); - assertEquals(expectedOutput, convActivations); + @Test + public void testConv3dNoCausalAllowed() { + try { + Convolution3D.builder().convolutionMode(ConvolutionMode.Causal).build(); + fail("Expected exception"); + } catch (Throwable t) { + String m = t.getMessage().toLowerCase(); + assertTrue(m.contains("causal") && m.contains("1d"), m); } - ////////////////////////////////////////////////////////////////////////////////// - - private static Layer getCNNConfig(int nIn, int nOut, int[] kernelSize, int[] stride, int[] padding) { - - ConvolutionLayer layer = new ConvolutionLayer.Builder(kernelSize, stride, padding).nIn(nIn).nOut(nOut) - .activation(Activation.SIGMOID).build(); - - NeuralNetConfiguration conf = NeuralNetConfiguration.builder().layer(layer).build(); - - val numParams = conf.getFlattenedLayerConfigurations().get(0).initializer().numParams(conf); - INDArray params = Nd4j.create(1, numParams); - return conf.getFlattenedLayerConfigurations().get(0).instantiate(conf, null, 0, params, true, params.dataType()); - } - - public Layer getMNISTConfig() { - int[] kernelSize = new int[] {9, 9}; - int[] stride = new int[] {1, 1}; - int[] padding = new int[] {1, 1}; - 
int nChannelsIn = 1; - int depth = 20; - - return getCNNConfig(nChannelsIn, depth, kernelSize, stride, padding); - - } - - public INDArray getMnistData() throws Exception { - int inputWidth = 28; - int inputHeight = 28; - int nChannelsIn = 1; - int nExamples = 5; - - DataSetIterator data = new MnistDataSetIterator(nExamples, nExamples); - DataSet mnist = data.next(); - nExamples = mnist.numExamples(); - return mnist.getFeatures().reshape(nExamples, nChannelsIn, inputHeight, inputWidth); - } - - public Layer getContainedConfig() { - int[] kernelSize = new int[] {2, 2}; - int[] stride = new int[] {2, 2}; - int[] padding = new int[] {0, 0}; - int nChannelsIn = 1; - int depth = 2; - - INDArray W = Nd4j.create(new double[] {0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5}, new int[] {2, 1, 2, 2}); - INDArray b = Nd4j.create(new double[] {1, 1}); - Layer layer = getCNNConfig(nChannelsIn, depth, kernelSize, stride, padding); - layer.setParam("W", W); - layer.setParam("b", b); - - return layer; - - } - - public INDArray getContainedData() { - INDArray ret = Nd4j.create(new float[] {1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, - 4, 4, 4, 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, - 4, 4, 4, 4, 4, 4, 4, 4}, new int[] {1, 1, 8, 8}); - return ret; - } - - public INDArray getContainedCol() { - return Nd4j.create(new float[] {1, 1, 1, 1, 3, 3, 3, 3, 1, 1, 1, 1, 3, 3, 3, 3, 1, 1, 1, 1, 3, 3, 3, 3, 1, 1, - 1, 1, 3, 3, 3, 3, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, - 2, 2, 4, 4, 4, 4}, new int[] {1, 1, 2, 2, 4, 4}); - } - - - - ////////////////////////////////////////////////////////////////////////////////// - - - @Test - public void testCNNMLNPretrain() throws Exception { - // Note CNN does not do pretrain - int numSamples = 10; - int batchSize = 10; - DataSetIterator mnistIter = new MnistDataSetIterator(batchSize, numSamples, true); - - MultiLayerNetwork model = getCNNMLNConfig(false, true); - model.fit(mnistIter); - - mnistIter.reset(); - - MultiLayerNetwork model2 = getCNNMLNConfig(false, true); - model2.fit(mnistIter); - mnistIter.reset(); - - DataSet test = mnistIter.next(); - - Evaluation eval = new Evaluation(); - INDArray output = model.output(test.getFeatures()); - eval.eval(test.getLabels(), output); - double f1Score = eval.f1(); - - Evaluation eval2 = new Evaluation(); - INDArray output2 = model2.output(test.getFeatures()); - eval2.eval(test.getLabels(), output2); - double f1Score2 = eval2.f1(); - - assertEquals(f1Score, f1Score2, 1e-4); - - - } - - - @Test - public void testCNNMLNBackprop() throws Exception { - int numSamples = 10; - int batchSize = 10; - DataSetIterator mnistIter = new MnistDataSetIterator(batchSize, numSamples, true); - - MultiLayerNetwork model = getCNNMLNConfig(true, false); - model.fit(mnistIter); - - MultiLayerNetwork model2 = getCNNMLNConfig(true, false); - model2.fit(mnistIter); - - mnistIter.reset(); - DataSet test = mnistIter.next(); - - Evaluation eval = new Evaluation(); - INDArray output = model.output(test.getFeatures()); - eval.eval(test.getLabels(), output); - double f1Score = eval.f1(); - - Evaluation eval2 = new Evaluation(); - INDArray output2 = model2.output(test.getFeatures()); - eval2.eval(test.getLabels(), output2); - double f1Score2 = eval2.f1(); - - assertEquals(f1Score, f1Score2, 1e-4); - - } - - @Test - public void testGetSetParams() { - - MultiLayerNetwork net = getCNNMLNConfig(true, false); - - INDArray paramsOrig = net.getModelParams().dup(); - 
net.setParams(paramsOrig); - - INDArray params2 = net.getModelParams(); - - assertEquals(paramsOrig, params2); - } - - private static final int kH = 2; - private static final int kW = 2; - private static final int[] strides = {1, 1}; - private static final int[] pad = {0, 0}; - - private static final int miniBatch = 2; - private static final int inDepth = 2; - private static final int height = 3; - private static final int width = 3; - - private static final int outW = 2; - private static final int outH = 2; - - private static INDArray getInput() { - - /* - ----- Input images ----- - example 0: - channels 0 channels 1 - [ 0 1 2 [ 9 10 11 - 3 4 5 12 13 14 - 6 7 8] 15 16 17] - example 1: - [18 19 20 [27 28 29 - 21 22 23 30 31 32 - 24 25 26] 33 34 35] - */ - - INDArray input = Nd4j.create(new int[] {miniBatch, inDepth, height, width}, 'c'); - input.put(new INDArrayIndex[] {NDArrayIndex.point(0), NDArrayIndex.point(0), NDArrayIndex.all(), - NDArrayIndex.all()}, Nd4j.create(new double[][] {{0, 1, 2}, {3, 4, 5}, {6, 7, 8}})); - input.put(new INDArrayIndex[] {NDArrayIndex.point(0), NDArrayIndex.point(1), NDArrayIndex.all(), - NDArrayIndex.all()}, Nd4j.create(new double[][] {{9, 10, 11}, {12, 13, 14}, {15, 16, 17}})); - input.put(new INDArrayIndex[] {NDArrayIndex.point(1), NDArrayIndex.point(0), NDArrayIndex.all(), - NDArrayIndex.all()}, Nd4j.create(new double[][] {{18, 19, 20}, {21, 22, 23}, {24, 25, 26}})); - input.put(new INDArrayIndex[] {NDArrayIndex.point(1), NDArrayIndex.point(1), NDArrayIndex.all(), - NDArrayIndex.all()}, Nd4j.create(new double[][] {{27, 28, 29}, {30, 31, 32}, {33, 34, 35}})); - - return input; - } - - @Test - public void testCnnIm2ColReshaping() { - //This test: a bit unusual in that it tests the *assumptions* of the CNN implementation rather than the implementation itself - //Specifically, it tests the row and column orders after reshaping on im2col is reshaped (both forward and backward pass) - INDArray input = getInput(); - - //im2col in the required order: want [outW,outH,miniBatch,depthIn,kH,kW], but need to input [miniBatch,channels,kH,kW,outH,outW] - // given the current im2col implementation - //To get this: create an array of the order we want, permute it to the order required by im2col implementation, and then do im2col on that - //to get old order from required order: permute(2,3,4,5,1,2) - INDArray col = Nd4j.create(new int[] {miniBatch, outH, outW, inDepth, kH, kW}, 'c'); - INDArray col2 = col.permute(0, 3, 4, 5, 1, 2); - Convolution.im2col(input, kH, kW, strides[0], strides[1], pad[0], pad[1], false, col2); - - /* - Expected Output, im2col - - example 0 - - channels 0 channels 1 - h0,w0 h0,w1 h0,w0 h0,w1 - 0 1 1 2 9 10 10 11 - 3 4 4 5 12 13 13 14 - - h1,w0 h1,w1 h1,w0 h1,w1 - 3 4 4 5 12 13 13 14 - 6 7 7 8 15 16 16 17 - - - example 1 - - channels 0 channels 1 - h0,w0 h0,w1 h0,w0 h0,w1 - 18 19 19 20 27 28 28 29 - 21 22 22 23 30 31 31 32 - - h1,w0 h1,w1 h1,w0 h1,w1 - 21 22 22 23 30 31 31 32 - 24 25 25 26 33 34 34 35 - */ - - //Now, after reshaping im2col to 2d, we expect: - //Rows with order (wOut0,hOut0,mb0), (wOut1,hOut0,mb0), (wOut0,hOut1,mb0), (wOut1,hOut1,mb0), (wOut0,hOut0,mb1), ... - //Columns with order (d0,kh0,kw0), (d0,kh0,kw1), (d0,kh1,kw0), (d0,kh1,kw1), (d1,kh0,kw0), ... 
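As a concrete check of that ordering: the row index is wOut + outW*hOut + outW*outH*mb and the column index is kw + kW*kh + kW*kH*d, so the 2x2 patch at (hOut=0, wOut=1) of example 0 contributes {1, 2, 4, 5} from channel 0 and {10, 11, 13, 14} from channel 1, which is exactly row 1 of the expected 2d array built next.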
- - INDArray reshapedCol = Shape.newShapeNoCopy(col, new int[] {miniBatch * outH * outW, inDepth * kH * kW}, false); - - INDArray exp2d = Nd4j.create(outW * outH * miniBatch, inDepth * kH * kW); - exp2d.putRow(0, Nd4j.create(new double[] {0, 1, 3, 4, 9, 10, 12, 13})); //wOut0,hOut0,mb0 -> both depths, in order (d0,kh0,kw0), (d0,kh0,kw1), (d0,kh1,kw0), (d0,kh1,kw1), (d1,kh0,kw0), (d1,kh0,kw1), (d1,kh1,kw0), (d1,kh1,kw1) - exp2d.putRow(1, Nd4j.create(new double[] {1, 2, 4, 5, 10, 11, 13, 14})); //wOut1,hOut0,mb0 - exp2d.putRow(2, Nd4j.create(new double[] {3, 4, 6, 7, 12, 13, 15, 16})); //wOut0,hOut1,mb0 - exp2d.putRow(3, Nd4j.create(new double[] {4, 5, 7, 8, 13, 14, 16, 17})); //wOut1,hOut1,mb0 - exp2d.putRow(4, Nd4j.create(new double[] {18, 19, 21, 22, 27, 28, 30, 31})); //wOut0,hOut0,mb1 - exp2d.putRow(5, Nd4j.create(new double[] {19, 20, 22, 23, 28, 29, 31, 32})); //wOut1,hOut0,mb1 - exp2d.putRow(6, Nd4j.create(new double[] {21, 22, 24, 25, 30, 31, 33, 34})); //wOut0,hOut1,mb1 - exp2d.putRow(7, Nd4j.create(new double[] {22, 23, 25, 26, 31, 32, 34, 35})); //wOut1,hOut1,mb1 - - assertEquals(exp2d, reshapedCol); - - //Check the same thing for the backprop im2col (different order) - INDArray colBackprop = Nd4j.create(new int[] {miniBatch, outH, outW, inDepth, kH, kW}, 'c'); - INDArray colBackprop2 = colBackprop.permute(0, 3, 4, 5, 1, 2); - - Convolution.im2col(input, kH, kW, strides[0], strides[1], pad[0], pad[1], false, colBackprop2); - - INDArray reshapedColBackprop = Shape.newShapeNoCopy(colBackprop, - new int[] {miniBatch * outH * outW, inDepth * kH * kW}, false); - - //Rows with order (mb0,h0,w0), (mb0,h0,w1), (mb0,h1,w0), (mb0,h1,w1), (mb1,h0,w0), (mb1,h0,w1), (mb1,h1,w0), (mb1,h1,w1) - //Columns with order (d0,kh0,kw0), (d0,kh0,kw1), (d0,kh1,kw0), (d0,kh1,kw1), (d1,kh0,kw0), ... - - INDArray exp2dv2 = Nd4j.create(outW * outH * miniBatch, inDepth * kH * kW); - exp2dv2.putRow(0, Nd4j.create(new double[] {0, 1, 3, 4, 9, 10, 12, 13})); //wOut0,hOut0,mb0 -> both depths, in order (d0,kh0,kw0), (d0,kh0,kw1), (d0,kh1,kw0), (d0,kh1,kw1), (d1,kh0,kw0), (d1,kh0,kw1), (d1,kh1,kw0), (d1,kh1,kw1) - exp2dv2.putRow(1, Nd4j.create(new double[] {1, 2, 4, 5, 10, 11, 13, 14})); //wOut1,hOut0,mb0 - exp2dv2.putRow(2, Nd4j.create(new double[] {3, 4, 6, 7, 12, 13, 15, 16})); //wOut0,hOut1,mb0 - exp2dv2.putRow(3, Nd4j.create(new double[] {4, 5, 7, 8, 13, 14, 16, 17})); //wOut1,hOut1,mb0 - exp2dv2.putRow(4, Nd4j.create(new double[] {18, 19, 21, 22, 27, 28, 30, 31})); //wOut0,hOut0,mb1 - exp2dv2.putRow(5, Nd4j.create(new double[] {19, 20, 22, 23, 28, 29, 31, 32})); //wOut1,hOut0,mb1 - exp2dv2.putRow(6, Nd4j.create(new double[] {21, 22, 24, 25, 30, 31, 33, 34})); //wOut0,hOut1,mb1 - exp2dv2.putRow(7, Nd4j.create(new double[] {22, 23, 25, 26, 31, 32, 34, 35})); //wOut1,hOut1,mb1 - - assertEquals(exp2dv2, reshapedColBackprop); - } - - @Test - public void testDeltaReshaping() { - //As per above test: testing assumptions of cnn implementation... - - //Delta: initially shape [miniBatch,dOut,outH,outW] - //permute to [dOut,miniB,outH,outW] - //then reshape to [dOut,miniB*outH*outW] - //Expect columns of delta2d to be like: (mb0,h0,w0), (mb0,h0,w1), (mb1,h0,w2), (mb0,h1,w0), ... (mb1,...), ..., (mb2,...) 
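Concretely, with miniBatch = 3, depth = 2 and 3x3 outputs as set up below, the permute-then-reshape places each example's channel-d 3x3 block (read row-major) side by side in row d: row 0 of delta2d is 0..8 from example 0, then 18..26 from example 1, then 36..44 from example 2, and row 1 holds the corresponding channel-1 values 9..17, 27..35 and 45..53, matching exp.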
- int miniBatch = 3; - int depth = 2; - int outW = 3; - int outH = 3; - - /* - ----- Input delta ----- - example 0: - channels 0 channels 1 - [ 0 1 2 [ 9 10 11 - 3 4 5 12 13 14 - 6 7 8] 15 16 17] - example 1: - [18 19 20 [27 28 29 - 21 22 23 30 31 32 - 24 25 26] 33 34 35] - example 2: - [36 37 38 [45 46 47 - 39 40 41 48 49 50 - 42 43 44] 51 52 53] - */ - - INDArray deltaOrig = Nd4j.create(new int[] {miniBatch, depth, outH, outW}, 'c'); - deltaOrig.put(new INDArrayIndex[] {NDArrayIndex.point(0), NDArrayIndex.point(0), NDArrayIndex.all(), - NDArrayIndex.all()}, Nd4j.create(new double[][] {{0, 1, 2}, {3, 4, 5}, {6, 7, 8}})); - deltaOrig.put(new INDArrayIndex[] {NDArrayIndex.point(0), NDArrayIndex.point(1), NDArrayIndex.all(), - NDArrayIndex.all()}, Nd4j.create(new double[][] {{9, 10, 11}, {12, 13, 14}, {15, 16, 17}})); - deltaOrig.put(new INDArrayIndex[] {NDArrayIndex.point(1), NDArrayIndex.point(0), NDArrayIndex.all(), - NDArrayIndex.all()}, Nd4j.create(new double[][] {{18, 19, 20}, {21, 22, 23}, {24, 25, 26}})); - deltaOrig.put(new INDArrayIndex[] {NDArrayIndex.point(1), NDArrayIndex.point(1), NDArrayIndex.all(), - NDArrayIndex.all()}, Nd4j.create(new double[][] {{27, 28, 29}, {30, 31, 32}, {33, 34, 35}})); - deltaOrig.put(new INDArrayIndex[] {NDArrayIndex.point(2), NDArrayIndex.point(0), NDArrayIndex.all(), - NDArrayIndex.all()}, Nd4j.create(new double[][] {{36, 37, 38}, {39, 40, 41}, {42, 43, 44}})); - deltaOrig.put(new INDArrayIndex[] {NDArrayIndex.point(2), NDArrayIndex.point(1), NDArrayIndex.all(), - NDArrayIndex.all()}, Nd4j.create(new double[][] {{45, 46, 47}, {48, 49, 50}, {51, 52, 53}})); - - - INDArray deltaPermute = deltaOrig.permute(1, 0, 2, 3).dup('c'); - INDArray delta2d = Shape.newShapeNoCopy(deltaPermute, new int[] {depth, miniBatch * outW * outH}, false); - - INDArray exp = Nd4j.create(new double[][] { - {0, 1, 2, 3, 4, 5, 6, 7, 8, 18, 19, 20, 21, 22, 23, 24, 25, 26, 36, 37, 38, 39, 40, 41, 42, 43, - 44}, //depth0 - {9, 10, 11, 12, 13, 14, 15, 16, 17, 27, 28, 29, 30, 31, 32, 33, 34, 35, 45, 46, 47, 48, 49, 50, - 51, 52, 53} //depth1 - }).castTo(delta2d.dataType()); - - assertEquals(exp, delta2d); - } - - @Test - public void testWeightReshaping() { - //Test assumptions of weight reshaping - //Weights: originally c order, shape [outDepth, inDepth, kH, kw] - //permute (3,2,1,0) - - int depthOut = 2; - int depthIn = 3; - int kH = 2; - int kW = 2; - - /* - ----- Weights ----- - - dOut 0 - - dIn 0 dIn 1 dIn 2 - [ 0 1 [ 4 5 [ 8 9 - 2 3] 6 7] 10 11] - - dOut 1 - - [12 13 [16 17 [20 21 - 14 15] 18 19] 22 23] - */ - - INDArray weightOrig = Nd4j.create(new int[] {depthOut, depthIn, kH, kW}, 'c'); - weightOrig.put(new INDArrayIndex[] {NDArrayIndex.point(0), NDArrayIndex.point(0), NDArrayIndex.all(), - NDArrayIndex.all()}, Nd4j.create(new double[][] {{0, 1}, {2, 3}})); - weightOrig.put(new INDArrayIndex[] {NDArrayIndex.point(0), NDArrayIndex.point(1), NDArrayIndex.all(), - NDArrayIndex.all()}, Nd4j.create(new double[][] {{4, 5}, {6, 7}})); - weightOrig.put(new INDArrayIndex[] {NDArrayIndex.point(0), NDArrayIndex.point(2), NDArrayIndex.all(), - NDArrayIndex.all()}, Nd4j.create(new double[][] {{8, 9}, {10, 11}})); - weightOrig.put(new INDArrayIndex[] {NDArrayIndex.point(1), NDArrayIndex.point(0), NDArrayIndex.all(), - NDArrayIndex.all()}, Nd4j.create(new double[][] {{12, 13}, {14, 15}})); - weightOrig.put(new INDArrayIndex[] {NDArrayIndex.point(1), NDArrayIndex.point(1), NDArrayIndex.all(), - NDArrayIndex.all()}, Nd4j.create(new double[][] {{16, 17}, {18, 19}})); - weightOrig.put(new 
INDArrayIndex[] {NDArrayIndex.point(1), NDArrayIndex.point(2), NDArrayIndex.all(), - NDArrayIndex.all()}, Nd4j.create(new double[][] {{20, 21}, {22, 23}})); - - INDArray weightPermute = weightOrig.permute(3, 2, 1, 0); - INDArray w2d = Shape.newShapeNoCopy(weightPermute, new int[] {depthIn * kH * kW, depthOut}, true); - - assertNotNull(w2d); - - //Expected order of weight rows, after reshaping: (kw0,kh0,din0), (kw1,kh0,din0), (kw0,kh1,din0), (kw1,kh1,din0), (kw0,kh0,din1), ... - INDArray wExp = Nd4j.create(new double[][] {{0, 12}, {1, 13}, {2, 14}, {3, 15}, {4, 16}, {5, 17}, {6, 18}, - {7, 19}, {8, 20}, {9, 21}, {10, 22}, {11, 23}}).castTo(DataType.FLOAT); - - assertEquals(wExp, w2d); - } - - ////////////////////////////////////////////////////////////////////////////////// - - private static MultiLayerNetwork getCNNMLNConfig(boolean backprop, boolean pretrain) { - int outputNum = 10; - int seed = 123; - - NeuralNetConfiguration.NeuralNetConfigurationBuilder conf = - NeuralNetConfiguration.builder().seed(seed) - .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT).list() - .layer(0, new ConvolutionLayer.Builder(new int[] {10, 10}).nOut(6).build()) - .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, - new int[] {2, 2}).stride(1, 1).build()) - .layer(2, new OutputLayer.Builder( - LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) - .nOut(outputNum).weightInit(WeightInit.XAVIER) - .activation(Activation.SOFTMAX).build()) - .inputType(InputType.convolutionalFlat(28, 28, 1)); - - MultiLayerNetwork model = new MultiLayerNetwork(conf.build()); - model.init(); - - return model; - } - - - - @Test - public void test1dInputType(){ - - NeuralNetConfiguration conf = NeuralNetConfiguration.builder() - .convolutionMode(ConvolutionMode.Same) - .list() - .layer(new Convolution1DLayer.Builder().nOut(3).kernelSize(2).activation(Activation.TANH).build()) - .layer(new Subsampling1DLayer.Builder().kernelSize(2).stride(2).build()) - .layer(new Upsampling1D.Builder().size(2).build()) - .layer(new RnnOutputLayer.Builder().nOut(7).activation(Activation.SOFTMAX).build()) - .inputType(InputType.recurrent(10)) - .build(); - - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - - List l = conf.getLayerActivationTypes(InputType.recurrent(10)); - assertEquals(InputType.recurrent(3, -1), l.get(0)); - assertEquals(InputType.recurrent(3, -1), l.get(1)); - assertEquals(InputType.recurrent(3, -1), l.get(2)); - assertEquals(InputType.recurrent(7, -1), l.get(3)); - - List l2 = conf.getLayerActivationTypes(InputType.recurrent(10, 6)); - assertEquals(InputType.recurrent(3, 6), l2.get(0)); - assertEquals(InputType.recurrent(3, 3), l2.get(1)); - assertEquals(InputType.recurrent(3, 6), l2.get(2)); - assertEquals(InputType.recurrent(7, 6), l2.get(3)); - - - INDArray in = Nd4j.create(2, 10, 6); - INDArray out = net.output(in); - assertArrayEquals(new long[]{2,7,6}, out.shape()); - } - - @Test - public void testDeconvBadInput(){ - NeuralNetConfiguration conf = NeuralNetConfiguration.builder() - .list() - .layer(new Deconvolution2D.Builder().nIn(5).nOut(3).build()) - .build(); - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - - INDArray badInput = Nd4j.create(DataType.FLOAT, 1, 10, 5, 5); - try { - net.output(badInput); - } catch (DL4JInvalidInputException e){ - String msg = e.getMessage(); - assertTrue(msg.contains("Deconvolution2D") && msg.contains("input") && msg.contains("channels"), msg); - } - } - - @Test - public void testConv1dCausalAllowed(){ - new 
Convolution1DLayer.Builder().convolutionMode(ConvolutionMode.Causal).kernelSize(2).build(); - new Subsampling1DLayer.Builder().convolutionMode(ConvolutionMode.Causal).kernelSize(2).build(); - } - - @Test - public void testConv2dNoCausalAllowed(){ - - try{ - new ConvolutionLayer.Builder().convolutionMode(ConvolutionMode.Causal).build(); - fail("Expected exception"); - } catch (Throwable t){ - String m = t.getMessage().toLowerCase(); - assertTrue(m.contains("causal") && m.contains("1d"), m); - } - - try{ - new Deconvolution2D.Builder().convolutionMode(ConvolutionMode.Causal).build(); - fail("Expected exception"); - } catch (Throwable t){ - String m = t.getMessage().toLowerCase(); - assertTrue(m.contains("causal") && m.contains("1d"), m); - } - - try{ - new DepthwiseConvolution2D.Builder().convolutionMode(ConvolutionMode.Causal).build(); - fail("Expected exception"); - } catch (Throwable t){ - String m = t.getMessage().toLowerCase(); - assertTrue(m.contains("causal") && m.contains("1d"), m); - } - - try{ - new SeparableConvolution2D.Builder().convolutionMode(ConvolutionMode.Causal).build(); - fail("Expected exception"); - } catch (Throwable t){ - String m = t.getMessage().toLowerCase(); - assertTrue(m.contains("causal") && m.contains("1d"), m); - } - - try{ - new SubsamplingLayer.Builder().convolutionMode(ConvolutionMode.Causal).build(); - fail("Expected exception"); - } catch (Throwable t){ - String m = t.getMessage().toLowerCase(); - assertTrue(m.contains("causal") && m.contains("1d"), m); - } - } - - @Test - public void testConv3dNoCausalAllowed(){ - try{ - new Convolution3D.Builder().convolutionMode(ConvolutionMode.Causal).build(); - fail("Expected exception"); - } catch (Throwable t){ - String m = t.getMessage().toLowerCase(); - assertTrue(m.contains("causal") && m.contains("1d"), m); - } - - try{ - new Subsampling3DLayer.Builder().convolutionMode(ConvolutionMode.Causal).build(); - fail("Expected exception"); - } catch (Throwable t){ - String m = t.getMessage().toLowerCase(); - assertTrue(m.contains("causal") && m.contains("1d"), m); - } + try { + Subsampling3DLayer.builder().convolutionMode(ConvolutionMode.Causal).build(); + fail("Expected exception"); + } catch (Throwable t) { + String m = t.getMessage().toLowerCase(); + assertTrue(m.contains("causal") && m.contains("1d"), m); } + } } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/LocallyConnectedLayerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/LocallyConnectedLayerTest.java index c39a785c1..6fda18385 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/LocallyConnectedLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/LocallyConnectedLayerTest.java @@ -68,14 +68,14 @@ public class LocallyConnectedLayerTest extends BaseDL4JTest { .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).l2(2e-4) .updater(new Nesterovs(0.9)).dropOut(0.5) .list() - .layer(new LocallyConnected2D.Builder().kernelSize(8, 8).nIn(3) + .layer(LocallyConnected2D.builder().kernelSize(8, 8).nIn(3) .stride(4, 4).nOut(16).dropOut(0.5) .convolutionMode(ConvolutionMode.Strict) - .setInputSize(28, 28) + .inputSize(28, 28) .activation(Activation.RELU).weightInit( WeightInit.XAVIER) .build()) - .layer(new OutputLayer.Builder(LossFunctions.LossFunction.SQUARED_LOSS) //output layer + .layer(OutputLayer.builder(LossFunctions.LossFunction.SQUARED_LOSS) //output layer 
.nOut(10).weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).build()) .inputType(InputType.convolutionalFlat(28, 28, 3)); @@ -94,15 +94,14 @@ public class LocallyConnectedLayerTest extends BaseDL4JTest { NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().seed(123) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).l2(2e-4) .updater(new Nesterovs(0.9)).dropOut(0.5) - .list() - .layer(new LocallyConnected1D.Builder().kernelSize(4).nIn(3) + .layer(LocallyConnected1D.builder().kernelSize(4).nIn(3) .stride(1).nOut(16).dropOut(0.5) .convolutionMode(ConvolutionMode.Strict) - .setInputSize(28) + .inputSize(28) .activation(Activation.RELU).weightInit( WeightInit.XAVIER) .build()) - .layer(new OutputLayer.Builder(LossFunctions.LossFunction.SQUARED_LOSS) //output layer + .layer(OutputLayer.builder(LossFunctions.LossFunction.SQUARED_LOSS) //output layer .nOut(10).weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).build()) .inputType(InputType.recurrent(3, 8)); @@ -145,9 +144,9 @@ public class LocallyConnectedLayerTest extends BaseDL4JTest { switch (test){ case 0: b.addInputs("in") - .addLayer("1", new LSTM.Builder().nOut(5).build(), "in") - .addLayer("2", new LocallyConnected1D.Builder().kernelSize(2).nOut(4).build(), "1") - .addLayer("out", new RnnOutputLayer.Builder().nOut(10).build(), "2") + .addLayer("1", LSTM.builder().nOut(5).build(), "in") + .addLayer("2", LocallyConnected1D.builder().kernelSize(2).nOut(4).build(), "1") + .addLayer("out", RnnOutputLayer.builder().nOut(10).build(), "2") .setOutputs("out") .setInputTypes(InputType.recurrent(5, 4)); in = new INDArray[]{Nd4j.rand(networkDtype, 2, 5, 4)}; @@ -155,9 +154,9 @@ public class LocallyConnectedLayerTest extends BaseDL4JTest { break; case 1: b.addInputs("in") - .addLayer("1", new ConvolutionLayer.Builder().kernelSize(2,2).nOut(5).convolutionMode(ConvolutionMode.Same).build(), "in") - .addLayer("2", new LocallyConnected2D.Builder().kernelSize(2,2).nOut(5).build(), "1") - .addLayer("out", new OutputLayer.Builder().nOut(10).build(), "2") + .addLayer("1", ConvolutionLayer.builder().kernelSize(2,2).nOut(5).convolutionMode(ConvolutionMode.Same).build(), "in") + .addLayer("2", LocallyConnected2D.builder().kernelSize(2,2).nOut(5).build(), "1") + .addLayer("out", OutputLayer.builder().nOut(10).build(), "2") .setOutputs("out") // .setInputTypes(InputType.convolutional(28, 28, 1)); // in = new INDArray[]{Nd4j.rand(networkDtype, 2, 1, 28, 28)}; diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/SpaceToDepthTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/SpaceToDepthTest.java index 1c47e1b2d..b8548ea33 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/SpaceToDepthTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/SpaceToDepthTest.java @@ -61,7 +61,7 @@ public class SpaceToDepthTest extends BaseDL4JTest { private Layer getSpaceToDepthLayer() { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer).seed(123) - .layer(new SpaceToDepthLayer.Builder(blockSize, dataFormat).build()).build(); + .layer(SpaceToDepthLayer.builder().blockSize(blockSize).dataFormat(dataFormat.toFormat()).build()).build(); return conf.getFlattenedLayerConfigurations().get(0).instantiate(conf, null, 0, null, true, Nd4j.defaultFloatingPointType()); } diff 
--git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/SubsamplingLayerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/SubsamplingLayerTest.java index 4cc8341cc..29a4e5463 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/SubsamplingLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/SubsamplingLayerTest.java @@ -170,7 +170,7 @@ public class SubsamplingLayerTest extends BaseDL4JTest { private Layer getSubsamplingLayer(SubsamplingLayer.PoolingType pooling) { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer).seed(123) - .layer(new SubsamplingLayer.Builder(pooling, new int[] {2, 2}).build()).build(); + .layer(SubsamplingLayer.builder(pooling, new int[] {2, 2}).build()).build(); return conf.getFlattenedLayerConfigurations().get(0).instantiate(conf, null, 0, null, true, Nd4j.defaultFloatingPointType()); } @@ -214,16 +214,16 @@ public class SubsamplingLayerTest extends BaseDL4JTest { DataSet trainInput; NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().seed(123).list() - .layer(0, new org.deeplearning4j.nn.conf.layers.ConvolutionLayer.Builder( + .layer(0, org.deeplearning4j.nn.conf.layers.ConvolutionLayer.builder( kernelHeight, kernelWidth).stride(1, 1).nOut(2) .activation(Activation.RELU).weightInit( WeightInit.XAVIER) .build()) - .layer(1, new SubsamplingLayer.Builder() - .poolingType(SubsamplingLayer.PoolingType.MAX) + .layer(1, SubsamplingLayer.builder() + .poolingType(SubsamplingLayer.PoolingType.MAX.toPoolingType()) .kernelSize(imageHeight - kernelHeight + 2, 1) //imageHeight-kernelHeight+1 is ok: full height .stride(1, 1).build()) - .layer(2, new OutputLayer.Builder().nOut(classes).weightInit(WeightInit.XAVIER) + .layer(2, OutputLayer.builder().nOut(classes).weightInit(WeightInit.XAVIER) .activation(Activation.SOFTMAX).build()) .inputType(InputType.convolutional(imageHeight, imageWidth, nChannels)); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/TestConvolutionModes.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/TestConvolutionModes.java index 8cdc85768..89ce76cdb 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/TestConvolutionModes.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/TestConvolutionModes.java @@ -80,10 +80,10 @@ public class TestConvolutionModes extends BaseDL4JTest { LayerConfiguration layer; if (isSubsampling) { - layer = new SubsamplingLayer.Builder().kernelSize(3, 3).stride(3, 3).padding(0, 0) + layer = SubsamplingLayer.builder().kernelSize(3, 3).stride(3, 3).padding(0, 0) .build(); } else { - layer = new ConvolutionLayer.Builder().kernelSize(3, 3).stride(3, 3).padding(0, 0) + layer = ConvolutionLayer.builder().kernelSize(3, 3).stride(3, 3).padding(0, 0) .nOut(3).build(); } @@ -93,7 +93,7 @@ public class TestConvolutionModes extends BaseDL4JTest { NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER) .convolutionMode(cm).list() .layer(0, layer).layer(1, - new OutputLayer.Builder() + OutputLayer.builder() .activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT) .nOut(3).build()) @@ -159,10 +159,10 @@ public class TestConvolutionModes extends BaseDL4JTest { LayerConfiguration 
layer; if (isSubsampling) { - layer = new SubsamplingLayer.Builder().kernelSize(3, 3).stride(3, 3).padding(0, 0) + layer = SubsamplingLayer.builder().kernelSize(3, 3).stride(3, 3).padding(0, 0) .build(); } else { - layer = new ConvolutionLayer.Builder().kernelSize(3, 3).stride(3, 3).padding(0, 0) + layer = ConvolutionLayer.builder().kernelSize(3, 3).stride(3, 3).padding(0, 0) .nOut(3).build(); } @@ -171,7 +171,7 @@ public class TestConvolutionModes extends BaseDL4JTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .weightInit(WeightInit.XAVIER).convolutionMode(cm).graphBuilder() .addInputs("in").addLayer("0", layer, "in") - .addLayer("1", new OutputLayer.Builder() + .addLayer("1", OutputLayer.builder() .activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).nOut(3) .build(), "0") @@ -211,31 +211,31 @@ public class TestConvolutionModes extends BaseDL4JTest { for (ConvolutionMode cm : new ConvolutionMode[] {ConvolutionMode.Strict, ConvolutionMode.Truncate}) { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER) .convolutionMode(cm).list() - .layer(0, new ConvolutionLayer.Builder().kernelSize(3, 3).stride(3, 3).padding(0, 0) + .layer(0, ConvolutionLayer.builder().kernelSize(3, 3).stride(3, 3).padding(0, 0) .nIn(3).nOut( 3) .build()) - .layer(1, new ConvolutionLayer.Builder().convolutionMode(ConvolutionMode.Strict) + .layer(1, ConvolutionLayer.builder().convolutionMode(ConvolutionMode.Strict) .kernelSize(3, 3).stride(3, 3).padding(0, 0) .nIn(3).nOut( 3) .build()) - .layer(2, new ConvolutionLayer.Builder().convolutionMode(ConvolutionMode.Truncate) + .layer(2, ConvolutionLayer.builder().convolutionMode(ConvolutionMode.Truncate) .kernelSize(3, 3).stride(3, 3).padding(0, 0) .nIn(3).nOut( 3) .build()) - .layer(3, new ConvolutionLayer.Builder().convolutionMode(ConvolutionMode.Same) + .layer(3, ConvolutionLayer.builder().convolutionMode(ConvolutionMode.Same) .kernelSize(3, 3).stride(3, 3).padding(0, 0).nIn(3).nOut(3).build()) - .layer(4, new SubsamplingLayer.Builder().kernelSize(3, 3).stride(3, 3).padding(0, 0) + .layer(4, SubsamplingLayer.builder().kernelSize(3, 3).stride(3, 3).padding(0, 0) .build()) - .layer(5, new SubsamplingLayer.Builder().convolutionMode(ConvolutionMode.Strict) + .layer(5, SubsamplingLayer.builder().convolutionMode(ConvolutionMode.Strict) .kernelSize(3, 3).stride(3, 3).padding(0, 0).build()) - .layer(6, new SubsamplingLayer.Builder().convolutionMode(ConvolutionMode.Truncate) + .layer(6, SubsamplingLayer.builder().convolutionMode(ConvolutionMode.Truncate) .kernelSize(3, 3).stride(3, 3).padding(0, 0).build()) - .layer(7, new SubsamplingLayer.Builder().convolutionMode(ConvolutionMode.Same) + .layer(7, SubsamplingLayer.builder().convolutionMode(ConvolutionMode.Same) .kernelSize(3, 3).stride(3, 3).padding(0, 0).build()) - .layer(8, new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MCXENT).nOut(3) + .layer(8, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT).nOut(3) .activation(Activation.SOFTMAX).build()) .build(); @@ -259,31 +259,31 @@ public class TestConvolutionModes extends BaseDL4JTest { ConvolutionMode.Same}) { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER) .convolutionMode(cm).graphBuilder().addInputs("in") - .addLayer("0", new ConvolutionLayer.Builder().kernelSize(3, 3).stride(3, 3).padding(0, 0) + .addLayer("0", ConvolutionLayer.builder().kernelSize(3, 3).stride(3, 3).padding(0, 0) .nIn(3).nOut( 
3) .build(), "in") - .addLayer("1", new ConvolutionLayer.Builder().convolutionMode(ConvolutionMode.Strict) + .addLayer("1", ConvolutionLayer.builder().convolutionMode(ConvolutionMode.Strict) .kernelSize(3, 3).stride(3, 3).padding(0, 0) .nIn(3).nOut( 3) .build(), "0") - .addLayer("2", new ConvolutionLayer.Builder().convolutionMode(ConvolutionMode.Truncate) + .addLayer("2", ConvolutionLayer.builder().convolutionMode(ConvolutionMode.Truncate) .kernelSize(3, 3).stride(3, 3).padding(0, 0) .nIn(3).nOut( 3) .build(), "1") - .addLayer("3", new ConvolutionLayer.Builder().convolutionMode(ConvolutionMode.Same) + .addLayer("3", ConvolutionLayer.builder().convolutionMode(ConvolutionMode.Same) .kernelSize(3, 3).stride(3, 3).padding(0, 0).nIn(3).nOut(3).build(), "2") - .addLayer("4", new SubsamplingLayer.Builder().kernelSize(3, 3).stride(3, 3).padding(0, 0) + .addLayer("4", SubsamplingLayer.builder().kernelSize(3, 3).stride(3, 3).padding(0, 0) .build(), "3") - .addLayer("5", new SubsamplingLayer.Builder().convolutionMode(ConvolutionMode.Strict) + .addLayer("5", SubsamplingLayer.builder().convolutionMode(ConvolutionMode.Strict) .kernelSize(3, 3).stride(3, 3).padding(0, 0).build(), "4") - .addLayer("6", new SubsamplingLayer.Builder().convolutionMode(ConvolutionMode.Truncate) + .addLayer("6", SubsamplingLayer.builder().convolutionMode(ConvolutionMode.Truncate) .kernelSize(3, 3).stride(3, 3).padding(0, 0).build(), "5") - .addLayer("7", new SubsamplingLayer.Builder().convolutionMode(ConvolutionMode.Same) + .addLayer("7", SubsamplingLayer.builder().convolutionMode(ConvolutionMode.Same) .kernelSize(3, 3).stride(3, 3).padding(0, 0).build(), "6") - .addLayer("8", new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MCXENT) + .addLayer("8", OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(3).build(), "7") .setOutputs("8").build(); @@ -437,13 +437,13 @@ public class TestConvolutionModes extends BaseDL4JTest { int kW = 3; LayerConfiguration[] l = new LayerConfiguration[2]; - l[0] = new ConvolutionLayer.Builder().nOut(4).kernelSize(kH, kW).stride(sH, sW).build(); - l[1] = new SubsamplingLayer.Builder().kernelSize(kH, kW).stride(sH, sW).build(); + l[0] = ConvolutionLayer.builder().nOut(4).kernelSize(kH, kW).stride(sH, sW).build(); + l[1] = SubsamplingLayer.builder().kernelSize(kH, kW).stride(sH, sW).build(); for (int i = 0; i < l.length; i++) { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().convolutionMode(ConvolutionMode.Same) - .list().layer(0, l[i]).layer(1, new OutputLayer.Builder().nOut(3).activation(Activation.SOFTMAX).build()) + .list().layer(0, l[i]).layer(1, OutputLayer.builder().nOut(3).activation(Activation.SOFTMAX).build()) .inputType(InputType.convolutional(inH, inW, inDepth)).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/Upsampling1DTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/Upsampling1DTest.java index 064464d67..42a5832ef 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/Upsampling1DTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/Upsampling1DTest.java @@ -106,7 +106,7 @@ public class Upsampling1DTest extends BaseDL4JTest { private Layer getUpsampling1DLayer() { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() 
.gradientNormalization(GradientNormalization.RenormalizeL2PerLayer).seed(123) - .layer(new Upsampling1D.Builder(size).build()).build(); + .layer(Upsampling1D.builder(size).build()).build(); return conf.getFlattenedLayerConfigurations().get(0).instantiate(conf, null, 0, null, true, Nd4j.defaultFloatingPointType()); } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/Upsampling2DTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/Upsampling2DTest.java index 286259904..ac016d75d 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/Upsampling2DTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/Upsampling2DTest.java @@ -110,7 +110,7 @@ public class Upsampling2DTest extends BaseDL4JTest { private Layer getUpsamplingLayer() { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer).seed(123) - .layer(new Upsampling2D.Builder(size).build()).build(); + .layer(Upsampling2D.builder(size).build()).build(); return conf.getFlattenedLayerConfigurations().get(0).instantiate(conf, null, 0, null, true, Nd4j.defaultFloatingPointType()); } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/TestCustomActivation.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/TestCustomActivation.java index 94994ea47..7627b0657 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/TestCustomActivation.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/TestCustomActivation.java @@ -20,6 +20,8 @@ package org.deeplearning4j.nn.layers.custom; +import static org.junit.jupiter.api.Assertions.assertEquals; + import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.DenseLayer; @@ -30,31 +32,38 @@ import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - public class TestCustomActivation extends BaseDL4JTest { - @Test - public void testCustomActivationFn() { - //Second: let's create a MultiLayerCofiguration with one, and check JSON and YAML config actually works... + @Test + public void testCustomActivationFn() { + // Second: let's create a MultiLayerCofiguration with one, and check JSON and YAML config + // actually works... 
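The mechanical change repeated throughout these test diffs is the switch from the hand-written inner Builder classes to the Lombok-generated static builders introduced by this patch. A minimal sketch of the two equivalent call styles, using DenseLayer as in the test below (the chained setters themselves are unchanged):

    // old style, removed by this patch
    DenseLayer before = new DenseLayer.Builder().nIn(10).nOut(10).build();

    // new style, generated by @SuperBuilder
    DenseLayer after = DenseLayer.builder().nIn(10).nOut(10).build();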
- NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new Sgd(0.1)).list() - .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).activation(new CustomActivation()).build()) - .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(10).nOut(10).build()) - .build(); + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() + .updater(new Sgd(0.1)) - String json = conf.toJson(); - String yaml = conf.toYaml(); + .layer( + 0, DenseLayer.builder().nIn(10).nOut(10).activation(new CustomActivation()).build()) + .layer( + 1, + OutputLayer.builder() + .lossFunction(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX) + .nIn(10) + .nOut(10) + .build()) + .build(); -// System.out.println(json); + String json = conf.toJson(); + String yaml = conf.toYaml(); - NeuralNetConfiguration confFromJson = NeuralNetConfiguration.fromJson(json); - assertEquals(conf, confFromJson); + // System.out.println(json); - NeuralNetConfiguration confFromYaml = NeuralNetConfiguration.fromYaml(yaml); - assertEquals(conf, confFromYaml); - - } + NeuralNetConfiguration confFromJson = NeuralNetConfiguration.fromJson(json); + assertEquals(conf, confFromJson); + NeuralNetConfiguration confFromYaml = NeuralNetConfiguration.fromYaml(yaml); + assertEquals(conf, confFromYaml); + } } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/TestCustomLayers.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/TestCustomLayers.java index 75e48861d..7e35d071b 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/TestCustomLayers.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/TestCustomLayers.java @@ -47,9 +47,9 @@ public class TestCustomLayers extends BaseDL4JTest { public void testJsonMultiLayerNetwork() { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() - .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()) + .layer(0, DenseLayer.builder().nIn(10).nOut(10).build()) .layer(1, new CustomLayer(3.14159)).layer(2, - new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(10).nOut(10).build()) .build(); @@ -70,9 +70,9 @@ public class TestCustomLayers extends BaseDL4JTest { //ComputationGraph with a custom layer; check JSON and YAML config actually works... 
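Where the old inner Builders took constructor arguments, the generated builders move those into the fluent chain instead; both forms below appear in the surrounding diffs and are shown here side by side as a sketch:

    // old: loss function passed to the Builder constructor
    OutputLayer o1 = new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
            .activation(Activation.SOFTMAX).nIn(10).nOut(10).build();

    // new: explicit lossFunction(...) setter on the generated builder
    OutputLayer o2 = OutputLayer.builder()
            .lossFunction(LossFunctions.LossFunction.MCXENT)
            .activation(Activation.SOFTMAX).nIn(10).nOut(10).build();

The same applies to geometry arguments, e.g. new ConvolutionLayer.Builder(kernelSize, stride, padding) becomes ConvolutionLayer.builder().kernelSize(...).stride(...).padding(...).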
ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().graphBuilder() - .addInputs("in").addLayer("0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in") + .addInputs("in").addLayer("0", DenseLayer.builder().nIn(10).nOut(10).build(), "in") .addLayer("1", new CustomLayer(3.14159), "0").addLayer("2", - new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX) + OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX) .nIn(10).nOut(10).build(), "1") .setOutputs("2").build(); @@ -95,8 +95,8 @@ public class TestCustomLayers extends BaseDL4JTest { //Actually create a network with a custom layer; check initialization and forward pass NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() - .layer(0, new DenseLayer.Builder().nIn(9).nOut(10).build()).layer(1, new CustomLayer(3.14159)) //hard-coded nIn/nOut of 10 - .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(10).nOut(11).build()) + .layer(0, DenseLayer.builder().nIn(9).nOut(10).build()).layer(1, new CustomLayer(3.14159)) //hard-coded nIn/nOut of 10 + .layer(2, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(10).nOut(11).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -118,8 +118,8 @@ public class TestCustomLayers extends BaseDL4JTest { //Second: let's create a MultiLayerCofiguration with one, and check JSON and YAML config actually works... NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345).list() - .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()) - .layer(1, new CustomOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(0, DenseLayer.builder().nIn(10).nOut(10).build()) + .layer(1, CustomOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX) .nIn(10).nOut(10).build()) .build(); @@ -146,8 +146,8 @@ public class TestCustomLayers extends BaseDL4JTest { NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder().seed(12345).weightInit(WeightInit.XAVIER) .list() - .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()).layer(1, - new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(0, DenseLayer.builder().nIn(10).nOut(10).build()).layer(1, + OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(10).nOut(10).build()) .build(); Nd4j.getRandom().setSeed(12345); @@ -171,8 +171,8 @@ public class TestCustomLayers extends BaseDL4JTest { //Create a ComputationGraphConfiguration with custom output layer, and check JSON and YAML config actually works... 
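For custom layer configurations the same migration applies at the class level: the hand-written inner Builder is deleted and the configuration class itself is annotated with @SuperBuilder, inheriting every setter from its (likewise @SuperBuilder-annotated) parent. A minimal sketch under that assumption; the class name is illustrative, and the instantiate()/initializer() overrides that the real CustomOutputLayer below still provides are elided:

    @SuperBuilder
    @NoArgsConstructor
    public class MyOutputLayer extends BaseOutputLayer {
        // instantiate(...) and initializer() overrides elided; see CustomOutputLayer below
    }

    // nIn/nOut/lossFunction/activation all come from the inherited, generated builder:
    MyOutputLayer layer = MyOutputLayer.builder()
            .lossFunction(LossFunctions.LossFunction.MCXENT)
            .nIn(10).nOut(10)
            .activation(Activation.SOFTMAX)
            .build();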
ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .graphBuilder().addInputs("in") - .addLayer("0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in").addLayer("1", - new CustomOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(10) + .addLayer("0", DenseLayer.builder().nIn(10).nOut(10).build(), "in").addLayer("1", + CustomOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT).nIn(10) .nOut(10).activation(Activation.SOFTMAX).build(), "0") .setOutputs("1").build(); @@ -198,8 +198,8 @@ public class TestCustomLayers extends BaseDL4JTest { //Fourth: compare to an equivalent standard output layer (should be identical) ComputationGraphConfiguration conf2 = NeuralNetConfiguration.builder().seed(12345) .graphBuilder().addInputs("in") - .addLayer("0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in").addLayer("1", - new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(10).nOut(10) + .addLayer("0", DenseLayer.builder().nIn(10).nOut(10).build(), "in").addLayer("1", + OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT).nIn(10).nOut(10) .activation(Activation.SOFTMAX).build(), "0") .setOutputs("1").build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/testclasses/CustomOutputLayer.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/testclasses/CustomOutputLayer.java index 350f24f4b..d90fb6783 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/testclasses/CustomOutputLayer.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/testclasses/CustomOutputLayer.java @@ -25,6 +25,7 @@ import lombok.Data; import lombok.EqualsAndHashCode; import lombok.NoArgsConstructor; import lombok.ToString; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -43,16 +44,14 @@ import java.util.Map; @NoArgsConstructor @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@SuperBuilder public class CustomOutputLayer extends BaseOutputLayer { - protected CustomOutputLayer(Builder builder) { - super(builder); - } - @Override public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + runInheritance(); CustomOutputLayerImpl ret = new CustomOutputLayerImpl(lconf, networkDataType); ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); @@ -68,17 +67,5 @@ public class CustomOutputLayer extends BaseOutputLayer { return DefaultParamInitializer.getInstance(); } - @NoArgsConstructor - public static class Builder extends BaseOutputLayer.Builder { - public Builder(LossFunctions.LossFunction lossFunction) { - super.lossFunction(lossFunction); - } - - @Override - @SuppressWarnings("unchecked") - public CustomOutputLayer build() { - return new CustomOutputLayer(this); - } - } } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/dense/DenseTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/dense/DenseTest.java index 01cc7f2dd..b1176a5ef 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/dense/DenseTest.java +++ 
b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/dense/DenseTest.java @@ -49,7 +49,7 @@ public class DenseTest extends BaseDL4JTest { @Test public void testDenseBiasInit() { - DenseLayer build = new DenseLayer.Builder().nIn(1).nOut(3).biasInit(1).build(); + DenseLayer build = DenseLayer.builder().nIn(1).nOut(3).biasInit(1).build(); NeuralNetConfiguration conf = NeuralNetConfiguration.builder().layer(build).build(); @@ -125,11 +125,11 @@ public class DenseTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(seed) .updater(new Sgd(1e-3)).l1(0.3).l2(1e-3).list() - .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(numInputs).nOut(3) + .layer(0, org.deeplearning4j.nn.conf.layers.DenseLayer.builder().nIn(numInputs).nOut(3) .activation(Activation.TANH).weightInit(WeightInit.XAVIER).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(3).nOut(2) + .layer(1, org.deeplearning4j.nn.conf.layers.DenseLayer.builder().nIn(3).nOut(2) .activation(Activation.TANH).weightInit(WeightInit.XAVIER).build()) - .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(2, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .weightInit(WeightInit.XAVIER).nIn(2).nOut(outputNum).activation(Activation.SOFTMAX).build()) .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/embedding/EmbeddingLayerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/embedding/EmbeddingLayerTest.java index 742f38a2d..59be32928 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/embedding/EmbeddingLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/feedforward/embedding/EmbeddingLayerTest.java @@ -59,8 +59,8 @@ public class EmbeddingLayerTest extends BaseDL4JTest { for (boolean hasBias : new boolean[]{true, false}) { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().activation(Activation.TANH).list() - .layer(0, new EmbeddingLayer.Builder().hasBias(hasBias).nIn(10).nOut(5).build()) - .layer(1, new OutputLayer.Builder().nIn(5).nOut(4).activation(Activation.SOFTMAX).build()) + .layer(0, EmbeddingLayer.builder().hasBias(hasBias).nIn(10).nOut(5).build()) + .layer(1, OutputLayer.builder().nIn(5).nOut(4).activation(Activation.SOFTMAX).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -91,9 +91,9 @@ public class EmbeddingLayerTest extends BaseDL4JTest { for (boolean hasBias : new boolean[]{true, false}) { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().activation(Activation.TANH).list() - .layer(new EmbeddingSequenceLayer.Builder().hasBias(hasBias) + .layer(EmbeddingSequenceLayer.builder().hasBias(hasBias) .inputLength(inputLength).nIn(nIn).nOut(embeddingDim).build()) - .layer(new RnnOutputLayer.Builder().nIn(embeddingDim).nOut(nout).activation(Activation.SOFTMAX).build()) + .layer(RnnOutputLayer.builder().nIn(embeddingDim).nOut(nout).activation(Activation.SOFTMAX).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -123,9 +123,9 @@ public class EmbeddingLayerTest extends BaseDL4JTest { int nOut = 4; NeuralNetConfiguration conf = NeuralNetConfiguration.builder().activation(Activation.TANH).list() - .layer(new EmbeddingSequenceLayer.Builder().inputLength(inputLength) + .layer(EmbeddingSequenceLayer.builder().inputLength(inputLength) 
.hasBias(true).nIn(nClassesIn).nOut(embeddingDim).build()) - .layer(new RnnOutputLayer.Builder().nIn(embeddingDim).nOut(nOut).activation(Activation.SOFTMAX).build()) + .layer(RnnOutputLayer.builder().nIn(embeddingDim).nOut(nOut).activation(Activation.SOFTMAX).build()) .build(); @@ -154,13 +154,13 @@ public class EmbeddingLayerTest extends BaseDL4JTest { int nOut = 4; NeuralNetConfiguration conf = NeuralNetConfiguration.builder().activation(Activation.TANH).list() - .layer(new EmbeddingSequenceLayer.Builder().inputLength(1) + .layer(EmbeddingSequenceLayer.builder().inputLength(1) .hasBias(true).nIn(nClassesIn).nOut(embeddingDim).build()) - .layer(new RnnOutputLayer.Builder().nIn(embeddingDim).nOut(nOut).activation(Activation.SOFTMAX).build()) + .layer(RnnOutputLayer.builder().nIn(embeddingDim).nOut(nOut).activation(Activation.SOFTMAX).build()) .build(); NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder().activation(Activation.TANH).list() - .layer(0, new DenseLayer.Builder().nIn(nClassesIn).nOut(5).activation(Activation.IDENTITY).build()) - .layer(1, new OutputLayer.Builder().nIn(5).nOut(4).activation(Activation.SOFTMAX).build()) + .layer(0, DenseLayer.builder().nIn(nClassesIn).nOut(5).activation(Activation.IDENTITY).build()) + .layer(1, OutputLayer.builder().nIn(5).nOut(4).activation(Activation.SOFTMAX).build()) .inputPreProcessor(0, new RnnToFeedForwardPreProcessor()) .build(); @@ -203,12 +203,12 @@ public class EmbeddingLayerTest extends BaseDL4JTest { int nClassesIn = 10; NeuralNetConfiguration conf = NeuralNetConfiguration.builder().activation(Activation.TANH).list() - .layer(0, new EmbeddingLayer.Builder().hasBias(true).nIn(nClassesIn).nOut(5).build()) - .layer(1, new OutputLayer.Builder().nIn(5).nOut(4).activation(Activation.SOFTMAX).build()) + .layer(0, EmbeddingLayer.builder().hasBias(true).nIn(nClassesIn).nOut(5).build()) + .layer(1, OutputLayer.builder().nIn(5).nOut(4).activation(Activation.SOFTMAX).build()) .build(); NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder().activation(Activation.TANH).list() - .layer(0, new DenseLayer.Builder().nIn(nClassesIn).nOut(5).activation(Activation.IDENTITY).build()) - .layer(1, new OutputLayer.Builder().nIn(5).nOut(4).activation(Activation.SOFTMAX).build()) + .layer(0, DenseLayer.builder().nIn(nClassesIn).nOut(5).activation(Activation.IDENTITY).build()) + .layer(1, OutputLayer.builder().nIn(5).nOut(4).activation(Activation.SOFTMAX).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -246,14 +246,14 @@ public class EmbeddingLayerTest extends BaseDL4JTest { int nClassesIn = 10; NeuralNetConfiguration conf = NeuralNetConfiguration.builder().activation(Activation.TANH).list() - .layer(0, new EmbeddingLayer.Builder().hasBias(true).nIn(nClassesIn).nOut(5).build()).layer(1, - new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(5).nOut(4) + .layer(0, EmbeddingLayer.builder().hasBias(true).nIn(nClassesIn).nOut(5).build()).layer(1, + OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT).nIn(5).nOut(4) .activation(Activation.SOFTMAX).build()) .build(); NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder().activation(Activation.TANH) .weightInit(WeightInit.XAVIER).list() - .layer(new DenseLayer.Builder().nIn(nClassesIn).nOut(5).activation(Activation.IDENTITY).build()) - .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(5).nOut(4) + .layer(DenseLayer.builder().nIn(nClassesIn).nOut(5).activation(Activation.IDENTITY).build()) + 
.layer(OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT).nIn(5).nOut(4) .activation(Activation.SOFTMAX).build()) .build(); @@ -307,14 +307,14 @@ public class EmbeddingLayerTest extends BaseDL4JTest { int inputLength = 1; NeuralNetConfiguration conf = NeuralNetConfiguration.builder().activation(Activation.TANH).list() - .layer(new EmbeddingSequenceLayer.Builder().inputLength(inputLength) + .layer(EmbeddingSequenceLayer.builder().inputLength(inputLength) .hasBias(true).nIn(nClassesIn).nOut(embeddingDim).build()) - .layer(new RnnOutputLayer.Builder().nIn(embeddingDim).nOut(nOut).activation(Activation.SOFTMAX).build()) + .layer(RnnOutputLayer.builder().nIn(embeddingDim).nOut(nOut).activation(Activation.SOFTMAX).build()) .inputType(InputType.recurrent(nClassesIn,inputLength,RNNFormat.NCW)) .build(); NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder().activation(Activation.TANH).list() - .layer(new DenseLayer.Builder().nIn(nClassesIn).nOut(embeddingDim).activation(Activation.IDENTITY).build()) - .layer(new RnnOutputLayer.Builder().nIn(embeddingDim).nOut(nOut).activation(Activation.SOFTMAX).build()) + .layer(DenseLayer.builder().nIn(nClassesIn).nOut(embeddingDim).activation(Activation.IDENTITY).build()) + .layer(RnnOutputLayer.builder().nIn(embeddingDim).nOut(nOut).activation(Activation.SOFTMAX).build()) .inputType(InputType.recurrent(nClassesIn,inputLength,RNNFormat.NCW)) .build(); @@ -369,9 +369,9 @@ public class EmbeddingLayerTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().activation(Activation.TANH) .dataType(DataType.DOUBLE) .list() - .layer(0, new EmbeddingLayer.Builder().hasBias(true).nIn(nClassesIn).nOut(5).build()) - .layer(1, new LSTM.Builder().nIn(5).nOut(7).activation(Activation.SOFTSIGN).build()) - .layer(2, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(7).nOut(4) + .layer(0, EmbeddingLayer.builder().hasBias(true).nIn(nClassesIn).nOut(5).build()) + .layer(1, LSTM.builder().nIn(5).nOut(7).activation(Activation.SOFTSIGN).build()) + .layer(2, RnnOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT).nIn(7).nOut(4) .activation(Activation.SOFTMAX).build()) .inputPreProcessor(0, new RnnToFeedForwardPreProcessor()) .inputPreProcessor(1, new FeedForwardToRnnPreProcessor()) @@ -381,9 +381,9 @@ public class EmbeddingLayerTest extends BaseDL4JTest { .weightInit(WeightInit.XAVIER) .dataType(DataType.DOUBLE) .list() - .layer(0, new DenseLayer.Builder().nIn(nClassesIn).nOut(5).activation(Activation.IDENTITY).build()) - .layer(1, new LSTM.Builder().nIn(5).nOut(7).activation(Activation.SOFTSIGN).build()) - .layer(2, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(7).nOut(4) + .layer(0, DenseLayer.builder().nIn(nClassesIn).nOut(5).activation(Activation.IDENTITY).build()) + .layer(1, LSTM.builder().nIn(5).nOut(7).activation(Activation.SOFTSIGN).build()) + .layer(2, RnnOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT).nIn(7).nOut(4) .activation(Activation.SOFTMAX).build()) .inputPreProcessor(0, new RnnToFeedForwardPreProcessor()) .inputPreProcessor(1, new FeedForwardToRnnPreProcessor()) @@ -453,11 +453,11 @@ public class EmbeddingLayerTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(0.1)).seed(12345).list() - .layer(0, new EmbeddingLayer.Builder().hasBias(true).activation(Activation.TANH).nIn(numInputClasses) + .layer(0, 
EmbeddingLayer.builder().hasBias(true).activation(Activation.TANH).nIn(numInputClasses) .nOut(5).build()) - .layer(1, new DenseLayer.Builder().activation(Activation.TANH).nIn(5).nOut(4).build()) - .layer(2, new LSTM.Builder().activation(Activation.TANH).nIn(4).nOut(3).build()) - .layer(3, new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(3) + .layer(1, DenseLayer.builder().activation(Activation.TANH).nIn(5).nOut(4).build()) + .layer(2, LSTM.builder().activation(Activation.TANH).nIn(4).nOut(3).build()) + .layer(3, RnnOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(3) .nOut(4).build()) .inputPreProcessor(0, new RnnToFeedForwardPreProcessor()) .inputPreProcessor(2, new FeedForwardToRnnPreProcessor()) @@ -470,11 +470,11 @@ public class EmbeddingLayerTest extends BaseDL4JTest { NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(0.1)).seed(12345).list() - .layer(0, new DenseLayer.Builder().activation(Activation.TANH).nIn(numInputClasses).nOut(5) + .layer(0, DenseLayer.builder().activation(Activation.TANH).nIn(numInputClasses).nOut(5) .build()) - .layer(1, new DenseLayer.Builder().activation(Activation.TANH).nIn(5).nOut(4).build()) - .layer(2, new LSTM.Builder().activation(Activation.TANH).nIn(4).nOut(3).build()) - .layer(3, new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(3) + .layer(1, DenseLayer.builder().activation(Activation.TANH).nIn(5).nOut(4).build()) + .layer(2, LSTM.builder().activation(Activation.TANH).nIn(4).nOut(3).build()) + .layer(3, RnnOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(3) .nOut(4).build()) .inputPreProcessor(0, new RnnToFeedForwardPreProcessor()) .inputPreProcessor(2, new FeedForwardToRnnPreProcessor()) @@ -546,16 +546,16 @@ public class EmbeddingLayerTest extends BaseDL4JTest { EmbeddingLayer el; if(i == 0){ - el = new EmbeddingLayer.Builder().weightInit(vectors).build(); + el = EmbeddingLayer.builder().weightInit(vectors).build(); } else { - el = new EmbeddingLayer.Builder().weightInit(new WordVectorsMockup()).build(); + el = EmbeddingLayer.builder().weightInit(new WordVectorsMockup()).build(); } NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .seed(12345).list() .layer(el) - .layer(new DenseLayer.Builder().activation(Activation.TANH).nIn(3).nOut(3).build()) - .layer(new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(3) + .layer(DenseLayer.builder().activation(Activation.TANH).nIn(3).nOut(3).build()) + .layer(OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(3) .nOut(4).build()) .build(); @@ -570,17 +570,17 @@ public class EmbeddingLayerTest extends BaseDL4JTest { //Test same thing for embedding sequence layer: EmbeddingSequenceLayer esl; if(i == 0){ - esl = new EmbeddingSequenceLayer.Builder().weightInit(vectors).build(); + esl = EmbeddingSequenceLayer.builder().weightInit(vectors).build(); } else { - esl = new EmbeddingSequenceLayer.Builder().weightInit(new WordVectorsMockup()).build(); + esl = EmbeddingSequenceLayer.builder().weightInit(new WordVectorsMockup()).build(); } conf = NeuralNetConfiguration.builder() .seed(12345).list() .layer(esl) - .layer(new GlobalPoolingLayer()) - .layer(new DenseLayer.Builder().activation(Activation.TANH).nIn(3).nOut(3).build()) - .layer(new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(3) + .layer(GlobalPoolingLayer.builder().build()) + 
.layer(DenseLayer.builder().activation(Activation.TANH).nIn(3).nOut(3).build()) + .layer(OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(3) .nOut(4).build()) .build(); @@ -615,11 +615,11 @@ public class EmbeddingLayerTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(0.1)).seed(12345).list() - .layer(0, new EmbeddingSequenceLayer.Builder().hasBias(true).activation(Activation.TANH).nIn(numInputClasses) + .layer(0, EmbeddingSequenceLayer.builder().hasBias(true).activation(Activation.TANH).nIn(numInputClasses) .nOut(5).build()) - .layer(1, new DenseLayer.Builder().activation(Activation.TANH).nIn(5).nOut(4).build()) - .layer(2, new LSTM.Builder().activation(Activation.TANH).nIn(4).nOut(3).build()) - .layer(3, new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(3) + .layer(1, DenseLayer.builder().activation(Activation.TANH).nIn(5).nOut(4).build()) + .layer(2, LSTM.builder().activation(Activation.TANH).nIn(4).nOut(3).build()) + .layer(3, RnnOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(3) .nOut(4).build()) .inputType(InputType.recurrent(numInputClasses,timeSeriesLength,RNNFormat.NCW)).build(); @@ -629,11 +629,11 @@ public class EmbeddingLayerTest extends BaseDL4JTest { NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(0.1)).seed(12345).list() - .layer(0, new DenseLayer.Builder().activation(Activation.TANH).nIn(numInputClasses).nOut(5) + .layer(0, DenseLayer.builder().activation(Activation.TANH).nIn(numInputClasses).nOut(5) .build()) - .layer(1, new DenseLayer.Builder().activation(Activation.TANH).nIn(5).nOut(4).build()) - .layer(2, new LSTM.Builder().activation(Activation.TANH).nIn(4).nOut(3).dataFormat(RNNFormat.NCW).build()) - .layer(3, new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(3) + .layer(1, DenseLayer.builder().activation(Activation.TANH).nIn(5).nOut(4).build()) + .layer(2, LSTM.builder().activation(Activation.TANH).nIn(4).nOut(3).dataFormat(RNNFormat.NCW).build()) + .layer(3, RnnOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(3) .nOut(4).build()) .inputType(InputType.recurrent(numInputClasses,1,RNNFormat.NCW)).build(); @@ -722,8 +722,8 @@ public class EmbeddingLayerTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() - .layer(new EmbeddingLayer.Builder().nIn(10).nOut(10).build()) - .layer(new EmbeddingSequenceLayer.Builder().nIn(10).nOut(10).build()) + .layer(EmbeddingLayer.builder().nIn(10).nOut(10).build()) + .layer(EmbeddingSequenceLayer.builder().nIn(10).nOut(10).build()) .build(); EmbeddingLayer l = (EmbeddingLayer) conf.getConf(0).getLayer(); @@ -749,8 +749,8 @@ public class EmbeddingLayerTest extends BaseDL4JTest { .seed(12345) .list() .layer(seq ? - new EmbeddingSequenceLayer.Builder().weightInit(wi).nIn(100).nOut(100).build() : - new EmbeddingLayer.Builder().weightInit(wi).nIn(100).nOut(100).build()) + EmbeddingSequenceLayer.builder().weightInit(wi).nIn(100).nOut(100).build() : + EmbeddingLayer.builder().weightInit(wi).nIn(100).nOut(100).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -760,8 +760,8 @@ public class EmbeddingLayerTest extends BaseDL4JTest { .seed(12345) .list() .layer(seq ? 
- new EmbeddingSequenceLayer.Builder().weightInit(wi).nIn(100).nOut(100).build() : - new EmbeddingLayer.Builder().weightInit(wi).nIn(100).nOut(100).build()) + EmbeddingSequenceLayer.builder().weightInit(wi).nIn(100).nOut(100).build() : + EmbeddingLayer.builder().weightInit(wi).nIn(100).nOut(100).build()) .build(); MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); net2.init(); @@ -771,8 +771,8 @@ public class EmbeddingLayerTest extends BaseDL4JTest { .seed(12345) .list() .layer(seq ? - new EmbeddingSequenceLayer.Builder().weightInit(wi).nIn(100000).nOut(100).build() : - new EmbeddingLayer.Builder().weightInit(wi).nIn(100000).nOut(100).build()) + EmbeddingSequenceLayer.builder().weightInit(wi).nIn(100000).nOut(100).build() : + EmbeddingLayer.builder().weightInit(wi).nIn(100000).nOut(100).build()) .build(); MultiLayerNetwork net3 = new MultiLayerNetwork(conf3); net3.init(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/normalization/BatchNormalizationTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/normalization/BatchNormalizationTest.java index eb76c88f2..bf01f07f8 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/normalization/BatchNormalizationTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/normalization/BatchNormalizationTest.java @@ -123,7 +123,7 @@ public class BatchNormalizationTest extends BaseDL4JTest { } protected static Layer getLayer(int nOut, double epsilon, boolean lockGammaBeta, double gamma, double beta) { - BatchNormalization.Builder b = new BatchNormalization.Builder().nOut(nOut).eps(epsilon); + BatchNormalization.BatchNormalizationBuilder b = BatchNormalization.builder().nOut(nOut).eps(epsilon); if (lockGammaBeta) { b.lockGammaBeta(true).gamma(gamma).beta(beta); } @@ -367,12 +367,12 @@ public class BatchNormalizationTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(123) .list() - .layer(0, new DenseLayer.Builder().nIn(28 * 28).nOut(10).weightInit(WeightInit.XAVIER) + .layer(0, DenseLayer.builder().nIn(28 * 28).nOut(10).weightInit(WeightInit.XAVIER) .activation(Activation.RELU).build()) - .layer(1, new BatchNormalization.Builder().nOut(10).build()).layer(2, - new ActivationLayer.Builder() + .layer(1,BatchNormalization.builder().nOut(10).build()).layer(2, + ActivationLayer.builder() .activation(Activation.RELU).build()) - .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(3, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).nIn(10).nOut(10) .build()) .build(); @@ -399,11 +399,11 @@ public class BatchNormalizationTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(123) .list() - .layer(0, new ConvolutionLayer.Builder().nIn(1).nOut(6).weightInit(WeightInit.XAVIER) + .layer(0, ConvolutionLayer.builder().nIn(1).nOut(6).weightInit(WeightInit.XAVIER) .activation(Activation.IDENTITY).build()) - .layer(1, new BatchNormalization.Builder().build()) - .layer(2, new ActivationLayer.Builder().activation(Activation.RELU).build()) - .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(1,BatchNormalization.builder().build()) + .layer(2, ActivationLayer.builder().activation(Activation.RELU).build()) + 
.layer(3, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).nOut(10).build()) .inputType(InputType.convolutionalFlat(28, 28, 1)).build(); @@ -424,13 +424,13 @@ public class BatchNormalizationTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .seed(12345) .list() - .layer(0, new ConvolutionLayer.Builder().nIn(1).nOut(6).weightInit(WeightInit.XAVIER) + .layer(0, ConvolutionLayer.builder().nIn(1).nOut(6).weightInit(WeightInit.XAVIER) .activation(Activation.IDENTITY).build()) - .layer(1, new BatchNormalization.Builder().build()) - .layer(2, new ActivationLayer.Builder().activation(Activation.LEAKYRELU).build()) - .layer(3, new DenseLayer.Builder().nOut(10).activation(Activation.LEAKYRELU).build()) - .layer(4, new BatchNormalization.Builder().build()) - .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(1,BatchNormalization.builder().build()) + .layer(2, ActivationLayer.builder().activation(Activation.LEAKYRELU).build()) + .layer(3, DenseLayer.builder().nOut(10).activation(Activation.LEAKYRELU).build()) + .layer(4,BatchNormalization.builder().build()) + .layer(5, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).nOut(10).build()) .inputType(InputType.convolutionalFlat(28, 28, 1)).build(); @@ -463,13 +463,13 @@ public class BatchNormalizationTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(Updater.RMSPROP.getIUpdaterWithDefaultConfig()).seed(12345).list() - .layer(0, new ConvolutionLayer.Builder().nIn(1).nOut(6).weightInit(WeightInit.XAVIER) + .layer(0, ConvolutionLayer.builder().nIn(1).nOut(6).weightInit(WeightInit.XAVIER) .activation(Activation.IDENTITY).build()) - .layer(1, new BatchNormalization.Builder().build()) - .layer(2, new ActivationLayer.Builder().activation(Activation.LEAKYRELU).build()) - .layer(3, new DenseLayer.Builder().nOut(10).activation(Activation.LEAKYRELU).build()) - .layer(4, new BatchNormalization.Builder().build()) - .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(1,BatchNormalization.builder().build()) + .layer(2, ActivationLayer.builder().activation(Activation.LEAKYRELU).build()) + .layer(3, DenseLayer.builder().nOut(10).activation(Activation.LEAKYRELU).build()) + .layer(4,BatchNormalization.builder().build()) + .layer(5, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).nOut(10).build()) .inputType(InputType.convolutionalFlat(28, 28, 1)).build(); @@ -522,9 +522,9 @@ public class BatchNormalizationTest extends BaseDL4JTest { .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(Updater.RMSPROP.getIUpdaterWithDefaultConfig()).seed(12345) .list().layer(0, - new BatchNormalization.Builder().nIn(10).nOut(10).eps(1e-5).decay(0.95) + BatchNormalization.builder().nIn(10).nOut(10).eps(1e-5).decay(0.95) .useLogStd(useLogStd).build()) - .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MSE).weightInit(WeightInit.XAVIER) + .layer(1, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).weightInit(WeightInit.XAVIER) .activation(Activation.IDENTITY).nIn(10).nOut(10).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -588,8 +588,8 @@ public class 
BatchNormalizationTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(Updater.RMSPROP.getIUpdaterWithDefaultConfig()).seed(12345).list() - .layer(0, new BatchNormalization.Builder().nIn(3).nOut(3).eps(1e-5).decay(0.95).useLogStd(useLogStd).build()) - .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MSE).weightInit(WeightInit.XAVIER) + .layer(0,BatchNormalization.builder().nIn(3).nOut(3).eps(1e-5).decay(0.95).useLogStd(useLogStd).build()) + .layer(1, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).weightInit(WeightInit.XAVIER) .activation(Activation.IDENTITY).nOut(10).build()) .inputType(InputType.convolutional(5, 5, 3)).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -651,8 +651,8 @@ public class BatchNormalizationTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(Updater.RMSPROP.getIUpdaterWithDefaultConfig()).seed(12345).list() - .layer(0, new BatchNormalization.Builder().nIn(3).nOut(3).eps(1e-5).decay(0.95).useLogStd(false).build()) - .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MSE).weightInit(WeightInit.XAVIER) + .layer(0,BatchNormalization.builder().nIn(3).nOut(3).eps(1e-5).decay(0.95).useLogStd(false).build()) + .layer(1, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).weightInit(WeightInit.XAVIER) .activation(Activation.IDENTITY).nOut(10).build()) .inputType(InputType.convolutional(5, 5, 3)).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -662,8 +662,8 @@ public class BatchNormalizationTest extends BaseDL4JTest { NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(Updater.RMSPROP.getIUpdaterWithDefaultConfig()).seed(12345).list() - .layer(0, new BatchNormalization.Builder().nIn(3).nOut(3).eps(1e-5).decay(0.95).useLogStd(true).build()) - .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MSE).weightInit(WeightInit.XAVIER) + .layer(0,BatchNormalization.builder().nIn(3).nOut(3).eps(1e-5).decay(0.95).useLogStd(true).build()) + .layer(1, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).weightInit(WeightInit.XAVIER) .activation(Activation.IDENTITY).nOut(10).build()) .inputType(InputType.convolutional(5, 5, 3)).build(); MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); @@ -695,10 +695,10 @@ public class BatchNormalizationTest extends BaseDL4JTest { .updater(new Adam(1e-3)) .activation(Activation.TANH) .list() - .layer(new ConvolutionLayer.Builder().nOut(5).kernelSize(2, 2).build()) - .layer(new BatchNormalization()) - .layer(new ConvolutionLayer.Builder().nOut(5).kernelSize(2, 2).build()) - .layer(new OutputLayer.Builder().activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).nOut(10).build()) + .layer(ConvolutionLayer.builder().nOut(5).kernelSize(2, 2).build()) + .layer(BatchNormalization.builder().build()) + .layer(ConvolutionLayer.builder().nOut(5).kernelSize(2, 2).build()) + .layer(OutputLayer.builder().activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).nOut(10).build()) .inputType(InputType.convolutionalFlat(28, 28, 1)) .build(); @@ -714,8 +714,8 @@ public class BatchNormalizationTest extends BaseDL4JTest { .updater(new AdaDelta()) .build()) .removeOutputLayer() - .addLayer(new 
BatchNormalization.Builder().nOut(3380).build()) - .addLayer(new OutputLayer.Builder().activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).nIn(3380).nOut(10).build()) + .addLayer(BatchNormalization.builder().nOut(3380).build()) + .addLayer(OutputLayer.builder().activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).nIn(3380).nOut(10).build()) .build(); net2.fit(iter); @@ -731,11 +731,10 @@ public class BatchNormalizationTest extends BaseDL4JTest { .seed(12345) .weightInit(WeightInit.XAVIER) .convolutionMode(ConvolutionMode.Same) - .list() - .layer(rnn ? new LSTM.Builder().nOut(3).build() : - new Convolution1DLayer.Builder().kernelSize(3).stride(1).nOut(3).build()) - .layer(new BatchNormalization()) - .layer(new RnnOutputLayer.Builder().nOut(3).activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE).build()) + .layer(rnn ? LSTM.builder().nOut(3).build() : + Convolution1DLayer.builder().kernelSize(3).stride(1).nOut(3).build()) + .layer(BatchNormalization.builder().build()) + .layer(RnnOutputLayer.builder().nOut(3).activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE).build()) .inputType(InputType.recurrent(3)) .build(); @@ -757,8 +756,7 @@ public class BatchNormalizationTest extends BaseDL4JTest { public void testInputValidation() { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() - .list() - .layer(new BatchNormalization.Builder().nIn(10).nOut(10).build()) + .layer(BatchNormalization.builder().nIn(10).nOut(10).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/normalization/LocalResponseTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/normalization/LocalResponseTest.java index c2f8cb3c4..0a2730a7a 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/normalization/LocalResponseTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/normalization/LocalResponseTest.java @@ -111,10 +111,11 @@ public class LocalResponseTest extends BaseDL4JTest { public void doBefore() { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer).seed(123) - .layer(new LocalResponseNormalization.Builder().k(2).n(5).alpha(1e-4).beta(0.75).build()) + .layer(LocalResponseNormalization.builder().k(2).n(5).alpha(1e-4).beta(0.75).build()) .build(); - layer = new LocalResponseNormalization().instantiate(conf, null, 0, null, false, Nd4j.defaultFloatingPointType()); + layer = LocalResponseNormalization.builder().build() + .instantiate(conf, null, 0, null, false, Nd4j.defaultFloatingPointType()); activationsActual = layer.activate(x, false, LayerWorkspaceMgr.noWorkspaces()); } @@ -142,7 +143,7 @@ public class LocalResponseTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer).l1(0.2) .l2(0.1).seed(123) - .layer(new LocalResponseNormalization.Builder().k(2).n(5).alpha(1e-4).beta(0.75).build()) + .layer(LocalResponseNormalization.builder().k(2).n(5).alpha(1e-4).beta(0.75).build()) .build(); } @@ -150,12 +151,12 @@ public class LocalResponseTest extends BaseDL4JTest { public void testMultiCNNLayer() throws Exception { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT).seed(123).list() - 
.layer(0, new ConvolutionLayer.Builder().nIn(1).nOut(6).weightInit(WeightInit.XAVIER) + .layer(0, ConvolutionLayer.builder().nIn(1).nOut(6).weightInit(WeightInit.XAVIER) .activation(Activation.RELU).build()) - .layer(1, new LocalResponseNormalization.Builder().build()).layer(2, - new DenseLayer.Builder() + .layer(1, LocalResponseNormalization.builder().build()).layer(2, + DenseLayer.builder() .nOut(2).build()) - .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(3, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).nIn(2).nOut(10) .build()) .inputType(InputType.convolutionalFlat(28, 28, 1)).build(); @@ -201,7 +202,7 @@ public class LocalResponseTest extends BaseDL4JTest { } } - LocalResponseNormalization lrn = new LocalResponseNormalization.Builder().build(); + LocalResponseNormalization lrn = LocalResponseNormalization.builder().build(); NeuralNetConfiguration nnc = NeuralNetConfiguration.builder().layer(lrn).build(); org.deeplearning4j.nn.layers.normalization.LocalResponseNormalization layer = (org.deeplearning4j.nn.layers.normalization.LocalResponseNormalization) lrn.instantiate(nnc, diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/objdetect/TestYolo2OutputLayer.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/objdetect/TestYolo2OutputLayer.java index 5ef6fb110..56a8fee95 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/objdetect/TestYolo2OutputLayer.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/objdetect/TestYolo2OutputLayer.java @@ -90,9 +90,9 @@ public class TestYolo2OutputLayer extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .l2(0.01) .list() - .layer(new ConvolutionLayer.Builder().nIn(depth).nOut(depth).kernelSize(1,1).build()) - .layer(new Yolo2OutputLayer.Builder() - .boundingBoxPriors(bbPrior) + .layer(ConvolutionLayer.builder().nIn(depth).nOut(depth).kernelSize(1,1).build()) + .layer(Yolo2OutputLayer.builder() + .boundingBoxes(bbPrior) .build()) .build(); @@ -178,9 +178,9 @@ public class TestYolo2OutputLayer extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() - .layer(new ConvolutionLayer.Builder().nIn(1).nOut(1).kernelSize(1,1).build()) - .layer(new Yolo2OutputLayer.Builder() - .boundingBoxPriors(bbPrior) + .layer(ConvolutionLayer.builder().nIn(1).nOut(1).kernelSize(1,1).build()) + .layer(Yolo2OutputLayer.builder() + .boundingBoxes(bbPrior) .build()) .build(); @@ -336,9 +336,9 @@ public class TestYolo2OutputLayer extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() - .layer(new ConvolutionLayer.Builder().kernelSize(3,3).stride(1,1).nIn(3).nOut(3).build()) - .layer(new Yolo2OutputLayer.Builder() - .boundingBoxPriors(bbPriors) + .layer(ConvolutionLayer.builder().kernelSize(3,3).stride(1,1).nIn(3).nOut(3).build()) + .layer(Yolo2OutputLayer.builder() + .boundingBoxes(bbPriors) .build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -503,11 +503,11 @@ public class TestYolo2OutputLayer extends BaseDL4JTest { .weightInit(WeightInit.RELU) .seed(12345) .list() - .layer(new ConvolutionLayer.Builder().kernelSize(5,5).stride(2,2).nOut(256).build()) - .layer(new SubsamplingLayer.Builder().kernelSize(2,2).stride(2,2)/*.poolingType(SubsamplingLayer.PoolingType.AVG)*/.build()) - .layer(new 
ConvolutionLayer.Builder().activation(Activation.IDENTITY).kernelSize(5,5).stride(1,1).nOut(depthOut).build()) - .layer(new Yolo2OutputLayer.Builder() - .boundingBoxPriors(bbPriors) + .layer(ConvolutionLayer.builder().kernelSize(5,5).stride(2,2).nOut(256).build()) + .layer(SubsamplingLayer.builder().kernelSize(2,2).stride(2,2)/*.poolingType(SubsamplingLayer.PoolingType.AVG)*/.build()) + .layer(ConvolutionLayer.builder().activation(Activation.IDENTITY).kernelSize(5,5).stride(1,1).nOut(depthOut).build()) + .layer(Yolo2OutputLayer.builder() + .boundingBoxes(bbPriors) .build()) .inputType(InputType.convolutional(h,w,c)) .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/ocnn/OCNNOutputLayerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/ocnn/OCNNOutputLayerTest.java index a52716589..a896ff5c1 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/ocnn/OCNNOutputLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/ocnn/OCNNOutputLayerTest.java @@ -177,10 +177,10 @@ public class OCNNOutputLayerTest extends BaseDL4JTest { // 1e-2, // 0.1, // 20)).build()) - .layer(new DenseLayer.Builder().activation(new ActivationReLU()) + .layer(DenseLayer.builder().activation(new ActivationReLU()) .nIn(4).nOut(2).build()) .layer( - new org.deeplearning4j.nn.conf.ocnn.OCNNOutputLayer.Builder() + org.deeplearning4j.nn.conf.ocnn.OCNNOutputLayer.builder() .nIn(2).activation(new ActivationSigmoid()).initialRValue(0.1) .nu(0.1) .hiddenLayerSize(numHidden).build()) @@ -196,9 +196,9 @@ public class OCNNOutputLayerTest extends BaseDL4JTest { NeuralNetConfiguration configuration = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) .seed(42).updater(new NoOp()).miniBatch(false) - .layer(new DenseLayer.Builder().activation(new ActivationIdentity()).nIn(4).nOut(4).build()) + .layer(DenseLayer.builder().activation(new ActivationIdentity()).nIn(4).nOut(4).build()) .layer( - new org.deeplearning4j.nn.conf.ocnn.OCNNOutputLayer.Builder().nIn(4) + org.deeplearning4j.nn.conf.ocnn.OCNNOutputLayer.builder().nIn(4) .nu(0.002).activation(new ActivationSigmoid()) .hiddenLayerSize(numHidden).build()) .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/pooling/GlobalPoolingMaskingTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/pooling/GlobalPoolingMaskingTests.java index 86d695f3d..7f4cd09ab 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/pooling/GlobalPoolingMaskingTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/pooling/GlobalPoolingMaskingTests.java @@ -61,11 +61,11 @@ public class GlobalPoolingMaskingTests extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .updater(new NoOp()) .dist(new NormalDistribution(0, 1.0)).seed(12345L).list() - .layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH) + .layer(0, GravesLSTM.builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH) .build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.GlobalPoolingLayer.Builder() + .layer(1, org.deeplearning4j.nn.conf.layers.GlobalPoolingLayer.builder() .poolingType(PoolingType.AVG).build()) - .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(2, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(layerSize).nOut(nOut).build()) 
.build(); @@ -124,11 +124,11 @@ public class GlobalPoolingMaskingTests extends BaseDL4JTest { for (PoolingType pt : poolingTypes) { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER) .convolutionMode(ConvolutionMode.Same).seed(12345L).list() - .layer(0, new ConvolutionLayer.Builder().nIn(depthIn).nOut(depthOut).kernelSize(height, 2) + .layer(0, ConvolutionLayer.builder().nIn(depthIn).nOut(depthOut).kernelSize(height, 2) .stride(height, 1).activation(Activation.TANH).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.GlobalPoolingLayer.Builder().poolingType(pt) + .layer(1, org.deeplearning4j.nn.conf.layers.GlobalPoolingLayer.builder().poolingType(pt) .build()) - .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(2, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(depthOut).nOut(nOut).build()) .build(); @@ -187,11 +187,11 @@ public class GlobalPoolingMaskingTests extends BaseDL4JTest { for (PoolingType pt : poolingTypes) { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER) .convolutionMode(ConvolutionMode.Same).seed(12345L).list() - .layer(0, new ConvolutionLayer.Builder().nIn(depthIn).nOut(depthOut).kernelSize(2, width) + .layer(0, ConvolutionLayer.builder().nIn(depthIn).nOut(depthOut).kernelSize(2, width) .stride(1, width).activation(Activation.TANH).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.GlobalPoolingLayer.Builder().poolingType(pt) + .layer(1, org.deeplearning4j.nn.conf.layers.GlobalPoolingLayer.builder().poolingType(pt) .build()) - .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(2, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(depthOut).nOut(nOut).build()) .build(); @@ -251,11 +251,11 @@ public class GlobalPoolingMaskingTests extends BaseDL4JTest { for (PoolingType pt : poolingTypes) { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER) .convolutionMode(ConvolutionMode.Same).seed(12345L).list() - .layer(0, new ConvolutionLayer.Builder().nIn(depthIn).nOut(depthOut).kernelSize(height, 2) + .layer(0, ConvolutionLayer.builder().nIn(depthIn).nOut(depthOut).kernelSize(height, 2) .stride(height, 1).activation(Activation.TANH).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.GlobalPoolingLayer.Builder().poolingType(pt) + .layer(1, org.deeplearning4j.nn.conf.layers.GlobalPoolingLayer.builder().poolingType(pt) .build()) - .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(2, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(depthOut).nOut(nOut).build()) .build(); @@ -310,11 +310,11 @@ public class GlobalPoolingMaskingTests extends BaseDL4JTest { for (PoolingType pt : poolingTypes) { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER) .convolutionMode(ConvolutionMode.Same).seed(12345L).list() - .layer(0, new ConvolutionLayer.Builder().nIn(depthIn).nOut(depthOut).kernelSize(2, width) + .layer(0, ConvolutionLayer.builder().nIn(depthIn).nOut(depthOut).kernelSize(2, width) .stride(1, width).activation(Activation.TANH).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.GlobalPoolingLayer.Builder().poolingType(pt) + .layer(1, org.deeplearning4j.nn.conf.layers.GlobalPoolingLayer.builder().poolingType(pt) .build()) - .layer(2, new 
OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(2, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(depthOut).nOut(nOut).build()) .build(); @@ -369,11 +369,11 @@ public class GlobalPoolingMaskingTests extends BaseDL4JTest { for (PoolingType pt : poolingTypes) { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER) .convolutionMode(ConvolutionMode.Same).seed(12345L).list() - .layer(0, new ConvolutionLayer.Builder().nIn(depthIn).nOut(depthOut).kernelSize(2, 2) + .layer(0, ConvolutionLayer.builder().nIn(depthIn).nOut(depthOut).kernelSize(2, 2) .stride(1, 1).activation(Activation.TANH).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.GlobalPoolingLayer.Builder().poolingType(pt) + .layer(1, org.deeplearning4j.nn.conf.layers.GlobalPoolingLayer.builder().poolingType(pt) .build()) - .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(2, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(depthOut).nOut(nOut).build()) .build(); @@ -435,8 +435,8 @@ public class GlobalPoolingMaskingTests extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() - .layer(new GlobalPoolingLayer(pt)) - .layer(new OutputLayer.Builder().nIn(5).nOut(5).activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE).build()) + .layer(GlobalPoolingLayer.builder(pt).build()) + .layer(OutputLayer.builder().nIn(5).nOut(5).activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -449,7 +449,7 @@ public class GlobalPoolingMaskingTests extends BaseDL4JTest { NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder() .list() - .layer(new RnnOutputLayer.Builder().nIn(5).nOut(5).activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE).build()) + .layer(RnnOutputLayer.builder().nIn(5).nOut(5).activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE).build()) .build(); MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/BidirectionalTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/BidirectionalTest.java index 101a55edb..2f6f955b0 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/BidirectionalTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/BidirectionalTest.java @@ -20,6 +20,11 @@ package org.deeplearning4j.nn.layers.recurrent; +import static org.deeplearning4j.nn.conf.RNNFormat.NCW; +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.datasets.iterator.impl.SingletonMultiDataSetIterator; @@ -43,9 +48,12 @@ import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.updater.MultiLayerUpdater; import org.deeplearning4j.nn.updater.graph.ComputationGraphUpdater; import org.deeplearning4j.nn.weights.WeightInit; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.deeplearning4j.util.ModelSerializer; import org.deeplearning4j.util.TimeSeriesUtils; import 
org.junit.jupiter.api.Test; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; @@ -55,666 +63,808 @@ import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.learning.config.Adam; import org.nd4j.linalg.lossfunctions.LossFunctions; -import org.nd4j.common.primitives.Pair; -import org.deeplearning4j.nn.workspace.ArrayType; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; - -import static org.deeplearning4j.nn.conf.RNNFormat.NCW; -import static org.junit.jupiter.api.Assertions.assertEquals; @Slf4j public class BidirectionalTest extends BaseDL4JTest { - private final RNNFormat rnnDataFormat; + private final RNNFormat rnnDataFormat; - public BidirectionalTest(RNNFormat rnnDataFormat){ - this.rnnDataFormat = rnnDataFormat; + public BidirectionalTest(RNNFormat rnnDataFormat) { + this.rnnDataFormat = rnnDataFormat; + } + + public static Object[] params() { + return RNNFormat.values(); + } + + @Test + public void compareImplementations() { + for (WorkspaceMode wsm : WorkspaceMode.values()) { + log.info("*** Starting workspace mode: " + wsm); + + // Bidirectional(GravesLSTM) and GravesBidirectionalLSTM should be equivalent, given + // equivalent params + // Note that GravesBidirectionalLSTM implements ADD mode only + + NeuralNetConfiguration conf1 = + NeuralNetConfiguration.builder() + .activation(Activation.TANH) + .weightInit(WeightInit.XAVIER) + .trainingWorkspaceMode(wsm) + .inferenceWorkspaceMode(wsm) + .updater(new Adam()) + .list() + .layer( + Bidirectional.builder( + Bidirectional.Mode.ADD, + GravesLSTM.builder().nIn(10).nOut(10).dataFormat(rnnDataFormat).build()) + .build()) + .layer( + Bidirectional.builder( + Bidirectional.Mode.ADD, + GravesLSTM.builder().nIn(10).nOut(10).dataFormat(rnnDataFormat).build()) + .build()) + .layer( + RnnOutputLayer.builder() + .lossFunction(LossFunctions.LossFunction.MSE) + .dataFormat(rnnDataFormat) + .nIn(10) + .nOut(10) + .build()) + .build(); + + NeuralNetConfiguration conf2 = + NeuralNetConfiguration.builder() + .activation(Activation.TANH) + .weightInit(WeightInit.XAVIER) + .trainingWorkspaceMode(wsm) + .inferenceWorkspaceMode(wsm) + .updater(new Adam()) + .layer( + GravesBidirectionalLSTM.builder() + .nIn(10) + .nOut(10) + .dataFormat(rnnDataFormat) + .build()) + .layer( + GravesBidirectionalLSTM.builder() + .nIn(10) + .nOut(10) + .dataFormat(rnnDataFormat) + .build()) + .layer( + RnnOutputLayer.builder() + .lossFunction(LossFunctions.LossFunction.MSE) + .dataFormat(rnnDataFormat) + .nIn(10) + .nOut(10) + .build()) + .build(); + + MultiLayerNetwork net1 = new MultiLayerNetwork(conf1); + net1.init(); + + MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); + net2.init(); + + assertEquals(net1.numParams(), net2.numParams()); + for (int i = 0; i < 3; i++) { + int n1 = (int) net1.getLayer(i).numParams(); + int n2 = (int) net2.getLayer(i).numParams(); + assertEquals(n1, n2); + } + + net2.setParams(net1.getModelParams()); // Assuming exact same layout here... 
+ + INDArray in; + if (rnnDataFormat == NCW) { + in = Nd4j.rand(3, 10, 5); + } else { + in = Nd4j.rand(3, 5, 10); + } + + INDArray out1 = net1.output(in); + INDArray out2 = net2.output(in); + + assertEquals(out1, out2); + + INDArray labels; + if (rnnDataFormat == NCW) { + labels = Nd4j.rand(3, 10, 5); + } else { + labels = Nd4j.rand(3, 5, 10); + } + net1.setInput(in); + net1.setLabels(labels); + + net2.setInput(in); + net2.setLabels(labels); + + net1.computeGradientAndScore(); + net2.computeGradientAndScore(); + + // Ensure scores are equal: + assertEquals(net1.getScore(), net2.getScore(), 1e-6); + + // Ensure gradients are equal: + Gradient g1 = net1.gradient(); + Gradient g2 = net2.gradient(); + assertEquals(g1.gradient(), g2.gradient()); + + // Ensure updates are equal: + MultiLayerUpdater u1 = (MultiLayerUpdater) net1.getUpdater(); + MultiLayerUpdater u2 = (MultiLayerUpdater) net2.getUpdater(); + assertEquals(u1.getUpdaterStateViewArray(), u2.getUpdaterStateViewArray()); + u1.update(net1, g1, 0, 0, 3, LayerWorkspaceMgr.noWorkspaces()); + u2.update(net2, g2, 0, 0, 3, LayerWorkspaceMgr.noWorkspaces()); + assertEquals(g1.gradient(), g2.gradient()); + assertEquals(u1.getUpdaterStateViewArray(), u2.getUpdaterStateViewArray()); + + // Ensure params are equal, after fitting + net1.fit(in, labels); + net2.fit(in, labels); + + INDArray p1 = net1.getModelParams(); + INDArray p2 = net2.getModelParams(); + assertEquals(p1, p2); } + } - public static Object[] params(){ - return RNNFormat.values(); + @Test + public void compareImplementationsCompGraph() { + // for(WorkspaceMode wsm : WorkspaceMode.values()) { + for (WorkspaceMode wsm : new WorkspaceMode[] {WorkspaceMode.NONE, WorkspaceMode.ENABLED}) { + log.info("*** Starting workspace mode: " + wsm); + + // Bidirectional(GravesLSTM) and GravesBidirectionalLSTM should be equivalent, given + // equivalent params + // Note that GravesBidirectionalLSTM implements ADD mode only + + ComputationGraphConfiguration conf1 = + NeuralNetConfiguration.builder() + .activation(Activation.TANH) + .weightInit(WeightInit.XAVIER) + .updater(new Adam()) + .trainingWorkspaceMode(wsm) + .inferenceWorkspaceMode(wsm) + .graphBuilder() + .addInputs("in") + .layer( + "0", + Bidirectional.builder( + Bidirectional.Mode.ADD, GravesLSTM.builder().nIn(10).nOut(10).build()) + .build(), + "in") + .layer( + "1", + Bidirectional.builder( + Bidirectional.Mode.ADD, GravesLSTM.builder().nIn(10).nOut(10).build()) + .build(), + "0") + .layer( + "2", + RnnOutputLayer.builder() + .lossFunction(LossFunctions.LossFunction.MSE) + .nIn(10) + .nOut(10) + .build(), + "1") + .setOutputs("2") + .build(); + + ComputationGraphConfiguration conf2 = + NeuralNetConfiguration.builder() + .activation(Activation.TANH) + .weightInit(WeightInit.XAVIER) + .updater(new Adam()) + .trainingWorkspaceMode(wsm) + .inferenceWorkspaceMode(wsm) + .graphBuilder() + .addInputs("in") + .layer("0", GravesBidirectionalLSTM.builder().nIn(10).nOut(10).build(), "in") + .layer("1", GravesBidirectionalLSTM.builder().nIn(10).nOut(10).build(), "0") + .layer( + "2", + RnnOutputLayer.builder() + .lossFunction(LossFunctions.LossFunction.MSE) + .nIn(10) + .nOut(10) + .build(), + "1") + .setOutputs("2") + .build(); + + ComputationGraph net1 = new ComputationGraph(conf1); + net1.init(); + + ComputationGraph net2 = new ComputationGraph(conf2); + net2.init(); + + assertEquals(net1.numParams(), net2.numParams()); + for (int i = 0; i < 3; i++) { + int n1 = (int) net1.getLayer(i).numParams(); + int n2 = (int) net2.getLayer(i).numParams(); 
+ assertEquals(n1, n2); + } + + net2.setParams(net1.getModelParams()); // Assuming exact same layout here... + + INDArray in = Nd4j.rand(3, 10, 5); + + INDArray out1 = net1.outputSingle(in); + INDArray out2 = net2.outputSingle(in); + + assertEquals(out1, out2); + + INDArray labels = Nd4j.rand(3, 10, 5); + + net1.setInput(0, in); + net1.setLabels(labels); + + net2.setInput(0, in); + net2.setLabels(labels); + + net1.computeGradientAndScore(); + net2.computeGradientAndScore(); + + // Ensure scores are equal: + assertEquals(net1.getScore(), net2.getScore(), 1e-6); + + // Ensure gradients are equal: + Gradient g1 = net1.gradient(); + Gradient g2 = net2.gradient(); + assertEquals(g1.gradient(), g2.gradient()); + + // Ensure updates are equal: + ComputationGraphUpdater u1 = net1.getUpdater(); + ComputationGraphUpdater u2 = net2.getUpdater(); + assertEquals(u1.getUpdaterStateViewArray(), u2.getUpdaterStateViewArray()); + u1.update(g1, 0, 0, 3, LayerWorkspaceMgr.noWorkspaces()); + u2.update(g2, 0, 0, 3, LayerWorkspaceMgr.noWorkspaces()); + assertEquals(g1.gradient(), g2.gradient()); + assertEquals(u1.getUpdaterStateViewArray(), u2.getUpdaterStateViewArray()); + + // Ensure params are equal, after fitting + net1.fit(new DataSet(in, labels)); + net2.fit(new DataSet(in, labels)); + + INDArray p1 = net1.getModelParams(); + INDArray p2 = net2.getModelParams(); + assertEquals(p1, p2); } - @Test - public void compareImplementations(){ - for(WorkspaceMode wsm : WorkspaceMode.values()) { - log.info("*** Starting workspace mode: " + wsm); + } - //Bidirectional(GravesLSTM) and GravesBidirectionalLSTM should be equivalent, given equivalent params - //Note that GravesBidirectionalLSTM implements ADD mode only + @Test + public void testSerialization() throws Exception { - NeuralNetConfiguration conf1 = NeuralNetConfiguration.builder() - .activation(Activation.TANH) - .weightInit(WeightInit.XAVIER) - .trainingWorkspaceMode(wsm) - .inferenceWorkspaceMode(wsm) - .updater(new Adam()) - .list() - .layer(new Bidirectional(Bidirectional.Mode.ADD, new GravesLSTM.Builder().nIn(10).nOut(10).dataFormat(rnnDataFormat).build())) - .layer(new Bidirectional(Bidirectional.Mode.ADD, new GravesLSTM.Builder().nIn(10).nOut(10).dataFormat(rnnDataFormat).build())) - .layer(new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).dataFormat(rnnDataFormat) - .nIn(10).nOut(10).build()) - .build(); + for (WorkspaceMode wsm : WorkspaceMode.values()) { + log.info("*** Starting workspace mode: " + wsm); - NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder() - .activation(Activation.TANH) - .weightInit(WeightInit.XAVIER) - .trainingWorkspaceMode(wsm) - .inferenceWorkspaceMode(wsm) - .updater(new Adam()) - .list() - .layer(new GravesBidirectionalLSTM.Builder().nIn(10).nOut(10).dataFormat(rnnDataFormat).build()) - .layer(new GravesBidirectionalLSTM.Builder().nIn(10).nOut(10).dataFormat(rnnDataFormat).build()) - .layer(new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).dataFormat(rnnDataFormat) - .nIn(10).nOut(10).build()) - .build(); + Nd4j.getRandom().setSeed(12345); - MultiLayerNetwork net1 = new MultiLayerNetwork(conf1); - net1.init(); + NeuralNetConfiguration conf1 = + NeuralNetConfiguration.builder() + .activation(Activation.TANH) + .weightInit(WeightInit.XAVIER) + .trainingWorkspaceMode(wsm) + .inferenceWorkspaceMode(wsm) + .updater(new Adam()) + .list() + .layer( + Bidirectional.builder( + Bidirectional.Mode.ADD, + GravesLSTM.builder().nIn(10).nOut(10).dataFormat(rnnDataFormat).build()) 
+ .build()) + .layer( + Bidirectional.builder( + Bidirectional.Mode.ADD, + GravesLSTM.builder().nIn(10).nOut(10).dataFormat(rnnDataFormat).build()) + .build()) + .layer( + RnnOutputLayer.builder() + .lossFunction(LossFunctions.LossFunction.MSE) + .nIn(10) + .nOut(10) + .dataFormat(rnnDataFormat) + .build()) + .build(); - MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); - net2.init(); + MultiLayerNetwork net1 = new MultiLayerNetwork(conf1); + net1.init(); - assertEquals(net1.numParams(), net2.numParams()); - for (int i = 0; i < 3; i++) { - int n1 = (int)net1.getLayer(i).numParams(); - int n2 = (int)net2.getLayer(i).numParams(); - assertEquals(n1, n2); - } + INDArray in; + INDArray labels; - net2.setParams(net1.getModelParams()); //Assuming exact same layout here... + long[] inshape = rnnDataFormat == NCW ? new long[] {3, 10, 5} : new long[] {3, 5, 10}; - INDArray in; - if (rnnDataFormat == NCW){ - in = Nd4j.rand(3, 10, 5); - }else{ - in = Nd4j.rand(3, 5, 10); - } + in = Nd4j.rand(inshape); + labels = Nd4j.rand(inshape); - INDArray out1 = net1.output(in); - INDArray out2 = net2.output(in); + net1.fit(in, labels); - assertEquals(out1, out2); + byte[] bytes; + try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) { + ModelSerializer.writeModel(net1, baos, true); + bytes = baos.toByteArray(); + } - INDArray labels; - if (rnnDataFormat == NCW){ - labels = Nd4j.rand(3, 10, 5); - }else{ - labels = Nd4j.rand(3, 5, 10); - } - net1.setInput(in); - net1.setLabels(labels); + MultiLayerNetwork net2 = + ModelSerializer.restoreMultiLayerNetwork(new ByteArrayInputStream(bytes), true); - net2.setInput(in); - net2.setLabels(labels); + in = Nd4j.rand(inshape); + labels = Nd4j.rand(inshape); - net1.computeGradientAndScore(); - net2.computeGradientAndScore(); + INDArray out1 = net1.output(in); + INDArray out2 = net2.output(in); - //Ensure scores are equal: - assertEquals(net1.getScore(), net2.getScore(), 1e-6); + assertEquals(out1, out2); - //Ensure gradients are equal: - Gradient g1 = net1.gradient(); - Gradient g2 = net2.gradient(); - assertEquals(g1.gradient(), g2.gradient()); + net1.setInput(in); + net2.setInput(in); + net1.setLabels(labels); + net2.setLabels(labels); - //Ensure updates are equal: - MultiLayerUpdater u1 = (MultiLayerUpdater) net1.getUpdater(); - MultiLayerUpdater u2 = (MultiLayerUpdater) net2.getUpdater(); - assertEquals(u1.getUpdaterStateViewArray(), u2.getUpdaterStateViewArray()); - u1.update(net1, g1, 0, 0, 3, LayerWorkspaceMgr.noWorkspaces()); - u2.update(net2, g2, 0, 0, 3, LayerWorkspaceMgr.noWorkspaces()); - assertEquals(g1.gradient(), g2.gradient()); - assertEquals(u1.getUpdaterStateViewArray(), u2.getUpdaterStateViewArray()); + net1.computeGradientAndScore(); + net2.computeGradientAndScore(); - //Ensure params are equal, after fitting - net1.fit(in, labels); - net2.fit(in, labels); + assertEquals(net1.getScore(), net2.getScore(), 1e-6); + assertEquals(net1.gradient().gradient(), net2.gradient().gradient()); + } + } - INDArray p1 = net1.getModelParams(); - INDArray p2 = net2.getModelParams(); - assertEquals(p1, p2); + @Test + public void testSerializationCompGraph() throws Exception { + + for (WorkspaceMode wsm : WorkspaceMode.values()) { + log.info("*** Starting workspace mode: " + wsm); + + Nd4j.getRandom().setSeed(12345); + + ComputationGraphConfiguration conf1 = + NeuralNetConfiguration.builder() + .activation(Activation.TANH) + .weightInit(WeightInit.XAVIER) + .trainingWorkspaceMode(wsm) + .inferenceWorkspaceMode(wsm) + .updater(new Adam()) + .graphBuilder() + 
.addInputs("in") + .layer( + "0", + Bidirectional.builder( + Bidirectional.Mode.ADD, + GravesLSTM.builder().nIn(10).nOut(10).dataFormat(rnnDataFormat).build()) + .build(), + "in") + .layer( + "1", + Bidirectional.builder( + Bidirectional.Mode.ADD, + GravesLSTM.builder().nIn(10).nOut(10).dataFormat(rnnDataFormat).build()) + .build(), + "0") + .layer( + "2", + RnnOutputLayer.builder() + .lossFunction(LossFunctions.LossFunction.MSE) + .dataFormat(rnnDataFormat) + .nIn(10) + .nOut(10) + .build(), + "1") + .setOutputs("2") + .build(); + + ComputationGraph net1 = new ComputationGraph(conf1); + net1.init(); + long[] inshape = (rnnDataFormat == NCW) ? new long[] {3, 10, 5} : new long[] {3, 5, 10}; + INDArray in = Nd4j.rand(inshape); + INDArray labels = Nd4j.rand(inshape); + + net1.fit(new DataSet(in, labels)); + + byte[] bytes; + try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) { + ModelSerializer.writeModel(net1, baos, true); + bytes = baos.toByteArray(); + } + + ComputationGraph net2 = + ModelSerializer.restoreComputationGraph(new ByteArrayInputStream(bytes), true); + + in = Nd4j.rand(inshape); + labels = Nd4j.rand(inshape); + + INDArray out1 = net1.outputSingle(in); + INDArray out2 = net2.outputSingle(in); + + assertEquals(out1, out2); + + net1.setInput(0, in); + net2.setInput(0, in); + net1.setLabels(labels); + net2.setLabels(labels); + + net1.computeGradientAndScore(); + net2.computeGradientAndScore(); + + assertEquals(net1.getScore(), net2.getScore(), 1e-6); + assertEquals(net1.gradient().gradient(), net2.gradient().gradient()); + } + } + + @Test + public void testSimpleBidirectional() { + + for (WorkspaceMode wsm : WorkspaceMode.values()) { + log.info("*** Starting workspace mode: " + wsm); + Nd4j.getRandom().setSeed(12345); + + Bidirectional.Mode[] modes = + new Bidirectional.Mode[] { + Bidirectional.Mode.CONCAT, + Bidirectional.Mode.ADD, + Bidirectional.Mode.AVERAGE, + Bidirectional.Mode.MUL + }; + + long[] inshape = rnnDataFormat == NCW ? 
new long[] {3, 10, 6} : new long[] {3, 6, 10}; + INDArray in = Nd4j.rand(inshape); + + for (Bidirectional.Mode m : modes) { + NeuralNetConfiguration conf1 = + NeuralNetConfiguration.builder() + .dataType(DataType.DOUBLE) + .activation(Activation.TANH) + .weightInit(WeightInit.XAVIER) + .trainingWorkspaceMode(wsm) + .inferenceWorkspaceMode(wsm) + .updater(new Adam()) + .list() + .layer( + Bidirectional.builder( + m, + SimpleRnn.builder().nIn(10).nOut(10).dataFormat(rnnDataFormat).build()) + .build()) + .build(); + + MultiLayerNetwork net1 = new MultiLayerNetwork(conf1); + net1.init(); + + NeuralNetConfiguration conf2 = + NeuralNetConfiguration.builder() + .dataType(DataType.DOUBLE) + .activation(Activation.TANH) + .weightInit(WeightInit.XAVIER) + .updater(new Adam()) + .list() + .layer(SimpleRnn.builder().nIn(10).nOut(10).dataFormat(rnnDataFormat).build()) + .build(); + + MultiLayerNetwork net2 = new MultiLayerNetwork(conf2.clone()); + net2.init(); + MultiLayerNetwork net3 = new MultiLayerNetwork(conf2.clone()); + net3.init(); + + net2.setParam("0_W", net1.getParam("0_fW")); + net2.setParam("0_RW", net1.getParam("0_fRW")); + net2.setParam("0_b", net1.getParam("0_fb")); + + net3.setParam("0_W", net1.getParam("0_bW")); + net3.setParam("0_RW", net1.getParam("0_bRW")); + net3.setParam("0_b", net1.getParam("0_bb")); + + INDArray inReverse = + TimeSeriesUtils.reverseTimeSeries( + in, LayerWorkspaceMgr.noWorkspaces(), ArrayType.INPUT, rnnDataFormat); + INDArray out1 = net1.output(in); + INDArray out2 = net2.output(in); + INDArray out3 = + TimeSeriesUtils.reverseTimeSeries( + net3.output(inReverse), + LayerWorkspaceMgr.noWorkspaces(), + ArrayType.INPUT, + rnnDataFormat); + + INDArray outExp; + switch (m) { + case ADD: + outExp = out2.add(out3); + break; + case MUL: + outExp = out2.mul(out3); + break; + case AVERAGE: + outExp = out2.add(out3).muli(0.5); + break; + case CONCAT: + outExp = Nd4j.concat((rnnDataFormat == NCW) ? 
1 : 2, out2, out3); + break; + default: + throw new RuntimeException(); } - } - @Test - public void compareImplementationsCompGraph(){ -// for(WorkspaceMode wsm : WorkspaceMode.values()) { - for(WorkspaceMode wsm : new WorkspaceMode[]{WorkspaceMode.NONE, WorkspaceMode.ENABLED}) { - log.info("*** Starting workspace mode: " + wsm); + assertEquals(outExp, out1, m.toString()); - //Bidirectional(GravesLSTM) and GravesBidirectionalLSTM should be equivalent, given equivalent params - //Note that GravesBidirectionalLSTM implements ADD mode only + // Check gradients: + if (m == Bidirectional.Mode.ADD || m == Bidirectional.Mode.CONCAT) { - ComputationGraphConfiguration conf1 = NeuralNetConfiguration.builder() - .activation(Activation.TANH) - .weightInit(WeightInit.XAVIER) - .updater(new Adam()) - .trainingWorkspaceMode(wsm) - .inferenceWorkspaceMode(wsm) - .graphBuilder() - .addInputs("in") - .layer("0", new Bidirectional(Bidirectional.Mode.ADD, new GravesLSTM.Builder().nIn(10).nOut(10).build()), "in") - .layer("1", new Bidirectional(Bidirectional.Mode.ADD, new GravesLSTM.Builder().nIn(10).nOut(10).build()), "0") - .layer("2", new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE) - .nIn(10).nOut(10).build(), "1") - .setOutputs("2") - .build(); + INDArray eps = Nd4j.rand(inshape); - ComputationGraphConfiguration conf2 = NeuralNetConfiguration.builder() - .activation(Activation.TANH) - .weightInit(WeightInit.XAVIER) - .updater(new Adam()) - .trainingWorkspaceMode(wsm) - .inferenceWorkspaceMode(wsm) - .graphBuilder() - .addInputs("in") - .layer("0", new GravesBidirectionalLSTM.Builder().nIn(10).nOut(10).build(), "in") - .layer("1", new GravesBidirectionalLSTM.Builder().nIn(10).nOut(10).build(), "0") - .layer("2", new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE) - .nIn(10).nOut(10).build(), "1") - .setOutputs("2") - .build(); + INDArray eps1; + if (m == Bidirectional.Mode.CONCAT) { + eps1 = Nd4j.concat((rnnDataFormat == NCW) ? 1 : 2, eps, eps); + } else { + eps1 = eps; + } - ComputationGraph net1 = new ComputationGraph(conf1); - net1.init(); + net1.setInput(in); + net2.setInput(in); + net3.setInput( + TimeSeriesUtils.reverseTimeSeries( + in, LayerWorkspaceMgr.noWorkspaces(), ArrayType.INPUT, rnnDataFormat)); + net1.feedForward(true, false); + net2.feedForward(true, false); + net3.feedForward(true, false); - ComputationGraph net2 = new ComputationGraph(conf2); - net2.init(); + Pair p1 = + net1.backpropGradient(eps1, LayerWorkspaceMgr.noWorkspaces()); + Pair p2 = + net2.backpropGradient(eps, LayerWorkspaceMgr.noWorkspaces()); + Pair p3 = + net3.backpropGradient( + TimeSeriesUtils.reverseTimeSeries( + eps, LayerWorkspaceMgr.noWorkspaces(), ArrayType.INPUT, rnnDataFormat), + LayerWorkspaceMgr.noWorkspaces()); + Gradient g1 = p1.getFirst(); + Gradient g2 = p2.getFirst(); + Gradient g3 = p3.getFirst(); - assertEquals(net1.numParams(), net2.numParams()); - for (int i = 0; i < 3; i++) { - int n1 = (int)net1.getLayer(i).numParams(); - int n2 = (int)net2.getLayer(i).numParams(); - assertEquals(n1, n2); + for (boolean updates : new boolean[] {false, true}) { + if (updates) { + net1.getUpdater().update(net1, g1, 0, 0, 3, LayerWorkspaceMgr.noWorkspaces()); + net2.getUpdater().update(net2, g2, 0, 0, 3, LayerWorkspaceMgr.noWorkspaces()); + net3.getUpdater().update(net3, g3, 0, 0, 3, LayerWorkspaceMgr.noWorkspaces()); } - net2.setParams(net1.getModelParams()); //Assuming exact same layout here... 
+ assertEquals(g2.gradientForVariable().get("0_W"), g1.gradientForVariable().get("0_fW")); + assertEquals( + g2.gradientForVariable().get("0_RW"), g1.gradientForVariable().get("0_fRW")); + assertEquals(g2.gradientForVariable().get("0_b"), g1.gradientForVariable().get("0_fb")); - INDArray in = Nd4j.rand(3, 10, 5); - - INDArray out1 = net1.outputSingle(in); - INDArray out2 = net2.outputSingle(in); - - assertEquals(out1, out2); - - INDArray labels = Nd4j.rand(3, 10, 5); - - net1.setInput(0,in); - net1.setLabels(labels); - - net2.setInput(0,in); - net2.setLabels(labels); - - net1.computeGradientAndScore(); - net2.computeGradientAndScore(); - - //Ensure scores are equal: - assertEquals(net1.getScore(), net2.getScore(), 1e-6); - - //Ensure gradients are equal: - Gradient g1 = net1.gradient(); - Gradient g2 = net2.gradient(); - assertEquals(g1.gradient(), g2.gradient()); - - //Ensure updates are equal: - ComputationGraphUpdater u1 = net1.getUpdater(); - ComputationGraphUpdater u2 = net2.getUpdater(); - assertEquals(u1.getUpdaterStateViewArray(), u2.getUpdaterStateViewArray()); - u1.update(g1, 0, 0, 3, LayerWorkspaceMgr.noWorkspaces()); - u2.update(g2, 0, 0, 3, LayerWorkspaceMgr.noWorkspaces()); - assertEquals(g1.gradient(), g2.gradient()); - assertEquals(u1.getUpdaterStateViewArray(), u2.getUpdaterStateViewArray()); - - //Ensure params are equal, after fitting - net1.fit(new DataSet(in, labels)); - net2.fit(new DataSet(in, labels)); - - INDArray p1 = net1.getModelParams(); - INDArray p2 = net2.getModelParams(); - assertEquals(p1, p2); + assertEquals(g3.gradientForVariable().get("0_W"), g1.gradientForVariable().get("0_bW")); + assertEquals( + g3.gradientForVariable().get("0_RW"), g1.gradientForVariable().get("0_bRW")); + assertEquals(g3.gradientForVariable().get("0_b"), g1.gradientForVariable().get("0_bb")); + } } + } } - - - @Test - public void testSerialization() throws Exception { - - for(WorkspaceMode wsm : WorkspaceMode.values()) { - log.info("*** Starting workspace mode: " + wsm); - - Nd4j.getRandom().setSeed(12345); - - NeuralNetConfiguration conf1 = NeuralNetConfiguration.builder() - .activation(Activation.TANH) - .weightInit(WeightInit.XAVIER) - .trainingWorkspaceMode(wsm) - .inferenceWorkspaceMode(wsm) - .updater(new Adam()) - .list() - .layer(new Bidirectional(Bidirectional.Mode.ADD, new GravesLSTM.Builder().nIn(10).nOut(10).dataFormat(rnnDataFormat).build())) - .layer(new Bidirectional(Bidirectional.Mode.ADD, new GravesLSTM.Builder().nIn(10).nOut(10).dataFormat(rnnDataFormat).build())) - .layer(new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE) - .nIn(10).nOut(10).dataFormat(rnnDataFormat).build()) - .build(); - - MultiLayerNetwork net1 = new MultiLayerNetwork(conf1); - net1.init(); - - INDArray in; - INDArray labels; - - long[] inshape = rnnDataFormat == NCW ? 
new long[]{3, 10, 5} : new long[]{3, 5, 10}; - - in = Nd4j.rand(inshape); - labels = Nd4j.rand(inshape); - - net1.fit(in, labels); - - byte[] bytes; - try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) { - ModelSerializer.writeModel(net1, baos, true); - bytes = baos.toByteArray(); - } - - - MultiLayerNetwork net2 = ModelSerializer.restoreMultiLayerNetwork(new ByteArrayInputStream(bytes), true); - - - in = Nd4j.rand(inshape); - labels = Nd4j.rand(inshape); - - INDArray out1 = net1.output(in); - INDArray out2 = net2.output(in); - - assertEquals(out1, out2); - - net1.setInput(in); - net2.setInput(in); - net1.setLabels(labels); - net2.setLabels(labels); - - net1.computeGradientAndScore(); - net2.computeGradientAndScore(); - - assertEquals(net1.getScore(), net2.getScore(), 1e-6); - assertEquals(net1.gradient().gradient(), net2.gradient().gradient()); - } - } - - - @Test - public void testSerializationCompGraph() throws Exception { - - for(WorkspaceMode wsm : WorkspaceMode.values()) { - log.info("*** Starting workspace mode: " + wsm); - - Nd4j.getRandom().setSeed(12345); - - ComputationGraphConfiguration conf1 = NeuralNetConfiguration.builder() - .activation(Activation.TANH) - .weightInit(WeightInit.XAVIER) - .trainingWorkspaceMode(wsm) - .inferenceWorkspaceMode(wsm) - .updater(new Adam()) - .graphBuilder() - .addInputs("in") - .layer("0", new Bidirectional(Bidirectional.Mode.ADD, new GravesLSTM.Builder().nIn(10).nOut(10).dataFormat(rnnDataFormat).build()), "in") - .layer("1", new Bidirectional(Bidirectional.Mode.ADD, new GravesLSTM.Builder().nIn(10).nOut(10).dataFormat(rnnDataFormat).build()), "0") - .layer("2", new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).dataFormat(rnnDataFormat) - .nIn(10).nOut(10).build(), "1") - .setOutputs("2") - .build(); - - ComputationGraph net1 = new ComputationGraph(conf1); - net1.init(); - long[] inshape = (rnnDataFormat == NCW)? new long[]{3, 10, 5}: new long[]{3, 5, 10}; - INDArray in = Nd4j.rand(inshape); - INDArray labels = Nd4j.rand(inshape); - - net1.fit(new DataSet(in, labels)); - - byte[] bytes; - try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) { - ModelSerializer.writeModel(net1, baos, true); - bytes = baos.toByteArray(); - } - - - ComputationGraph net2 = ModelSerializer.restoreComputationGraph(new ByteArrayInputStream(bytes), true); - - - in = Nd4j.rand(inshape); - labels = Nd4j.rand(inshape); - - INDArray out1 = net1.outputSingle(in); - INDArray out2 = net2.outputSingle(in); - - assertEquals(out1, out2); - - net1.setInput(0, in); - net2.setInput(0, in); - net1.setLabels(labels); - net2.setLabels(labels); - - net1.computeGradientAndScore(); - net2.computeGradientAndScore(); - - assertEquals(net1.getScore(), net2.getScore(), 1e-6); - assertEquals(net1.gradient().gradient(), net2.gradient().gradient()); - } - } - - @Test - public void testSimpleBidirectional() { - - for (WorkspaceMode wsm : WorkspaceMode.values()) { - log.info("*** Starting workspace mode: " + wsm); - Nd4j.getRandom().setSeed(12345); - - Bidirectional.Mode[] modes = new Bidirectional.Mode[]{Bidirectional.Mode.CONCAT, Bidirectional.Mode.ADD, - Bidirectional.Mode.AVERAGE, Bidirectional.Mode.MUL}; - - long[] inshape = rnnDataFormat == NCW ? 
new long[]{3, 10, 6} : new long[]{3, 6, 10}; - INDArray in = Nd4j.rand(inshape); - - for (Bidirectional.Mode m : modes) { - NeuralNetConfiguration conf1 = NeuralNetConfiguration.builder() - .dataType(DataType.DOUBLE) - .activation(Activation.TANH) - .weightInit(WeightInit.XAVIER) - .trainingWorkspaceMode(wsm) - .inferenceWorkspaceMode(wsm) - .updater(new Adam()) - .list() - .layer(new Bidirectional(m, new SimpleRnn.Builder().nIn(10).nOut(10).dataFormat(rnnDataFormat).build())) - .build(); - - MultiLayerNetwork net1 = new MultiLayerNetwork(conf1); - net1.init(); - - NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder() - .dataType(DataType.DOUBLE) - .activation(Activation.TANH) - .weightInit(WeightInit.XAVIER) - .updater(new Adam()) - .list() - .layer(new SimpleRnn.Builder().nIn(10).nOut(10).dataFormat(rnnDataFormat).build()) - .build(); - - MultiLayerNetwork net2 = new MultiLayerNetwork(conf2.clone()); - net2.init(); - MultiLayerNetwork net3 = new MultiLayerNetwork(conf2.clone()); - net3.init(); - - net2.setParam("0_W", net1.getParam("0_fW")); - net2.setParam("0_RW", net1.getParam("0_fRW")); - net2.setParam("0_b", net1.getParam("0_fb")); - - net3.setParam("0_W", net1.getParam("0_bW")); - net3.setParam("0_RW", net1.getParam("0_bRW")); - net3.setParam("0_b", net1.getParam("0_bb")); - - INDArray inReverse = TimeSeriesUtils.reverseTimeSeries(in, LayerWorkspaceMgr.noWorkspaces(), ArrayType.INPUT, rnnDataFormat); - INDArray out1 = net1.output(in); - INDArray out2 = net2.output(in); - INDArray out3 = TimeSeriesUtils.reverseTimeSeries(net3.output(inReverse), LayerWorkspaceMgr.noWorkspaces(), ArrayType.INPUT, rnnDataFormat); - - INDArray outExp; - switch (m) { - case ADD: - outExp = out2.add(out3); - break; - case MUL: - outExp = out2.mul(out3); - break; - case AVERAGE: - outExp = out2.add(out3).muli(0.5); - break; - case CONCAT: - outExp = Nd4j.concat((rnnDataFormat == NCW)?1:2, out2, out3); - break; - default: - throw new RuntimeException(); - } - - assertEquals(outExp, out1, m.toString()); - - - //Check gradients: - if (m == Bidirectional.Mode.ADD || m == Bidirectional.Mode.CONCAT) { - - INDArray eps = Nd4j.rand(inshape); - - INDArray eps1; - if (m == Bidirectional.Mode.CONCAT) { - eps1 = Nd4j.concat((rnnDataFormat == NCW)?1:2, eps, eps); - } else { - eps1 = eps; - } - - net1.setInput(in); - net2.setInput(in); - net3.setInput(TimeSeriesUtils.reverseTimeSeries(in, LayerWorkspaceMgr.noWorkspaces(), ArrayType.INPUT, rnnDataFormat)); - net1.feedForward(true, false); - net2.feedForward(true, false); - net3.feedForward(true, false); - - Pair p1 = net1.backpropGradient(eps1, LayerWorkspaceMgr.noWorkspaces()); - Pair p2 = net2.backpropGradient(eps, LayerWorkspaceMgr.noWorkspaces()); - Pair p3 = net3.backpropGradient(TimeSeriesUtils.reverseTimeSeries(eps, LayerWorkspaceMgr.noWorkspaces(), ArrayType.INPUT, rnnDataFormat), LayerWorkspaceMgr.noWorkspaces()); - Gradient g1 = p1.getFirst(); - Gradient g2 = p2.getFirst(); - Gradient g3 = p3.getFirst(); - - for (boolean updates : new boolean[]{false, true}) { - if (updates) { - net1.getUpdater().update(net1, g1, 0, 0, 3, LayerWorkspaceMgr.noWorkspaces()); - net2.getUpdater().update(net2, g2, 0, 0, 3, LayerWorkspaceMgr.noWorkspaces()); - net3.getUpdater().update(net3, g3, 0, 0, 3, LayerWorkspaceMgr.noWorkspaces()); - } - - assertEquals(g2.gradientForVariable().get("0_W"), g1.gradientForVariable().get("0_fW")); - assertEquals(g2.gradientForVariable().get("0_RW"), g1.gradientForVariable().get("0_fRW")); - assertEquals(g2.gradientForVariable().get("0_b"), 
g1.gradientForVariable().get("0_fb")); - - assertEquals(g3.gradientForVariable().get("0_W"), g1.gradientForVariable().get("0_bW")); - assertEquals(g3.gradientForVariable().get("0_RW"), g1.gradientForVariable().get("0_bRW")); - assertEquals(g3.gradientForVariable().get("0_b"), g1.gradientForVariable().get("0_bb")); - } - - } - } - } - } - - - @Test - public void testSimpleBidirectionalCompGraph() { - - for (WorkspaceMode wsm : WorkspaceMode.values()) { - log.info("*** Starting workspace mode: " + wsm); - Nd4j.getRandom().setSeed(12345); - - Bidirectional.Mode[] modes = new Bidirectional.Mode[]{Bidirectional.Mode.CONCAT, Bidirectional.Mode.ADD, - Bidirectional.Mode.AVERAGE, Bidirectional.Mode.MUL}; - - - long[] inshape = rnnDataFormat == NCW ? new long[]{3, 10, 6} : new long[]{3, 6, 10}; - INDArray in = Nd4j.rand(inshape); - - - for (Bidirectional.Mode m : modes) { - ComputationGraphConfiguration conf1 = NeuralNetConfiguration.builder() - .dataType(DataType.DOUBLE) - .activation(Activation.TANH) - .weightInit(WeightInit.XAVIER) - .trainingWorkspaceMode(wsm) - .inferenceWorkspaceMode(wsm) - .updater(new Adam()) - .graphBuilder() - .addInputs("in") - .layer("0", new Bidirectional(m, new SimpleRnn.Builder().nIn(10).nOut(10).dataFormat(rnnDataFormat).build()), "in") - .setOutputs("0") - .build(); - - ComputationGraph net1 = new ComputationGraph(conf1); - net1.init(); - - ComputationGraphConfiguration conf2 = NeuralNetConfiguration.builder() - .dataType(DataType.DOUBLE) - .activation(Activation.TANH) - .weightInit(WeightInit.XAVIER) - .updater(new Adam()) - .graphBuilder() - .addInputs("in") - .layer("0", new SimpleRnn.Builder().nIn(10).nOut(10).dataFormat(rnnDataFormat).build(), "in") - .setOutputs("0") - .build(); - - ComputationGraph net2 = new ComputationGraph(conf2.clone()); - net2.init(); - ComputationGraph net3 = new ComputationGraph(conf2.clone()); - net3.init(); - - net2.setParam("0_W", net1.getParam("0_fW")); - net2.setParam("0_RW", net1.getParam("0_fRW")); - net2.setParam("0_b", net1.getParam("0_fb")); - - net3.setParam("0_W", net1.getParam("0_bW")); - net3.setParam("0_RW", net1.getParam("0_bRW")); - net3.setParam("0_b", net1.getParam("0_bb")); - - - INDArray out1 = net1.outputSingle(in); - INDArray out2 = net2.outputSingle(in); - INDArray out3; - INDArray inReverse; - if (rnnDataFormat == RNNFormat.NWC){ - inReverse = TimeSeriesUtils.reverseTimeSeries(in.permute(0, 2, 1), LayerWorkspaceMgr.noWorkspaces(), ArrayType.INPUT).permute(0, 2, 1); - out3 = net3.outputSingle(inReverse); - out3 = TimeSeriesUtils.reverseTimeSeries(out3.permute(0, 2, 1), LayerWorkspaceMgr.noWorkspaces(), ArrayType.INPUT).permute(0, 2, 1); - - } - else{ - inReverse = TimeSeriesUtils.reverseTimeSeries(in, LayerWorkspaceMgr.noWorkspaces(), ArrayType.INPUT); - out3 = net3.outputSingle(inReverse); - out3 = TimeSeriesUtils.reverseTimeSeries(out3, LayerWorkspaceMgr.noWorkspaces(), ArrayType.INPUT); - - } - - INDArray outExp; - switch (m) { - case ADD: - outExp = out2.add(out3); - break; - case MUL: - outExp = out2.mul(out3); - break; - case AVERAGE: - outExp = out2.add(out3).muli(0.5); - break; - case CONCAT: - System.out.println(out2.shapeInfoToString()); - System.out.println(out3.shapeInfoToString()); - outExp = Nd4j.concat((rnnDataFormat == NCW)?1:2, out2, out3); - break; - default: - throw new RuntimeException(); - } - - assertEquals(outExp, out1, m.toString()); - - - //Check gradients: - if (m == Bidirectional.Mode.ADD || m == Bidirectional.Mode.CONCAT) { - - INDArray eps = Nd4j.rand(inshape); - - INDArray eps1; - 
if (m == Bidirectional.Mode.CONCAT) { - eps1 = Nd4j.concat((rnnDataFormat == NCW)?1:2, eps, eps); - } else { - eps1 = eps; - } - - INDArray epsReversed = (rnnDataFormat == NCW)? - TimeSeriesUtils.reverseTimeSeries(eps, LayerWorkspaceMgr.noWorkspaces(), ArrayType.INPUT): - TimeSeriesUtils.reverseTimeSeries(eps.permute(0, 2, 1), LayerWorkspaceMgr.noWorkspaces(), ArrayType.INPUT) - .permute(0, 2, 1); - net1.outputSingle(true, false, in); - net2.outputSingle(true, false, in); - net3.outputSingle(true, false, inReverse); - - Gradient g1 = net1.backpropGradient(eps1); - Gradient g2 = net2.backpropGradient(eps); - Gradient g3 = net3.backpropGradient(epsReversed); - - for (boolean updates : new boolean[]{false, true}) { - if (updates) { - net1.getUpdater().update(g1, 0, 0, 3, LayerWorkspaceMgr.noWorkspaces()); - net2.getUpdater().update(g2, 0, 0, 3, LayerWorkspaceMgr.noWorkspaces()); - net3.getUpdater().update(g3, 0, 0, 3, LayerWorkspaceMgr.noWorkspaces()); - } - - assertEquals(g2.gradientForVariable().get("0_W"), g1.gradientForVariable().get("0_fW")); - assertEquals(g2.gradientForVariable().get("0_RW"), g1.gradientForVariable().get("0_fRW")); - assertEquals(g2.gradientForVariable().get("0_b"), g1.gradientForVariable().get("0_fb")); - - assertEquals(g3.gradientForVariable().get("0_W"), g1.gradientForVariable().get("0_bW")); - assertEquals(g3.gradientForVariable().get("0_RW"), g1.gradientForVariable().get("0_bRW")); - assertEquals(g3.gradientForVariable().get("0_b"), g1.gradientForVariable().get("0_bb")); - } - } - } - } - } - - - @Test - public void testIssue5472(){ - //https://github.com/deeplearning4j/deeplearning4j/issues/5472 - - int in = 2; - int out = 2; - ComputationGraphConfiguration.GraphBuilder builder = NeuralNetConfiguration.builder() - .updater(new Adam(0.01)) - .activation(Activation.RELU) + } + + @Test + public void testSimpleBidirectionalCompGraph() { + + for (WorkspaceMode wsm : WorkspaceMode.values()) { + log.info("*** Starting workspace mode: " + wsm); + Nd4j.getRandom().setSeed(12345); + + Bidirectional.Mode[] modes = + new Bidirectional.Mode[] { + Bidirectional.Mode.CONCAT, + Bidirectional.Mode.ADD, + Bidirectional.Mode.AVERAGE, + Bidirectional.Mode.MUL + }; + + long[] inshape = rnnDataFormat == NCW ? 
new long[] {3, 10, 6} : new long[] {3, 6, 10}; + INDArray in = Nd4j.rand(inshape); + + for (Bidirectional.Mode m : modes) { + ComputationGraphConfiguration conf1 = + NeuralNetConfiguration.builder() + .dataType(DataType.DOUBLE) + .activation(Activation.TANH) + .weightInit(WeightInit.XAVIER) + .trainingWorkspaceMode(wsm) + .inferenceWorkspaceMode(wsm) + .updater(new Adam()) .graphBuilder() - .addInputs("IN") - .setInputTypes(InputType.recurrent(in)) - .addLayer("AUTOENCODER", - new VariationalAutoencoder.Builder() - .encoderLayerSizes(64) - .decoderLayerSizes(64) - .nOut(7) - .pzxActivationFunction(Activation.IDENTITY) - .reconstructionDistribution(new BernoulliReconstructionDistribution(Activation.SIGMOID.getActivationFunction())).build(), - "IN") - .addLayer("RNN", new Bidirectional(Bidirectional.Mode.ADD, new GravesLSTM.Builder().nOut(128).build()), "AUTOENCODER") - .addLayer("OUT", new RnnOutputLayer.Builder() - .nOut(out) - .activation(Activation.SOFTMAX) - .lossFunction(LossFunctions.LossFunction.MCXENT).build(), "RNN") - .setOutputs("OUT") + .addInputs("in") + .layer( + "0", + Bidirectional.builder( + m, + SimpleRnn.builder().nIn(10).nOut(10).dataFormat(rnnDataFormat).build()) + .build(), + "in") + .setOutputs("0") + .build(); - ; + ComputationGraph net1 = new ComputationGraph(conf1); + net1.init(); - ComputationGraph net = new ComputationGraph(builder.build()); - net.init(); + ComputationGraphConfiguration conf2 = + NeuralNetConfiguration.builder() + .dataType(DataType.DOUBLE) + .activation(Activation.TANH) + .weightInit(WeightInit.XAVIER) + .updater(new Adam()) + .graphBuilder() + .addInputs("in") + .layer( + "0", + SimpleRnn.builder().nIn(10).nOut(10).dataFormat(rnnDataFormat).build(), + "in") + .setOutputs("0") + .build(); - MultiDataSetIterator iterator = new SingletonMultiDataSetIterator(new MultiDataSet(Nd4j.create(10,in,5), Nd4j.create(10,out,5))); + ComputationGraph net2 = new ComputationGraph(conf2.clone()); + net2.init(); + ComputationGraph net3 = new ComputationGraph(conf2.clone()); + net3.init(); - EarlyStoppingConfiguration.Builder b = new EarlyStoppingConfiguration.Builder<>() - .epochTerminationConditions(new MaxEpochsTerminationCondition(10)) - .scoreCalculator(new DataSetLossCalculator(iterator, true)) - .evaluateEveryNEpochs(1) - .modelSaver(new InMemoryModelSaver<>()); + net2.setParam("0_W", net1.getParam("0_fW")); + net2.setParam("0_RW", net1.getParam("0_fRW")); + net2.setParam("0_b", net1.getParam("0_fb")); - EarlyStoppingGraphTrainer earlyStoppingGraphTrainer = new EarlyStoppingGraphTrainer(b.build(), net, iterator, null); - earlyStoppingGraphTrainer.fit(); + net3.setParam("0_W", net1.getParam("0_bW")); + net3.setParam("0_RW", net1.getParam("0_bRW")); + net3.setParam("0_b", net1.getParam("0_bb")); + + INDArray out1 = net1.outputSingle(in); + INDArray out2 = net2.outputSingle(in); + INDArray out3; + INDArray inReverse; + if (rnnDataFormat == RNNFormat.NWC) { + inReverse = + TimeSeriesUtils.reverseTimeSeries( + in.permute(0, 2, 1), LayerWorkspaceMgr.noWorkspaces(), ArrayType.INPUT) + .permute(0, 2, 1); + out3 = net3.outputSingle(inReverse); + out3 = + TimeSeriesUtils.reverseTimeSeries( + out3.permute(0, 2, 1), LayerWorkspaceMgr.noWorkspaces(), ArrayType.INPUT) + .permute(0, 2, 1); + + } else { + inReverse = + TimeSeriesUtils.reverseTimeSeries( + in, LayerWorkspaceMgr.noWorkspaces(), ArrayType.INPUT); + out3 = net3.outputSingle(inReverse); + out3 = + TimeSeriesUtils.reverseTimeSeries( + out3, LayerWorkspaceMgr.noWorkspaces(), ArrayType.INPUT); + } + + INDArray 
outExp; + switch (m) { + case ADD: + outExp = out2.add(out3); + break; + case MUL: + outExp = out2.mul(out3); + break; + case AVERAGE: + outExp = out2.add(out3).muli(0.5); + break; + case CONCAT: + System.out.println(out2.shapeInfoToString()); + System.out.println(out3.shapeInfoToString()); + outExp = Nd4j.concat((rnnDataFormat == NCW) ? 1 : 2, out2, out3); + break; + default: + throw new RuntimeException(); + } + + assertEquals(outExp, out1, m.toString()); + + // Check gradients: + if (m == Bidirectional.Mode.ADD || m == Bidirectional.Mode.CONCAT) { + + INDArray eps = Nd4j.rand(inshape); + + INDArray eps1; + if (m == Bidirectional.Mode.CONCAT) { + eps1 = Nd4j.concat((rnnDataFormat == NCW) ? 1 : 2, eps, eps); + } else { + eps1 = eps; + } + + INDArray epsReversed = + (rnnDataFormat == NCW) + ? TimeSeriesUtils.reverseTimeSeries( + eps, LayerWorkspaceMgr.noWorkspaces(), ArrayType.INPUT) + : TimeSeriesUtils.reverseTimeSeries( + eps.permute(0, 2, 1), LayerWorkspaceMgr.noWorkspaces(), ArrayType.INPUT) + .permute(0, 2, 1); + net1.outputSingle(true, false, in); + net2.outputSingle(true, false, in); + net3.outputSingle(true, false, inReverse); + + Gradient g1 = net1.backpropGradient(eps1); + Gradient g2 = net2.backpropGradient(eps); + Gradient g3 = net3.backpropGradient(epsReversed); + + for (boolean updates : new boolean[] {false, true}) { + if (updates) { + net1.getUpdater().update(g1, 0, 0, 3, LayerWorkspaceMgr.noWorkspaces()); + net2.getUpdater().update(g2, 0, 0, 3, LayerWorkspaceMgr.noWorkspaces()); + net3.getUpdater().update(g3, 0, 0, 3, LayerWorkspaceMgr.noWorkspaces()); + } + + assertEquals(g2.gradientForVariable().get("0_W"), g1.gradientForVariable().get("0_fW")); + assertEquals( + g2.gradientForVariable().get("0_RW"), g1.gradientForVariable().get("0_fRW")); + assertEquals(g2.gradientForVariable().get("0_b"), g1.gradientForVariable().get("0_fb")); + + assertEquals(g3.gradientForVariable().get("0_W"), g1.gradientForVariable().get("0_bW")); + assertEquals( + g3.gradientForVariable().get("0_RW"), g1.gradientForVariable().get("0_bRW")); + assertEquals(g3.gradientForVariable().get("0_b"), g1.gradientForVariable().get("0_bb")); + } + } + } } + } + + @Test + public void testIssue5472() { + // https://github.com/deeplearning4j/deeplearning4j/issues/5472 + + int in = 2; + int out = 2; + ComputationGraphConfiguration.GraphBuilder builder = + NeuralNetConfiguration.builder() + .updater(new Adam(0.01)) + .activation(Activation.RELU) + .graphBuilder() + .addInputs("IN") + .setInputTypes(InputType.recurrent(in)) + .addLayer( + "AUTOENCODER", + VariationalAutoencoder.builder() + .encoderLayerSizes(64) + .decoderLayerSizes(64) + .nOut(7) + .pzxActivationFunction(Activation.IDENTITY.getActivationFunction()) + .reconstructionDistribution( + new BernoulliReconstructionDistribution( + Activation.SIGMOID.getActivationFunction())) + .build(), + "IN") + .addLayer( + "RNN", + Bidirectional.builder( + Bidirectional.Mode.ADD, GravesLSTM.builder().nOut(128).build()) + .build(), + "AUTOENCODER") + .addLayer( + "OUT", + RnnOutputLayer.builder() + .nOut(out) + .activation(Activation.SOFTMAX) + .lossFunction(LossFunctions.LossFunction.MCXENT) + .build(), + "RNN") + .setOutputs("OUT"); + + ComputationGraph net = new ComputationGraph(builder.build()); + net.init(); + + MultiDataSetIterator iterator = + new SingletonMultiDataSetIterator( + new MultiDataSet(Nd4j.create(10, in, 5), Nd4j.create(10, out, 5))); + + EarlyStoppingConfiguration.Builder b = + new EarlyStoppingConfiguration.Builder<>() + 
.epochTerminationConditions(new MaxEpochsTerminationCondition(10)) + .scoreCalculator(new DataSetLossCalculator(iterator, true)) + .evaluateEveryNEpochs(1) + .modelSaver(new InMemoryModelSaver<>()); + + EarlyStoppingGraphTrainer earlyStoppingGraphTrainer = + new EarlyStoppingGraphTrainer(b.build(), net, iterator, null); + earlyStoppingGraphTrainer.fit(); + } } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTMTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTMTest.java index be04304b6..9c2f03ffd 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTMTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTMTest.java @@ -64,7 +64,7 @@ public class GravesBidirectionalLSTMTest extends BaseDL4JTest { int nHiddenUnits = 17; final NeuralNetConfiguration conf = NeuralNetConfiguration.builder() - .layer(new org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM.Builder().nIn(nIn) + .layer(org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM.builder().nIn(nIn) .nOut(nHiddenUnits).dataFormat(rnnDataFormat).activation(Activation.TANH).build()) .build(); @@ -130,7 +130,7 @@ public class GravesBidirectionalLSTMTest extends BaseDL4JTest { Nd4j.ones(miniBatchSize, timeSeriesLength, nIn); NeuralNetConfiguration conf = NeuralNetConfiguration.builder() - .layer(new org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM.Builder().nIn(nIn) + .layer(org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM.builder().nIn(nIn) .nOut(lstmNHiddenUnits).dataFormat(rnnDataFormat) .dist(new UniformDistribution(0, 1)).activation(Activation.TANH).build()) .build(); @@ -202,7 +202,7 @@ public class GravesBidirectionalLSTMTest extends BaseDL4JTest { final int timeSeriesLength = 7; final NeuralNetConfiguration conf = NeuralNetConfiguration.builder() - .layer(new org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM.Builder().nIn(nIn) + .layer(org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM.builder().nIn(nIn) .nOut(layerSize) .dist(new UniformDistribution(0, 1)).activation(Activation.TANH).build()) .build(); @@ -260,7 +260,7 @@ public class GravesBidirectionalLSTMTest extends BaseDL4JTest { Nd4j.getRandom().setSeed(12345); final NeuralNetConfiguration confBidirectional = NeuralNetConfiguration.builder() - .layer(new org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM.Builder().nIn(nIn) + .layer(org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM.builder().nIn(nIn) .nOut(layerSize).dataFormat(rnnDataFormat) .dist(new UniformDistribution(-0.1, 0.1)).activation(Activation.TANH).build()) .build(); @@ -300,14 +300,14 @@ public class GravesBidirectionalLSTMTest extends BaseDL4JTest { final NeuralNetConfiguration confBidirectional = NeuralNetConfiguration.builder() - .layer(new org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM.Builder() + .layer(org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM.builder() .nIn(nIn).nOut(layerSize).dataFormat(rnnDataFormat) .dist(new UniformDistribution(-0.1, 0.1)) .activation(Activation.TANH).updater(new NoOp()).build()) .build(); final NeuralNetConfiguration confForwards = NeuralNetConfiguration.builder() - .layer(new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(nIn).nOut(layerSize).dataFormat(rnnDataFormat) + 
.layer(org.deeplearning4j.nn.conf.layers.GravesLSTM.builder().nIn(nIn).nOut(layerSize).dataFormat(rnnDataFormat) .weightInit(WeightInit.ZERO).activation(Activation.TANH).build()) .build(); @@ -505,13 +505,13 @@ public class GravesBidirectionalLSTMTest extends BaseDL4JTest { .updater(new AdaGrad(0.1)) .l2(0.001) .seed(12345).list() - .layer(0, new org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM.Builder() + .layer(0, org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM.builder() .activation(Activation.TANH).nIn(2).nOut(2) .dist(new UniformDistribution(-0.05, 0.05)).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM.Builder() + .layer(1, org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM.builder() .activation(Activation.TANH).nIn(2).nOut(2) .dist(new UniformDistribution(-0.05, 0.05)).build()) - .layer(2, new org.deeplearning4j.nn.conf.layers.RnnOutputLayer.Builder() + .layer(2, org.deeplearning4j.nn.conf.layers.RnnOutputLayer.builder() .activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT) .nIn(2).nOut(2).build()) .build(); @@ -534,10 +534,10 @@ public class GravesBidirectionalLSTMTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .seed(12345).list() - .layer(0, new org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM.Builder() - .gateActivationFunction(gateAfn).activation(Activation.TANH).nIn(2).nOut(2).dataFormat(rnnDataFormat) + .layer(0, org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM.builder() + .gateActivationFunction(Activation.fromString(gateAfn).getActivationFunction()).activation(Activation.TANH).nIn(2).nOut(2).dataFormat(rnnDataFormat) .build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.RnnOutputLayer.Builder() + .layer(1, org.deeplearning4j.nn.conf.layers.RnnOutputLayer.builder() .lossFunction(LossFunctions.LossFunction.MSE).nIn(2).nOut(2).dataFormat(rnnDataFormat) .activation(Activation.TANH).build()) .build(); @@ -546,7 +546,7 @@ public class GravesBidirectionalLSTMTest extends BaseDL4JTest { net.init(); assertEquals(gateAfn, ((org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM) net.getLayer(0).getNetConfiguration() - .getFlattenedLayerConfigurations().get(0)).getGateActivationFn().toString()); + .getFlattenedLayerConfigurations().get(0)).getGateActivationFunction().toString()); INDArray in = Nd4j.rand(3, 2, 5); INDArray labels = Nd4j.rand(3, 2, 5); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTMTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTMTest.java index 791ff8fa6..567786eee 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTMTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTMTest.java @@ -59,7 +59,7 @@ public class GravesLSTMTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() - .layer(new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(nIn) + .layer(org.deeplearning4j.nn.conf.layers.GravesLSTM.builder().nIn(nIn) .nOut(nHiddenUnits).activation(Activation.TANH).build()) .build(); @@ -104,7 +104,7 @@ public class GravesLSTMTest extends BaseDL4JTest { INDArray inputData = Nd4j.ones(miniBatchSize, nIn, timeSeriesLength); NeuralNetConfiguration conf = NeuralNetConfiguration.builder() - .layer(new 
org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(nIn) + .layer(org.deeplearning4j.nn.conf.layers.GravesLSTM.builder().nIn(nIn) .nOut(lstmNHiddenUnits) .dist(new UniformDistribution(0, 1)).activation(Activation.TANH).build()) .build(); @@ -155,7 +155,7 @@ public class GravesLSTMTest extends BaseDL4JTest { int timeSeriesLength = 7; NeuralNetConfiguration conf = NeuralNetConfiguration.builder() - .layer(new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(nIn).nOut(layerSize) + .layer(org.deeplearning4j.nn.conf.layers.GravesLSTM.builder().nIn(nIn).nOut(layerSize) .dist(new UniformDistribution(0, 1)) .activation(Activation.TANH).build()) .build(); @@ -199,9 +199,9 @@ public class GravesLSTMTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(0.1)).seed(12345).list() - .layer(0, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().activation(Activation.TANH) + .layer(0, org.deeplearning4j.nn.conf.layers.GravesLSTM.builder().activation(Activation.TANH) .nIn(2).nOut(2).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.RnnOutputLayer.Builder() + .layer(1, org.deeplearning4j.nn.conf.layers.RnnOutputLayer.builder() .lossFunction(LossFunctions.LossFunction.MSE).nIn(2).nOut(1) .activation(Activation.TANH).build()) .build(); @@ -256,10 +256,10 @@ public class GravesLSTMTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .seed(12345).list() - .layer(0, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder() - .gateActivationFunction(gateAfn).activation(Activation.TANH).nIn(2).nOut(2) + .layer(0, org.deeplearning4j.nn.conf.layers.GravesLSTM.builder() + .gateActivationFunction(Activation.fromString(gateAfn).getActivationFunction()).activation(Activation.TANH).nIn(2).nOut(2) .build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.RnnOutputLayer.Builder() + .layer(1, org.deeplearning4j.nn.conf.layers.RnnOutputLayer.builder() .lossFunction(LossFunctions.LossFunction.MSE).nIn(2).nOut(2) .activation(Activation.TANH).build()) .build(); @@ -268,7 +268,7 @@ public class GravesLSTMTest extends BaseDL4JTest { net.init(); assertEquals(gateAfn, ((org.deeplearning4j.nn.conf.layers.GravesLSTM) net.getLayer(0).getLayerConfiguration()) - .getGateActivationFn().toString()); + .getGateActivationFunction().toString()); INDArray in = Nd4j.rand(3, 2, 5); INDArray labels = Nd4j.rand(3, 2, 5); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/MaskZeroLayerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/MaskZeroLayerTest.java index 1a3bcbc65..b6ba5d165 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/MaskZeroLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/MaskZeroLayerTest.java @@ -63,9 +63,9 @@ public class MaskZeroLayerTest extends BaseDL4JTest { }); // A LSTM which adds one for every non-zero timestep - org.deeplearning4j.nn.conf.layers.LSTM underlying = new org.deeplearning4j.nn.conf.layers.LSTM.Builder() + org.deeplearning4j.nn.conf.layers.LSTM underlying = org.deeplearning4j.nn.conf.layers.LSTM.builder() .activation(Activation.IDENTITY) - .gateActivationFunction(Activation.IDENTITY) + .gateActivationFunction(Activation.IDENTITY.getActivationFunction()) .nIn(2) 
.nOut(1).dataFormat(rnnDataFormat) .build(); @@ -108,8 +108,8 @@ public class MaskZeroLayerTest extends BaseDL4JTest { public void testSerialization(){ NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() - .layer(new org.deeplearning4j.nn.conf.layers.util.MaskZeroLayer.Builder() - .setMaskValue(0.0).setUnderlying(new LSTM.Builder().nIn(4).nOut(5).dataFormat(rnnDataFormat).build()).build()) + .layer(org.deeplearning4j.nn.conf.layers.util.MaskZeroLayer.builder() + .maskingValue(0.0).underlying(LSTM.builder().nIn(4).nOut(5).dataFormat(rnnDataFormat).build()).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/RnnDataFormatTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/RnnDataFormatTests.java index 93a60f38c..fca3550b0 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/RnnDataFormatTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/RnnDataFormatTests.java @@ -206,49 +206,49 @@ public class RnnDataFormatTests extends BaseDL4JTest { private MultiLayerNetwork getGravesBidirectionalLstmNet(RNNFormat format, boolean setOnLayerAlso, boolean lastTimeStep, boolean maskZeros) { if (setOnLayerAlso) { - return getNetWithLayer(new GravesBidirectionalLSTM.Builder().nOut(3) + return getNetWithLayer(GravesBidirectionalLSTM.builder().nOut(3) .dataFormat(format).build(), format, lastTimeStep, maskZeros); } else { - return getNetWithLayer(new GravesBidirectionalLSTM.Builder().nOut(3).build(), format, lastTimeStep, maskZeros); + return getNetWithLayer(GravesBidirectionalLSTM.builder().nOut(3).build(), format, lastTimeStep, maskZeros); } } private MultiLayerNetwork getGravesLstmNet(RNNFormat format, boolean setOnLayerAlso, boolean lastTimeStep, boolean maskZeros) { if (setOnLayerAlso) { - return getNetWithLayer(new GravesLSTM.Builder().nOut(3) + return getNetWithLayer(GravesLSTM.builder().nOut(3) .dataFormat(format).build(), format, lastTimeStep, maskZeros); } else { - return getNetWithLayer(new GravesLSTM.Builder().nOut(3).build(), format, lastTimeStep, maskZeros); + return getNetWithLayer(GravesLSTM.builder().nOut(3).build(), format, lastTimeStep, maskZeros); } } private MultiLayerNetwork getLstmNet(RNNFormat format, boolean setOnLayerAlso, boolean lastTimeStep, boolean maskZeros) { if (setOnLayerAlso) { - return getNetWithLayer(new LSTM.Builder().nOut(3) + return getNetWithLayer(LSTM.builder().nOut(3) .dataFormat(format).build(), format, lastTimeStep, maskZeros); } else { - return getNetWithLayer(new LSTM.Builder().nOut(3).build(), format, lastTimeStep, maskZeros); + return getNetWithLayer(LSTM.builder().nOut(3).build(), format, lastTimeStep, maskZeros); } } private MultiLayerNetwork getSimpleRnnNet(RNNFormat format, boolean setOnLayerAlso, boolean lastTimeStep, boolean maskZeros) { if (setOnLayerAlso) { - return getNetWithLayer(new SimpleRnn.Builder().nOut(3) + return getNetWithLayer(SimpleRnn.builder().nOut(3) .dataFormat(format).build(), format, lastTimeStep, maskZeros); } else { - return getNetWithLayer(new SimpleRnn.Builder().nOut(3).build(), format, lastTimeStep, maskZeros); + return getNetWithLayer(SimpleRnn.builder().nOut(3).build(), format, lastTimeStep, maskZeros); } } private MultiLayerNetwork getNetWithLayer(LayerConfiguration layer, RNNFormat format, boolean lastTimeStep, boolean maskZeros) { if (maskZeros){ - layer = new 
MaskZeroLayer.Builder().setMaskValue(0.).setUnderlying(layer).build(); + layer = MaskZeroLayer.builder().maskingValue(0.).underlying(layer).build(); } if(lastTimeStep){ - layer = new LastTimeStep(layer); + layer = LastTimeStep.builder(layer).build(); } NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = (NeuralNetConfiguration.NeuralNetConfigurationBuilder) NeuralNetConfiguration.builder() .seed(12345) .list() - .layer(new LSTM.Builder() + .layer(LSTM.builder() .nIn(3) .activation(Activation.TANH) .dataFormat(format) @@ -257,8 +257,8 @@ public class RnnDataFormatTests extends BaseDL4JTest { .build()) .layer(layer) .layer( - (lastTimeStep)?new OutputLayer.Builder().activation(Activation.SOFTMAX).nOut(10).build(): - new RnnOutputLayer.Builder().activation(Activation.SOFTMAX).nOut(10).dataFormat(format).build() + (lastTimeStep)?OutputLayer.builder().activation(Activation.SOFTMAX).nOut(10).build(): + RnnOutputLayer.builder().activation(Activation.SOFTMAX).nOut(10).dataFormat(format).build() ) .inputType(InputType.recurrent(3, 12, format)); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestLastTimeStepLayer.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestLastTimeStepLayer.java index 7755790e4..3202e81e0 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestLastTimeStepLayer.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestLastTimeStepLayer.java @@ -63,7 +63,7 @@ public class TestLastTimeStepLayer extends BaseDL4JTest { public void testLastTimeStepVertex() { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().graphBuilder().addInputs("in") - .addLayer("lastTS", new LastTimeStep(new SimpleRnn.Builder() + .addLayer("lastTS", LastTimeStep.builder(SimpleRnn.builder() .nIn(5).nOut(6).dataFormat(rnnDataFormat).build()), "in") .setOutputs("lastTS") .build(); @@ -134,13 +134,13 @@ public class TestLastTimeStepLayer extends BaseDL4JTest { .graphBuilder() .addInputs("in") .setInputTypes(InputType.recurrent(1, rnnDataFormat)) - .addLayer("RNN", new LastTimeStep(new LSTM.Builder() + .addLayer("RNN", LastTimeStep.builder(LSTM.builder() .nOut(10).dataFormat(rnnDataFormat) .build()), "in") - .addLayer("dense", new DenseLayer.Builder() + .addLayer("dense", DenseLayer.builder() .nOut(10) .build(), "RNN") - .addLayer("out", new OutputLayer.Builder() + .addLayer("out", OutputLayer.builder() .activation(IDENTITY) .lossFunction(MSE) .nOut(10) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestRecurrentWeightInit.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestRecurrentWeightInit.java index 3ea9cdbdb..7581e1db9 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestRecurrentWeightInit.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestRecurrentWeightInit.java @@ -27,6 +27,7 @@ import org.deeplearning4j.nn.conf.layers.GravesLSTM; import org.deeplearning4j.nn.conf.layers.LSTM; import org.deeplearning4j.nn.conf.layers.recurrent.SimpleRnn; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; +import org.deeplearning4j.nn.weights.WeightInitDistribution; import org.junit.jupiter.api.Test; import org.nd4j.linalg.api.ndarray.INDArray; @@ -47,18 +48,18 @@ public class TestRecurrentWeightInit extends BaseDL4JTest { if(rwInit) { switch (i) { case 0: - 
b.layer(new LSTM.Builder().nIn(10).nOut(10) - .weightInitRecurrent(new UniformDistribution(2, 3)) + b.layer(LSTM.builder().nIn(10).nOut(10) + .weightInitRecurrent(new WeightInitDistribution(new UniformDistribution(2, 3))) .build()); break; case 1: - b.layer(new GravesLSTM.Builder().nIn(10).nOut(10) - .weightInitRecurrent(new UniformDistribution(2, 3)) + b.layer(GravesLSTM.builder().nIn(10).nOut(10) + .weightInitRecurrent(new WeightInitDistribution(new UniformDistribution(2, 3))) .build()); break; case 2: - b.layer(new SimpleRnn.Builder().nIn(10).nOut(10) - .weightInitRecurrent(new UniformDistribution(2, 3)).build()); + b.layer(SimpleRnn.builder().nIn(10).nOut(10) + .weightInitRecurrent(new WeightInitDistribution(new UniformDistribution(2, 3))).build()); break; default: throw new RuntimeException(); @@ -66,13 +67,13 @@ public class TestRecurrentWeightInit extends BaseDL4JTest { } else { switch (i) { case 0: - b.layer(new LSTM.Builder().nIn(10).nOut(10).build()); + b.layer(LSTM.builder().nIn(10).nOut(10).build()); break; case 1: - b.layer(new GravesLSTM.Builder().nIn(10).nOut(10).build()); + b.layer(GravesLSTM.builder().nIn(10).nOut(10).build()); break; case 2: - b.layer(new SimpleRnn.Builder().nIn(10).nOut(10).build()); + b.layer(SimpleRnn.builder().nIn(10).nOut(10).build()); break; default: throw new RuntimeException(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestRnnLayers.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestRnnLayers.java index d6e0369d4..9d50d8e95 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestRnnLayers.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestRnnLayers.java @@ -70,9 +70,9 @@ public class TestRnnLayers extends BaseDL4JTest { .updater(new NoOp()) .weightInit(WeightInit.XAVIER) .list() - .layer(new SimpleRnn.Builder().nIn(nIn).nOut(3).dataFormat(rnnDataFormat).build()) - .layer(new LSTM.Builder().nIn(3).nOut(5).dataFormat(rnnDataFormat).build()) - .layer(new RnnOutputLayer.Builder().nOut(nOut).activation(Activation.SOFTMAX).build()) + .layer(SimpleRnn.builder().nIn(nIn).nOut(3).dataFormat(rnnDataFormat).build()) + .layer(LSTM.builder().nIn(3).nOut(5).dataFormat(rnnDataFormat).build()) + .layer(RnnOutputLayer.builder().nOut(nOut).activation(Activation.SOFTMAX).build()) .build(); @@ -124,19 +124,19 @@ public class TestRnnLayers extends BaseDL4JTest { TestDropout.CustomDropout cd = new TestDropout.CustomDropout(); switch (s){ case "graves": - layer = new GravesLSTM.Builder().activation(Activation.TANH).nIn(10).nOut(10).dataFormat(rnnDataFormat).build(); - layerD = new GravesLSTM.Builder().dropOut(0.5).activation(Activation.TANH).nIn(10).nOut(10).dataFormat(rnnDataFormat).build(); - layerD2 = new GravesLSTM.Builder().dropOut(cd).activation(Activation.TANH).nIn(10).nOut(10).dataFormat(rnnDataFormat).build(); + layer = GravesLSTM.builder().activation(Activation.TANH).nIn(10).nOut(10).dataFormat(rnnDataFormat).build(); + layerD = GravesLSTM.builder().dropOut(0.5).activation(Activation.TANH).nIn(10).nOut(10).dataFormat(rnnDataFormat).build(); + layerD2 = GravesLSTM.builder().dropOut(cd).activation(Activation.TANH).nIn(10).nOut(10).dataFormat(rnnDataFormat).build(); break; case "lstm": - layer = new org.deeplearning4j.nn.conf.layers.LSTM.Builder().activation(Activation.TANH).nIn(10).nOut(10).dataFormat(rnnDataFormat).build(); - layerD = new 
org.deeplearning4j.nn.conf.layers.LSTM.Builder().dropOut(0.5).activation(Activation.TANH).nIn(10).nOut(10).dataFormat(rnnDataFormat).build(); - layerD2 = new org.deeplearning4j.nn.conf.layers.LSTM.Builder().dropOut(cd).activation(Activation.TANH).nIn(10).nOut(10).dataFormat(rnnDataFormat).build(); + layer = org.deeplearning4j.nn.conf.layers.LSTM.builder().activation(Activation.TANH).nIn(10).nOut(10).dataFormat(rnnDataFormat).build(); + layerD = org.deeplearning4j.nn.conf.layers.LSTM.builder().dropOut(0.5).activation(Activation.TANH).nIn(10).nOut(10).dataFormat(rnnDataFormat).build(); + layerD2 = org.deeplearning4j.nn.conf.layers.LSTM.builder().dropOut(cd).activation(Activation.TANH).nIn(10).nOut(10).dataFormat(rnnDataFormat).build(); break; case "simple": - layer = new SimpleRnn.Builder().activation(Activation.TANH).nIn(10).nOut(10).dataFormat(rnnDataFormat).build(); - layerD = new SimpleRnn.Builder().dropOut(0.5).activation(Activation.TANH).nIn(10).nOut(10).dataFormat(rnnDataFormat).build(); - layerD2 = new SimpleRnn.Builder().dropOut(cd).activation(Activation.TANH).nIn(10).nOut(10).dataFormat(rnnDataFormat).build(); + layer = SimpleRnn.builder().activation(Activation.TANH).nIn(10).nOut(10).dataFormat(rnnDataFormat).build(); + layerD = SimpleRnn.builder().dropOut(0.5).activation(Activation.TANH).nIn(10).nOut(10).dataFormat(rnnDataFormat).build(); + layerD2 = SimpleRnn.builder().dropOut(cd).activation(Activation.TANH).nIn(10).nOut(10).dataFormat(rnnDataFormat).build(); break; default: throw new RuntimeException(s); @@ -146,21 +146,21 @@ public class TestRnnLayers extends BaseDL4JTest { .seed(12345) .list() .layer(layer) - .layer(new RnnOutputLayer.Builder().activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(10).dataFormat(rnnDataFormat).build()) + .layer(RnnOutputLayer.builder().activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(10).dataFormat(rnnDataFormat).build()) .build(); NeuralNetConfiguration confD = NeuralNetConfiguration.builder() .seed(12345) .list() .layer(layerD) - .layer(new RnnOutputLayer.Builder().activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(10).dataFormat(rnnDataFormat).build()) + .layer(RnnOutputLayer.builder().activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(10).dataFormat(rnnDataFormat).build()) .build(); NeuralNetConfiguration confD2 = NeuralNetConfiguration.builder() .seed(12345) .list() .layer(layerD2) - .layer(new RnnOutputLayer.Builder().activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(10).dataFormat(rnnDataFormat).build()) + .layer(RnnOutputLayer.builder().activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(10).dataFormat(rnnDataFormat).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -216,14 +216,14 @@ public class TestRnnLayers extends BaseDL4JTest { NeuralNetConfiguration.NeuralNetConfigurationBuilder lb = NeuralNetConfiguration.builder() - .layer(new SimpleRnn.Builder().nIn(5).nOut(5).dataFormat(rnnDataFormat).build()); + .layer(SimpleRnn.builder().nIn(5).nOut(5).dataFormat(rnnDataFormat).build()); switch (i){ case 0: - lb.layer(new RnnOutputLayer.Builder().activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).nIn(5).nOut(5).dataFormat(rnnDataFormat).build()); + 
lb.layer(RnnOutputLayer.builder().activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).nIn(5).nOut(5).dataFormat(rnnDataFormat).build()); break; case 1: - lb.layer(new RnnLossLayer.Builder().activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).dataFormat(rnnDataFormat).build()); + lb.layer(RnnLossLayer.builder().activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).dataFormat(rnnDataFormat).build()); break; default: throw new RuntimeException(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestSimpleRnn.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestSimpleRnn.java index 2abd86487..a462432b1 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestSimpleRnn.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestSimpleRnn.java @@ -72,7 +72,7 @@ public class TestSimpleRnn extends BaseDL4JTest { .weightInit(WeightInit.XAVIER) .activation(Activation.TANH) .list() - .layer(new SimpleRnn.Builder().nIn(nIn).nOut(layerSize).dataFormat(rnnDataFormat).build()) + .layer(SimpleRnn.builder().nIn(nIn).nOut(layerSize).dataFormat(rnnDataFormat).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -130,7 +130,7 @@ public class TestSimpleRnn extends BaseDL4JTest { .weightInit(WeightInit.XAVIER) .activation(Activation.TANH) .list() - .layer(new SimpleRnn.Builder().nIn(nIn).nOut(layerSize).dataFormat(rnnDataFormat) + .layer(SimpleRnn.builder().nIn(nIn).nOut(layerSize).dataFormat(rnnDataFormat) .biasInit(100) .build()) .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestTimeDistributed.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestTimeDistributed.java index ec8008379..c94b4f6f8 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestTimeDistributed.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestTimeDistributed.java @@ -67,9 +67,9 @@ public class TestTimeDistributed extends BaseDL4JTest { .seed(12345) .updater(new Adam(0.1)) .list() - .layer(new LSTM.Builder().nIn(3).nOut(3).dataFormat(rnnDataFormat).build()) - .layer(new DenseLayer.Builder().nIn(3).nOut(3).activation(Activation.TANH).build()) - .layer(new RnnOutputLayer.Builder().nIn(3).nOut(3).activation(Activation.SOFTMAX).dataFormat(rnnDataFormat) + .layer(LSTM.builder().nIn(3).nOut(3).dataFormat(rnnDataFormat).build()) + .layer(DenseLayer.builder().nIn(3).nOut(3).activation(Activation.TANH).build()) + .layer(RnnOutputLayer.builder().nIn(3).nOut(3).activation(Activation.SOFTMAX).dataFormat(rnnDataFormat) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .inputType(InputType.recurrent(3, rnnDataFormat)) .build(); @@ -79,10 +79,9 @@ public class TestTimeDistributed extends BaseDL4JTest { .inferenceWorkspaceMode(wsm) .seed(12345) .updater(new Adam(0.1)) - .list() - .layer(new LSTM.Builder().nIn(3).nOut(3).dataFormat(rnnDataFormat).build()) - .layer(new TimeDistributed(new DenseLayer.Builder().nIn(3).nOut(3).activation(Activation.TANH).build(), rnnDataFormat)) - .layer(new RnnOutputLayer.Builder().nIn(3).nOut(3).activation(Activation.SOFTMAX).dataFormat(rnnDataFormat) + .layer(LSTM.builder().nIn(3).nOut(3).dataFormat(rnnDataFormat).build()) + 
.layer(TimeDistributed.builder().underlying(DenseLayer.builder().nIn(3).nOut(3).activation(Activation.TANH).build()).rnnDataFormat(rnnDataFormat)) + .layer(RnnOutputLayer.builder().nIn(3).nOut(3).activation(Activation.SOFTMAX).dataFormat(rnnDataFormat) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .inputType(InputType.recurrent(3, rnnDataFormat)) .build(); @@ -137,16 +136,16 @@ public class TestTimeDistributed extends BaseDL4JTest { LayerConfiguration l0, l2; switch (rnnType) { case 0: - l0 = new LSTM.Builder().nOut(5).build(); - l2 = new LSTM.Builder().nOut(5).build(); + l0 = LSTM.builder().nOut(5).build(); + l2 = LSTM.builder().nOut(5).build(); break; case 1: - l0 = new SimpleRnn.Builder().nOut(5).build(); - l2 = new SimpleRnn.Builder().nOut(5).build(); + l0 = SimpleRnn.builder().nOut(5).build(); + l2 = SimpleRnn.builder().nOut(5).build(); break; case 2: - l0 = new Bidirectional(new LSTM.Builder().nOut(5).build()); - l2 = new Bidirectional(new LSTM.Builder().nOut(5).build()); + l0 = Bidirectional.builder(LSTM.builder().nOut(5).build()).build(); + l2 = Bidirectional.builder(LSTM.builder().nOut(5).build()).build(); break; default: throw new RuntimeException("Not implemented: " + rnnType); @@ -155,13 +154,13 @@ public class TestTimeDistributed extends BaseDL4JTest { LayerConfiguration l1; switch (ffType){ case 0: - l1 = new DenseLayer.Builder().nOut(5).build(); + l1 = DenseLayer.builder().nOut(5).build(); break; case 1: - l1 = new VariationalAutoencoder.Builder().nOut(5).encoderLayerSizes(5).decoderLayerSizes(5).build(); + l1 = VariationalAutoencoder.builder().nOut(5).encoderLayerSizes(5).decoderLayerSizes(5).build(); break; case 2: - l1 = new AutoEncoder.Builder().nOut(5).build(); + l1 = AutoEncoder.builder().nOut(5).build(); break; default: throw new RuntimeException("Not implemented: " + ffType); @@ -185,8 +184,8 @@ public class TestTimeDistributed extends BaseDL4JTest { l0a = (BaseRecurrentLayer) ((Bidirectional) l0).getFwd(); l2a = (BaseRecurrentLayer) ((Bidirectional) l2).getFwd(); } - assertEquals(rnnDataFormat, l0a.getRnnDataFormat()); - assertEquals(rnnDataFormat, l2a.getRnnDataFormat()); + assertEquals(rnnDataFormat, l0a.getDataFormat()); + assertEquals(rnnDataFormat, l2a.getDataFormat()); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/SameDiffCustomLayerTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/SameDiffCustomLayerTests.java index 534af7bc2..daca128d4 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/SameDiffCustomLayerTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/SameDiffCustomLayerTests.java @@ -76,7 +76,7 @@ public class SameDiffCustomLayerTests extends BaseDL4JTest { public void testInputValidationSameDiffLayer() { final NeuralNetConfiguration config = NeuralNetConfiguration.builder().list() .layer(new ValidatingSameDiffLayer()) - .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MSE).activation(Activation.SIGMOID).nOut(2).build()) + .layer(OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).activation(Activation.SIGMOID).nOut(2).build()) .inputType(InputType.feedForward(2)) .build(); @@ -96,7 +96,7 @@ public class SameDiffCustomLayerTests extends BaseDL4JTest { public void testInputValidationSameDiffVertex(){ final ComputationGraphConfiguration config = NeuralNetConfiguration.builder().graphBuilder() 
.addVertex("a", new ValidatingSameDiffVertex(), "input") - .addLayer("output", new OutputLayer.Builder(LossFunctions.LossFunction.MSE).activation(Activation.SIGMOID).nOut(2).build(), "a") + .addLayer("output", OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).activation(Activation.SIGMOID).nOut(2).build(), "a") .addInputs("input") .setInputTypes(InputType.feedForward(2)) .setOutputs("output") diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffConv.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffConv.java index f0b23e335..44f216a86 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffConv.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffConv.java @@ -67,7 +67,7 @@ public class TestSameDiffConv extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() - .layer(new SameDiffConv.Builder().nIn(nIn).nOut(nOut).kernelSize(kH, kW).build()) + .layer(SameDiffConv.builder().nIn(nIn).nOut(nOut).kernelSize(kH, kW).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -131,7 +131,7 @@ public class TestSameDiffConv extends BaseDL4JTest { .dataType(DataType.DOUBLE) .seed(12345) .list() - .layer(new SameDiffConv.Builder() + .layer(SameDiffConv.builder() .weightInit(WeightInit.XAVIER) .nIn(nIn) .nOut(nOut) @@ -142,7 +142,7 @@ public class TestSameDiffConv extends BaseDL4JTest { .activation(a) .hasBias(hasBias) .build()) - .layer(new SameDiffConv.Builder() + .layer(SameDiffConv.builder() .weightInit(WeightInit.XAVIER) .nIn(nOut) .nOut(nOut) @@ -165,7 +165,7 @@ public class TestSameDiffConv extends BaseDL4JTest { .weightInit(WeightInit.XAVIER) .seed(12345) .list() - .layer(new ConvolutionLayer.Builder() + .layer(ConvolutionLayer.builder() .nIn(nIn) .nOut(nOut) .kernelSize(kernel) @@ -175,7 +175,7 @@ public class TestSameDiffConv extends BaseDL4JTest { .activation(a) .hasBias(hasBias) .build()) - .layer(new ConvolutionLayer.Builder() + .layer(ConvolutionLayer.builder() .nIn(nOut) .nOut(nOut) .kernelSize(kernel) @@ -273,7 +273,7 @@ public class TestSameDiffConv extends BaseDL4JTest { .trainingWorkspaceMode(workspaces ? WorkspaceMode.ENABLED : WorkspaceMode.NONE) .inferenceWorkspaceMode(workspaces ? 
WorkspaceMode.ENABLED : WorkspaceMode.NONE) .list() - .layer(new SameDiffConv.Builder() + .layer(SameDiffConv.builder() .weightInit(WeightInit.XAVIER) .nIn(nIn) .nOut(nOut) @@ -284,7 +284,7 @@ public class TestSameDiffConv extends BaseDL4JTest { .activation(Activation.TANH) .hasBias(hasBias) .build()) - .layer(new SameDiffConv.Builder() + .layer(SameDiffConv.builder() .weightInit(WeightInit.XAVIER) .nIn(nOut) .nOut(nOut) @@ -295,7 +295,7 @@ public class TestSameDiffConv extends BaseDL4JTest { .activation(Activation.SIGMOID) .hasBias(hasBias) .build()) - .layer(new OutputLayer.Builder().activation(Activation.SOFTMAX) + .layer(OutputLayer.builder().activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT) .nIn(nOut * outH * outW) .nOut(nOut).build()) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffDense.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffDense.java index 93d9421c3..14a8d1fb6 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffDense.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffDense.java @@ -65,7 +65,7 @@ public class TestSameDiffDense extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() - .layer(new SameDiffDense.Builder().nIn(nIn).nOut(nOut).build()) + .layer(SameDiffDense.builder().nIn(nIn).nOut(nOut).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -106,7 +106,7 @@ public class TestSameDiffDense extends BaseDL4JTest { .inferenceWorkspaceMode(wsm) .trainingWorkspaceMode(wsm) .list() - .layer(new SameDiffDense.Builder().nIn(nIn).nOut(nOut) + .layer(SameDiffDense.builder().nIn(nIn).nOut(nOut) .activation(a) .build()) .build(); @@ -118,7 +118,7 @@ public class TestSameDiffDense extends BaseDL4JTest { NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder() .list() - .layer(new DenseLayer.Builder().activation(a).nIn(nIn).nOut(nOut).build()) + .layer(DenseLayer.builder().activation(a).nIn(nIn).nOut(nOut).build()) .build(); MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); @@ -178,13 +178,13 @@ public class TestSameDiffDense extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .seed(12345) .list() - .layer(new SameDiffDense.Builder().nIn(nIn).nOut(nOut) + .layer(SameDiffDense.builder().nIn(nIn).nOut(nOut) .weightInit(WeightInit.XAVIER) .activation(a).build()) - .layer(new SameDiffDense.Builder().nIn(nOut).nOut(nOut) + .layer(SameDiffDense.builder().nIn(nOut).nOut(nOut) .weightInit(WeightInit.XAVIER) .activation(a).build()) - .layer(new OutputLayer.Builder().nIn(nOut).nOut(nOut) + .layer(OutputLayer.builder().nIn(nOut).nOut(nOut) .weightInit(WeightInit.XAVIER) .activation(a).build()) .validateOutputLayerConfig(false) @@ -199,9 +199,9 @@ public class TestSameDiffDense extends BaseDL4JTest { .seed(12345) .weightInit(WeightInit.XAVIER) .list() - .layer(new DenseLayer.Builder().activation(a).nIn(nIn).nOut(nOut).build()) - .layer(new DenseLayer.Builder().activation(a).nIn(nOut).nOut(nOut).build()) - .layer(new OutputLayer.Builder().nIn(nOut).nOut(nOut) + .layer(DenseLayer.builder().activation(a).nIn(nIn).nOut(nOut).build()) + .layer(DenseLayer.builder().activation(a).nIn(nOut).nOut(nOut).build()) + .layer(OutputLayer.builder().nIn(nOut).nOut(nOut) .activation(a).build()) .validateOutputLayerConfig(false) .build(); @@ -267,10 +267,10 @@ public class 
TestSameDiffDense extends BaseDL4JTest { .trainingWorkspaceMode(workspaces ? WorkspaceMode.ENABLED : WorkspaceMode.NONE) .inferenceWorkspaceMode(workspaces ? WorkspaceMode.ENABLED : WorkspaceMode.NONE) .list() - .layer(new SameDiffDense.Builder().nIn(nIn).nOut(nOut) + .layer(SameDiffDense.builder().nIn(nIn).nOut(nOut) .activation(a) .build()) - .layer(new OutputLayer.Builder().nIn(nOut).nOut(nOut).activation(Activation.SOFTMAX) + .layer(OutputLayer.builder().nIn(nOut).nOut(nOut).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); @@ -279,8 +279,8 @@ public class TestSameDiffDense extends BaseDL4JTest { NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder() .list() - .layer(new DenseLayer.Builder().activation(a).nIn(nIn).nOut(nOut).build()) - .layer(new OutputLayer.Builder().nIn(nOut).nOut(nOut).activation(Activation.SOFTMAX) + .layer(DenseLayer.builder().activation(a).nIn(nIn).nOut(nOut).build()) + .layer(OutputLayer.builder().nIn(nOut).nOut(nOut).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); @@ -357,9 +357,9 @@ public class TestSameDiffDense extends BaseDL4JTest { .inferenceWorkspaceMode(wsm) .updater(new Adam(0.1)) .list() - .layer(new SameDiffDense.Builder().nIn(nIn).nOut(5).activation(Activation.TANH).build()) - .layer(new SameDiffDense.Builder().nIn(5).nOut(5).activation(Activation.TANH).build()) - .layer(new OutputLayer.Builder().nIn(5).nOut(nOut).activation(Activation.SOFTMAX) + .layer(SameDiffDense.builder().nIn(nIn).nOut(5).activation(Activation.TANH).build()) + .layer(SameDiffDense.builder().nIn(5).nOut(5).activation(Activation.TANH).build()) + .layer(OutputLayer.builder().nIn(5).nOut(nOut).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); @@ -370,9 +370,9 @@ public class TestSameDiffDense extends BaseDL4JTest { .seed(12345) .updater(new Adam(0.1)) .list() - .layer(new DenseLayer.Builder().activation(Activation.TANH).nIn(nIn).nOut(5).build()) - .layer(new DenseLayer.Builder().activation(Activation.TANH).nIn(5).nOut(5).build()) - .layer(new OutputLayer.Builder().nIn(5).nOut(nOut).activation(Activation.SOFTMAX) + .layer(DenseLayer.builder().activation(Activation.TANH).nIn(nIn).nOut(5).build()) + .layer(DenseLayer.builder().activation(Activation.TANH).nIn(5).nOut(5).build()) + .layer(OutputLayer.builder().nIn(5).nOut(nOut).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); @@ -428,9 +428,9 @@ public class TestSameDiffDense extends BaseDL4JTest { .trainingWorkspaceMode(workspaces ? WorkspaceMode.ENABLED : WorkspaceMode.NONE) .inferenceWorkspaceMode(workspaces ? 
WorkspaceMode.ENABLED : WorkspaceMode.NONE) .list() - .layer(new SameDiffDense.Builder().nIn(nIn).nOut(nOut).activation(a).build()) - .layer(new SameDiffDense.Builder().nIn(nOut).nOut(nOut).activation(a).build()) - .layer(new OutputLayer.Builder().nIn(nOut).nOut(nOut).activation(Activation.SOFTMAX) + .layer(SameDiffDense.builder().nIn(nIn).nOut(nOut).activation(a).build()) + .layer(SameDiffDense.builder().nIn(nOut).nOut(nOut).activation(a).build()) + .layer(OutputLayer.builder().nIn(nOut).nOut(nOut).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) //.inputType(InputType.feedForward(nIn)) //TODO .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffDenseVertex.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffDenseVertex.java index 5fd371d13..4a0f23c7b 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffDenseVertex.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffDenseVertex.java @@ -74,7 +74,7 @@ public class TestSameDiffDenseVertex extends BaseDL4JTest { .addInputs("in") .addVertex("0", new SameDiffDenseVertex(nIn, nOut, a, WeightInit.XAVIER), "in") .addVertex("1", new SameDiffDenseVertex(nOut, nOut, a, WeightInit.XAVIER), "0") - .layer("2", new OutputLayer.Builder().nIn(nOut).nOut(nOut).activation(Activation.SOFTMAX) + .layer("2", OutputLayer.builder().nIn(nOut).nOut(nOut).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build(), "1") .setOutputs("2") .build(); @@ -90,9 +90,9 @@ public class TestSameDiffDenseVertex extends BaseDL4JTest { .updater(new Sgd(0.0)) .graphBuilder() .addInputs("in") - .addLayer("0", new DenseLayer.Builder().nIn(nIn).nOut(nOut).activation(a).build(), "in") - .addLayer("1", new DenseLayer.Builder().nIn(nOut).nOut(nOut).activation(a).build(), "0") - .layer("2", new OutputLayer.Builder().nIn(nOut).nOut(nOut).activation(Activation.SOFTMAX) + .addLayer("0", DenseLayer.builder().nIn(nIn).nOut(nOut).activation(a).build(), "in") + .addLayer("1", DenseLayer.builder().nIn(nOut).nOut(nOut).activation(a).build(), "0") + .layer("2", OutputLayer.builder().nIn(nOut).nOut(nOut).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build(), "1") .setOutputs("2") .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffLambda.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffLambda.java index 1514a6709..a77c74d88 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffLambda.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffLambda.java @@ -69,9 +69,9 @@ public class TestSameDiffLambda extends BaseDL4JTest { .updater(new Adam(0.01)) .graphBuilder() .addInputs("in") - .addLayer("0", new DenseLayer.Builder().nIn(5).nOut(5).activation(Activation.TANH).build(), "in") + .addLayer("0", DenseLayer.builder().nIn(5).nOut(5).activation(Activation.TANH).build(), "in") .addLayer("1", new SameDiffSimpleLambdaLayer(), "0") - .addLayer("2", new OutputLayer.Builder().nIn(5).nOut(5).activation(Activation.SOFTMAX) + .addLayer("2", OutputLayer.builder().nIn(5).nOut(5).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build(), "1") .setOutputs("2") .build(); @@ -84,10 +84,10 @@ 
public class TestSameDiffLambda extends BaseDL4JTest { .updater(new Adam(0.01)) .graphBuilder() .addInputs("in") - .addLayer("0", new DenseLayer.Builder().nIn(5).nOut(5).activation(Activation.TANH).build(), "in") + .addLayer("0", DenseLayer.builder().nIn(5).nOut(5).activation(Activation.TANH).build(), "in") .addVertex("1", new ShiftVertex(1.0), "0") .addVertex("2", new ScaleVertex(2.0), "1") - .addLayer("3", new OutputLayer.Builder().nIn(5).nOut(5).activation(Activation.SOFTMAX) + .addLayer("3", OutputLayer.builder().nIn(5).nOut(5).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build(), "2") .setOutputs("3") .build(); @@ -151,10 +151,10 @@ public class TestSameDiffLambda extends BaseDL4JTest { .updater(new Adam(0.01)) .graphBuilder() .addInputs("in1", "in2") - .addLayer("0", new DenseLayer.Builder().nIn(5).nOut(5).activation(Activation.TANH).build(), "in1") - .addLayer("1", new DenseLayer.Builder().nIn(5).nOut(5).activation(Activation.TANH).build(), "in2") + .addLayer("0", DenseLayer.builder().nIn(5).nOut(5).activation(Activation.TANH).build(), "in1") + .addLayer("1", DenseLayer.builder().nIn(5).nOut(5).activation(Activation.TANH).build(), "in2") .addVertex("lambda", new SameDiffSimpleLambdaVertex(), "0", "1") - .addLayer("2", new OutputLayer.Builder().nIn(5).nOut(5).activation(Activation.SOFTMAX) + .addLayer("2", OutputLayer.builder().nIn(5).nOut(5).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build(), "lambda") .setOutputs("2") .build(); @@ -168,10 +168,10 @@ public class TestSameDiffLambda extends BaseDL4JTest { .updater(new Adam(0.01)) .graphBuilder() .addInputs("in1", "in2") - .addLayer("0", new DenseLayer.Builder().nIn(5).nOut(5).activation(Activation.TANH).build(), "in1") - .addLayer("1", new DenseLayer.Builder().nIn(5).nOut(5).activation(Activation.TANH).build(), "in2") + .addLayer("0", DenseLayer.builder().nIn(5).nOut(5).activation(Activation.TANH).build(), "in1") + .addLayer("1", DenseLayer.builder().nIn(5).nOut(5).activation(Activation.TANH).build(), "in2") .addVertex("elementwise", new ElementWiseVertex(ElementWiseVertex.Op.Product), "0", "1") - .addLayer("3", new OutputLayer.Builder().nIn(5).nOut(5).activation(Activation.SOFTMAX) + .addLayer("3", OutputLayer.builder().nIn(5).nOut(5).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build(), "elementwise") .setOutputs("3") .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffOutput.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffOutput.java index 0a3d2f915..2dc3df4e0 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffOutput.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffOutput.java @@ -51,7 +51,7 @@ public class TestSameDiffOutput extends BaseDL4JTest { .seed(12345) .updater(new Adam(0.01)) .list() - .layer(new DenseLayer.Builder().nIn(5).nOut(5).activation(Activation.TANH).build()) + .layer(DenseLayer.builder().nIn(5).nOut(5).activation(Activation.TANH).build()) .layer(new SameDiffMSELossLayer()) .build(); @@ -59,8 +59,8 @@ public class TestSameDiffOutput extends BaseDL4JTest { .seed(12345) .updater(new Adam(0.01)) .list() - .layer(new DenseLayer.Builder().nIn(5).nOut(5).activation(Activation.TANH).build()) - .layer(new LossLayer.Builder().activation(Activation.IDENTITY).lossFunction(LossFunctions.LossFunction.MSE).build()) 
+ .layer(DenseLayer.builder().nIn(5).nOut(5).activation(Activation.TANH).build()) + .layer(LossLayer.builder().activation(Activation.IDENTITY).lossFunction(LossFunctions.LossFunction.MSE.getILossFunction()).build()) .build(); MultiLayerNetwork netSD = new MultiLayerNetwork(confSD); @@ -113,7 +113,7 @@ public class TestSameDiffOutput extends BaseDL4JTest { .seed(12345) .updater(new Adam(0.01)) .list() - .layer(new DenseLayer.Builder().nIn(5).nOut(5).activation(Activation.TANH).build()) + .layer(DenseLayer.builder().nIn(5).nOut(5).activation(Activation.TANH).build()) .layer(new SameDiffMSEOutputLayer(5, 5, a, WeightInit.XAVIER)) .build(); @@ -121,8 +121,8 @@ public class TestSameDiffOutput extends BaseDL4JTest { .seed(12345) .updater(new Adam(0.01)) .list() - .layer(new DenseLayer.Builder().nIn(5).nOut(5).activation(Activation.TANH).build()) - .layer(new OutputLayer.Builder().nIn(5).nOut(5).activation(a).lossFunction(LossFunctions.LossFunction.MSE).build()) + .layer(DenseLayer.builder().nIn(5).nOut(5).activation(Activation.TANH).build()) + .layer(OutputLayer.builder().nIn(5).nOut(5).activation(a).lossFunction(LossFunctions.LossFunction.MSE).build()) .build(); MultiLayerNetwork netSD = new MultiLayerNetwork(confSD); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffConv.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffConv.java index f8a2f173b..35f87c35a 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffConv.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffConv.java @@ -21,6 +21,7 @@ package org.deeplearning4j.nn.layers.samediff.testlayers; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -45,64 +46,74 @@ import java.util.*; @Data @EqualsAndHashCode(callSuper = true) @JsonIgnoreProperties({"paramShapes"}) +@NoArgsConstructor +@SuperBuilder public class SameDiffConv extends SameDiffLayer { + public static abstract class SameDiffConvBuilder> extends + SameDiffLayerBuilder { + public B kernelSize(int... k) { + this.kernelSize$value = k; + this.kernelSize$set = true; + return self(); + } + + public B stride(int... s) { + this.stride$value = s; + this.stride$set = true; + return self(); + } + + public B padding(int... 
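One detail in the TestSameDiffOutput hunk above is not purely mechanical: the rewritten LossLayer builder is handed an ILossFunction instance, hence the extra getILossFunction() call on the enum, while OutputLayer.builder() keeps accepting the LossFunction enum as before. A short sketch of both forms as they appear in this patch (builder return types assumed to be the layer classes themselves):

    // OutputLayer: the enum is accepted directly
    OutputLayer out = OutputLayer.builder()
            .nIn(5).nOut(5)
            .activation(Activation.SOFTMAX)
            .lossFunction(LossFunctions.LossFunction.MCXENT)
            .build();

    // LossLayer: an ILossFunction instance is expected by the generated builder
    LossLayer loss = LossLayer.builder()
            .activation(Activation.IDENTITY)
            .lossFunction(LossFunctions.LossFunction.MSE.getILossFunction())
            .build();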
p) { + this.padding$value = p; + this.padding$set = true; + return self(); + } + } private static final List WEIGHT_KEYS = Collections.singletonList(ConvolutionParamInitializer.WEIGHT_KEY); private static final List BIAS_KEYS = Collections.singletonList(ConvolutionParamInitializer.BIAS_KEY); //Order to match 'vanilla' conv layer implementation, for easy comparison private static final List PARAM_KEYS = Arrays.asList(ConvolutionParamInitializer.BIAS_KEY, ConvolutionParamInitializer.WEIGHT_KEY); - private long nIn; - private long nOut; - private Activation activation; - private int[] kernel; - private int[] stride; - private int[] padding; - private ConvolutionMode cm; - private int[] dilation; - private boolean hasBias; - protected SameDiffConv(Builder b) { - super(b); - this.nIn = b.nIn; - this.nOut = b.nOut; - this.activation = b.activation; - this.kernel = b.kernel; - this.stride = b.stride; - this.padding = b.padding; - this.cm = b.cm; - this.dilation = b.dilation; - this.hasBias = b.hasBias; - } + private int nIn; + private int nOut; + @Builder.Default private Activation activation = Activation.TANH; + @Builder.Default private int[] kernelSize = new int[]{2, 2}; + + @Builder.Default private int[] stride = new int[]{1, 1}; + @Builder.Default private int[] padding = new int[]{0, 0}; + @Builder.Default private int[] dilation = new int[]{1, 1}; + @Builder.Default private ConvolutionMode convolutionMode = ConvolutionMode.Same; + @Builder.Default private boolean hasBias = true; + + - private SameDiffConv(){ - //No arg constructor for Jackson/JSON serialization - } @Override public InputType getOutputType(int layerIndex, InputType inputType) { InputType.InputTypeConvolutional c = (InputType.InputTypeConvolutional) inputType; - return InputTypeUtil.getOutputTypeCnnLayers(inputType, kernel, stride, padding, new int[]{1, 1}, - cm, nOut, layerIndex, getLayerName(), SameDiffConv.class); + return InputTypeUtil.getOutputTypeCnnLayers(inputType, kernelSize, stride, padding, new int[]{1, 1}, + convolutionMode, nOut, layerIndex, getName(), SameDiffConv.class); } @Override public void setNIn(InputType inputType, boolean override) { if (nIn <= 0 || override) { InputType.InputTypeConvolutional c = (InputType.InputTypeConvolutional) inputType; - this.nIn = c.getChannels(); + this.nIn = (int) c.getChannels(); } } @Override public InputPreProcessor getPreProcessorForInputType(InputType inputType) { - return InputTypeUtil.getPreProcessorForInputTypeCnnLayers(inputType, getLayerName()); + return InputTypeUtil.getPreProcessorForInputTypeCnnLayers(inputType, getName()); } @Override public void defineParameters(SDLayerParams params) { params.clear(); - val weightsShape = new long[]{kernel[0], kernel[1], nIn, nOut}; //[kH, kW, iC, oC] in libnd4j + val weightsShape = new long[]{kernelSize[0], kernelSize[1], nIn, nOut}; //[kH, kW, iC, oC] in libnd4j params.addWeightParam(ConvolutionParamInitializer.WEIGHT_KEY, weightsShape); if(hasBias) { val biasShape = new long[]{1, nOut}; @@ -113,8 +124,8 @@ public class SameDiffConv extends SameDiffLayer { @Override public void initializeParameters(Map params) { try(MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) { - double fanIn = nIn * kernel[0] * kernel[1]; - double fanOut = nOut * kernel[0] * kernel[1] / ((double) stride[0] * stride[1]); + double fanIn = nIn * kernelSize[0] * kernelSize[1]; + double fanOut = nOut * kernelSize[0] * kernelSize[1] / ((double) stride[0] * stride[1]); for (Map.Entry e : params.entrySet()) { if(paramWeightInit != null && 
paramWeightInit.containsKey(e.getKey())){ paramWeightInit.get(e.getKey()).init(fanIn, fanOut, e.getValue().shape(), 'c', e.getValue()); @@ -135,11 +146,11 @@ public class SameDiffConv extends SameDiffLayer { SDVariable w = paramTable.get(ConvolutionParamInitializer.WEIGHT_KEY); Conv2DConfig c = Conv2DConfig.builder() - .kH(kernel[0]).kW(kernel[1]) + .kH(kernelSize[0]).kW(kernelSize[1]) .pH(padding[0]).pW(padding[1]) .sH(stride[0]).sW(stride[1]) .dH(dilation[0]).dW(dilation[1]) - .isSameMode(this.cm == ConvolutionMode.Same) + .isSameMode(this.convolutionMode == ConvolutionMode.Same) .build(); SDVariable conv = null; @@ -159,72 +170,10 @@ public class SameDiffConv extends SameDiffLayer { if (activation == null) { activation = SameDiffLayerUtils.fromIActivation(clone.getActivation()); } - if (cm == null) { - cm = clone.getConvolutionMode(); + if (convolutionMode == null) { + convolutionMode = clone.getConvolutionMode(); } } - public static class Builder extends SameDiffLayer.Builder { - private int nIn; - private int nOut; - private Activation activation = Activation.TANH; - private int[] kernel = new int[]{2, 2}; - - private int[] stride = new int[]{1, 1}; - private int[] padding = new int[]{0, 0}; - private int[] dilation = new int[]{1, 1}; - private ConvolutionMode cm = ConvolutionMode.Same; - private boolean hasBias = true; - - public Builder nIn(int nIn) { - this.nIn = nIn; - return this; - } - - public Builder nOut(int nOut) { - this.nOut = nOut; - return this; - } - - public Builder activation(Activation activation) { - this.activation = activation; - return this; - } - - public Builder kernelSize(int... k) { - this.kernel = k; - return this; - } - - public Builder stride(int... s) { - this.stride = s; - return this; - } - - public Builder padding(int... p) { - this.padding = p; - return this; - } - - public Builder convolutionMode(ConvolutionMode cm) { - this.cm = cm; - return this; - } - - public Builder dilation(int... 
d) { - this.dilation = d; - return this; - } - - public Builder hasBias(boolean hasBias){ - this.hasBias = hasBias; - return this; - } - - @Override - public SameDiffConv build() { - return new SameDiffConv(this); - } - } } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffDense.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffDense.java index e1799443d..7f3e41915 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffDense.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffDense.java @@ -20,13 +20,17 @@ package org.deeplearning4j.nn.layers.samediff.testlayers; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import java.util.*; import lombok.Data; import lombok.EqualsAndHashCode; +import lombok.NoArgsConstructor; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayer; import org.deeplearning4j.nn.conf.layers.samediff.SDLayerParams; +import org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayer; import org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayerUtils; import org.deeplearning4j.nn.params.DefaultParamInitializer; import org.deeplearning4j.nn.weights.WeightInitUtil; @@ -34,125 +38,93 @@ import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.api.ndarray.INDArray; -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; - -import java.util.*; @Data -@EqualsAndHashCode(callSuper = true, exclude = {"paramShapes"}) +@EqualsAndHashCode( + callSuper = true, + exclude = {"paramShapes"}) +@NoArgsConstructor() @JsonIgnoreProperties("paramShapes") +@SuperBuilder public class SameDiffDense extends SameDiffLayer { - private static final List W_KEYS = Collections.singletonList(DefaultParamInitializer.WEIGHT_KEY); - private static final List B_KEYS = Collections.singletonList(DefaultParamInitializer.BIAS_KEY); - private static final List PARAM_KEYS = Arrays.asList(DefaultParamInitializer.WEIGHT_KEY, DefaultParamInitializer.BIAS_KEY); + private static final List W_KEYS = + Collections.singletonList(DefaultParamInitializer.WEIGHT_KEY); + private static final List B_KEYS = + Collections.singletonList(DefaultParamInitializer.BIAS_KEY); + private static final List PARAM_KEYS = + Arrays.asList(DefaultParamInitializer.WEIGHT_KEY, DefaultParamInitializer.BIAS_KEY); - private Map paramShapes; + private final Map paramShapes = new HashMap<>(); - private long nIn; - private long nOut; - private Activation activation; + private long nIn; + private long nOut; + private Activation activation; - protected SameDiffDense(Builder builder) { - super(builder); + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + return null; + } - nIn = builder.nIn; - nOut = builder.nOut; - activation = builder.activation; + @Override + public void setNIn(InputType inputType, boolean override) { + if (override) { + this.nIn = ((InputType.InputTypeFeedForward) inputType).getSize(); } + } - private SameDiffDense(){ - //No op constructor for Jackson - } + @Override + public InputPreProcessor getPreProcessorForInputType(InputType inputType) { + 
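The SameDiffConv rewrite above is the heart of the change: the handwritten Builder is deleted in favour of @SuperBuilder, field defaults move onto @Builder.Default, and only the varargs setters (kernelSize, stride, padding) remain hand-written, inside a partial builder class that assigns the kernelSize$value / kernelSize$set fields Lombok generates for defaulted fields. A self-contained sketch of that pattern with hypothetical class and field names:

    import lombok.Builder;
    import lombok.Data;
    import lombok.NoArgsConstructor;
    import lombok.experimental.SuperBuilder;

    @Data
    @NoArgsConstructor   // kept so Jackson can still instantiate the configuration
    @SuperBuilder        // generates ConvSpecBuilder plus builder()/build()/self()
    class ConvSpec {

        @Builder.Default private int[] kernelSize = new int[]{2, 2};
        @Builder.Default private boolean hasBias = true;

        // Partial builder: Lombok merges the generated members into this class, so the
        // hand-written varargs setter can fill the fields that @Builder.Default creates.
        public static abstract class ConvSpecBuilder<C extends ConvSpec, B extends ConvSpecBuilder<C, B>> {
            public B kernelSize(int... k) {
                this.kernelSize$value = k;
                this.kernelSize$set = true;
                return self();
            }
        }
    }

    // Usage: ConvSpec.builder().kernelSize(3, 3).build()  (hasBias keeps its default of true)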
return null; + } - @Override - public InputType getOutputType(int layerIndex, InputType inputType) { - return null; - } + @Override + public void defineParameters(SDLayerParams params) { + params.clear(); + params.addWeightParam(DefaultParamInitializer.WEIGHT_KEY, nIn, nOut); + params.addBiasParam(DefaultParamInitializer.BIAS_KEY, 1, nOut); + } - @Override - public void setNIn(InputType inputType, boolean override) { - if(override){ - this.nIn = ((InputType.InputTypeFeedForward)inputType).getSize(); + @Override + public void initializeParameters(Map params) { + for (Map.Entry e : params.entrySet()) { + if (paramWeightInit != null && paramWeightInit.containsKey(e.getKey())) { + paramWeightInit.get(e.getKey()).init(nIn, nOut, e.getValue().shape(), 'c', e.getValue()); + } else { + if (DefaultParamInitializer.BIAS_KEY.equals(e.getKey())) { + e.getValue().assign(0.0); + } else { + // Normally use 'c' order, but use 'f' for direct comparison to DL4J + // DenseLayerConfiguration + WeightInitUtil.initWeights( + nIn, nOut, new long[] {nIn, nOut}, weightInit, null, 'f', e.getValue()); } + } } + } - @Override - public InputPreProcessor getPreProcessorForInputType(InputType inputType) { - return null; + @Override + public SDVariable defineLayer( + SameDiff sd, SDVariable layerInput, Map paramTable, SDVariable mask) { + SDVariable weights = paramTable.get(DefaultParamInitializer.WEIGHT_KEY); + SDVariable bias = paramTable.get(DefaultParamInitializer.BIAS_KEY); + + SDVariable mmul = sd.mmul("mmul", layerInput, weights); + SDVariable z = mmul.add("z", bias); + return activation.asSameDiff("out", sd, z); + } + + @Override + public void applyGlobalConfigToLayer( + NeuralNetConfiguration.NeuralNetConfigurationBuilder globalConfig) { + NeuralNetConfiguration clone = globalConfig.clone().build(); + if (activation == null) { + activation = SameDiffLayerUtils.fromIActivation(clone.getActivation()); } + } - @Override - public void defineParameters(SDLayerParams params) { - params.clear(); - params.addWeightParam(DefaultParamInitializer.WEIGHT_KEY, nIn, nOut); - params.addBiasParam(DefaultParamInitializer.BIAS_KEY, 1, nOut); - } - - @Override - public void initializeParameters(Map params){ - for(Map.Entry e : params.entrySet()){ - if(paramWeightInit != null && paramWeightInit.containsKey(e.getKey())){ - paramWeightInit.get(e.getKey()).init(nIn, nOut, e.getValue().shape(), 'c', e.getValue()); - } else { - if(DefaultParamInitializer.BIAS_KEY.equals(e.getKey())){ - e.getValue().assign(0.0); - } else { - //Normally use 'c' order, but use 'f' for direct comparison to DL4J DenseLayerConfiguration - WeightInitUtil.initWeights(nIn, nOut, new long[]{nIn, nOut}, weightInit, null, 'f', e.getValue()); - } - } - } - } - - @Override - public SDVariable defineLayer(SameDiff sd, SDVariable layerInput, Map paramTable, SDVariable mask) { - SDVariable weights = paramTable.get(DefaultParamInitializer.WEIGHT_KEY); - SDVariable bias = paramTable.get(DefaultParamInitializer.BIAS_KEY); - - SDVariable mmul = sd.mmul("mmul", layerInput, weights); - SDVariable z = mmul.add("z", bias); - return activation.asSameDiff("out", sd, z); - } - - @Override - public void applyGlobalConfigToLayer(NeuralNetConfiguration.NeuralNetConfigurationBuilder globalConfig) { - NeuralNetConfiguration clone = globalConfig.clone().build(); - if(activation == null){ - activation = SameDiffLayerUtils.fromIActivation(clone.getActivation()); - } - } - - public char paramReshapeOrder(String param){ - //To match DL4J for easy comparison - return 'f'; - } - - public 
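The private "no-arg constructor for Jackson" that both test layers used to carry is replaced by Lombok's @NoArgsConstructor, since @SuperBuilder on its own would leave the class without a default constructor for deserialization. A generic sketch of that interaction, using plain Jackson and hypothetical names rather than the DL4J configuration serializers:

    import com.fasterxml.jackson.databind.ObjectMapper;
    import lombok.Data;
    import lombok.NoArgsConstructor;
    import lombok.experimental.SuperBuilder;

    @Data
    @NoArgsConstructor   // Jackson instantiates through this, then populates the fields
    @SuperBuilder        // application code builds instances fluently
    class DenseSpec {
        private long nIn;
        private long nOut;
    }

    public class RoundTripDemo {
        public static void main(String[] args) throws Exception {
            ObjectMapper mapper = new ObjectMapper();
            String json = mapper.writeValueAsString(DenseSpec.builder().nIn(4).nOut(3).build());
            DenseSpec back = mapper.readValue(json, DenseSpec.class);
            System.out.println(back);   // DenseSpec(nIn=4, nOut=3)
        }
    }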
static class Builder extends SameDiffLayer.Builder { - - private int nIn; - private int nOut; - - private Activation activation; - - public Builder nIn(int nIn){ - this.nIn = nIn; - return this; - } - - public Builder nOut(int nOut){ - this.nOut = nOut; - return this; - } - - public Builder activation(Activation activation){ - this.activation = activation; - return this; - } - - @Override - public SameDiffDense build() { - return new SameDiffDense(this); - } - } + public char paramReshapeOrder(String param) { + // To match DL4J for easy comparison + return 'f'; + } } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffMSEOutputLayer.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffMSEOutputLayer.java index a93db0e56..24a2b01cd 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffMSEOutputLayer.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffMSEOutputLayer.java @@ -20,6 +20,7 @@ package org.deeplearning4j.nn.layers.samediff.testlayers; +import java.util.Map; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.samediff.SDLayerParams; @@ -31,62 +32,63 @@ import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.Map; - public class SameDiffMSEOutputLayer extends SameDiffOutputLayer { - private final int nIn; - private final int nOut; - private final Activation activation; - private final WeightInit weightInit; + private final int nIn; + private final int nOut; + private final Activation activation; + private final WeightInit weightInit; - public SameDiffMSEOutputLayer(int nIn, int nOut, Activation activation, WeightInit weightInit){ - this.nIn = nIn; - this.nOut = nOut; - this.activation = activation; - this.weightInit = weightInit; - } + public SameDiffMSEOutputLayer(int nIn, int nOut, Activation activation, WeightInit weightInit) { + this.nIn = nIn; + this.nOut = nOut; + this.activation = activation; + this.weightInit = weightInit; + } - @Override - public SDVariable defineLayer(SameDiff sameDiff, SDVariable layerInput, SDVariable labels, Map paramTable) { - SDVariable z = sameDiff.mmul(layerInput, paramTable.get("W")).add(paramTable.get("b")); - SDVariable out = activation.asSameDiff("out", sameDiff, z); - //MSE: 1/nOut * (input-labels)^2 - SDVariable diff = out.sub(labels); - return diff.mul(diff).mean(1).sum(); - } + @Override + public SDVariable defineLayer( + SameDiff sameDiff, + SDVariable layerInput, + SDVariable labels, + Map paramTable) { + SDVariable z = sameDiff.mmul(layerInput, paramTable.get("W")).add(paramTable.get("b")); + SDVariable out = activation.asSameDiff("out", sameDiff, z); + // MSE: 1/nOut * (input-labels)^2 + SDVariable diff = out.sub(labels); + return diff.mul(diff).mean(1).sum(); + } - @Override - public String activationsVertexName() { - return "out"; - } + @Override + public String activationsVertexName() { + return "out"; + } - @Override - public void defineParameters(SDLayerParams params) { - params.addWeightParam("W", nIn, nOut); - params.addBiasParam("b", 1, nOut); - } + @Override + public void defineParameters(SDLayerParams params) { + params.addWeightParam("W", nIn, nOut); + params.addBiasParam("b", 1, nOut); + } - @Override - public void 
initializeParameters(Map params) { - WeightInitUtil.initWeights(nIn, nOut, new long[]{nIn, nOut}, weightInit, null, 'f', params.get("W")); - params.get("b").assign(0.0); - } + @Override + public void initializeParameters(Map params) { + WeightInitUtil.initWeights( + nIn, nOut, new long[] {nIn, nOut}, weightInit, null, 'f', params.get("W")); + params.get("b").assign(0.0); + } - @Override - public InputType getOutputType(int layerIndex, InputType inputType) { - return InputType.feedForward(nOut); - } + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + return InputType.feedForward(nOut); + } - @Override - public char paramReshapeOrder(String param){ - //To match DL4J for easy comparison - return 'f'; - } - - @Override - public void applyGlobalConfigToLayer(NeuralNetConfiguration.NeuralNetConfigurationBuilder globalConfig){ - - } + @Override + public char paramReshapeOrder(String param) { + // To match DL4J for easy comparison + return 'f'; + } + @Override + public void applyGlobalConfigToLayer( + NeuralNetConfiguration.NeuralNetConfigurationBuilder globalConfig) {} } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/variational/TestVAE.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/variational/TestVAE.java index b7c89e007..6e16cacc2 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/variational/TestVAE.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/variational/TestVAE.java @@ -58,7 +58,7 @@ public class TestVAE extends BaseDL4JTest { NeuralNetConfiguration mlc = NeuralNetConfiguration.builder() - .layer(0, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder() + .layer(0, org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.builder() .nIn(10).nOut(5).encoderLayerSizes(12).decoderLayerSizes(13) .build()) .build(); @@ -95,7 +95,7 @@ public class TestVAE extends BaseDL4JTest { for (int i = 0; i < encLayerSizes.length; i++) { NeuralNetConfiguration mlc = NeuralNetConfiguration.builder().list().layer(0, - new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder().nIn(10) + org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.builder().nIn(10) .nOut(5).encoderLayerSizes(encLayerSizes[i]).decoderLayerSizes(13).build()) .build(); @@ -121,7 +121,7 @@ public class TestVAE extends BaseDL4JTest { int inputSize = 3; NeuralNetConfiguration mlc = NeuralNetConfiguration.builder().list() - .layer(0, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder() + .layer(0, org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.builder() .nIn(inputSize).nOut(4).encoderLayerSizes(5).decoderLayerSizes(6).build()) .build(); @@ -159,7 +159,7 @@ public class TestVAE extends BaseDL4JTest { public void testParamGradientOrderAndViews() { Nd4j.getRandom().setSeed(12345); NeuralNetConfiguration mlc = NeuralNetConfiguration.builder().list() - .layer(0, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder() + .layer(0, org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.builder() .nIn(10).nOut(5).encoderLayerSizes(12, 13).decoderLayerSizes(14, 15).build()) .build(); @@ -217,9 +217,9 @@ public class TestVAE extends BaseDL4JTest { Nd4j.getRandom().setSeed(12345); NeuralNetConfiguration mlc = NeuralNetConfiguration.builder().seed(12345).list() - .layer(0, new 
org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder() + .layer(0, org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.builder() .nIn(10).nOut(5).encoderLayerSizes(12, 13).decoderLayerSizes(14, 15).build()) - .layer(1, new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(5).nOut(6) + .layer(1, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(5).nOut(6) .activation(new ActivationTanH()).build()) .build(); @@ -269,22 +269,22 @@ public class TestVAE extends BaseDL4JTest { public void testJsonYaml() { NeuralNetConfiguration config = NeuralNetConfiguration.builder().seed(12345).list() - .layer(0, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder() + .layer(0, org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.builder() .reconstructionDistribution(new GaussianReconstructionDistribution(Activation.IDENTITY)) .nIn(3).nOut(4).encoderLayerSizes(5).decoderLayerSizes(6).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder() + .layer(1, org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.builder() .reconstructionDistribution(new GaussianReconstructionDistribution(Activation.TANH)) .nIn(7).nOut(8).encoderLayerSizes(9).decoderLayerSizes(10).build()) - .layer(2, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder() + .layer(2, org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.builder() .reconstructionDistribution(new BernoulliReconstructionDistribution()).nIn(11) .nOut(12).encoderLayerSizes(13).decoderLayerSizes(14).build()) - .layer(3, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder() + .layer(3, org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.builder() .reconstructionDistribution(new ExponentialReconstructionDistribution(Activation.TANH)) .nIn(11).nOut(12).encoderLayerSizes(13).decoderLayerSizes(14).build()) - .layer(4, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder() + .layer(4, org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.builder() .lossFunction(new ActivationTanH(), LossFunctions.LossFunction.MSE).nIn(11) .nOut(12).encoderLayerSizes(13).decoderLayerSizes(14).build()) - .layer(5, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder() + .layer(5, org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.builder() .reconstructionDistribution(new CompositeReconstructionDistribution.Builder() .addDistribution(5, new GaussianReconstructionDistribution()) .addDistribution(5, @@ -292,7 +292,7 @@ public class TestVAE extends BaseDL4JTest { .addDistribution(5, new BernoulliReconstructionDistribution()) .build()) .nIn(15).nOut(16).encoderLayerSizes(17).decoderLayerSizes(18).build()) - .layer(1, new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(18) + .layer(1, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(18) .nOut(19).activation(new ActivationTanH()).build()) .build(); @@ -354,7 +354,7 @@ public class TestVAE extends BaseDL4JTest { .updater(new Sgd(1.0)) .seed(12345L).dist(new NormalDistribution(0, 1)) .list().layer(0, - new VariationalAutoencoder.Builder().nIn(inOutSize).nOut(3) + VariationalAutoencoder.builder().nIn(inOutSize).nOut(3) .encoderLayerSizes(5).decoderLayerSizes(6) .pzxActivationFunction(Activation.TANH) .reconstructionDistribution( @@ -420,7 +420,7 @@ 
public class TestVAE extends BaseDL4JTest { .updater(new Sgd(1.0)) .seed(12345L).dist(new NormalDistribution(0, 1)) .list().layer(0, - new VariationalAutoencoder.Builder().nIn(inOutSize).nOut(3) + VariationalAutoencoder.builder().nIn(inOutSize).nOut(3) .encoderLayerSizes(5).decoderLayerSizes(6) .pzxActivationFunction(Activation.TANH) .reconstructionDistribution( @@ -462,7 +462,7 @@ public class TestVAE extends BaseDL4JTest { .inferenceWorkspaceMode(ws ? WorkspaceMode.ENABLED : WorkspaceMode.NONE) .weightNoise(new WeightNoise(new org.deeplearning4j.nn.conf.distribution.NormalDistribution(0.1, 0.3))) .list().layer(0, - new VariationalAutoencoder.Builder().nIn(10).nOut(3) + VariationalAutoencoder.builder().nIn(10).nOut(3) .encoderLayerSizes(5).decoderLayerSizes(6) .pzxActivationFunction(Activation.TANH) .reconstructionDistribution(new GaussianReconstructionDistribution()) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/CloseNetworkTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/CloseNetworkTests.java index 5ed2a9c2b..50f423a4c 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/CloseNetworkTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/CloseNetworkTests.java @@ -42,11 +42,11 @@ public class CloseNetworkTests extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .updater(new Adam(1e-3)) - .layer(new ConvolutionLayer.Builder().nOut(5).kernelSize(3, 3).activation(Activation.TANH).build()) - .layer(new BatchNormalization.Builder().nOut(5).build()) - .layer(new SubsamplingLayer.Builder().build()) - .layer(new DenseLayer.Builder().nOut(10).activation(Activation.RELU).build()) - .layer(new OutputLayer.Builder().nOut(10).build()) + .layer(ConvolutionLayer.builder().nOut(5).kernelSize(3, 3).activation(Activation.TANH).build()) + .layer(BatchNormalization.builder().nOut(5).build()) + .layer(SubsamplingLayer.builder().build()) + .layer(DenseLayer.builder().nOut(10).activation(Activation.RELU).build()) + .layer(OutputLayer.builder().nOut(10).build()) .inputType(InputType.convolutional(28, 28, 1)) .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/LargeNetTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/LargeNetTest.java index 052e1fa07..2c36549ee 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/LargeNetTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/LargeNetTest.java @@ -50,8 +50,8 @@ public class LargeNetTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() - .layer(new EmbeddingLayer.Builder().nIn(10_000_000).nOut(300).build()) - .layer(new OutputLayer.Builder().nIn(300).nOut(10).activation(Activation.SOFTMAX).build()) + .layer(EmbeddingLayer.builder().nIn(10_000_000).nOut(300).build()) + .layer(OutputLayer.builder().nIn(300).nOut(10).activation(Activation.SOFTMAX).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -83,8 +83,8 @@ public class LargeNetTest extends BaseDL4JTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") - .layer("0", new EmbeddingLayer.Builder().nIn(10_000_000).nOut(300).build(), "in") - .layer("1", new OutputLayer.Builder().nIn(300).nOut(10).activation(Activation.SOFTMAX).build(), "0") + .layer("0", EmbeddingLayer.builder().nIn(10_000_000).nOut(300).build(), "in") + .layer("1", 
OutputLayer.builder().nIn(300).nOut(10).activation(Activation.SOFTMAX).build(), "0") .setOutputs("1") .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/TestLrChanges.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/TestLrChanges.java index 69099f0a0..a8b9982b4 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/TestLrChanges.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/TestLrChanges.java @@ -51,9 +51,9 @@ public class TestLrChanges extends BaseDL4JTest { .activation(Activation.TANH) .seed(12345) - .layer(new DenseLayer.Builder().nIn(10).nOut(10).updater(new Adam(0.1)).build()) - .layer(new DenseLayer.Builder().nIn(10).nOut(10).updater(new RmsProp(0.01)).build()) - .layer(new OutputLayer.Builder().nIn(10).nOut(10).updater(new NoOp()).lossFunction(LossFunctions.LossFunction.MSE).build()) + .layer(DenseLayer.builder().nIn(10).nOut(10).updater(new Adam(0.1)).build()) + .layer(DenseLayer.builder().nIn(10).nOut(10).updater(new RmsProp(0.01)).build()) + .layer(OutputLayer.builder().nIn(10).nOut(10).updater(new NoOp()).lossFunction(LossFunctions.LossFunction.MSE).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -68,9 +68,9 @@ public class TestLrChanges extends BaseDL4JTest { .activation(Activation.TANH) .seed(12345) - .layer(new DenseLayer.Builder().nIn(10).nOut(10).updater(new Adam(0.5)).build()) //0.5 LR - .layer(new DenseLayer.Builder().nIn(10).nOut(10).updater(new RmsProp(0.01)).build()) - .layer(new OutputLayer.Builder().nIn(10).nOut(10).updater(new NoOp()).lossFunction(LossFunctions.LossFunction.MSE).build()) + .layer(DenseLayer.builder().nIn(10).nOut(10).updater(new Adam(0.5)).build()) //0.5 LR + .layer(DenseLayer.builder().nIn(10).nOut(10).updater(new RmsProp(0.01)).build()) + .layer(OutputLayer.builder().nIn(10).nOut(10).updater(new NoOp()).lossFunction(LossFunctions.LossFunction.MSE).build()) .build(); MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); net2.init(); @@ -118,9 +118,9 @@ public class TestLrChanges extends BaseDL4JTest { .activation(Activation.TANH) .seed(12345) - .layer(new DenseLayer.Builder().nIn(10).nOut(10).updater(new Adam(0.3)).build()) //0.5 LR - .layer(new DenseLayer.Builder().nIn(10).nOut(10).updater(new RmsProp(0.3)).build()) - .layer(new OutputLayer.Builder().nIn(10).nOut(10).updater(new NoOp()).lossFunction(LossFunctions.LossFunction.MSE).build()) + .layer(DenseLayer.builder().nIn(10).nOut(10).updater(new Adam(0.3)).build()) //0.5 LR + .layer(DenseLayer.builder().nIn(10).nOut(10).updater(new RmsProp(0.3)).build()) + .layer(OutputLayer.builder().nIn(10).nOut(10).updater(new NoOp()).lossFunction(LossFunctions.LossFunction.MSE).build()) .build(); MultiLayerNetwork net3 = new MultiLayerNetwork(conf3); net3.init(); @@ -151,9 +151,9 @@ public class TestLrChanges extends BaseDL4JTest { .seed(12345) .updater(new Sgd(0.1)) - .layer(new DenseLayer.Builder().nIn(10).nOut(10).build()) - .layer(new DenseLayer.Builder().nIn(10).nOut(10).build()) - .layer(new OutputLayer.Builder().nIn(10).nOut(10).lossFunction(LossFunctions.LossFunction.MSE).build()) + .layer(DenseLayer.builder().nIn(10).nOut(10).build()) + .layer(DenseLayer.builder().nIn(10).nOut(10).build()) + .layer(OutputLayer.builder().nIn(10).nOut(10).lossFunction(LossFunctions.LossFunction.MSE).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -180,9 +180,9 @@ public class TestLrChanges extends BaseDL4JTest { .seed(12345) .updater(new Adam(0.1)) 
- .layer(new DenseLayer.Builder().nIn(10).nOut(10).build()) - .layer(new DenseLayer.Builder().nIn(10).nOut(10).build()) - .layer(new OutputLayer.Builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build()) + .layer(DenseLayer.builder().nIn(10).nOut(10).build()) + .layer(DenseLayer.builder().nIn(10).nOut(10).build()) + .layer(OutputLayer.builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -198,9 +198,9 @@ public class TestLrChanges extends BaseDL4JTest { .seed(12345) .updater(new Adam(new ExponentialSchedule(ScheduleType.ITERATION, 0.5, 0.8 ))) - .layer(new DenseLayer.Builder().nIn(10).nOut(10).build()) - .layer(new DenseLayer.Builder().nIn(10).nOut(10).build()) - .layer(new OutputLayer.Builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build()) + .layer(DenseLayer.builder().nIn(10).nOut(10).build()) + .layer(DenseLayer.builder().nIn(10).nOut(10).build()) + .layer(OutputLayer.builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build()) .build(); MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); net2.init(); @@ -242,9 +242,9 @@ public class TestLrChanges extends BaseDL4JTest { .seed(12345) .graphBuilder() .addInputs("in") - .addLayer("0", new DenseLayer.Builder().nIn(10).nOut(10).updater(new Adam(0.1)).build(), "in") - .addLayer("1", new DenseLayer.Builder().nIn(10).nOut(10).updater(new RmsProp(0.01)).build(), "0") - .addLayer("2", new OutputLayer.Builder().nIn(10).nOut(10).updater(new NoOp()).lossFunction(LossFunctions.LossFunction.MSE).build(), "1") + .addLayer("0", DenseLayer.builder().nIn(10).nOut(10).updater(new Adam(0.1)).build(), "in") + .addLayer("1", DenseLayer.builder().nIn(10).nOut(10).updater(new RmsProp(0.01)).build(), "0") + .addLayer("2", OutputLayer.builder().nIn(10).nOut(10).updater(new NoOp()).lossFunction(LossFunctions.LossFunction.MSE).build(), "1") .setOutputs("2") .build(); @@ -261,9 +261,9 @@ public class TestLrChanges extends BaseDL4JTest { .seed(12345) .graphBuilder() .addInputs("in") - .addLayer("0", new DenseLayer.Builder().nIn(10).nOut(10).updater(new Adam(0.5)).build(), "in") //0.5 LR - .addLayer("1", new DenseLayer.Builder().nIn(10).nOut(10).updater(new RmsProp(0.01)).build(), "0") - .addLayer("2", new OutputLayer.Builder().nIn(10).nOut(10).updater(new NoOp()).lossFunction(LossFunctions.LossFunction.MSE).build(), "1") + .addLayer("0", DenseLayer.builder().nIn(10).nOut(10).updater(new Adam(0.5)).build(), "in") //0.5 LR + .addLayer("1", DenseLayer.builder().nIn(10).nOut(10).updater(new RmsProp(0.01)).build(), "0") + .addLayer("2", OutputLayer.builder().nIn(10).nOut(10).updater(new NoOp()).lossFunction(LossFunctions.LossFunction.MSE).build(), "1") .setOutputs("2") .build(); ComputationGraph net2 = new ComputationGraph(conf2); @@ -312,9 +312,9 @@ public class TestLrChanges extends BaseDL4JTest { .activation(Activation.TANH) .seed(12345) .list() - .layer(new DenseLayer.Builder().nIn(10).nOut(10).updater(new Adam(0.3)).build()) //0.5 LR - .layer(new DenseLayer.Builder().nIn(10).nOut(10).updater(new RmsProp(0.3)).build()) - .layer(new OutputLayer.Builder().nIn(10).nOut(10).updater(new NoOp()).lossFunction(LossFunctions.LossFunction.MSE).build()) + .layer(DenseLayer.builder().nIn(10).nOut(10).updater(new Adam(0.3)).build()) //0.5 LR + .layer(DenseLayer.builder().nIn(10).nOut(10).updater(new RmsProp(0.3)).build()) + .layer(OutputLayer.builder().nIn(10).nOut(10).updater(new NoOp()).lossFunction(LossFunctions.LossFunction.MSE).build()) .build(); MultiLayerNetwork net3 = 
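The graph-based tests change in exactly the same way. For reference, a minimal ComputationGraph configuration written against the generated builders, mirroring the hunks above and reusing the classes those tests already import:

    ComputationGraphConfiguration conf = NeuralNetConfiguration.builder()
            .updater(new Adam(0.1))
            .graphBuilder()
            .addInputs("in")
            .addLayer("0", DenseLayer.builder().nIn(10).nOut(10).build(), "in")
            .addLayer("out", OutputLayer.builder().nIn(10).nOut(10)
                    .activation(Activation.SOFTMAX).build(), "0")
            .setOutputs("out")
            .build();
    ComputationGraph net = new ComputationGraph(conf);
    net.init();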
new MultiLayerNetwork(conf3); net3.init(); @@ -346,9 +346,9 @@ public class TestLrChanges extends BaseDL4JTest { .updater(new Adam(0.1)) .graphBuilder() .addInputs("in") - .addLayer("0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in") - .addLayer("1", new DenseLayer.Builder().nIn(10).nOut(10).build(), "0") - .addLayer("2", new OutputLayer.Builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build(), "1") + .addLayer("0", DenseLayer.builder().nIn(10).nOut(10).build(), "in") + .addLayer("1", DenseLayer.builder().nIn(10).nOut(10).build(), "0") + .addLayer("2", OutputLayer.builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build(), "1") .setOutputs("2") .build(); @@ -366,9 +366,9 @@ public class TestLrChanges extends BaseDL4JTest { .updater(new Adam(new ExponentialSchedule(ScheduleType.ITERATION, 0.5, 0.8 ))) .graphBuilder() .addInputs("in") - .addLayer("0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in") - .addLayer("1", new DenseLayer.Builder().nIn(10).nOut(10).build(), "0") - .layer("2", new OutputLayer.Builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build(), "1") + .addLayer("0", DenseLayer.builder().nIn(10).nOut(10).build(), "in") + .addLayer("1", DenseLayer.builder().nIn(10).nOut(10).build(), "0") + .layer("2", OutputLayer.builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build(), "1") .setOutputs("2") .build(); ComputationGraph net2 = new ComputationGraph(conf2); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/TestMemoryReports.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/TestMemoryReports.java index b22bfec2f..2909a7521 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/TestMemoryReports.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/TestMemoryReports.java @@ -54,18 +54,18 @@ public class TestMemoryReports extends BaseDL4JTest { public static List> getTestLayers() { List> l = new ArrayList<>(); - l.add(new Pair<>(new ActivationLayer.Builder().activation(Activation.TANH).build(), InputType.feedForward(20))); - l.add(new Pair<>(new DenseLayer.Builder().nIn(20).nOut(20).build(), InputType.feedForward(20))); - l.add(new Pair<>(new DropoutLayer.Builder().nIn(20).nOut(20).build(), InputType.feedForward(20))); - l.add(new Pair<>(new EmbeddingLayer.Builder().nIn(1).nOut(20).build(), InputType.feedForward(20))); - l.add(new Pair<>(new OutputLayer.Builder().nIn(20).nOut(20).build(), InputType.feedForward(20))); - l.add(new Pair<>(new LossLayer.Builder().build(), InputType.feedForward(20))); + l.add(new Pair<>(ActivationLayer.builder().activation(Activation.TANH).build(), InputType.feedForward(20))); + l.add(new Pair<>(DenseLayer.builder().nIn(20).nOut(20).build(), InputType.feedForward(20))); + l.add(new Pair<>(DropoutLayer.builder().nIn(20).nOut(20).build(), InputType.feedForward(20))); + l.add(new Pair<>(EmbeddingLayer.builder().nIn(1).nOut(20).build(), InputType.feedForward(20))); + l.add(new Pair<>(OutputLayer.builder().nIn(20).nOut(20).build(), InputType.feedForward(20))); + l.add(new Pair<>(LossLayer.builder().build(), InputType.feedForward(20))); //RNN layers: - l.add(new Pair<>(new GravesLSTM.Builder().nIn(20).nOut(20).build(), InputType.recurrent(20, 30))); - l.add(new Pair<>(new LSTM.Builder().nIn(20).nOut(20).build(), InputType.recurrent(20, 30))); - l.add(new Pair<>(new GravesBidirectionalLSTM.Builder().nIn(20).nOut(20).build(), InputType.recurrent(20, 30))); - l.add(new Pair<>(new 
RnnOutputLayer.Builder().nIn(20).nOut(20).build(), InputType.recurrent(20, 30))); + l.add(new Pair<>(GravesLSTM.builder().nIn(20).nOut(20).build(), InputType.recurrent(20, 30))); + l.add(new Pair<>(LSTM.builder().nIn(20).nOut(20).build(), InputType.recurrent(20, 30))); + l.add(new Pair<>(GravesBidirectionalLSTM.builder().nIn(20).nOut(20).build(), InputType.recurrent(20, 30))); + l.add(new Pair<>(RnnOutputLayer.builder().nIn(20).nOut(20).build(), InputType.recurrent(20, 30))); return l; } @@ -216,8 +216,8 @@ public class TestMemoryReports extends BaseDL4JTest { public void validateSimple() { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() - .layer(0, new DenseLayer.Builder().nIn(10).nOut(20).build()) - .layer(1, new DenseLayer.Builder().nIn(20).nOut(27).build()).build(); + .layer(0, DenseLayer.builder().nIn(10).nOut(20).build()) + .layer(1, DenseLayer.builder().nIn(20).nOut(27).build()).build(); MemoryReport mr = conf.getMemoryReport(InputType.feedForward(10)); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/TestNetConversion.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/TestNetConversion.java index 01278db4e..6c6c3d853 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/TestNetConversion.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/TestNetConversion.java @@ -96,10 +96,10 @@ public class TestNetConversion extends BaseDL4JTest { .weightInit(WeightInit.XAVIER) .updater(new Sgd(0.1)) - .layer(new ConvolutionLayer.Builder().nIn(3).nOut(5).kernelSize(2, 2).stride(1, 1).build()) - .layer(new SubsamplingLayer.Builder().kernelSize(2, 2).stride(1, 1).build()) - .layer(new DenseLayer.Builder().nOut(32).build()) - .layer(new OutputLayer.Builder().nOut(10).lossFunction(LossFunctions.LossFunction.MSE).build()) + .layer(ConvolutionLayer.builder().nIn(3).nOut(5).kernelSize(2, 2).stride(1, 1).build()) + .layer(SubsamplingLayer.builder().kernelSize(2, 2).stride(1, 1).build()) + .layer(DenseLayer.builder().nOut(32).build()) + .layer(OutputLayer.builder().nOut(10).lossFunction(LossFunctions.LossFunction.MSE).build()) .inputType(InputType.convolutional(10, 10, 3)) .build(); @@ -126,9 +126,9 @@ public class TestNetConversion extends BaseDL4JTest { .weightInit(WeightInit.XAVIER) .updater(new Sgd(0.1)) - .layer(new GravesLSTM.Builder().nOut(8).build()) - .layer(new LSTM.Builder().nOut(8).build()) - .layer(new RnnOutputLayer.Builder().nOut(10).lossFunction(LossFunctions.LossFunction.MSE).build()) + .layer(GravesLSTM.builder().nOut(8).build()) + .layer(LSTM.builder().nOut(8).build()) + .layer(RnnOutputLayer.builder().nOut(10).lossFunction(LossFunctions.LossFunction.MSE).build()) .inputType(InputType.recurrent(5)) .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/WorkspaceTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/WorkspaceTests.java index 904dd845b..becd89ab2 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/WorkspaceTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/WorkspaceTests.java @@ -95,9 +95,9 @@ public class WorkspaceTests extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER) .convolutionMode(ConvolutionMode.Same).seed(12345L) - .layer(0, new ConvolutionLayer.Builder().nIn(depthIn).nOut(depthOut).kernelSize(2, 2) + .layer(0, 
ConvolutionLayer.builder().nIn(depthIn).nOut(depthOut).kernelSize(2, 2) .stride(1, 1).activation(Activation.TANH).build()) - .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(1, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(nOut).build()) .inputType(InputType.convolutional(5, 5, 2)) .build(); @@ -123,11 +123,11 @@ public class WorkspaceTests extends BaseDL4JTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") - .addLayer("0", new ConvolutionLayer.Builder().nOut(3) + .addLayer("0", ConvolutionLayer.builder().nOut(3) .kernelSize(2, 2).stride(2, 2).build(), "in") - .addLayer("1", new ConvolutionLayer.Builder().nOut(3) + .addLayer("1", ConvolutionLayer.builder().nOut(3) .kernelSize(2, 2).stride(2, 2).build(), "0") - .addLayer("out", new OutputLayer.Builder().nOut(10) + .addLayer("out", OutputLayer.builder().nOut(10) .activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE) .build(), "1") .setOutputs("out") @@ -154,10 +154,10 @@ public class WorkspaceTests extends BaseDL4JTest { .inferenceWorkspaceMode(wm) .graphBuilder() .addInputs("in") - .addLayer("e", new GravesLSTM.Builder().nIn(10).nOut(5).build(), new DupPreProcessor(), "in") -// .addLayer("e", new GravesLSTM.Builder().nIn(10).nOut(5).build(), "in") //Note that no preprocessor is OK - .addLayer("rnn", new GravesLSTM.Builder().nIn(5).nOut(8).build(), "e") - .addLayer("out", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MSE) + .addLayer("e", GravesLSTM.builder().nIn(10).nOut(5).build(), new DupPreProcessor(), "in") +// .addLayer("e", GravesLSTM.builder().nIn(10).nOut(5).build(), "in") //Note that no preprocessor is OK + .addLayer("rnn", GravesLSTM.builder().nIn(5).nOut(8).build(), "e") + .addLayer("out", RnnOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE) .activation(Activation.SIGMOID).nOut(3).build(), "rnn") .setInputTypes(InputType.recurrent(10)) .setOutputs("out") @@ -188,9 +188,9 @@ public class WorkspaceTests extends BaseDL4JTest { .trainingWorkspaceMode(wm) .inferenceWorkspaceMode(wm) - .layer(new GravesLSTM.Builder().nIn(10).nOut(5).build()) - .layer(new GravesLSTM.Builder().nIn(5).nOut(8).build()) - .layer(new RnnOutputLayer.Builder(LossFunctions.LossFunction.MSE).activation(Activation.SIGMOID).nOut(3).build()) + .layer(GravesLSTM.builder().nIn(10).nOut(5).build()) + .layer(GravesLSTM.builder().nIn(5).nOut(8).build()) + .layer(RnnOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).activation(Activation.SIGMOID).nOut(3).build()) .inputPreProcessor(0, new DupPreProcessor()) .inputType(InputType.recurrent(10)) .build(); @@ -264,32 +264,32 @@ public class WorkspaceTests extends BaseDL4JTest { switch (i) { case 0: - b.layer(new SimpleRnn.Builder().nIn(10).nOut(10).build()); - b.layer(new SimpleRnn.Builder().nIn(10).nOut(10).build()); + b.layer(SimpleRnn.builder().nIn(10).nOut(10).build()); + b.layer(SimpleRnn.builder().nIn(10).nOut(10).build()); - gb.addLayer("0", new SimpleRnn.Builder().nIn(10).nOut(10).build(), "in"); - gb.addLayer("1", new SimpleRnn.Builder().nIn(10).nOut(10).build(), "0"); + gb.addLayer("0", SimpleRnn.builder().nIn(10).nOut(10).build(), "in"); + gb.addLayer("1", SimpleRnn.builder().nIn(10).nOut(10).build(), "0"); break; case 1: - b.layer(new LSTM.Builder().nIn(10).nOut(10).build()); - b.layer(new LSTM.Builder().nIn(10).nOut(10).build()); + b.layer(LSTM.builder().nIn(10).nOut(10).build()); + 
b.layer(LSTM.builder().nIn(10).nOut(10).build()); - gb.addLayer("0", new LSTM.Builder().nIn(10).nOut(10).build(), "in"); - gb.addLayer("1", new LSTM.Builder().nIn(10).nOut(10).build(), "0"); + gb.addLayer("0", LSTM.builder().nIn(10).nOut(10).build(), "in"); + gb.addLayer("1", LSTM.builder().nIn(10).nOut(10).build(), "0"); break; case 2: - b.layer(new GravesLSTM.Builder().nIn(10).nOut(10).build()); - b.layer(new GravesLSTM.Builder().nIn(10).nOut(10).build()); + b.layer(GravesLSTM.builder().nIn(10).nOut(10).build()); + b.layer(GravesLSTM.builder().nIn(10).nOut(10).build()); - gb.addLayer("0", new GravesLSTM.Builder().nIn(10).nOut(10).build(), "in"); - gb.addLayer("1", new GravesLSTM.Builder().nIn(10).nOut(10).build(), "0"); + gb.addLayer("0", GravesLSTM.builder().nIn(10).nOut(10).build(), "in"); + gb.addLayer("1", GravesLSTM.builder().nIn(10).nOut(10).build(), "0"); break; default: throw new RuntimeException(); } - b.layer(new RnnOutputLayer.Builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build()); - gb.addLayer("out", new RnnOutputLayer.Builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build(), "1"); + b.layer(RnnOutputLayer.builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build()); + gb.addLayer("out", RnnOutputLayer.builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build(), "1"); gb.setOutputs("out"); NeuralNetConfiguration conf = b.build(); @@ -325,7 +325,7 @@ public class WorkspaceTests extends BaseDL4JTest { .activation(Activation.TANH) .inferenceWorkspaceMode(ws) .trainingWorkspaceMode(ws) - .list(); + ; ComputationGraphConfiguration.GraphBuilder gb = NeuralNetConfiguration.builder() .weightInit(WeightInit.XAVIER) @@ -337,36 +337,36 @@ public class WorkspaceTests extends BaseDL4JTest { switch (i) { case 0: - b.layer(new SimpleRnn.Builder().nIn(10).nOut(10).build()); - b.layer(new SimpleRnn.Builder().nIn(10).nOut(10).build()); + b.layer(SimpleRnn.builder().nIn(10).nOut(10).build()); + b.layer(SimpleRnn.builder().nIn(10).nOut(10).build()); - gb.addLayer("0", new SimpleRnn.Builder().nIn(10).nOut(10).build(), "in"); - gb.addLayer("1", new SimpleRnn.Builder().nIn(10).nOut(10).build(), "0"); + gb.addLayer("0", SimpleRnn.builder().nIn(10).nOut(10).build(), "in"); + gb.addLayer("1", SimpleRnn.builder().nIn(10).nOut(10).build(), "0"); break; case 1: - b.layer(new LSTM.Builder().nIn(10).nOut(10).build()); - b.layer(new LSTM.Builder().nIn(10).nOut(10).build()); + b.layer(LSTM.builder().nIn(10).nOut(10).build()); + b.layer(LSTM.builder().nIn(10).nOut(10).build()); - gb.addLayer("0", new LSTM.Builder().nIn(10).nOut(10).build(), "in"); - gb.addLayer("1", new LSTM.Builder().nIn(10).nOut(10).build(), "0"); + gb.addLayer("0", LSTM.builder().nIn(10).nOut(10).build(), "in"); + gb.addLayer("1", LSTM.builder().nIn(10).nOut(10).build(), "0"); break; case 2: - b.layer(new GravesLSTM.Builder().nIn(10).nOut(10).build()); - b.layer(new GravesLSTM.Builder().nIn(10).nOut(10).build()); + b.layer(GravesLSTM.builder().nIn(10).nOut(10).build()); + b.layer(GravesLSTM.builder().nIn(10).nOut(10).build()); - gb.addLayer("0", new GravesLSTM.Builder().nIn(10).nOut(10).build(), "in"); - gb.addLayer("1", new GravesLSTM.Builder().nIn(10).nOut(10).build(), "0"); + gb.addLayer("0", GravesLSTM.builder().nIn(10).nOut(10).build(), "in"); + gb.addLayer("1", GravesLSTM.builder().nIn(10).nOut(10).build(), "0"); break; default: throw new RuntimeException(); } - b.layer(new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(10).build()); - gb.addLayer("out", new 
RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE) + b.layer(RnnOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(10).build()); + gb.addLayer("out", RnnOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE) .nIn(10).nOut(10).build(), "1"); gb.setOutputs("out"); - NeuralNetConfiguration conf = b + NeuralNetConfiguration conf = (NeuralNetConfiguration) b .backpropType(BackpropType.TruncatedBPTT) .tbpttBackLength(5).tbpttFwdLength(5) .build(); @@ -405,7 +405,7 @@ public class WorkspaceTests extends BaseDL4JTest { .seed(12345) .trainingWorkspaceMode(ws).inferenceWorkspaceMode(ws) .list() - .layer(new OutputLayer.Builder().nIn(3).nOut(1).activation(Activation.SIGMOID).lossFunction(LossFunctions.LossFunction.XENT).build()) + .layer(OutputLayer.builder().nIn(3).nOut(1).activation(Activation.SIGMOID).lossFunction(LossFunctions.LossFunction.XENT).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -434,7 +434,7 @@ public class WorkspaceTests extends BaseDL4JTest { .seed(12345) .trainingWorkspaceMode(wsm).inferenceWorkspaceMode(wsm) .list() - .layer(new OutputLayer.Builder().nIn(3).nOut(1).lossFunction(LossFunctions.LossFunction.MSE).activation(Activation.SIGMOID).build()) + .layer(OutputLayer.builder().nIn(3).nOut(1).lossFunction(LossFunctions.LossFunction.MSE).activation(Activation.SIGMOID).build()) .build(); assertEquals(wsm, conf.getTrainingWorkspaceMode()); @@ -446,7 +446,7 @@ public class WorkspaceTests extends BaseDL4JTest { .seed(12345) .trainingWorkspaceMode(wsm).inferenceWorkspaceMode(wsm) .list() - .layer(new OutputLayer.Builder().nIn(3).nOut(1).activation(Activation.SIGMOID).lossFunction(LossFunctions.LossFunction.MSE).build()) + .layer(OutputLayer.builder().nIn(3).nOut(1).activation(Activation.SIGMOID).lossFunction(LossFunctions.LossFunction.MSE).build()) .build(); assertEquals(wsm, conf2.getTrainingWorkspaceMode()); @@ -465,11 +465,11 @@ public class WorkspaceTests extends BaseDL4JTest { .graphBuilder() .addInputs("in") .setInputTypes(InputType.recurrent(200)) - .addLayer("embeddings", new EmbeddingLayer.Builder().nIn(200).nOut(50).build(), "in") - .addLayer("a", new GravesLSTM.Builder().nOut(300).activation(Activation.HARDTANH).build(), "embeddings") + .addLayer("embeddings", EmbeddingLayer.builder().nIn(200).nOut(50).build(), "in") + .addLayer("a", GravesLSTM.builder().nOut(300).activation(Activation.HARDTANH).build(), "embeddings") .addVertex("b", new LastTimeStepVertex("in"), "a") - .addLayer("c", new DenseLayer.Builder().nOut(300).activation(Activation.HARDTANH).build(), "b") - .addLayer("output", new LossLayer.Builder().lossFunction(LossFunctions.LossFunction.COSINE_PROXIMITY).build(), "c") + .addLayer("c", DenseLayer.builder().nOut(300).activation(Activation.HARDTANH).build(), "b") + .addLayer("output", LossLayer.builder().lossFunction(LossFunctions.LossFunction.COSINE_PROXIMITY.getILossFunction()).build(), "c") .setOutputs("output") .build(); @@ -505,8 +505,8 @@ public class WorkspaceTests extends BaseDL4JTest { .seed(12345) .weightInit(WeightInit.XAVIER) .list() - .layer(new DenseLayer.Builder().nIn(4).nOut(3).activation(Activation.TANH).build()) - .layer(new OutputLayer.Builder().nIn(3).nOut(3).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build()) + .layer(DenseLayer.builder().nIn(4).nOut(3).activation(Activation.TANH).build()) + .layer(OutputLayer.builder().nIn(3).nOut(3).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build()) 
.build(); MultiLayerNetwork net = new MultiLayerNetwork(netConf); @@ -559,7 +559,7 @@ public class WorkspaceTests extends BaseDL4JTest { final ComputationGraphConfiguration computationGraphConfiguration = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("state") - .addLayer("value_output", new OutputLayer.Builder().nIn(30).nOut(1).activation(Activation.IDENTITY) + .addLayer("value_output", OutputLayer.builder().nIn(30).nOut(1).activation(Activation.IDENTITY) .lossFunction(LossFunctions.LossFunction.MSE).build(), "state") .setOutputs("value_output") .build(); @@ -580,7 +580,7 @@ public class WorkspaceTests extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() - .layer(new OutputLayer.Builder().nIn(30).nOut(1).activation(Activation.IDENTITY).lossFunction(LossFunctions.LossFunction.MSE).build()) + .layer(OutputLayer.builder().nIn(30).nOut(1).activation(Activation.IDENTITY).lossFunction(LossFunctions.LossFunction.MSE).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -609,9 +609,9 @@ public class WorkspaceTests extends BaseDL4JTest { NeuralNetConfiguration mlc = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER) .convolutionMode(ConvolutionMode.Same).seed(12345L).list() - .layer(0, new ConvolutionLayer.Builder().nIn(1).nOut(2).kernelSize(2, 2) + .layer(0, ConvolutionLayer.builder().nIn(1).nOut(2).kernelSize(2, 2) .stride(1, 1).activation(Activation.TANH).build()) - .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(1, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(10).build()) .inputType(InputType.convolutional(5, 5, 1)) .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/mkldnn/ValidateMKLDNN.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/mkldnn/ValidateMKLDNN.java index ca9c0f67c..ead6953f1 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/mkldnn/ValidateMKLDNN.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/mkldnn/ValidateMKLDNN.java @@ -78,25 +78,25 @@ public class ValidateMKLDNN extends BaseDL4JTest { .convolutionMode(cm) .seed(12345) .list() - .layer(new ConvolutionLayer.Builder().activation(Activation.TANH) + .layer(ConvolutionLayer.builder().activation(Activation.TANH) .kernelSize(kernel) .stride(stride) .padding(0, 0) .nOut(3) .build()) - .layer(new SubsamplingLayer.Builder() + .layer(SubsamplingLayer.builder() .poolingType(pt) .kernelSize(kernel) .stride(stride) .padding(0, 0) .build()) - .layer(new ConvolutionLayer.Builder().activation(Activation.TANH) + .layer(ConvolutionLayer.builder().activation(Activation.TANH) .kernelSize(kernel) .stride(stride) .padding(0, 0) .nOut(3) .build()) - .layer(new OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build()) + .layer(OutputLayer.builder().nOut(10).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build()) .inputType(InputType.convolutional(inputSize[2], inputSize[3], inputSize[1])) .build(); @@ -154,20 +154,20 @@ public class ValidateMKLDNN extends BaseDL4JTest { .convolutionMode(cm) .seed(12345) .list() - .layer(new ConvolutionLayer.Builder().activation(Activation.TANH) + .layer(ConvolutionLayer.builder().activation(Activation.TANH) .kernelSize(kernel) .stride(stride) .padding(0, 0) .nOut(3) .build()) - .layer(new 
BatchNormalization.Builder().useLogStd(b).helperAllowFallback(false)/*.eps(0)*/.build()) - .layer(new ConvolutionLayer.Builder().activation(Activation.TANH) + .layer(BatchNormalization.builder().useLogStd(b).helperAllowFallback(false)/*.eps(0)*/.build()) + .layer(ConvolutionLayer.builder().activation(Activation.TANH) .kernelSize(kernel) .stride(stride) .padding(0, 0) .nOut(3) .build()) - .layer(new OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build()) + .layer(OutputLayer.builder().nOut(10).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build()) .inputType(InputType.convolutional(inputSize[2], inputSize[3], inputSize[1])) .build(); @@ -228,19 +228,19 @@ public class ValidateMKLDNN extends BaseDL4JTest { .weightInit(new NormalDistribution(0,1)) .seed(12345) .list() - .layer(new ConvolutionLayer.Builder().activation(Activation.TANH) + .layer(ConvolutionLayer.builder().activation(Activation.TANH) .kernelSize(kernel) .stride(stride) .padding(0, 0) .nOut(3) .build()) - .layer(new LocalResponseNormalization.Builder() + .layer(LocalResponseNormalization.builder() .alpha(a[i]) .beta(b[i]) .n(n[i]) .k(k[i]) .cudnnAllowFallback(false).build()) - .layer(new OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build()) + .layer(OutputLayer.builder().nOut(10).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build()) .inputType(InputType.convolutional(inputSize[2], inputSize[3], inputSize[1])) .build(); @@ -295,7 +295,7 @@ public class ValidateMKLDNN extends BaseDL4JTest { .inferenceWorkspaceMode(WorkspaceMode.NONE) .trainingWorkspaceMode(WorkspaceMode.NONE) .list() - .layer(new BatchNormalization.Builder().nIn(3).nOut(3).build()) + .layer(BatchNormalization.builder().nIn(3).nOut(3).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/BackPropMLPTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/BackPropMLPTest.java index c818f2281..680bea143 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/BackPropMLPTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/BackPropMLPTest.java @@ -326,12 +326,12 @@ public class BackPropMLPTest extends BaseDL4JTest { for (int i = 0; i < hiddenLayerSizes.length; i++) { int nIn = (i == 0 ? 4 : hiddenLayerSizes[i - 1]); - lb.layer(i, new DenseLayer.Builder().nIn(nIn).nOut(hiddenLayerSizes[i]).weightInit(WeightInit.XAVIER) + lb.layer(i, DenseLayer.builder().nIn(nIn).nOut(hiddenLayerSizes[i]).weightInit(WeightInit.XAVIER) .activation(activationFunction).build()); } lb.layer(hiddenLayerSizes.length, - new OutputLayer.Builder(LossFunction.MCXENT).nIn(hiddenLayerSizes[hiddenLayerSizes.length - 1]) + OutputLayer.builder().lossFunction(LossFunction.MCXENT).nIn(hiddenLayerSizes[hiddenLayerSizes.length - 1]) .nOut(3).weightInit(WeightInit.XAVIER) .activation(activationFunction.equals(Activation.IDENTITY) ? 
Activation.IDENTITY : Activation.SOFTMAX) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTest.java index ac1626eda..0db9170fa 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTest.java @@ -138,11 +138,11 @@ public class MultiLayerTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345L) .list().layer(0, - new DenseLayer.Builder().nIn(4).nOut(3) + DenseLayer.builder().nIn(4).nOut(3) .dist(new NormalDistribution(0, 1)) .build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .layer(1, org.deeplearning4j.nn.conf.layers.OutputLayer.builder( LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(3).nOut(3) @@ -176,9 +176,9 @@ public class MultiLayerTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list().layer(0, - new DenseLayer.Builder().nIn(4).nOut(3) + DenseLayer.builder().nIn(4).nOut(3) .activation(Activation.TANH).build()) - .layer(1, new DenseLayer.Builder().nIn(3).nOut(2).build()) + .layer(1, DenseLayer.builder().nIn(3).nOut(2).build()) .build(); MultiLayerNetwork network3 = new MultiLayerNetwork(conf); @@ -199,12 +199,12 @@ public class MultiLayerTest extends BaseDL4JTest { Nd4j.getRandom().setSeed(123); NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT).seed(123).list() - .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).weightInit(WeightInit.XAVIER) + .layer(0, DenseLayer.builder().nIn(4).nOut(3).weightInit(WeightInit.XAVIER) .activation(Activation.TANH).build()) - .layer(1, new DenseLayer.Builder().nIn(3).nOut(2).weightInit(WeightInit.XAVIER) + .layer(1, DenseLayer.builder().nIn(3).nOut(2).weightInit(WeightInit.XAVIER) .activation(Activation.TANH).build()) - .layer(2, new BatchNormalization.Builder().nOut(2).build()) - .layer(3, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .layer(2,BatchNormalization.builder().nOut(2).build()) + .layer(3, org.deeplearning4j.nn.conf.layers.OutputLayer.builder( LossFunctions.LossFunction.MCXENT).weightInit(WeightInit.XAVIER) .activation(Activation.SOFTMAX).nIn(2).nOut(3).build()) .build(); @@ -231,11 +231,11 @@ public class MultiLayerTest extends BaseDL4JTest { Nd4j.getRandom().setSeed(123); NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT).seed(123).list() - .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).weightInit(WeightInit.XAVIER) + .layer(0, DenseLayer.builder().nIn(4).nOut(3).weightInit(WeightInit.XAVIER) .activation(Activation.TANH).build()) - .layer(1, new DenseLayer.Builder().nIn(3).nOut(2).weightInit(WeightInit.XAVIER) + .layer(1, DenseLayer.builder().nIn(3).nOut(2).weightInit(WeightInit.XAVIER) .activation(Activation.TANH).build()) - .layer(2, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .layer(2, org.deeplearning4j.nn.conf.layers.OutputLayer.builder( LossFunctions.LossFunction.MCXENT).weightInit(WeightInit.XAVIER) .activation(Activation.SOFTMAX).nIn(2).nOut(3).build()) .build(); @@ -313,17 +313,17 @@ public class MultiLayerTest extends BaseDL4JTest { log.info("Build model...."); NeuralNetConfiguration conf = 
NeuralNetConfiguration.builder().seed(seed) .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT).list() - .layer(0, new DenseLayer.Builder().nIn(numRows * numColumns).nOut(1000).build()) - .layer(1, new DenseLayer.Builder().nIn(1000).nOut(500).build()) - .layer(2, new DenseLayer.Builder().nIn(500).nOut(250).build()) - .layer(3, new DenseLayer.Builder().nIn(250).nOut(100).build()) - .layer(4, new DenseLayer.Builder().nIn(100).nOut(30).build()) //encoding stops - .layer(5, new DenseLayer.Builder().nIn(30).nOut(100).build()) //decoding starts - .layer(6, new DenseLayer.Builder().nIn(100).nOut(250).build()) - .layer(7, new DenseLayer.Builder().nIn(250).nOut(500).build()) - .layer(8, new DenseLayer.Builder().nIn(500).nOut(1000).build()) + .layer(0, DenseLayer.builder().nIn(numRows * numColumns).nOut(1000).build()) + .layer(1, DenseLayer.builder().nIn(1000).nOut(500).build()) + .layer(2, DenseLayer.builder().nIn(500).nOut(250).build()) + .layer(3, DenseLayer.builder().nIn(250).nOut(100).build()) + .layer(4, DenseLayer.builder().nIn(100).nOut(30).build()) //encoding stops + .layer(5, DenseLayer.builder().nIn(30).nOut(100).build()) //decoding starts + .layer(6, DenseLayer.builder().nIn(100).nOut(250).build()) + .layer(7, DenseLayer.builder().nIn(250).nOut(500).build()) + .layer(8, DenseLayer.builder().nIn(500).nOut(1000).build()) .layer(9, - new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).nIn(1000) + OutputLayer.builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).nIn(1000) .nOut(numRows * numColumns).activation(Activation.SOFTMAX).build()) .build(); @@ -373,19 +373,19 @@ public class MultiLayerTest extends BaseDL4JTest { NeuralNetConfiguration.builder().optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT) .updater(new Sgd(1e-3)) .list().layer( - 0, new DenseLayer.Builder().nIn(nIn).nOut(600) + 0, DenseLayer.builder().nIn(nIn).nOut(600) .dist(new NormalDistribution(0, 1e-5)) .build()) - .layer(1, new DenseLayer.Builder() + .layer(1, DenseLayer.builder() .nIn(600).nOut(250) .dist(new NormalDistribution(0, 1e-5)) .build()) - .layer(2, new DenseLayer.Builder() + .layer(2, DenseLayer.builder() .nIn(250).nOut(100) .dist(new NormalDistribution(0, 1e-5)) .build()) - .layer(3, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .layer(3, org.deeplearning4j.nn.conf.layers.OutputLayer.builder( LossFunctions.LossFunction.MCXENT).nIn(100).nOut(25) .activation(Activation.SOFTMAX) .weightInit(new NormalDistribution(0, 1e-5)).build()) @@ -426,11 +426,11 @@ public class MultiLayerTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .updater(new Sgd(0.1)).list() - .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(20).activation(Activation.RELU) + .layer(0, DenseLayer.builder().nIn(nIn).nOut(20).activation(Activation.RELU) .weightInit(WeightInit.XAVIER).build()) - .layer(1, new DenseLayer.Builder().nIn(20).nOut(30).activation(Activation.RELU) + .layer(1, DenseLayer.builder().nIn(20).nOut(30).activation(Activation.RELU) .weightInit(WeightInit.XAVIER).build()) - .layer(2, new DenseLayer.Builder().nIn(30).nOut(nOut).activation(Activation.RELU) + .layer(2, DenseLayer.builder().nIn(30).nOut(nOut).activation(Activation.RELU) .weightInit(WeightInit.XAVIER).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -491,17 +491,17 @@ public class MultiLayerTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .updater(new Sgd(0.1)).list() .layer(0, - new 
DenseLayer.Builder().name("dnn1").nIn(nIn).nOut(20).activation(Activation.RELU) + DenseLayer.builder().name("dnn1").nIn(nIn).nOut(20).activation(Activation.RELU) .weightInit(WeightInit.XAVIER).build()) - .layer(1, new DenseLayer.Builder().name("dnn2").nIn(20).nOut(30).activation(Activation.RELU) + .layer(1, DenseLayer.builder().name("dnn2").nIn(20).nOut(30).activation(Activation.RELU) .weightInit(WeightInit.XAVIER).build()) - .layer(2, new DenseLayer.Builder().name("dnn3").nIn(30).nOut(nOut) + .layer(2, DenseLayer.builder().name("dnn3").nIn(30).nOut(nOut) .activation(Activation.SOFTMAX).weightInit(WeightInit.XAVIER).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); - assertEquals(layerNameList.get(0), net.getLayer(0).getLayerConfiguration().getLayerName()); + assertEquals(layerNameList.get(0), net.getLayer(0).getLayerConfiguration().getName()); assertEquals(layerNameList, net.getLayerNames()); BaseLayerConfiguration b = (BaseLayerConfiguration) net.getLayer(layerNameList.get(2)).getLayerConfiguration(); assertEquals("softmax", b.getActivationFn().toString()); @@ -516,17 +516,17 @@ public class MultiLayerTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345).l1(0.01) .l2(0.01).updater(new Sgd(0.1)).activation(Activation.TANH).weightInit(WeightInit.XAVIER) .list() - .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(20).build()) - .layer(1, new DenseLayer.Builder().nIn(20).nOut(30).build()) - .layer(2, new OutputLayer.Builder() + .layer(0, DenseLayer.builder().nIn(nIn).nOut(20).build()) + .layer(1, DenseLayer.builder().nIn(20).nOut(30).build()) + .layer(2, OutputLayer.builder() .lossFunction(LossFunctions.LossFunction.MSE).nIn(30).nOut(nOut).build()) .build(); NeuralNetConfiguration confNoReg = NeuralNetConfiguration.builder().seed(12345) .updater(new Sgd(0.1)).activation(Activation.TANH).weightInit(WeightInit.XAVIER).list() - .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(20).build()) - .layer(1, new DenseLayer.Builder().nIn(20).nOut(30).build()) - .layer(2, new OutputLayer.Builder() + .layer(0, DenseLayer.builder().nIn(nIn).nOut(20).build()) + .layer(1, DenseLayer.builder().nIn(20).nOut(30).build()) + .layer(2, OutputLayer.builder() .lossFunction(LossFunctions.LossFunction.MSE).nIn(30).nOut(nOut).build()) .build(); @@ -570,8 +570,8 @@ public class MultiLayerTest extends BaseDL4JTest { Nd4j.getRandom().setSeed(12345); NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .weightInit(WeightInit.XAVIER).seed(12345L).list() - .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).activation(Activation.SIGMOID).build()) - .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(0, DenseLayer.builder().nIn(4).nOut(3).activation(Activation.SIGMOID).build()) + .layer(1, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(3).nOut(3).build()) .build(); @@ -594,8 +594,8 @@ public class MultiLayerTest extends BaseDL4JTest { int nOut = 2; NeuralNetConfiguration conf = NeuralNetConfiguration.builder() - .seed(12345L).list().layer(0, new ConvolutionLayer.Builder(2, 2).nOut(1).build()) - .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .seed(12345L).list().layer(0, ConvolutionLayer.builder(2, 2).nOut(1).build()) + .layer(1, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(2).build()) .inputType(InputType.convolutionalFlat(height, width, depth)) .build(); @@ -620,8 
+620,8 @@ public class MultiLayerTest extends BaseDL4JTest { Nd4j.getRandom().setSeed(12345); NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .weightInit(WeightInit.XAVIER).seed(12345L).list() - .layer(0, new DenseLayer.Builder().nIn(784).nOut(50).activation(Activation.RELU).build()) - .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(0, DenseLayer.builder().nIn(784).nOut(50).activation(Activation.RELU).build()) + .layer(1, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(50).nOut(10).build()) .inputType(InputType.convolutional(28, 28, 1)).build(); @@ -660,8 +660,8 @@ public class MultiLayerTest extends BaseDL4JTest { Nd4j.getRandom().setSeed(12345); NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .weightInit(WeightInit.XAVIER).seed(12345L).list() - .layer(0, new DenseLayer.Builder().nIn(784).nOut(50).activation(Activation.RELU).build()) - .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(0, DenseLayer.builder().nIn(784).nOut(50).activation(Activation.RELU).build()) + .layer(1, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(50).nOut(10).build()) .inputType(InputType.convolutional(28, 28, 1)).build(); @@ -695,8 +695,8 @@ public class MultiLayerTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new Sgd(1.0)) .activation(Activation.RELU).weightInit(WeightInit.XAVIER) - .list().layer(0, new DenseLayer.Builder().name("dnn1").nIn(4).nOut(5).build()) - .layer(1, new OutputLayer.Builder().name("output").nIn(5).nOut(3) + .list().layer(0, DenseLayer.builder().name("dnn1").nIn(4).nOut(5).build()) + .layer(1, OutputLayer.builder().name("output").nIn(5).nOut(3) .activation(Activation.SOFTMAX).weightInit(WeightInit.XAVIER) .build()) .build(); @@ -737,9 +737,9 @@ public class MultiLayerTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() .layer(0, - new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).padding(0, 0).nIn(2) + ConvolutionLayer.builder().kernelSize(2, 2).stride(1, 1).padding(0, 0).nIn(2) .nOut(2).build()) - .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(1, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(2).build()) .inputType(InputType.convolutional(height, width, depth)) .build(); @@ -784,10 +784,10 @@ public class MultiLayerTest extends BaseDL4JTest { NeuralNetConfiguration vae = NeuralNetConfiguration.builder() .seed(42).updater(new NoOp()) .weightInit(WeightInit.UNIFORM) - .layer(new AutoEncoder.Builder() + .layer(AutoEncoder.builder() .activation(Activation.IDENTITY).nOut(nIn).build()) .layer( - new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + org.deeplearning4j.nn.conf.layers.OutputLayer.builder( LossFunctions.LossFunction.COSINE_PROXIMITY) .activation(Activation.IDENTITY).nOut(nOut) .build()) @@ -805,9 +805,9 @@ public class MultiLayerTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(123) .list() - .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).weightInit(WeightInit.XAVIER) + .layer(0, DenseLayer.builder().nIn(4).nOut(3).weightInit(WeightInit.XAVIER) .activation(Activation.TANH).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .layer(1, 
org.deeplearning4j.nn.conf.layers.OutputLayer.builder( LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(3).nOut(3) .build()) .build(); @@ -844,8 +844,8 @@ public class MultiLayerTest extends BaseDL4JTest { NeuralNetConfiguration conf1 = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(WeightInit.XAVIER).activation(Activation.TANH).seed(123).list() - .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .layer(0, DenseLayer.builder().nIn(10).nOut(10).build()) + .layer(1, org.deeplearning4j.nn.conf.layers.OutputLayer.builder( LossFunctions.LossFunction.MSE).activation(Activation.IDENTITY).nIn(10).nOut(10) .build()) .build(); @@ -853,8 +853,8 @@ public class MultiLayerTest extends BaseDL4JTest { NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .l1Bias(0.1).l2Bias(0.2).weightInit(WeightInit.XAVIER).activation(Activation.TANH) - .seed(123).list().layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .seed(123).list().layer(0, DenseLayer.builder().nIn(10).nOut(10).build()) + .layer(1, org.deeplearning4j.nn.conf.layers.OutputLayer.builder( LossFunctions.LossFunction.MSE).activation(Activation.IDENTITY).nIn(10).nOut(10) .build()) .build(); @@ -929,27 +929,27 @@ public class MultiLayerTest extends BaseDL4JTest { NeuralNetConfiguration.builder().seed(12345).l2(0.001) //l2 regularization on all layers .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .list() - .layer(0, new ConvolutionLayer.Builder(10, 10).nIn(3) //3 channels: RGB + .layer(0, ConvolutionLayer.builder(10, 10).nIn(3) //3 channels: RGB .nOut(30).stride(4, 4).activation(Activation.RELU).weightInit( WeightInit.RELU) - .updater(Updater.ADAGRAD).build()) //Output: (130-10+0)/4+1 = 31 -> 31*31*30 - .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX) + .updater(Updater.ADAGRAD.getIUpdaterWithDefaultConfig()).build()) //Output: (130-10+0)/4+1 = 31 -> 31*31*30 + .layer(1, SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX) .kernelSize(3, 3).stride(2, 2).build()) //(31-3+0)/2+1 = 15 - .layer(2, new ConvolutionLayer.Builder(3, 3).nIn(30).nOut(10).stride(2, 2) + .layer(2, ConvolutionLayer.builder(3, 3).nIn(30).nOut(10).stride(2, 2) .activation(Activation.RELU).weightInit(WeightInit.RELU) .updater(Updater.ADAGRAD).build()) //Output: (15-3+0)/2+1 = 7 -> 7*7*10 = 490 - .layer(3, new DenseLayer.Builder().activation(Activation.RELU).nIn(490).nOut(50) + .layer(3, DenseLayer.builder().activation(Activation.RELU).nIn(490).nOut(50) .weightInit(WeightInit.RELU).updater(Updater.ADAGRAD) .gradientNormalization( GradientNormalization.ClipElementWiseAbsoluteValue) .gradientNormalizationThreshold(10).build()) - .layer(4, new GravesLSTM.Builder().activation(Activation.SOFTSIGN).nIn(50) + .layer(4, GravesLSTM.builder().activation(Activation.SOFTSIGN).nIn(50) .nOut(50).weightInit(WeightInit.XAVIER).updater(Updater.ADAGRAD) .gradientNormalization( GradientNormalization.ClipElementWiseAbsoluteValue) .gradientNormalizationThreshold(10) .build()) - .layer(5, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(5, RnnOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(50) .nOut(4) //4 possible shapes: circle, square, arc, line 
.updater(Updater.ADAGRAD).weightInit(WeightInit.XAVIER) @@ -974,7 +974,7 @@ public class MultiLayerTest extends BaseDL4JTest { public void testErrorNoOutputLayer() { assertThrows(DL4JException.class, () -> { NeuralNetConfiguration c = NeuralNetConfiguration.builder().list() - .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()).build(); + .layer(0, DenseLayer.builder().nIn(10).nOut(10).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(c); net.init(); @@ -995,23 +995,23 @@ public class MultiLayerTest extends BaseDL4JTest { Nd4j.getRandom().setSeed(123); NeuralNetConfiguration conf1 = NeuralNetConfiguration.builder().seed(123).list() - .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).weightInit(WeightInit.XAVIER) + .layer(0, DenseLayer.builder().nIn(4).nOut(3).weightInit(WeightInit.XAVIER) .activation(Activation.TANH).build()) - .layer(1, new DenseLayer.Builder().nIn(3).nOut(2).weightInit(WeightInit.XAVIER) + .layer(1, DenseLayer.builder().nIn(3).nOut(2).weightInit(WeightInit.XAVIER) .activation(Activation.TANH).build()) - .layer(2, new LSTM.Builder().nIn(2).nOut(2).build()) - .layer(3, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(2, LSTM.builder().nIn(2).nOut(2).build()) + .layer(3, RnnOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).nIn(2).nOut(3) .build()) .build(); NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder().seed(987).list() - .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).weightInit(WeightInit.XAVIER) + .layer(0, DenseLayer.builder().nIn(4).nOut(3).weightInit(WeightInit.XAVIER) .activation(Activation.TANH).build()) - .layer(1, new DenseLayer.Builder().nIn(3).nOut(2).weightInit(WeightInit.XAVIER) + .layer(1, DenseLayer.builder().nIn(3).nOut(2).weightInit(WeightInit.XAVIER) .activation(Activation.TANH).build()) - .layer(2, new LSTM.Builder().nIn(2).nOut(2).build()) - .layer(3, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(2, LSTM.builder().nIn(2).nOut(2).build()) + .layer(3, RnnOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).nIn(2).nOut(3) .build()) .build(); @@ -1036,23 +1036,23 @@ public class MultiLayerTest extends BaseDL4JTest { //Simple test: compare .layer(int, ILayer) and .layer(ILayer) are identical NeuralNetConfiguration conf1 = NeuralNetConfiguration.builder().seed(123).list() - .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).weightInit(WeightInit.XAVIER) + .layer(0, DenseLayer.builder().nIn(4).nOut(3).weightInit(WeightInit.XAVIER) .activation(Activation.TANH).build()) - .layer(1, new DenseLayer.Builder().nIn(3).nOut(2).weightInit(WeightInit.XAVIER) + .layer(1, DenseLayer.builder().nIn(3).nOut(2).weightInit(WeightInit.XAVIER) .activation(Activation.TANH).build()) - .layer(2, new LSTM.Builder().nIn(2).nOut(2).build()) - .layer(3, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(2, LSTM.builder().nIn(2).nOut(2).build()) + .layer(3, RnnOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).nIn(2).nOut(3) .build()) .build(); NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder().seed(123).list() - .layer(new DenseLayer.Builder().nIn(4).nOut(3).weightInit(WeightInit.XAVIER) + .layer(DenseLayer.builder().nIn(4).nOut(3).weightInit(WeightInit.XAVIER) .activation(Activation.TANH).build()) - .layer(new 
DenseLayer.Builder().nIn(3).nOut(2).weightInit(WeightInit.XAVIER) + .layer(DenseLayer.builder().nIn(3).nOut(2).weightInit(WeightInit.XAVIER) .activation(Activation.TANH).build()) - .layer(new LSTM.Builder().nIn(2).nOut(2).build()) - .layer(new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(LSTM.builder().nIn(2).nOut(2).build()) + .layer(RnnOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).nIn(2).nOut(3) .build()) .build(); @@ -1066,7 +1066,7 @@ public class MultiLayerTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() - .layer(new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX).build()) + .layer(OutputLayer.builder().nIn(4).nOut(3).activation(Activation.SOFTMAX).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -1095,10 +1095,10 @@ public class MultiLayerTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .convolutionMode(ConvolutionMode.Same) .list() - .layer(new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).nIn(1).nOut(1).build()) - .layer(new SubsamplingLayer.Builder().kernelSize(2, 2).stride(1, 1).build()) - .layer(new DenseLayer.Builder().nOut(10).build()) - .layer(new OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).build()) + .layer(ConvolutionLayer.builder().kernelSize(2, 2).stride(1, 1).nIn(1).nOut(1).build()) + .layer(SubsamplingLayer.builder().kernelSize(2, 2).stride(1, 1).build()) + .layer(DenseLayer.builder().nOut(10).build()) + .layer(OutputLayer.builder().nOut(10).activation(Activation.SOFTMAX).build()) .inputType(InputType.convolutional(28, 28, 1)) .build(); @@ -1138,8 +1138,8 @@ public class MultiLayerTest extends BaseDL4JTest { .trainingWorkspaceMode(ws) .inferenceWorkspaceMode(ws) .seed(12345).list() - .layer(new DenseLayer.Builder().nIn(10).nOut(10).build()) - .layer(new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(10) + .layer(DenseLayer.builder().nIn(10).nOut(10).build()) + .layer(OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(10) .nOut(10).build()) .build(); MultiLayerNetwork s = new MultiLayerNetwork(standard); @@ -1150,7 +1150,7 @@ public class MultiLayerTest extends BaseDL4JTest { .trainingWorkspaceMode(ws) .inferenceWorkspaceMode(ws) .seed(12345).list() - .layer(new DenseLayer.Builder().nIn(10).nOut(10).build()) + .layer(DenseLayer.builder().nIn(10).nOut(10).build()) .build(); MultiLayerNetwork e = new MultiLayerNetwork(external); @@ -1202,8 +1202,8 @@ public class MultiLayerTest extends BaseDL4JTest { .trainingWorkspaceMode(ws) .inferenceWorkspaceMode(ws) .list() - .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).activation(Activation.RELU).build()) - .layer(new ActivationLayer.Builder().activation(Activation.IDENTITY).build()) + .layer(DenseLayer.builder().nIn(nIn).nOut(nOut).activation(Activation.RELU).build()) + .layer(ActivationLayer.builder().activation(Activation.IDENTITY).build()) .inputPreProcessor(0, new RnnToFeedForwardPreProcessor()) .inputPreProcessor(1, new FeedForwardToRnnPreProcessor()) .build(); @@ -1248,10 +1248,10 @@ public class MultiLayerTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() - .layer(new ConvolutionLayer.Builder().kernelSize(2, 2).nOut(6).build()) - .layer(new SubsamplingLayer.Builder().kernelSize(2, 2).build()) - .layer(new DenseLayer.Builder().nOut(30).build()) - .layer(new 
OutputLayer.Builder().nOut(13).activation(Activation.SOFTMAX).build()) + .layer(ConvolutionLayer.builder().kernelSize(2, 2).nOut(6).build()) + .layer(SubsamplingLayer.builder().kernelSize(2, 2).build()) + .layer(DenseLayer.builder().nOut(30).build()) + .layer(OutputLayer.builder().nOut(13).activation(Activation.SOFTMAX).build()) .inputType(InputType.convolutional(28, 28, 3)) .build(); @@ -1276,9 +1276,9 @@ public class MultiLayerTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() - .layer(new SubsamplingLayer.Builder().kernelSize(2, 2).stride(2, 2).build()) - .layer(new LossLayer.Builder().activation(Activation.SIGMOID) - .lossFunction(LossFunctions.LossFunction.MSE).build()) + .layer(SubsamplingLayer.builder().kernelSize(2, 2).stride(2, 2).build()) + .layer(LossLayer.builder().activation(Activation.SIGMOID) + .lossFunction(LossFunctions.LossFunction.MSE.getILossFunction()).build()) .inputType(InputType.convolutionalFlat(28, 28, 1)) .build(); @@ -1309,9 +1309,9 @@ public class MultiLayerTest extends BaseDL4JTest { .seed(12345) .activation(Activation.TANH) .list() - .layer(new DenseLayer.Builder().nIn(10).nOut(10).build()) + .layer(DenseLayer.builder().nIn(10).nOut(10).build()) .layer( - new OutputLayer.Builder().nIn(10).nOut(10).lossFunction(LossFunctions.LossFunction.MSE) + OutputLayer.builder().nIn(10).nOut(10).lossFunction(LossFunctions.LossFunction.MSE) .build()) .build(); @@ -1350,10 +1350,10 @@ public class MultiLayerTest extends BaseDL4JTest { NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder() .list() - .layer(new LSTM.Builder().nOut(6).build()) - .layer(new LSTM.Builder().nOut(7).build()) - .layer(new GlobalPoolingLayer()) - .layer(new OutputLayer.Builder().nOut(8).activation(Activation.SOFTMAX).build()) + .layer(LSTM.builder().nOut(6).build()) + .layer(LSTM.builder().nOut(7).build()) + .layer(GlobalPoolingLayer.builder().build()) + .layer(OutputLayer.builder().nOut(8).activation(Activation.SOFTMAX).build()) .inputType(InputType.recurrent(10)); NeuralNetConfiguration conf = builder.build(); @@ -1379,7 +1379,7 @@ public class MultiLayerTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() - .layer(new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX).build()) + .layer(OutputLayer.builder().nIn(4).nOut(3).activation(Activation.SOFTMAX).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -1399,9 +1399,9 @@ public class MultiLayerTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() - .layer(new VariationalAutoencoder.Builder() + .layer(VariationalAutoencoder.builder() .nIn(10).nOut(10).encoderLayerSizes(10).decoderLayerSizes(10).build()) - .layer(new OutputLayer.Builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build()) + .layer(OutputLayer.builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build()) .build(); @@ -1455,10 +1455,10 @@ public class MultiLayerTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .l2(0.01) - .list() - .layer(new ConvolutionLayer.Builder().nIn(depth).nOut(depth).kernelSize(1, 1).build()) - .layer(new Yolo2OutputLayer.Builder() - .boundingBoxPriors(bbPrior) + + .layer(ConvolutionLayer.builder().nIn(depth).nOut(depth).kernelSize(1, 1).build()) + .layer(Yolo2OutputLayer.builder() + .boundingBoxes(bbPrior) .build()) .build(); @@ -1482,9 +1482,9 @@ public class MultiLayerTest 
extends BaseDL4JTest { .weightInit(WeightInit.XAVIER) .updater(new Adam(lr)) .list() - .layer(new DenseLayer.Builder().nIn(5).nOut(3).build()) - .layer(new DenseLayer.Builder().nIn(3).nOut(2).build()) - .layer(new OutputLayer.Builder(LossFunctions.LossFunction.XENT).nIn(2).nOut(1) + .layer(DenseLayer.builder().nIn(5).nOut(3).build()) + .layer(DenseLayer.builder().nIn(3).nOut(2).build()) + .layer(OutputLayer.builder(LossFunctions.LossFunction.XENT).nIn(2).nOut(1) .activation(Activation.SIGMOID).build()) .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTestRNN.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTestRNN.java index 29d7e7a6a..7189feeae 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTestRNN.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTestRNN.java @@ -71,11 +71,11 @@ public class MultiLayerTestRNN extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list().layer(0, - new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder() + org.deeplearning4j.nn.conf.layers.GravesLSTM.builder() .nIn(nIn).nOut(nHiddenUnits) .activation(Activation.TANH).build()) - .layer(1, new RnnOutputLayer.Builder(LossFunction.MSE).nIn(nHiddenUnits) + .layer(1, RnnOutputLayer.builder(LossFunction.MSE).nIn(nHiddenUnits) .nOut(nOut) .activation(Activation.TANH).build()) .build(); @@ -114,13 +114,13 @@ public class MultiLayerTestRNN extends BaseDL4JTest { int nOut = 25; int[] nHiddenUnits = {17, 19, 23}; NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() - .layer(0, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(nIn).nOut(17) + .layer(0, org.deeplearning4j.nn.conf.layers.GravesLSTM.builder().nIn(nIn).nOut(17) .activation(Activation.TANH).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(17).nOut(19) + .layer(1, org.deeplearning4j.nn.conf.layers.GravesLSTM.builder().nIn(17).nOut(19) .activation(Activation.TANH).build()) - .layer(2, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(19).nOut(23) + .layer(2, org.deeplearning4j.nn.conf.layers.GravesLSTM.builder().nIn(19).nOut(23) .activation(Activation.TANH).build()) - .layer(3, new RnnOutputLayer.Builder(LossFunction.MSE).nIn(23).nOut(nOut) + .layer(3, RnnOutputLayer.builder(LossFunction.MSE).nIn(23).nOut(nOut) .activation(Activation.TANH).build()) .build(); MultiLayerNetwork network = new MultiLayerNetwork(conf); @@ -164,17 +164,17 @@ public class MultiLayerTestRNN extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list().layer(0, - new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder() + org.deeplearning4j.nn.conf.layers.GravesLSTM.builder() .nIn(5).nOut(7).activation(Activation.TANH) .dist(new NormalDistribution(0, 0.5)).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(7) + .layer(1, org.deeplearning4j.nn.conf.layers.GravesLSTM.builder().nIn(7) .nOut(8).activation(Activation.TANH) .dist(new NormalDistribution(0, 0.5)) .build()) - .layer(2, new RnnOutputLayer.Builder(LossFunction.MCXENT) + .layer(2, RnnOutputLayer.builder().lossFunction(LossFunction.MCXENT) .nIn(8).nOut(4) .activation(Activation.SOFTMAX) @@ -231,26 +231,26 @@ public class MultiLayerTestRNN extends BaseDL4JTest { String lastActKey; if(layerType == 0){ - l0 = new 
org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(5).nOut(7) + l0 = org.deeplearning4j.nn.conf.layers.GravesLSTM.builder().nIn(5).nOut(7) .activation(Activation.TANH) .dist(new NormalDistribution(0, 0.5)).build(); - l1 = new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(7).nOut(8) + l1 = org.deeplearning4j.nn.conf.layers.GravesLSTM.builder().nIn(7).nOut(8) .activation(Activation.TANH) .dist(new NormalDistribution(0, 0.5)).build(); lastActKey = GravesLSTM.STATE_KEY_PREV_ACTIVATION; } else if(layerType == 1){ - l0 = new org.deeplearning4j.nn.conf.layers.LSTM.Builder().nIn(5).nOut(7) + l0 = org.deeplearning4j.nn.conf.layers.LSTM.builder().nIn(5).nOut(7) .activation(Activation.TANH) .dist(new NormalDistribution(0, 0.5)).build(); - l1 = new org.deeplearning4j.nn.conf.layers.LSTM.Builder().nIn(7).nOut(8) + l1 = org.deeplearning4j.nn.conf.layers.LSTM.builder().nIn(7).nOut(8) .activation(Activation.TANH) .dist(new NormalDistribution(0, 0.5)).build(); lastActKey = LSTM.STATE_KEY_PREV_ACTIVATION; } else { - l0 = new org.deeplearning4j.nn.conf.layers.recurrent.SimpleRnn.Builder().nIn(5).nOut(7) + l0 = org.deeplearning4j.nn.conf.layers.recurrent.SimpleRnn.builder().nIn(5).nOut(7) .activation(Activation.TANH) .dist(new NormalDistribution(0, 0.5)).build(); - l1 = new org.deeplearning4j.nn.conf.layers.recurrent.SimpleRnn.Builder().nIn(7).nOut(8) + l1 = org.deeplearning4j.nn.conf.layers.recurrent.SimpleRnn.builder().nIn(7).nOut(8) .activation(Activation.TANH) .dist(new NormalDistribution(0, 0.5)).build(); lastActKey = SimpleRnn.STATE_KEY_PREV_ACTIVATION; @@ -266,12 +266,12 @@ public class MultiLayerTestRNN extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345).list() .layer(0, l0) .layer(1, l1) - .layer(2, new DenseLayer.Builder().nIn(8).nOut(9).activation(Activation.TANH) + .layer(2, DenseLayer.builder().nIn(8).nOut(9).activation(Activation.TANH) .dist( new NormalDistribution(0, 0.5)) .build()) - .layer(3, new RnnOutputLayer.Builder(LossFunction.MCXENT) + .layer(3, RnnOutputLayer.builder().lossFunction(LossFunction.MCXENT) .nIn(9).nOut(4).activation(Activation.SOFTMAX) .dist(new NormalDistribution(0, 0.5)) .build()) @@ -353,17 +353,17 @@ public class MultiLayerTestRNN extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list().layer(0, - new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder() + org.deeplearning4j.nn.conf.layers.GravesLSTM.builder() .nIn(5).nOut(7).activation(Activation.TANH) .dist(new NormalDistribution(0, 0.5)).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(7) + .layer(1, org.deeplearning4j.nn.conf.layers.GravesLSTM.builder().nIn(7) .nOut(8).activation(Activation.TANH) .dist(new NormalDistribution(0, 0.5)) .build()) - .layer(2, new RnnOutputLayer.Builder(LossFunction.MCXENT) + .layer(2, RnnOutputLayer.builder().lossFunction(LossFunction.MCXENT) .nIn(8).nOut(4) .activation(Activation.SOFTMAX) @@ -412,16 +412,16 @@ public class MultiLayerTestRNN extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .trainingWorkspaceMode(WorkspaceMode.NONE).inferenceWorkspaceMode(WorkspaceMode.NONE) .list() - .layer(0, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(nIn).nOut(7) + .layer(0, org.deeplearning4j.nn.conf.layers.GravesLSTM.builder().nIn(nIn).nOut(7) .activation(Activation.TANH) .dist(new NormalDistribution(0, 0.5)).build()) - .layer(1, new 
org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(7).nOut(8) + .layer(1, org.deeplearning4j.nn.conf.layers.GravesLSTM.builder().nIn(7).nOut(8) .activation(Activation.TANH) .dist( new NormalDistribution(0, 0.5)) .build()) - .layer(2, new RnnOutputLayer.Builder(LossFunction.MCXENT) + .layer(2, RnnOutputLayer.builder().lossFunction(LossFunction.MCXENT) .nIn(8).nOut(nOut).activation(Activation.SOFTMAX) .dist(new NormalDistribution(0, 0.5)) .build()) @@ -431,16 +431,16 @@ public class MultiLayerTestRNN extends BaseDL4JTest { NeuralNetConfiguration confTBPTT = NeuralNetConfiguration.builder().seed(12345) .trainingWorkspaceMode(WorkspaceMode.NONE).inferenceWorkspaceMode(WorkspaceMode.NONE) .list() - .layer(0, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(nIn).nOut(7) + .layer(0, org.deeplearning4j.nn.conf.layers.GravesLSTM.builder().nIn(nIn).nOut(7) .activation(Activation.TANH) .dist(new NormalDistribution(0, 0.5)).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(7).nOut(8) + .layer(1, org.deeplearning4j.nn.conf.layers.GravesLSTM.builder().nIn(7).nOut(8) .activation(Activation.TANH) .dist( new NormalDistribution(0, 0.5)) .build()) - .layer(2, new RnnOutputLayer.Builder(LossFunction.MCXENT) + .layer(2, RnnOutputLayer.builder().lossFunction(LossFunction.MCXENT) .nIn(8).nOut(nOut).activation(Activation.SOFTMAX) .dist(new NormalDistribution(0, 0.5)) .build()) @@ -524,16 +524,16 @@ public class MultiLayerTestRNN extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345).list().layer(0, - new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(nIn).nOut(7) + org.deeplearning4j.nn.conf.layers.GravesLSTM.builder().nIn(nIn).nOut(7) .activation(Activation.TANH) .dist(new NormalDistribution(0, 0.5)).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(7) + .layer(1, org.deeplearning4j.nn.conf.layers.GravesLSTM.builder().nIn(7) .nOut(8).activation(Activation.TANH) .dist(new NormalDistribution(0, 0.5)) .build()) - .layer(2, new RnnOutputLayer.Builder(LossFunction.MCXENT) + .layer(2, RnnOutputLayer.builder().lossFunction(LossFunction.MCXENT) .nIn(8).nOut(nOut) .activation(Activation.SOFTMAX) @@ -606,16 +606,16 @@ public class MultiLayerTestRNN extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() - .layer(0, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(nIn).nOut(7) + .layer(0, org.deeplearning4j.nn.conf.layers.GravesLSTM.builder().nIn(nIn).nOut(7) .activation(Activation.TANH) .dist(new NormalDistribution(0, 0.5)).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(7).nOut(8) + .layer(1, org.deeplearning4j.nn.conf.layers.GravesLSTM.builder().nIn(7).nOut(8) .activation(Activation.TANH) .dist( new NormalDistribution(0, 0.5)) .build()) - .layer(2, new RnnOutputLayer.Builder(LossFunction.MCXENT) + .layer(2, RnnOutputLayer.builder().lossFunction(LossFunction.MCXENT) .nIn(8).nOut(nOut).activation(Activation.SOFTMAX) .dist(new NormalDistribution(0, 0.5)) .build()) @@ -643,16 +643,16 @@ public class MultiLayerTestRNN extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() - .layer(0, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(nIn).nOut(7) + .layer(0, 
org.deeplearning4j.nn.conf.layers.GravesLSTM.builder().nIn(nIn).nOut(7) .activation(Activation.TANH) .dist(new NormalDistribution(0, 0.5)).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(7).nOut(8) + .layer(1, org.deeplearning4j.nn.conf.layers.GravesLSTM.builder().nIn(7).nOut(8) .activation(Activation.TANH) .dist( new NormalDistribution(0, 0.5)) .build()) - .layer(2, new RnnOutputLayer.Builder(LossFunction.MCXENT) + .layer(2, RnnOutputLayer.builder().lossFunction(LossFunction.MCXENT) .nIn(8).nOut(nOut).activation(Activation.SOFTMAX) .dist(new NormalDistribution(0, 0.5)) .build()) @@ -681,11 +681,11 @@ public class MultiLayerTestRNN extends BaseDL4JTest { NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .list() - .layer(0, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(10) + .layer(0, org.deeplearning4j.nn.conf.layers.GravesLSTM.builder().nIn(10) .nOut(10).activation(Activation.TANH).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(10) + .layer(1, org.deeplearning4j.nn.conf.layers.GravesLSTM.builder().nIn(10) .nOut(10).activation(Activation.TANH).build()) - .layer(2, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(2, RnnOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(10).nOut(10).build()) .inputPreProcessor(0, new FeedForwardToRnnPreProcessor()) .build(); @@ -703,11 +703,11 @@ public class MultiLayerTestRNN extends BaseDL4JTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .graphBuilder().addInputs("in") - .addLayer("0", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(10).nOut(10) + .addLayer("0", org.deeplearning4j.nn.conf.layers.GravesLSTM.builder().nIn(10).nOut(10) .activation(Activation.TANH).build(), "in") - .addLayer("1", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(10).nOut(10) + .addLayer("1", org.deeplearning4j.nn.conf.layers.GravesLSTM.builder().nIn(10).nOut(10) .activation(Activation.TANH).build(), "0") - .addLayer("2", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .addLayer("2", RnnOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(10).nOut(10).build(), "1") .setOutputs("2").inputPreProcessor("0", new FeedForwardToRnnPreProcessor()) .build(); @@ -732,11 +732,11 @@ public class MultiLayerTestRNN extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(WeightInit.XAVIER).list() - .layer(0, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(nIn).nOut(7) + .layer(0, org.deeplearning4j.nn.conf.layers.GravesLSTM.builder().nIn(nIn).nOut(7) .activation(Activation.TANH).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(7).nOut(8) + .layer(1, org.deeplearning4j.nn.conf.layers.GravesLSTM.builder().nIn(7).nOut(8) .activation(Activation.TANH).build()) - .layer(2, new RnnOutputLayer.Builder(LossFunction.MSE).nIn(8).nOut(nOut) + .layer(2, RnnOutputLayer.builder(LossFunction.MSE).nIn(8).nOut(nOut) .activation(Activation.IDENTITY).build()) .backpropType(BackpropType.TruncatedBPTT) .tbpttBackLength(tbpttLength).tbpttFwdLength(tbpttLength).build(); @@ -768,9 +768,9 @@ public class MultiLayerTestRNN extends BaseDL4JTest 
{ try { NeuralNetConfiguration.builder() .list() - .layer(new org.deeplearning4j.nn.conf.layers.LSTM.Builder().nIn(nIn).nOut(nHiddenUnits).build()) - .layer(new GlobalPoolingLayer()) - .layer(new OutputLayer.Builder(LossFunction.MSE).nIn(nHiddenUnits) + .layer(org.deeplearning4j.nn.conf.layers.LSTM.builder().nIn(nIn).nOut(nHiddenUnits).build()) + .layer(GlobalPoolingLayer.builder().build()) + .layer(OutputLayer.builder(LossFunction.MSE).nIn(nHiddenUnits) .nOut(nOut) .activation(Activation.TANH).build()) .backpropType(BackpropType.TruncatedBPTT) @@ -787,7 +787,7 @@ public class MultiLayerTestRNN extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() - .layer(new FrozenLayer(new org.deeplearning4j.nn.conf.layers.LSTM.Builder() + .layer(FrozenLayer.builder(org.deeplearning4j.nn.conf.layers.LSTM.builder() .nIn(5).nOut(5).build())) .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/TestMasking.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/TestMasking.java index d98cd58b2..5e42a147d 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/TestMasking.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/TestMasking.java @@ -67,7 +67,7 @@ public class TestMasking extends BaseDL4JTest { for (boolean tbptt : new boolean[] {true, false}) { //Simple "does it throw an exception" type test... NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345).list() - .layer(0, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MSE) + .layer(0, RnnOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE) .activation(Activation.IDENTITY).nIn(1).nOut(1).build()) .backpropType(tbptt ? 
BackpropType.TruncatedBPTT : BackpropType.Standard) @@ -153,9 +153,9 @@ public class TestMasking extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new NoOp()) .dist(new NormalDistribution(0, 1)).seed(12345) .list() - .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH) + .layer(0, DenseLayer.builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH) .build()) - .layer(1, new OutputLayer.Builder().nIn(layerSize).nOut(nOut).lossFunction(lf) + .layer(1, OutputLayer.builder().nIn(layerSize).nOut(nOut).lossFunction(lf) .activation(a).build()) .validateOutputLayerConfig(false) .build(); @@ -199,9 +199,9 @@ public class TestMasking extends BaseDL4JTest { ComputationGraphConfiguration conf2 = NeuralNetConfiguration.builder().updater(new NoOp()) .dist(new NormalDistribution(0, 1)).seed(12345) .graphBuilder().addInputs("in") - .addLayer("0", new DenseLayer.Builder().nIn(nIn).nOut(layerSize) + .addLayer("0", DenseLayer.builder().nIn(nIn).nOut(layerSize) .activation(Activation.TANH).build(), "in") - .addLayer("1", new OutputLayer.Builder().nIn(layerSize).nOut(nOut).lossFunction(lf) + .addLayer("1", OutputLayer.builder().nIn(layerSize).nOut(nOut).lossFunction(lf) .activation(a).build(), "0") .setOutputs("1").validateOutputLayerConfig(false).build(); @@ -241,9 +241,9 @@ public class TestMasking extends BaseDL4JTest { ComputationGraphConfiguration conf2 = NeuralNetConfiguration.builder().updater(new NoOp()) .dist(new NormalDistribution(0, 1)).seed(12345) .graphBuilder().addInputs("in") - .addLayer("0", new DenseLayer.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH) + .addLayer("0", DenseLayer.builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH) .build(), "in") - .addLayer("1", new OutputLayer.Builder().nIn(layerSize).nOut(nOut) + .addLayer("1", OutputLayer.builder().nIn(layerSize).nOut(nOut) .lossFunction(LossFunctions.LossFunction.XENT).activation(Activation.SIGMOID) .build(), "0") .setOutputs("1").build(); @@ -277,13 +277,13 @@ public class TestMasking extends BaseDL4JTest { .graphBuilder() .addInputs("inputs") .addLayer("cnn1", - new ConvolutionLayer.Builder(new int[] { kernelSize1, kernelSize1 }, + ConvolutionLayer.builder(new int[] { kernelSize1, kernelSize1 }, new int[] { cnnStride1, cnnStride1 }, new int[] { padding, padding }) .nIn(channels) .nOut(2).build(), "inputs") - .addLayer("lstm1", new LSTM.Builder().nIn(7 * 7 * 2).nOut(2).build(), "cnn1") - .addLayer("output", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MSE) + .addLayer("lstm1", LSTM.builder().nIn(7 * 7 * 2).nOut(2).build(), "cnn1") + .addLayer("output", RnnOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE) .activation(Activation.RELU).nIn(2).nOut(2).build(), "lstm1") .setOutputs("output") .setInputTypes(InputType.recurrent(7*7, 1)) @@ -314,14 +314,14 @@ public class TestMasking extends BaseDL4JTest { ) .addInputs("m1", "m2") .addVertex("stack", new StackVertex(), "m1", "m2") - .addLayer("lastUnStacked", new LastTimeStep(new LSTM.Builder().nIn(3).nOut(1).activation(Activation.TANH).build()), "stack") + .addLayer("lastUnStacked", LastTimeStep.builder(LSTM.builder().nIn(3).nOut(1).activation(Activation.TANH).build()), "stack") .addVertex("unstacked1", new UnstackVertex(0, 2), "lastUnStacked") .addVertex("unstacked2", new UnstackVertex(1, 2), "lastUnStacked") .addVertex("restacked", new StackVertex(), "unstacked1", "unstacked2") .addVertex("un1", new UnstackVertex(0, 2), "restacked") .addVertex("un2", new 
UnstackVertex(1, 2), "restacked") .addVertex("q", new MergeVertex(), "un1", "un2") - .addLayer("probability", new OutputLayer.Builder().nIn(2).nOut(6).lossFunction(LossFunctions.LossFunction.MEAN_ABSOLUTE_ERROR).build(), "q") + .addLayer("probability", OutputLayer.builder().nIn(2).nOut(6).lossFunction(LossFunctions.LossFunction.MEAN_ABSOLUTE_ERROR).build(), "q") .setOutputs("probability") .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/TestSetGetParameters.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/TestSetGetParameters.java index 9c3c1407b..36d9c3bca 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/TestSetGetParameters.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/TestSetGetParameters.java @@ -40,13 +40,13 @@ public class TestSetGetParameters extends BaseDL4JTest { public void testSetParameters() { //Set up a MLN, then do set(get) on parameters. Results should be identical compared to before doing this. NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() - .layer(0, new DenseLayer.Builder().nIn(9).nOut(10) + .layer(0, DenseLayer.builder().nIn(9).nOut(10) .dist(new NormalDistribution(0, 1)).build()) - .layer(1, new DenseLayer.Builder().nIn(10).nOut(11) + .layer(1, DenseLayer.builder().nIn(10).nOut(11) .dist(new NormalDistribution(0, 1)).build()) - .layer(2, new AutoEncoder.Builder().corruptionLevel(0.5).nIn(11).nOut(12) + .layer(2, AutoEncoder.builder().corruptionLevel(0.5).nIn(11).nOut(12) .dist(new NormalDistribution(0, 1)).build()) - .layer(3, new OutputLayer.Builder(LossFunction.MSE).nIn(12).nOut(12) + .layer(3, OutputLayer.builder(LossFunction.MSE).nIn(12).nOut(12) .dist(new NormalDistribution(0, 1)).build()) .build(); @@ -79,11 +79,11 @@ public class TestSetGetParameters extends BaseDL4JTest { //Set up a MLN, then do set(get) on parameters. Results should be identical compared to before doing this. NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() - .layer(0, new GravesLSTM.Builder().nIn(9).nOut(10) + .layer(0, GravesLSTM.builder().nIn(9).nOut(10) .dist(new NormalDistribution(0, 1)).build()) - .layer(1, new GravesLSTM.Builder().nIn(10).nOut(11) + .layer(1, GravesLSTM.builder().nIn(10).nOut(11) .dist(new NormalDistribution(0, 1)).build()) - .layer(2, new RnnOutputLayer.Builder(LossFunction.MSE) + .layer(2, RnnOutputLayer.builder(LossFunction.MSE) .dist(new NormalDistribution(0, 1)).nIn(11).nOut(12).build()) .build(); @@ -118,12 +118,12 @@ public class TestSetGetParameters extends BaseDL4JTest { //Create configuration. 
Doesn't matter if this doesn't actually work for forward/backward pass here NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345).list() - .layer(0, new ConvolutionLayer.Builder().nIn(10).nOut(10).kernelSize(2, 2).stride(2, 2) + .layer(0, ConvolutionLayer.builder().nIn(10).nOut(10).kernelSize(2, 2).stride(2, 2) .padding(2, 2).build()) - .layer(1, new DenseLayer.Builder().nIn(10).nOut(10).build()) - .layer(2, new GravesLSTM.Builder().nIn(10).nOut(10).build()) - .layer(3, new GravesBidirectionalLSTM.Builder().nIn(10).nOut(10).build()) - .layer(4, new OutputLayer.Builder(LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(10).nOut(10).build()) + .layer(1, DenseLayer.builder().nIn(10).nOut(10).build()) + .layer(2, GravesLSTM.builder().nIn(10).nOut(10).build()) + .layer(3, GravesBidirectionalLSTM.builder().nIn(10).nOut(10).build()) + .layer(4, OutputLayer.builder().lossFunction(LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(10).nOut(10).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/TestVariableLengthTS.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/TestVariableLengthTS.java index 6f3747e84..08543fc10 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/TestVariableLengthTS.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/TestVariableLengthTS.java @@ -73,8 +73,8 @@ public class TestVariableLengthTS extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(0.1)).seed(12345).list() - .layer(0, new GravesLSTM.Builder().activation(Activation.TANH).nIn(2).nOut(2).build()) - .layer(1, new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(2) + .layer(0, GravesLSTM.builder().activation(Activation.TANH).nIn(2).nOut(2).build()) + .layer(1, RnnOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(2) .nOut(1).activation(Activation.TANH).build()) .build(); @@ -161,10 +161,10 @@ public class TestVariableLengthTS extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(0.1)).seed(12345).list() - .layer(0, new DenseLayer.Builder().activation(Activation.TANH).nIn(2).nOut(2).build()) - .layer(1, new DenseLayer.Builder().activation(Activation.TANH).nIn(2).nOut(2).build()) - .layer(2, new LSTM.Builder().activation(Activation.TANH).nIn(2).nOut(2).build()) - .layer(3, new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MEAN_ABSOLUTE_ERROR).nIn(2) + .layer(0, DenseLayer.builder().activation(Activation.TANH).nIn(2).nOut(2).build()) + .layer(1, DenseLayer.builder().activation(Activation.TANH).nIn(2).nOut(2).build()) + .layer(2, LSTM.builder().activation(Activation.TANH).nIn(2).nOut(2).build()) + .layer(3, RnnOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MEAN_ABSOLUTE_ERROR).nIn(2) .nOut(1).activation(Activation.TANH).build()) .inputPreProcessor(0, new RnnToFeedForwardPreProcessor()) .inputPreProcessor(2, new FeedForwardToRnnPreProcessor()) @@ -305,11 +305,11 @@ public class TestVariableLengthTS extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345L).list() - .layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(5) + .layer(0, 
GravesLSTM.builder().nIn(nIn).nOut(5) .dist(new NormalDistribution(0, 1)) .updater(new NoOp()).build()) - .layer(1, new RnnOutputLayer.Builder( + .layer(1, RnnOutputLayer.builder( LossFunctions.LossFunction.MSE) .activation(Activation.IDENTITY) .nIn(5).nOut(nOut) @@ -368,11 +368,11 @@ public class TestVariableLengthTS extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345L).list() - .layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(5) + .layer(0, GravesLSTM.builder().nIn(nIn).nOut(5) .dist(new NormalDistribution(0, 1)) .updater(new NoOp()).build()) - .layer(1, new RnnOutputLayer.Builder( + .layer(1, RnnOutputLayer.builder( LossFunctions.LossFunction.MSE) .activation(Activation.IDENTITY) .nIn(5).nOut(nOut) @@ -384,11 +384,11 @@ public class TestVariableLengthTS extends BaseDL4JTest { NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder().seed(12345L).list() - .layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(5) + .layer(0, GravesLSTM.builder().nIn(nIn).nOut(5) .dist(new NormalDistribution(0, 1)) .updater(new NoOp()).build()) - .layer(1, new RnnOutputLayer.Builder( + .layer(1, RnnOutputLayer.builder( LossFunctions.LossFunction.MSE) .activation(Activation.IDENTITY) .nIn(5).nOut(nOut) @@ -440,9 +440,9 @@ public class TestVariableLengthTS extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER) .activation(Activation.TANH).list() - .layer(0, new GravesBidirectionalLSTM.Builder().nIn(nIn).nOut(layerSize).build()) - .layer(1, new GravesBidirectionalLSTM.Builder().nIn(layerSize).nOut(layerSize).build()) - .layer(2, new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE) + .layer(0, GravesBidirectionalLSTM.builder().nIn(nIn).nOut(layerSize).build()) + .layer(1, GravesBidirectionalLSTM.builder().nIn(layerSize).nOut(layerSize).build()) + .layer(2, RnnOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE) .nIn(layerSize).nOut(nOut).build()) .build(); @@ -517,14 +517,14 @@ public class TestVariableLengthTS extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER) .activation(Activation.TANH).list().layer(0, bidirectional - ? new GravesBidirectionalLSTM.Builder().nIn(nIn).nOut(layerSize).build() - : new GravesLSTM.Builder().nIn(nIn).nOut(layerSize).build()) + ? GravesBidirectionalLSTM.builder().nIn(nIn).nOut(layerSize).build() + : GravesLSTM.builder().nIn(nIn).nOut(layerSize).build()) .layer(1, bidirectional - ? new GravesBidirectionalLSTM.Builder().nIn(layerSize).nOut(layerSize) + ? 
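Two call shapes for output layers coexist in this patch: a builder(LossFunction) overload that keeps the old constructor-argument form (TestSetGetParameters and TestVariableLengthTS above), and the plain builder() followed by a lossFunction(...) setter (TestMasking earlier). Both forms are shown side by side below; that the single-argument overload exists in addition to the generated no-argument builder() is inferred from the hunks rather than verified:

    // Both shapes are taken from the hunks above; dimensions are placeholders.
    RnnOutputLayer viaOverload = RnnOutputLayer.builder(LossFunctions.LossFunction.MSE)
            .activation(Activation.IDENTITY)
            .nIn(5).nOut(3)
            .build();

    RnnOutputLayer viaSetter = RnnOutputLayer.builder()
            .lossFunction(LossFunctions.LossFunction.MSE)
            .activation(Activation.IDENTITY)
            .nIn(5).nOut(3)
            .build();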
GravesBidirectionalLSTM.builder().nIn(layerSize).nOut(layerSize) .build() - : new GravesLSTM.Builder().nIn(layerSize).nOut(layerSize).build()) - .layer(2, new GlobalPoolingLayer.Builder().poolingType(pt).build()) - .layer(3, new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE) + : GravesLSTM.builder().nIn(layerSize).nOut(layerSize).build()) + .layer(2, GlobalPoolingLayer.builder().poolingType(pt).build()) + .layer(3, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE) .nIn(layerSize).nOut(nOut).build()) .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/rl/TestMultiModelGradientApplication.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/rl/TestMultiModelGradientApplication.java index 0539c6262..85bd892d9 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/rl/TestMultiModelGradientApplication.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/rl/TestMultiModelGradientApplication.java @@ -59,9 +59,9 @@ public class TestMultiModelGradientApplication extends BaseDL4JTest { .weightInit(WeightInit.XAVIER).updater(u) .l1(regularization ? 0.2 : 0.0) .l2(regularization ? 0.3 : 0.0) - .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(10).build()) - .layer(1, new DenseLayer.Builder().nIn(10).nOut(10).build()).layer(2, - new OutputLayer.Builder( + .layer(0, DenseLayer.builder().nIn(nIn).nOut(10).build()) + .layer(1, DenseLayer.builder().nIn(10).nOut(10).build()).layer(2, + OutputLayer.builder( LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX) .nIn(10).nOut(nOut) @@ -151,9 +151,9 @@ public class TestMultiModelGradientApplication extends BaseDL4JTest { .weightInit(WeightInit.XAVIER).updater(u) .l1(regularization ? 0.2 : 0.0) .l2(regularization ? 
0.3 : 0.0).graphBuilder().addInputs("in") - .addLayer("0", new DenseLayer.Builder().nIn(nIn).nOut(10).build(), "in") - .addLayer("1", new DenseLayer.Builder().nIn(10).nOut(10).build(), "0") - .addLayer("2", new OutputLayer.Builder( + .addLayer("0", DenseLayer.builder().nIn(nIn).nOut(10).build(), "in") + .addLayer("1", DenseLayer.builder().nIn(10).nOut(10).build(), "0") + .addLayer("2", OutputLayer.builder( LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(10) .nOut(nOut).build(), diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TestFrozenLayers.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TestFrozenLayers.java index 5c5fb204e..667c20472 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TestFrozenLayers.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TestFrozenLayers.java @@ -66,7 +66,7 @@ public class TestFrozenLayers extends BaseDL4JTest { .fineTuneConfiguration(ftc) .setFeatureExtractor(4) .removeOutputLayer() - .addLayer(new OutputLayer.Builder().nIn(64).nOut(10).lossFunction(LossFunctions.LossFunction.MEAN_ABSOLUTE_ERROR).build()) + .addLayer(OutputLayer.builder().nIn(64).nOut(10).lossFunction(LossFunctions.LossFunction.MEAN_ABSOLUTE_ERROR).build()) .build(); assertEquals(6, transfer.getnLayers()); @@ -117,7 +117,7 @@ public class TestFrozenLayers extends BaseDL4JTest { .fineTuneConfiguration(ftc) .setFeatureExtractor("4") .removeVertexAndConnections("5") - .addLayer("5", new OutputLayer.Builder().nIn(64).nOut(10).lossFunction(LossFunctions.LossFunction.MEAN_ABSOLUTE_ERROR).build(), "4") + .addLayer("5", OutputLayer.builder().nIn(64).nOut(10).lossFunction(LossFunctions.LossFunction.MEAN_ABSOLUTE_ERROR).build(), "4") .setOutputs("5") .build(); @@ -158,12 +158,12 @@ public class TestFrozenLayers extends BaseDL4JTest { .convolutionMode(ConvolutionMode.Same) .updater(new Sgd(0.3)) .list() - .layer(new ConvolutionLayer.Builder().nOut(3).kernelSize(2,2).stride(1,1).build()) - .layer(new SubsamplingLayer.Builder().kernelSize(2,2).stride(1,1).build()) - .layer(new ConvolutionLayer.Builder().nIn(3).nOut(3).kernelSize(2,2).stride(1,1).build()) - .layer(new DenseLayer.Builder().nOut(64).build()) - .layer(new DenseLayer.Builder().nIn(64).nOut(64).build()) - .layer(new OutputLayer.Builder().nIn(64).nOut(10).lossFunction(LossFunctions.LossFunction.MSE).build()) + .layer(ConvolutionLayer.builder().nOut(3).kernelSize(2,2).stride(1,1).build()) + .layer(SubsamplingLayer.builder().kernelSize(2,2).stride(1,1).build()) + .layer(ConvolutionLayer.builder().nIn(3).nOut(3).kernelSize(2,2).stride(1,1).build()) + .layer(DenseLayer.builder().nOut(64).build()) + .layer(DenseLayer.builder().nIn(64).nOut(64).build()) + .layer(OutputLayer.builder().nIn(64).nOut(10).lossFunction(LossFunctions.LossFunction.MSE).build()) .inputType(InputType.convolutionalFlat(28,28,1)) .build(); @@ -182,12 +182,12 @@ public class TestFrozenLayers extends BaseDL4JTest { .updater(new Sgd(0.3)) .graphBuilder() .addInputs("in") - .layer("0", new ConvolutionLayer.Builder().nOut(3).kernelSize(2,2).stride(1,1).build(), "in") - .layer("1", new SubsamplingLayer.Builder().kernelSize(2,2).stride(1,1).build(), "0") - .layer("2", new ConvolutionLayer.Builder().nIn(3).nOut(3).kernelSize(2,2).stride(1,1).build(), "1") - .layer("3", new DenseLayer.Builder().nOut(64).build(), "2") - .layer("4", new DenseLayer.Builder().nIn(64).nOut(64).build(), "3") - .layer("5", new 
OutputLayer.Builder().nIn(64).nOut(10).lossFunction(LossFunctions.LossFunction.MSE).build(), "4") + .layer("0", ConvolutionLayer.builder().nOut(3).kernelSize(2,2).stride(1,1).build(), "in") + .layer("1", SubsamplingLayer.builder().kernelSize(2,2).stride(1,1).build(), "0") + .layer("2", ConvolutionLayer.builder().nIn(3).nOut(3).kernelSize(2,2).stride(1,1).build(), "1") + .layer("3", DenseLayer.builder().nOut(64).build(), "2") + .layer("4", DenseLayer.builder().nIn(64).nOut(64).build(), "3") + .layer("5", OutputLayer.builder().nIn(64).nOut(10).lossFunction(LossFunctions.LossFunction.MSE).build(), "4") .setOutputs("5") .setInputTypes(InputType.convolutionalFlat(28,28,1)) .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TestTransferLearningModelSerializer.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TestTransferLearningModelSerializer.java index 6d6ce41c0..a45181fc6 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TestTransferLearningModelSerializer.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TestTransferLearningModelSerializer.java @@ -54,10 +54,10 @@ public class TestTransferLearningModelSerializer extends BaseDL4JTest { NeuralNetConfiguration origConf = NeuralNetConfiguration.builder().updater(new Sgd(0.1)) .activation(Activation.TANH).dropOut(0.5).list() - .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(5).build()) - .layer(1, new DenseLayer.Builder().nIn(5).nOut(4).build()) - .layer(2, new DenseLayer.Builder().nIn(4).nOut(3).build()) - .layer(3, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .layer(0, DenseLayer.builder().nIn(nIn).nOut(5).build()) + .layer(1, DenseLayer.builder().nIn(5).nOut(4).build()) + .layer(2, DenseLayer.builder().nIn(4).nOut(3).build()) + .layer(3, org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(3) .nOut(nOut).build()) .build(); @@ -102,10 +102,10 @@ public class TestTransferLearningModelSerializer extends BaseDL4JTest { int nOut = 3; ComputationGraphConfiguration origConf = NeuralNetConfiguration.builder().activation(Activation.TANH).graphBuilder().addInputs("in") - .addLayer("0", new DenseLayer.Builder().nIn(nIn).nOut(5).build(), "in") - .addLayer("1", new DenseLayer.Builder().nIn(5).nOut(4).build(), "0") - .addLayer("2", new DenseLayer.Builder().nIn(4).nOut(3).build(), "1") - .addLayer("3", new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .addLayer("0", DenseLayer.builder().nIn(nIn).nOut(5).build(), "in") + .addLayer("1", DenseLayer.builder().nIn(5).nOut(4).build(), "0") + .addLayer("2", DenseLayer.builder().nIn(4).nOut(3).build(), "1") + .addLayer("3", org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(3) .nOut(nOut).build(), "2") diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningCompGraphTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningCompGraphTest.java index 954e8ed18..75938577a 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningCompGraphTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningCompGraphTest.java @@ -66,9 +66,9 @@ public class 
TransferLearningCompGraphTest extends BaseDL4JTest { ComputationGraphConfiguration confToChange = NeuralNetConfiguration.builder().seed(rng) .optimizationAlgo(OptimizationAlgorithm.LBFGS).updater(new Nesterovs(0.01, 0.99)) .graphBuilder().addInputs("layer0In").setInputTypes(InputType.feedForward(4)) - .addLayer("layer0", new DenseLayer.Builder().nIn(4).nOut(3).build(), "layer0In") + .addLayer("layer0", DenseLayer.builder().nIn(4).nOut(3).build(), "layer0In") .addLayer("layer1", - new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + org.deeplearning4j.nn.conf.layers.OutputLayer.builder( LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(3).nOut(3) .build(), @@ -80,9 +80,9 @@ public class TransferLearningCompGraphTest extends BaseDL4JTest { .updater(new RmsProp(0.2)) .graphBuilder().addInputs("layer0In") .setInputTypes(InputType.feedForward(4)) - .addLayer("layer0", new DenseLayer.Builder().nIn(4).nOut(3).build(), "layer0In") + .addLayer("layer0", DenseLayer.builder().nIn(4).nOut(3).build(), "layer0In") .addLayer("layer1", - new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + org.deeplearning4j.nn.conf.layers.OutputLayer.builder( LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(3).nOut(3) .build(), @@ -121,11 +121,11 @@ public class TransferLearningCompGraphTest extends BaseDL4JTest { .activation(Activation.IDENTITY).build(); ComputationGraph modelToFineTune = new ComputationGraph(overallConf.graphBuilder().addInputs("layer0In") - .addLayer("layer0", new DenseLayer.Builder().nIn(4).nOut(5).build(), "layer0In") - .addLayer("layer1", new DenseLayer.Builder().nIn(3).nOut(2).build(), "layer0") - .addLayer("layer2", new DenseLayer.Builder().nIn(2).nOut(3).build(), "layer1") + .addLayer("layer0", DenseLayer.builder().nIn(4).nOut(5).build(), "layer0In") + .addLayer("layer1", DenseLayer.builder().nIn(3).nOut(2).build(), "layer0") + .addLayer("layer2", DenseLayer.builder().nIn(2).nOut(3).build(), "layer1") .addLayer("layer3", - new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + org.deeplearning4j.nn.conf.layers.OutputLayer.builder( LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(3).nOut(3) .build(), @@ -146,11 +146,11 @@ public class TransferLearningCompGraphTest extends BaseDL4JTest { assertEquals(bl1.getWeightInit(), new WeightInitXavier()); ComputationGraph modelExpectedArch = new ComputationGraph(overallConf.graphBuilder().addInputs("layer0In") - .addLayer("layer0", new DenseLayer.Builder().nIn(4).nOut(3).build(), "layer0In") - .addLayer("layer1", new DenseLayer.Builder().nIn(3).nOut(2).build(), "layer0") - .addLayer("layer2", new DenseLayer.Builder().nIn(2).nOut(3).build(), "layer1") + .addLayer("layer0", DenseLayer.builder().nIn(4).nOut(3).build(), "layer0In") + .addLayer("layer1", DenseLayer.builder().nIn(3).nOut(2).build(), "layer0") + .addLayer("layer2", DenseLayer.builder().nIn(2).nOut(3).build(), "layer1") .addLayer("layer3", - new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + org.deeplearning4j.nn.conf.layers.OutputLayer.builder( LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(3).nOut(2) .build(), @@ -188,11 +188,11 @@ public class TransferLearningCompGraphTest extends BaseDL4JTest { .activation(Activation.IDENTITY).build(); ComputationGraph modelToFineTune = new ComputationGraph(overallConf.graphBuilder().addInputs("layer0In") - .addLayer("layer0", new DenseLayer.Builder().nIn(4).nOut(5).build(), "layer0In") - .addLayer("layer1", new 
DenseLayer.Builder().nIn(5).nOut(2).build(), "layer0") - .addLayer("layer2", new DenseLayer.Builder().nIn(2).nOut(3).build(), "layer1") + .addLayer("layer0", DenseLayer.builder().nIn(4).nOut(5).build(), "layer0In") + .addLayer("layer1", DenseLayer.builder().nIn(5).nOut(2).build(), "layer0") + .addLayer("layer2", DenseLayer.builder().nIn(2).nOut(3).build(), "layer1") .addLayer("layer3", - new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + org.deeplearning4j.nn.conf.layers.OutputLayer.builder( LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(3).nOut(3) .build(), @@ -205,18 +205,18 @@ public class TransferLearningCompGraphTest extends BaseDL4JTest { .nOutReplace("layer0", 7, WeightInit.XAVIER, WeightInit.XAVIER) .nOutReplace("layer2", 5, WeightInit.XAVIER).removeVertexKeepConnections("layer3") .addLayer("layer3", - new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(5).nOut(3) + OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT).nIn(5).nOut(3) .activation(Activation.SOFTMAX).build(), "layer2") //.setOutputs("layer3") .build(); ComputationGraph modelExpectedArch = new ComputationGraph(overallConf.graphBuilder().addInputs("layer0In") - .addLayer("layer0", new DenseLayer.Builder().nIn(4).nOut(7).build(), "layer0In") - .addLayer("layer1", new DenseLayer.Builder().nIn(7).nOut(2).build(), "layer0") - .addLayer("layer2", new DenseLayer.Builder().nIn(2).nOut(5).build(), "layer1") + .addLayer("layer0", DenseLayer.builder().nIn(4).nOut(7).build(), "layer0In") + .addLayer("layer1", DenseLayer.builder().nIn(7).nOut(2).build(), "layer0") + .addLayer("layer2", DenseLayer.builder().nIn(2).nOut(5).build(), "layer1") .addLayer("layer3", - new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + org.deeplearning4j.nn.conf.layers.OutputLayer.builder( LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(5).nOut(3) .build(), @@ -257,43 +257,43 @@ public class TransferLearningCompGraphTest extends BaseDL4JTest { .setInputTypes(InputType.convolutionalFlat(28, 28, 3)) .addLayer("layer0", - new ConvolutionLayer.Builder(5, 5).nIn(3) + ConvolutionLayer.builder(5, 5).nIn(3) .stride(1, 1).nOut(20) .activation(Activation.IDENTITY) .build(), "layer0In") .addLayer("layer1", - new SubsamplingLayer.Builder( + SubsamplingLayer.builder( SubsamplingLayer.PoolingType.MAX) .kernelSize(2, 2) .stride(2, 2) .build(), "layer0") .addLayer("layer2", - new ConvolutionLayer.Builder(5, 5).stride(1, 1) + ConvolutionLayer.builder(5, 5).stride(1, 1) .nOut(50) .activation(Activation.IDENTITY) .build(), "layer1") .addLayer("layer3", - new SubsamplingLayer.Builder( + SubsamplingLayer.builder( SubsamplingLayer.PoolingType.MAX) .kernelSize(2, 2) .stride(2, 2) .build(), "layer2") .addLayer("layer4", - new DenseLayer.Builder() + DenseLayer.builder() .activation(Activation.RELU) .nOut(500).build(), "layer3") .addLayer("layer5", - new DenseLayer.Builder() + DenseLayer.builder() .activation(Activation.RELU) .nOut(250).build(), "layer4") .addLayer("layer6", - new OutputLayer.Builder( + OutputLayer.builder( LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .nOut(100) .activation(Activation.SOFTMAX) @@ -313,19 +313,19 @@ public class TransferLearningCompGraphTest extends BaseDL4JTest { .removeVertexAndConnections("layer5").removeVertexAndConnections("layer6") .setInputs("layer0In").setInputTypes(InputType.convolutionalFlat(28, 28, 3)) .addLayer("layer5", - new DenseLayer.Builder().activation(Activation.RELU).nIn(600) + 
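The convolutional configurations keep the parameterized builder overloads that mirror the old Builder constructors: kernel size for ConvolutionLayer/Convolution2D and pooling type for SubsamplingLayer. A condensed graph-builder fragment in the migrated style; vertex names and layer sizes are placeholders, and the overload signatures are assumed to be exactly those used in the hunks above:

    // Sketch; "in"/"conv"/"pool"/"out" are placeholder vertex names.
    ComputationGraphConfiguration cnnConf = NeuralNetConfiguration.builder()
            .graphBuilder()
            .addInputs("in")
            .addLayer("conv", ConvolutionLayer.builder(5, 5)          // 5x5 kernel, as in the old Builder(5, 5)
                    .nIn(3).nOut(20).stride(1, 1)
                    .activation(Activation.IDENTITY).build(), "in")
            .addLayer("pool", SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX)
                    .kernelSize(2, 2).stride(2, 2).build(), "conv")
            .addLayer("out", OutputLayer.builder()
                    .lossFunction(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
                    .activation(Activation.SOFTMAX)
                    .nOut(10).build(), "pool")
            .setOutputs("out")
            .setInputTypes(InputType.convolutionalFlat(28, 28, 3))
            .build();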
DenseLayer.builder().activation(Activation.RELU).nIn(600) .nOut(300).build(), "layer4") .addLayer("layer6", - new DenseLayer.Builder().activation(Activation.RELU).nIn(300) + DenseLayer.builder().activation(Activation.RELU).nIn(300) .nOut(150).build(), "layer5") .addLayer("layer7", - new DenseLayer.Builder().activation(Activation.RELU).nIn(150) + DenseLayer.builder().activation(Activation.RELU).nIn(150) .nOut(50).build(), "layer6") .addLayer("layer8", - new OutputLayer.Builder( + OutputLayer.builder( LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .activation(Activation.SOFTMAX) .nIn(50).nOut(10).build(), @@ -336,42 +336,42 @@ public class TransferLearningCompGraphTest extends BaseDL4JTest { new ComputationGraph(overallConf.graphBuilder().addInputs("layer0In") .setInputTypes(InputType.convolutionalFlat(28,28, 3)) .addLayer("layer0", - new FrozenLayer(new ConvolutionLayer.Builder(5, 5).nIn(3) + FrozenLayer.builder(ConvolutionLayer.builder(5, 5).nIn(3) .stride(1, 1).nOut(20) .activation(Activation.IDENTITY).build()), "layer0In") .addLayer("layer1", - new FrozenLayer(new SubsamplingLayer.Builder( + FrozenLayer.builder(SubsamplingLayer.builder( SubsamplingLayer.PoolingType.MAX) .kernelSize(2, 2).stride(2, 2) .build()), "layer0") .addLayer("layer2", - new ConvolutionLayer.Builder(5, 5).stride(1, 1).nOut(50) + ConvolutionLayer.builder(5, 5).stride(1, 1).nOut(50) .activation(Activation.IDENTITY).build(), "layer1") .addLayer("layer3", - new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX) + SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX) .kernelSize(2, 2).stride(2, 2).build(), "layer2") .addLayer("layer4", - new DenseLayer.Builder().activation(Activation.RELU).nOut(600) + DenseLayer.builder().activation(Activation.RELU).nOut(600) .build(), "layer3") .addLayer("layer5", - new DenseLayer.Builder().activation(Activation.RELU).nOut(300) + DenseLayer.builder().activation(Activation.RELU).nOut(300) .build(), "layer4") .addLayer("layer6", - new DenseLayer.Builder().activation(Activation.RELU).nOut(150) + DenseLayer.builder().activation(Activation.RELU).nOut(150) .build(), "layer5") .addLayer("layer7", - new DenseLayer.Builder().activation(Activation.RELU).nOut(50) + DenseLayer.builder().activation(Activation.RELU).nOut(50) .build(), "layer6") .addLayer("layer8", - new OutputLayer.Builder( + OutputLayer.builder( LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .nOut(10) .activation(Activation.SOFTMAX) @@ -402,12 +402,12 @@ public class TransferLearningCompGraphTest extends BaseDL4JTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345).updater(new Adam(0.1)) .weightInit(WeightInit.XAVIER) .graphBuilder().addInputs("in") - .addLayer("blstm1",new GravesBidirectionalLSTM.Builder().nIn(10).nOut(10) + .addLayer("blstm1",GravesBidirectionalLSTM.builder().nIn(10).nOut(10) .activation(Activation.TANH).build(), "in") - .addLayer("pool", new GlobalPoolingLayer.Builder().build(), "blstm1") - .addLayer("dense", new DenseLayer.Builder().nIn(10).nOut(10).build(), "pool") - .addLayer("out", new OutputLayer.Builder().nIn(10).nOut(10).activation(Activation.IDENTITY) + .addLayer("pool", GlobalPoolingLayer.builder().build(), "blstm1") + .addLayer("dense", DenseLayer.builder().nIn(10).nOut(10).build(), "pool") + .addLayer("out", OutputLayer.builder().nIn(10).nOut(10).activation(Activation.IDENTITY) .lossFunction(LossFunctions.LossFunction.MSE).build(), "dense") .setOutputs("out").build(); @@ -419,7 +419,7 @@ public class TransferLearningCompGraphTest extends 
BaseDL4JTest { ComputationGraph graph = new TransferLearning.GraphBuilder(g).fineTuneConfiguration(fineTuneConfiguration) .removeVertexKeepConnections("out").setFeatureExtractor("dense") - .addLayer("out", new OutputLayer.Builder().updater(new Adam(0.1)) + .addLayer("out", OutputLayer.builder().updater(new Adam(0.1)) .weightInit(WeightInit.XAVIER) .activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT) .nIn(10).nOut(5).build(), "dense") @@ -430,12 +430,12 @@ public class TransferLearningCompGraphTest extends BaseDL4JTest { .weightInit(WeightInit.XAVIER) .graphBuilder().addInputs("in") .addLayer("blstm1", - new FrozenLayer(new GravesBidirectionalLSTM.Builder().nIn(10).nOut(10) + FrozenLayer.builder(GravesBidirectionalLSTM.builder().nIn(10).nOut(10) .activation(Activation.TANH).build()), "in") - .addLayer("pool", new FrozenLayer(new GlobalPoolingLayer.Builder().build()), "blstm1") - .addLayer("dense", new FrozenLayer(new DenseLayer.Builder().nIn(10).nOut(10).build()), "pool") - .addLayer("out", new OutputLayer.Builder().nIn(10).nOut(5).activation(Activation.SOFTMAX) + .addLayer("pool", FrozenLayer.builder(GlobalPoolingLayer.builder().build()), "blstm1") + .addLayer("dense", FrozenLayer.builder(DenseLayer.builder().nIn(10).nOut(10).build()), "pool") + .addLayer("out", OutputLayer.builder().nIn(10).nOut(5).activation(Activation.SOFTMAX) .updater(new Adam(0.1)) .lossFunction(LossFunctions.LossFunction.MCXENT).build(), "dense") .setOutputs("out").build(); @@ -459,7 +459,7 @@ public class TransferLearningCompGraphTest extends BaseDL4JTest { .constrainWeights(new UnitNormConstraint()) .graphBuilder() .addInputs("in") - .addLayer("layer", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in") + .addLayer("layer", DenseLayer.builder().nIn(10).nOut(10).build(), "in") .setOutputs("layer") .build(); @@ -479,7 +479,7 @@ public class TransferLearningCompGraphTest extends BaseDL4JTest { DenseLayer l = (DenseLayer) transfer.getLayer(0).getLayerConfiguration(); - assertNull(l.getIDropout()); + assertNull(l.getDropOut()); assertNull(l.getWeightNoise()); assertNull(l.getConstraints()); assertNull(TestUtils.getL2Reg(l)); @@ -500,13 +500,13 @@ public class TransferLearningCompGraphTest extends BaseDL4JTest { .addInputs(inputName) .setOutputs(outputName) .setInputTypes(InputType.inferInputTypes(input)) - .addLayer(firstConv, new Convolution2D.Builder(3, 3) + .addLayer(firstConv, Convolution2D.builder(3, 3) .nOut(10) .build(), inputName) - .addLayer(secondConv, new Convolution2D.Builder(1, 1) + .addLayer(secondConv, Convolution2D.builder(1, 1) .nOut(3) .build(), firstConv) - .addLayer(outputName, new OutputLayer.Builder() + .addLayer(outputName, OutputLayer.builder() .nOut(2) .lossFunction(LossFunctions.LossFunction.MSE) .build(), secondConv) @@ -518,7 +518,7 @@ public class TransferLearningCompGraphTest extends BaseDL4JTest { .nOutReplace(firstConv, 7, new ConstantDistribution(333)) .nOutReplace(secondConv, 3, new ConstantDistribution(111)) .removeVertexAndConnections(outputName) - .addLayer(outputName, new OutputLayer.Builder() + .addLayer(outputName, OutputLayer.builder() .nIn(48).nOut(2) .lossFunction(LossFunctions.LossFunction.MSE) .build(), new CnnToFeedForwardPreProcessor(4,4,3), secondConv) @@ -546,14 +546,14 @@ public class TransferLearningCompGraphTest extends BaseDL4JTest { .addInputs(inputName) .setOutputs(outputName) .setInputTypes(InputType.inferInputTypes(input)) - .addLayer(changeNoutName, new Convolution2D.Builder(1, 1) + .addLayer(changeNoutName, 
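In the transfer-learning graph tests only the construction of the replacement output layer changes; the graph-surgery calls themselves (removeVertexKeepConnections, setFeatureExtractor, addLayer) are untouched. A sketch of the resulting call shape, where pretrained is a placeholder for an already initialised ComputationGraph:

    // Sketch; `pretrained` is a placeholder, every builder call appears in the hunks above.
    ComputationGraph tuned = new TransferLearning.GraphBuilder(pretrained)
            .fineTuneConfiguration(FineTuneConfiguration.builder().updater(new Adam(0.1)).build())
            .removeVertexKeepConnections("out")
            .setFeatureExtractor("dense")
            .addLayer("out", OutputLayer.builder()
                    .lossFunction(LossFunctions.LossFunction.MCXENT)
                    .activation(Activation.SOFTMAX)
                    .nIn(10).nOut(5).build(), "dense")
            .build();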
Convolution2D.builder(1, 1) .nOut(10) .build(), inputName) - .addLayer(poolName, new SubsamplingLayer.Builder(1,1).build(), changeNoutName) - .addLayer(afterPoolName, new Convolution2D.Builder(1, 1) + .addLayer(poolName, SubsamplingLayer.builder(1,1).build(), changeNoutName) + .addLayer(afterPoolName, Convolution2D.builder(1, 1) .nOut(7) .build(), poolName) - .addLayer(outputName, new OutputLayer.Builder() + .addLayer(outputName, OutputLayer.builder() .activation(Activation.SOFTMAX) .nOut(2) .build(), afterPoolName) @@ -582,9 +582,9 @@ public class TransferLearningCompGraphTest extends BaseDL4JTest { .graphBuilder() .addInputs("in") - .layer("l0", new LSTM.Builder().nIn(5).nOut(5).build(), "in") - .layer("l1", new RecurrentAttentionLayer.Builder().nHeads(1).headSize(5).nIn(5).nOut(5).build(), "l0") - .layer("out", new RnnOutputLayer.Builder().nIn(5).nOut(5).activation(Activation.SOFTMAX).build(), "l1") + .layer("l0", LSTM.builder().nIn(5).nOut(5).build(), "in") + .layer("l1", RecurrentAttentionLayer.builder().nHeads(1).headSize(5).nIn(5).nOut(5).build(), "l0") + .layer("out", RnnOutputLayer.builder().nIn(5).nOut(5).activation(Activation.SOFTMAX).build(), "l1") .setOutputs("out") .build(); @@ -598,7 +598,7 @@ public class TransferLearningCompGraphTest extends BaseDL4JTest { ComputationGraph cg2 = new TransferLearning.GraphBuilder(cg).removeVertexAndConnections("out") .fineTuneConfiguration(FineTuneConfiguration.builder().updater(new Adam(0.01)).build()) .removeVertexAndConnections("out") - .addLayer("newOut", new RnnOutputLayer.Builder().nIn(5).nOut(5).activation(Activation.SOFTMAX).build(), "l1") + .addLayer("newOut", RnnOutputLayer.builder().nIn(5).nOut(5).activation(Activation.SOFTMAX).build(), "l1") .setOutputs("newOut") .build(); @@ -628,9 +628,9 @@ public class TransferLearningCompGraphTest extends BaseDL4JTest { .graphBuilder() .addInputs("in") - .layer("l0", new LSTM.Builder().nIn(5).nOut(5).build(), "in") + .layer("l0", LSTM.builder().nIn(5).nOut(5).build(), "in") .addVertex("l1", new AttentionVertex.Builder().nHeads(1).headSize(5).nInKeys(5).nInQueries(5).nInValues(5).nOut(5).build(), "l0", "l0", "l0") - .layer("out", new RnnOutputLayer.Builder().nIn(5).nOut(5).activation(Activation.SOFTMAX).build(), "l1") + .layer("out", RnnOutputLayer.builder().nIn(5).nOut(5).activation(Activation.SOFTMAX).build(), "l1") .setOutputs("out") .build(); @@ -644,7 +644,7 @@ public class TransferLearningCompGraphTest extends BaseDL4JTest { ComputationGraph cg2 = new TransferLearning.GraphBuilder(cg).removeVertexAndConnections("out") .fineTuneConfiguration(FineTuneConfiguration.builder().updater(new Adam(0.01)).build()) .removeVertexAndConnections("out") - .addLayer("newOut", new RnnOutputLayer.Builder().nIn(5).nOut(5).activation(Activation.SOFTMAX).build(), "l1") + .addLayer("newOut", RnnOutputLayer.builder().nIn(5).nOut(5).activation(Activation.SOFTMAX).build(), "l1") .setOutputs("newOut") .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningComplex.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningComplex.java index cee6e2f90..17d711726 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningComplex.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningComplex.java @@ -57,11 +57,11 @@ public class TransferLearningComplex extends BaseDL4JTest { ComputationGraphConfiguration conf = 
NeuralNetConfiguration.builder().updater(new Adam(1e-4)) .activation(Activation.LEAKYRELU).graphBuilder().addInputs("in1", "in2") - .addLayer("A", new DenseLayer.Builder().nIn(10).nOut(9).build(), "in1") - .addLayer("B", new DenseLayer.Builder().nIn(9).nOut(8).build(), "A") - .addLayer("C", new DenseLayer.Builder().nIn(7).nOut(6).build(), "in2") - .addLayer("D", new DenseLayer.Builder().nIn(8 + 7).nOut(5).build(), "B", "C") - .addLayer("out", new OutputLayer.Builder().nIn(5).nOut(4).activation(Activation.LEAKYRELU).build(), "D") + .addLayer("A", DenseLayer.builder().nIn(10).nOut(9).build(), "in1") + .addLayer("B", DenseLayer.builder().nIn(9).nOut(8).build(), "A") + .addLayer("C", DenseLayer.builder().nIn(7).nOut(6).build(), "in2") + .addLayer("D", DenseLayer.builder().nIn(8 + 7).nOut(5).build(), "B", "C") + .addLayer("out", OutputLayer.builder().nIn(5).nOut(4).activation(Activation.LEAKYRELU).build(), "D") .setOutputs("out") .validateOutputLayerConfig(false) .build(); @@ -87,9 +87,9 @@ public class TransferLearningComplex extends BaseDL4JTest { Layer[] layers = graph2.getLayers(); for (Layer l : layers) { - String name = l.getLayerConfiguration().getLayerName(); + String name = l.getLayerConfiguration().getName(); log.info(name + "\t frozen: " + (l instanceof FrozenLayer)); - if ("C".equals(l.getLayerConfiguration().getLayerName())) { + if ("C".equals(l.getLayerConfiguration().getName())) { //Only C should be frozen in this config cFound = true; assertTrue(l instanceof FrozenLayer, name); @@ -125,11 +125,11 @@ public class TransferLearningComplex extends BaseDL4JTest { */ ComputationGraphConfiguration conf = overallConf.graphBuilder().addInputs("inCentre", "inRight") - .addLayer("denseCentre0", new DenseLayer.Builder().nIn(2).nOut(2).build(), "inCentre") - .addLayer("denseRight0", new DenseLayer.Builder().nIn(2).nOut(2).build(), "inRight") + .addLayer("denseCentre0", DenseLayer.builder().nIn(2).nOut(2).build(), "inCentre") + .addLayer("denseRight0", DenseLayer.builder().nIn(2).nOut(2).build(), "inRight") .addVertex("mergeRight", new MergeVertex(), "denseCentre0", "denseRight0") .addLayer("outRight", - new OutputLayer.Builder(LossFunctions.LossFunction.MSE).nIn(4).nOut(2).build(), + OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(4).nOut(2).build(), "mergeRight") .setOutputs("outRight").build(); ComputationGraph modelToTune = new ComputationGraph(conf); @@ -143,11 +143,11 @@ public class TransferLearningComplex extends BaseDL4JTest { ComputationGraphConfiguration otherConf = overallConf.graphBuilder().addInputs("denseCentre0", "inRight") - .addLayer("denseRight0", new DenseLayer.Builder().nIn(2).nOut(2).build(), + .addLayer("denseRight0", DenseLayer.builder().nIn(2).nOut(2).build(), "inRight") .addVertex("mergeRight", new MergeVertex(), "denseCentre0", "denseRight0") .addLayer("outRight", - new OutputLayer.Builder(LossFunctions.LossFunction.MSE).nIn(4) + OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(4) .nOut(2).build(), "mergeRight") .setOutputs("outRight").build(); @@ -206,11 +206,11 @@ public class TransferLearningComplex extends BaseDL4JTest { */ ComputationGraphConfiguration conf = overallConf.graphBuilder().addInputs("inCentre", "inRight") - .addLayer("denseCentre0", new DenseLayer.Builder().nIn(2).nOut(2).build(), "inCentre") - .addLayer("outCentre", new OutputLayer.Builder(LossFunctions.LossFunction.MSE).nIn(2).nOut(2).build(),"denseCentre0") - .addLayer("denseRight0", new DenseLayer.Builder().nIn(3).nOut(2).build(), "inRight") + 
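Besides the builders, two configuration accessors are renamed in the assertions above: getLayerName() becomes getName() and getIDropout() becomes getDropOut(). A hypothetical check using the new names; graph, net, log and the assertion are placeholder test context:

    // Sketch of the renamed accessors as used by the updated tests.
    for (Layer l : graph.getLayers()) {
        String name = l.getLayerConfiguration().getName();    // was getLayerName()
        log.info(name + "\t frozen: " + (l instanceof FrozenLayer));
    }

    DenseLayer first = (DenseLayer) net.getLayer(0).getLayerConfiguration();
    assertNull(first.getDropOut());                            // was getIDropout()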
.addLayer("denseCentre0", DenseLayer.builder().nIn(2).nOut(2).build(), "inCentre") + .addLayer("outCentre", OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(2).nOut(2).build(),"denseCentre0") + .addLayer("denseRight0", DenseLayer.builder().nIn(3).nOut(2).build(), "inRight") .addVertex("mergeRight", new MergeVertex(), "denseCentre0", "denseRight0") - .addLayer("outRight", new OutputLayer.Builder(LossFunctions.LossFunction.MSE).nIn(4).nOut(2).build(),"mergeRight") + .addLayer("outRight", OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(4).nOut(2).build(),"mergeRight") .setOutputs("outCentre", "outRight").build(); ComputationGraph modelToTune = new ComputationGraph(conf); modelToTune.init(); @@ -252,11 +252,11 @@ public class TransferLearningComplex extends BaseDL4JTest { .activation(Activation.IDENTITY); ComputationGraphConfiguration conf = overallConf.graphBuilder().addInputs("inCentre", "inRight") - .addLayer("denseCentre0", new DenseLayer.Builder().nIn(2).nOut(2).build(), "inCentre") - .addLayer("denseRight0", new DenseLayer.Builder().nIn(2).nOut(2).build(), "inRight") + .addLayer("denseCentre0", DenseLayer.builder().nIn(2).nOut(2).build(), "inCentre") + .addLayer("denseRight0", DenseLayer.builder().nIn(2).nOut(2).build(), "inRight") .addVertex("mergeRight", new MergeVertex(), "denseCentre0", "denseRight0") .addLayer("outRight", - new OutputLayer.Builder(LossFunctions.LossFunction.MSE).nIn(4).nOut(2).build(), + OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(4).nOut(2).build(), "mergeRight") .setOutputs("outRight").build(); ComputationGraph modelToTune = new ComputationGraph(conf); @@ -265,7 +265,7 @@ public class TransferLearningComplex extends BaseDL4JTest { ComputationGraph modelNow = new TransferLearning.GraphBuilder(modelToTune) .addLayer("outCentre", - new OutputLayer.Builder(LossFunctions.LossFunction.MSE).nIn(2) + OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(2) .nOut(3).build(), "denseCentre0") .setOutputs("outRight", "outCentre").build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningHelperTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningHelperTest.java index 48963619b..9b3bc0c11 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningHelperTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningHelperTest.java @@ -28,7 +28,6 @@ import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.graph.MergeVertex; import org.deeplearning4j.nn.conf.graph.SubsetVertex; import org.deeplearning4j.nn.conf.layers.DenseLayer; -import org.deeplearning4j.nn.conf.layers.DenseLayer.Builder; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; @@ -71,24 +70,24 @@ public class TransferLearningHelperTest extends BaseDL4JTest { */ ComputationGraphConfiguration conf = overallConf.graphBuilder().addInputs("inCentre", "inRight") - .addLayer("denseCentre0", new DenseLayer.Builder().nIn(10).nOut(9).build(), "inCentre") - .addLayer("denseCentre1", new DenseLayer.Builder().nIn(9).nOut(8).build(), "denseCentre0") - .addLayer("denseCentre2", new DenseLayer.Builder().nIn(8).nOut(7).build(), "denseCentre1") - .addLayer("denseCentre3", new 
DenseLayer.Builder().nIn(7).nOut(7).build(), "denseCentre2") + .addLayer("denseCentre0", DenseLayer.builder().nIn(10).nOut(9).build(), "inCentre") + .addLayer("denseCentre1", DenseLayer.builder().nIn(9).nOut(8).build(), "denseCentre0") + .addLayer("denseCentre2", DenseLayer.builder().nIn(8).nOut(7).build(), "denseCentre1") + .addLayer("denseCentre3", DenseLayer.builder().nIn(7).nOut(7).build(), "denseCentre2") .addLayer("outCentre", - new OutputLayer.Builder(LossFunctions.LossFunction.MSE).nIn(7).nOut(4).build(), + OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(7).nOut(4).build(), "denseCentre3") .addVertex("subsetLeft", new SubsetVertex(0, 3), "denseCentre1") - .addLayer("denseLeft0", new DenseLayer.Builder().nIn(4).nOut(5).build(), "subsetLeft") + .addLayer("denseLeft0", DenseLayer.builder().nIn(4).nOut(5).build(), "subsetLeft") .addLayer("outLeft", - new OutputLayer.Builder(LossFunctions.LossFunction.MSE).nIn(5).nOut(6).build(), + OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(5).nOut(6).build(), "denseLeft0") - .addLayer("denseRight", new DenseLayer.Builder().nIn(7).nOut(7).build(), "denseCentre2") - .addLayer("denseRight0", new DenseLayer.Builder().nIn(2).nOut(3).build(), "inRight") + .addLayer("denseRight", DenseLayer.builder().nIn(7).nOut(7).build(), "denseCentre2") + .addLayer("denseRight0", DenseLayer.builder().nIn(2).nOut(3).build(), "inRight") .addVertex("mergeRight", new MergeVertex(), "denseRight", "denseRight0") - .addLayer("denseRight1", new DenseLayer.Builder().nIn(10).nOut(5).build(), "mergeRight") + .addLayer("denseRight1", DenseLayer.builder().nIn(10).nOut(5).build(), "mergeRight") .addLayer("outRight", - new OutputLayer.Builder(LossFunctions.LossFunction.MSE).nIn(5).nOut(5).build(), + OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(5).nOut(5).build(), "denseRight1") .setOutputs("outLeft", "outCentre", "outRight").build(); @@ -101,28 +100,28 @@ public class TransferLearningHelperTest extends BaseDL4JTest { ComputationGraphConfiguration expectedConf = overallConf.graphBuilder().addInputs("denseCentre1", "denseCentre2", "inRight") //inputs are in sorted order - .addLayer("denseCentre3", new DenseLayer.Builder().nIn(7).nOut(7).build(), + .addLayer("denseCentre3", DenseLayer.builder().nIn(7).nOut(7).build(), "denseCentre2") .addLayer("outCentre", - new OutputLayer.Builder(LossFunctions.LossFunction.MSE).nIn(7) + OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(7) .nOut(4).build(), "denseCentre3") .addVertex("subsetLeft", new SubsetVertex(0, 3), "denseCentre1") - .addLayer("denseLeft0", new DenseLayer.Builder().nIn(4).nOut(5).build(), + .addLayer("denseLeft0", DenseLayer.builder().nIn(4).nOut(5).build(), "subsetLeft") .addLayer("outLeft", - new OutputLayer.Builder(LossFunctions.LossFunction.MSE).nIn(5) + OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(5) .nOut(6).build(), "denseLeft0") - .addLayer("denseRight", new DenseLayer.Builder().nIn(7).nOut(7).build(), + .addLayer("denseRight", DenseLayer.builder().nIn(7).nOut(7).build(), "denseCentre2") - .addLayer("denseRight0", new DenseLayer.Builder().nIn(2).nOut(3).build(), + .addLayer("denseRight0", DenseLayer.builder().nIn(2).nOut(3).build(), "inRight") .addVertex("mergeRight", new MergeVertex(), "denseRight", "denseRight0") - .addLayer("denseRight1", new DenseLayer.Builder().nIn(10).nOut(5).build(), + .addLayer("denseRight1", DenseLayer.builder().nIn(10).nOut(5).build(), "mergeRight") .addLayer("outRight", - new 
OutputLayer.Builder(LossFunctions.LossFunction.MSE).nIn(5) + OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(5) .nOut(5).build(), "denseRight1") .setOutputs("outLeft", "outCentre", "outRight").build(); @@ -139,24 +138,24 @@ public class TransferLearningHelperTest extends BaseDL4JTest { .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT); ComputationGraphConfiguration conf = overallConf.graphBuilder().addInputs("inCentre", "inRight") - .addLayer("denseCentre0", new DenseLayer.Builder().nIn(10).nOut(9).build(), "inCentre") - .addLayer("denseCentre1", new DenseLayer.Builder().nIn(9).nOut(8).build(), "denseCentre0") - .addLayer("denseCentre2", new DenseLayer.Builder().nIn(8).nOut(7).build(), "denseCentre1") - .addLayer("denseCentre3", new DenseLayer.Builder().nIn(7).nOut(7).build(), "denseCentre2") + .addLayer("denseCentre0", DenseLayer.builder().nIn(10).nOut(9).build(), "inCentre") + .addLayer("denseCentre1", DenseLayer.builder().nIn(9).nOut(8).build(), "denseCentre0") + .addLayer("denseCentre2", DenseLayer.builder().nIn(8).nOut(7).build(), "denseCentre1") + .addLayer("denseCentre3", DenseLayer.builder().nIn(7).nOut(7).build(), "denseCentre2") .addLayer("outCentre", - new OutputLayer.Builder(LossFunctions.LossFunction.MSE).nIn(7).nOut(4).build(), + OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(7).nOut(4).build(), "denseCentre3") .addVertex("subsetLeft", new SubsetVertex(0, 3), "denseCentre1") - .addLayer("denseLeft0", new DenseLayer.Builder().nIn(4).nOut(5).build(), "subsetLeft") + .addLayer("denseLeft0", DenseLayer.builder().nIn(4).nOut(5).build(), "subsetLeft") .addLayer("outLeft", - new OutputLayer.Builder(LossFunctions.LossFunction.MSE).nIn(5).nOut(6).build(), + OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(5).nOut(6).build(), "denseLeft0") - .addLayer("denseRight", new DenseLayer.Builder().nIn(7).nOut(7).build(), "denseCentre2") - .addLayer("denseRight0", new DenseLayer.Builder().nIn(2).nOut(3).build(), "inRight") + .addLayer("denseRight", DenseLayer.builder().nIn(7).nOut(7).build(), "denseCentre2") + .addLayer("denseRight0", DenseLayer.builder().nIn(2).nOut(3).build(), "inRight") .addVertex("mergeRight", new MergeVertex(), "denseRight", "denseRight0") - .addLayer("denseRight1", new DenseLayer.Builder().nIn(10).nOut(5).build(), "mergeRight") + .addLayer("denseRight1", DenseLayer.builder().nIn(10).nOut(5).build(), "mergeRight") .addLayer("outRight", - new OutputLayer.Builder(LossFunctions.LossFunction.MSE).nIn(5).nOut(5).build(), + OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(5).nOut(5).build(), "denseRight1") .setOutputs("outLeft", "outCentre", "outRight").build(); @@ -214,10 +213,10 @@ public class TransferLearningHelperTest extends BaseDL4JTest { MultiLayerNetwork modelToFineTune = new MultiLayerNetwork( (NeuralNetConfiguration) overallConf.clone() - .layer(0, new Builder().nIn(4).nOut(3).build()) - .layer(1, new Builder().nIn(3).nOut(2).build()) - .layer(2, new Builder().nIn(2).nOut(3).build()) - .layer(3, new OutputLayer.Builder( + .layer(0, DenseLayer.builder().nIn(4).nOut(3).build()) + .layer(1, DenseLayer.builder().nIn(3).nOut(2).build()) + .layer(2, DenseLayer.builder().nIn(2).nOut(3).build()) + .layer(3, OutputLayer.builder( LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(3).nOut(3) .build()) .build()); @@ -233,8 +232,8 @@ public class TransferLearningHelperTest extends BaseDL4JTest { Nd4j.hstack(modelToFineTune.getLayer(2).getParams(), 
modelToFineTune.getLayer(3).getParams()); MultiLayerNetwork notFrozen = new MultiLayerNetwork( (NeuralNetConfiguration) overallConf.clone().list() - .layer(0, new Builder().nIn(2).nOut(3).build()) - .layer(1, new OutputLayer.Builder( + .layer(0, DenseLayer.builder().nIn(2).nOut(3).build()) + .layer(1, OutputLayer.builder( LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(3).nOut(3) .build()) .build(), paramsLastTwoLayers); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningMLNTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningMLNTest.java index 7d10a3bc7..1776d0d70 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningMLNTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningMLNTest.java @@ -32,7 +32,7 @@ import org.deeplearning4j.nn.conf.distribution.ConstantDistribution; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.*; -import org.deeplearning4j.nn.conf.layers.DenseLayer.Builder; + import org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor; import org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor; import org.deeplearning4j.nn.conf.preprocessor.RnnToCnnPreProcessor; @@ -74,8 +74,8 @@ public class TransferLearningMLNTest extends BaseDL4JTest { MultiLayerNetwork modelToFineTune = new MultiLayerNetwork( (NeuralNetConfiguration) confToChange.list() - .layer(0, new Builder().nIn(4).nOut(3).build()) - .layer(1, new OutputLayer.Builder( + .layer(0, DenseLayer.builder().nIn(4).nOut(3).build()) + .layer(1, OutputLayer.builder( LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(3).nOut(3) .build()) .build()); @@ -101,8 +101,8 @@ public class TransferLearningMLNTest extends BaseDL4JTest { .updater(new RmsProp(0.5)).l2(0.4); MultiLayerNetwork expectedModel = new MultiLayerNetwork((NeuralNetConfiguration) confSet.list() - .layer(0, new Builder().nIn(4).nOut(3).build()) - .layer(1, new OutputLayer.Builder( + .layer(0, DenseLayer.builder().nIn(4).nOut(3).build()) + .layer(1, OutputLayer.builder( LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(3).nOut(3) .build()) .build()); @@ -135,10 +135,10 @@ public class TransferLearningMLNTest extends BaseDL4JTest { .build(); MultiLayerNetwork modelToFineTune = new MultiLayerNetwork(equivalentConf - .layer(0, new DenseLayer.Builder().nIn(4).nOut(5).build()) - .layer(1, new DenseLayer.Builder().nIn(3).nOut(2).build()) - .layer(2, new DenseLayer.Builder().nIn(2).nOut(3).build()) - .layer(3, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .layer(0, DenseLayer.builder().nIn(4).nOut(5).build()) + .layer(1, DenseLayer.builder().nIn(3).nOut(2).build()) + .layer(2, DenseLayer.builder().nIn(2).nOut(3).build()) + .layer(3, org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(3).nOut(3) .build()) .build()); @@ -148,10 +148,10 @@ public class TransferLearningMLNTest extends BaseDL4JTest { .nOutReplace(0, 3, WeightInit.XAVIER, new NormalDistribution(1, 1e-1)).build(); MultiLayerNetwork modelExpectedArch = new MultiLayerNetwork(equivalentConf - .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).build()) - .layer(1, new DenseLayer.Builder().nIn(3).nOut(2).build()) - .layer(2, new 
DenseLayer.Builder().nIn(2).nOut(3).build()) - .layer(3, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .layer(0, DenseLayer.builder().nIn(4).nOut(3).build()) + .layer(1, DenseLayer.builder().nIn(3).nOut(2).build()) + .layer(2, DenseLayer.builder().nIn(2).nOut(3).build()) + .layer(3, org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(3).nOut(2) .build()) .build()); @@ -197,10 +197,10 @@ public class TransferLearningMLNTest extends BaseDL4JTest { FineTuneConfiguration overallConf = new FineTuneConfiguration.Builder().updater(new Sgd(0.1)).build(); MultiLayerNetwork modelToFineTune = new MultiLayerNetwork(//overallConf.list() - equivalentConf.list().layer(0, new DenseLayer.Builder().nIn(4).nOut(5).build()) - .layer(1, new DenseLayer.Builder().nIn(5).nOut(2).build()) - .layer(2, new DenseLayer.Builder().nIn(2).nOut(3).build()) - .layer(3, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + equivalentConf.list().layer(0, DenseLayer.builder().nIn(4).nOut(5).build()) + .layer(1, DenseLayer.builder().nIn(5).nOut(2).build()) + .layer(2, DenseLayer.builder().nIn(2).nOut(3).build()) + .layer(3, org.deeplearning4j.nn.conf.layers.OutputLayer.builder( LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(3).nOut(3) .build()) @@ -211,16 +211,16 @@ public class TransferLearningMLNTest extends BaseDL4JTest { new TransferLearning.Builder(modelToFineTune).fineTuneConfiguration(overallConf) .nOutReplace(0, 7, WeightInit.XAVIER, WeightInit.XAVIER) .nOutReplace(2, 5, WeightInit.XAVIER).removeOutputLayer() - .addLayer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(5) + .addLayer(OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT).nIn(5) .nOut(3).updater(new Sgd(0.5)).activation(Activation.SOFTMAX) .build()) .build(); MultiLayerNetwork modelExpectedArch = new MultiLayerNetwork(equivalentConf.list() - .layer(0, new DenseLayer.Builder().nIn(4).nOut(7).build()) - .layer(1, new DenseLayer.Builder().nIn(7).nOut(2).build()) - .layer(2, new DenseLayer.Builder().nIn(2).nOut(5).build()) - .layer(3, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .layer(0, DenseLayer.builder().nIn(4).nOut(7).build()) + .layer(1, DenseLayer.builder().nIn(7).nOut(2).build()) + .layer(2, DenseLayer.builder().nIn(2).nOut(5).build()) + .layer(3, org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX) .updater(new Sgd(0.5)).nIn(5).nOut(3).build()) .build()); @@ -254,23 +254,23 @@ public class TransferLearningMLNTest extends BaseDL4JTest { NeuralNetConfiguration.builder().seed(12345).l2(0.001) //l2 regularization on all layers .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new AdaGrad(0.4)).list() - .layer(0, new ConvolutionLayer.Builder(10, 10).nIn(3) //3 channels: RGB + .layer(0, ConvolutionLayer.builder(10, 10).nIn(3) //3 channels: RGB .nOut(30).stride(4, 4).activation(Activation.RELU).weightInit( WeightInit.RELU).build()) //Output: (130-10+0)/4+1 = 31 -> 31*31*30 - .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX) + .layer(1, SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX) .kernelSize(3, 3).stride(2, 2).build()) //(31-3+0)/2+1 = 15 - .layer(2, new ConvolutionLayer.Builder(3, 3).nIn(30).nOut(10).stride(2, 2) + .layer(2, ConvolutionLayer.builder(3, 3).nIn(30).nOut(10).stride(2, 2) 
.activation(Activation.RELU).weightInit(WeightInit.RELU) .build()) //Output: (15-3+0)/2+1 = 7 -> 7*7*10 = 490 - .layer(3, new DenseLayer.Builder().activation(Activation.RELU).nIn(490).nOut(50) + .layer(3, DenseLayer.builder().activation(Activation.RELU).nIn(490).nOut(50) .weightInit(WeightInit.RELU).updater(new AdaGrad(0.5)) .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue) .gradientNormalizationThreshold(10).build()) - .layer(4, new GravesLSTM.Builder().activation(Activation.SOFTSIGN).nIn(50) + .layer(4, GravesLSTM.builder().activation(Activation.SOFTSIGN).nIn(50) .nOut(50).weightInit(WeightInit.XAVIER).updater(new AdaGrad(0.6)) .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue) .gradientNormalizationThreshold(10).build()) - .layer(5, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(5, RnnOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(50).nOut(4) //4 possible shapes: circle, square, arc, line .weightInit(WeightInit.XAVIER) .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue) @@ -288,32 +288,32 @@ public class TransferLearningMLNTest extends BaseDL4JTest { NeuralNetConfiguration.builder().seed(12345) .updater(new RmsProp(0.1)) .list() - .layer(0, new ConvolutionLayer.Builder(10, 10) //Only keep the first layer the same + .layer(0, ConvolutionLayer.builder(10, 10) //Only keep the first layer the same .nIn(3) //3 channels: RGB .nOut(30).stride(4, 4) .activation(Activation.RELU) .weightInit(WeightInit.RELU) .updater(new AdaGrad(0.1)).build()) //Output: (130-10+0)/4+1 = 31 -> 31*31*30 - .layer(1, new SubsamplingLayer.Builder( + .layer(1, SubsamplingLayer.builder( SubsamplingLayer.PoolingType.MAX) //change kernel size .kernelSize(5, 5).stride(2, 2) .build()) //(31-5+0)/2+1 = 14 - .layer(2, new ConvolutionLayer.Builder(6, 6) //change here + .layer(2, ConvolutionLayer.builder(6, 6) //change here .nIn(30).nOut(10).stride(2, 2) .activation(Activation.RELU) .weightInit(WeightInit.RELU).build()) //Output: (14-6+0)/2+1 = 5 -> 5*5*10 = 250 - .layer(3, new DenseLayer.Builder() //change here + .layer(3, DenseLayer.builder() //change here .activation(Activation.RELU).nIn(250).nOut(50) .weightInit(WeightInit.RELU) .gradientNormalization( GradientNormalization.ClipElementWiseAbsoluteValue) .gradientNormalizationThreshold(10) .updater(new RmsProp(0.01)).build()) - .layer(4, new GravesLSTM.Builder() //change here + .layer(4, GravesLSTM.builder() //change here .activation(Activation.SOFTSIGN).nIn(50) .nOut(25).weightInit(WeightInit.XAVIER) .build()) - .layer(5, new RnnOutputLayer.Builder( + .layer(5, RnnOutputLayer.builder( LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX) .nIn(25).nOut(4) @@ -336,19 +336,19 @@ public class TransferLearningMLNTest extends BaseDL4JTest { .updater(new AdaGrad(0.4)) .weightInit(WeightInit.RELU).build()) .removeLayersFromOutput(5) - .addLayer(new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(3, 3) + .addLayer(SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX).kernelSize(3, 3) .stride(2, 2).build()) - .addLayer(new ConvolutionLayer.Builder(3, 3).nIn(30).nOut(10).stride(2, 2) + .addLayer(ConvolutionLayer.builder(3, 3).nIn(30).nOut(10).stride(2, 2) .activation(Activation.RELU).weightInit(WeightInit.RELU).build()) - .addLayer(new DenseLayer.Builder().activation(Activation.RELU).nIn(490).nOut(50) + .addLayer(DenseLayer.builder().activation(Activation.RELU).nIn(490).nOut(50) 
.weightInit(WeightInit.RELU).updater(new AdaGrad(0.5)) .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue) .gradientNormalizationThreshold(10).build()) - .addLayer(new GravesLSTM.Builder().activation(Activation.SOFTSIGN).nIn(50).nOut(50) + .addLayer(GravesLSTM.builder().activation(Activation.SOFTSIGN).nIn(50).nOut(50) .weightInit(WeightInit.XAVIER).updater(new AdaGrad(0.6)) .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue) .gradientNormalizationThreshold(10).build()) - .addLayer(new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .addLayer(RnnOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(50).nOut(4) //4 possible shapes: circle, square, arc, line .weightInit(WeightInit.XAVIER) .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue) @@ -392,25 +392,25 @@ public class TransferLearningMLNTest extends BaseDL4JTest { .weightInit(WeightInit.XAVIER) .updater(new Nesterovs(0.01, 0.9)) .list() - .layer(0, new ConvolutionLayer.Builder(5, 5).nIn(3).stride(1, 1) + .layer(0, ConvolutionLayer.builder(5, 5).nIn(3).stride(1, 1) .nOut(20).activation(Activation.IDENTITY) .build()) - .layer(1, new SubsamplingLayer.Builder( + .layer(1, SubsamplingLayer.builder( SubsamplingLayer.PoolingType.MAX) .kernelSize(2, 2).stride(2, 2) .build()) - .layer(2, new ConvolutionLayer.Builder(5, 5).stride(1, 1) + .layer(2, ConvolutionLayer.builder(5, 5).stride(1, 1) .nOut(50).activation(Activation.IDENTITY) .build()) - .layer(3, new SubsamplingLayer.Builder( + .layer(3, SubsamplingLayer.builder( SubsamplingLayer.PoolingType.MAX) .kernelSize(2, 2).stride(2, 2) .build()) - .layer(4, new DenseLayer.Builder().activation(Activation.RELU) + .layer(4, DenseLayer.builder().activation(Activation.RELU) .nOut(500).build()) - .layer(5, new DenseLayer.Builder().activation(Activation.RELU) + .layer(5, DenseLayer.builder().activation(Activation.RELU) .nOut(250).build()) - .layer(6, new OutputLayer.Builder( + .layer(6, OutputLayer.builder( LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .nOut(100) .activation(Activation.SOFTMAX) @@ -428,23 +428,23 @@ public class TransferLearningMLNTest extends BaseDL4JTest { MultiLayerNetwork modelNow = new TransferLearning.Builder(modelToFineTune).fineTuneConfiguration(overallConf) .setFeatureExtractor(1).nOutReplace(4, 600, WeightInit.XAVIER).removeLayersFromOutput(2) - .addLayer(new DenseLayer.Builder().activation(Activation.RELU).nIn(600).nOut(300).build()) - .addLayer(new DenseLayer.Builder().activation(Activation.RELU).nIn(300).nOut(150).build()) - .addLayer(new DenseLayer.Builder().activation(Activation.RELU).nIn(150).nOut(50).build()) - .addLayer(new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) + .addLayer(DenseLayer.builder().activation(Activation.RELU).nIn(600).nOut(300).build()) + .addLayer(DenseLayer.builder().activation(Activation.RELU).nIn(300).nOut(150).build()) + .addLayer(DenseLayer.builder().activation(Activation.RELU).nIn(150).nOut(50).build()) + .addLayer(OutputLayer.builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .activation(Activation.SOFTMAX).nIn(50).nOut(10).build()) .build(); MultiLayerNetwork notFrozen = new MultiLayerNetwork(equivalentConf.list() - .layer(0, new ConvolutionLayer.Builder(5, 5).stride(1, 1).nOut(50) + .layer(0, ConvolutionLayer.builder(5, 5).stride(1, 1).nOut(50) .activation(Activation.IDENTITY).build()) - .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2) 
+ .layer(1, SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2) .stride(2, 2).build()) - .layer(2, new DenseLayer.Builder().activation(Activation.RELU).nOut(600).build()) - .layer(3, new DenseLayer.Builder().activation(Activation.RELU).nOut(300).build()) - .layer(4, new DenseLayer.Builder().activation(Activation.RELU).nOut(150).build()) - .layer(5, new DenseLayer.Builder().activation(Activation.RELU).nOut(50).build()) - .layer(6, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).nOut(10) + .layer(2, DenseLayer.builder().activation(Activation.RELU).nOut(600).build()) + .layer(3, DenseLayer.builder().activation(Activation.RELU).nOut(300).build()) + .layer(4, DenseLayer.builder().activation(Activation.RELU).nOut(150).build()) + .layer(5, DenseLayer.builder().activation(Activation.RELU).nOut(50).build()) + .layer(6, OutputLayer.builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).nOut(10) .activation(Activation.SOFTMAX).build()) .inputType(InputType.convolutionalFlat(12, 12, 20)).build()); notFrozen.init(); @@ -487,8 +487,8 @@ public class TransferLearningMLNTest extends BaseDL4JTest { NeuralNetConfiguration.builder().updater(new Adam(1e-4)) .activation(Activation.TANH).weightInit(WeightInit.RELU) .l1(0.1).l2(0.2).list() - .layer(0, new DenseLayer.Builder().nIn(10).nOut(5).build()).layer(1, - new OutputLayer.Builder().nIn(5).nOut(4) + .layer(0, DenseLayer.builder().nIn(10).nOut(5).build()).layer(1, + OutputLayer.builder().nIn(5).nOut(4) .activation(Activation.HARDSIGMOID).build()) .build(); @@ -544,17 +544,17 @@ public class TransferLearningMLNTest extends BaseDL4JTest { .weightInit(WeightInit.XAVIER) .updater(new Nesterovs(0.01, 0.9)) .list() - .layer(0, new ConvolutionLayer.Builder(5, 5).nIn(3).stride(1, 1) + .layer(0, ConvolutionLayer.builder(5, 5).nIn(3).stride(1, 1) .nOut(20).activation(Activation.IDENTITY).build()) - .layer(1, new SubsamplingLayer.Builder(PoolingType.MAX) + .layer(1, SubsamplingLayer.builder(PoolingType.MAX) .kernelSize(2, 2).stride(2, 2).build()) - .layer(2, new ConvolutionLayer.Builder(5, 5).stride(1, 1) + .layer(2, ConvolutionLayer.builder(5, 5).stride(1, 1) .nOut(50).activation(Activation.IDENTITY).build()) - .layer(3, new SubsamplingLayer.Builder(PoolingType.MAX) + .layer(3, SubsamplingLayer.builder(PoolingType.MAX) .kernelSize(2, 2).stride(2, 2).build()) - .layer(4, new DenseLayer.Builder().activation(Activation.RELU).nOut(500).build()) - .layer(5, new DenseLayer.Builder().activation(Activation.RELU).nOut(250).build()) - .layer(6, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) + .layer(4, DenseLayer.builder().activation(Activation.RELU).nOut(500).build()) + .layer(5, DenseLayer.builder().activation(Activation.RELU).nOut(250).build()) + .layer(6, OutputLayer.builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .nOut(100).activation(Activation.SOFTMAX).build()) .inputType(InputType.convolutionalFlat(28, 28, 3)) //See note below .build()); @@ -566,21 +566,21 @@ public class TransferLearningMLNTest extends BaseDL4JTest { MultiLayerNetwork modelNow = new TransferLearning.Builder(modelToFineTune).fineTuneConfiguration(overallConf) .setFeatureExtractor(1).removeLayersFromOutput(5) - .addLayer(new DenseLayer.Builder().activation(Activation.RELU).nIn(12 * 12 * 20).nOut(300) + .addLayer(DenseLayer.builder().activation(Activation.RELU).nIn(12 * 12 * 20).nOut(300) .build()) - .addLayer(new DenseLayer.Builder().activation(Activation.RELU).nIn(300).nOut(150).build()) - .addLayer(new 
DenseLayer.Builder().activation(Activation.RELU).nIn(150).nOut(50).build()) - .addLayer(new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) + .addLayer(DenseLayer.builder().activation(Activation.RELU).nIn(300).nOut(150).build()) + .addLayer(DenseLayer.builder().activation(Activation.RELU).nIn(150).nOut(50).build()) + .addLayer(OutputLayer.builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .activation(Activation.SOFTMAX).nIn(50).nOut(10).build()) .setInputPreProcessor(2, new CnnToFeedForwardPreProcessor(12, 12, 20)).build(); MultiLayerNetwork notFrozen = new MultiLayerNetwork(equivalentConf.list() - .layer(0, new DenseLayer.Builder().activation(Activation.RELU).nIn(12 * 12 * 20).nOut(300) + .layer(0, DenseLayer.builder().activation(Activation.RELU).nIn(12 * 12 * 20).nOut(300) .build()) - .layer(1, new DenseLayer.Builder().activation(Activation.RELU).nIn(300).nOut(150).build()) - .layer(2, new DenseLayer.Builder().activation(Activation.RELU).nIn(150).nOut(50).build()) - .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).nIn(50) + .layer(1, DenseLayer.builder().activation(Activation.RELU).nIn(300).nOut(150).build()) + .layer(2, DenseLayer.builder().activation(Activation.RELU).nIn(150).nOut(50).build()) + .layer(3, OutputLayer.builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).nIn(50) .nOut(10).activation(Activation.SOFTMAX).build()) .inputPreProcessor(0, new CnnToFeedForwardPreProcessor(12, 12, 20)) .build()); @@ -618,7 +618,7 @@ public class TransferLearningMLNTest extends BaseDL4JTest { .l2(0.5) .constrainWeights(new UnitNormConstraint()) .list() - .layer(new DenseLayer.Builder().nIn(10).nOut(10).build()) + .layer(DenseLayer.builder().nIn(10).nOut(10).build()) .build(); MultiLayerNetwork orig = new MultiLayerNetwork(conf); @@ -637,7 +637,7 @@ public class TransferLearningMLNTest extends BaseDL4JTest { DenseLayer l = (DenseLayer) transfer.getLayer(0).getLayerConfiguration(); - assertNull(l.getIDropout()); + assertNull(l.getDropOut()); assertNull(l.getWeightNoise()); assertNull(l.getConstraints()); assertNull(TestUtils.getL2Reg(l)); @@ -651,9 +651,9 @@ public class TransferLearningMLNTest extends BaseDL4JTest { .weightInit(new ConstantDistribution(666)) .list() .inputType(InputType.inferInputTypes(input)[0]) - .layer(new Convolution2D.Builder(3, 3).nOut(10).build()) - .layer(new Convolution2D.Builder(1, 1).nOut(3).build()) - .layer(new OutputLayer.Builder().nOut(2).lossFunction(LossFunctions.LossFunction.MSE) + .layer(Convolution2D.builder(3, 3).nOut(10).build()) + .layer(Convolution2D.builder(1, 1).nOut(3).build()) + .layer(OutputLayer.builder().nOut(2).lossFunction(LossFunctions.LossFunction.MSE) .build()).build()); net.init(); @@ -663,7 +663,7 @@ public class TransferLearningMLNTest extends BaseDL4JTest { .nOutReplace(0, 7, new ConstantDistribution(333)) .nOutReplace(1, 3, new ConstantDistribution(111)) .removeLayersFromOutput(1) - .addLayer(new OutputLayer.Builder() + .addLayer(OutputLayer.builder() .nIn(48).nOut(2) .lossFunction(LossFunctions.LossFunction.MSE) .build()) @@ -682,10 +682,10 @@ public class TransferLearningMLNTest extends BaseDL4JTest { MultiLayerNetwork net = new MultiLayerNetwork( NeuralNetConfiguration.builder() .list() .inputType(InputType.inferInputTypes(input)[0]) - .layer(new Convolution2D.Builder(1, 1).nOut(10).build()) - .layer(new SubsamplingLayer.Builder(1,1).build()) - .layer(new Convolution2D.Builder(1, 1).nOut(7).build()) - .layer(new 
OutputLayer.Builder().activation(Activation.SOFTMAX).nOut(2).build()) + .layer(Convolution2D.builder(1, 1).nOut(10).build()) + .layer(SubsamplingLayer.builder(1,1).build()) + .layer(Convolution2D.builder(1, 1).nOut(7).build()) + .layer(OutputLayer.builder().activation(Activation.SOFTMAX).nOut(2).build()) .build()); net.init(); @@ -711,10 +711,10 @@ public class TransferLearningMLNTest extends BaseDL4JTest { .updater(new Adam(0.01)) .weightInit(WeightInit.XAVIER) .list() - .layer(new LSTM.Builder().nOut(8).build()) - .layer( new SelfAttentionLayer.Builder().nOut(4).nHeads(2).projectInput(true).build()) - .layer(new GlobalPoolingLayer.Builder().poolingType(PoolingType.MAX).build()) - .layer(new OutputLayer.Builder().nOut(2).activation(Activation.SOFTMAX) + .layer(LSTM.builder().nOut(8).build()) + .layer( SelfAttentionLayer.builder().nOut(4).nHeads(2).projectInput(true).build()) + .layer(GlobalPoolingLayer.builder().poolingType(PoolingType.MAX).build()) + .layer(OutputLayer.builder().nOut(2).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .inputType(InputType.recurrent(4)) .build(); @@ -728,7 +728,7 @@ public class TransferLearningMLNTest extends BaseDL4JTest { MultiLayerNetwork net2 = new TransferLearning.Builder(net) .fineTuneConfiguration(FineTuneConfiguration.builder().updater(new Adam(0.01)).build()) .removeLayersFromOutput(1) - .addLayer(new OutputLayer.Builder().nIn(4).nOut(2).activation(Activation.SOFTMAX) + .addLayer(OutputLayer.builder().nIn(4).nOut(2).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/updater/TestGradientNormalization.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/updater/TestGradientNormalization.java index 54d3a3174..05bc7e796 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/updater/TestGradientNormalization.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/updater/TestGradientNormalization.java @@ -47,7 +47,7 @@ public class TestGradientNormalization extends BaseDL4JTest { Nd4j.getRandom().setSeed(12345); NeuralNetConfiguration conf = NeuralNetConfiguration.builder() - .layer(new DenseLayer.Builder().nIn(10).nOut(20) + .layer(DenseLayer.builder().nIn(10).nOut(20) .updater(new NoOp()) .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer).build()) .build(); @@ -93,7 +93,7 @@ public class TestGradientNormalization extends BaseDL4JTest { Nd4j.getRandom().setSeed(12345); NeuralNetConfiguration conf = NeuralNetConfiguration.builder() - .layer(new DenseLayer.Builder().nIn(10).nOut(20) + .layer(DenseLayer.builder().nIn(10).nOut(20) .updater(new NoOp()) .gradientNormalization(GradientNormalization.RenormalizeL2PerParamType).build()) .build(); @@ -126,7 +126,7 @@ public class TestGradientNormalization extends BaseDL4JTest { double threshold = 3; NeuralNetConfiguration conf = NeuralNetConfiguration.builder().layer( - new DenseLayer.Builder().nIn(10).nOut(20).updater(new NoOp()) + DenseLayer.builder().nIn(10).nOut(20).updater(new NoOp()) .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue) .gradientNormalizationThreshold(threshold).build()) .build(); @@ -182,7 +182,7 @@ public class TestGradientNormalization extends BaseDL4JTest { //t=1: large -> clipping NeuralNetConfiguration conf = NeuralNetConfiguration.builder().layer( - new DenseLayer.Builder().nIn(10).nOut(20).updater(new NoOp()) + 
DenseLayer.builder().nIn(10).nOut(20).updater(new NoOp()) .gradientNormalization(GradientNormalization.ClipL2PerLayer) .gradientNormalizationThreshold(threshold).build()) .build(); @@ -237,7 +237,7 @@ public class TestGradientNormalization extends BaseDL4JTest { double threshold = 3; NeuralNetConfiguration conf = NeuralNetConfiguration.builder().layer( - new DenseLayer.Builder().nIn(10).nOut(20).updater(new NoOp()) + DenseLayer.builder().nIn(10).nOut(20).updater(new NoOp()) .gradientNormalization(GradientNormalization.ClipL2PerParamType) .gradientNormalizationThreshold(threshold).build()) .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/updater/TestUpdaters.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/updater/TestUpdaters.java index ce7d713dc..0e2a77eff 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/updater/TestUpdaters.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/updater/TestUpdaters.java @@ -90,7 +90,7 @@ public class TestUpdaters extends BaseDL4JTest { double rho = 0.85; NeuralNetConfiguration conf = NeuralNetConfiguration.builder() - .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut) + .layer(DenseLayer.builder().nIn(nIn).nOut(nOut) .updater(new AdaDelta(rho, Nd4j.EPS_THRESHOLD)) .build()) .build(); @@ -158,7 +158,7 @@ public class TestUpdaters extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new AdaGrad(lr)) - .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).build()) + .layer(DenseLayer.builder().nIn(nIn).nOut(nOut).build()) .build(); long numParams = conf.getFirstLayer().initializer().numParams(conf); @@ -202,7 +202,7 @@ public class TestUpdaters extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new Adam(lr, beta1, beta2, Adam.DEFAULT_ADAM_EPSILON)) - .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).build()) + .layer(DenseLayer.builder().nIn(nIn).nOut(nOut).build()) .build(); long numParams = conf.getFirstLayer().initializer().numParams(conf); @@ -262,7 +262,7 @@ public class TestUpdaters extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() - .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut) + .layer(DenseLayer.builder().nIn(nIn).nOut(nOut) .updater(Nadam.builder().learningRate(lr).beta1(beta1) .beta2(beta2).epsilon(epsilon).build()) .build()) @@ -355,7 +355,7 @@ public class TestUpdaters extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .updater(new AdaMax(lr, beta1, beta2, AdaMax.DEFAULT_ADAMAX_EPSILON)) - .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).build()) + .layer(DenseLayer.builder().nIn(nIn).nOut(nOut).build()) .build(); long numParams = conf.getFirstLayer().initializer().numParams(conf); @@ -411,7 +411,7 @@ public class TestUpdaters extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new Nesterovs(lr, mu)) - .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).build()) + .layer(DenseLayer.builder().nIn(nIn).nOut(nOut).build()) .build(); long numParams = conf.getFirstLayer().initializer().numParams(conf); @@ -458,7 +458,7 @@ public class TestUpdaters extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new RmsProp(lr,rmsDecay, RmsProp.DEFAULT_RMSPROP_EPSILON)) - .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).build()) + .layer(DenseLayer.builder().nIn(nIn).nOut(nOut).build()) .build(); long numParams = 
conf.getFirstLayer().initializer().numParams(conf); @@ -505,7 +505,7 @@ public class TestUpdaters extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new Sgd(lr)) - .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).build()) + .layer(DenseLayer.builder().nIn(nIn).nOut(nOut).build()) .build(); long numParams = conf.getFirstLayer().initializer().numParams(conf); @@ -539,7 +539,7 @@ public class TestUpdaters extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new NoOp()) - .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).build()) + .layer(DenseLayer.builder().nIn(nIn).nOut(nOut).build()) .build(); long numParams = conf.getFirstLayer().initializer().numParams(conf); @@ -575,12 +575,12 @@ public class TestUpdaters extends BaseDL4JTest { double lr = 0.03; NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() - .layer(0, new DenseLayer.Builder().nIn(4).nOut(5).updater(new Sgd(lr)).build()) - .layer(1, new DenseLayer.Builder().nIn(5).nOut(6) + .layer(0, DenseLayer.builder().nIn(4).nOut(5).updater(new Sgd(lr)).build()) + .layer(1, DenseLayer.builder().nIn(5).nOut(6) .updater(new NoOp()).build()) - .layer(2, new DenseLayer.Builder().nIn(6).nOut(7) + .layer(2, DenseLayer.builder().nIn(6).nOut(7) .updater(new AdaGrad(lr)).build()) - .layer(3, new OutputLayer.Builder().nIn(7).nOut(8) + .layer(3, OutputLayer.builder().nIn(7).nOut(8) .updater(new Nesterovs(0.6)) .activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE) .build()) @@ -676,13 +676,13 @@ public class TestUpdaters extends BaseDL4JTest { int nOut = 8; NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new Nesterovs(lr,0.6)).list() - .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(5) + .layer(0, DenseLayer.builder().nIn(nIn).nOut(5) .updater(org.deeplearning4j.nn.conf.Updater.SGD).build()) - .layer(1, new DenseLayer.Builder().nIn(5).nOut(6) + .layer(1, DenseLayer.builder().nIn(5).nOut(6) .updater(new NoOp()).build()) - .layer(2, new DenseLayer.Builder().nIn(6).nOut(7) + .layer(2, DenseLayer.builder().nIn(6).nOut(7) .updater(org.deeplearning4j.nn.conf.Updater.ADAGRAD).build()) - .layer(3, new OutputLayer.Builder().nIn(7).nOut(nOut).activation(Activation.SOFTMAX) + .layer(3, OutputLayer.builder().nIn(7).nOut(nOut).activation(Activation.SOFTMAX) .updater(org.deeplearning4j.nn.conf.Updater.NESTEROVS).build()) .build(); @@ -707,13 +707,13 @@ public class TestUpdaters extends BaseDL4JTest { int nOut = 8; NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new Nesterovs(lr,0.6)).list() - .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(5) + .layer(0, DenseLayer.builder().nIn(nIn).nOut(5) .updater(org.deeplearning4j.nn.conf.Updater.SGD).build()) - .layer(1, new DenseLayer.Builder().nIn(5).nOut(6) + .layer(1, DenseLayer.builder().nIn(5).nOut(6) .updater(new NoOp()).build()) - .layer(2, new DenseLayer.Builder().nIn(6).nOut(7) + .layer(2, DenseLayer.builder().nIn(6).nOut(7) .updater(org.deeplearning4j.nn.conf.Updater.ADAGRAD).build()) - .layer(3, new OutputLayer.Builder().nIn(7).nOut(nOut).activation(Activation.SOFTMAX) + .layer(3, OutputLayer.builder().nIn(7).nOut(nOut).activation(Activation.SOFTMAX) .updater(org.deeplearning4j.nn.conf.Updater.NESTEROVS).build()) .build(); @@ -744,7 +744,7 @@ public class TestUpdaters extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new Sgd(lr)).seed(42) - .layer(new AutoEncoder.Builder() + .layer(AutoEncoder.builder() 
.lossFunction(LossFunctions.LossFunction.COSINE_PROXIMITY) .activation(Activation.IDENTITY).nIn(nIn).nOut(nOut).build()) .build(); @@ -808,16 +808,16 @@ public class TestUpdaters extends BaseDL4JTest { List blocks; if (i == 0) { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() - .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).name("l0") + .layer(0, DenseLayer.builder().nIn(10).nOut(10).name("l0") .updater(new Adam(0.5)).build()) - .layer(1, new DenseLayer.Builder().nIn(10).nOut(10).name("l1") + .layer(1, DenseLayer.builder().nIn(10).nOut(10).name("l1") .updater(new Adam(0.5)).biasUpdater(new Adam(0.25)) .build()) - .layer(2, new DenseLayer.Builder().nIn(10).nOut(10).name("l2") + .layer(2, DenseLayer.builder().nIn(10).nOut(10).name("l2") .updater(new AdaDelta()).build()) - .layer(3, new DenseLayer.Builder().nIn(10).nOut(10).name("l3") + .layer(3, DenseLayer.builder().nIn(10).nOut(10).name("l3") .updater(new AdaGrad(0.5)).build()) - .layer(4, new OutputLayer.Builder().nIn(10).nOut(10).name("l4").activation(Activation.SOFTMAX) + .layer(4, OutputLayer.builder().nIn(10).nOut(10).name("l4").activation(Activation.SOFTMAX) .updater(new AdaMax(0.5)).build()) .build(); @@ -829,16 +829,16 @@ public class TestUpdaters extends BaseDL4JTest { } else { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .graphBuilder().addInputs("in") - .addLayer("l0", new DenseLayer.Builder().nIn(10).nOut(10) + .addLayer("l0", DenseLayer.builder().nIn(10).nOut(10) .updater(new Adam(0.5)).build(), "in") - .addLayer("l1", new DenseLayer.Builder().nIn(10).nOut(10) + .addLayer("l1", DenseLayer.builder().nIn(10).nOut(10) .updater(new Adam(0.5)).biasUpdater(new Adam(0.25)) .build(), "l0") - .addLayer("l2", new DenseLayer.Builder().nIn(10).nOut(10) + .addLayer("l2", DenseLayer.builder().nIn(10).nOut(10) .updater(new AdaDelta()).build(), "l1") - .addLayer("l3", new DenseLayer.Builder().nIn(10).nOut(10) + .addLayer("l3", DenseLayer.builder().nIn(10).nOut(10) .updater(new AdaGrad(0.5)).build(), "l2") - .addLayer("l4", new OutputLayer.Builder().nIn(10).nOut(10) + .addLayer("l4", OutputLayer.builder().nIn(10).nOut(10) .activation(Activation.SOFTMAX) .updater(new AdaMax(0.5)).build(), "l3") .setOutputs("l4").build(); @@ -859,11 +859,11 @@ public class TestUpdaters extends BaseDL4JTest { //Check first updater block: UpdaterBlock ub0 = blocks.get(0); assertEquals(3, ub0.getLayersAndVariablesInBlock().size()); - assertEquals("l0", ub0.getLayersAndVariablesInBlock().get(0).getLayer().getTrainingConfig().getLayerName()); + assertEquals("l0", ub0.getLayersAndVariablesInBlock().get(0).getLayer().getTrainingConfig().getName()); assertEquals(DefaultParamInitializer.WEIGHT_KEY, ub0.getLayersAndVariablesInBlock().get(0).getParamName()); - assertEquals("l0", ub0.getLayersAndVariablesInBlock().get(1).getLayer().getTrainingConfig().getLayerName()); + assertEquals("l0", ub0.getLayersAndVariablesInBlock().get(1).getLayer().getTrainingConfig().getName()); assertEquals(DefaultParamInitializer.BIAS_KEY, ub0.getLayersAndVariablesInBlock().get(1).getParamName()); - assertEquals("l1", ub0.getLayersAndVariablesInBlock().get(2).getLayer().getTrainingConfig().getLayerName()); + assertEquals("l1", ub0.getLayersAndVariablesInBlock().get(2).getLayer().getTrainingConfig().getName()); assertEquals(DefaultParamInitializer.WEIGHT_KEY, ub0.getLayersAndVariablesInBlock().get(2).getParamName()); int nParams0 = 10 * 10 + 10 + 10 * 10; @@ -876,7 +876,7 @@ public class TestUpdaters extends BaseDL4JTest { //Check second updater 
block: UpdaterBlock ub1 = blocks.get(1); assertEquals(1, ub1.getLayersAndVariablesInBlock().size()); - assertEquals("l1", ub1.getLayersAndVariablesInBlock().get(0).getLayer().getTrainingConfig().getLayerName()); + assertEquals("l1", ub1.getLayersAndVariablesInBlock().get(0).getLayer().getTrainingConfig().getName()); assertEquals(DefaultParamInitializer.BIAS_KEY, ub1.getLayersAndVariablesInBlock().get(0).getParamName()); int nParams1 = 10; @@ -889,9 +889,9 @@ public class TestUpdaters extends BaseDL4JTest { //Check third updater block: UpdaterBlock ub2 = blocks.get(2); assertEquals(2, ub2.getLayersAndVariablesInBlock().size()); - assertEquals("l2", ub2.getLayersAndVariablesInBlock().get(0).getLayer().getTrainingConfig().getLayerName()); + assertEquals("l2", ub2.getLayersAndVariablesInBlock().get(0).getLayer().getTrainingConfig().getName()); assertEquals(DefaultParamInitializer.WEIGHT_KEY, ub2.getLayersAndVariablesInBlock().get(0).getParamName()); - assertEquals("l2", ub2.getLayersAndVariablesInBlock().get(1).getLayer().getTrainingConfig().getLayerName()); + assertEquals("l2", ub2.getLayersAndVariablesInBlock().get(1).getLayer().getTrainingConfig().getName()); assertEquals(DefaultParamInitializer.BIAS_KEY, ub2.getLayersAndVariablesInBlock().get(1).getParamName()); int nParams2 = 10 * 10 + 10; @@ -904,9 +904,9 @@ public class TestUpdaters extends BaseDL4JTest { //Check fourth updater block: UpdaterBlock ub3 = blocks.get(3); assertEquals(2, ub3.getLayersAndVariablesInBlock().size()); - assertEquals("l3", ub3.getLayersAndVariablesInBlock().get(0).getLayer().getTrainingConfig().getLayerName()); + assertEquals("l3", ub3.getLayersAndVariablesInBlock().get(0).getLayer().getTrainingConfig().getName()); assertEquals(DefaultParamInitializer.WEIGHT_KEY, ub3.getLayersAndVariablesInBlock().get(0).getParamName()); - assertEquals("l3", ub3.getLayersAndVariablesInBlock().get(1).getLayer().getTrainingConfig().getLayerName()); + assertEquals("l3", ub3.getLayersAndVariablesInBlock().get(1).getLayer().getTrainingConfig().getName()); assertEquals(DefaultParamInitializer.BIAS_KEY, ub3.getLayersAndVariablesInBlock().get(1).getParamName()); int nParams3 = 10 * 10 + 10; @@ -919,9 +919,9 @@ public class TestUpdaters extends BaseDL4JTest { //Check fifth updater black UpdaterBlock ub4 = blocks.get(4); assertEquals(2, ub4.getLayersAndVariablesInBlock().size()); - assertEquals("l4", ub4.getLayersAndVariablesInBlock().get(0).getLayer().getTrainingConfig().getLayerName()); + assertEquals("l4", ub4.getLayersAndVariablesInBlock().get(0).getLayer().getTrainingConfig().getName()); assertEquals(DefaultParamInitializer.WEIGHT_KEY, ub4.getLayersAndVariablesInBlock().get(0).getParamName()); - assertEquals("l4", ub4.getLayersAndVariablesInBlock().get(1).getLayer().getTrainingConfig().getLayerName()); + assertEquals("l4", ub4.getLayersAndVariablesInBlock().get(1).getLayer().getTrainingConfig().getName()); assertEquals(DefaultParamInitializer.BIAS_KEY, ub4.getLayersAndVariablesInBlock().get(1).getParamName()); int nParams4 = 10 * 10 + 10; @@ -944,7 +944,7 @@ public class TestUpdaters extends BaseDL4JTest { NeuralNetConfiguration.builder() .updater(new Adam(0.5)) .weightInit(WeightInit.NORMAL) - .layer(0, new VariationalAutoencoder.Builder().nIn(8).nOut(12) + .layer(0, VariationalAutoencoder.builder().nIn(8).nOut(12) .encoderLayerSizes(10, 11).decoderLayerSizes(13, 14).build()) .build(); @@ -985,9 +985,9 @@ public class TestUpdaters extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() - 
.layer(new DenseLayer.Builder().nIn(10).nOut(10).build()) - .layer(new DenseLayer.Builder().nIn(10).nOut(10).build()) - .layer(new OutputLayer.Builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build()) + .layer(DenseLayer.builder().nIn(10).nOut(10).build()) + .layer(DenseLayer.builder().nIn(10).nOut(10).build()) + .layer(OutputLayer.builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -1012,11 +1012,11 @@ public class TestUpdaters extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() - .layer(new DenseLayer.Builder().nIn(10).nOut(9).build()) - .layer(new BatchNormalization.Builder().nOut(9).build()) - .layer(new DenseLayer.Builder().nIn(9).nOut(8).build()) - .layer(new BatchNormalization.Builder().nOut(8).build()) - .layer(new OutputLayer.Builder().nIn(8).nOut(7).activation(Activation.SOFTMAX).build()) + .layer(DenseLayer.builder().nIn(10).nOut(9).build()) + .layer(BatchNormalization.builder().nOut(9).build()) + .layer(DenseLayer.builder().nIn(9).nOut(8).build()) + .layer(BatchNormalization.builder().nOut(8).build()) + .layer(OutputLayer.builder().nIn(8).nOut(7).activation(Activation.SOFTMAX).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -1063,11 +1063,11 @@ public class TestUpdaters extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() - .layer(new BatchNormalization.Builder().nOut(6).build()) - .layer(new ConvolutionLayer.Builder().nIn(6).nOut(5).kernelSize(2,2).build()) - .layer(new BatchNormalization.Builder().nOut(5).build()) - .layer(new ConvolutionLayer.Builder().nIn(5).nOut(4).kernelSize(2,2).build()) - .layer(new BatchNormalization.Builder().nOut(4).build()) + .layer(BatchNormalization.builder().nOut(6).build()) + .layer(ConvolutionLayer.builder().nIn(6).nOut(5).kernelSize(2,2).build()) + .layer(BatchNormalization.builder().nOut(5).build()) + .layer(ConvolutionLayer.builder().nIn(5).nOut(4).kernelSize(2,2).build()) + .layer(BatchNormalization.builder().nOut(4).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/updater/custom/TestCustomUpdater.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/updater/custom/TestCustomUpdater.java index e52b126f2..c909187fc 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/updater/custom/TestCustomUpdater.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/updater/custom/TestCustomUpdater.java @@ -48,15 +48,15 @@ public class TestCustomUpdater extends BaseDL4JTest { Nd4j.getRandom().setSeed(12345); NeuralNetConfiguration conf1 = NeuralNetConfiguration.builder().seed(12345) .activation(Activation.TANH).updater(new CustomIUpdater(lr)) //Specify custom IUpdater - .list().layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()) - .layer(1, new OutputLayer.Builder().nIn(10).nOut(10) + .list().layer(0, DenseLayer.builder().nIn(10).nOut(10).build()) + .layer(1, OutputLayer.builder().nIn(10).nOut(10) .lossFunction(LossFunctions.LossFunction.MSE).build()) .build(); Nd4j.getRandom().setSeed(12345); NeuralNetConfiguration conf2 = NeuralNetConfiguration.builder().seed(12345) .activation(Activation.TANH).updater(new Sgd(lr)).list() - .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()).layer(1, new OutputLayer.Builder() + .layer(0, DenseLayer.builder().nIn(10).nOut(10).build()).layer(1, 
OutputLayer.builder() .nIn(10).nOut(10).lossFunction(LossFunctions.LossFunction.MSE).build()) .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/weights/WeightInitIdentityTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/weights/WeightInitIdentityTest.java index b5becc819..5dc7bd0a3 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/weights/WeightInitIdentityTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/weights/WeightInitIdentityTest.java @@ -52,13 +52,13 @@ public class WeightInitIdentityTest extends BaseDL4JTest { .graphBuilder() .addInputs(inputName) .setOutputs(output) - .layer(conv, new Convolution1DLayer.Builder(7) + .layer(conv, Convolution1DLayer.builder(7) .convolutionMode(ConvolutionMode.Same) .nOut(input.size(1)) .weightInit(new WeightInitIdentity()) .activation(new ActivationIdentity()) .build(), inputName) - .layer(output, new RnnLossLayer.Builder().activation(new ActivationIdentity()).build(), conv) + .layer(output, RnnLossLayer.builder().activation(new ActivationIdentity()).build(), conv) .setInputTypes(InputType.recurrent(5,7,RNNFormat.NCW)) .build()); graph.init(); @@ -81,13 +81,13 @@ public class WeightInitIdentityTest extends BaseDL4JTest { .setInputTypes(InputType.inferInputType(input)) .addInputs(inputName) .setOutputs(output) - .layer(conv, new ConvolutionLayer.Builder(3,5) + .layer(conv, ConvolutionLayer.builder(3,5) .convolutionMode(ConvolutionMode.Same) .nOut(input.size(1)) .weightInit(new WeightInitIdentity()) .activation(new ActivationIdentity()) .build(), inputName) - .layer(output, new CnnLossLayer.Builder().activation(new ActivationIdentity()).build(), conv) + .layer(output, CnnLossLayer.builder().activation(new ActivationIdentity()).build(), conv) .build()); graph.init(); @@ -108,14 +108,14 @@ public class WeightInitIdentityTest extends BaseDL4JTest { .setInputTypes(InputType.inferInputType(input)) .addInputs(inputName) .setOutputs(output) - .layer(conv, new Convolution3D.Builder(3,7,5) + .layer(conv, Convolution3D.builder(3,7,5) .convolutionMode(ConvolutionMode.Same) .dataFormat(Convolution3D.DataFormat.NCDHW) .nOut(input.size(1)) .weightInit(new WeightInitIdentity()) .activation(new ActivationIdentity()) .build(), inputName) - .layer(output, new Cnn3DLossLayer.Builder(Convolution3D.DataFormat.NCDHW).activation(new ActivationIdentity()).build(), conv) + .layer(output, Cnn3DLossLayer.builder().dataFormat(Convolution3D.DataFormat.NCDHW).activation(new ActivationIdentity()).build(), conv) .build()); graph.init(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimize/solver/BackTrackLineSearchTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimize/solver/BackTrackLineSearchTest.java index 91101fccc..bdd4fc343 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimize/solver/BackTrackLineSearchTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimize/solver/BackTrackLineSearchTest.java @@ -168,7 +168,7 @@ public class BackTrackLineSearchTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345L).miniBatch(true) .maxNumLineSearchIterations(maxIterations) - .layer(new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(lossFunction) + .layer(org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction(lossFunction) .nIn(4).nOut(3).activation(activationFunction) .weightInit(WeightInit.XAVIER).build()) .build(); 
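/*
 * Illustrative sketch, not part of the patch: the hunks above and below all apply the same
 * mechanical migration from the nested Builder classes to the Lombok @SuperBuilder-generated
 * static builder() methods. Arguments that used to go to a Builder constructor generally move
 * to an explicit builder method, although a few hunks still pass the loss function directly to
 * builder(...), so both forms appear in this patch. The class, method and option names below are
 * taken from the surrounding diff; the layer index/name placeholders are assumptions for the example.
 *
 * Before:
 *   .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).build())
 *   .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
 *           .activation(Activation.SOFTMAX).nIn(3).nOut(2).build())
 *   .layer(out, new Cnn3DLossLayer.Builder(Convolution3D.DataFormat.NCDHW)
 *           .activation(new ActivationIdentity()).build(), conv)
 *
 * After:
 *   .layer(0, DenseLayer.builder().nIn(4).nOut(3).build())
 *   .layer(1, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT)
 *           .activation(Activation.SOFTMAX).nIn(3).nOut(2).build())
 *   .layer(out, Cnn3DLossLayer.builder().dataFormat(Convolution3D.DataFormat.NCDHW)
 *           .activation(new ActivationIdentity()).build(), conv)
 */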
@@ -241,9 +241,9 @@ public class BackTrackLineSearchTest extends BaseDL4JTest { private static NeuralNetConfiguration getIrisMultiLayerConfig(Activation activationFunction, OptimizationAlgorithm optimizer) { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().optimizationAlgo(optimizer) .updater(new Adam(0.01)).seed(12345L).list() - .layer(0, new DenseLayer.Builder().nIn(4).nOut(100).weightInit(WeightInit.XAVIER) + .layer(0, DenseLayer.builder().nIn(4).nOut(100).weightInit(WeightInit.XAVIER) .activation(activationFunction).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .layer(1, org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).nIn(100).nOut(3) .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX) .build()) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimize/solver/TestOptimizers.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimize/solver/TestOptimizers.java index 7883c899f..6b0825557 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimize/solver/TestOptimizers.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimize/solver/TestOptimizers.java @@ -144,10 +144,10 @@ public class TestOptimizers extends BaseDL4JTest { NeuralNetConfiguration c = NeuralNetConfiguration.builder().optimizationAlgo(oa) .updater(new AdaGrad(1e-1)).seed(12345L) .list().layer(0, - new DenseLayer.Builder().nIn(4).nOut(3).weightInit(WeightInit.XAVIER) + DenseLayer.builder().nIn(4).nOut(3).weightInit(WeightInit.XAVIER) .activation(Activation.RELU) .build()) - .layer(1, new OutputLayer.Builder(LossFunction.MCXENT).nIn(3).nOut(3) + .layer(1, OutputLayer.builder().lossFunction(LossFunction.MCXENT).nIn(3).nOut(3) .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).build()) .build(); @@ -214,7 +214,7 @@ public class TestOptimizers extends BaseDL4JTest { LayerConfiguration conf = NeuralNetConfiguration.builder().maxNumLineSearchIterations(numLineSearchIter) .updater(new Sgd(1e-2)) - .layer(new DenseLayer.Builder().nIn(1).nOut(1).build()).build().getFlattenedLayerConfigurations().get(0); + .layer(DenseLayer.builder().nIn(1).nOut(1).build()).build().getFlattenedLayerConfigurations().get(0); conf.addVariable("W"); //Normally done by ParamInitializers, but obviously that isn't done here Random rng = new DefaultRandom(12345L); @@ -277,7 +277,7 @@ public class TestOptimizers extends BaseDL4JTest { new org.nd4j.linalg.api.rng.distribution.impl.UniformDistribution(rng, -10, 10); NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .maxNumLineSearchIterations(maxNumLineSearchIter).updater(new Sgd(0.1)) - .layer(new DenseLayer.Builder().nIn(1).nOut(1).build()).build(); + .layer(DenseLayer.builder().nIn(1).nOut(1).build()).build(); conf.addNetWideVariable("W"); //Normally done by ParamInitializers, but obviously that isn't done here IModel m = new SphereFunctionModel(100, dist, conf.getFlattenedLayerConfigurations().get(0)); @@ -497,7 +497,7 @@ public class TestOptimizers extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .maxNumLineSearchIterations(maxNumLineSearchIter).miniBatch(false) .updater(new AdaGrad(1e-2)) - .layer(new DenseLayer.Builder().nIn(1).nOut(1).build()).build(); + .layer(DenseLayer.builder().nIn(1).nOut(1).build()).build(); conf.addNetWideVariable("W"); //Normally done by ParamInitializers, but obviously that isn't done here IModel m = new 
RastriginFunctionModel(10, conf.getFlattenedLayerConfigurations().get(0)); @@ -765,7 +765,7 @@ public class TestOptimizers extends BaseDL4JTest { .maxNumLineSearchIterations(maxNumLineSearchIter) .updater(new Sgd(1e-1)) .stepFunction(new org.deeplearning4j.nn.conf.stepfunctions.NegativeDefaultStepFunction()) - .layer(new DenseLayer.Builder().nIn(1).nOut(1).build()) + .layer(DenseLayer.builder().nIn(1).nOut(1).build()) .build(); conf.addNetWideVariable("W"); //Normally done by ParamInitializers, but obviously that isn't done here diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimizer/listener/TestCheckpointListener.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimizer/listener/TestCheckpointListener.java index 2e34fcd46..e0b8726d8 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimizer/listener/TestCheckpointListener.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimizer/listener/TestCheckpointListener.java @@ -52,7 +52,7 @@ public class TestCheckpointListener extends BaseDL4JTest { private static Pair getNetAndData(){ NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() - .layer(new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX) + .layer(OutputLayer.builder().nIn(4).nOut(3).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimizer/listener/TestFailureListener.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimizer/listener/TestFailureListener.java index fb500772d..c61a96627 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimizer/listener/TestFailureListener.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimizer/listener/TestFailureListener.java @@ -53,7 +53,7 @@ public class TestFailureListener extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .updater(new Adam(1e-4)) .list() - .layer(0, new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build()) + .layer(0, OutputLayer.builder().nIn(4).nOut(3).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -75,7 +75,7 @@ public class TestFailureListener extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .updater(new Adam(1e-4)) .list() - .layer(0, new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build()) + .layer(0, OutputLayer.builder().nIn(4).nOut(3).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -103,7 +103,7 @@ public class TestFailureListener extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .updater(new Adam(1e-4)) .list() - .layer(0, new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build()) + .layer(0, OutputLayer.builder().nIn(4).nOut(3).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); diff --git 
a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimizer/listener/TestListeners.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimizer/listener/TestListeners.java index f3d4f5dee..5999c29be 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimizer/listener/TestListeners.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/optimizer/listener/TestListeners.java @@ -71,8 +71,8 @@ public class TestListeners extends BaseDL4JTest { //Pretrain layers should get copies of the listeners, in addition to the NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() - .layer(0, new AutoEncoder.Builder().nIn(10).nOut(10).build()) - .layer(1, new VariationalAutoencoder.Builder().nIn(10).nOut(10).build()).build(); + .layer(0, AutoEncoder.builder().nIn(10).nOut(10).build()) + .layer(1, VariationalAutoencoder.builder().nIn(10).nOut(10).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); @@ -95,8 +95,8 @@ public class TestListeners extends BaseDL4JTest { ComputationGraphConfiguration gConf = NeuralNetConfiguration.builder().graphBuilder().addInputs("in") - .addLayer("0", new AutoEncoder.Builder().nIn(10).nOut(10).build(), "in") - .addLayer("1", new VariationalAutoencoder.Builder().nIn(10).nOut(10).build(), "0") + .addLayer("0", AutoEncoder.builder().nIn(10).nOut(10).build(), "in") + .addLayer("1", VariationalAutoencoder.builder().nIn(10).nOut(10).build(), "0") .setOutputs("1").build(); ComputationGraph cg = new ComputationGraph(gConf); cg.init(); @@ -173,7 +173,7 @@ public class TestListeners extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() - .layer(new OutputLayer.Builder().nIn(4).nOut(3) + .layer(OutputLayer.builder().nIn(4).nOut(3) .activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); @@ -209,7 +209,7 @@ public class TestListeners extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() - .layer(new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build()) + .layer(OutputLayer.builder().nIn(4).nOut(3).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/parallelism/RandomTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/parallelism/RandomTests.java index 2c214eeff..8e219f56c 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/parallelism/RandomTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/parallelism/RandomTests.java @@ -63,19 +63,19 @@ public class RandomTests extends BaseDL4JTest { .weightInit(WeightInit.XAVIER) .updater(new Nesterovs(0.01, 0.9)) .trainingWorkspaceMode(WorkspaceMode.ENABLED).list() - .layer(0, new ConvolutionLayer.Builder(5, 5) + .layer(0, ConvolutionLayer.builder(5, 5) //nIn and nOut specify depth. 
nIn here is the nChannels and nOut is the number of filters to be applied .nIn(1).stride(1, 1).nOut(20).activation(Activation.IDENTITY) .build()) - .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX) + .layer(1, SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX) .kernelSize(2, 2).stride(2, 2).build()) - .layer(2, new ConvolutionLayer.Builder(5, 5) + .layer(2, ConvolutionLayer.builder(5, 5) //Note that nIn need not be specified in later layers .stride(1, 1).nOut(50).activation(Activation.IDENTITY).build()) - .layer(3, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX) + .layer(3, SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX) .kernelSize(2, 2).stride(2, 2).build()) - .layer(4, new DenseLayer.Builder().activation(Activation.RELU).nOut(500).build()) - .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) + .layer(4, DenseLayer.builder().activation(Activation.RELU).nOut(500).build()) + .layer(5, OutputLayer.builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .nOut(10).activation(Activation.SOFTMAX).build()) .inputType(InputType.convolutionalFlat(28, 28, 1)) //See note below .build(); @@ -105,9 +105,9 @@ public class RandomTests extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345).activation(Activation.TANH) .weightInit(WeightInit.XAVIER).list() - .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()) - .layer(1, new DenseLayer.Builder().nIn(10).nOut(10).build()).layer(2, - new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(0, DenseLayer.builder().nIn(10).nOut(10).build()) + .layer(1, DenseLayer.builder().nIn(10).nOut(10).build()).layer(2, + OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(10).nOut(10).build()) .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/perf/listener/TestSystemInfoPrintListener.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/perf/listener/TestSystemInfoPrintListener.java index 6b2d882e3..79d41358e 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/perf/listener/TestSystemInfoPrintListener.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/perf/listener/TestSystemInfoPrintListener.java @@ -58,7 +58,7 @@ public class TestSystemInfoPrintListener extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() - .layer(new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX).build()) + .layer(OutputLayer.builder().nIn(4).nOut(3).activation(Activation.SOFTMAX).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/MiscRegressionTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/MiscRegressionTests.java index 316ad2f46..64adf2d57 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/MiscRegressionTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/MiscRegressionTests.java @@ -67,7 +67,7 @@ public class MiscRegressionTests extends BaseDL4JTest { public void testFrozenNewFormat(){ NeuralNetConfiguration configuration = NeuralNetConfiguration.builder() .list() - .layer(0, new FrozenLayer(new DenseLayer.Builder().nIn(10).nOut(10).build())) + .layer(0, FrozenLayer.builder(DenseLayer.builder().nIn(10).nOut(10).build())) .build(); String json 
= configuration.toJson(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest050.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest050.java index a771e414b..14d04ac82 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest050.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest050.java @@ -78,7 +78,7 @@ public class RegressionTest050 extends BaseDL4JTest { OutputLayer l1 = (OutputLayer) conf.getConf(1).getLayer(); assertEquals("softmax", l1.getActivationFn().toString()); - assertTrue(l1.getLossFn() instanceof LossMCXENT); + assertTrue(l1.getLossFunction() instanceof LossMCXENT); assertEquals(4, l1.getNIn()); assertEquals(5, l1.getNOut()); assertEquals(new WeightInitXavier(), l1.getWeightInit()); @@ -109,19 +109,19 @@ public class RegressionTest050 extends BaseDL4JTest { assertEquals(new WeightInitDistribution(new NormalDistribution(0.1, 1.2)), l0.getWeightInit()); assertEquals(new RmsProp(0.15, 0.96, RmsProp.DEFAULT_RMSPROP_EPSILON), l0.getIUpdater()); assertEquals(0.15, ((RmsProp)l0.getIUpdater()).getLearningRate(), 1e-6); - assertEquals(new Dropout(0.6), l0.getIDropout()); + assertEquals(new Dropout(0.6), l0.getDropOut()); assertEquals(0.1, TestUtils.getL1(l0), 1e-6); assertEquals(new WeightDecay(0.2, false), TestUtils.getWeightDecayReg(l0)); OutputLayer l1 = (OutputLayer) conf.getConf(1).getLayer(); assertEquals("identity", l1.getActivationFn().toString()); - assertTrue(l1.getLossFn() instanceof LossMSE); + assertTrue(l1.getLossFunction() instanceof LossMSE); assertEquals(4, l1.getNIn()); assertEquals(5, l1.getNOut()); assertEquals(new WeightInitDistribution(new NormalDistribution(0.1, 1.2)), l0.getWeightInit()); assertEquals(new RmsProp(0.15, 0.96, RmsProp.DEFAULT_RMSPROP_EPSILON), l1.getIUpdater()); assertEquals(0.15, ((RmsProp)l1.getIUpdater()).getLearningRate(), 1e-6); - assertEquals(new Dropout(0.6), l1.getIDropout()); + assertEquals(new Dropout(0.6), l1.getDropOut()); assertEquals(0.1, TestUtils.getL1(l1), 1e-6); assertEquals(new WeightDecay(0.2, false), TestUtils.getWeightDecayReg(l1)); @@ -162,7 +162,7 @@ public class RegressionTest050 extends BaseDL4JTest { OutputLayer l2 = (OutputLayer) conf.getConf(2).getLayer(); assertEquals("sigmoid", l2.getActivationFn().toString()); - assertTrue(l2.getLossFn() instanceof LossNegativeLogLikelihood); + assertTrue(l2.getLossFunction() instanceof LossNegativeLogLikelihood); assertEquals(26 * 26 * 3, l2.getNIn()); assertEquals(5, l2.getNOut()); assertEquals(new WeightInitRelu(), l0.getWeightInit()); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest060.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest060.java index 8d6dae94a..f4c46e7ee 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest060.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest060.java @@ -80,7 +80,7 @@ public class RegressionTest060 extends BaseDL4JTest { OutputLayer l1 = (OutputLayer) conf.getConf(1).getLayer(); assertEquals("softmax", l1.getActivationFn().toString()); - assertTrue(l1.getLossFn() instanceof LossMCXENT); + assertTrue(l1.getLossFunction() instanceof LossMCXENT); assertEquals(4, l1.getNIn()); assertEquals(5, l1.getNOut()); assertEquals(new WeightInitXavier(), l1.getWeightInit()); @@ -111,7 +111,7 
@@ public class RegressionTest060 extends BaseDL4JTest { assertEquals(new WeightInitDistribution(new NormalDistribution(0.1, 1.2)), l0.getWeightInit()); assertEquals(new RmsProp(0.15, 0.96, RmsProp.DEFAULT_RMSPROP_EPSILON), l0.getIUpdater()); assertEquals(0.15, ((RmsProp)l0.getIUpdater()).getLearningRate(), 1e-6); - assertEquals(new Dropout(0.6), l0.getIDropout()); + assertEquals(new Dropout(0.6), l0.getDropOut()); assertEquals(0.1, TestUtils.getL1(l0), 1e-6); assertEquals(new WeightDecay(0.2, false), TestUtils.getWeightDecayReg(l0)); assertEquals(GradientNormalization.ClipElementWiseAbsoluteValue, l0.getGradientNormalization()); @@ -119,13 +119,13 @@ public class RegressionTest060 extends BaseDL4JTest { OutputLayer l1 = (OutputLayer) conf.getConf(1).getLayer(); assertEquals("identity", l1.getActivationFn().toString()); - assertTrue(l1.getLossFn() instanceof LossMSE); + assertTrue(l1.getLossFunction() instanceof LossMSE); assertEquals(4, l1.getNIn()); assertEquals(5, l1.getNOut()); assertEquals(new WeightInitDistribution(new NormalDistribution(0.1, 1.2)), l0.getWeightInit()); assertEquals(new RmsProp(0.15, 0.96, RmsProp.DEFAULT_RMSPROP_EPSILON), l1.getIUpdater()); assertEquals(0.15, ((RmsProp)l1.getIUpdater()).getLearningRate(), 1e-6); - assertEquals(new Dropout(0.6), l1.getIDropout()); + assertEquals(new Dropout(0.6), l1.getDropOut()); assertEquals(0.1, TestUtils.getL1(l1), 1e-6); assertEquals(new WeightDecay(0.2,false), TestUtils.getWeightDecayReg(l1)); assertEquals(GradientNormalization.ClipElementWiseAbsoluteValue, l1.getGradientNormalization()); @@ -168,7 +168,7 @@ public class RegressionTest060 extends BaseDL4JTest { OutputLayer l2 = (OutputLayer) conf.getConf(2).getLayer(); assertEquals("sigmoid", l2.getActivationFn().toString()); - assertTrue(l2.getLossFn() instanceof LossNegativeLogLikelihood); //TODO + assertTrue(l2.getLossFunction() instanceof LossNegativeLogLikelihood); //TODO assertEquals(26 * 26 * 3, l2.getNIn()); assertEquals(5, l2.getNOut()); assertEquals(new WeightInitRelu(), l0.getWeightInit()); @@ -211,7 +211,7 @@ public class RegressionTest060 extends BaseDL4JTest { assertEquals(4, l2.getNIn()); assertEquals(5, l2.getNOut()); assertEquals("softmax", l2.getActivationFn().toString()); - assertTrue(l2.getLossFn() instanceof LossMCXENT); + assertTrue(l2.getLossFunction() instanceof LossMCXENT); } @Test @@ -243,6 +243,6 @@ public class RegressionTest060 extends BaseDL4JTest { assertEquals(4, l2.getNIn()); assertEquals(5, l2.getNOut()); assertEquals("softmax", l2.getActivationFn().toString()); - assertTrue(l2.getLossFn() instanceof LossMCXENT); + assertTrue(l2.getLossFunction() instanceof LossMCXENT); } } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest071.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest071.java index 8589b7de2..f43c1e9ad 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest071.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest071.java @@ -81,7 +81,7 @@ public class RegressionTest071 extends BaseDL4JTest { OutputLayer l1 = (OutputLayer) conf.getConf(1).getLayer(); assertEquals("softmax", l1.getActivationFn().toString()); - assertTrue(l1.getLossFn() instanceof LossMCXENT); + assertTrue(l1.getLossFunction() instanceof LossMCXENT); assertEquals(4, l1.getNIn()); assertEquals(5, l1.getNOut()); assertEquals(new WeightInitXavier(), l1.getWeightInit()); @@ -112,7 +112,7 @@ 
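/*
 * Illustrative sketch, not part of the patch: besides the builder migration, the regression and
 * updater tests in this area track a handful of renamed accessors on the layer configurations.
 * Only the renames visible in the surrounding hunks are listed, with placeholder receiver names;
 * no other getters are implied to have changed.
 *
 *   layer.getLossFn()      ->  layer.getLossFunction()   // loss function of an output layer
 *   layer.getIDropout()    ->  layer.getDropOut()        // dropout configuration
 *   config.getLayerName()  ->  config.getName()          // name on the layer's training config
 */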
public class RegressionTest071 extends BaseDL4JTest { assertEquals(new WeightInitDistribution(new NormalDistribution(0.1, 1.2)), l0.getWeightInit()); assertEquals(new RmsProp(0.15, 0.96, RmsProp.DEFAULT_RMSPROP_EPSILON), l0.getIUpdater()); assertEquals(0.15, ((RmsProp)l0.getIUpdater()).getLearningRate(), 1e-6); - assertEquals(new Dropout(0.6), l0.getIDropout()); + assertEquals(new Dropout(0.6), l0.getDropOut()); assertEquals(0.1, TestUtils.getL1(l0), 1e-6); assertEquals(new WeightDecay(0.2,false), TestUtils.getWeightDecayReg(l0)); assertEquals(GradientNormalization.ClipElementWiseAbsoluteValue, l0.getGradientNormalization()); @@ -120,13 +120,13 @@ public class RegressionTest071 extends BaseDL4JTest { OutputLayer l1 = (OutputLayer) conf.getConf(1).getLayer(); assertTrue(l1.getActivationFn() instanceof ActivationIdentity); - assertTrue(l1.getLossFn() instanceof LossMSE); + assertTrue(l1.getLossFunction() instanceof LossMSE); assertEquals(4, l1.getNIn()); assertEquals(5, l1.getNOut()); assertEquals(new WeightInitDistribution(new NormalDistribution(0.1, 1.2)), l0.getWeightInit()); assertEquals(new RmsProp(0.15, 0.96, RmsProp.DEFAULT_RMSPROP_EPSILON), l1.getIUpdater()); assertEquals(0.15, ((RmsProp)l0.getIUpdater()).getLearningRate(), 1e-6); - assertEquals(new Dropout(0.6), l1.getIDropout()); + assertEquals(new Dropout(0.6), l1.getDropOut()); assertEquals(0.1, TestUtils.getL1(l1), 1e-6); assertEquals(new WeightDecay(0.2,false), TestUtils.getWeightDecayReg(l1)); assertEquals(GradientNormalization.ClipElementWiseAbsoluteValue, l1.getGradientNormalization()); @@ -169,7 +169,7 @@ public class RegressionTest071 extends BaseDL4JTest { OutputLayer l2 = (OutputLayer) conf.getConf(2).getLayer(); assertEquals("sigmoid", l2.getActivationFn().toString()); - assertTrue(l2.getLossFn() instanceof LossNegativeLogLikelihood); //TODO + assertTrue(l2.getLossFunction() instanceof LossNegativeLogLikelihood); //TODO assertEquals(26 * 26 * 3, l2.getNIn()); assertEquals(5, l2.getNOut()); assertEquals(new WeightInitRelu(), l0.getWeightInit()); @@ -212,7 +212,7 @@ public class RegressionTest071 extends BaseDL4JTest { assertEquals(4, l2.getNIn()); assertEquals(5, l2.getNOut()); assertEquals("softmax", l2.getActivationFn().toString()); - assertTrue(l2.getLossFn() instanceof LossMCXENT); + assertTrue(l2.getLossFunction() instanceof LossMCXENT); } @Test @@ -243,6 +243,6 @@ public class RegressionTest071 extends BaseDL4JTest { assertEquals(4, l2.getNIn()); assertEquals(5, l2.getNOut()); assertEquals("softmax", l2.getActivationFn().toString()); - assertTrue(l2.getLossFn() instanceof LossMCXENT); + assertTrue(l2.getLossFunction() instanceof LossMCXENT); } } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest080.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest080.java index 90cb2c126..379dd289a 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest080.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest080.java @@ -84,7 +84,7 @@ public class RegressionTest080 extends BaseDL4JTest { OutputLayer l1 = (OutputLayer) conf.getConf(1).getLayer(); assertTrue(l1.getActivationFn() instanceof ActivationSoftmax); - assertTrue(l1.getLossFn() instanceof LossMCXENT); + assertTrue(l1.getLossFunction() instanceof LossMCXENT); assertEquals(4, l1.getNIn()); assertEquals(5, l1.getNOut()); assertEquals(new WeightInitXavier(), l1.getWeightInit()); @@ -119,7 +119,7 
@@ public class RegressionTest080 extends BaseDL4JTest { assertEquals(0.96, r.getRmsDecay(), 1e-6); assertEquals(0.15, r.getLearningRate(), 1e-6); assertEquals(0.15, ((RmsProp)l0.getIUpdater()).getLearningRate(), 1e-6); - assertEquals(new Dropout(0.6), l0.getIDropout()); + assertEquals(new Dropout(0.6), l0.getDropOut()); assertEquals(0.1, TestUtils.getL1(l0), 1e-6); assertEquals(new WeightDecay(0.2,false), TestUtils.getWeightDecayReg(l0)); assertEquals(GradientNormalization.ClipElementWiseAbsoluteValue, l0.getGradientNormalization()); @@ -127,7 +127,7 @@ public class RegressionTest080 extends BaseDL4JTest { OutputLayer l1 = (OutputLayer) conf.getConf(1).getLayer(); assertTrue(l1.getActivationFn() instanceof ActivationIdentity); - assertTrue(l1.getLossFn() instanceof LossMSE); + assertTrue(l1.getLossFunction() instanceof LossMSE); assertEquals(4, l1.getNIn()); assertEquals(5, l1.getNOut()); assertEquals(new WeightInitDistribution(new NormalDistribution(0.1, 1.2)), l1.getWeightInit()); @@ -136,7 +136,7 @@ public class RegressionTest080 extends BaseDL4JTest { assertEquals(0.96, r.getRmsDecay(), 1e-6); assertEquals(0.15, r.getLearningRate(), 1e-6); assertEquals(0.15, ((RmsProp)l0.getIUpdater()).getLearningRate(), 1e-6); - assertEquals(new Dropout(0.6), l1.getIDropout()); + assertEquals(new Dropout(0.6), l1.getDropOut()); assertEquals(0.1, TestUtils.getL1(l1), 1e-6); assertEquals(new WeightDecay(0.2, false), TestUtils.getWeightDecayReg(l1)); assertEquals(GradientNormalization.ClipElementWiseAbsoluteValue, l1.getGradientNormalization()); @@ -182,7 +182,7 @@ public class RegressionTest080 extends BaseDL4JTest { OutputLayer l2 = (OutputLayer) conf.getConf(2).getLayer(); assertTrue(l2.getActivationFn() instanceof ActivationSigmoid); - assertTrue(l2.getLossFn() instanceof LossNegativeLogLikelihood); + assertTrue(l2.getLossFunction() instanceof LossNegativeLogLikelihood); assertEquals(26 * 26 * 3, l2.getNIn()); assertEquals(5, l2.getNOut()); assertEquals(new WeightInitRelu(), l2.getWeightInit()); @@ -227,7 +227,7 @@ public class RegressionTest080 extends BaseDL4JTest { assertEquals(4, l2.getNIn()); assertEquals(5, l2.getNOut()); assertTrue(l2.getActivationFn() instanceof ActivationSoftmax); - assertTrue(l2.getLossFn() instanceof LossMCXENT); + assertTrue(l2.getLossFunction() instanceof LossMCXENT); } @Test @@ -259,6 +259,6 @@ public class RegressionTest080 extends BaseDL4JTest { assertEquals(4, l2.getNIn()); assertEquals(5, l2.getNOut()); assertTrue(l2.getActivationFn() instanceof ActivationSoftmax); - assertTrue(l2.getLossFn() instanceof LossMCXENT); + assertTrue(l2.getLossFunction() instanceof LossMCXENT); } } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b4.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b4.java index 6cdede6bd..65329cb4a 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b4.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b4.java @@ -330,7 +330,7 @@ public class RegressionTest100b4 extends BaseDL4JTest { assertEquals(new WeightInitXavier(), l9.getWeightInit()); assertEquals(new L2Regularization(0.0001), TestUtils.getL2Reg(l9)); assertEquals(new Adam(0.005), l9.getIUpdater()); - assertEquals(new LossMAE(), l9.getLossFn()); + assertEquals(new LossMAE(), l9.getLossFunction()); INDArray outExp; File f2 = 
Resources.asFile("regression_testing/100b4/SyntheticCNN_Output_100b4.bin"); @@ -394,7 +394,7 @@ public class RegressionTest100b4 extends BaseDL4JTest { OutputLayer outl = (OutputLayer) net.getLayer("out").getLayerConfiguration(); assertEquals(3, outl.getNOut()); - assertEquals(new LossMCXENT(), outl.getLossFn()); + assertEquals(new LossMCXENT(), outl.getLossFunction()); INDArray outExp; File f2 = Resources.asFile("regression_testing/100b4/SyntheticBidirectionalRNNGraph_Output_100b4.bin"); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b6.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b6.java index c0ee3dca2..0a43d9769 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b6.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b6.java @@ -310,7 +310,7 @@ public class RegressionTest100b6 extends BaseDL4JTest { assertEquals(new WeightInitXavier(), l9.getWeightInit()); assertEquals(new L2Regularization(0.0001), TestUtils.getL2Reg(l9)); assertEquals(new Adam(0.005), l9.getIUpdater()); - assertEquals(new LossMAE(), l9.getLossFn()); + assertEquals(new LossMAE(), l9.getLossFunction()); INDArray outExp; File f2 = Resources.asFile("regression_testing/100b6/SyntheticCNN_Output_100b6.bin"); @@ -374,7 +374,7 @@ public class RegressionTest100b6 extends BaseDL4JTest { OutputLayer outl = (OutputLayer) net.getLayer("out").getLayerConfiguration(); assertEquals(3, outl.getNOut()); - assertEquals(new LossMCXENT(), outl.getLossFn()); + assertEquals(new LossMCXENT(), outl.getLossFunction()); INDArray outExp; File f2 = Resources.asFile("regression_testing/100b6/SyntheticBidirectionalRNNGraph_Output_100b6.bin"); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/customlayer100a/CustomLayer.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/customlayer100a/CustomLayer.java index b20ad6f00..6eb8e1ae2 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/customlayer100a/CustomLayer.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/customlayer100a/CustomLayer.java @@ -20,8 +20,11 @@ package org.deeplearning4j.regressiontest.customlayer100a; +import java.util.Collection; +import java.util.Map; import lombok.Getter; -import lombok.Setter; +import lombok.NoArgsConstructor; +import lombok.experimental.SuperBuilder; import lombok.val; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; @@ -33,146 +36,108 @@ import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.conf.memory.MemoryReport; import org.deeplearning4j.nn.params.DefaultParamInitializer; import org.deeplearning4j.optimize.api.TrainingListener; -import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.Collection; -import java.util.Map; - +@NoArgsConstructor +@SuperBuilder public class CustomLayer extends FeedForwardLayer { + /** + * A custom property used in this custom layer example. 
See the CustomLayerExampleReadme.md for + * details + * + * @param secondActivationFunction Second activation function for the layer + */ + @Getter private IActivation secondActivationFunction; - private IActivation secondActivationFunction; + @Override + public Layer instantiate( + NeuralNetConfiguration conf, + Collection iterationListeners, + int layerIndex, + INDArray layerParamsView, + boolean initializeParams, + DataType networkDataType) { - public CustomLayer() { - //We need a no-arg constructor so we can deserialize the configuration from JSON or YAML format - // Without this, you will likely get an exception like the following: - //com.fasterxml.jackson.databind.JsonMappingException: No suitable constructor found for type [simple type, class org.deeplearning4j.examples.misc.customlayers.layer.CustomLayer]: can not instantiate from JSON object (missing default constructor or creator, or perhaps need to add/enable type information?) + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(0); + // The instantiate method is how we go from the configuration class (i.e., this class) to the + // implementation class + // (i.e., a CustomLayerImpl instance) + // For the most part, it's the same for each type of layer + runInheritance(); + + CustomLayerImpl myCustomLayer = new CustomLayerImpl(lconf, networkDataType); + myCustomLayer.addTrainingListeners(iterationListeners); // Set the iteration listeners, if any + myCustomLayer.setIndex(layerIndex); // Integer index of the layer + + // Parameter view array: In Deeplearning4j, the network parameters for the entire network (all + // layers) are + // allocated in one big array. The relevant section of this parameter vector is extracted out + // for each layer, + // (i.e., it's a "view" array in that it's a subset of a larger array) + // This is a row vector, with length equal to the number of parameters in the layer + myCustomLayer.setParamsViewArray(layerParamsView); + + // Initialize the layer parameters. For example, + // Note that the entries in paramTable (2 entries here: a weight array of shape [nIn,nOut] and + // biases of shape [1,nOut] + // are in turn a view of the 'layerParamsView' array. 
+ Map paramTable = initializer().init(this, layerParamsView, initializeParams); + myCustomLayer.setParamTable(paramTable); + myCustomLayer.setLayerConfiguration(lconf); + return myCustomLayer; + } + + @Override + public ParamInitializer initializer() { + // This method returns the parameter initializer for this type of layer + // In this case, we can use the DefaultParamInitializer, which is the same one used for + // DenseLayerConfiguration + // For more complex layers, you may need to implement a custom parameter initializer + // See the various parameter initializers here: + // https://github.com/deeplearning4j/deeplearning4j/tree/master/deeplearning4j-core/src/main/java/org/deeplearning4j/nn/params + + return DefaultParamInitializer.getInstance(); + } + + @Override + public LayerMemoryReport getMemoryReport(InputType inputType) { + // Memory report is used to estimate how much memory is required for the layer, for different + // configurations + // If you don't need this functionality for your custom layer, you can return a + // LayerMemoryReport + // with all 0s, or + + // This implementation: based on DenseLayerConfiguration implementation + InputType outputType = getOutputType(-1, inputType); + + val numParams = initializer().numParams(this); + int updaterStateSize = (int) getIUpdater().stateSize(numParams); + + int trainSizeFixed = 0; + int trainSizeVariable = 0; + if (getDropOut() != null) { + // Assume we dup the input for dropout + trainSizeVariable += inputType.arrayElementsPerExample(); } - private CustomLayer(Builder builder) { - super(builder); - this.secondActivationFunction = builder.secondActivationFunction; - } - - public IActivation getSecondActivationFunction() { - //We also need setter/getter methods for our layer configuration fields (if any) for JSON serialization - return secondActivationFunction; - } - - public void setSecondActivationFunction(IActivation secondActivationFunction) { - //We also need setter/getter methods for our layer configuration fields (if any) for JSON serialization - this.secondActivationFunction = secondActivationFunction; - } - - @Override - public Layer instantiate(NeuralNetConfiguration conf, Collection iterationListeners, - int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { - - LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(0); - //The instantiate method is how we go from the configuration class (i.e., this class) to the implementation class - // (i.e., a CustomLayerImpl instance) - //For the most part, it's the same for each type of layer - - CustomLayerImpl myCustomLayer = new CustomLayerImpl(lconf, networkDataType); - myCustomLayer.addTrainingListeners(iterationListeners); //Set the iteration listeners, if any - myCustomLayer.setIndex(layerIndex); //Integer index of the layer - - //Parameter view array: In Deeplearning4j, the network parameters for the entire network (all layers) are - // allocated in one big array. The relevant section of this parameter vector is extracted out for each layer, - // (i.e., it's a "view" array in that it's a subset of a larger array) - // This is a row vector, with length equal to the number of parameters in the layer - myCustomLayer.setParamsViewArray(layerParamsView); - - //Initialize the layer parameters. For example, - // Note that the entries in paramTable (2 entries here: a weight array of shape [nIn,nOut] and biases of shape [1,nOut] - // are in turn a view of the 'layerParamsView' array. 
- Map paramTable = initializer().init(this, layerParamsView, initializeParams); - myCustomLayer.setParamTable(paramTable); - myCustomLayer.setLayerConfiguration(lconf); - return myCustomLayer; - } - - @Override - public ParamInitializer initializer() { - //This method returns the parameter initializer for this type of layer - //In this case, we can use the DefaultParamInitializer, which is the same one used for DenseLayerConfiguration - //For more complex layers, you may need to implement a custom parameter initializer - //See the various parameter initializers here: - //https://github.com/deeplearning4j/deeplearning4j/tree/master/deeplearning4j-core/src/main/java/org/deeplearning4j/nn/params - - return DefaultParamInitializer.getInstance(); - } - - @Override - public LayerMemoryReport getMemoryReport(InputType inputType) { - //Memory report is used to estimate how much memory is required for the layer, for different configurations - //If you don't need this functionality for your custom layer, you can return a LayerMemoryReport - // with all 0s, or - - //This implementation: based on DenseLayerConfiguration implementation - InputType outputType = getOutputType(-1, inputType); - - val numParams = initializer().numParams(this); - int updaterStateSize = (int) getIUpdater().stateSize(numParams); - - int trainSizeFixed = 0; - int trainSizeVariable = 0; - if (getIDropout() != null) { - //Assume we dup the input for dropout - trainSizeVariable += inputType.arrayElementsPerExample(); - } - - //Also, during backprop: we do a preOut call -> gives us activations size equal to the output size - // which is modified in-place by activation function backprop - // then we have 'epsilonNext' which is equivalent to input size - trainSizeVariable += outputType.arrayElementsPerExample(); - - return new LayerMemoryReport.Builder(layerName, CustomLayer.class, inputType, outputType) - .standardMemory(numParams, updaterStateSize) - .workingMemory(0, 0, trainSizeFixed, - trainSizeVariable) //No additional memory (beyond activations) for inference - .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, - MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching in DenseLayerConfiguration - .build(); - } - - - //Here's an implementation of a builder pattern, to allow us to easily configure the layer - //Note that we are inheriting all of the FeedForwardLayer.Builder options: things like n - public static class Builder extends FeedForwardLayer.Builder { - - @Getter - @Setter - private IActivation secondActivationFunction; - - //This is an example of a custom property in the configuration - - /** - * A custom property used in this custom layer example. See the CustomLayerExampleReadme.md for details - * - * @param secondActivationFunction Second activation function for the layer - */ - public Builder secondActivationFunction(String secondActivationFunction) { - return secondActivationFunction(Activation.fromString(secondActivationFunction)); - } - - /** - * A custom property used in this custom layer example. See the CustomLayerExampleReadme.md for details - * - * @param secondActivationFunction Second activation function for the layer - */ - public Builder secondActivationFunction(Activation secondActivationFunction) { - this.secondActivationFunction = secondActivationFunction.getActivationFunction(); - return this; - } - - @Override - @SuppressWarnings("unchecked") //To stop warnings about unchecked cast. Not required. 
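With the hand-written Builder class removed here, this example layer is configured through the Lombok @SuperBuilder-generated builder instead. A hypothetical usage sketch: the generated setter is named after the field (secondActivationFunction) and takes the IActivation directly, since the old String/Activation convenience overloads are gone; nIn/nOut/activation are assumed to be inherited from the FeedForwardLayer builder hierarchy:

    CustomLayer layer = CustomLayer.builder()
            .nIn(10)
            .nOut(10)
            .activation(Activation.TANH)
            // no fromString() convenience any more: pass the IActivation itself
            .secondActivationFunction(Activation.SIGMOID.getActivationFunction())
            .build();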
- public CustomLayer build() { - return new CustomLayer(this); - } - } + // Also, during backprop: we do a preOut call -> gives us activations size equal to the output + // size + // which is modified in-place by activation function backprop + // then we have 'epsilonNext' which is equivalent to input size + trainSizeVariable += outputType.arrayElementsPerExample(); + return new LayerMemoryReport.Builder(name, CustomLayer.class, inputType, outputType) + .standardMemory(numParams, updaterStateSize) + .workingMemory( + 0, + 0, + trainSizeFixed, + trainSizeVariable) // No additional memory (beyond activations) for inference + .cacheMemory( + MemoryReport.CACHE_MODE_ALL_ZEROS, + MemoryReport.CACHE_MODE_ALL_ZEROS) // No caching in DenseLayerConfiguration + .build(); + } } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/samediff/CompareTrainingImplementations.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/samediff/CompareTrainingImplementations.java index b4edb0ba8..717aed08f 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/samediff/CompareTrainingImplementations.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/samediff/CompareTrainingImplementations.java @@ -158,8 +158,8 @@ public class CompareTrainingImplementations extends BaseDL4JTest { .weightDecay(wdVal, true).weightDecayBias(wdVal, true) .updater(new Sgd(1.0)) //Exclicitly use SGD(1.0) for comparing PRE-UPDATE GRADIENTS (but with l1/l2/wd component added) .list() - .layer(new DenseLayer.Builder().nIn(4).nOut(10).activation(Activation.TANH).build()) - .layer(new OutputLayer.Builder().nIn(10).nOut(3).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MSE).build()) + .layer(DenseLayer.builder().nIn(4).nOut(10).activation(Activation.TANH).build()) + .layer(OutputLayer.builder().nIn(10).nOut(3).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MSE).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(mlc); @@ -222,8 +222,8 @@ public class CompareTrainingImplementations extends BaseDL4JTest { .weightDecay(wdVal, true).weightDecayBias(wdVal, true) .updater(updater.clone()) .list() - .layer(new DenseLayer.Builder().nIn(4).nOut(10).activation(Activation.TANH).build()) - .layer(new OutputLayer.Builder().nIn(10).nOut(3).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MSE).build()) + .layer(DenseLayer.builder().nIn(4).nOut(10).activation(Activation.TANH).build()) + .layer(OutputLayer.builder().nIn(10).nOut(3).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MSE).build()) .build(); net = new MultiLayerNetwork(mlc); net.init(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/CrashReportingUtilTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/CrashReportingUtilTest.java index e0eeef88d..491360a7d 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/CrashReportingUtilTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/CrashReportingUtilTest.java @@ -84,18 +84,18 @@ public class CrashReportingUtilTest extends BaseDL4JTest { .dist(new NormalDistribution(0, 1)) .list().layer(0, - new ConvolutionLayer.Builder() + ConvolutionLayer.builder() .kernelSize(kernel, kernel) .stride(stride, stride) .padding(padding, padding) .nIn(inputDepth) .nOut(3).build()) - .layer(1, new SubsamplingLayer.Builder(poolingType) + .layer(1, SubsamplingLayer.builder(poolingType) .kernelSize(kernel, 
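The test updates above and below follow one mechanical pattern: new SomeLayer.Builder(...) becomes SomeLayer.builder(), and a loss function passed to the OutputLayer.Builder constructor moves to the lossFunction(...) builder method. A minimal configuration sketch in the new style, composed only from calls that appear in these hunks (whether .list() is still required seems to vary across the touched tests; it is kept here):

    NeuralNetConfiguration conf = NeuralNetConfiguration.builder()
            .updater(new Sgd(0.1))
            .list()
            .layer(0, DenseLayer.builder().nIn(4).nOut(10).activation(Activation.TANH).build())
            .layer(1, OutputLayer.builder()
                    .lossFunction(LossFunctions.LossFunction.MCXENT)
                    .activation(Activation.SOFTMAX)
                    .nIn(10).nOut(3)
                    .build())
            .build();

    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();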
kernel) .stride(stride, stride) .padding(padding, padding) .build()) - .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(2, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX) .nOut(10).build()) .inputType(InputType.convolutionalFlat(height, width, diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/ModelGuesserTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/ModelGuesserTest.java index e941c75ee..fb8ba0fb1 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/ModelGuesserTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/ModelGuesserTest.java @@ -249,9 +249,9 @@ public class ModelGuesserTest extends BaseDL4JTest { int nOut = 6; NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345).l1(0.01).l2(0.01) - .updater(new Sgd(0.1)).activation(Activation.TANH).weightInit(WeightInit.XAVIER).list() - .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(20).build()) - .layer(1, new DenseLayer.Builder().nIn(20).nOut(30).build()).layer(2, new OutputLayer.Builder() + .updater(new Sgd(0.1)).activation(Activation.TANH).weightInit(WeightInit.XAVIER) + .layer(0, DenseLayer.builder().nIn(nIn).nOut(20).build()) + .layer(1, DenseLayer.builder().nIn(20).nOut(30).build()).layer(2, OutputLayer.builder() .lossFunction(LossFunctions.LossFunction.MSE).nIn(30).nOut(nOut).build()) .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/ModelSerializerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/ModelSerializerTest.java index 495b403d5..8c476d80e 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/ModelSerializerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/ModelSerializerTest.java @@ -66,8 +66,8 @@ public class ModelSerializerTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345).l1(0.01) .l2(0.01).updater(new Sgd(0.1)).activation(Activation.TANH).weightInit(WeightInit.XAVIER).list() - .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(20).build()) - .layer(1, new DenseLayer.Builder().nIn(20).nOut(30).build()).layer(2, new OutputLayer.Builder() + .layer(0, DenseLayer.builder().nIn(nIn).nOut(20).build()) + .layer(1, DenseLayer.builder().nIn(20).nOut(30).build()).layer(2, OutputLayer.builder() .lossFunction(LossFunctions.LossFunction.MSE).nIn(30).nOut(nOut).build()) .build(); @@ -92,8 +92,8 @@ public class ModelSerializerTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345).l1(0.01) .l2(0.01).updater(new Sgd(0.1)).activation(Activation.TANH).weightInit(WeightInit.XAVIER).list() - .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(20).build()) - .layer(1, new DenseLayer.Builder().nIn(20).nOut(30).build()).layer(2, new OutputLayer.Builder() + .layer(0, DenseLayer.builder().nIn(nIn).nOut(20).build()) + .layer(1, DenseLayer.builder().nIn(20).nOut(30).build()).layer(2, OutputLayer.builder() .lossFunction(LossFunctions.LossFunction.MSE).nIn(30).nOut(nOut).build()) .build(); @@ -135,8 +135,8 @@ public class ModelSerializerTest extends BaseDL4JTest { ComputationGraphConfiguration config = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(new Sgd(0.1)) .graphBuilder().addInputs("in") - .addLayer("dense", new DenseLayer.Builder().nIn(4).nOut(2).build(), "in").addLayer("out", 
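The same rename applies on the graph-builder path. A short sketch assembled from the ComputationGraph hunks in this test file; the final two lines are the usual DL4J construction and are assumed rather than shown in the diff:

    ComputationGraphConfiguration config = NeuralNetConfiguration.builder()
            .updater(new Sgd(0.1))
            .graphBuilder()
            .addInputs("in")
            .addLayer("dense", DenseLayer.builder().nIn(4).nOut(2).build(), "in")
            .addLayer("out", OutputLayer.builder()
                    .lossFunction(LossFunctions.LossFunction.MCXENT)
                    .nIn(2).nOut(3)
                    .activation(Activation.SOFTMAX)
                    .build(), "dense")
            .setOutputs("out")
            .build();

    ComputationGraph cg = new ComputationGraph(config);
    cg.init();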
- new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(2).nOut(3) + .addLayer("dense", DenseLayer.builder().nIn(4).nOut(2).build(), "in").addLayer("out", + OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT).nIn(2).nOut(3) .activation(Activation.SOFTMAX).build(), "dense") .setOutputs("out").build(); @@ -160,8 +160,8 @@ public class ModelSerializerTest extends BaseDL4JTest { ComputationGraphConfiguration config = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(new Sgd(0.1)) .graphBuilder().addInputs("in") - .addLayer("dense", new DenseLayer.Builder().nIn(4).nOut(2).build(), "in").addLayer("out", - new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(2).nOut(3) + .addLayer("dense", DenseLayer.builder().nIn(4).nOut(2).build(), "in").addLayer("out", + OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT).nIn(2).nOut(3) .activation(Activation.SOFTMAX).build(), "dense") .setOutputs("out").build(); @@ -191,8 +191,8 @@ public class ModelSerializerTest extends BaseDL4JTest { ComputationGraphConfiguration config = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(new Sgd(0.1)) .graphBuilder().addInputs("in") - .addLayer("dense", new DenseLayer.Builder().nIn(4).nOut(2).build(), "in").addLayer("out", - new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(2).nOut(3) + .addLayer("dense", DenseLayer.builder().nIn(4).nOut(2).build(), "in").addLayer("out", + OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT).nIn(2).nOut(3) .activation(Activation.SOFTMAX).build(), "dense") .setOutputs("out").build(); @@ -254,8 +254,8 @@ public class ModelSerializerTest extends BaseDL4JTest { public void testInvalidLoading1() throws Exception { ComputationGraphConfiguration config = NeuralNetConfiguration.builder() .graphBuilder().addInputs("in") - .addLayer("dense", new DenseLayer.Builder().nIn(4).nOut(2).build(), "in") - .addLayer("out",new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .addLayer("dense", DenseLayer.builder().nIn(4).nOut(2).build(), "in") + .addLayer("out",OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(2).nOut(3).build(), "dense") .setOutputs("out").build(); @@ -283,8 +283,8 @@ public class ModelSerializerTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345).l1(0.01) .l2(0.01).updater(new Sgd(0.1)).activation(Activation.TANH).weightInit(WeightInit.XAVIER).list() - .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(20).build()) - .layer(1, new DenseLayer.Builder().nIn(20).nOut(30).build()).layer(2, new OutputLayer.Builder() + .layer(0, DenseLayer.builder().nIn(nIn).nOut(20).build()) + .layer(1, DenseLayer.builder().nIn(20).nOut(30).build()).layer(2, OutputLayer.builder() .lossFunction(LossFunctions.LossFunction.MSE).nIn(30).nOut(nOut).build()) .build(); @@ -311,7 +311,7 @@ public class ModelSerializerTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345).l1(0.01) .list() - .layer(new OutputLayer.Builder().nIn(nIn).nOut(nOut).activation(Activation.SOFTMAX).build()) + .layer(OutputLayer.builder().nIn(nIn).nOut(nOut).activation(Activation.SOFTMAX).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -359,7 +359,7 @@ public class ModelSerializerTest extends BaseDL4JTest { ComputationGraphConfiguration conf = 
NeuralNetConfiguration.builder().seed(12345).l1(0.01) .graphBuilder() .addInputs("in") - .layer("0", new OutputLayer.Builder().nIn(nIn).nOut(nOut).activation(Activation.SOFTMAX).build(), "in") + .layer("0", OutputLayer.builder().nIn(nIn).nOut(nOut).activation(Activation.SOFTMAX).build(), "in") .setOutputs("0") .build(); @@ -408,7 +408,7 @@ public class ModelSerializerTest extends BaseDL4JTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345).l1(0.01) .graphBuilder() .addInputs("in") - .layer("0", new OutputLayer.Builder().nIn(nIn).nOut(nOut).build(), "in") + .layer("0", OutputLayer.builder().nIn(nIn).nOut(nOut).build(), "in") .setOutputs("0") .validateOutputLayerConfig(false) .build(); @@ -434,7 +434,7 @@ public class ModelSerializerTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345).l1(0.01) .list() - .layer(0, new OutputLayer.Builder().nIn(nIn).nOut(nOut).activation(Activation.SOFTMAX).build()) + .layer(0, OutputLayer.builder().nIn(nIn).nOut(nOut).activation(Activation.SOFTMAX).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -460,7 +460,7 @@ public class ModelSerializerTest extends BaseDL4JTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345).l1(0.01) .graphBuilder() .addInputs("in") - .layer("0", new OutputLayer.Builder().nIn(nIn).nOut(nOut).activation(Activation.SOFTMAX).build(), "in") + .layer("0", OutputLayer.builder().nIn(nIn).nOut(nOut).activation(Activation.SOFTMAX).build(), "in") .setOutputs("0") .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/ModelValidatorTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/ModelValidatorTests.java index eef3472d2..7c61a7afd 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/ModelValidatorTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/ModelValidatorTests.java @@ -299,9 +299,9 @@ public class ModelValidatorTests extends BaseDL4JTest { .seed(12345) .updater(new Adam(0.01)) .list() - .layer(new DenseLayer.Builder().nIn(10).nOut(10).build()) - .layer(new DenseLayer.Builder().nIn(10).nOut(10).build()) - .layer(new OutputLayer.Builder().nIn(10).nOut(10).build()) + .layer(DenseLayer.builder().nIn(10).nOut(10).build()) + .layer(DenseLayer.builder().nIn(10).nOut(10).build()) + .layer(OutputLayer.builder().nIn(10).nOut(10).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasLayer.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasLayer.java index 810dbce85..11d2c1890 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasLayer.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasLayer.java @@ -51,7 +51,8 @@ public class KerasLayer { public enum DimOrder {NONE, THEANO, TENSORFLOW} protected String className; // Keras layer class name - protected String layerName; // Keras layer name + @Getter + protected String name; // Keras layer name protected int[] inputShape; // Keras layer input shape protected DimOrder dimOrder; // Keras layer backend dimension order protected List inboundLayerNames; // List of inbound layers @@ -75,7 +76,7 @@ public class KerasLayer { */ protected KerasLayer(Integer kerasVersion) throws 
UnsupportedKerasConfigurationException { this.className = null; - this.layerName = null; + this.name = null; this.inputShape = null; this.dimOrder = DimOrder.NONE; this.inboundLayerNames = new ArrayList<>(); @@ -94,7 +95,7 @@ public class KerasLayer { */ protected KerasLayer() throws UnsupportedKerasConfigurationException { this.className = null; - this.layerName = null; + this.name = null; this.inputShape = null; this.dimOrder = DimOrder.NONE; this.inboundLayerNames = new ArrayList<>(); @@ -133,8 +134,8 @@ public class KerasLayer { this.className = KerasLayerUtils.getClassNameFromConfig(layerConfig, conf); if (this.className == null) throw new InvalidKerasConfigurationException("Keras layer class name is missing"); - this.layerName = KerasLayerUtils.getLayerNameFromConfig(layerConfig, conf); - if (this.layerName == null) + this.name = KerasLayerUtils.getLayerNameFromConfig(layerConfig, conf); + if (this.name == null) throw new InvalidKerasConfigurationException("Keras layer class name is missing"); this.inputShape = KerasLayerUtils.getInputShapeFromConfig(layerConfig, conf); this.dimOrder = KerasLayerUtils.getDimOrderFromConfig(layerConfig, conf); @@ -212,7 +213,7 @@ public class KerasLayer { * @return layer name */ public String getLayerName() { - return this.layerName; + return this.name; } /** @@ -301,8 +302,8 @@ public class KerasLayer { */ public void copyWeightsToLayer(org.deeplearning4j.nn.api.Layer layer) throws InvalidKerasConfigurationException { if (this.getNumParams() > 0) { - String dl4jLayerName = layer.getLayerConfiguration().getLayerName(); - String kerasLayerName = this.getLayerName(); + String dl4jLayerName = layer.getLayerConfiguration().getName(); + String kerasLayerName = this.name; String msg = "Error when attempting to copy weights from Keras layer " + kerasLayerName + " to DL4J layer " + dl4jLayerName; @@ -346,7 +347,7 @@ public class KerasLayer { } catch (Exception e) { log.error(e.getMessage()); throw new InvalidKerasConfigurationException(e.getMessage() - + "\nTried to set weights for layer with name " + this.getLayerName() + + "\nTried to set weights for layer with name " + this.name + ", of " + layer.getLayerConfiguration().getClass() + ".\n" + "Failed to set weights for parameter " + paramName + "\n" + "Expected shape for this parameter: " + layer.getParam(paramName).shapeInfoToString() diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasModel.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasModel.java index 4ce518eac..6f1bf14d2 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasModel.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasModel.java @@ -221,7 +221,7 @@ public class KerasModel { KerasLayer layer = KerasLayerUtils.getKerasLayerFromConfig( layerConfigMap, this.enforceTrainingConfig, kerasLayerConf, customLayers, lambdaLayers, layers); layersOrdered.add(layer); - layers.put(layer.getLayerName(), layer); + layers.put(layer.getName(), layer); if (layer instanceof KerasLSTM) this.useTruncatedBPTT = this.useTruncatedBPTT || ((KerasLSTM) layer).getUnroll(); if (layer instanceof KerasSimpleRnn) @@ -235,9 +235,9 @@ public class KerasModel { //node inputs by name for looking up which nodes to do replacements for (useful since indices of nodes can change) Map> nodesOutputToForLambdas = new HashMap<>(); for(int i = 0; i < layers.size(); i++) { - 
names.add(layersOrdered.get(i).getLayerName()); + names.add(layersOrdered.get(i).getName()); if(layersOrdered.get(i) instanceof KerasLambda) { - lambdaNames.add(layersOrdered.get(i).getLayerName()); + lambdaNames.add(layersOrdered.get(i).getName()); } } @@ -255,23 +255,23 @@ public class KerasModel { nodesOutputToForLambdas.put(input,new ArrayList()); } - nodesOutputToForLambdas.get(input).add(kerasLayer.getLayerName()); + nodesOutputToForLambdas.get(input).add(kerasLayer.getName()); } //potential loop found int indexOfInput = names.indexOf(input); if(indexOfInput > i) { KerasLambda originalLambda = (KerasLambda) kerasLayer; Map configCopy = new HashMap(kerasLayer.originalLayerConfig); - String newName = kerasLayer.getLayerName() + "-" + input; - if(!replacementNamesForLambda.containsKey(originalLambda.layerName)) { - replacementNamesForLambda.put(originalLambda.layerName,new ArrayList()); + String newName = kerasLayer.name + "-" + input; + if(!replacementNamesForLambda.containsKey(originalLambda.name)) { + replacementNamesForLambda.put(originalLambda.name,new ArrayList()); } configCopy.put(kerasLayer.conf.getLAYER_FIELD_NAME(),newName); - replacementNamesForLambda.get(originalLambda.layerName).add(newName); + replacementNamesForLambda.get(originalLambda.name).add(newName); SameDiffLambdaLayer sameDiffLambdaLayer = (SameDiffLambdaLayer) originalLambda.getSameDiffLayer().clone(); - sameDiffLambdaLayer.setLayerName(newName); + sameDiffLambdaLayer.setName(newName); KerasLambda kerasLambda = new KerasLambda(configCopy,sameDiffLambdaLayer); - kerasLambda.layerName = newName; + kerasLambda.name = newName; kerasLambda.setInboundLayerNames(new ArrayList<>(Collections.singletonList(input))); layers.put(newName,kerasLambda); int indexOfNewLayer = names.indexOf(input) + 1; @@ -344,7 +344,7 @@ public class KerasModel { layers.clear(); for(KerasLayer kerasLayer : layersOrdered) { - layers.put(kerasLayer.getLayerName(),kerasLayer); + layers.put(kerasLayer.name,kerasLayer); } return new Pair<>(layers, layersOrdered); @@ -419,8 +419,8 @@ public class KerasModel { /* Add loss layers to output layer list and layer graph. 
*/ for (KerasLayer lossLayer : lossLayers) { this.layersOrdered.add(lossLayer); - this.layers.put(lossLayer.getLayerName(), lossLayer); - this.outputLayerNames.add(lossLayer.getLayerName()); + this.layers.put(lossLayer.getName(), lossLayer); + this.outputLayerNames.add(lossLayer.getName()); } } @@ -468,7 +468,7 @@ public class KerasModel { inputTypes.add(outputTypes.get(inboundLayerName)); outputType = layer.getOutputType(inputTypes.toArray(new InputType[1])); } - outputTypes.put(layer.getLayerName(), outputType); + outputTypes.put(layer.getName(), outputType); kerasLayerIdx++; } @@ -500,7 +500,7 @@ public class KerasModel { outputs.put(input,new ArrayList()); } - outputs.get(input).add(layer.getLayerName()); + outputs.get(input).add(layer.getName()); } } @@ -571,22 +571,22 @@ public class KerasModel { if (layer.isLayer()) { if (preprocessor != null) - preprocessors.put(layer.getLayerName(), preprocessor); - graphBuilder.addLayer(layer.getLayerName(), layer.getLayer(), inboundLayerNamesArray); + preprocessors.put(layer.getName(), preprocessor); + graphBuilder.addLayer(layer.getName(), layer.getLayer(), inboundLayerNamesArray); } else if (layer.isVertex()) { // Ignore "preprocessor" layers for now if (preprocessor != null) - preprocessors.put(layer.getLayerName(), preprocessor); - graphBuilder.addVertex(layer.getLayerName(), layer.getVertex(), inboundLayerNamesArray); + preprocessors.put(layer.getName(), preprocessor); + graphBuilder.addVertex(layer.getName(), layer.getVertex(), inboundLayerNamesArray); } else if (layer.isInputPreProcessor()) { if (preprocessor == null) - throw new UnsupportedKerasConfigurationException("ILayer " + layer.getLayerName() + throw new UnsupportedKerasConfigurationException("ILayer " + layer.getName() + " could not be mapped to ILayer, Vertex, or InputPreProcessor"); - graphBuilder.addVertex(layer.getLayerName(), new PreprocessorVertex(preprocessor), + graphBuilder.addVertex(layer.getName(), new PreprocessorVertex(preprocessor), inboundLayerNamesArray); } if(layer instanceof KerasInput) { - initialInputTypes.add(this.outputTypes.get(layer.layerName)); + initialInputTypes.add(this.outputTypes.get(layer.name)); } } graphBuilder.setInputPreProcessors(preprocessors); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasSequentialModel.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasSequentialModel.java index 2a99d0c34..af22b0a1b 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasSequentialModel.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasSequentialModel.java @@ -119,18 +119,18 @@ public class KerasSequentialModel extends KerasModel { Preconditions.checkState(ArrayUtil.prod(firstLayerInputShape) > 0,"Input shape must not be zero!"); inputLayer = new KerasInput("input1", firstLayerInputShape); inputLayer.setDimOrder(this.layersOrdered.get(0).getDimOrder()); - this.layers.put(inputLayer.getLayerName(), inputLayer); + this.layers.put(inputLayer.getName(), inputLayer); this.layersOrdered.add(0, inputLayer); } - this.inputLayerNames = new ArrayList<>(Collections.singletonList(inputLayer.getLayerName())); + this.inputLayerNames = new ArrayList<>(Collections.singletonList(inputLayer.getName())); this.outputLayerNames = new ArrayList<>( - Collections.singletonList(this.layersOrdered.get(this.layersOrdered.size() - 1).getLayerName())); + 
Collections.singletonList(this.layersOrdered.get(this.layersOrdered.size() - 1).getName())); /* Update each layer's inbound layer list to include (only) previous layer. */ KerasLayer prevLayer = null; for (KerasLayer layer : this.layersOrdered) { if (prevLayer != null) - layer.setInboundLayerNames(Collections.singletonList(prevLayer.getLayerName())); + layer.setInboundLayerNames(Collections.singletonList(prevLayer.getName())); prevLayer = layer; } @@ -192,7 +192,7 @@ public class KerasSequentialModel extends KerasModel { if (nbInbound != 1) throw new InvalidKerasConfigurationException( "Layers in NeuralNetConfiguration must have exactly one inbound layer (found " - + nbInbound + " for layer " + layer.getLayerName() + ")"); + + nbInbound + " for layer " + layer.getName() + ")"); if (prevLayer != null) { InputType[] inputTypes = new InputType[1]; InputPreProcessor preprocessor; @@ -202,7 +202,7 @@ public class KerasSequentialModel extends KerasModel { InputType outputType = preprocessor.getOutputType(inputTypes[0]); layer.getLayer().setNIn(outputType,modelBuilder.isOverrideNinUponBuild()); } else { - inputTypes[0] = this.outputTypes.get(prevLayer.getLayerName()); + inputTypes[0] = this.outputTypes.get(prevLayer.getName()); preprocessor = layer.getInputPreprocessor(inputTypes); if(preprocessor != null) { InputType outputType = preprocessor.getOutputType(inputTypes[0]); @@ -221,7 +221,7 @@ public class KerasSequentialModel extends KerasModel { modelBuilder.layer(layerIndex++, layer.getLayer()); } else if (layer.getVertex() != null) throw new InvalidKerasConfigurationException("Cannot add vertex to NeuralNetConfiguration (class name " - + layer.getClassName() + ", layer name " + layer.getLayerName() + ")"); + + layer.getClassName() + ", layer name " + layer.getName() + ")"); prevLayer = layer; } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/KerasInput.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/KerasInput.java index 64c64ad75..af063e7b8 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/KerasInput.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/KerasInput.java @@ -102,7 +102,7 @@ public class KerasInput extends KerasLayer { public KerasInput(String layerName, int[] inputShape, boolean enforceTrainingConfig) throws UnsupportedKerasConfigurationException, InvalidKerasConfigurationException { this.className = conf.getLAYER_CLASS_NAME_INPUT(); - this.layerName = layerName; + this.name = layerName; this.inputShape = inputShape; this.inboundLayerNames = new ArrayList<>(); this.layer = null; diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/KerasLoss.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/KerasLoss.java index addc3d833..e82a677d7 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/KerasLoss.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/KerasLoss.java @@ -75,7 +75,7 @@ public class KerasLoss extends KerasLayer { public KerasLoss(String layerName, String inboundLayerName, String kerasLoss, boolean enforceTrainingConfig) throws UnsupportedKerasConfigurationException, InvalidKerasConfigurationException { this.className = KERAS_CLASS_NAME_LOSS; - 
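The Keras-import changes in this part of the patch are dominated by the layerName -> name field rename. @Getter now generates getName(), and the existing getLayerName() is kept as a thin delegate that returns this.name, so both accessors stay equivalent. A small sketch, assuming a KerasInput as the concrete layer and omitting the checked Keras-configuration exceptions:

    KerasLayer inputLayer = new KerasInput("input1", new int[] {28, 28});
    String viaLombok = inputLayer.getName();       // generated by @Getter on the renamed field
    String viaLegacy = inputLayer.getLayerName();  // retained wrapper, returns this.name
    assert viaLombok.equals(viaLegacy);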
this.layerName = layerName; + this.name = layerName; this.inputShape = null; this.dimOrder = DimOrder.NONE; this.inboundLayerNames = new ArrayList<>(); @@ -97,13 +97,13 @@ public class KerasLoss extends KerasLayer { */ public FeedForwardLayer getLossLayer(InputType type) throws UnsupportedKerasConfigurationException { if (type instanceof InputType.InputTypeFeedForward) { - this.layer = new LossLayer.Builder(loss).name(this.layerName).activation(Activation.IDENTITY).build(); + this.layer = LossLayer.builder().lossFunction(loss).name(this.name).activation(Activation.IDENTITY).build(); } else if (type instanceof InputType.InputTypeRecurrent) { - this.layer = new RnnLossLayer.Builder(loss).name(this.layerName).activation(Activation.IDENTITY).build(); + this.layer = RnnLossLayer.builder().lossFunction(loss).name(this.name).activation(Activation.IDENTITY).build(); } else if (type instanceof InputType.InputTypeConvolutional) { - this.layer = new CnnLossLayer.Builder(loss).name(this.layerName).activation(Activation.IDENTITY).build(); + this.layer = CnnLossLayer.builder().lossFunction(loss).name(this.name).activation(Activation.IDENTITY).build(); } else { throw new UnsupportedKerasConfigurationException("Unsupported output layer type" + "got : " + type.toString()); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activations/KerasELU.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activations/KerasELU.java index af40cf279..70831a279 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activations/KerasELU.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activations/KerasELU.java @@ -65,7 +65,7 @@ public class KerasELU extends KerasLayer { alpha = (double) innerConfig.get(layerFieldLeakyReluAlpha); } IActivation leakyReLU = new ActivationELU(alpha); - this.layer = new ActivationLayer.Builder().name(this.layerName).activation(leakyReLU).build(); + this.layer = ActivationLayer.builder().name(this.name).activation(leakyReLU).build(); } /** diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activations/KerasLeakyReLU.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activations/KerasLeakyReLU.java index 897189dbb..de128ba20 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activations/KerasLeakyReLU.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activations/KerasLeakyReLU.java @@ -69,7 +69,7 @@ public class KerasLeakyReLU extends KerasLayer { alpha = (double) innerConfig.get(layerFieldLeakyReluAlpha); } IActivation leakyReLU = new ActivationLReLU(alpha); - this.layer = new ActivationLayer.Builder().name(this.layerName).activation(leakyReLU).build(); + this.layer = ActivationLayer.builder().name(this.name).activation(leakyReLU).build(); } /** diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activations/KerasPReLU.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activations/KerasPReLU.java index 5a4bb1c55..8f101574d 100644 --- 
a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activations/KerasPReLU.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activations/KerasPReLU.java @@ -85,8 +85,8 @@ public class KerasPReLU extends KerasLayer { enforceTrainingConfig, conf, kerasMajorVersion); long[] axes = getSharedAxes(layerConfig); - PReLULayer.Builder builder = new PReLULayer.Builder().sharedAxes(axes) - .weightInit(init).name(layerName); + PReLULayer.PReLULayerBuilder builder = PReLULayer.builder().sharedAxes(axes) + .weightInit(init).name(name); if (weightConstraint != null){ builder.constrainWeights(weightConstraint); } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activations/KerasReLU.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activations/KerasReLU.java index 255d2315d..90313d0cc 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activations/KerasReLU.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activations/KerasReLU.java @@ -68,7 +68,7 @@ public class KerasReLU extends KerasLayer { threshold = ((Number)innerConfig.get("threshold")).doubleValue(); } - this.layer = new ActivationLayer.Builder().name(this.layerName) + this.layer = ActivationLayer.builder().name(this.name) .activation(new ActivationReLU(maxValue, threshold, negativeSlope)).build(); } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activations/KerasSoftmax.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activations/KerasSoftmax.java index 5a8931ff5..516caaa70 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activations/KerasSoftmax.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activations/KerasSoftmax.java @@ -55,7 +55,7 @@ public class KerasSoftmax extends KerasLayer { throws InvalidKerasConfigurationException, UnsupportedKerasConfigurationException { super(layerConfig, enforceTrainingConfig); - this.layer = new ActivationLayer.Builder().name(this.layerName).activation(new ActivationSoftmax()).build(); + this.layer = ActivationLayer.builder().name(this.name).activation(new ActivationSoftmax()).build(); } /** diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activations/KerasThresholdedReLU.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activations/KerasThresholdedReLU.java index dad137e01..279249163 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activations/KerasThresholdedReLU.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activations/KerasThresholdedReLU.java @@ -69,7 +69,7 @@ public class KerasThresholdedReLU extends KerasLayer { theta = (double) innerConfig.get(layerFieldThresholdTheta); } IActivation thresholdedReLU = new ActivationThresholdedReLU(theta); - this.layer = new 
ActivationLayer.Builder().name(this.layerName).activation(thresholdedReLU).build(); + this.layer = ActivationLayer.builder().name(this.name).activation(thresholdedReLU).build(); } /** diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasAtrousConvolution1D.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasAtrousConvolution1D.java index 916bc8cef..21c580494 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasAtrousConvolution1D.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasAtrousConvolution1D.java @@ -24,6 +24,7 @@ import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.RNNFormat; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.Convolution1DLayer; +import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.utils.KerasConstraintUtils; @@ -83,7 +84,7 @@ public class KerasAtrousConvolution1D extends KerasConvolution { IWeightInit init = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_INIT(), enforceTrainingConfig, conf, kerasMajorVersion); - Convolution1DLayer.Builder builder = new Convolution1DLayer.Builder().name(this.layerName) + ConvolutionLayer.ConvolutionLayerBuilder builder = Convolution1DLayer.builder().name(this.name) .nOut(getNOutFromConfig(layerConfig, conf)).dropOut(this.dropout) .activation(getIActivationFromConfig(layerConfig, conf)) .weightInit(init) diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasAtrousConvolution2D.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasAtrousConvolution2D.java index fb61997d0..419d74490 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasAtrousConvolution2D.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasAtrousConvolution2D.java @@ -20,6 +20,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.convolutional; +import lombok.val; import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -84,7 +85,7 @@ public class KerasAtrousConvolution2D extends KerasConvolution { IWeightInit init = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_INIT(), enforceTrainingConfig, conf, kerasMajorVersion); - ConvolutionLayer.Builder builder = new ConvolutionLayer.Builder().name(this.layerName) + val builder = ConvolutionLayer.builder().name(this.name) .nOut(getNOutFromConfig(layerConfig, conf)).dropOut(this.dropout) .activation(getIActivationFromConfig(layerConfig, conf)) .weightInit(init) @@ -92,7 +93,7 @@ public class KerasAtrousConvolution2D extends KerasConvolution { .l1(this.weightL1Regularization).l2(this.weightL2Regularization) .convolutionMode(getConvolutionModeFromConfig(layerConfig, conf)) .kernelSize(getKernelSizeFromConfig(layerConfig, 2, conf, 
kerasMajorVersion)) - .dataFormat(dimOrder == DimOrder.TENSORFLOW ? CNN2DFormat.NHWC : CNN2DFormat.NCHW) + .convFormat(dimOrder == DimOrder.TENSORFLOW ? CNN2DFormat.NHWC : CNN2DFormat.NCHW) .hasBias(hasBias) .stride(getStrideFromConfig(layerConfig, 2, conf)); int[] padding = getPaddingFromBorderModeConfig(layerConfig, 2, conf, kerasMajorVersion); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution.java index c27ef47eb..545a5e474 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution.java @@ -22,6 +22,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.convolutional; import lombok.Data; import lombok.EqualsAndHashCode; +import lombok.NoArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.ArrayUtils; import org.deeplearning4j.nn.modelimport.keras.KerasLayer; @@ -54,6 +55,10 @@ abstract public class KerasConvolution extends KerasLayer { super(kerasVersion); } + public KerasConvolution() throws UnsupportedKerasConfigurationException { + super(); + } + /** * Constructor from parsed Keras layer configuration dictionary. * diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution1D.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution1D.java index bc807accf..d99db5999 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution1D.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution1D.java @@ -23,7 +23,6 @@ package org.deeplearning4j.nn.modelimport.keras.layers.convolutional; import lombok.Data; import lombok.EqualsAndHashCode; import lombok.extern.slf4j.Slf4j; -import org.apache.commons.lang3.ArrayUtils; import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.InputPreProcessor; @@ -34,11 +33,9 @@ import org.deeplearning4j.nn.conf.layers.InputTypeUtil; import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.utils.KerasConstraintUtils; -import org.deeplearning4j.nn.modelimport.keras.utils.KerasLayerUtils; import org.deeplearning4j.nn.params.ConvolutionParamInitializer; import org.deeplearning4j.nn.weights.IWeightInit; import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.factory.Nd4j; import java.util.HashMap; import java.util.Map; @@ -96,7 +93,7 @@ public class KerasConvolution1D extends KerasConvolution { IWeightInit init = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_INIT(), enforceTrainingConfig, conf, kerasMajorVersion); - Convolution1DLayer.Builder builder = new Convolution1DLayer.Builder().name(this.layerName) + Convolution1DLayer.Convolution1DLayerBuilder builder = Convolution1DLayer.builder().name(this.name) 
.nOut(getNOutFromConfig(layerConfig, conf)).dropOut(this.dropout) .activation(getIActivationFromConfig(layerConfig, conf)) .weightInit(init) @@ -129,7 +126,7 @@ public class KerasConvolution1D extends KerasConvolution { this.layer = builder.build(); //set this in order to infer the dimensional format Convolution1DLayer convolution1DLayer = (Convolution1DLayer) this.layer; - convolution1DLayer.setCnn2dDataFormat(dimOrder == DimOrder.TENSORFLOW ? CNN2DFormat.NHWC : CNN2DFormat.NCHW); + convolution1DLayer.setDataFormat(dimOrder == DimOrder.TENSORFLOW ? CNN2DFormat.NHWC : CNN2DFormat.NCHW); convolution1DLayer.setDefaultValueOverriden(true); } @@ -176,7 +173,7 @@ public class KerasConvolution1D extends KerasConvolution { if (inputType.length > 1) throw new InvalidKerasConfigurationException( "Keras Conv1D layer accepts only one input (received " + inputType.length + ")"); - return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType[0], RNNFormat.NCW,layerName); + return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType[0], RNNFormat.NCW, name); } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution2D.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution2D.java index c6c513c29..7eb2c62a0 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution2D.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution2D.java @@ -23,6 +23,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.convolutional; import lombok.Data; import lombok.EqualsAndHashCode; import lombok.extern.slf4j.Slf4j; +import lombok.val; import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.InputPreProcessor; @@ -94,11 +95,11 @@ public class KerasConvolution2D extends KerasConvolution { LayerConstraint weightConstraint = KerasConstraintUtils.getConstraintsFromConfig( layerConfig, conf.getLAYER_FIELD_W_CONSTRAINT(), conf, kerasMajorVersion); - ConvolutionLayer.Builder builder = new ConvolutionLayer.Builder().name(this.layerName) + final var builder = ConvolutionLayer.builder().name(this.name) .nOut(getNOutFromConfig(layerConfig, conf)).dropOut(this.dropout) .activation(getIActivationFromConfig(layerConfig, conf)) .weightInit(init) - .dataFormat(dimOrder == DimOrder.TENSORFLOW ? CNN2DFormat.NHWC : CNN2DFormat.NCHW) + .convFormat(dimOrder == DimOrder.TENSORFLOW ? 
CNN2DFormat.NHWC : CNN2DFormat.NCHW) .l1(this.weightL1Regularization).l2(this.weightL2Regularization) .convolutionMode(getConvolutionModeFromConfig(layerConfig, conf)) .kernelSize(getKernelSizeFromConfig(layerConfig, 2, conf, kerasMajorVersion)) diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution3D.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution3D.java index 2ffa05f4e..5e1a9eec9 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution3D.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution3D.java @@ -93,7 +93,7 @@ public class KerasConvolution3D extends KerasConvolution { LayerConstraint weightConstraint = KerasConstraintUtils.getConstraintsFromConfig( layerConfig, conf.getLAYER_FIELD_W_CONSTRAINT(), conf, kerasMajorVersion); - Convolution3D.Builder builder = new Convolution3D.Builder().name(this.layerName) + Convolution3D.Convolution3DBuilder builder = Convolution3D.builder().name(this.name) .nOut(getNOutFromConfig(layerConfig, conf)).dropOut(this.dropout) .activation(getIActivationFromConfig(layerConfig, conf)) .weightInit(init) diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasCropping1D.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasCropping1D.java index 7c073348d..bc3f07e3e 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasCropping1D.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasCropping1D.java @@ -63,8 +63,8 @@ public class KerasCropping1D extends KerasLayer { super(layerConfig, enforceTrainingConfig); String croppingField = conf.getLAYER_FIELD_CROPPING(); int[] cropping = getPaddingFromConfig(layerConfig, conf, croppingField, 1); - Cropping1D.Builder builder = new Cropping1D.Builder(cropping) - .name(this.layerName).dropOut(this.dropout); + Cropping1D.Cropping1DBuilder builder = Cropping1D.builder(cropping[0]) + .name(this.name).dropOut(this.dropout); this.layer = builder.build(); this.vertex = null; } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasCropping2D.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasCropping2D.java index 66d49d37a..c153c4f3a 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasCropping2D.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasCropping2D.java @@ -66,9 +66,9 @@ public class KerasCropping2D extends KerasLayer { super(layerConfig, enforceTrainingConfig); String croppingField = conf.getLAYER_FIELD_CROPPING(); int[] cropping = getPaddingFromConfig(layerConfig, conf, croppingField, 2); - Cropping2D.Builder builder = new Cropping2D.Builder(cropping) + Cropping2D.Cropping2DBuilder builder = Cropping2D.builder(cropping) .dataFormat(dimOrder == DimOrder.TENSORFLOW ? 
CNN2DFormat.NHWC : CNN2DFormat.NCHW) - .name(this.layerName).dropOut(this.dropout); + .name(this.name).dropOut(this.dropout); this.layer = builder.build(); this.vertex = null; } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasCropping3D.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasCropping3D.java index 16d31b2b1..877989a42 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasCropping3D.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasCropping3D.java @@ -63,8 +63,8 @@ public class KerasCropping3D extends KerasLayer { super(layerConfig, enforceTrainingConfig); String croppingField = conf.getLAYER_FIELD_CROPPING(); int[] cropping = getPaddingFromConfig(layerConfig, conf, croppingField, 3); - Cropping3D.Builder builder = new Cropping3D.Builder(cropping) - .name(this.layerName).dropOut(this.dropout); + Cropping3D.Cropping3DBuilder builder = Cropping3D.builder(cropping) + .name(this.name).dropOut(this.dropout); this.layer = builder.build(); this.vertex = null; } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasDeconvolution2D.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasDeconvolution2D.java index cc7508ab1..2016d00ab 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasDeconvolution2D.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasDeconvolution2D.java @@ -91,11 +91,11 @@ public class KerasDeconvolution2D extends KerasConvolution { LayerConstraint weightConstraint = KerasConstraintUtils.getConstraintsFromConfig( layerConfig, conf.getLAYER_FIELD_W_CONSTRAINT(), conf, kerasMajorVersion); - Deconvolution2D.Builder builder = new Deconvolution2D.Builder().name(this.layerName) + final var builder = Deconvolution2D.builder().name(this.name) .nOut(getNOutFromConfig(layerConfig, conf)).dropOut(this.dropout) .activation(getIActivationFromConfig(layerConfig, conf)) .weightInit(init) - .dataFormat(KerasConvolutionUtils.getDataFormatFromConfig(layerConfig,conf)) + .convFormat(KerasConvolutionUtils.getDataFormatFromConfig(layerConfig,conf)) .l1(this.weightL1Regularization).l2(this.weightL2Regularization) .convolutionMode(getConvolutionModeFromConfig(layerConfig, conf)) .kernelSize(getKernelSizeFromConfig(layerConfig, 2, conf, kerasMajorVersion)) diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasDepthwiseConvolution2D.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasDepthwiseConvolution2D.java index 264200686..553076d25 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasDepthwiseConvolution2D.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasDepthwiseConvolution2D.java @@ -143,7 +143,7 @@ public class KerasDepthwiseConvolution2D extends KerasConvolution { layerConfig, conf.getLAYER_FIELD_DEPTH_WISE_CONSTRAINT(), conf, kerasMajorVersion); - 
DepthwiseConvolution2D.Builder builder = new DepthwiseConvolution2D.Builder().name(this.layerName) + DepthwiseConvolution2D.DepthwiseConvolution2DBuilder builder = DepthwiseConvolution2D.builder().name(this.name) .dropOut(this.dropout) .nIn(nIn) .nOut(nIn * depthMultiplier) diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasSeparableConvolution2D.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasSeparableConvolution2D.java index a72524529..21fe982a9 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasSeparableConvolution2D.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasSeparableConvolution2D.java @@ -116,7 +116,7 @@ public class KerasSeparableConvolution2D extends KerasConvolution { LayerConstraint pointWiseWeightConstraint = KerasConstraintUtils.getConstraintsFromConfig( layerConfig, conf.getLAYER_FIELD_POINT_WISE_CONSTRAINT(), conf, kerasMajorVersion); - SeparableConvolution2D.Builder builder = new SeparableConvolution2D.Builder().name(this.layerName) + SeparableConvolution2D.SeparableConvolution2DBuilder builder = SeparableConvolution2D.builder().name(this.name) .nOut(getNOutFromConfig(layerConfig, conf)).dropOut(this.dropout) .activation(getIActivationFromConfig(layerConfig, conf)) .weightInit(depthWiseInit) diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasSpaceToDepth.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasSpaceToDepth.java index fa02eb829..81e5d0a77 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasSpaceToDepth.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasSpaceToDepth.java @@ -56,11 +56,11 @@ public class KerasSpaceToDepth extends KerasLayer { // TODO: we hard-code block size here to import YOLO9000. This size is not available as property // in the hdf5 file outside of the serialized lambda function (that we can't really well deserialize). 
- SpaceToDepthLayer.Builder builder = new SpaceToDepthLayer.Builder() - .blocks(2) + var builder = SpaceToDepthLayer.builder() + .blockSize(2) //the default data format is tensorflow/NWHC for keras import - .dataFormat(SpaceToDepthLayer.DataFormat.NHWC) - .name(layerName); + .dataFormat(SpaceToDepthLayer.DataFormat.NHWC.toFormat()) + .name(name); this.layer = builder.build(); this.vertex = null; diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasUpsampling1D.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasUpsampling1D.java index db74b3918..24a252948 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasUpsampling1D.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasUpsampling1D.java @@ -62,8 +62,8 @@ public class KerasUpsampling1D extends KerasLayer { int[] size = KerasConvolutionUtils.getUpsamplingSizeFromConfig(layerConfig, 1, conf); - Upsampling1D.Builder builder = new Upsampling1D.Builder() - .name(this.layerName) + Upsampling1D.Upsampling1DBuilder builder = Upsampling1D.builder() + .name(this.name) .dropOut(this.dropout) .size(size[0]); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasUpsampling2D.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasUpsampling2D.java index c9099e17b..46ac1d5fe 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasUpsampling2D.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasUpsampling2D.java @@ -61,8 +61,8 @@ public class KerasUpsampling2D extends KerasLayer { super(layerConfig, enforceTrainingConfig); int[] size = KerasConvolutionUtils.getUpsamplingSizeFromConfig(layerConfig, 2, conf); - Upsampling2D.Builder builder = new Upsampling2D.Builder() - .name(this.layerName) + Upsampling2D.Upsampling2DBuilder builder = Upsampling2D.builder() + .name(this.name) .dropOut(this.dropout) .size(size); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasUpsampling3D.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasUpsampling3D.java index 40915a66f..042207a8c 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasUpsampling3D.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasUpsampling3D.java @@ -63,8 +63,8 @@ public class KerasUpsampling3D extends KerasLayer { int[] size = KerasConvolutionUtils.getUpsamplingSizeFromConfig(layerConfig, 3, conf); // TODO: make sure to allow different sizes. 
- Upsampling3D.Builder builder = new Upsampling3D.Builder() - .name(this.layerName) + var builder = Upsampling3D.builder() + .name(this.name) .dropOut(this.dropout) .size(size[0]); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasZeroPadding1D.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasZeroPadding1D.java index c6fc5bfdc..ceb424414 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasZeroPadding1D.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasZeroPadding1D.java @@ -68,9 +68,9 @@ public class KerasZeroPadding1D extends KerasLayer { super(layerConfig, enforceTrainingConfig); String paddingField = conf.getLAYER_FIELD_ZERO_PADDING(); - ZeroPadding1DLayer.Builder builder = new ZeroPadding1DLayer.Builder( - getPaddingFromConfig(layerConfig, conf, paddingField, 1)) - .name(this.layerName).dropOut(this.dropout); + ZeroPadding1DLayer.ZeroPadding1DLayerBuilder builder = ZeroPadding1DLayer.builder( + getPaddingFromConfig(layerConfig, conf, paddingField, 1)[0]) + .name(this.name).dropOut(this.dropout); this.layer = builder.build(); this.vertex = null; } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasZeroPadding2D.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasZeroPadding2D.java index 3d01e1a77..796daaee1 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasZeroPadding2D.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasZeroPadding2D.java @@ -69,10 +69,10 @@ public class KerasZeroPadding2D extends KerasLayer { throws InvalidKerasConfigurationException, UnsupportedKerasConfigurationException { super(layerConfig, enforceTrainingConfig); String paddingField = conf.getLAYER_FIELD_ZERO_PADDING(); - ZeroPaddingLayer.Builder builder = new ZeroPaddingLayer.Builder( + ZeroPaddingLayer.ZeroPaddingLayerBuilder builder = ZeroPaddingLayer.builder( getPaddingFromConfig(layerConfig, conf, paddingField, 2)) .dataFormat(dimOrder == DimOrder.TENSORFLOW ? 
CNN2DFormat.NHWC : CNN2DFormat.NCHW) - .name(this.layerName).dropOut(this.dropout); + .name(this.name).dropOut(this.dropout); this.layer = builder.build(); this.vertex = null; } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasZeroPadding3D.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasZeroPadding3D.java index b1bfe103f..f0d0b2081 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasZeroPadding3D.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasZeroPadding3D.java @@ -69,8 +69,8 @@ public class KerasZeroPadding3D extends KerasLayer { super(layerConfig, enforceTrainingConfig); String paddingField = conf.getLAYER_FIELD_ZERO_PADDING(); int[] padding = getPaddingFromConfig(layerConfig, conf, paddingField,3); - ZeroPadding3DLayer.Builder builder = new ZeroPadding3DLayer.Builder(padding) - .name(this.layerName).dropOut(this.dropout); + ZeroPadding3DLayer.ZeroPadding3DLayerBuilder builder = ZeroPadding3DLayer.builder(padding) + .name(this.name).dropOut(this.dropout); this.layer = builder.build(); this.vertex = null; } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasActivation.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasActivation.java index 7d4e8429d..a0f5d44f1 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasActivation.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasActivation.java @@ -57,7 +57,7 @@ public class KerasActivation extends KerasLayer { public KerasActivation(Map layerConfig, boolean enforceTrainingConfig) throws InvalidKerasConfigurationException, UnsupportedKerasConfigurationException { super(layerConfig, enforceTrainingConfig); - this.layer = new ActivationLayer.Builder().name(this.layerName) + this.layer = ActivationLayer.builder().name(this.name) .activation(getIActivationFromConfig(layerConfig, conf)) .build(); } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDense.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDense.java index f49599ccf..dcb7e3562 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDense.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDense.java @@ -100,7 +100,7 @@ public class KerasDense extends KerasLayer { IWeightInit init = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_INIT(), enforceTrainingConfig, conf, kerasMajorVersion); - DenseLayer.Builder builder = new DenseLayer.Builder().name(this.layerName) + DenseLayer.DenseLayerBuilder builder = DenseLayer.builder().name(this.name) .nOut(getNOutFromConfig(layerConfig, conf)) .dropOut(this.dropout).activation(getIActivationFromConfig(layerConfig, conf)) .weightInit(init) diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDropout.java 
b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDropout.java index a24eee540..311dca63a 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDropout.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDropout.java @@ -60,7 +60,7 @@ public class KerasDropout extends KerasLayer { public KerasDropout(Map layerConfig, boolean enforceTrainingConfig) throws InvalidKerasConfigurationException, UnsupportedKerasConfigurationException { super(layerConfig, enforceTrainingConfig); - this.layer = new DropoutLayer.Builder().name(this.layerName).dropOut(this.dropout).build(); + this.layer = DropoutLayer.builder().name(this.name).dropOut(this.dropout).build(); } /** diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasLambda.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasLambda.java index 4cb56fefd..a8b0de5cb 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasLambda.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasLambda.java @@ -62,7 +62,7 @@ public class KerasLambda extends KerasLayer { SameDiffLayer sameDiffLayer) throws InvalidKerasConfigurationException, UnsupportedKerasConfigurationException { super(layerConfig, enforceTrainingConfig); - sameDiffLayer.setLayerName(this.layerName); + sameDiffLayer.setName(this.name); this.layer = sameDiffLayer; } @@ -75,7 +75,7 @@ public class KerasLambda extends KerasLayer { */ public InputType getOutputType(InputType... 
inputType) throws InvalidKerasConfigurationException { if (inputType.length > 1) { - log.warn("Note: only first input type will be counted for lambda on layer with name " + layerName); + log.warn("Note: only first input type will be counted for lambda on layer with name " + name); } return this.getSameDiffLayer().getOutputType(-1, inputType[0]); } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasMasking.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasMasking.java index 480267fb5..7758317e6 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasMasking.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasMasking.java @@ -70,10 +70,10 @@ public class KerasMasking extends KerasLayer { super(layerConfig, enforceTrainingConfig); maskingValue = KerasLayerUtils.getMaskingValueFromConfig(layerConfig, conf); - this.layer = new MaskZeroLayer.Builder() - .setMaskValue(maskingValue) - .setUnderlying(new IdentityLayer(this.layerName)) - .name(this.layerName) + this.layer = MaskZeroLayer.builder() + .maskingValue(maskingValue) + .underlying(IdentityLayer.builder(this.name).build()) + .name(this.name) .build(); } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasRepeatVector.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasRepeatVector.java index a7a3ef3e0..9bd54340e 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasRepeatVector.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasRepeatVector.java @@ -59,9 +59,9 @@ public class KerasRepeatVector extends KerasLayer { throws InvalidKerasConfigurationException, UnsupportedKerasConfigurationException { super(layerConfig, enforceTrainingConfig); - this.layer = new RepeatVector.Builder().repetitionFactor(getRepeatMultiplier(layerConfig, conf)) + this.layer = RepeatVector.builder().repetitionFactor(getRepeatMultiplier(layerConfig, conf)) .dataFormat(RNNFormat.NWC) - .name(this.layerName).build(); + .name(this.name).build(); } /** diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasSpatialDropout.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasSpatialDropout.java index df13fa855..d4141a1bf 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasSpatialDropout.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasSpatialDropout.java @@ -81,7 +81,7 @@ public class KerasSpatialDropout extends KerasLayer { double rate = (double) innerConfig.get(conf.getLAYER_FIELD_RATE()); // Keras stores drop rates double retainRate = 1 - rate; - this.layer = new DropoutLayer.Builder().name(this.layerName) + this.layer = DropoutLayer.builder().name(this.name) .dropOut(new SpatialDropout(retainRate)).build(); } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/custom/KerasLRN.java 
b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/custom/KerasLRN.java index 74118d200..78abac569 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/custom/KerasLRN.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/custom/KerasLRN.java @@ -59,7 +59,7 @@ public class KerasLRN extends KerasLayer { super(layerConfig, enforceTrainingConfig); Map lrnParams = KerasLayerUtils.getInnerLayerConfigFromConfig(layerConfig, conf); - LocalResponseNormalization.Builder builder = new LocalResponseNormalization.Builder().name(this.layerName) + var builder = LocalResponseNormalization.builder().name(this.name) .dropOut(this.dropout).alpha((double) lrnParams.get("alpha")) .beta((double) lrnParams.get("beta")).k((int) lrnParams.get("k")).n((int) lrnParams.get("n")); this.layer = builder.build(); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/embeddings/Keras2DEmbedding.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/embeddings/Keras2DEmbedding.java index 6559a0506..c9075c1a8 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/embeddings/Keras2DEmbedding.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/embeddings/Keras2DEmbedding.java @@ -118,8 +118,8 @@ public class Keras2DEmbedding extends KerasLayer { LayerConstraint embeddingConstraint = KerasConstraintUtils.getConstraintsFromConfig( layerConfig, conf.getLAYER_FIELD_EMBEDDINGS_CONSTRAINT(), conf, kerasMajorVersion); int nOutFromConfig = getNOutFromConfig(layerConfig, conf); - EmbeddingLayer.Builder builder = new EmbeddingLayer.Builder() - .name(this.layerName) + EmbeddingLayer.EmbeddingLayerBuilder builder = EmbeddingLayer.builder() + .name(this.name) .nIn(inputDim) .nOut(nOutFromConfig) .dropOut(this.dropout).activation(Activation.IDENTITY) diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/embeddings/KerasEmbedding.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/embeddings/KerasEmbedding.java index f49f590cf..5247cea61 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/embeddings/KerasEmbedding.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/embeddings/KerasEmbedding.java @@ -117,8 +117,8 @@ public class KerasEmbedding extends KerasLayer { LayerConstraint embeddingConstraint = KerasConstraintUtils.getConstraintsFromConfig( layerConfig, conf.getLAYER_FIELD_EMBEDDINGS_CONSTRAINT(), conf, kerasMajorVersion); int nOutFromConfig = getNOutFromConfig(layerConfig, conf); - EmbeddingSequenceLayer.Builder builder = new EmbeddingSequenceLayer.Builder() - .name(this.layerName) + EmbeddingSequenceLayer.EmbeddingSequenceLayerBuilder builder = EmbeddingSequenceLayer.builder() + .name(this.name) .nIn(inputDim) .inputLength(inputLength) .inferInputLength(inferInputLength) diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/local/KerasLocallyConnected1D.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/local/KerasLocallyConnected1D.java index 9b9fecabf..699f5d8e0 100644 --- 
a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/local/KerasLocallyConnected1D.java
+++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/local/KerasLocallyConnected1D.java
@@ -33,9 +33,11 @@ import org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasConvolu
 import org.deeplearning4j.nn.modelimport.keras.utils.KerasConstraintUtils;
 import org.deeplearning4j.nn.params.ConvolutionParamInitializer;
 import org.deeplearning4j.nn.weights.IWeightInit;
+import org.deeplearning4j.nn.weights.WeightInit;
 import org.nd4j.linalg.api.ndarray.INDArray;

 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;

 import static org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasConvolutionUtils.*;
@@ -95,10 +97,10 @@ public class KerasLocallyConnected1D extends KerasConvolution {
         LayerConstraint weightConstraint = KerasConstraintUtils.getConstraintsFromConfig(
                 layerConfig, conf.getLAYER_FIELD_W_CONSTRAINT(), conf, kerasMajorVersion);

-        LocallyConnected1D.Builder builder = new LocallyConnected1D.Builder().name(this.layerName)
+        LocallyConnected1D.LocallyConnected1DBuilder builder = LocallyConnected1D.builder().name(this.name)
                 .nOut(getNOutFromConfig(layerConfig, conf)).dropOut(this.dropout)
                 .activation(getActivationFromConfig(layerConfig, conf))
-                .weightInit(conf.getKERAS_PARAM_NAME_W(), init)
+                .weightInit(init.enumValue())
                 .l1(this.weightL1Regularization).l2(this.weightL2Regularization)
                 .convolutionMode(getConvolutionModeFromConfig(layerConfig, conf))
                 .kernelSize(getKernelSizeFromConfig(layerConfig, 1, conf, kerasMajorVersion)[0])
@@ -112,7 +114,7 @@ public class KerasLocallyConnected1D extends KerasConvolution {
         if (biasConstraint != null)
             builder.constrainBias(biasConstraint);
         if (weightConstraint != null)
-            builder.constrainWeights(weightConstraint);
+            builder.weightConstraints(List.of(weightConstraint));

         this.layer = builder.build();
     }
diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/local/KerasLocallyConnected2D.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/local/KerasLocallyConnected2D.java
index 421797976..12687d165 100644
--- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/local/KerasLocallyConnected2D.java
+++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/local/KerasLocallyConnected2D.java
@@ -36,6 +36,7 @@ import org.deeplearning4j.nn.weights.IWeightInit;
 import org.nd4j.linalg.api.ndarray.INDArray;

 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;

 import static org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasConvolutionUtils.*;
@@ -96,10 +97,10 @@ public class KerasLocallyConnected2D extends KerasConvolution {
         LayerConstraint weightConstraint = KerasConstraintUtils.getConstraintsFromConfig(
                 layerConfig, conf.getLAYER_FIELD_W_CONSTRAINT(), conf, kerasMajorVersion);

-        LocallyConnected2D.Builder builder = new LocallyConnected2D.Builder().name(this.layerName)
+        LocallyConnected2D.LocallyConnected2DBuilder builder = LocallyConnected2D.builder().name(this.name)
                 .nOut(getNOutFromConfig(layerConfig, conf)).dropOut(this.dropout)
                 .activation(getActivationFromConfig(layerConfig, conf))
-                .weightInit(conf.getKERAS_PARAM_NAME_W(), init)
+                .weightInit(init.enumValue())
.l1(this.weightL1Regularization).l2(this.weightL2Regularization) .convolutionMode(getConvolutionModeFromConfig(layerConfig, conf)) .kernelSize(getKernelSizeFromConfig(layerConfig, 2, conf, kerasMajorVersion)) @@ -113,7 +114,7 @@ public class KerasLocallyConnected2D extends KerasConvolution { if (biasConstraint != null) builder.constrainBias(biasConstraint); if (weightConstraint != null) - builder.constrainWeights(weightConstraint); + builder.weightConstraints(List.of(weightConstraint)); this.layer = builder.build(); } @@ -143,7 +144,7 @@ public class KerasLocallyConnected2D extends KerasConvolution { // Override input/output shape and input channels dynamically. This works since getOutputType will always // be called when initializing the model. ((LocallyConnected2D) this.layer).setInputSize(new int[] {(int) convType.getHeight(),(int) convType.getWidth()}); - ((LocallyConnected2D) this.layer).setNIn(convType.getChannels()); + ((LocallyConnected2D) this.layer).setNIn((int)convType.getChannels()); ((LocallyConnected2D) this.layer).computeOutputSize(); InputPreProcessor preprocessor = getInputPreprocessor(inputType[0]); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/noise/KerasAlphaDropout.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/noise/KerasAlphaDropout.java index 76707a0b3..277277ba7 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/noise/KerasAlphaDropout.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/noise/KerasAlphaDropout.java @@ -81,7 +81,7 @@ public class KerasAlphaDropout extends KerasLayer { double rate = (double) innerConfig.get(conf.getLAYER_FIELD_RATE()); // Keras stores drop rates double retainRate = 1 - rate; - this.layer = new DropoutLayer.Builder().name(this.layerName) + this.layer = DropoutLayer.builder().name(this.name) .dropOut(new AlphaDropout(retainRate)).build(); } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/noise/KerasGaussianDropout.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/noise/KerasGaussianDropout.java index dde2709aa..744d2c76a 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/noise/KerasGaussianDropout.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/noise/KerasGaussianDropout.java @@ -80,7 +80,7 @@ public class KerasGaussianDropout extends KerasLayer { double rate = (double) innerConfig.get(conf.getLAYER_FIELD_RATE()); // Keras stores drop rates double retainRate = 1 - rate; - this.layer = new DropoutLayer.Builder().name(this.layerName) + this.layer = DropoutLayer.builder().name(this.name) .dropOut(new GaussianDropout(retainRate)).build(); } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/noise/KerasGaussianNoise.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/noise/KerasGaussianNoise.java index de3d56760..c415154db 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/noise/KerasGaussianNoise.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/noise/KerasGaussianNoise.java @@ -79,7 +79,7 @@ public 
class KerasGaussianNoise extends KerasLayer {
         }

         double stddev = (double) innerConfig.get(conf.getLAYER_FIELD_GAUSSIAN_VARIANCE());
-        this.layer = new DropoutLayer.Builder().name(this.layerName)
+        this.layer = DropoutLayer.builder().name(this.name)
                 .dropOut(new GaussianNoise(stddev)).build();
     }

diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/normalization/KerasBatchNormalization.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/normalization/KerasBatchNormalization.java
index a90f4f1e6..82016be49 100644
--- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/normalization/KerasBatchNormalization.java
+++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/normalization/KerasBatchNormalization.java
@@ -127,15 +127,17 @@ public class KerasBatchNormalization extends KerasLayer {
         LayerConstraint gammaConstraint = KerasConstraintUtils.getConstraintsFromConfig(
                 layerConfig, conf.getLAYER_FIELD_BATCHNORMALIZATION_GAMMA_CONSTRAINT(), conf, kerasMajorVersion);

-        BatchNormalization.Builder builder = new BatchNormalization.Builder()
-                .name(this.layerName)
+        BatchNormalization.BatchNormalizationBuilder builder = BatchNormalization.builder()
+                .name(this.name)
                 .dropOut(this.dropout)
-                .minibatch(true)
+
+                .isMinibatch(true)
                 .lockGammaBeta(false)
                 .useLogStd(false)
                 .decay(getMomentumFromConfig(layerConfig))
                 .eps(getEpsFromConfig(layerConfig));
         if (betaConstraint != null)
+
             builder.constrainBeta(betaConstraint);
         if (gammaConstraint != null)
             builder.constrainGamma(gammaConstraint);
diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasGlobalPooling.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasGlobalPooling.java
index 9192e6dcf..27d8d5752 100644
--- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasGlobalPooling.java
+++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasGlobalPooling.java
@@ -72,11 +72,11 @@ public class KerasGlobalPooling extends KerasLayer {
                     throws InvalidKerasConfigurationException, UnsupportedKerasConfigurationException {
         super(layerConfig, enforceTrainingConfig);
         this.dimensions = mapGlobalPoolingDimensions(this.className, conf);
-        GlobalPoolingLayer.Builder builder =
-                new GlobalPoolingLayer.Builder(mapPoolingType(this.className, conf))
+        GlobalPoolingLayer.GlobalPoolingLayerBuilder builder =
+                GlobalPoolingLayer.builder(mapPoolingType(this.className, conf))
                         .poolingDimensions(dimensions)
                         .collapseDimensions(true) // keras 2 collapses dimensions
-                        .name(this.layerName)
+                        .name(this.name)
                         .dropOut(this.dropout);
         this.layer = builder.build();
         this.vertex = null;
diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling1D.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling1D.java
index bb27e7d81..4d344e7b6 100644
--- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling1D.java
+++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling1D.java
@@ -63,8 +63,8 @@ public class KerasPooling1D extends
KerasLayer { public KerasPooling1D(Map layerConfig, boolean enforceTrainingConfig) throws InvalidKerasConfigurationException, UnsupportedKerasConfigurationException { super(layerConfig, enforceTrainingConfig); - Subsampling1DLayer.Builder builder = new Subsampling1DLayer.Builder( - KerasPoolingUtils.mapPoolingType(this.className, conf)).name(this.layerName) + Subsampling1DLayer.SubsamplingLayerBuilder builder = Subsampling1DLayer.builder( + KerasPoolingUtils.mapPoolingType(this.className, conf)).name(this.name) .dropOut(this.dropout) .convolutionMode(getConvolutionModeFromConfig(layerConfig, conf)) .kernelSize(getKernelSizeFromConfig(layerConfig, 1, conf, kerasMajorVersion)[0]) diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling2D.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling2D.java index 7d214a96c..79ccff3a2 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling2D.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling2D.java @@ -63,8 +63,8 @@ public class KerasPooling2D extends KerasLayer { public KerasPooling2D(Map layerConfig, boolean enforceTrainingConfig) throws InvalidKerasConfigurationException, UnsupportedKerasConfigurationException { super(layerConfig, enforceTrainingConfig); - SubsamplingLayer.Builder builder = new SubsamplingLayer.Builder( - KerasPoolingUtils.mapPoolingType(this.className, conf)).name(this.layerName) + SubsamplingLayer.SubsamplingLayerBuilder builder = SubsamplingLayer.builder( + KerasPoolingUtils.mapPoolingType(this.className, conf)).name(this.name) .dropOut(this.dropout) .dataFormat(dimOrder == DimOrder.TENSORFLOW ? 
CNN2DFormat.NHWC : CNN2DFormat.NCHW)
                 .convolutionMode(getConvolutionModeFromConfig(layerConfig, conf))
diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling3D.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling3D.java
index 5fbee75b8..edad97399 100644
--- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling3D.java
+++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling3D.java
@@ -62,8 +62,8 @@ public class KerasPooling3D extends KerasLayer {
     public KerasPooling3D(Map<String, Object> layerConfig, boolean enforceTrainingConfig)
                     throws InvalidKerasConfigurationException, UnsupportedKerasConfigurationException {
         super(layerConfig, enforceTrainingConfig);
-        Subsampling3DLayer.Builder builder = new Subsampling3DLayer.Builder(
-                KerasPoolingUtils.mapPoolingType(this.className, conf)).name(this.layerName)
+        Subsampling3DLayer.Subsampling3DLayerBuilder builder = Subsampling3DLayer.builder(
+                KerasPoolingUtils.mapPoolingType(this.className, conf)).name(this.name)
                 .dropOut(this.dropout)
                 .convolutionMode(getConvolutionModeFromConfig(layerConfig, conf))
                 .kernelSize(getKernelSizeFromConfig(layerConfig, 3, conf, kerasMajorVersion))
diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLSTM.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLSTM.java
index b2e5a15a2..4134f1d05 100644
--- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLSTM.java
+++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLSTM.java
@@ -179,10 +179,10 @@ public class KerasLSTM extends KerasLayer {

         Pair<Boolean, Double> maskingConfig = KerasLayerUtils.getMaskingConfiguration(inboundLayerNames, previousLayers);

-        LSTM.Builder builder = new LSTM.Builder()
+        final var builder = LSTM.builder()
                 .gateActivationFunction(getGateActivationFromConfig(layerConfig))
                 .forgetGateBiasInit(getForgetBiasInitFromConfig(layerConfig, enforceTrainingConfig))
-                .name(this.layerName)
+                .name(this.name)
                 .nOut(getNOutFromConfig(layerConfig, conf))
                 .dropOut(this.dropout)
                 .activation(getIActivationFromConfig(layerConfig, conf))
@@ -193,7 +193,7 @@ public class KerasLSTM extends KerasLayer {
                 .l2(this.weightL2Regularization).dataFormat(RNNFormat.NWC);
         Integer nIn = KerasLayerUtils.getNInFromInputDim(layerConfig, conf);
         if(nIn != null)
-            builder.setNIn(nIn);
+            builder.nIn(nIn);
         if (biasConstraint != null)
             builder.constrainBias(biasConstraint);
         if (weightConstraint != null)
@@ -203,10 +203,10 @@ public class KerasLSTM extends KerasLayer {
         this.layer = builder.build();

         if (!returnSequences) {
-            this.layer = new LastTimeStep(this.layer);
+            this.layer = LastTimeStep.builder().underlying(this.layer).build();
         }
         if (maskingConfig.getFirst()) {
-            this.layer = new MaskZeroLayer(this.layer, maskingConfig.getSecond());
+            this.layer = MaskZeroLayer.builder().underlying(this.layer).maskingValue(maskingConfig.getSecond()).build();
         }

     }
@@ -270,7 +270,7 @@ public class KerasLSTM extends KerasLayer {
                             "or three (input to LSTM and two states tensors, but " + "received " + inputType.length + ".");
         RNNFormat f = TimeSeriesUtils.getFormatFromRnnLayer(layer);
-        return
InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType[0], f,layerName); + return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType[0], f, name); } /** diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasSimpleRnn.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasSimpleRnn.java index 3c850ecfa..942759615 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasSimpleRnn.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasSimpleRnn.java @@ -152,8 +152,8 @@ public class KerasSimpleRnn extends KerasLayer { LayerConstraint recurrentConstraint = KerasConstraintUtils.getConstraintsFromConfig( layerConfig, conf.getLAYER_FIELD_RECURRENT_CONSTRAINT(), conf, kerasMajorVersion); - SimpleRnn.Builder builder = new SimpleRnn.Builder() - .name(this.layerName) + SimpleRnn.SimpleRnnBuilder builder = SimpleRnn.builder() + .name(this.name) .nOut(getNOutFromConfig(layerConfig, conf)) .dropOut(this.dropout) .activation(getIActivationFromConfig(layerConfig, conf)) @@ -164,7 +164,7 @@ public class KerasSimpleRnn extends KerasLayer { .l2(this.weightL2Regularization).dataFormat(RNNFormat.NWC); Integer nIn = KerasLayerUtils.getNInFromInputDim(layerConfig, conf); if(nIn != null) - builder.setNIn(nIn); + builder.nIn(nIn); if (biasConstraint != null) builder.constrainBias(biasConstraint); if (weightConstraint != null) @@ -174,10 +174,10 @@ public class KerasSimpleRnn extends KerasLayer { this.layer = builder.build(); if (!returnSequences) { - this.layer = new LastTimeStep(this.layer); + this.layer = LastTimeStep.builder(this.layer).build(); } if (maskingConfig.getFirst()) { - this.layer = new MaskZeroLayer(this.layer, maskingConfig.getSecond()); + this.layer = MaskZeroLayer.builder().underlying(this.layer).maskingValue(maskingConfig.getSecond()).build(); } } @@ -234,7 +234,7 @@ public class KerasSimpleRnn extends KerasLayer { "Keras SimpleRnn layer accepts only one input (received " + inputType.length + ")"); RNNFormat f = TimeSeriesUtils.getFormatFromRnnLayer(layer); - return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType[0], f, layerName); + return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType[0], f, name); } /** diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/wrappers/KerasBidirectional.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/wrappers/KerasBidirectional.java index 3da1a4642..d94ac13cc 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/wrappers/KerasBidirectional.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/wrappers/KerasBidirectional.java @@ -135,19 +135,19 @@ public class KerasBidirectional extends KerasLayer { kerasRnnlayer = new KerasLSTM(innerRnnConfig, enforceTrainingConfig, previousLayers); try { LSTM rnnLayer = (LSTM) ((KerasLSTM) kerasRnnlayer).getLSTMLayer(); - layer = new Bidirectional(mode, rnnLayer); - layer.setLayerName(layerName); + layer = Bidirectional.builder(mode, rnnLayer).build(); + layer.setName(name); } catch (Exception e) { LastTimeStep rnnLayer = (LastTimeStep) ((KerasLSTM) kerasRnnlayer).getLSTMLayer(); - this.layer = new Bidirectional(mode, rnnLayer); - 
layer.setLayerName(layerName); + this.layer = Bidirectional.builder(mode, rnnLayer).build(); + layer.setName(name); } break; case "SimpleRNN": kerasRnnlayer = new KerasSimpleRnn(innerRnnConfig, enforceTrainingConfig, previousLayers); LayerConfiguration rnnLayer = ((KerasSimpleRnn) kerasRnnlayer).getSimpleRnnLayer(); - this.layer = new Bidirectional(mode, rnnLayer); - layer.setLayerName(layerName); + this.layer = Bidirectional.builder(mode, rnnLayer).build(); + layer.setName(name); break; default: throw new UnsupportedKerasConfigurationException("Currently only two types of recurrent Keras layers are" + @@ -216,7 +216,7 @@ public class KerasBidirectional extends KerasLayer { if (inputType.length > 1) throw new InvalidKerasConfigurationException( "Keras Bidirectional layer accepts only one input (received " + inputType.length + ")"); - return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType[0], ((Bidirectional)layer).getRNNDataFormat(), layerName); + return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType[0], ((Bidirectional)layer).getRNNDataFormat(), name); } /** diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasModelUtils.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasModelUtils.java index 969626676..c8c956458 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasModelUtils.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasModelUtils.java @@ -67,7 +67,7 @@ public class KerasModelUtils { /* Iterate over layers in model, setting weights when relevant. */ Set layerNames = new HashSet<>(kerasLayers.keySet()); for (org.deeplearning4j.nn.api.Layer layer : layersFromModel) { - String layerName = layer.getLayerConfiguration().getLayerName(); + String layerName = layer.getLayerConfiguration().getName(); if (!kerasLayers.containsKey(layerName)) throw new InvalidKerasConfigurationException( "No weights found for layer in model (named " + layerName + ")"); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/DeepCTRLambdaTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/DeepCTRLambdaTest.java index 1eca7ab48..4dfb13383 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/DeepCTRLambdaTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/DeepCTRLambdaTest.java @@ -79,7 +79,7 @@ public class DeepCTRLambdaTest { @Override public SDVariable defineLayer(SameDiff sameDiff, SDVariable layerInput) { - if(this.layerName.equals("concat_embed_2d") || this.layerName.equals("cat_embed_2d_genure_mean")) + if(this.name.equals("concat_embed_2d") || this.name.equals("cat_embed_2d_genure_mean")) return layerInput.mean("mean_pooling-" + UUID.randomUUID(),true,1); else return layerInput.mean("mean_pooling-" + UUID.randomUUID(),false,1); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/FullModelComparisons.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/FullModelComparisons.java index db0fc466b..449b4df29 100644 --- 
a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/FullModelComparisons.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/FullModelComparisons.java @@ -91,7 +91,7 @@ public class FullModelComparisons extends BaseDL4JTest { // "unit_forget_bias": true assertEquals(1.0, firstConf.getForgetGateBiasInit()); - assertTrue(firstConf.getGateActivationFn() instanceof ActivationHardSigmoid); + assertTrue(firstConf.getGateActivationFunction() instanceof ActivationHardSigmoid); assertTrue(firstConf.getActivationFn() instanceof ActivationTanH); int nIn = 12; @@ -129,7 +129,7 @@ public class FullModelComparisons extends BaseDL4JTest { // "unit_forget_bias": true assertEquals(1.0, secondConf.getForgetGateBiasInit()); - assertTrue(firstConf.getGateActivationFn() instanceof ActivationHardSigmoid); + assertTrue(firstConf.getGateActivationFunction() instanceof ActivationHardSigmoid); assertTrue(firstConf.getActivationFn() instanceof ActivationTanH); nIn = 96; diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/KerasModelImportTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/KerasModelImportTest.java index 02c478093..38d0d4f67 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/KerasModelImportTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/KerasModelImportTest.java @@ -58,11 +58,11 @@ public class KerasModelImportTest extends BaseDL4JTest { MultiLayerNetwork model = loadModel("modelimport/keras/weights/conv2dnchw/simpleconv2d.hdf5"); List layerConfigs = model.getNetConfiguration().getFlattenedLayerConfigurations(); ConvolutionLayer convolutionLayer = (ConvolutionLayer) layerConfigs.get(0); - assertEquals(CNN2DFormat.NCHW,convolutionLayer.getCnn2dDataFormat()); + assertEquals(CNN2DFormat.NCHW,convolutionLayer.getConvFormat()); SubsamplingLayer subsamplingLayer = (SubsamplingLayer) layerConfigs.get(1); - assertEquals(CNN2DFormat.NHWC,subsamplingLayer.getCnn2dDataFormat()); + assertEquals(CNN2DFormat.NHWC,subsamplingLayer.getDataFormat()); ConvolutionLayer convolutionLayer1 = (ConvolutionLayer) layerConfigs.get(2); - assertEquals(CNN2DFormat.NHWC,convolutionLayer1.getCnn2dDataFormat()); + assertEquals(CNN2DFormat.NHWC,convolutionLayer1.getConvFormat()); model.output(Nd4j.zeros(1,1,28,28)); assertNotNull(model); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasModelEndToEndTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasModelEndToEndTest.java index 1dad7c549..be1ecf339 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasModelEndToEndTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasModelEndToEndTest.java @@ -287,7 +287,7 @@ public class KerasModelEndToEndTest extends BaseDL4JTest { Layer outLayer = net.getOutputLayer(); assertTrue(outLayer instanceof org.deeplearning4j.nn.layers.LossLayer); LossLayer llConf = (LossLayer) outLayer.getTrainingConfig(); - assertEquals(new LossSparseMCXENT(), llConf.getLossFn()); + assertEquals(new LossSparseMCXENT(), llConf.getLossFunction()); } /** @@ -973,13 +973,13 @@ public class 
KerasModelEndToEndTest extends BaseDL4JTest { } else { LayerConfiguration l; if (labels.rank() == 2) { - l = new LossLayer.Builder() - .lossFunction(LossFunctions.LossFunction.MSE) + l = LossLayer.builder() + .lossFunction(LossFunctions.LossFunction.MSE.getILossFunction()) .activation(Activation.IDENTITY) .build(); } else { //Rank 3 - l = new RnnOutputLayer.Builder() + l = RnnOutputLayer.builder() .lossFunction(LossFunctions.LossFunction.MSE) .activation(Activation.IDENTITY) .nIn(labels.size(1)) @@ -1000,16 +1000,16 @@ public class KerasModelEndToEndTest extends BaseDL4JTest { for (Layer l : netToTest.getLayers()) { // Remove any dropout manually - until this is fixed: // https://github.com/eclipse/deeplearning4j/issues/4368 - l.getLayerConfiguration().setIDropout(null); + l.getLayerConfiguration().setDropOut(null); //Also swap out activation functions... this is a bit of a hack, but should make the net gradient checkable... if (l.getLayerConfiguration() instanceof FeedForwardLayer) { FeedForwardLayer ffl = (FeedForwardLayer) l.getLayerConfiguration(); IActivation activation = ffl.getActivationFn(); if (activation instanceof ActivationReLU || activation instanceof ActivationLReLU) { - ffl.setActivationFn(new ActivationSoftPlus()); + ffl.setActivation(new ActivationSoftPlus()); } else if (activation instanceof ActivationHardTanH) { - ffl.setActivationFn(new ActivationTanH()); + ffl.setActivation(new ActivationTanH()); } } } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasYolo9000PredictTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasYolo9000PredictTest.java index a5ab1f512..acf06120d 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasYolo9000PredictTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasYolo9000PredictTest.java @@ -60,8 +60,8 @@ public class KerasYolo9000PredictTest extends BaseDL4JTest { ComputationGraph model = new TransferLearning.GraphBuilder(graph) .addLayer("outputs", - new org.deeplearning4j.nn.conf.layers.objdetect.Yolo2OutputLayer.Builder() - .boundingBoxPriors(priors) + org.deeplearning4j.nn.conf.layers.objdetect.Yolo2OutputLayer.builder() + .boundingBoxes(priors) .build(), "conv2d_23") .setOutputs("outputs") diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activation/KerasLeakyReLUTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activation/KerasLeakyReLUTest.java index b2417dad8..41c276c52 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activation/KerasLeakyReLUTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activation/KerasLeakyReLUTest.java @@ -64,6 +64,6 @@ public class KerasLeakyReLUTest extends BaseDL4JTest { ActivationLayer layer = new KerasLeakyReLU(layerConfig).getActivationLayer(); assertEquals("leakyrelu(a=0.3)", layer.getActivationFn().toString()); - assertEquals(layerName, layer.getLayerName()); + assertEquals(layerName, layer.getName()); } } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activation/KerasPReLUTest.java 
b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activation/KerasPReLUTest.java index 053eb1fab..ef31068b6 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activation/KerasPReLUTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activation/KerasPReLUTest.java @@ -81,6 +81,6 @@ public class KerasPReLUTest extends BaseDL4JTest { assertArrayEquals(layer.getInputShape(), new long[] {3, 5, 4}); assertEquals(INIT_DL4J, layer.getWeightInit()); - assertEquals(layerName, layer.getLayerName()); + assertEquals(layerName, layer.getName()); } } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activation/KerasThresholdedReLUTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activation/KerasThresholdedReLUTest.java index 834886ef7..12328dafa 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activation/KerasThresholdedReLUTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activation/KerasThresholdedReLUTest.java @@ -65,6 +65,6 @@ public class KerasThresholdedReLUTest extends BaseDL4JTest { ActivationLayer layer = new KerasThresholdedReLU(layerConfig).getActivationLayer(); assertEquals("thresholdedrelu(theta=0.5)", layer.getActivationFn().toString()); - assertEquals(layerName, layer.getLayerName()); + assertEquals(layerName, layer.getName()); } } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasAtrousConvolution1DTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasAtrousConvolution1DTest.java index 10330113c..a44cf7472 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasAtrousConvolution1DTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasAtrousConvolution1DTest.java @@ -99,11 +99,11 @@ public class KerasAtrousConvolution1DTest extends BaseDL4JTest { Convolution1DLayer layer = new KerasAtrousConvolution1D(layerConfig).getAtrousConvolution1D(); assertEquals(ACTIVATION_DL4J, layer.getActivationFn().toString()); - assertEquals(LAYER_NAME, layer.getLayerName()); + assertEquals(LAYER_NAME, layer.getName()); assertEquals(INIT_DL4J, layer.getWeightInit()); assertEquals(L1_REGULARIZATION, KerasTestUtils.getL1(layer), 0.0); assertEquals(L2_REGULARIZATION, KerasTestUtils.getL2(layer), 0.0); - assertEquals(new Dropout(DROPOUT_DL4J), layer.getIDropout()); + assertEquals(new Dropout(DROPOUT_DL4J), layer.getDropOut()); assertEquals(KERNEL_SIZE[0], layer.getKernelSize()[0]); assertEquals(STRIDE[0], layer.getStride()[0]); assertEquals(N_OUT, layer.getNOut()); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasAtrousConvolution2DTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasAtrousConvolution2DTest.java index 7f1d65b3b..2da9eb06a 100644 --- 
a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasAtrousConvolution2DTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasAtrousConvolution2DTest.java @@ -113,11 +113,11 @@ public class KerasAtrousConvolution2DTest extends BaseDL4JTest { ConvolutionLayer layer = new KerasAtrousConvolution2D(layerConfig).getAtrousConvolution2D(); assertEquals(ACTIVATION_DL4J, layer.getActivationFn().toString()); - assertEquals(LAYER_NAME, layer.getLayerName()); + assertEquals(LAYER_NAME, layer.getName()); assertEquals(INIT_DL4J, layer.getWeightInit()); assertEquals(L1_REGULARIZATION, KerasTestUtils.getL1(layer), 0.0); assertEquals(L2_REGULARIZATION, KerasTestUtils.getL2(layer), 0.0); - assertEquals(new Dropout(DROPOUT_DL4J), layer.getIDropout()); + assertEquals(new Dropout(DROPOUT_DL4J), layer.getDropOut()); assertArrayEquals(KERNEL_SIZE, layer.getKernelSize()); assertArrayEquals(STRIDE, layer.getStride()); assertEquals(N_OUT, layer.getNOut()); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution1DTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution1DTest.java index b8629573f..56c1abf34 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution1DTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution1DTest.java @@ -121,11 +121,11 @@ public class KerasConvolution1DTest extends BaseDL4JTest { Convolution1DLayer layer = new KerasConvolution1D(layerConfig).getConvolution1DLayer(); assertEquals(ACTIVATION_DL4J, layer.getActivationFn().toString()); - assertEquals(LAYER_NAME, layer.getLayerName()); + assertEquals(LAYER_NAME, layer.getName()); assertEquals(INIT_DL4J, layer.getWeightInit()); assertEquals(L1_REGULARIZATION, KerasTestUtils.getL1(layer), 0.0); assertEquals(L2_REGULARIZATION, KerasTestUtils.getL2(layer), 0.0); - assertEquals(new Dropout(DROPOUT_DL4J), layer.getIDropout()); + assertEquals(new Dropout(DROPOUT_DL4J), layer.getDropOut()); assertEquals(KERNEL_SIZE[0], layer.getKernelSize()[0]); assertEquals(STRIDE[0], layer.getStride()[0]); assertEquals(N_OUT, layer.getNOut()); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution2DTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution2DTest.java index 4ba12c10f..1069643e9 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution2DTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution2DTest.java @@ -122,11 +122,11 @@ public class KerasConvolution2DTest extends BaseDL4JTest { ConvolutionLayer layer = new KerasConvolution2D(layerConfig).getConvolution2DLayer(); assertEquals(ACTIVATION_DL4J, layer.getActivationFn().toString()); - assertEquals(LAYER_NAME, layer.getLayerName()); + assertEquals(LAYER_NAME, layer.getName()); assertEquals(INIT_DL4J, layer.getWeightInit()); assertEquals(L1_REGULARIZATION, KerasTestUtils.getL1(layer), 0.0); assertEquals(L2_REGULARIZATION, KerasTestUtils.getL2(layer), 0.0); 
- assertEquals(new Dropout(DROPOUT_DL4J), layer.getIDropout()); + assertEquals(new Dropout(DROPOUT_DL4J), layer.getDropOut()); assertArrayEquals(KERNEL_SIZE, layer.getKernelSize()); assertArrayEquals(STRIDE, layer.getStride()); assertEquals(N_OUT, layer.getNOut()); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution3DTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution3DTest.java index f52939947..078af3736 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution3DTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution3DTest.java @@ -118,11 +118,11 @@ public class KerasConvolution3DTest extends BaseDL4JTest { ConvolutionLayer layer = new KerasConvolution3D(layerConfig).getConvolution3DLayer(); assertEquals(ACTIVATION_DL4J, layer.getActivationFn().toString()); - assertEquals(LAYER_NAME, layer.getLayerName()); + assertEquals(LAYER_NAME, layer.getName()); assertEquals(INIT_DL4J, layer.getWeightInit()); assertEquals(L1_REGULARIZATION, KerasTestUtils.getL1(layer), 0.0); assertEquals(L2_REGULARIZATION, KerasTestUtils.getL2(layer), 0.0); - assertEquals(new Dropout(DROPOUT_DL4J), layer.getIDropout()); + assertEquals(new Dropout(DROPOUT_DL4J), layer.getDropOut()); assertArrayEquals(KERNEL_SIZE, layer.getKernelSize()); assertArrayEquals(STRIDE, layer.getStride()); assertEquals(N_OUT, layer.getNOut()); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasCropping1DTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasCropping1DTest.java index b3159e54b..57ed5909f 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasCropping1DTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasCropping1DTest.java @@ -65,7 +65,7 @@ public class KerasCropping1DTest extends BaseDL4JTest { layerConfig.put(conf.getLAYER_FIELD_KERAS_VERSION(), kerasVersion); Cropping1D layer = new KerasCropping1D(layerConfig).getCropping1DLayer(); - assertEquals(LAYER_NAME, layer.getLayerName()); + assertEquals(LAYER_NAME, layer.getName()); assertEquals(CROPPING, layer.getCropping()[0]); assertEquals(CROPPING, layer.getCropping()[1]); } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasCropping2DTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasCropping2DTest.java index e65a59438..0e33bbf84 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasCropping2DTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasCropping2DTest.java @@ -70,7 +70,7 @@ public class KerasCropping2DTest extends BaseDL4JTest { layerConfig.put(conf.getLAYER_FIELD_KERAS_VERSION(), kerasVersion); Cropping2D layer = new KerasCropping2D(layerConfig).getCropping2DLayer(); - assertEquals(LAYER_NAME, layer.getLayerName()); + assertEquals(LAYER_NAME, layer.getName()); assertEquals(CROPPING[0], 
layer.getCropping()[0]); assertEquals(CROPPING[0], layer.getCropping()[1]); assertEquals(CROPPING[1], layer.getCropping()[2]); @@ -89,7 +89,7 @@ public class KerasCropping2DTest extends BaseDL4JTest { layerConfig.put(conf.getLAYER_FIELD_KERAS_VERSION(), kerasVersion); Cropping2D layer = new KerasCropping2D(layerConfig).getCropping2DLayer(); - assertEquals(LAYER_NAME, layer.getLayerName()); + assertEquals(LAYER_NAME, layer.getName()); assertEquals(CROPPING[0], layer.getCropping()[0]); assertEquals(CROPPING[0], layer.getCropping()[1]); } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasCropping3DTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasCropping3DTest.java index 5fe65127a..893e03478 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasCropping3DTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasCropping3DTest.java @@ -70,7 +70,7 @@ public class KerasCropping3DTest extends BaseDL4JTest { layerConfig.put(conf.getLAYER_FIELD_KERAS_VERSION(), kerasVersion); Cropping3D layer = new KerasCropping3D(layerConfig).getCropping3DLayer(); - assertEquals(LAYER_NAME, layer.getLayerName()); + assertEquals(LAYER_NAME, layer.getName()); assertEquals(CROPPING[0], layer.getCropping()[0]); assertEquals(CROPPING[0], layer.getCropping()[1]); assertEquals(CROPPING[1], layer.getCropping()[2]); @@ -91,7 +91,7 @@ public class KerasCropping3DTest extends BaseDL4JTest { layerConfig.put(conf.getLAYER_FIELD_KERAS_VERSION(), kerasVersion); Cropping3D layer = new KerasCropping3D(layerConfig).getCropping3DLayer(); - assertEquals(LAYER_NAME, layer.getLayerName()); + assertEquals(LAYER_NAME, layer.getName()); assertEquals(CROPPING[0], layer.getCropping()[0]); assertEquals(CROPPING[0], layer.getCropping()[1]); } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasDeconvolution2DTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasDeconvolution2DTest.java index 9fecab86c..560fb88d1 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasDeconvolution2DTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasDeconvolution2DTest.java @@ -122,11 +122,11 @@ public class KerasDeconvolution2DTest extends BaseDL4JTest { Deconvolution2D layer = new KerasDeconvolution2D(layerConfig).getDeconvolution2DLayer(); assertEquals(ACTIVATION_DL4J, layer.getActivationFn().toString()); - assertEquals(LAYER_NAME, layer.getLayerName()); + assertEquals(LAYER_NAME, layer.getName()); assertEquals(INIT_DL4J, layer.getWeightInit()); assertEquals(L1_REGULARIZATION, KerasTestUtils.getL1(layer), 0.0); assertEquals(L2_REGULARIZATION, KerasTestUtils.getL2(layer), 0.0); - assertEquals(new Dropout(DROPOUT_DL4J), layer.getIDropout()); + assertEquals(new Dropout(DROPOUT_DL4J), layer.getDropOut()); assertArrayEquals(KERNEL_SIZE, layer.getKernelSize()); assertArrayEquals(STRIDE, layer.getStride()); assertEquals(N_OUT, layer.getNOut()); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasDepthwiseConvolution2DTest.java 
b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasDepthwiseConvolution2DTest.java index eef103f98..98487876f 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasDepthwiseConvolution2DTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasDepthwiseConvolution2DTest.java @@ -127,12 +127,12 @@ public class KerasDepthwiseConvolution2DTest extends BaseDL4JTest { DepthwiseConvolution2D layer = kerasLayer.getDepthwiseConvolution2DLayer(); assertEquals(ACTIVATION_DL4J, layer.getActivationFn().toString()); - assertEquals(LAYER_NAME, layer.getLayerName()); + assertEquals(LAYER_NAME, layer.getName()); assertEquals(INIT_DL4J, layer.getWeightInit()); assertEquals(DEPTH_MULTIPLIER, layer.getDepthMultiplier()); assertEquals(L1_REGULARIZATION, KerasTestUtils.getL1(layer), 0.0); assertEquals(L2_REGULARIZATION, KerasTestUtils.getL2(layer), 0.0); - assertEquals(new Dropout(DROPOUT_DL4J), layer.getIDropout()); + assertEquals(new Dropout(DROPOUT_DL4J), layer.getDropOut()); assertArrayEquals(KERNEL_SIZE, layer.getKernelSize()); assertArrayEquals(STRIDE, layer.getStride()); assertEquals(N_IN * DEPTH_MULTIPLIER, layer.getNOut()); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasSeparableConvolution2DTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasSeparableConvolution2DTest.java index 9745ff5ed..7c3d0844f 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasSeparableConvolution2DTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasSeparableConvolution2DTest.java @@ -129,12 +129,12 @@ public class KerasSeparableConvolution2DTest extends BaseDL4JTest { SeparableConvolution2D layer = new KerasSeparableConvolution2D(layerConfig).getSeparableConvolution2DLayer(); assertEquals(ACTIVATION_DL4J, layer.getActivationFn().toString()); - assertEquals(LAYER_NAME, layer.getLayerName()); + assertEquals(LAYER_NAME, layer.getName()); assertEquals(INIT_DL4J, layer.getWeightInit()); assertEquals(L1_REGULARIZATION, KerasTestUtils.getL1(layer), 0.0); assertEquals(L2_REGULARIZATION, KerasTestUtils.getL2(layer), 0.0); assertEquals(DEPTH_MULTIPLIER, layer.getDepthMultiplier()); - assertEquals(new Dropout(DROPOUT_DL4J), layer.getIDropout()); + assertEquals(new Dropout(DROPOUT_DL4J), layer.getDropOut()); assertArrayEquals(KERNEL_SIZE, layer.getKernelSize()); assertArrayEquals(STRIDE, layer.getStride()); assertEquals(N_OUT, layer.getNOut()); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasUpsampling1DTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasUpsampling1DTest.java index 394f768c9..3acbe2c5c 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasUpsampling1DTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasUpsampling1DTest.java @@ -62,7 +62,7 @@ public class KerasUpsampling1DTest extends BaseDL4JTest { layerConfig.put(conf.getLAYER_FIELD_KERAS_VERSION(), 
kerasVersion); Upsampling1D layer = new KerasUpsampling1D(layerConfig).getUpsampling1DLayer(); - assertEquals(LAYER_NAME, layer.getLayerName()); + assertEquals(LAYER_NAME, layer.getName()); assertEquals(size, layer.getSize()[0]); } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasUpsampling2DTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasUpsampling2DTest.java index f75958315..9ddc4dc8b 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasUpsampling2DTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasUpsampling2DTest.java @@ -68,7 +68,7 @@ public class KerasUpsampling2DTest extends BaseDL4JTest { layerConfig.put(conf.getLAYER_FIELD_KERAS_VERSION(), kerasVersion); Upsampling2D layer = new KerasUpsampling2D(layerConfig).getUpsampling2DLayer(); - assertEquals(LAYER_NAME, layer.getLayerName()); + assertEquals(LAYER_NAME, layer.getName()); assertEquals(size[0], layer.getSize()[0]); assertEquals(size[1], layer.getSize()[1]); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasUpsampling3DTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasUpsampling3DTest.java index 7c82f4907..3be649e23 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasUpsampling3DTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasUpsampling3DTest.java @@ -69,7 +69,7 @@ public class KerasUpsampling3DTest extends BaseDL4JTest { layerConfig.put(conf.getLAYER_FIELD_KERAS_VERSION(), kerasVersion); Upsampling3D layer = new KerasUpsampling3D(layerConfig).getUpsampling3DLayer(); - assertEquals(LAYER_NAME, layer.getLayerName()); + assertEquals(LAYER_NAME, layer.getName()); assertEquals(size[0], layer.getSize()[0]); assertEquals(size[1], layer.getSize()[1]); assertEquals(size[2], layer.getSize()[2]); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasZeroPadding1DTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasZeroPadding1DTest.java index 64bc6563f..245ca1732 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasZeroPadding1DTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasZeroPadding1DTest.java @@ -62,7 +62,7 @@ public class KerasZeroPadding1DTest extends BaseDL4JTest { layerConfig.put(conf.getLAYER_FIELD_KERAS_VERSION(), kerasVersion); ZeroPadding1DLayer layer = new KerasZeroPadding1D(layerConfig).getZeroPadding1DLayer(); - assertEquals(layerName, layer.getLayerName()); + assertEquals(layerName, layer.getName()); assertEquals(zeroPadding, layer.getPadding()[0]); } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasZeroPadding2DTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasZeroPadding2DTest.java index 203c4b887..e6b2be2f2 100644 --- 
a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasZeroPadding2DTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasZeroPadding2DTest.java @@ -70,7 +70,7 @@ public class KerasZeroPadding2DTest extends BaseDL4JTest { layerConfig.put(conf.getLAYER_FIELD_KERAS_VERSION(), kerasVersion); ZeroPaddingLayer layer = new KerasZeroPadding2D(layerConfig).getZeroPadding2DLayer(); - assertEquals(LAYER_NAME, layer.getLayerName()); + assertEquals(LAYER_NAME, layer.getName()); assertEquals(ZERO_PADDING[0], layer.getPadding()[0]); assertEquals(ZERO_PADDING[0], layer.getPadding()[1]); assertEquals(ZERO_PADDING[1], layer.getPadding()[2]); @@ -89,7 +89,7 @@ public class KerasZeroPadding2DTest extends BaseDL4JTest { layerConfig.put(conf.getLAYER_FIELD_KERAS_VERSION(), kerasVersion); ZeroPaddingLayer layer = new KerasZeroPadding2D(layerConfig).getZeroPadding2DLayer(); - assertEquals(LAYER_NAME, layer.getLayerName()); + assertEquals(LAYER_NAME, layer.getName()); assertEquals(ZERO_PADDING[0], layer.getPadding()[0]); assertEquals(ZERO_PADDING[0], layer.getPadding()[1]); } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasZeroPadding3DTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasZeroPadding3DTest.java index cc2c44968..41789b259 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasZeroPadding3DTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasZeroPadding3DTest.java @@ -70,7 +70,7 @@ public class KerasZeroPadding3DTest extends BaseDL4JTest { layerConfig.put(conf.getLAYER_FIELD_KERAS_VERSION(), kerasVersion); ZeroPadding3DLayer layer = new KerasZeroPadding3D(layerConfig).getZeroPadding3DLayer(); - assertEquals(LAYER_NAME, layer.getLayerName()); + assertEquals(LAYER_NAME, layer.getName()); assertEquals(ZERO_PADDING[0], layer.getPadding()[0]); assertEquals(ZERO_PADDING[0], layer.getPadding()[1]); assertEquals(ZERO_PADDING[1], layer.getPadding()[2]); @@ -91,7 +91,7 @@ public class KerasZeroPadding3DTest extends BaseDL4JTest { layerConfig.put(conf.getLAYER_FIELD_KERAS_VERSION(), kerasVersion); ZeroPadding3DLayer layer = new KerasZeroPadding3D(layerConfig).getZeroPadding3DLayer(); - assertEquals(LAYER_NAME, layer.getLayerName()); + assertEquals(LAYER_NAME, layer.getName()); assertEquals(ZERO_PADDING[0], layer.getPadding()[0]); assertEquals(ZERO_PADDING[0], layer.getPadding()[1]); } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasActivationLayer.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasActivationLayer.java index 1f2496c16..23a7a814b 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasActivationLayer.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasActivationLayer.java @@ -60,7 +60,7 @@ public class KerasActivationLayer extends BaseDL4JTest { ActivationLayer layer = new KerasActivation(layerConfig).getActivationLayer(); assertEquals(ACTIVATION_DL4J, layer.getActivationFn().toString()); - assertEquals(LAYER_NAME, layer.getLayerName()); + 
assertEquals(LAYER_NAME, layer.getName()); } } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDenseTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDenseTest.java index 637ce5915..620c88b05 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDenseTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDenseTest.java @@ -88,11 +88,11 @@ public class KerasDenseTest extends BaseDL4JTest { DenseLayer layer = new KerasDense(layerConfig, false).getDenseLayer(); assertEquals(ACTIVATION_DL4J, layer.getActivationFn().toString()); - assertEquals(LAYER_NAME, layer.getLayerName()); + assertEquals(LAYER_NAME, layer.getName()); assertEquals(INIT_DL4J, layer.getWeightInit()); assertEquals(L1_REGULARIZATION, KerasTestUtils.getL1(layer), 0.0); assertEquals(L2_REGULARIZATION, KerasTestUtils.getL2(layer), 0.0); - assertEquals(new Dropout(DROPOUT_DL4J), layer.getIDropout()); + assertEquals(new Dropout(DROPOUT_DL4J), layer.getDropOut()); assertEquals(N_OUT, layer.getNOut()); } } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDropoutTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDropoutTest.java index afc7506e2..f70289289 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDropoutTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDropoutTest.java @@ -65,8 +65,8 @@ public class KerasDropoutTest extends BaseDL4JTest { layerConfig.put(conf.getLAYER_FIELD_KERAS_VERSION(), kerasVersion); DropoutLayer layer = new KerasDropout(layerConfig).getDropoutLayer(); - assertEquals(LAYER_NAME, layer.getLayerName()); - assertEquals(new Dropout(DROPOUT_DL4J), layer.getIDropout()); + assertEquals(LAYER_NAME, layer.getName()); + assertEquals(new Dropout(DROPOUT_DL4J), layer.getDropOut()); } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasRepeatVectorTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasRepeatVectorTest.java index 958e2baad..a4cb19187 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasRepeatVectorTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasRepeatVectorTest.java @@ -63,8 +63,8 @@ public class KerasRepeatVectorTest extends BaseDL4JTest { layerConfig.put(conf.getLAYER_FIELD_KERAS_VERSION(), kerasVersion); RepeatVector layer = new KerasRepeatVector(layerConfig).getRepeatVectorLayer(); - assertEquals(LAYER_NAME, layer.getLayerName()); - assertEquals(layer.getN(), REPEAT); + assertEquals(LAYER_NAME, layer.getName()); + assertEquals(layer.getRepetitionFactor(), REPEAT); } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasSpatialDropout2DTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasSpatialDropout2DTest.java index 8234c29b2..075937fd8 100644 --- 
a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasSpatialDropout2DTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasSpatialDropout2DTest.java @@ -65,8 +65,8 @@ public class KerasSpatialDropout2DTest extends BaseDL4JTest { layerConfig.put(conf.getLAYER_FIELD_KERAS_VERSION(), kerasVersion); DropoutLayer layer = new KerasSpatialDropout(layerConfig).getSpatialDropoutLayer(); - assertEquals(LAYER_NAME, layer.getLayerName()); - assertEquals(new SpatialDropout(RATE_DL4J), layer.getIDropout()); + assertEquals(LAYER_NAME, layer.getName()); + assertEquals(new SpatialDropout(RATE_DL4J), layer.getDropOut()); } } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/embeddings/KerasEmbeddingTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/embeddings/KerasEmbeddingTest.java index 010f890b7..1037dfb04 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/embeddings/KerasEmbeddingTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/embeddings/KerasEmbeddingTest.java @@ -100,7 +100,7 @@ public class KerasEmbeddingTest extends BaseDL4JTest { assertEquals(kerasEmbedding.isZeroMasking(), maskZero); EmbeddingSequenceLayer layer = kerasEmbedding.getEmbeddingLayer(); - assertEquals(LAYER_NAME, layer.getLayerName()); + assertEquals(LAYER_NAME, layer.getName()); return kerasEmbedding; } } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/local/KerasLocallyConnected1DTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/local/KerasLocallyConnected1DTest.java index 42afecf32..195039c8c 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/local/KerasLocallyConnected1DTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/local/KerasLocallyConnected1DTest.java @@ -121,15 +121,15 @@ public class KerasLocallyConnected1DTest extends BaseDL4JTest { LocallyConnected1D layer = kerasLocal.getLocallyConnected1DLayer(); assertEquals(ACTIVATION_DL4J, layer.getActivation().toString().toLowerCase()); - assertEquals(LAYER_NAME, layer.getLayerName()); + assertEquals(LAYER_NAME, layer.getName()); assertEquals(INIT_DL4J, layer.getWeightInit()); assertEquals(L1_REGULARIZATION, KerasTestUtils.getL1(layer), 0.0); assertEquals(L2_REGULARIZATION, KerasTestUtils.getL2(layer), 0.0); - assertEquals(new Dropout(DROPOUT_DL4J), layer.getIDropout()); - assertEquals(KERNEL_SIZE, layer.getKernel()); + assertEquals(new Dropout(DROPOUT_DL4J), layer.getDropOut()); + assertEquals(KERNEL_SIZE, layer.getKernelSize()); assertEquals(STRIDE, layer.getStride()); assertEquals(N_OUT, layer.getNOut()); - assertEquals(ConvolutionMode.Truncate, layer.getCm()); + assertEquals(ConvolutionMode.Truncate, layer.getConvolutionMode()); assertEquals(VALID_PADDING, layer.getPadding()); assertEquals(layer.getInputSize(), 4); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/local/KerasLocallyConnected2DTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/local/KerasLocallyConnected2DTest.java index 42981f1b6..3f068b549 
100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/local/KerasLocallyConnected2DTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/local/KerasLocallyConnected2DTest.java @@ -20,11 +20,18 @@ package org.deeplearning4j.nn.modelimport.keras.layers.local; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.dropout.Dropout; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.LocallyConnected2D; -import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.nn.modelimport.keras.KerasTestUtils; import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration; import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration; @@ -32,107 +39,99 @@ import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration; import org.deeplearning4j.nn.weights.WeightInit; import org.junit.jupiter.api.Test; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; - /** * @author Max Pumperla */ public class KerasLocallyConnected2DTest extends BaseDL4JTest { - private final String ACTIVATION_KERAS = "linear"; - private final String ACTIVATION_DL4J = "identity"; - private final String LAYER_NAME = "test_layer"; - private final String INIT_KERAS = "glorot_normal"; - private final WeightInit INIT_DL4J = WeightInit.XAVIER; - private final double L1_REGULARIZATION = 0.01; - private final double L2_REGULARIZATION = 0.02; - private final double DROPOUT_KERAS = 0.3; - private final double DROPOUT_DL4J = 1 - DROPOUT_KERAS; - private final int[] KERNEL_SIZE = new int[]{1, 2}; - private final int[] DILATION = new int[]{2, 2}; - private final int[] STRIDE = new int[]{3, 4}; - private final int N_OUT = 13; - private final String BORDER_MODE_VALID = "valid"; - private final int[] VALID_PADDING = new int[]{0, 0}; + private final String ACTIVATION_KERAS = "linear"; + private final String ACTIVATION_DL4J = "identity"; + private final String LAYER_NAME = "test_layer"; + private final String INIT_KERAS = "glorot_normal"; + private final WeightInit INIT_DL4J = WeightInit.XAVIER; + private final double L1_REGULARIZATION = 0.01; + private final double L2_REGULARIZATION = 0.02; + private final double DROPOUT_KERAS = 0.3; + private final double DROPOUT_DL4J = 1 - DROPOUT_KERAS; + private final int[] KERNEL_SIZE = new int[] {1, 2}; + private final int[] DILATION = new int[] {2, 2}; + private final int[] STRIDE = new int[] {3, 4}; + private final int N_OUT = 13; + private final String BORDER_MODE_VALID = "valid"; + private final int[] VALID_PADDING = new int[] {0, 0}; - private final Integer keras1 = 1; - private final Integer keras2 = 2; - private final Keras1LayerConfiguration conf1 = new Keras1LayerConfiguration(); - private final Keras2LayerConfiguration conf2 = new Keras2LayerConfiguration(); + private final Integer keras1 = 1; + private final Integer keras2 = 2; + private final Keras1LayerConfiguration conf1 = new Keras1LayerConfiguration(); + private final 
Keras2LayerConfiguration conf2 = new Keras2LayerConfiguration(); + @Test + public void testLocallyConnected2DLayer() throws Exception { + buildLocallyConnected2DLayer(conf1, keras1); + buildLocallyConnected2DLayer(conf2, keras2); + } - @Test - public void testLocallyConnected2DLayer() throws Exception { - buildLocallyConnected2DLayer(conf1, keras1); - buildLocallyConnected2DLayer(conf2, keras2); + private void buildLocallyConnected2DLayer(KerasLayerConfiguration conf, Integer kerasVersion) + throws Exception { + Map layerConfig = new HashMap<>(); + layerConfig.put( + conf.getLAYER_FIELD_CLASS_NAME(), conf.getLAYER_CLASS_NAME_LOCALLY_CONNECTED_2D()); + Map config = new HashMap<>(); + config.put(conf.getLAYER_FIELD_ACTIVATION(), ACTIVATION_KERAS); + config.put(conf.getLAYER_FIELD_NAME(), LAYER_NAME); + if (kerasVersion == 1) { + config.put(conf.getLAYER_FIELD_INIT(), INIT_KERAS); + } else { + Map init = new HashMap<>(); + init.put("class_name", conf.getINIT_GLOROT_NORMAL()); + config.put(conf.getLAYER_FIELD_INIT(), init); + } + Map W_reg = new HashMap<>(); + W_reg.put(conf.getREGULARIZATION_TYPE_L1(), L1_REGULARIZATION); + W_reg.put(conf.getREGULARIZATION_TYPE_L2(), L2_REGULARIZATION); + config.put(conf.getLAYER_FIELD_W_REGULARIZER(), W_reg); + config.put(conf.getLAYER_FIELD_DROPOUT(), DROPOUT_KERAS); + if (kerasVersion == 1) { + config.put(conf.getLAYER_FIELD_NB_ROW(), KERNEL_SIZE[0]); + config.put(conf.getLAYER_FIELD_NB_COL(), KERNEL_SIZE[1]); + } else { + ArrayList kernel = + new ArrayList() { + { + for (int i : KERNEL_SIZE) add(i); + } + }; + config.put(conf.getLAYER_FIELD_KERNEL_SIZE(), kernel); } + List subsampleList = new ArrayList<>(); + subsampleList.add(STRIDE[0]); + subsampleList.add(STRIDE[1]); + config.put(conf.getLAYER_FIELD_CONVOLUTION_STRIDES(), subsampleList); + config.put(conf.getLAYER_FIELD_NB_FILTER(), N_OUT); + config.put(conf.getLAYER_FIELD_BORDER_MODE(), BORDER_MODE_VALID); + layerConfig.put(conf.getLAYER_FIELD_CONFIG(), config); + layerConfig.put(conf.getLAYER_FIELD_KERAS_VERSION(), kerasVersion); - private void buildLocallyConnected2DLayer(KerasLayerConfiguration conf, Integer kerasVersion) - throws Exception { - Map layerConfig = new HashMap<>(); - layerConfig.put(conf.getLAYER_FIELD_CLASS_NAME(), conf.getLAYER_CLASS_NAME_LOCALLY_CONNECTED_2D()); - Map config = new HashMap<>(); - config.put(conf.getLAYER_FIELD_ACTIVATION(), ACTIVATION_KERAS); - config.put(conf.getLAYER_FIELD_NAME(), LAYER_NAME); - if (kerasVersion == 1) { - config.put(conf.getLAYER_FIELD_INIT(), INIT_KERAS); - } else { - Map init = new HashMap<>(); - init.put("class_name", conf.getINIT_GLOROT_NORMAL()); - config.put(conf.getLAYER_FIELD_INIT(), init); - } - Map W_reg = new HashMap<>(); - W_reg.put(conf.getREGULARIZATION_TYPE_L1(), L1_REGULARIZATION); - W_reg.put(conf.getREGULARIZATION_TYPE_L2(), L2_REGULARIZATION); - config.put(conf.getLAYER_FIELD_W_REGULARIZER(), W_reg); - config.put(conf.getLAYER_FIELD_DROPOUT(), DROPOUT_KERAS); - if (kerasVersion == 1) { - config.put(conf.getLAYER_FIELD_NB_ROW(), KERNEL_SIZE[0]); - config.put(conf.getLAYER_FIELD_NB_COL(), KERNEL_SIZE[1]); - } else { - ArrayList kernel = new ArrayList() {{ - for (int i : KERNEL_SIZE) add(i); - }}; - config.put(conf.getLAYER_FIELD_KERNEL_SIZE(), kernel); - } + KerasLocallyConnected2D kerasLocal = new KerasLocallyConnected2D(layerConfig); - List subsampleList = new ArrayList<>(); - subsampleList.add(STRIDE[0]); - subsampleList.add(STRIDE[1]); - config.put(conf.getLAYER_FIELD_CONVOLUTION_STRIDES(), subsampleList); - 
config.put(conf.getLAYER_FIELD_NB_FILTER(), N_OUT); - config.put(conf.getLAYER_FIELD_BORDER_MODE(), BORDER_MODE_VALID); - layerConfig.put(conf.getLAYER_FIELD_CONFIG(), config); - layerConfig.put(conf.getLAYER_FIELD_KERAS_VERSION(), kerasVersion); + // once get output type is triggered, inputshape, output shape and input depth are updated + kerasLocal.getOutputType(InputType.convolutional(4, 4, 3)); + LocallyConnected2D layer = kerasLocal.getLocallyConnected2DLayer(); + assertEquals(ACTIVATION_DL4J, layer.getActivation().toString().toLowerCase()); + assertEquals(LAYER_NAME, layer.getName()); + assertEquals(INIT_DL4J, layer.getWeightInit()); + assertEquals(L1_REGULARIZATION, KerasTestUtils.getL1(layer), 0.0); + assertEquals(L2_REGULARIZATION, KerasTestUtils.getL2(layer), 0.0); + assertEquals(new Dropout(DROPOUT_DL4J), layer.getDropOut()); + assertArrayEquals(KERNEL_SIZE, layer.getKernel()); + assertArrayEquals(STRIDE, layer.getStride()); + assertEquals(N_OUT, layer.getNOut()); + assertEquals(ConvolutionMode.Truncate, layer.getConvolutionMode()); + assertArrayEquals(VALID_PADDING, layer.getPadding()); - KerasLocallyConnected2D kerasLocal = new KerasLocallyConnected2D(layerConfig); - - // once get output type is triggered, inputshape, output shape and input depth are updated - kerasLocal.getOutputType(InputType.convolutional(4,4,3)); - - LocallyConnected2D layer = kerasLocal.getLocallyConnected2DLayer(); - assertEquals(ACTIVATION_DL4J, layer.getActivation().toString().toLowerCase()); - assertEquals(LAYER_NAME, layer.getLayerName()); - assertEquals(INIT_DL4J, layer.getWeightInit()); - assertEquals(L1_REGULARIZATION, KerasTestUtils.getL1(layer), 0.0); - assertEquals(L2_REGULARIZATION, KerasTestUtils.getL2(layer), 0.0); - assertEquals(new Dropout(DROPOUT_DL4J), layer.getIDropout()); - assertArrayEquals(KERNEL_SIZE, layer.getKernel()); - assertArrayEquals(STRIDE, layer.getStride()); - assertEquals(N_OUT, layer.getNOut()); - assertEquals(ConvolutionMode.Truncate, layer.getCm()); - assertArrayEquals(VALID_PADDING, layer.getPadding()); - - assertArrayEquals(layer.getInputSize(), new int[] {4, 4}); - assertEquals(layer.getNIn(), 3); - } + assertArrayEquals(layer.getInputSize(), new int[] {4, 4}); + assertEquals(layer.getNIn(), 3); + } } - diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/noise/KerasAlphaDropoutTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/noise/KerasAlphaDropoutTest.java index 1f35515bb..5a6828600 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/noise/KerasAlphaDropoutTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/noise/KerasAlphaDropoutTest.java @@ -65,8 +65,8 @@ public class KerasAlphaDropoutTest extends BaseDL4JTest { layerConfig.put(conf.getLAYER_FIELD_KERAS_VERSION(), kerasVersion); DropoutLayer layer = new KerasAlphaDropout(layerConfig).getAlphaDropoutLayer(); - assertEquals(LAYER_NAME, layer.getLayerName()); - assertEquals(new AlphaDropout(RATE_DL4J), layer.getIDropout()); + assertEquals(LAYER_NAME, layer.getName()); + assertEquals(new AlphaDropout(RATE_DL4J), layer.getDropOut()); } } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/noise/KerasGaussianDropoutTest.java 
b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/noise/KerasGaussianDropoutTest.java index eee0f1c8a..7f48338b0 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/noise/KerasGaussianDropoutTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/noise/KerasGaussianDropoutTest.java @@ -65,8 +65,8 @@ public class KerasGaussianDropoutTest extends BaseDL4JTest { layerConfig.put(conf.getLAYER_FIELD_KERAS_VERSION(), kerasVersion); DropoutLayer layer = new KerasGaussianDropout(layerConfig).getGaussianDropoutLayer(); - assertEquals(LAYER_NAME, layer.getLayerName()); - assertEquals(new GaussianDropout(RATE_DL4J), layer.getIDropout()); + assertEquals(LAYER_NAME, layer.getName()); + assertEquals(new GaussianDropout(RATE_DL4J), layer.getDropOut()); } } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/noise/KerasGaussianNoiseTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/noise/KerasGaussianNoiseTest.java index 6d8eb994c..45fec55b6 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/noise/KerasGaussianNoiseTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/noise/KerasGaussianNoiseTest.java @@ -64,8 +64,8 @@ public class KerasGaussianNoiseTest extends BaseDL4JTest { layerConfig.put(conf.getLAYER_FIELD_KERAS_VERSION(), kerasVersion); DropoutLayer layer = new KerasGaussianNoise(layerConfig).getGaussianNoiseLayer(); - assertEquals(LAYER_NAME, layer.getLayerName()); - assertEquals(new GaussianNoise(STDDEV), layer.getIDropout()); + assertEquals(LAYER_NAME, layer.getName()); + assertEquals(new GaussianNoise(STDDEV), layer.getDropOut()); } } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/normalization/KerasBatchNormalizationTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/normalization/KerasBatchNormalizationTest.java index ca84e5244..21aedf497 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/normalization/KerasBatchNormalizationTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/normalization/KerasBatchNormalizationTest.java @@ -74,7 +74,7 @@ public class KerasBatchNormalizationTest extends BaseDL4JTest { layerConfig.put(conf.getLAYER_FIELD_KERAS_VERSION(), kerasVersion); BatchNormalization layer = new KerasBatchNormalization(layerConfig).getBatchNormalizationLayer(); - assertEquals(LAYER_NAME, layer.getLayerName()); + assertEquals(LAYER_NAME, layer.getName()); assertEquals(epsilon, layer.getEps(), 0.0); assertEquals(momentum, layer.getDecay(), 0.0); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling1DTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling1DTest.java index d504e626f..9311f2afa 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling1DTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling1DTest.java @@ -86,7 
+86,7 @@ public class KerasPooling1DTest extends BaseDL4JTest { layerConfig.put(conf.getLAYER_FIELD_KERAS_VERSION(), kerasVersion); Subsampling1DLayer layer = new KerasPooling1D(layerConfig).getSubsampling1DLayer(); - assertEquals(LAYER_NAME, layer.getLayerName()); + assertEquals(LAYER_NAME, layer.getName()); assertEquals(KERNEL_SIZE[0], layer.getKernelSize()[0]); assertEquals(STRIDE[0], layer.getStride()[0]); assertEquals(POOLING_TYPE, layer.getPoolingType()); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling2DTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling2DTest.java index 76aed15c1..7ab6fccf5 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling2DTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling2DTest.java @@ -78,7 +78,7 @@ public class KerasPooling2DTest extends BaseDL4JTest { layerConfig.put(conf.getLAYER_FIELD_KERAS_VERSION(), kerasVersion); SubsamplingLayer layer = new KerasPooling2D(layerConfig).getSubsampling2DLayer(); - assertEquals(LAYER_NAME, layer.getLayerName()); + assertEquals(LAYER_NAME, layer.getName()); assertArrayEquals(KERNEL_SIZE, layer.getKernelSize()); assertArrayEquals(STRIDE, layer.getStride()); assertEquals(POOLING_TYPE, layer.getPoolingType()); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling3DTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling3DTest.java index 44ed404eb..2bdf94b12 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling3DTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling3DTest.java @@ -80,7 +80,7 @@ public class KerasPooling3DTest extends BaseDL4JTest { layerConfig.put(conf.getLAYER_FIELD_KERAS_VERSION(), kerasVersion); Subsampling3DLayer layer = new KerasPooling3D(layerConfig).getSubsampling3DLayer(); - assertEquals(LAYER_NAME, layer.getLayerName()); + assertEquals(LAYER_NAME, layer.getName()); assertArrayEquals(KERNEL_SIZE, layer.getKernelSize()); assertArrayEquals(STRIDE, layer.getStride()); assertEquals(POOLING_TYPE, layer.getPoolingType()); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLSTMTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLSTMTest.java index 1bfc3a4ce..7b785b523 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLSTMTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLSTMTest.java @@ -129,11 +129,11 @@ public class KerasLSTMTest extends BaseDL4JTest { layer = (LSTM) lts.getUnderlying(); } assertEquals(ACTIVATION_DL4J, layer.getActivationFn().toString()); - assertEquals(LAYER_NAME, layer.getLayerName()); + assertEquals(LAYER_NAME, layer.getName()); assertEquals(INIT_DL4J, layer.getWeightInit()); assertEquals(L1_REGULARIZATION, KerasTestUtils.getL1(layer), 0.0); assertEquals(L2_REGULARIZATION, KerasTestUtils.getL2(layer), 0.0); - assertEquals(new 
Dropout(DROPOUT_DL4J), layer.getIDropout()); + assertEquals(new Dropout(DROPOUT_DL4J), layer.getDropOut()); assertEquals(lstmForgetBiasDouble, layer.getForgetGateBiasInit(), 0.0); assertEquals(N_OUT, layer.getNOut()); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasSimpleRnnTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasSimpleRnnTest.java index 1b143a706..beadedece 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasSimpleRnnTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasSimpleRnnTest.java @@ -100,11 +100,11 @@ public class KerasSimpleRnnTest extends BaseDL4JTest { SimpleRnn layer = rs ? (SimpleRnn) new KerasSimpleRnn(layerConfig).getSimpleRnnLayer() : (SimpleRnn) ((LastTimeStep) new KerasSimpleRnn(layerConfig).getSimpleRnnLayer()).getUnderlying(); assertEquals(ACTIVATION, layer.getActivationFn().toString()); - assertEquals(LAYER_NAME, layer.getLayerName()); + assertEquals(LAYER_NAME, layer.getName()); assertEquals(INIT_DL4J, layer.getWeightInit()); assertEquals(L1_REGULARIZATION, KerasTestUtils.getL1(layer), 0.0); assertEquals(L2_REGULARIZATION, KerasTestUtils.getL2(layer), 0.0); - assertEquals(new Dropout(DROPOUT_DL4J), layer.getIDropout()); + assertEquals(new Dropout(DROPOUT_DL4J), layer.getDropOut()); assertEquals(N_OUT, layer.getNOut()); } } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/wrappers/KerasBidirectionalTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/wrappers/KerasBidirectionalTest.java index 1aa8b0a81..a69ea6f63 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/wrappers/KerasBidirectionalTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/wrappers/KerasBidirectionalTest.java @@ -112,7 +112,7 @@ public class KerasBidirectionalTest extends BaseDL4JTest { assertEquals(Bidirectional.Mode.ADD, layer.getMode()); assertEquals(Activation.HARDSIGMOID.toString().toLowerCase(), - ((LSTM) kerasBidirectional.getUnderlyingRecurrentLayer()).getGateActivationFn().toString()); + ((LSTM) kerasBidirectional.getUnderlyingRecurrentLayer()).getGateActivationFunction().toString()); } } diff --git a/cavis-dnn/cavis-dnn-nlp/src/test/java/org/deeplearning4j/models/word2vec/Word2VecTestsSmall.java b/cavis-dnn/cavis-dnn-nlp/src/test/java/org/deeplearning4j/models/word2vec/Word2VecTestsSmall.java index bd89639b3..912ab998a 100644 --- a/cavis-dnn/cavis-dnn-nlp/src/test/java/org/deeplearning4j/models/word2vec/Word2VecTestsSmall.java +++ b/cavis-dnn/cavis-dnn-nlp/src/test/java/org/deeplearning4j/models/word2vec/Word2VecTestsSmall.java @@ -189,9 +189,9 @@ public class Word2VecTestsSmall extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .seed(12345).list() - .layer(new EmbeddingLayer.Builder().weightInit(vec).build()) - .layer(new DenseLayer.Builder().activation(Activation.TANH).nIn(w.size(1)).nOut(3).build()) - .layer(new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(3) + .layer(EmbeddingLayer.builder().weightInit(vec).build()) + .layer(DenseLayer.builder().activation(Activation.TANH).nIn(w.size(1)).nOut(3).build()) + 
.layer(OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(3) .nOut(4).build()) .build(); diff --git a/cavis-dnn/cavis-dnn-nn/build.gradle b/cavis-dnn/cavis-dnn-nn/build.gradle index 59ff712ab..e668886b1 100644 --- a/cavis-dnn/cavis-dnn-nn/build.gradle +++ b/cavis-dnn/cavis-dnn-nn/build.gradle @@ -18,6 +18,9 @@ * ***************************************************************************** * */ +plugins { + id("io.freefair.lombok") version "8.0.1" +} apply from: "${project.rootProject.projectDir}/createTestBackends.gradle" dependencies { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/ILayerConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/ILayerConfiguration.java index 1462661bb..e3414d39e 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/ILayerConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/ILayerConfiguration.java @@ -21,6 +21,7 @@ package net.brutex.ai.dnn.api; + public interface ILayerConfiguration { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/IModel.java b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/IModel.java index 3f84a7004..aceaffdf0 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/IModel.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/IModel.java @@ -40,11 +40,10 @@ import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator; /** * A Neural Network is an instance of a {@link INeuralNetworkConfiguration}, that can be trained, - * evaluated, saved, exported, etc. Its configuration state is defined with the - * {@link #setNetConfiguration(NeuralNetConfiguration)} (INeuralNetworkConfiguration)} and - * {@link #getNetConfiguration()} methods. - **/ - + * evaluated, saved, exported, etc. Its configuration state is defined with the {@link + * #setNetConfiguration(NeuralNetConfiguration)} (INeuralNetworkConfiguration)} and {@link + * #getNetConfiguration()} methods. + */ public interface IModel extends ITrainableLayer { /** @@ -54,9 +53,10 @@ public interface IModel extends ITrainableLayer { */ Map getParamTable(); - Map getParamTable(boolean backpropOnly); void setParamTable(Map paramTable); + Map getParamTable(boolean backpropOnly); + /** * This method returns updater state (if applicable), null otherwise * @@ -124,9 +124,7 @@ public interface IModel extends ITrainableLayer { void setNetConfiguration(@NonNull NeuralNetConfiguration netConfiguration); - /** - * Init the model - */ + /** Init the model */ void init(); /** @@ -136,15 +134,11 @@ public interface IModel extends ITrainableLayer { */ long numParams(); - /** - * All models have a fit method - */ + /** All models have a fit method */ @Deprecated void fit(); - /** - * Update layer weights and biases with gradient change - */ + /** Update layer weights and biases with gradient change */ void update(Gradient gradient); /** @@ -154,7 +148,6 @@ public interface IModel extends ITrainableLayer { */ void update(INDArray gradient, String paramType); - /** * The score for the model. No calculation occurs, this simply returns the score calculated before * by the {@link #computeGradientAndScore(LayerWorkspaceMgr)} method. 
@@ -163,10 +156,7 @@ public interface IModel extends ITrainableLayer { */ double getScore(); - - /** - * Update the score - */ + /** Update the score */ void computeGradientAndScore(LayerWorkspaceMgr workspaceMgr); /** @@ -176,7 +166,6 @@ public interface IModel extends ITrainableLayer { */ INDArray getModelParams(); - /** * the number of parameters for the model * @@ -200,15 +189,13 @@ public interface IModel extends ITrainableLayer { */ void setParamsViewArray(INDArray params); - INDArray getGradientsViewArray(); /** * Set the gradients array as a view of the full (backprop) network parameters NOTE: this is * intended to be used internally in MultiLayerNetwork and ComputationGraph, not by users. * - * @param gradients a 1 x nParams row vector that is a view of the larger (MLN/CG) gradients - * array + * @param gradients a 1 x nParams row vector that is a view of the larger (MLN/CG) gradients array */ void setBackpropGradientsViewArray(INDArray gradients); @@ -219,11 +206,10 @@ public interface IModel extends ITrainableLayer { */ void fit(INDArray data, LayerWorkspaceMgr workspaceMgr); - /** * Get the gradient. Note that this method will not calculate the gradient, it will rather return - * the gradient that has been computed before. For calculating the gradient, see - * {@link IModel#computeGradientAndScore(LayerWorkspaceMgr)} } . + * the gradient that has been computed before. For calculating the gradient, see {@link + * IModel#computeGradientAndScore(LayerWorkspaceMgr)} } . * * @return the gradient for this model, as calculated before */ @@ -258,7 +244,6 @@ public interface IModel extends ITrainableLayer { */ INDArray getParam(String param); - /** * Set the parameters for a given parameter type. * @@ -267,18 +252,12 @@ public interface IModel extends ITrainableLayer { */ void setParam(String key, INDArray val); - /** - * Clear input - */ + /** Clear input */ void clear(); - - /** - * Apply any constraints to the model - */ + /** Apply any constraints to the model */ void applyConstraints(int iteration, int epoch); - void close(); /** @@ -296,6 +275,4 @@ public interface IModel extends ITrainableLayer { void addTrainingListeners(TrainingListener... listeners); void addTrainingListeners(Collection listeners); - - } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/networks/ArtificialNeuralNetwork.java b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/networks/ArtificialNeuralNetwork.java index aa0465659..46d2fa5b7 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/networks/ArtificialNeuralNetwork.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/networks/ArtificialNeuralNetwork.java @@ -21,6 +21,7 @@ package net.brutex.ai.dnn.networks; +import java.io.Serializable; import java.util.Arrays; import java.util.HashMap; import java.util.Map; @@ -48,7 +49,7 @@ import org.nd4j.linalg.api.ndarray.INDArray; * predictions of the network and the desired values and then using this error signal to change the * weights (or parameters) so that predictions get more accurate. */ -public abstract class ArtificialNeuralNetwork implements IModel { +public abstract class ArtificialNeuralNetwork implements IModel, Serializable { /** * A neural network is created from a configuration. @@ -71,8 +72,16 @@ public abstract class ArtificialNeuralNetwork implements IModel { * @param backpropParamsOnly If true, return backprop params only. 
If false: return all params * (equivalent to paramsTable()) */ + /** + * Returns a map of all parameters in the network as per {@link #paramTable()}.
+ * Optionally (with backpropParamsOnly=true) only the 'backprop' parameters are returned - that is, any parameters + * involved only in unsupervised layerwise pretraining not standard inference/backprop are excluded from the returned list. + * @param backpropParamsOnly If true, return backprop params only. If false: return all params + * @return Parameters for the network + */ @Override public Map getParamTable(boolean backpropParamsOnly) { + return paramTable; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/gradientcheck/GradientCheckUtil.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/gradientcheck/GradientCheckUtil.java index 0cccc2a4f..a2db2946b 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/gradientcheck/GradientCheckUtil.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/gradientcheck/GradientCheckUtil.java @@ -20,21 +20,17 @@ package org.deeplearning4j.gradientcheck; +import java.util.*; import lombok.*; import lombok.experimental.Accessors; import lombok.extern.slf4j.Slf4j; -import org.deeplearning4j.nn.conf.layers.BaseLayerConfiguration; -import org.deeplearning4j.nn.conf.layers.LayerConfiguration; -import org.nd4j.linalg.api.buffer.DataType; -import org.nd4j.linalg.exception.ND4JArraySizeException; -import org.nd4j.common.function.Consumer; -import org.nd4j.linalg.lossfunctions.impl.LossBinaryXENT; -import org.nd4j.common.primitives.Pair; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.Updater; import org.deeplearning4j.nn.api.layers.IOutputLayer; import org.deeplearning4j.nn.conf.graph.GraphVertex; import org.deeplearning4j.nn.conf.graph.LayerVertex; +import org.deeplearning4j.nn.conf.layers.BaseLayerConfiguration; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.layers.BaseOutputLayer; @@ -42,693 +38,954 @@ import org.deeplearning4j.nn.layers.LossLayer; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.updater.UpdaterCreator; import org.deeplearning4j.nn.updater.graph.ComputationGraphUpdater; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.function.Consumer; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.activations.impl.ActivationSoftmax; +import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.buffer.util.DataTypeUtil; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.dataset.MultiDataSet; +import org.nd4j.linalg.exception.ND4JArraySizeException; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.learning.config.IUpdater; import org.nd4j.linalg.learning.config.NoOp; import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.ILossFunction; +import org.nd4j.linalg.lossfunctions.impl.LossBinaryXENT; import org.nd4j.linalg.lossfunctions.impl.LossMCXENT; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; - -import java.util.*; @Slf4j public class GradientCheckUtil { - private static final List> VALID_ACTIVATION_FUNCTIONS = - Arrays.asList(Activation.CUBE.getActivationFunction().getClass(), - Activation.ELU.getActivationFunction().getClass(), - Activation.IDENTITY.getActivationFunction().getClass(), - 
Activation.RATIONALTANH.getActivationFunction().getClass(), - Activation.SIGMOID.getActivationFunction().getClass(), - Activation.SOFTMAX.getActivationFunction().getClass(), - Activation.SOFTPLUS.getActivationFunction().getClass(), - Activation.SOFTSIGN.getActivationFunction().getClass(), - Activation.TANH.getActivationFunction().getClass()); + private static final List> VALID_ACTIVATION_FUNCTIONS = + Arrays.asList( + Activation.CUBE.getActivationFunction().getClass(), + Activation.ELU.getActivationFunction().getClass(), + Activation.IDENTITY.getActivationFunction().getClass(), + Activation.RATIONALTANH.getActivationFunction().getClass(), + Activation.SIGMOID.getActivationFunction().getClass(), + Activation.SOFTMAX.getActivationFunction().getClass(), + Activation.SOFTPLUS.getActivationFunction().getClass(), + Activation.SOFTSIGN.getActivationFunction().getClass(), + Activation.TANH.getActivationFunction().getClass()); - private GradientCheckUtil() {} + private GradientCheckUtil() {} + private static void configureLossFnClippingIfPresent(IOutputLayer outputLayer) { - private static void configureLossFnClippingIfPresent(IOutputLayer outputLayer){ - - ILossFunction lfn = null; - IActivation afn = null; - if(outputLayer instanceof BaseOutputLayer){ - BaseOutputLayer o = (BaseOutputLayer)outputLayer; - lfn = ((org.deeplearning4j.nn.conf.layers.BaseOutputLayer)o.getTypedLayerConfiguration()).getLossFn(); - afn = o.getLayerConfiguration().getActivationFn(); - } else if(outputLayer instanceof LossLayer){ - LossLayer o = (LossLayer) outputLayer; - lfn = o.getTypedLayerConfiguration().getLossFn(); - afn = o.getTypedLayerConfiguration().getActivationFn(); - } - - if (lfn instanceof LossMCXENT && afn instanceof ActivationSoftmax && ((LossMCXENT) lfn).getSoftmaxClipEps() != 0) { - log.info("Setting softmax clipping epsilon to 0.0 for " + lfn.getClass() - + " loss function to avoid spurious gradient check failures"); - ((LossMCXENT) lfn).setSoftmaxClipEps(0.0); - } else if(lfn instanceof LossBinaryXENT && ((LossBinaryXENT) lfn).getClipEps() != 0) { - log.info("Setting clipping epsilon to 0.0 for " + lfn.getClass() - + " loss function to avoid spurious gradient check failures"); - ((LossBinaryXENT) lfn).setClipEps(0.0); - } + ILossFunction lfn = null; + IActivation afn = null; + if (outputLayer instanceof BaseOutputLayer) { + BaseOutputLayer o = (BaseOutputLayer) outputLayer; + lfn = + ((org.deeplearning4j.nn.conf.layers.BaseOutputLayer) o.getTypedLayerConfiguration()) + .getLossFunction(); + afn = o.getLayerConfiguration().getActivationFn(); + } else if (outputLayer instanceof LossLayer) { + LossLayer o = (LossLayer) outputLayer; + lfn = o.getTypedLayerConfiguration().getLossFunction(); + afn = o.getTypedLayerConfiguration().getActivationFn(); } - public enum PrintMode { - ALL, - ZEROS, - FAILURES_ONLY + if (lfn instanceof LossMCXENT + && afn instanceof ActivationSoftmax + && ((LossMCXENT) lfn).getSoftmaxClipEps() != 0) { + log.info( + "Setting softmax clipping epsilon to 0.0 for " + + lfn.getClass() + + " loss function to avoid spurious gradient check failures"); + ((LossMCXENT) lfn).setSoftmaxClipEps(0.0); + } else if (lfn instanceof LossBinaryXENT && ((LossBinaryXENT) lfn).getClipEps() != 0) { + log.info( + "Setting clipping epsilon to 0.0 for " + + lfn.getClass() + + " loss function to avoid spurious gradient check failures"); + ((LossBinaryXENT) lfn).setClipEps(0.0); } + } - @Accessors(fluent = true) - @Data - @NoArgsConstructor - public static class MLNConfig { - private MultiLayerNetwork net; - 
private INDArray input; - private INDArray labels; - private INDArray inputMask; - private INDArray labelMask; - private double epsilon = 1e-6; - private double maxRelError = 1e-3; - private double minAbsoluteError = 1e-8; - private PrintMode print = PrintMode.ZEROS; - private boolean exitOnFirstError = false; - private boolean subset; - private int maxPerParam; - private Set excludeParams; - private Consumer callEachIter; - } + /** + * Check backprop gradients for a MultiLayerNetwork. + * + * @param mln MultiLayerNetwork to test. This must be initialized. + * @param epsilon Usually on the order/ of 1e-4 or so. + * @param maxRelError Maximum relative error. Usually < 1e-5 or so, though maybe more for deep + * networks or those with nonlinear activation + * @param minAbsoluteError Minimum absolute error to cause a failure. Numerical gradients can be + * non-zero due to precision issues. For example, 0.0 vs. 1e-18: relative error is 1.0, but + * not really a failure + * @param print Whether to print full pass/failure details for each parameter gradient + * @param exitOnFirstError If true: return upon first failure. If false: continue checking even if + * one parameter gradient has failed. Typically use false for debugging, true for unit tests. + * @param input Input array to use for forward pass. May be mini-batch data. + * @param labels Labels/targets to use to calculate backprop gradient. May be mini-batch data. + * @return true if gradients are passed, false otherwise. + */ + @Deprecated + public static boolean checkGradients( + MultiLayerNetwork mln, + double epsilon, + double maxRelError, + double minAbsoluteError, + boolean print, + boolean exitOnFirstError, + INDArray input, + INDArray labels) { + return checkGradients( + new MLNConfig() + .net(mln) + .epsilon(epsilon) + .maxRelError(maxRelError) + .minAbsoluteError(minAbsoluteError) + .print(PrintMode.FAILURES_ONLY) + .exitOnFirstError(exitOnFirstError) + .input(input) + .labels(labels)); + } - @Accessors(fluent = true) - @Data - @NoArgsConstructor - public static class GraphConfig { - private ComputationGraph net; - private INDArray[] inputs; - private INDArray[] labels; - private INDArray[] inputMask; - private INDArray[] labelMask; - private double epsilon = 1e-6; - private double maxRelError = 1e-3; - private double minAbsoluteError = 1e-8; - private PrintMode print = PrintMode.ZEROS; - private boolean exitOnFirstError = false; - private boolean subset; - private int maxPerParam; - private Set excludeParams; - private Consumer callEachIter; - } - - /** - * Check backprop gradients for a MultiLayerNetwork. - * @param mln MultiLayerNetwork to test. This must be initialized. - * @param epsilon Usually on the order/ of 1e-4 or so. - * @param maxRelError Maximum relative error. Usually < 1e-5 or so, though maybe more for deep networks or those with nonlinear activation - * @param minAbsoluteError Minimum absolute error to cause a failure. Numerical gradients can be non-zero due to precision issues. - * For example, 0.0 vs. 1e-18: relative error is 1.0, but not really a failure - * @param print Whether to print full pass/failure details for each parameter gradient - * @param exitOnFirstError If true: return upon first failure. If false: continue checking even if - * one parameter gradient has failed. Typically use false for debugging, true for unit tests. - * @param input Input array to use for forward pass. May be mini-batch data. - * @param labels Labels/targets to use to calculate backprop gradient. May be mini-batch data. 
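For reference, a minimal sketch of calling the configuration-object API that the deprecated overload above delegates to; net, features and labels are assumed to already exist, to use DataType.DOUBLE, and to satisfy the updater/activation constraints enforced further below:

import org.deeplearning4j.gradientcheck.GradientCheckUtil;
import org.deeplearning4j.gradientcheck.GradientCheckUtil.MLNConfig;
import org.deeplearning4j.gradientcheck.GradientCheckUtil.PrintMode;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.nd4j.linalg.api.ndarray.INDArray;

class MlnGradientCheckSketch {
  static boolean check(MultiLayerNetwork net, INDArray features, INDArray labels) {
    // epsilon/maxRelError/minAbsoluteError simply restate the MLNConfig field defaults;
    // print is set to report failures only.
    return GradientCheckUtil.checkGradients(
        new MLNConfig()
            .net(net)
            .input(features)
            .labels(labels)
            .epsilon(1e-6)
            .maxRelError(1e-3)
            .minAbsoluteError(1e-8)
            .print(PrintMode.FAILURES_ONLY));
  }
}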
- * @return true if gradients are passed, false otherwise. - */ - @Deprecated - public static boolean checkGradients(MultiLayerNetwork mln, double epsilon, double maxRelError, - double minAbsoluteError, boolean print, boolean exitOnFirstError, INDArray input, INDArray labels) { - return checkGradients(new MLNConfig().net(mln).epsilon(epsilon).maxRelError(maxRelError).minAbsoluteError(minAbsoluteError).print(PrintMode.FAILURES_ONLY) - .exitOnFirstError(exitOnFirstError).input(input).labels(labels)); - } - - @Deprecated - public static boolean checkGradients(MultiLayerNetwork mln, double epsilon, double maxRelError, - double minAbsoluteError, boolean print, boolean exitOnFirstError, - INDArray input, INDArray labels, INDArray inputMask, INDArray labelMask, - boolean subset, int maxPerParam, Set excludeParams, final Integer rngSeedResetEachIter) { - Consumer c = null; - if(rngSeedResetEachIter != null){ - c = new Consumer() { - @Override - public void accept(MultiLayerNetwork multiLayerNetwork) { - Nd4j.getRandom().setSeed(rngSeedResetEachIter); - } - }; - } - - return checkGradients(new MLNConfig().net(mln).epsilon(epsilon).maxRelError(maxRelError).minAbsoluteError(minAbsoluteError).print(PrintMode.FAILURES_ONLY) - .exitOnFirstError(exitOnFirstError).input(input).labels(labels).inputMask(inputMask).labelMask(labelMask).subset(subset).maxPerParam(maxPerParam).excludeParams(excludeParams).callEachIter(c)); - } - - public static boolean checkGradients(MLNConfig c) { - - //Basic sanity checks on input: - if (c.epsilon <= 0.0 || c.epsilon > 0.1) - throw new IllegalArgumentException("Invalid epsilon: expect epsilon in range (0,0.1], usually 1e-4 or so"); - if (c.maxRelError <= 0.0 || c.maxRelError > 0.25) - throw new IllegalArgumentException("Invalid maxRelativeError: " + c.maxRelError); - if (!(c.net.getOutputLayer() instanceof IOutputLayer)) - throw new IllegalArgumentException("Cannot check backprop gradients without OutputLayer"); - - DataType dataType = DataTypeUtil.getDtypeFromContext(); - if (dataType != DataType.DOUBLE) { - throw new IllegalStateException("Cannot perform gradient check: Datatype is not set to double precision (" - + "is: " + dataType + "). Double precision must be used for gradient checks. Set " - + "DataTypeUtil.setDTypeForContext(DataType.DOUBLE); before using GradientCheckUtil"); - } - - DataType netDataType = c.net.getNetConfiguration().getDataType(); - if (netDataType != DataType.DOUBLE) { - throw new IllegalStateException("Cannot perform gradient check: Network datatype is not set to double precision (" - + "is: " + netDataType + "). Double precision must be used for gradient checks. Create network with .dataType(DataType.DOUBLE) before using GradientCheckUtil"); - } - - if(netDataType != c.net.getModelParams().dataType()){ - throw new IllegalStateException("Parameters datatype does not match network configuration datatype (" - + "is: " + c.net.getModelParams().dataType() + "). 
If network datatype is set to DOUBLE, parameters must also be DOUBLE."); - } - - - //Check network configuration: - int layerCount = 0; - for (LayerConfiguration n : c.net.getNetConfiguration().getFlattenedLayerConfigurations()) { - if (n instanceof BaseLayerConfiguration) { - BaseLayerConfiguration bl = (BaseLayerConfiguration) n; - IUpdater u = bl.getIUpdater(); - if (u instanceof Sgd) { - //Must have LR of 1.0 - double lr = ((Sgd) u).getLearningRate(); - if (lr != 1.0) { - throw new IllegalStateException("When using SGD updater, must also use lr=1.0 for layer " - + layerCount + "; got " + u + " with lr=" + lr + " for layer \"" - + n.getLayerName() + "\""); - } - } else if (!(u instanceof NoOp)) { - throw new IllegalStateException( - "Must have Updater.NONE (or SGD + lr=1.0) for layer " + layerCount + "; got " + u); - } - - IActivation activation = bl.getActivationFn(); - if (activation != null) { - if (!VALID_ACTIVATION_FUNCTIONS.contains(activation.getClass())) { - log.warn("LayerConfiguration " + layerCount + " is possibly using an unsuitable activation function: " - + activation.getClass() - + ". Activation functions for gradient checks must be smooth (like sigmoid, tanh, softmax) and not " - + "contain discontinuities like ReLU or LeakyReLU (these may cause spurious failures)"); - } - } + @Deprecated + public static boolean checkGradients( + MultiLayerNetwork mln, + double epsilon, + double maxRelError, + double minAbsoluteError, + boolean print, + boolean exitOnFirstError, + INDArray input, + INDArray labels, + INDArray inputMask, + INDArray labelMask, + boolean subset, + int maxPerParam, + Set excludeParams, + final Integer rngSeedResetEachIter) { + Consumer c = null; + if (rngSeedResetEachIter != null) { + c = + new Consumer() { + @Override + public void accept(MultiLayerNetwork multiLayerNetwork) { + Nd4j.getRandom().setSeed(rngSeedResetEachIter); } + }; + } - if (n.getIDropout() != null && c.callEachIter == null) { - throw new IllegalStateException("When gradient checking dropout, need to reset RNG seed each iter, or no" + - " dropout should be present during gradient checks - got dropout = " - + n.getIDropout() + " for layer " + layerCount); - } + return checkGradients( + new MLNConfig() + .net(mln) + .epsilon(epsilon) + .maxRelError(maxRelError) + .minAbsoluteError(minAbsoluteError) + .print(PrintMode.FAILURES_ONLY) + .exitOnFirstError(exitOnFirstError) + .input(input) + .labels(labels) + .inputMask(inputMask) + .labelMask(labelMask) + .subset(subset) + .maxPerParam(maxPerParam) + .excludeParams(excludeParams) + .callEachIter(c)); + } + + public static boolean checkGradients(MLNConfig c) { + + // Basic sanity checks on input: + if (c.epsilon <= 0.0 || c.epsilon > 0.1) + throw new IllegalArgumentException( + "Invalid epsilon: expect epsilon in range (0,0.1], usually 1e-4 or so"); + if (c.maxRelError <= 0.0 || c.maxRelError > 0.25) + throw new IllegalArgumentException("Invalid maxRelativeError: " + c.maxRelError); + if (!(c.net.getOutputLayer() instanceof IOutputLayer)) + throw new IllegalArgumentException("Cannot check backprop gradients without OutputLayer"); + + DataType dataType = DataTypeUtil.getDtypeFromContext(); + if (dataType != DataType.DOUBLE) { + throw new IllegalStateException( + "Cannot perform gradient check: Datatype is not set to double precision (" + + "is: " + + dataType + + "). Double precision must be used for gradient checks. 
Set " + + "DataTypeUtil.setDTypeForContext(DataType.DOUBLE); before using GradientCheckUtil"); + } + + DataType netDataType = c.net.getNetConfiguration().getDataType(); + if (netDataType != DataType.DOUBLE) { + throw new IllegalStateException( + "Cannot perform gradient check: Network datatype is not set to double precision (" + + "is: " + + netDataType + + "). Double precision must be used for gradient checks. Create network with .dataType(DataType.DOUBLE) before using GradientCheckUtil"); + } + + if (netDataType != c.net.getModelParams().dataType()) { + throw new IllegalStateException( + "Parameters datatype does not match network configuration datatype (" + + "is: " + + c.net.getModelParams().dataType() + + "). If network datatype is set to DOUBLE, parameters must also be DOUBLE."); + } + + // Check network configuration: + int layerCount = 0; + for (LayerConfiguration n : c.net.getNetConfiguration().getFlattenedLayerConfigurations()) { + if (n instanceof BaseLayerConfiguration) { + BaseLayerConfiguration bl = (BaseLayerConfiguration) n; + IUpdater u = bl.getIUpdater(); + if (u instanceof Sgd) { + // Must have LR of 1.0 + double lr = ((Sgd) u).getLearningRate(); + if (lr != 1.0) { + throw new IllegalStateException( + "When using SGD updater, must also use lr=1.0 for layer " + + layerCount + + "; got " + + u + + " with lr=" + + lr + + " for layer \"" + + n.getName() + + "\""); + } + } else if (!(u instanceof NoOp)) { + throw new IllegalStateException( + "Must have Updater.NONE (or SGD + lr=1.0) for layer " + layerCount + "; got " + u); } - //Set softmax clipping to 0 if necessary, to avoid spurious failures due to clipping - for(Layer l : c.net.getLayers()){ - if(l instanceof IOutputLayer){ - configureLossFnClippingIfPresent((IOutputLayer) l); - } + IActivation activation = bl.getActivationFn(); + if (activation != null) { + if (!VALID_ACTIVATION_FUNCTIONS.contains(activation.getClass())) { + log.warn( + "LayerConfiguration " + + layerCount + + " is possibly using an unsuitable activation function: " + + activation.getClass() + + ". 
Activation functions for gradient checks must be smooth (like sigmoid, tanh, softmax) and not " + + "contain discontinuities like ReLU or LeakyReLU (these may cause spurious failures)"); + } + } + } + + if (n.getDropOut() != null && c.callEachIter == null) { + throw new IllegalStateException( + "When gradient checking dropout, need to reset RNG seed each iter, or no" + + " dropout should be present during gradient checks - got dropout = " + + n.getDropOut() + + " for layer " + + layerCount); + } + } + + // Set softmax clipping to 0 if necessary, to avoid spurious failures due to clipping + for (Layer l : c.net.getLayers()) { + if (l instanceof IOutputLayer) { + configureLossFnClippingIfPresent((IOutputLayer) l); + } + } + + c.net.setInput(c.input); + c.net.setLabels(c.labels); + c.net.setLayerMaskArrays(c.inputMask, c.labelMask); + if (c.callEachIter != null) { + c.callEachIter.accept(c.net); + } + c.net.computeGradientAndScore(); + Pair gradAndScore = c.net.gradientAndScore(); + + Updater updater = UpdaterCreator.getUpdater(c.net); + updater.update( + c.net, gradAndScore.getFirst(), 0, 0, c.net.batchSize(), LayerWorkspaceMgr.noWorkspaces()); + + INDArray gradientToCheck = + gradAndScore + .getFirst() + .gradient() + .dup(); // need dup: gradients are a *view* of the full gradient array (which will + // change every time backprop is done) + INDArray originalParams = + c.net.getModelParams().dup(); // need dup: params are a *view* of full parameters + + val nParams = originalParams.length(); + + Map paramTable = c.net.getParamTable(); + List paramNames = new ArrayList<>(paramTable.keySet()); + val paramEnds = new long[paramNames.size()]; + paramEnds[0] = paramTable.get(paramNames.get(0)).length(); + Map stepSizeForParam; + if (c.subset) { + stepSizeForParam = new HashMap<>(); + stepSizeForParam.put( + paramNames.get(0), + (int) Math.max(1, paramTable.get(paramNames.get(0)).length() / c.maxPerParam)); + } else { + stepSizeForParam = null; + } + for (int i = 1; i < paramEnds.length; i++) { + val n = paramTable.get(paramNames.get(i)).length(); + paramEnds[i] = paramEnds[i - 1] + n; + if (c.subset) { + long ss = n / c.maxPerParam; + if (ss == 0) { + ss = n; } - c.net.setInput(c.input); - c.net.setLabels(c.labels); - c.net.setLayerMaskArrays(c.inputMask, c.labelMask); - if(c.callEachIter != null){ - c.callEachIter.accept(c.net); - } - c.net.computeGradientAndScore(); - Pair gradAndScore = c.net.gradientAndScore(); + if (ss > Integer.MAX_VALUE) throw new ND4JArraySizeException(); + stepSizeForParam.put(paramNames.get(i), (int) ss); + } + } - Updater updater = UpdaterCreator.getUpdater(c.net); - updater.update(c.net, gradAndScore.getFirst(), 0, 0, c.net.batchSize(), LayerWorkspaceMgr.noWorkspaces()); + if (c.print == PrintMode.ALL) { + int i = 0; + for (Layer l : c.net.getLayers()) { + Set s = l.getParamTable().keySet(); + log.info( + "LayerConfiguration " + i + ": " + l.getClass().getSimpleName() + " - params " + s); + i++; + } + } - INDArray gradientToCheck = gradAndScore.getFirst().gradient().dup(); //need dup: gradients are a *view* of the full gradient array (which will change every time backprop is done) - INDArray originalParams = c.net.getModelParams().dup(); //need dup: params are a *view* of full parameters + int totalNFailures = 0; + double maxError = 0.0; + DataSet ds = new DataSet(c.input, c.labels, c.inputMask, c.labelMask); + int currParamNameIdx = 0; - val nParams = originalParams.length(); + if (c.excludeParams != null && !c.excludeParams.isEmpty()) { + log.info("NOTE: parameters 
will be skipped due to config: {}", c.excludeParams); + } - Map paramTable = c.net.getParamTable(); - List paramNames = new ArrayList<>(paramTable.keySet()); - val paramEnds = new long[paramNames.size()]; - paramEnds[0] = paramTable.get(paramNames.get(0)).length(); - Map stepSizeForParam; - if(c.subset){ - stepSizeForParam = new HashMap<>(); - stepSizeForParam.put(paramNames.get(0), (int) Math.max(1, paramTable.get(paramNames.get(0)).length() / c.maxPerParam)); + INDArray params = + c.net.getModelParams(); // Assumption here: params is a view that we can modify in-place + for (long i = 0; i < nParams; ) { + // Get param name + if (i >= paramEnds[currParamNameIdx]) { + currParamNameIdx++; + } + String paramName = paramNames.get(currParamNameIdx); + if (c.excludeParams != null && c.excludeParams.contains(paramName)) { + // log.info("Skipping parameters for parameter name: {}", paramName); + i = paramEnds[currParamNameIdx++]; + continue; + } + + // (w+epsilon): Do forward pass and score + double origValue = params.getDouble(i); + params.putScalar(i, origValue + c.epsilon); + if (c.callEachIter != null) { + c.callEachIter.accept(c.net); + } + double scorePlus = c.net.score(ds, true); + + // (w-epsilon): Do forward pass and score + params.putScalar(i, origValue - c.epsilon); + if (c.callEachIter != null) { + c.callEachIter.accept(c.net); + } + double scoreMinus = c.net.score(ds, true); + + // Reset original param value + params.putScalar(i, origValue); + + // Calculate numerical parameter gradient: + double scoreDelta = scorePlus - scoreMinus; + + double numericalGradient = scoreDelta / (2 * c.epsilon); + if (Double.isNaN(numericalGradient)) + throw new IllegalStateException( + "Numerical gradient was NaN for parameter " + i + " of " + nParams); + + double backpropGradient = gradientToCheck.getDouble(i); + // http://cs231n.github.io/neural-networks-3/#gradcheck + // use mean centered + double relError = + Math.abs(backpropGradient - numericalGradient) + / (Math.abs(numericalGradient) + Math.abs(backpropGradient)); + if (backpropGradient == 0.0 && numericalGradient == 0.0) + relError = 0.0; // Edge case: i.e., RNNs with time series length of 1.0 + + if (relError > maxError) maxError = relError; + if (relError > c.maxRelError || Double.isNaN(relError)) { + double absError = Math.abs(backpropGradient - numericalGradient); + if (absError < c.minAbsoluteError) { + if (c.print == PrintMode.ALL || c.print == PrintMode.ZEROS && absError == 0.0) { + log.info( + "Param " + + i + + " (" + + paramName + + ") passed: grad= " + + backpropGradient + + ", numericalGrad= " + + numericalGradient + + ", relError= " + + relError + + "; absolute error = " + + absError + + " < minAbsoluteError = " + + c.minAbsoluteError); + } } else { - stepSizeForParam = null; + log.info( + "Param " + + i + + " (" + + paramName + + ") FAILED: grad= " + + backpropGradient + + ", numericalGrad= " + + numericalGradient + + ", relError= " + + relError + + ", scorePlus=" + + scorePlus + + ", scoreMinus= " + + scoreMinus + + ", paramValue = " + + origValue); + if (c.exitOnFirstError) return false; + totalNFailures++; } - for (int i = 1; i < paramEnds.length; i++) { - val n = paramTable.get(paramNames.get(i)).length(); - paramEnds[i] = paramEnds[i - 1] + n; - if(c.subset){ - long ss = n / c.maxPerParam; - if(ss == 0){ - ss = n; - } + } else if (c.print == PrintMode.ALL) { + log.info( + "Param " + + i + + " (" + + paramName + + ") passed: grad= " + + backpropGradient + + ", numericalGrad= " + + numericalGradient + + ", relError= " + + 
relError); + } - if (ss > Integer.MAX_VALUE) - throw new ND4JArraySizeException(); - stepSizeForParam.put(paramNames.get(i), (int) ss); - } + long step; + if (c.subset) { + step = stepSizeForParam.get(paramName); + if (i + step > paramEnds[currParamNameIdx] + 1) { + step = paramEnds[currParamNameIdx] + 1 - i; } + } else { + step = 1; + } - if(c.print == PrintMode.ALL) { - int i=0; - for (Layer l : c.net.getLayers()) { - Set s = l.getParamTable().keySet(); - log.info("LayerConfiguration " + i + ": " + l.getClass().getSimpleName() + " - params " + s); - i++; - } - } - - - int totalNFailures = 0; - double maxError = 0.0; - DataSet ds = new DataSet(c.input, c.labels, c.inputMask, c.labelMask); - int currParamNameIdx = 0; - - if(c.excludeParams != null && !c.excludeParams.isEmpty()){ - log.info("NOTE: parameters will be skipped due to config: {}", c.excludeParams); - } - - INDArray params = c.net.getModelParams(); //Assumption here: params is a view that we can modify in-place - for (long i = 0; i < nParams; ) { - //Get param name - if (i >= paramEnds[currParamNameIdx]) { - currParamNameIdx++; - } - String paramName = paramNames.get(currParamNameIdx); - if(c.excludeParams != null && c.excludeParams.contains(paramName)){ -// log.info("Skipping parameters for parameter name: {}", paramName); - i = paramEnds[currParamNameIdx++]; - continue; - } - - //(w+epsilon): Do forward pass and score - double origValue = params.getDouble(i); - params.putScalar(i, origValue + c.epsilon); - if(c.callEachIter != null){ - c.callEachIter.accept(c.net); - } - double scorePlus = c.net.score(ds, true); - - //(w-epsilon): Do forward pass and score - params.putScalar(i, origValue - c.epsilon); - if(c.callEachIter != null){ - c.callEachIter.accept(c.net); - } - double scoreMinus = c.net.score(ds, true); - - //Reset original param value - params.putScalar(i, origValue); - - //Calculate numerical parameter gradient: - double scoreDelta = scorePlus - scoreMinus; - - double numericalGradient = scoreDelta / (2 * c.epsilon); - if (Double.isNaN(numericalGradient)) - throw new IllegalStateException("Numerical gradient was NaN for parameter " + i + " of " + nParams); - - double backpropGradient = gradientToCheck.getDouble(i); - //http://cs231n.github.io/neural-networks-3/#gradcheck - //use mean centered - double relError = Math.abs(backpropGradient - numericalGradient) - / (Math.abs(numericalGradient) + Math.abs(backpropGradient)); - if (backpropGradient == 0.0 && numericalGradient == 0.0) - relError = 0.0; //Edge case: i.e., RNNs with time series length of 1.0 - - if (relError > maxError) - maxError = relError; - if (relError > c.maxRelError || Double.isNaN(relError)) { - double absError = Math.abs(backpropGradient - numericalGradient); - if (absError < c.minAbsoluteError) { - if(c.print == PrintMode.ALL || c.print == PrintMode.ZEROS && absError == 0.0) { - log.info("Param " + i + " (" + paramName + ") passed: grad= " + backpropGradient - + ", numericalGrad= " + numericalGradient + ", relError= " + relError - + "; absolute error = " + absError + " < minAbsoluteError = " + c.minAbsoluteError); - } - } else { - log.info("Param " + i + " (" + paramName + ") FAILED: grad= " + backpropGradient - + ", numericalGrad= " + numericalGradient + ", relError= " + relError - + ", scorePlus=" + scorePlus + ", scoreMinus= " + scoreMinus + ", paramValue = " + origValue); - if (c.exitOnFirstError) - return false; - totalNFailures++; - } - } else if (c.print == PrintMode.ALL) { - log.info("Param " + i + " (" + paramName + ") passed: grad= " + 
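Both the MultiLayerNetwork path above and the ComputationGraph path below use the same numerics: each parameter w_i is perturbed by plus/minus epsilon, the network is re-scored, and the central-difference estimate is compared to the backprop gradient using the mean-centered relative error from the cs231n notes:

g_{\mathrm{num}} = \frac{s(w_i + \epsilon) - s(w_i - \epsilon)}{2\epsilon},
\qquad
\mathrm{relError} = \frac{\lvert g_{\mathrm{bp}} - g_{\mathrm{num}} \rvert}{\lvert g_{\mathrm{bp}} \rvert + \lvert g_{\mathrm{num}} \rvert}

A parameter only counts as a failure when relError exceeds maxRelError (or is NaN) and the absolute difference |g_bp - g_num| is at least minAbsoluteError; when both gradients are exactly zero, relError is defined as 0.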
backpropGradient + ", numericalGrad= " - + numericalGradient + ", relError= " + relError); - } - - long step; - if(c.subset){ - step = stepSizeForParam.get(paramName); - if(i + step > paramEnds[currParamNameIdx]+1){ - step = paramEnds[currParamNameIdx]+1 - i; - } - } else { - step = 1; - } - - i += step; - } - - val nPass = nParams - totalNFailures; - log.info("GradientCheckUtil.checkGradients(): " + nParams + " params checked, " + nPass + " passed, " - + totalNFailures + " failed. Largest relative error = " + maxError); - - return totalNFailures == 0; + i += step; } - public static boolean checkGradients(GraphConfig c){ - //Basic sanity checks on input: - if (c.epsilon <= 0.0 || c.epsilon > 0.1) - throw new IllegalArgumentException("Invalid epsilon: expect epsilon in range (0,0.1], usually 1e-4 or so"); - if (c.maxRelError <= 0.0 || c.maxRelError > 0.25) - throw new IllegalArgumentException("Invalid maxRelativeError: " + c.maxRelError); + val nPass = nParams - totalNFailures; + log.info( + "GradientCheckUtil.checkGradients(): " + + nParams + + " params checked, " + + nPass + + " passed, " + + totalNFailures + + " failed. Largest relative error = " + + maxError); - if (c.net.getNumInputArrays() != c.inputs.length) - throw new IllegalArgumentException("Invalid input arrays: expect " + c.net.getNumInputArrays() + " inputs"); - if (c.net.getNumOutputArrays() != c.labels.length) - throw new IllegalArgumentException( - "Invalid labels arrays: expect " + c.net.getNumOutputArrays() + " outputs"); + return totalNFailures == 0; + } - DataType dataType = DataTypeUtil.getDtypeFromContext(); - if (dataType != DataType.DOUBLE) { - throw new IllegalStateException("Cannot perform gradient check: Datatype is not set to double precision (" - + "is: " + dataType + "). Double precision must be used for gradient checks. Set " - + "DataTypeUtil.setDTypeForContext(DataType.DOUBLE); before using GradientCheckUtil"); - } + public static boolean checkGradients(GraphConfig c) { + // Basic sanity checks on input: + if (c.epsilon <= 0.0 || c.epsilon > 0.1) + throw new IllegalArgumentException( + "Invalid epsilon: expect epsilon in range (0,0.1], usually 1e-4 or so"); + if (c.maxRelError <= 0.0 || c.maxRelError > 0.25) + throw new IllegalArgumentException("Invalid maxRelativeError: " + c.maxRelError); - DataType netDataType = c.net.getComputationGraphConfiguration().getDataType(); - if (netDataType != DataType.DOUBLE) { - throw new IllegalStateException("Cannot perform gradient check: Network datatype is not set to double precision (" - + "is: " + netDataType + "). Double precision must be used for gradient checks. Create network with .dataType(DataType.DOUBLE) before using GradientCheckUtil"); - } + if (c.net.getNumInputArrays() != c.inputs.length) + throw new IllegalArgumentException( + "Invalid input arrays: expect " + c.net.getNumInputArrays() + " inputs"); + if (c.net.getNumOutputArrays() != c.labels.length) + throw new IllegalArgumentException( + "Invalid labels arrays: expect " + c.net.getNumOutputArrays() + " outputs"); - if(netDataType != c.net.getModelParams().dataType()){ - throw new IllegalStateException("Parameters datatype does not match network configuration datatype (" - + "is: " + c.net.getModelParams().dataType() + "). 
If network datatype is set to DOUBLE, parameters must also be DOUBLE."); - } - - //Check configuration - int layerCount = 0; - for (String vertexName : c.net.getComputationGraphConfiguration().getVertices().keySet()) { - GraphVertex gv = c.net.getComputationGraphConfiguration().getVertices().get(vertexName); - if (!(gv instanceof LayerVertex)) - continue; - LayerVertex lv = (LayerVertex) gv; - - if (lv.getLayerConfiguration() instanceof BaseLayerConfiguration) { - BaseLayerConfiguration bl = (BaseLayerConfiguration) lv.getLayerConfiguration(); - IUpdater u = bl.getIUpdater(); - if (u instanceof Sgd) { - //Must have LR of 1.0 - double lr = ((Sgd) u).getLearningRate(); - if (lr != 1.0) { - throw new IllegalStateException("When using SGD updater, must also use lr=1.0 for layer " - + layerCount + "; got " + u + " with lr=" + lr + " for layer \"" - + lv.getLayerConfiguration().getLayerName() + "\""); - } - } else if (!(u instanceof NoOp)) { - throw new IllegalStateException( - "Must have Updater.NONE (or SGD + lr=1.0) for layer " + layerCount + "; got " + u); - } - - IActivation activation = bl.getActivationFn(); - if (activation != null) { - if (!VALID_ACTIVATION_FUNCTIONS.contains(activation.getClass())) { - log.warn("LayerConfiguration \"" + vertexName + "\" is possibly using an unsuitable activation function: " - + activation.getClass() - + ". Activation functions for gradient checks must be smooth (like sigmoid, tanh, softmax) and not " - + "contain discontinuities like ReLU or LeakyReLU (these may cause spurious failures)"); - } - } - } - - if (lv.getLayerConfiguration().getIDropout() != null && c.callEachIter == null) { - throw new IllegalStateException("When gradient checking dropout, rng seed must be reset each iteration, or no" + - " dropout should be present during gradient checks - got dropout = " - + lv.getLayerConfiguration().getIDropout() + " for layer " + layerCount); - } - } - - //Set softmax clipping to 0 if necessary, to avoid spurious failures due to clipping - for(Layer l : c.net.getLayers()){ - if(l instanceof IOutputLayer){ - configureLossFnClippingIfPresent((IOutputLayer) l); - } - } - - for (int i = 0; i < c.inputs.length; i++) - c.net.setInput(i, c.inputs[i]); - for (int i = 0; i < c.labels.length; i++) - c.net.setLabel(i, c.labels[i]); - - c.net.setLayerMaskArrays(c.inputMask, c.labelMask); - - if(c.callEachIter != null){ - c.callEachIter.accept(c.net); - } - - c.net.computeGradientAndScore(); - Pair gradAndScore = c.net.gradientAndScore(); - - ComputationGraphUpdater updater = new ComputationGraphUpdater(c.net); - updater.update(gradAndScore.getFirst(), 0, 0, c.net.batchSize(), LayerWorkspaceMgr.noWorkspaces()); - - INDArray gradientToCheck = gradAndScore.getFirst().gradient().dup(); //need dup: gradients are a *view* of the full gradient array (which will change every time backprop is done) - INDArray originalParams = c.net.getModelParams().dup(); //need dup: params are a *view* of full parameters - - val nParams = originalParams.length(); - - Map paramTable = c.net.getParamTable(); - List paramNames = new ArrayList<>(paramTable.keySet()); - val paramEnds = new long[paramNames.size()]; - paramEnds[0] = paramTable.get(paramNames.get(0)).length(); - for (int i = 1; i < paramEnds.length; i++) { - paramEnds[i] = paramEnds[i - 1] + paramTable.get(paramNames.get(i)).length(); - } - - if(c.excludeParams != null && !c.excludeParams.isEmpty()){ - log.info("NOTE: parameters will be skipped due to config: {}", c.excludeParams); - } - - int currParamNameIdx = 0; - int 
totalNFailures = 0; - double maxError = 0.0; - MultiDataSet mds = new MultiDataSet(c.inputs, c.labels, c.inputMask, c.labelMask); - INDArray params = c.net.getModelParams(); //Assumption here: params is a view that we can modify in-place - for (long i = 0; i < nParams; i++) { - //Get param name - if (i >= paramEnds[currParamNameIdx]) { - currParamNameIdx++; - } - String paramName = paramNames.get(currParamNameIdx); - if(c.excludeParams != null && c.excludeParams.contains(paramName)){ - //log.info("Skipping parameters for parameter name: {}", paramName); - i = paramEnds[currParamNameIdx++]; - continue; - } - - //(w+epsilon): Do forward pass and score - double origValue = params.getDouble(i); - - params.putScalar(i, origValue + c.epsilon); - if(c.callEachIter != null){ - c.callEachIter.accept(c.net); - } - double scorePlus = c.net.score(mds, true); //training == true for batch norm, etc (scores and gradients need to be calculated on same thing) - - //(w-epsilon): Do forward pass and score - params.putScalar(i, origValue - c.epsilon); - if(c.callEachIter != null){ - c.callEachIter.accept(c.net); - } - double scoreMinus = c.net.score(mds, true); - - //Reset original param value - params.putScalar(i, origValue); - - //Calculate numerical parameter gradient: - double scoreDelta = scorePlus - scoreMinus; - - double numericalGradient = scoreDelta / (2 * c.epsilon); - if (Double.isNaN(numericalGradient)) - throw new IllegalStateException("Numerical gradient was NaN for parameter " + i + " of " + nParams); - - double backpropGradient = gradientToCheck.getDouble(i); - //http://cs231n.github.io/neural-networks-3/#gradcheck - //use mean centered - double relError = Math.abs(backpropGradient - numericalGradient) - / (Math.abs(numericalGradient) + Math.abs(backpropGradient)); - if (backpropGradient == 0.0 && numericalGradient == 0.0) - relError = 0.0; //Edge case: i.e., RNNs with time series length of 1.0 - - if (relError > maxError) - maxError = relError; - if (relError > c.maxRelError || Double.isNaN(relError)) { - double absError = Math.abs(backpropGradient - numericalGradient); - if (absError < c.minAbsoluteError) { - if(c.print == PrintMode.ALL || c.print == PrintMode.ZEROS && absError == 0.0) { - log.info("Param " + i + " (" + paramName + ") passed: grad= " + backpropGradient - + ", numericalGrad= " + numericalGradient + ", relError= " + relError - + "; absolute error = " + absError + " < minAbsoluteError = " + c.minAbsoluteError); - } - } else { - log.info("Param " + i + " (" + paramName + ") FAILED: grad= " + backpropGradient - + ", numericalGrad= " + numericalGradient + ", relError= " + relError - + ", scorePlus=" + scorePlus + ", scoreMinus= " + scoreMinus + ", paramValue = " + origValue); - if (c.exitOnFirstError) - return false; - totalNFailures++; - } - } else if (c.print == PrintMode.ALL) { - log.info("Param " + i + " (" + paramName + ") passed: grad= " + backpropGradient + ", numericalGrad= " - + numericalGradient + ", relError= " + relError); - } - } - - val nPass = nParams - totalNFailures; - log.info("GradientCheckUtil.checkGradients(): " + nParams + " params checked, " + nPass + " passed, " - + totalNFailures + " failed. Largest relative error = " + maxError); - - return totalNFailures == 0; + DataType dataType = DataTypeUtil.getDtypeFromContext(); + if (dataType != DataType.DOUBLE) { + throw new IllegalStateException( + "Cannot perform gradient check: Datatype is not set to double precision (" + + "is: " + + dataType + + "). Double precision must be used for gradient checks. 
Set " + + "DataTypeUtil.setDTypeForContext(DataType.DOUBLE); before using GradientCheckUtil"); } - - - /** - * Check backprop gradients for a pretrain layer - * - * NOTE: gradient checking pretrain layers can be difficult... - */ - public static boolean checkGradientsPretrainLayer(Layer layer, double epsilon, double maxRelError, - double minAbsoluteError, boolean print, boolean exitOnFirstError, INDArray input, int rngSeed) { - - LayerWorkspaceMgr mgr = LayerWorkspaceMgr.noWorkspaces(); - - //Basic sanity checks on input: - if (epsilon <= 0.0 || epsilon > 0.1) - throw new IllegalArgumentException("Invalid epsilon: expect epsilon in range (0,0.1], usually 1e-4 or so"); - if (maxRelError <= 0.0 || maxRelError > 0.25) - throw new IllegalArgumentException("Invalid maxRelativeError: " + maxRelError); - - DataType dataType = DataTypeUtil.getDtypeFromContext(); - if (dataType != DataType.DOUBLE) { - throw new IllegalStateException("Cannot perform gradient check: Datatype is not set to double precision (" - + "is: " + dataType + "). Double precision must be used for gradient checks. Set " - + "DataTypeUtil.setDTypeForContext(DataType.DOUBLE); before using GradientCheckUtil"); - } - - //Check network configuration: - layer.setInput(input, LayerWorkspaceMgr.noWorkspaces()); - Nd4j.getRandom().setSeed(rngSeed); - layer.computeGradientAndScore(mgr); - Pair gradAndScore = layer.gradientAndScore(); - - Updater updater = UpdaterCreator.getUpdater(layer); - updater.update(layer, gradAndScore.getFirst(), 0, 0, layer.batchSize(), LayerWorkspaceMgr.noWorkspaces()); - - INDArray gradientToCheck = gradAndScore.getFirst().gradient().dup(); //need dup: gradients are a *view* of the full gradient array (which will change every time backprop is done) - INDArray originalParams = layer.getParams().dup(); //need dup: params are a *view* of full parameters - - val nParams = originalParams.length(); - - Map paramTable = layer.getParamTable(); - List paramNames = new ArrayList<>(paramTable.keySet()); - val paramEnds = new long[paramNames.size()]; - paramEnds[0] = paramTable.get(paramNames.get(0)).length(); - for (int i = 1; i < paramEnds.length; i++) { - paramEnds[i] = paramEnds[i - 1] + paramTable.get(paramNames.get(i)).length(); - } - - - int totalNFailures = 0; - double maxError = 0.0; - int currParamNameIdx = 0; - - INDArray params = layer.getParams(); //Assumption here: params is a view that we can modify in-place - for (int i = 0; i < nParams; i++) { - //Get param name - if (i >= paramEnds[currParamNameIdx]) { - currParamNameIdx++; - } - String paramName = paramNames.get(currParamNameIdx); - - //(w+epsilon): Do forward pass and score - double origValue = params.getDouble(i); - params.putScalar(i, origValue + epsilon); - - //TODO add a 'score' method that doesn't calculate gradients... 
- Nd4j.getRandom().setSeed(rngSeed); - layer.computeGradientAndScore(mgr); - double scorePlus = layer.getScore(); - - //(w-epsilon): Do forward pass and score - params.putScalar(i, origValue - epsilon); - Nd4j.getRandom().setSeed(rngSeed); - layer.computeGradientAndScore(mgr); - double scoreMinus = layer.getScore(); - - //Reset original param value - params.putScalar(i, origValue); - - //Calculate numerical parameter gradient: - double scoreDelta = scorePlus - scoreMinus; - - double numericalGradient = scoreDelta / (2 * epsilon); - if (Double.isNaN(numericalGradient)) - throw new IllegalStateException("Numerical gradient was NaN for parameter " + i + " of " + nParams); - - double backpropGradient = gradientToCheck.getDouble(i); - //http://cs231n.github.io/neural-networks-3/#gradcheck - //use mean centered - double relError = Math.abs(backpropGradient - numericalGradient) - / (Math.abs(numericalGradient) + Math.abs(backpropGradient)); - if (backpropGradient == 0.0 && numericalGradient == 0.0) - relError = 0.0; //Edge case: i.e., RNNs with time series length of 1.0 - - if (relError > maxError) - maxError = relError; - if (relError > maxRelError || Double.isNaN(relError)) { - double absError = Math.abs(backpropGradient - numericalGradient); - if (absError < minAbsoluteError) { - log.info("Param " + i + " (" + paramName + ") passed: grad= " + backpropGradient - + ", numericalGrad= " + numericalGradient + ", relError= " + relError - + "; absolute error = " + absError + " < minAbsoluteError = " + minAbsoluteError); - } else { - if (print) - log.info("Param " + i + " (" + paramName + ") FAILED: grad= " + backpropGradient - + ", numericalGrad= " + numericalGradient + ", relError= " + relError - + ", scorePlus=" + scorePlus + ", scoreMinus= " + scoreMinus + ", paramValue = " + origValue); - if (exitOnFirstError) - return false; - totalNFailures++; - } - } else if (print) { - log.info("Param " + i + " (" + paramName + ") passed: grad= " + backpropGradient + ", numericalGrad= " - + numericalGradient + ", relError= " + relError); - } - } - - if (print) { - val nPass = nParams - totalNFailures; - log.info("GradientCheckUtil.checkGradients(): " + nParams + " params checked, " + nPass + " passed, " - + totalNFailures + " failed. Largest relative error = " + maxError); - } - - return totalNFailures == 0; + DataType netDataType = c.net.getComputationGraphConfiguration().getDataType(); + if (netDataType != DataType.DOUBLE) { + throw new IllegalStateException( + "Cannot perform gradient check: Network datatype is not set to double precision (" + + "is: " + + netDataType + + "). Double precision must be used for gradient checks. Create network with .dataType(DataType.DOUBLE) before using GradientCheckUtil"); } + + if (netDataType != c.net.getModelParams().dataType()) { + throw new IllegalStateException( + "Parameters datatype does not match network configuration datatype (" + + "is: " + + c.net.getModelParams().dataType() + + "). 
If network datatype is set to DOUBLE, parameters must also be DOUBLE."); + } + + // Check configuration + int layerCount = 0; + for (String vertexName : c.net.getComputationGraphConfiguration().getVertices().keySet()) { + GraphVertex gv = c.net.getComputationGraphConfiguration().getVertices().get(vertexName); + if (!(gv instanceof LayerVertex)) continue; + LayerVertex lv = (LayerVertex) gv; + + if (lv.getLayerConfiguration() instanceof BaseLayerConfiguration) { + BaseLayerConfiguration bl = (BaseLayerConfiguration) lv.getLayerConfiguration(); + IUpdater u = bl.getIUpdater(); + if (u instanceof Sgd) { + // Must have LR of 1.0 + double lr = ((Sgd) u).getLearningRate(); + if (lr != 1.0) { + throw new IllegalStateException( + "When using SGD updater, must also use lr=1.0 for layer " + + layerCount + + "; got " + + u + + " with lr=" + + lr + + " for layer \"" + + lv.getLayerConfiguration().getName() + + "\""); + } + } else if (!(u instanceof NoOp)) { + throw new IllegalStateException( + "Must have Updater.NONE (or SGD + lr=1.0) for layer " + layerCount + "; got " + u); + } + + IActivation activation = bl.getActivationFn(); + if (activation != null) { + if (!VALID_ACTIVATION_FUNCTIONS.contains(activation.getClass())) { + log.warn( + "LayerConfiguration \"" + + vertexName + + "\" is possibly using an unsuitable activation function: " + + activation.getClass() + + ". Activation functions for gradient checks must be smooth (like sigmoid, tanh, softmax) and not " + + "contain discontinuities like ReLU or LeakyReLU (these may cause spurious failures)"); + } + } + } + + if (lv.getLayerConfiguration().getDropOut() != null && c.callEachIter == null) { + throw new IllegalStateException( + "When gradient checking dropout, rng seed must be reset each iteration, or no" + + " dropout should be present during gradient checks - got dropout = " + + lv.getLayerConfiguration().getDropOut() + + " for layer " + + layerCount); + } + } + + // Set softmax clipping to 0 if necessary, to avoid spurious failures due to clipping + for (Layer l : c.net.getLayers()) { + if (l instanceof IOutputLayer) { + configureLossFnClippingIfPresent((IOutputLayer) l); + } + } + + for (int i = 0; i < c.inputs.length; i++) c.net.setInput(i, c.inputs[i]); + for (int i = 0; i < c.labels.length; i++) c.net.setLabel(i, c.labels[i]); + + c.net.setLayerMaskArrays(c.inputMask, c.labelMask); + + if (c.callEachIter != null) { + c.callEachIter.accept(c.net); + } + + c.net.computeGradientAndScore(); + Pair gradAndScore = c.net.gradientAndScore(); + + ComputationGraphUpdater updater = new ComputationGraphUpdater(c.net); + updater.update( + gradAndScore.getFirst(), 0, 0, c.net.batchSize(), LayerWorkspaceMgr.noWorkspaces()); + + INDArray gradientToCheck = + gradAndScore + .getFirst() + .gradient() + .dup(); // need dup: gradients are a *view* of the full gradient array (which will + // change every time backprop is done) + INDArray originalParams = + c.net.getModelParams().dup(); // need dup: params are a *view* of full parameters + + val nParams = originalParams.length(); + + Map paramTable = c.net.getParamTable(); + List paramNames = new ArrayList<>(paramTable.keySet()); + val paramEnds = new long[paramNames.size()]; + paramEnds[0] = paramTable.get(paramNames.get(0)).length(); + for (int i = 1; i < paramEnds.length; i++) { + paramEnds[i] = paramEnds[i - 1] + paramTable.get(paramNames.get(i)).length(); + } + + if (c.excludeParams != null && !c.excludeParams.isEmpty()) { + log.info("NOTE: parameters will be skipped due to config: {}", c.excludeParams); 
+ } + + int currParamNameIdx = 0; + int totalNFailures = 0; + double maxError = 0.0; + MultiDataSet mds = new MultiDataSet(c.inputs, c.labels, c.inputMask, c.labelMask); + INDArray params = + c.net.getModelParams(); // Assumption here: params is a view that we can modify in-place + for (long i = 0; i < nParams; i++) { + // Get param name + if (i >= paramEnds[currParamNameIdx]) { + currParamNameIdx++; + } + String paramName = paramNames.get(currParamNameIdx); + if (c.excludeParams != null && c.excludeParams.contains(paramName)) { + // log.info("Skipping parameters for parameter name: {}", paramName); + i = paramEnds[currParamNameIdx++]; + continue; + } + + // (w+epsilon): Do forward pass and score + double origValue = params.getDouble(i); + + params.putScalar(i, origValue + c.epsilon); + if (c.callEachIter != null) { + c.callEachIter.accept(c.net); + } + double scorePlus = + c.net.score( + mds, + true); // training == true for batch norm, etc (scores and gradients need to be + // calculated on same thing) + + // (w-epsilon): Do forward pass and score + params.putScalar(i, origValue - c.epsilon); + if (c.callEachIter != null) { + c.callEachIter.accept(c.net); + } + double scoreMinus = c.net.score(mds, true); + + // Reset original param value + params.putScalar(i, origValue); + + // Calculate numerical parameter gradient: + double scoreDelta = scorePlus - scoreMinus; + + double numericalGradient = scoreDelta / (2 * c.epsilon); + if (Double.isNaN(numericalGradient)) + throw new IllegalStateException( + "Numerical gradient was NaN for parameter " + i + " of " + nParams); + + double backpropGradient = gradientToCheck.getDouble(i); + // http://cs231n.github.io/neural-networks-3/#gradcheck + // use mean centered + double relError = + Math.abs(backpropGradient - numericalGradient) + / (Math.abs(numericalGradient) + Math.abs(backpropGradient)); + if (backpropGradient == 0.0 && numericalGradient == 0.0) + relError = 0.0; // Edge case: i.e., RNNs with time series length of 1.0 + + if (relError > maxError) maxError = relError; + if (relError > c.maxRelError || Double.isNaN(relError)) { + double absError = Math.abs(backpropGradient - numericalGradient); + if (absError < c.minAbsoluteError) { + if (c.print == PrintMode.ALL || c.print == PrintMode.ZEROS && absError == 0.0) { + log.info( + "Param " + + i + + " (" + + paramName + + ") passed: grad= " + + backpropGradient + + ", numericalGrad= " + + numericalGradient + + ", relError= " + + relError + + "; absolute error = " + + absError + + " < minAbsoluteError = " + + c.minAbsoluteError); + } + } else { + log.info( + "Param " + + i + + " (" + + paramName + + ") FAILED: grad= " + + backpropGradient + + ", numericalGrad= " + + numericalGradient + + ", relError= " + + relError + + ", scorePlus=" + + scorePlus + + ", scoreMinus= " + + scoreMinus + + ", paramValue = " + + origValue); + if (c.exitOnFirstError) return false; + totalNFailures++; + } + } else if (c.print == PrintMode.ALL) { + log.info( + "Param " + + i + + " (" + + paramName + + ") passed: grad= " + + backpropGradient + + ", numericalGrad= " + + numericalGradient + + ", relError= " + + relError); + } + } + + val nPass = nParams - totalNFailures; + log.info( + "GradientCheckUtil.checkGradients(): " + + nParams + + " params checked, " + + nPass + + " passed, " + + totalNFailures + + " failed. Largest relative error = " + + maxError); + + return totalNFailures == 0; + } + + /** + * Check backprop gradients for a pretrain layer + * + *

NOTE: gradient checking pretrain layers can be difficult... + */ + public static boolean checkGradientsPretrainLayer( + Layer layer, + double epsilon, + double maxRelError, + double minAbsoluteError, + boolean print, + boolean exitOnFirstError, + INDArray input, + int rngSeed) { + + LayerWorkspaceMgr mgr = LayerWorkspaceMgr.noWorkspaces(); + + // Basic sanity checks on input: + if (epsilon <= 0.0 || epsilon > 0.1) + throw new IllegalArgumentException( + "Invalid epsilon: expect epsilon in range (0,0.1], usually 1e-4 or so"); + if (maxRelError <= 0.0 || maxRelError > 0.25) + throw new IllegalArgumentException("Invalid maxRelativeError: " + maxRelError); + + DataType dataType = DataTypeUtil.getDtypeFromContext(); + if (dataType != DataType.DOUBLE) { + throw new IllegalStateException( + "Cannot perform gradient check: Datatype is not set to double precision (" + + "is: " + + dataType + + "). Double precision must be used for gradient checks. Set " + + "DataTypeUtil.setDTypeForContext(DataType.DOUBLE); before using GradientCheckUtil"); + } + + // Check network configuration: + layer.setInput(input, LayerWorkspaceMgr.noWorkspaces()); + Nd4j.getRandom().setSeed(rngSeed); + layer.computeGradientAndScore(mgr); + Pair gradAndScore = layer.gradientAndScore(); + + Updater updater = UpdaterCreator.getUpdater(layer); + updater.update( + layer, gradAndScore.getFirst(), 0, 0, layer.batchSize(), LayerWorkspaceMgr.noWorkspaces()); + + INDArray gradientToCheck = + gradAndScore + .getFirst() + .gradient() + .dup(); // need dup: gradients are a *view* of the full gradient array (which will + // change every time backprop is done) + INDArray originalParams = + layer.getParams().dup(); // need dup: params are a *view* of full parameters + + val nParams = originalParams.length(); + + Map paramTable = layer.getParamTable(); + List paramNames = new ArrayList<>(paramTable.keySet()); + val paramEnds = new long[paramNames.size()]; + paramEnds[0] = paramTable.get(paramNames.get(0)).length(); + for (int i = 1; i < paramEnds.length; i++) { + paramEnds[i] = paramEnds[i - 1] + paramTable.get(paramNames.get(i)).length(); + } + + int totalNFailures = 0; + double maxError = 0.0; + int currParamNameIdx = 0; + + INDArray params = + layer.getParams(); // Assumption here: params is a view that we can modify in-place + for (int i = 0; i < nParams; i++) { + // Get param name + if (i >= paramEnds[currParamNameIdx]) { + currParamNameIdx++; + } + String paramName = paramNames.get(currParamNameIdx); + + // (w+epsilon): Do forward pass and score + double origValue = params.getDouble(i); + params.putScalar(i, origValue + epsilon); + + // TODO add a 'score' method that doesn't calculate gradients... 
+ Nd4j.getRandom().setSeed(rngSeed); + layer.computeGradientAndScore(mgr); + double scorePlus = layer.getScore(); + + // (w-epsilon): Do forward pass and score + params.putScalar(i, origValue - epsilon); + Nd4j.getRandom().setSeed(rngSeed); + layer.computeGradientAndScore(mgr); + double scoreMinus = layer.getScore(); + + // Reset original param value + params.putScalar(i, origValue); + + // Calculate numerical parameter gradient: + double scoreDelta = scorePlus - scoreMinus; + + double numericalGradient = scoreDelta / (2 * epsilon); + if (Double.isNaN(numericalGradient)) + throw new IllegalStateException( + "Numerical gradient was NaN for parameter " + i + " of " + nParams); + + double backpropGradient = gradientToCheck.getDouble(i); + // http://cs231n.github.io/neural-networks-3/#gradcheck + // use mean centered + double relError = + Math.abs(backpropGradient - numericalGradient) + / (Math.abs(numericalGradient) + Math.abs(backpropGradient)); + if (backpropGradient == 0.0 && numericalGradient == 0.0) + relError = 0.0; // Edge case: i.e., RNNs with time series length of 1.0 + + if (relError > maxError) maxError = relError; + if (relError > maxRelError || Double.isNaN(relError)) { + double absError = Math.abs(backpropGradient - numericalGradient); + if (absError < minAbsoluteError) { + log.info( + "Param " + + i + + " (" + + paramName + + ") passed: grad= " + + backpropGradient + + ", numericalGrad= " + + numericalGradient + + ", relError= " + + relError + + "; absolute error = " + + absError + + " < minAbsoluteError = " + + minAbsoluteError); + } else { + if (print) + log.info( + "Param " + + i + + " (" + + paramName + + ") FAILED: grad= " + + backpropGradient + + ", numericalGrad= " + + numericalGradient + + ", relError= " + + relError + + ", scorePlus=" + + scorePlus + + ", scoreMinus= " + + scoreMinus + + ", paramValue = " + + origValue); + if (exitOnFirstError) return false; + totalNFailures++; + } + } else if (print) { + log.info( + "Param " + + i + + " (" + + paramName + + ") passed: grad= " + + backpropGradient + + ", numericalGrad= " + + numericalGradient + + ", relError= " + + relError); + } + } + + if (print) { + val nPass = nParams - totalNFailures; + log.info( + "GradientCheckUtil.checkGradients(): " + + nParams + + " params checked, " + + nPass + + " passed, " + + totalNFailures + + " failed. 
Largest relative error = " + + maxError); + } + + return totalNFailures == 0; + } + + public enum PrintMode { + ALL, + ZEROS, + FAILURES_ONLY + } + + @Accessors(fluent = true) + @Data + @NoArgsConstructor + public static class MLNConfig { + private MultiLayerNetwork net; + private INDArray input; + private INDArray labels; + private INDArray inputMask; + private INDArray labelMask; + private double epsilon = 1e-6; + private double maxRelError = 1e-3; + private double minAbsoluteError = 1e-8; + private PrintMode print = PrintMode.ZEROS; + private boolean exitOnFirstError = false; + private boolean subset; + private int maxPerParam; + private Set<String> excludeParams; + private Consumer<MultiLayerNetwork> callEachIter; + } + + @Accessors(fluent = true) + @Data + @NoArgsConstructor + public static class GraphConfig { + private ComputationGraph net; + private INDArray[] inputs; + private INDArray[] labels; + private INDArray[] inputMask; + private INDArray[] labelMask; + private double epsilon = 1e-6; + private double maxRelError = 1e-3; + private double minAbsoluteError = 1e-8; + private PrintMode print = PrintMode.ZEROS; + private boolean exitOnFirstError = false; + private boolean subset; + private int maxPerParam; + private Set<String> excludeParams; + private Consumer<ComputationGraph> callEachIter; + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/ITraininableLayerConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/ITraininableLayerConfiguration.java index 40a3170b4..b44ee12d0 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/ITraininableLayerConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/ITraininableLayerConfiguration.java @@ -32,7 +32,7 @@ public interface ITraininableLayerConfiguration { /** * @return Name of the layer */ - String getLayerName(); + String getName(); /** * Get the regularization types (l1/l2/weight decay) for the given parameter.
Different parameters may have different diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/ComputationGraphConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/ComputationGraphConfiguration.java index dac126dd7..784d768ff 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/ComputationGraphConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/ComputationGraphConfiguration.java @@ -211,7 +211,7 @@ public class ComputationGraphConfiguration implements Serializable, Cloneable { if (layer instanceof BaseLayerConfiguration && ((BaseLayerConfiguration) layer).getActivationFn() == null) { - String layerName = layer.getLayerName(); + String layerName = layer.getName(); try { if (vertices == null) { @@ -236,7 +236,7 @@ public class ComputationGraphConfiguration implements Serializable, Cloneable { if (activationFunction != null) { IActivation ia = Activation.fromString(activationFunction.asText()).getActivationFunction(); - ((BaseLayerConfiguration) layer).setActivationFn(ia); + ((BaseLayerConfiguration) layer).setActivation(ia); } } catch (IOException e) { @@ -260,7 +260,7 @@ public class ComputationGraphConfiguration implements Serializable, Cloneable { private static void handleLegacyWeightInitFromJson(String json, LayerConfiguration layer, ObjectMapper mapper, JsonNode vertices) { if (layer instanceof BaseLayerConfiguration && ((BaseLayerConfiguration) layer).getWeightInit() == null) { - String layerName = layer.getLayerName(); + String layerName = layer.getName(); try { if (vertices == null) { @@ -819,6 +819,9 @@ public class ComputationGraphConfiguration implements Serializable, Cloneable { public GraphBuilder addLayer(String layerName, LayerConfiguration layer, String... layerInputs) { return addLayer(layerName, layer, null, layerInputs); } + public GraphBuilder addLayer(String layerName, LayerConfiguration.LayerConfigurationBuilder layer, String... 
layerInputs) { + return addLayer(layerName, layer.build(), null, layerInputs); + } /** * Add a layer, with no {@link InputPreProcessor}, with the specified name @@ -872,7 +875,7 @@ public class ComputationGraphConfiguration implements Serializable, Cloneable { NeuralNetConfiguration conf = globalConfiguration.clone(); conf.getLayerConfigurations().add(layer); addVertex(layerName, new LayerVertex(conf, preProcessor), layerInputs); - layer.setLayerName(layerName); + layer.setName(layerName); return this; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetBaseBuilderConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetBaseBuilderConfiguration.java index 4f9a9bb1f..a9afe9332 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetBaseBuilderConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetBaseBuilderConfiguration.java @@ -20,22 +20,12 @@ package org.deeplearning4j.nn.conf; -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonTypeInfo; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.node.ArrayNode; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Random; -import lombok.Data; -import lombok.EqualsAndHashCode; -import lombok.Getter; -import lombok.NonNull; -import lombok.Setter; +import lombok.*; import lombok.experimental.SuperBuilder; import lombok.extern.slf4j.Slf4j; import net.brutex.ai.dnn.api.INeuralNetworkConfiguration; @@ -59,7 +49,6 @@ import org.deeplearning4j.util.NetworkUtils; import org.nd4j.common.base.Preconditions; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.activations.IActivation; -import org.nd4j.linalg.activations.impl.ActivationSigmoid; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.learning.config.IUpdater; import org.nd4j.linalg.learning.config.Sgd; @@ -68,6 +57,9 @@ import org.nd4j.linalg.learning.regularization.L2Regularization; import org.nd4j.linalg.learning.regularization.Regularization; import org.nd4j.linalg.learning.regularization.WeightDecay; +import java.io.IOException; +import java.util.*; + /** * Deeplearning4j is a domain-specific language to configure deep neural networks, which are made of * multiple layers. Everything starts with a NeuralNetConfiguration, which organizes those layers @@ -88,17 +80,16 @@ import org.nd4j.linalg.learning.regularization.WeightDecay; * layers (the zero-indexed layer below is the input layer), the number of input and output nodes, * nIn and nOut, as well as the type: DenseLayer.
*
- * .layer(0, new DenseLayer.Builder().nIn(784).nOut(250)
+ * .layer(0, DenseLayer.builder().nIn(784).nOut(250)
* .build())
*
* Once you've configured your net, you train the model with model.fit. */ @Data @Slf4j -@EqualsAndHashCode(exclude = {"iterationCount", "epochCount"}) -@JsonIgnoreProperties(ignoreUnknown = true) // The inner builder, that we can then extend ... @SuperBuilder // TODO fix access +@EqualsAndHashCode public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetworkConfiguration { private static final int DEFAULT_TBPTT_LENGTH = 20; @@ -114,7 +105,7 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor * * @param constraints Constraints to apply to all weight parameters of all layers */ - @lombok.Builder.Default protected final List contrainWeights = new ArrayList<>(); + @lombok.Builder.Default protected final List constrainWeights = new ArrayList<>(); /** * Set constraints to be applied to all layers. Default: no constraints.
@@ -140,16 +131,20 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor * @param constraints Constraints to apply to all parameters of all layers */ @lombok.Builder.Default - protected final List allParamContraints = new ArrayList<>(); + protected final List allParamConstraints = new ArrayList<>(); /** * This is a basic concept, a neural network is made of layers, but also can use another neural * network as a building block. When the configuration is initialized, those building blocks will * be flattened into a single list of layers. Internal ordered list of layers and inner neural * networks. If the object is a NeuralNetConfiguration, each configuration must contain at least * one layer. + * -- SETTER -- + * Internal deserialisation use cases only. Do not use directly + * */ - @Getter @lombok.Builder.Default - protected final List innerConfigurations = new ArrayList<>(); + @Getter @Setter @lombok.Builder.Default + @JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, include = JsonTypeInfo.As.WRAPPER_OBJECT) + protected List innerConfigurations = new ArrayList<>(); @Getter @Setter @NonNull @lombok.Builder.Default @Deprecated protected WorkspaceMode trainingWorkspaceMode = WorkspaceMode.ENABLED; @@ -188,17 +183,9 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor * @param backwardLength <= forwardLength */ @Getter @Setter @lombok.Builder.Default protected int tbpttBackLength = 20; - // Counter for the number of parameter updates so far - // This is important for learning rate schedules, for example, and is stored here to ensure it is - // persisted - // for Spark and model serialization - @Getter @Setter @lombok.Builder.Default protected int iterationCount = 0; - // Counter for the number of epochs completed so far. Used for per-epoch schedules - @Getter @Setter @lombok.Builder.Default protected int epochCount = 0; + @lombok.Builder.Default protected double dampingFactor = 100; - // gradient keys used for ensuring order when getting and setting the gradient - // @lombok.Builder.Default - // protected List variables = new ArrayList<>(); + @Getter @Setter @lombok.Builder.Default private boolean miniBatch = false; /** A seed for this network, will be random if not specified. */ @Getter @Setter @lombok.Builder.Default private long seed = new Random().nextLong(); @@ -221,7 +208,7 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor * The name for this configuration. Defaults to "Anonymous INeuralNetworkConfiguration" if it is * not specified. */ - @lombok.Builder.Default @Getter private String name = "Anonymous INeuralNetworkConfiguration"; + @lombok.Builder.Default @Getter @Setter private String name = "Anonymous INeuralNetworkConfiguration"; /** The {@link InputType} of the data for this network configuration */ @Getter @Setter private InputType inputType; /** @@ -299,6 +286,8 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor * method are used as the default value, and can be overridden on a per-layer basis. 
*/ @Getter @Setter private IActivation activation; + + // whether to constrain the gradient to unit norm or not @Getter @Setter private StepFunction stepFunction; @@ -317,7 +306,8 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor * @param regularization Regularization to apply for the network parameters/weights (excluding * biases) */ - @Getter @lombok.Builder.Default private List regularization = new ArrayList<>(); + @Getter @Setter @lombok.Builder.Default + private List regularization = new ArrayList<>(); /** * Set the regularization for the biases only - for example {@link WeightDecay}
* Note: values set by this method will be applied to all applicable layers in the network, unless @@ -326,10 +316,10 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor * * @param regularizationBias Regularization to apply for the network biases only */ - @Getter @lombok.Builder.Default + @Getter @Setter @lombok.Builder.Default private List regularizationBias = new ArrayList<>(); - @Getter @Setter @lombok.Builder.Default private IUpdater iUpdater = new Sgd(); + @Getter @Setter @lombok.Builder.Default @JsonIgnore private IUpdater iUpdater = new Sgd(); /** * Gradient updater configuration, for the biases only. If not set, biases will use the updater as * set by {@link #setIUpdater(IUpdater)}
@@ -341,8 +331,7 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor */ @Getter @Setter @lombok.Builder.Default private IUpdater biasUpdater = null; - @Getter @Setter @lombok.Builder.Default - private IActivation activationFn = new ActivationSigmoid(); + /** * Weight initialization scheme to use, for initial weight values Note: values set by this method * will be applied to all applicable layers in the network, unless a different value is explicitly @@ -473,7 +462,7 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor } catch (IOException e) { log.warn( - "ILayer with null WeightInit detected: " + l.getLayerName() + ", could not parse JSON", + "ILayer with null WeightInit detected: " + l.getName() + ", could not parse JSON", e); } } @@ -546,7 +535,7 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor clone.stepFunction = clone.stepFunction.clone(); } /** if (clone.variables != null) { clone.variables = new ArrayList<>(clone.variables); } */ - clone.getInnerConfigurations().addAll(innerConfigurations); + clone.setInnerConfigurations(new ArrayList<>(getInnerConfigurations())); if (clone.getInputPreProcessors() != null) { Map map = new HashMap<>(); @@ -566,12 +555,20 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor return clone; } - public abstract static class NeuralNetBaseBuilderConfigurationBuilder< + public static abstract class NeuralNetBaseBuilderConfigurationBuilder< C extends NeuralNetBaseBuilderConfiguration, B extends NeuralNetBaseBuilderConfiguration.NeuralNetBaseBuilderConfigurationBuilder> { List innerConfigurations$value = new ArrayList<>(); // initialize with an empty list + public B activation(Activation activation) { + this.activation = activation; + return self(); + } + public B activation(IActivation activation) { + this.activation = activation; + return self(); + } /** * Set constraints to be applied to all layers. Default: no constraints.
* Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm @@ -584,8 +581,8 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor * @param constraints Constraints to apply to all weight parameters of all layers */ public B constrainWeights(LayerConstraint... constraints) { - contrainWeights$value = Arrays.asList(constraints); - contrainWeights$set = true; + constrainWeights$value = Arrays.asList(constraints); + constrainWeights$set = true; return (B) this; } @@ -619,8 +616,8 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor * @param constraints Constraints to apply to all parameters of all layers */ public B constrainAllParameters(LayerConstraint... constraints) { - allParamContraints$value = Arrays.asList(constraints); - allParamContraints$set = true; + allParamConstraints$value = Arrays.asList(constraints); + allParamConstraints$set = true; return (B) this; } @@ -664,7 +661,17 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor public B layer(Integer index, @NonNull LayerConfiguration layer) { innerConfigurations$value.add(index, layer); innerConfigurations$set = true; - return (B) this; + return self(); + } + /** + * Set layer at index + * + * @param index where to insert + * @param layer the layer + * @return builder + */ + public B layer(Integer index, @NonNull LayerConfiguration.LayerConfigurationBuilder layer) { + return this.layer(index, layer.build()); } /** @@ -678,6 +685,9 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor innerConfigurations$set = true; return (B) this; } + public B layer(@NonNull LayerConfiguration.LayerConfigurationBuilder layer) { + return this.layer(layer.build()); + } // TODO this is a dirty workaround public boolean isOverrideNinUponBuild() { @@ -729,6 +739,7 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor * @see #weightDecay(double, boolean) */ public B l2(double l2) { + if(regularization$value == null) regularization$value = new ArrayList<>(); // Check if existing L2 exists; if so, replace it. Also remove weight decay - it doesn't make // sense to use both NetworkUtils.removeInstances(regularization$value, L2Regularization.class); @@ -862,16 +873,6 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor return (B) this; } - /** - * Activation function / neuron non-linearity
- * Note: values set by this method will be applied to all applicable layers in the network, - * unless a different value is explicitly set on a given layer. In other words: values set via - * this method are used as the default value, and can be overridden on a per-layer basis. - */ - @Deprecated - public B activation(@NonNull Activation activation) { - return (B) activationFn(activation.getActivationFunction()); - } /** * legacy code, does nothing @@ -892,19 +893,20 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor * * @param distribution Distribution to use for weight initialization */ - public B weightInit(@NonNull Distribution distribution) { + @JsonIgnore + public B weightInit(Distribution distribution) { this.weightInit$value = new WeightInitDistribution(distribution); this.weightInit$set = true; return (B) this; } - - public B weightInit(@NonNull WeightInit weightInit) { + @JsonIgnore + public B weightInit(WeightInit weightInit) { this.weightInit$value = weightInit.getWeightInitFunction(); this.weightInit$set = true; return (B) this; } - public B weightInit(@NonNull IWeightInit iWeightInit) { + public B weightInit(IWeightInit iWeightInit) { this.weightInit$value = iWeightInit; this.weightInit$set = true; return (B) this; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java index fe946e022..e6042e3d6 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java @@ -20,70 +20,42 @@ package org.deeplearning4j.nn.conf; -import com.fasterxml.jackson.annotation.JsonIdentityInfo; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import com.fasterxml.jackson.annotation.ObjectIdGenerators; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.exc.InvalidTypeIdException; import com.fasterxml.jackson.databind.node.ArrayNode; -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; - import lombok.*; import lombok.experimental.SuperBuilder; import lombok.extern.jackson.Jacksonized; import lombok.extern.slf4j.Slf4j; import net.brutex.ai.dnn.api.IModel; -import org.deeplearning4j.nn.api.OptimizationAlgorithm; -import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.distribution.Distribution; -import org.deeplearning4j.nn.conf.dropout.Dropout; -import org.deeplearning4j.nn.conf.dropout.IDropout; import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.layers.BaseLayerConfiguration; -import org.deeplearning4j.nn.conf.layers.BaseOutputLayer; -import org.deeplearning4j.nn.conf.layers.BaseRecurrentLayer; -import org.deeplearning4j.nn.conf.layers.Convolution1DLayer; -import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; -import org.deeplearning4j.nn.conf.layers.DenseLayer; -import org.deeplearning4j.nn.conf.layers.EmbeddingLayer; -import org.deeplearning4j.nn.conf.layers.FeedForwardLayer; -import org.deeplearning4j.nn.conf.layers.GlobalPoolingLayer; -import org.deeplearning4j.nn.conf.layers.LayerConfiguration; -import 
org.deeplearning4j.nn.conf.layers.OutputLayer; +import org.deeplearning4j.nn.conf.layers.*; import org.deeplearning4j.nn.conf.layers.recurrent.LastTimeStep; import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.conf.memory.MemoryReport; import org.deeplearning4j.nn.conf.memory.NetworkMemoryReport; import org.deeplearning4j.nn.conf.serde.JsonMappers; -import org.deeplearning4j.nn.conf.stepfunctions.DefaultStepFunction; -import org.deeplearning4j.nn.conf.stepfunctions.StepFunction; -import org.deeplearning4j.nn.conf.weightnoise.IWeightNoise; import org.deeplearning4j.nn.weights.IWeightInit; import org.deeplearning4j.nn.weights.WeightInit; import org.deeplearning4j.util.OutputLayerUtil; import org.nd4j.linalg.activations.Activation; -import org.nd4j.linalg.activations.IActivation; -import org.nd4j.linalg.activations.impl.ActivationSigmoid; -import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.learning.config.IUpdater; import org.nd4j.linalg.learning.config.Sgd; -import org.nd4j.linalg.learning.regularization.Regularization; -import org.nd4j.linalg.learning.regularization.WeightDecay; import org.nd4j.linalg.lossfunctions.LossFunctions; import org.nd4j.linalg.lossfunctions.impl.LossBinaryXENT; import org.nd4j.linalg.lossfunctions.impl.LossMCXENT; import org.nd4j.linalg.lossfunctions.impl.LossMSE; import org.nd4j.linalg.lossfunctions.impl.LossNegativeLogLikelihood; +import java.io.IOException; +import java.util.*; +import java.util.stream.Collectors; + /** * Deeplearning4j is a domain-specific language to configure deep neural networks, which are made of * multiple layers. Everything starts with a NeuralNetConfiguration, which organizes those layers @@ -105,52 +77,30 @@ import org.nd4j.linalg.lossfunctions.impl.LossNegativeLogLikelihood; * layers (the zero-indexed layer below is the input layer), the number of input and output nodes, * nIn and nOut, as well as the type: DenseLayer.

* - * .layer(0, new DenseLayer.Builder().nIn(784).nOut(250)
+ * .layer(0, DenseLayer.builder().nIn(784).nOut(250)
* .build())

* * Once you've configured your net, you train the * model with model.fit. */ + @Data @Slf4j -@EqualsAndHashCode(exclude = {"iterationCount", "epochCount"}) @Jacksonized -@JsonIgnoreProperties(ignoreUnknown = true) -@JsonIdentityInfo(generator= ObjectIdGenerators.IntSequenceGenerator.class, property="@id") +@JsonIgnoreProperties(value={"net"}, ignoreUnknown = true) +@EqualsAndHashCode(exclude = {"net"}, callSuper = true) +//@JsonIdentityInfo(generator= ObjectIdGenerators.IntSequenceGenerator.class, property="@id") //The inner builder, that we can then extend ... @SuperBuilder //TODO fix access public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { + private IModel net; private static final int DEFAULT_TBPTT_LENGTH = 20; private boolean initCalled = false; - /** - * Set constraints to be applied to all layers. Default: no constraints.
- * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm regularization, - * etc). These constraints are applied at each iteration, after the parameters have been updated.
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis. - * - * @param constraints Constraints to apply to all bias parameters of all layers - */ - @lombok.Builder.Default - protected final List biasConstraints = new ArrayList<>(); - /** - * Set constraints to be applied to all layers. Default: no constraints.
- * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm regularization, - * etc). These constraints are applied at each iteration, after the parameters have been updated.
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis. - * - * @param constraints Constraints to apply to all parameters of all layers - */ - @lombok.Builder.Default - protected final List allParamContraints = new ArrayList<>(); @Getter @Setter @@ -164,55 +114,8 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { @lombok.Builder.Default @Deprecated protected WorkspaceMode inferenceWorkspaceMode = WorkspaceMode.ENABLED; - /** - * The type of backprop. Default setting is used for most networks (MLP, CNN etc), but optionally - * truncated BPTT can be used for training recurrent neural networks. If using TruncatedBPTT make - * sure you set both tBPTTForwardLength() and tBPTTBackwardLength() - */ - @Getter - @Setter - @NonNull - @lombok.Builder.Default - protected BackpropType backpropType = BackpropType.Standard; - /** - * When doing truncated BPTT: how many steps of forward pass should we do before doing (truncated) - * backprop?
Only applicable when doing backpropType(BackpropType.TruncatedBPTT)
Typically - * tBPTTForwardLength parameter is same as the tBPTTBackwardLength parameter, but may be larger - * than it in some circumstances (but never smaller)
Ideally your training data time series - * length should be divisible by this This is the k1 parameter on pg23 of http://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf - * - * @param forwardLength Forward length > 0, >= backwardLength - */ - @Getter - @Setter - @lombok.Builder.Default - protected int tbpttFwdLength = 20; - /** - * When doing truncated BPTT: how many steps of backward should we do?
Only applicable when - * doing backpropType(BackpropType.TruncatedBPTT)
This is the k2 parameter on pg23 of http://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf - * - * @param backwardLength <= forwardLength - */ - @Getter - @Setter - @lombok.Builder.Default - protected int tbpttBackLength = 20; - /** - * Creates and returns a copy of this object. - * - * @return a clone of this instance. - * @throws CloneNotSupportedException if the object's class does not support the {@code Cloneable} - * interface. Subclasses that override the {@code clone} method can also throw this exception to - * indicate that an instance cannot be cloned. - * @see Cloneable - */ - //Nd4j.getRandom().setSeed(getConf(0).getSeed()); //TODO - //Counter for the number of parameter updates so far - // This is important for learning rate schedules, for example, and is stored here to ensure it is persisted - // for Spark and model serialization + @Getter @Setter @lombok.Builder.Default @@ -224,95 +127,9 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { protected int epochCount = 0; @lombok.Builder.Default protected double dampingFactor = 100; - //gradient keys used for ensuring order when getting and setting the gradient - @lombok.Builder.Default - protected List netWideVariables = new ArrayList<>(); - @Getter - @Setter - @lombok.Builder.Default - private boolean miniBatch = false; - /** - * A seed for this network, will be random if not specified. + // gradient keys used for ensuring order when getting and setting the gradient + @lombok.Builder.Default private LinkedHashSet netWideVariables = new LinkedHashSet<>(); - @Getter - @Setter - @lombok.Builder.Default - private long seed = new Random().nextLong(); */ - /** - * The default {@link CacheMode} for this configuration. Will be set to "NONE" if not specified - * otherwise. This method defines how/if preOutput cache is handled: NONE: cache disabled (default - * value) HOST: Host memory will be used DEVICE: GPU memory will be used (on CPU backends effect - * will be the same as for HOST) - *

- * Valid values are
CacheMode.NONE,
CacheMode.HOST or
CacheMode.DEVICE
- * - * @param cacheMode - */ - @NonNull - @Getter - @Setter - @lombok.Builder.Default - private CacheMode cacheMode = CacheMode.NONE; - /** - * The list of layer configurations in this configuration. They will be indexed automatically as - * the layers get added starting with index 0. - */ - - @lombok.Builder.Default - @Getter - private String name = "Anonymous INeuralNetworkConfiguration"; - /** - * The {@link InputType} of the data for this network configuration - */ - @Getter - @Setter - private InputType inputType; - /** - * Set the DataType for the network parameters and activations for all layers in the network. - * Default: Float - * - * @param dataType Datatype to use for parameters and activations - */ - @Getter - @Setter - @lombok.Builder.Default - @NonNull - private DataType dataType = DataType.FLOAT; - /** - * Whether to override the nIn configuration forcibly upon construction. Default value is true. - * - * @return builder pattern - */ - @Getter - @Setter - @lombok.Builder.Default - private boolean overrideNinUponBuild = true; - /** - * Enabled by default. If enabled, the output layer configuration will be validated, to throw an - * exception on likely invalid outputs - such as softmax + nOut=1, or LossMCXENT + Tanh.
If - * disabled (false) no output layer validation will be performed.
Disabling this validation is - * not recommended, as the configurations that fail validation usually will not be able to learn - * correctly. However, the option to disable this validation is provided for advanced users when - * creating non-standard architectures. - * - * @param validate If true: validate output layer configuration. False: don't validate - */ - @Getter - @Setter - @lombok.Builder.Default - private boolean validateOutputLayerConfig = true; - /** - * Enabled by default. If enabled, an exception will be throw when using the (invalid) combination - * of truncated backpropagation through time (TBPTT) with either a GlobalPoolingLayer or - * LastTimeStepLayer.
It is possible to disable this validation to allow what is almost - * certainly an invalid configuration to be used, however this is not recommended. - * - * @param validate Whether TBPTT validation should be performed - */ - @Getter - @Setter - @lombok.Builder.Default - private boolean validateTbpttConfig = true; /** * Gradient updater configuration. For example, {@link org.nd4j.linalg.learning.config.Adam} or * {@link org.nd4j.linalg.learning.config.Nesterovs}
@@ -323,98 +140,8 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { * @param updater Updater to use */ @Getter @Setter @Builder.Default private IUpdater updater = new Sgd(); - /** - * Gradient normalization strategy. Used to specify gradient renormalization, gradient clipping - * etc. See {@link GradientNormalization} for details
Note: values set by this method will be - * applied to all applicable layers in the network, unless a different value is explicitly set on - * a given layer. In other words: values set via this method are used as the default value, and - * can be overridden on a per-layer basis. - * - * @param gradientNormalization Type of normalization to use. Defaults to None. - * @see GradientNormalization - */ - @Getter - @Setter - @NonNull - @lombok.Builder.Default - private GradientNormalization gradientNormalization = GradientNormalization.None; - /** - * Threshold for gradient normalization, only used for GradientNormalization.ClipL2PerLayer, - * GradientNormalization.ClipL2PerParamType, and - * GradientNormalization.ClipElementWiseAbsoluteValue
Not used otherwise.
L2 threshold for - * first two types of clipping, or absolute value threshold for last type of clipping.
Note: - * values set by this method will be applied to all applicable layers in the network, unless a - * different value is explicitly set on a given layer. In other words: values set via this method - * are used as the default value, and can be overridden on a per-layer basis. - */ - @Getter - @Setter - private double gradientNormalizationThreshold; - // whether to constrain the gradient to unit norm or not - @Getter @Setter @Builder.Default private StepFunction stepFunction = new DefaultStepFunction(); - @Getter - @Setter - @lombok.Builder.Default - private OptimizationAlgorithm optimizationAlgo = OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT; - @Getter - @Setter - @lombok.Builder.Default - private int maxNumLineSearchIterations = 5; - /** - * Set the regularization for the parameters (excluding biases) - for example {@link WeightDecay}
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis.
- * - * @param regularization Regularization to apply for the network parameters/weights (excluding biases) - */ - @Getter - @lombok.Builder.Default - private List regularization = new ArrayList<>(); - /** - * Set the regularization for the biases only - for example {@link WeightDecay}
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis.
- * - * @param regularizationBias Regularization to apply for the network biases only - */ - @Getter - @lombok.Builder.Default - private List regularizationBias = new ArrayList<>(); - - /** - * Gradient updater configuration, for the biases only. If not set, biases will use the updater as - * set by {@link #setUpdater(IUpdater)}
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis. - * - * @param updater Updater to use for bias parameters - */ - @Getter - @Setter - @lombok.Builder.Default - private IUpdater biasUpdater = null; - @Getter - @Setter - @lombok.Builder.Default - private IActivation activation = new ActivationSigmoid(); - - /** - * Sets the convolution mode for convolutional layers, which impacts padding and output sizes. - * See {@link ConvolutionMode} for details. Defaults to ConvolutionMode.TRUNCATE
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis. - * @param convolutionMode Convolution mode to use - */ - @Getter - @Setter - @lombok.Builder.Default - private ConvolutionMode convolutionMode = ConvolutionMode.Truncate; /** * Sets the cuDNN algo mode for convolutional layers, which impacts performance and memory usage of cuDNN. * See {@link ConvolutionLayer.AlgoMode} for details. Defaults to "PREFER_FASTEST", but "NO_WORKSPACE" uses less memory. @@ -428,67 +155,7 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { @Setter @lombok.Builder.Default private ConvolutionLayer.AlgoMode cudnnAlgoMode = ConvolutionLayer.AlgoMode.PREFER_FASTEST; - @Getter - @Setter - @lombok.Builder.Default - private boolean minimize = true; - /** - * Set the dropout for all layers in this network
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis. - * * Dropout probability. This is the probability of retaining each input activation value for a layer. - * * dropOut(x) will keep an input activation with probability x, and set to 0 with probability 1-x.
- * * dropOut(0.0) is a special value / special case - when set to 0.0., dropout is disabled (not applied). Note - * * that a dropout value of 1.0 is functionally equivalent to no dropout: i.e., 100% probability of retaining - * * each input activation.
- * *

- * * Note 1: Dropout is applied at training time only - and is automatically not applied at test time - * * (for evaluation, etc)
- * * Note 2: This sets the probability per-layer. Care should be taken when setting lower values for - * * complex networks (too much information may be lost with aggressive (very low) dropout values).
- * * Note 3: Frequently, dropout is not applied to (or, has higher retain probability for) input (first layer) - * * layers. Dropout is also often not applied to output layers. This needs to be handled MANUALLY by the user - * * - set .dropout(0) on those layers when using global dropout setting.
- * * Note 4: Implementation detail (most users can ignore): DL4J uses inverted dropout, as described here: - * * http://cs231n.github.io/neural-networks-2/ - * *

- * *
- * * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * * value, and can be overridden on a per-layer basis. - * * - * * @param inputRetainProbability Dropout probability (probability of retaining each input activation value for a layer) - * * @see #dropOut(IDropout) - * - * - * @param dropout Dropout, such as {@link Dropout}, {@link org.deeplearning4j.nn.conf.dropout.GaussianDropout}, - * {@link org.deeplearning4j.nn.conf.dropout.GaussianNoise} etc - * @return - */ - @Getter - @Setter - private IDropout idropOut; - /** - * Set the weight noise (such as {@link org.deeplearning4j.nn.conf.weightnoise.DropConnect} and - * {@link org.deeplearning4j.nn.conf.weightnoise.WeightNoise}) for the layers in this network.
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis. - * - * @param weightNoise Weight noise instance to use - */ - @Getter - @Setter - private IWeightNoise weightNoise; - @Getter - @Setter - @lombok.Builder.Default - private double biasInit = 0.0; - @Getter - @Setter - @lombok.Builder.Default - private double gainInit = 1.0; + /** * Create a neural net configuration from json @@ -545,7 +212,7 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { JsonNode confs = null; for (LayerConfiguration nnc : conf.getFlattenedLayerConfigurations()) { LayerConfiguration l = nnc; - if (l instanceof BaseOutputLayer && ((BaseOutputLayer) l).getLossFn() == null) { + if (l instanceof BaseOutputLayer && ((BaseOutputLayer) l).getLossFunction() == null) { //lossFn field null -> may be an old config format, with lossFunction field being for the enum //if so, try walking the JSON graph to extract out the appropriate enum value @@ -585,16 +252,16 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { if (lossFunction != null) { switch (lossFunction) { case MSE: - ol.setLossFn(new LossMSE()); + ol.setLossFunction(new LossMSE()); break; case XENT: - ol.setLossFn(new LossBinaryXENT()); + ol.setLossFunction(new LossBinaryXENT()); break; case NEGATIVELOGLIKELIHOOD: - ol.setLossFn(new LossNegativeLogLikelihood()); + ol.setLossFunction(new LossNegativeLogLikelihood()); break; case MCXENT: - ol.setLossFn(new LossMCXENT()); + ol.setLossFunction(new LossMCXENT()); break; //Remaining: TODO @@ -648,9 +315,8 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { "activationFunction"); //Should only have 1 element: "dense", "output", etc if (activationFunction != null) { - IActivation ia = Activation.fromString(activationFunction.asText()) - .getActivationFunction(); - ((BaseLayerConfiguration) l).setActivationFn(ia); + Activation ia = Activation.fromString(activationFunction.asText()); + ((BaseLayerConfiguration) l).setActivation(ia.getActivationFunction()); } } @@ -716,7 +382,7 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { } catch (IOException e) { log.warn( - "ILayer with null WeightInit detected: " + l.getLayerName() + ", could not parse JSON", + "ILayer with null WeightInit detected: " + l.getName() + ", could not parse JSON", e); } } @@ -787,9 +453,9 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { public NeuralNetConfiguration clone() { NeuralNetConfiguration clone; clone = (NeuralNetConfiguration) super.clone(); - clone.stepFunction = clone.stepFunction.clone(); - clone.netWideVariables = new ArrayList<>(netWideVariables); - clone.getInnerConfigurations().addAll(innerConfigurations); + if(getStepFunction() != null) { clone.setStepFunction(getStepFunction().clone()); } + clone.netWideVariables = new LinkedHashSet<>(netWideVariables); + clone.setInnerConfigurations(new ArrayList<>(innerConfigurations)); if (clone.getInputPreProcessors() != null) { Map map = new HashMap<>(); @@ -802,9 +468,9 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { clone.setInferenceWorkspaceMode(this.inferenceWorkspaceMode); clone.setTrainingWorkspaceMode(this.trainingWorkspaceMode); - clone.setCacheMode(this.cacheMode); - 
clone.setValidateOutputLayerConfig(this.validateOutputLayerConfig); - clone.setDataType(this.dataType); + clone.setCacheMode(this.getCacheMode()); + clone.setValidateOutputLayerConfig(isValidateOutputLayerConfig()); + clone.setDataType(this.getDataType()); return clone; @@ -821,33 +487,24 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { /** * Run init() for each layer */ + for( NeuralNetConfiguration nconf : getNetConfigurations() ) { + nconf.init(); + } + //getNetConfigurations().stream().forEach( conf -> { + // conf.init(); //do not call on self + //}); //call init on all embedded net configurations - getNetConfigurations().stream().forEach( conf -> { - conf.init(); //do not call on self - }); //call init on all embedded net configurations - innerConfigurations.add(0, this); //put this configuration at first place + //TODO do not put inside self to avoid serialization issues + // innerConfigurations.add(0, this); //put this configuration at first place /** * Inherit network wide configuration setting to those layer configurations * that do not have an individual setting (nor a default) */ for(LayerConfiguration lconf : this.getFlattenedLayerConfigurations()) { - if(lconf.getActivationFn() == null ) lconf.setActivationFn(this.getActivation()); - if(lconf.getIUpdater() == null ) lconf.setIUpdater( this.getUpdater() ); - if(lconf.getIDropout() == null ) lconf.setIDropout( this.getIdropOut() ); - if(lconf.getWeightNoise() == null ) lconf.setWeightNoise( this.getWeightNoise()); - - // ... maybe more to set here ... - if(lconf instanceof BaseLayerConfiguration ) { // then we can set some additional config settings - BaseLayerConfiguration bconf = (BaseLayerConfiguration) lconf; - if(bconf.getBiasUpdater() == null) bconf.setBiasUpdater(this.getBiasUpdater()); - if(bconf.getGradientNormalization() == null) bconf.setGradientNormalization(this.getGradientNormalization()); - // ... maybe more to set here ... - } + lconf.runInheritance(); } - - getLayerConfigurations().stream().forEach( lconf -> lconf.setNetConfiguration(this)); //set this as net config for all layers (defined in here, not stacked @@ -861,7 +518,7 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { " settings will only take effect if backprop type is set to BackpropType.TruncatedBPTT"); } - if (backpropType == BackpropType.TruncatedBPTT && validateTbpttConfig) { + if (backpropType == BackpropType.TruncatedBPTT && isValidateTbpttConfig()) { //Check for invalid combination - tbptt plus LastTimeStepLayer or for (int i = 0; i < getFlattenedLayerConfigurations().size(); i++) { LayerConfiguration l = getFlattenedLayerConfigurations().get(i); @@ -878,7 +535,7 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { } } - if (inputType == null && inputPreProcessors.get(0) == null) { + if (getInputType() == null && inputPreProcessors.get(0) == null) { //User hasn't set the InputType. Sometimes we can infer it... 
// For example, Dense/RNN layers, where preprocessor isn't set -> user is *probably* going to feed in // standard feedforward or RNN data @@ -889,7 +546,7 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { BaseRecurrentLayer brl = (BaseRecurrentLayer) firstLayer; val nIn = brl.getNIn(); if (nIn > 0) { - inputType = InputType.recurrent(nIn, brl.getRnnDataFormat()); + setInputType( InputType.recurrent(nIn, brl.getDataFormat())); } } else if (firstLayer instanceof DenseLayer || firstLayer instanceof EmbeddingLayer || firstLayer instanceof OutputLayer) { @@ -897,7 +554,7 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { FeedForwardLayer ffl = (FeedForwardLayer) firstLayer; val nIn = ffl.getNIn(); if (nIn > 0) { - inputType = InputType.feedForward(nIn); + setInputType( InputType.feedForward(nIn)); } } } @@ -910,8 +567,8 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { if(inputPreProcessors == null) { inputPreProcessors = new HashMap<>(); } - if (inputType != null) { - InputType currentInputType = inputType; + if (getInputType() != null) { + InputType currentInputType = getInputType(); for (int i = 0; i < getFlattenedLayerConfigurations().size(); i++) { LayerConfiguration l = getFlattenedLayerConfigurations().get(i); if (inputPreProcessors.get(i) == null) { @@ -932,24 +589,24 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { //convolution 1d is an edge case where it has rnn input type but the filters //should be the output if (layer instanceof Convolution1DLayer) { - if (l instanceof DenseLayer && inputType instanceof InputType.InputTypeRecurrent) { + if (l instanceof DenseLayer && getInputType() instanceof InputType.InputTypeRecurrent) { FeedForwardLayer feedForwardLayer = (FeedForwardLayer) l; - if (inputType instanceof InputType.InputTypeRecurrent) { - InputType.InputTypeRecurrent recurrent = (InputType.InputTypeRecurrent) inputType; + if (getInputType() instanceof InputType.InputTypeRecurrent) { + InputType.InputTypeRecurrent recurrent = (InputType.InputTypeRecurrent) getInputType(); feedForwardLayer.setNIn(recurrent.getTimeSeriesLength()); } } else { l.setNIn(currentInputType, - overrideNinUponBuild); //Don't override the nIn setting, if it's manually set by the user + isOverrideNinUponBuild()); //Don't override the nIn setting, if it's manually set by the user } } else { l.setNIn(currentInputType, - overrideNinUponBuild); //Don't override the nIn setting, if it's manually set by the user + isOverrideNinUponBuild()); //Don't override the nIn setting, if it's manually set by the user } } else { l.setNIn(currentInputType, - overrideNinUponBuild); //Don't override the nIn setting, if it's manually set by the user + isOverrideNinUponBuild()); //Don't override the nIn setting, if it's manually set by the user } currentInputType = l.getOutputType(i, currentInputType); @@ -957,13 +614,13 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { } - Nd4j.getRandom().setSeed(getNetConfigurations().get(0).getSeed()); + Nd4j.getRandom().setSeed(getSeed()); //Validate output layer configuration if (isValidateOutputLayerConfig()) { //Validate output layer configurations... 
for (LayerConfiguration n : getFlattenedLayerConfigurations()) { - OutputLayerUtil.validateOutputLayer(n.getLayerName(), n); //No-op for non output/loss layers + OutputLayerUtil.validateOutputLayer(n.getName(), n); //No-op for non output/loss layers } } } @@ -984,7 +641,7 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { Map memoryReportMap = new LinkedHashMap<>(); int nLayers = getFlattenedLayerConfigurations().size(); for (int i = 0; i < nLayers; i++) { - String layerName = getFlattenedLayerConfigurations().get(i).getLayerName(); + String layerName = getFlattenedLayerConfigurations().get(i).getName(); if (layerName == null) { layerName = String.valueOf(i); } @@ -1028,15 +685,13 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { return out; } - - public List netWideVariables() { - + public Set getNetWideVariables() { return netWideVariables; } - public List netWideVariables(boolean copy) { + public Set getNetWideVariables(boolean copy) { if (copy) { - return netWideVariables(); + return new LinkedHashSet<>(netWideVariables); } return netWideVariables; } @@ -1063,6 +718,7 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { * layers) * @return list with layer configurations */ + @JsonIgnore public List getLayerConfigurations() { return innerConfigurations.stream() .filter(obj -> (obj instanceof LayerConfiguration)) @@ -1074,11 +730,18 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { * From the list of layers and neural net configurations, only return the neural net configurations * @return list with neural net configurations */ + //@Synchronized("innerConfigurationsLock") + @JsonIgnore public List getNetConfigurations() { - return innerConfigurations.stream() - .filter(obj -> (obj instanceof NeuralNetConfiguration)) - .map( obj -> (NeuralNetConfiguration)obj ) - .collect( Collectors.toList()); + List list; + synchronized (innerConfigurations) { + list = + innerConfigurations.stream() + .filter(obj -> (obj instanceof NeuralNetConfiguration) && !obj.equals(this)) + .map(obj -> (NeuralNetConfiguration) obj) + .collect(Collectors.toList()); + } + return list; } /** @@ -1092,6 +755,7 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { //When properly initialized, _this_ configuration is set first in the list, however we //can find cases where this is not true, thus the first configuration is another net or layer configuration //and should not be skipped. In essence, skip first configuration if that is "this". 
+ //TODO: skipping not needed anymore as we removed _this_ from innerConfigurations int iSkip = 0; if(conf.getInnerConfigurations().size()>0 && conf.getInnerConfigurations().get(0).equals(this)) { iSkip=1;} conf.getInnerConfigurations().stream().skip(iSkip) @@ -1118,6 +782,7 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { * list of configurations * @return list of layer configurations */ + @JsonIgnore public List getFlattenedLayerConfigurations() { return getFlattenedLayerConfigurations(this); } @@ -1146,6 +811,13 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { return getFlattenedLayerConfigurations().get(0); } + + + protected boolean canEqual(final Object other) { + return other instanceof NeuralNetConfiguration; + } + + public static abstract class NeuralNetConfigurationBuilder> extends NeuralNetBaseBuilderConfigurationBuilder { @@ -1163,13 +835,4 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { } } - - public IModel getNeuralNet() { - return net; - } - - public void setNeuralNet(IModel model) { - this.net = model; - } - } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/AttentionVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/AttentionVertex.java index 1a5a30e72..e50e1aae5 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/AttentionVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/AttentionVertex.java @@ -157,9 +157,9 @@ public class AttentionVertex extends SameDiffVertex { val Wv = paramTable.get(WEIGHT_KEY_VALUE_PROJECTION); val Wo = paramTable.get(WEIGHT_KEY_OUT_PROJECTION); - attention = sameDiff.nn.multiHeadDotProductAttention(getLayerName(), queries, keys, values, Wq, Wk, Wv, Wo, mask, true); + attention = sameDiff.nn.multiHeadDotProductAttention(getName(), queries, keys, values, Wq, Wk, Wv, Wo, mask, true); }else{ - attention = sameDiff.nn.dotProductAttention(getLayerName(), queries, keys, values, mask, true); + attention = sameDiff.nn.dotProductAttention(getName(), queries, keys, values, mask, true); } if(maskVars != null){ diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AbstractLSTM.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AbstractLSTM.java index a974a7f91..13a520590 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AbstractLSTM.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AbstractLSTM.java @@ -21,105 +21,39 @@ package org.deeplearning4j.nn.conf.layers; import lombok.*; +import lombok.experimental.SuperBuilder; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.activations.impl.ActivationSigmoid; +import org.nd4j.linalg.activations.impl.ActivationTanH; + -@Data -@NoArgsConstructor @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@SuperBuilder public abstract class AbstractLSTM extends BaseRecurrentLayer { - protected double forgetGateBiasInit; - protected IActivation gateActivationFn = new ActivationSigmoid(); - protected boolean helperAllowFallback = true; - - protected AbstractLSTM(Builder builder) { - super(builder); - this.forgetGateBiasInit = builder.forgetGateBiasInit; - this.gateActivationFn = builder.gateActivationFn; - this.helperAllowFallback = builder.helperAllowFallback; - } - - @AllArgsConstructor - 
@NoArgsConstructor - @Getter - @Setter - public static abstract class Builder> extends BaseRecurrentLayer.Builder { - - /** - * Set forget gate bias initalizations. Values in range 1-5 can potentially help with learning or longer-term - * dependencies. - */ - protected double forgetGateBiasInit = 1.0; - - /** - * Activation function for the LSTM gates. Note: This should be bounded to range 0-1: sigmoid or hard sigmoid, - * for example - */ - protected IActivation gateActivationFn = new ActivationSigmoid(); - - /** - * When using CuDNN and an error is encountered, should fallback to the non-CuDNN implementatation be allowed? - * If set to false, an exception in CuDNN will be propagated back to the user. If false, the built-in - * (non-CuDNN) implementation for LSTM/GravesLSTM will be used - * - */ - protected boolean helperAllowFallback = true; - - /** - * Set forget gate bias initalizations. Values in range 1-5 can potentially help with learning or longer-term - * dependencies. - */ - public T forgetGateBiasInit(double biasInit) { - this.setForgetGateBiasInit(biasInit); - return (T) this; - } - - /** - * Activation function for the LSTM gates. Note: This should be bounded to range 0-1: sigmoid or hard sigmoid, - * for example - * - * @param gateActivationFn Activation function for the LSTM gates - */ - public T gateActivationFunction(String gateActivationFn) { - return gateActivationFunction(Activation.fromString(gateActivationFn)); - } - - /** - * Activation function for the LSTM gates. Note: This should be bounded to range 0-1: sigmoid or hard sigmoid, - * for example - * - * @param gateActivationFn Activation function for the LSTM gates - */ - public T gateActivationFunction(Activation gateActivationFn) { - return gateActivationFunction(gateActivationFn.getActivationFunction()); - } - - /** - * Activation function for the LSTM gates. Note: This should be bounded to range 0-1: sigmoid or hard sigmoid, - * for example - * - * @param gateActivationFn Activation function for the LSTM gates - */ - public T gateActivationFunction(IActivation gateActivationFn) { - this.setGateActivationFn(gateActivationFn); - return (T) this; - } - - /** - * When using a helper (CuDNN or MKLDNN in some cases) and an error is encountered, should fallback to the non-helper implementation be allowed? - * If set to false, an exception in the helper will be propagated back to the user. If false, the built-in - * (non-helper) implementation for LSTM/GravesLSTM will be used - * - * @param allowFallback Whether fallback to non-helper implementation should be used - */ - public T helperAllowFallback(boolean allowFallback) { - this.setHelperAllowFallback(allowFallback); - return (T) this; - } - - } + /** + * Set forget gate bias initalizations. Values in range 1-5 can potentially help with learning or + * longer-term dependencies. + */ + @lombok.Builder.Default @Getter protected double forgetGateBiasInit = 1.0; + /** + * When using CuDNN and an error is encountered, should fallback to the non-CuDNN implementatation + * be allowed? If set to false, an exception in CuDNN will be propagated back to the user. If + * false, the built-in (non-CuDNN) implementation for LSTM/GravesLSTM will be used + */ + @lombok.Builder.Default @Getter protected boolean helperAllowFallback = true; + /** + * Activation function for the LSTM gates. 
Note: This should be bounded to range 0-1: sigmoid or + * hard sigmoid, for example + */ + @lombok.Builder.Default @Getter @Setter private IActivation activation = new ActivationSigmoid(); + /** + * Activation function for the LSTM gates. Note: This should be bounded to range 0-1: sigmoid or + * hard sigmoid, for example + */ + @lombok.Builder.Default @Getter @Setter + private IActivation gateActivationFunction = new ActivationTanH(); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ActivationLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ActivationLayer.java index d7ee4b8ef..3f748a361 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ActivationLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ActivationLayer.java @@ -20,7 +20,10 @@ package org.deeplearning4j.nn.conf.layers; +import java.util.Collection; +import java.util.Map; import lombok.*; +import lombok.experimental.SuperBuilder; import net.brutex.ai.dnn.api.LayerType; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; @@ -37,143 +40,103 @@ import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.learning.config.IUpdater; -import java.util.Collection; -import java.util.Map; -@Data -@NoArgsConstructor @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder") public class ActivationLayer extends NoParamLayer { - protected IActivation activationFn; - protected ActivationLayer(Builder builder) { - super(builder); - setType(LayerType.ACT); - this.activationFn = builder.activationFn; - initializeConstraints(builder); + public static ActivationLayerBuilder builder(Activation activation) { + return innerBuilder().activation(activation); + } + public static ActivationLayerBuilder builder(IActivation activation) { + return innerBuilder().activation(activation); + } + + + public static ActivationLayerBuilder builder() { + return innerBuilder(); + } + @Override + public ActivationLayer clone() { + ActivationLayer clone = (ActivationLayer) super.clone(); + return clone; + } + + @Override + public IUpdater getIUpdater() { + return null; + } + + @Override + public Layer instantiate( + NeuralNetConfiguration conf, + Collection trainingListeners, + int layerIndex, + INDArray layerParamsView, + boolean initializeParams, + DataType networkDataType) { + this.setNetConfiguration(conf); + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + + org.deeplearning4j.nn.layers.ActivationLayer ret = + new org.deeplearning4j.nn.layers.ActivationLayer(lconf, networkDataType); + ret.addTrainingListeners(trainingListeners); + ret.setIndex(layerIndex); + ret.setParamsViewArray(layerParamsView); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); + ret.setParamTable(paramTable); + ret.setLayerConfiguration(lconf); + return ret; + } + + @Override + public ParamInitializer initializer() { + return EmptyParamInitializer.getInstance(); + } + + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + if (inputType == null) { + throw new IllegalStateException( + "Invalid input type: null for layer name \"" + getName() + "\""); } + return inputType; + } - /** - * @param activation Activation function for the layer - */ - public ActivationLayer(Activation activation) 
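Because @SuperBuilder on ActivationLayer renames the generated entry points (builderMethodName = "innerBuilder", buildMethodName = "initBuild"), the public surface stays close to the old API through the hand-written static builder(...) overloads and the custom build() in ActivationLayerBuilder, which runs initializeConstraints() after initBuild(). A rough before/after of the call site, with RELU chosen purely for illustration:

    import org.deeplearning4j.nn.conf.layers.ActivationLayer;
    import org.nd4j.linalg.activations.Activation;

    class ActivationLayerSketch {
      static ActivationLayer build() {
        // Previously: new ActivationLayer.Builder().activation(Activation.RELU).build()
        // With this patch, the static factory pre-populates the activation:
        return ActivationLayer.builder(Activation.RELU).build();
      }
    }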
{ - this(new Builder().activation(activation)); - } - - /** - * @param activationFn Activation function for the layer - */ - public ActivationLayer(IActivation activationFn) { - this(new Builder().activation(activationFn)); - } - - @Override - public ActivationLayer clone() { - ActivationLayer clone = (ActivationLayer) super.clone(); - return clone; - } - - @Override - public IUpdater getIUpdater() { - return null; - } - - @Override - public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, - int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { - this.setNetConfiguration(conf); - LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); - - org.deeplearning4j.nn.layers.ActivationLayer ret = new org.deeplearning4j.nn.layers.ActivationLayer(lconf, networkDataType); - ret.addTrainingListeners(trainingListeners); - ret.setIndex(layerIndex); - ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(this, layerParamsView, initializeParams); - ret.setParamTable(paramTable); - ret.setLayerConfiguration(lconf); - return ret; - } - - @Override - public ParamInitializer initializer() { - return EmptyParamInitializer.getInstance(); - } - - @Override - public InputType getOutputType(int layerIndex, InputType inputType) { - if (inputType == null) { - throw new IllegalStateException("Invalid input type: null for layer name \"" + getLayerName() + "\""); - } - return inputType; - } - - @Override - public InputPreProcessor getPreProcessorForInputType(InputType inputType) { - //No input preprocessor required for any input - return null; - } - - @Override - public LayerMemoryReport getMemoryReport(InputType inputType) { - val actElementsPerEx = inputType.arrayElementsPerExample(); - - return new LayerMemoryReport.Builder(layerName, ActivationLayer.class, inputType, inputType) - .standardMemory(0, 0) //No params - //During inference: modify input activation in-place - //During backprop: dup the input for later re-use - .workingMemory(0, 0, 0, actElementsPerEx) - .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching - .build(); - } - - @Override - public void setNIn(InputType inputType, boolean override) { - //No op - } - - @AllArgsConstructor - @NoArgsConstructor - @Getter - @Setter - public static class Builder extends LayerConfiguration.Builder { - - /** - * Activation function for the layer - */ - private IActivation activationFn = null; - - /** - * ILayer activation function. Typical values include:
"relu" (rectified linear), "tanh", "sigmoid", - * "softmax", "hardtanh", "leakyrelu", "maxout", "softsign", "softplus" - * - * @deprecated Use {@link #activation(Activation)} or {@link @activation(IActivation)} - */ - @Deprecated - public Builder activation(String activationFunction) { - return activation(Activation.fromString(activationFunction)); - } - - /** - * @param activationFunction Activation function for the layer - */ - public Builder activation(IActivation activationFunction) { - this.setActivationFn(activationFunction); - return this; - } - - /** - * @param activation Activation function for the layer - */ - public Builder activation(Activation activation) { - return activation(activation.getActivationFunction()); - } - - @Override - @SuppressWarnings("unchecked") - public ActivationLayer build() { - return new ActivationLayer(this); - } + @Override + public InputPreProcessor getPreProcessorForInputType(InputType inputType) { + // No input preprocessor required for any input + return null; + } + + @Override + public LayerMemoryReport getMemoryReport(InputType inputType) { + val actElementsPerEx = inputType.arrayElementsPerExample(); + + return new LayerMemoryReport.Builder(name, ActivationLayer.class, inputType, inputType) + .standardMemory(0, 0) // No params + // During inference: modify input activation in-place + // During backprop: dup the input for later re-use + .workingMemory(0, 0, 0, actElementsPerEx) + .cacheMemory( + MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) // No caching + .build(); + } + + @Override + public void setNIn(InputType inputType, boolean override) { + // No op + } + + public static abstract class ActivationLayerBuilder< + C extends ActivationLayer, B extends ActivationLayerBuilder> + extends NoParamLayer.NoParamLayerBuilder { + public C build() { + C l = this.initBuild(); + l.initializeConstraints(); + return l; } + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AutoEncoder.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AutoEncoder.java index c0dbc4f56..54cb41d71 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AutoEncoder.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AutoEncoder.java @@ -21,6 +21,7 @@ package org.deeplearning4j.nn.conf.layers; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -35,22 +36,27 @@ import org.nd4j.linalg.api.ndarray.INDArray; import java.util.Collection; import java.util.Map; -@Data -@NoArgsConstructor + @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@SuperBuilder public class AutoEncoder extends BasePretrainNetwork { - protected double corruptionLevel; - protected double sparsity; + /** + * Level of corruption - 0.0 (none) to 1.0 (all values corrupted) + * + */ + @lombok.Builder.Default @Getter @Setter + private double corruptionLevel = 3e-1f; + /** + * Autoencoder sparity parameter + * + */ + @lombok.Builder.Default @Getter @Setter + protected double sparsity = 0f; + + - // Builder - private AutoEncoder(Builder builder) { - super(builder); - this.corruptionLevel = builder.corruptionLevel; - this.sparsity = builder.sparsity; - initializeConstraints(builder); - } @Override public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, @@ -85,7 +91,7 @@ public 
class AutoEncoder extends BasePretrainNetwork { val updaterStateSize = (int) getIUpdater().stateSize(numParams); int trainSizePerEx = 0; - if (getIDropout() != null) { + if (getDropOut() != null) { if (false) { //TODO drop connect //Dup the weights... note that this does NOT depend on the minibatch size... @@ -99,64 +105,11 @@ public class AutoEncoder extends BasePretrainNetwork { // which is modified in-place by loss function trainSizePerEx += actElementsPerEx; - return new LayerMemoryReport.Builder(layerName, AutoEncoder.class, inputType, outputType) + return new LayerMemoryReport.Builder(name, AutoEncoder.class, inputType, outputType) .standardMemory(numParams, updaterStateSize).workingMemory(0, 0, 0, trainSizePerEx) .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching .build(); } - @AllArgsConstructor - @Getter - @Setter - public static class Builder extends BasePretrainNetwork.Builder { - /** - * Level of corruption - 0.0 (none) to 1.0 (all values corrupted) - * - */ - private double corruptionLevel = 3e-1f; - - /** - * Autoencoder sparity parameter - * - */ - private double sparsity = 0f; - - public Builder() {} - - /** - * Builder - sets the level of corruption - 0.0 (none) to 1.0 (all values corrupted) - * - * @param corruptionLevel Corruption level (0 to 1) - */ - public Builder(double corruptionLevel) { - this.setCorruptionLevel(corruptionLevel); - } - - /** - * Level of corruption - 0.0 (none) to 1.0 (all values corrupted) - * - * @param corruptionLevel Corruption level (0 to 1) - */ - public Builder corruptionLevel(double corruptionLevel) { - this.setCorruptionLevel(corruptionLevel); - return this; - } - - /** - * Autoencoder sparity parameter - * - * @param sparsity Sparsity - */ - public Builder sparsity(double sparsity) { - this.setSparsity(sparsity); - return this; - } - - @Override - @SuppressWarnings("unchecked") - public AutoEncoder build() { - return new AutoEncoder(this); - } - } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseLayerConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseLayerConfiguration.java index b16ecb768..bd415e22b 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseLayerConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseLayerConfiguration.java @@ -20,20 +20,23 @@ package org.deeplearning4j.nn.conf.layers; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.ITraininableLayerConfiguration; +import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.GradientNormalization; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.distribution.Distribution; -import org.deeplearning4j.nn.conf.weightnoise.IWeightNoise; import org.deeplearning4j.nn.weights.IWeightInit; import org.deeplearning4j.nn.weights.WeightInit; import org.deeplearning4j.nn.weights.WeightInitDistribution; import org.deeplearning4j.util.NetworkUtils; import org.jetbrains.annotations.NotNull; -import org.nd4j.linalg.activations.Activation; -import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.learning.config.IUpdater; import 
org.nd4j.linalg.learning.regularization.L1Regularization; @@ -41,486 +44,375 @@ import org.nd4j.linalg.learning.regularization.L2Regularization; import org.nd4j.linalg.learning.regularization.Regularization; import org.nd4j.linalg.learning.regularization.WeightDecay; -import java.io.Serializable; -import java.util.ArrayList; -import java.util.List; - -/** - * A neural network layer. - */ -@Data +/** A neural network layer. */ @EqualsAndHashCode(callSuper = true) -@NoArgsConstructor(force = true) -public abstract class BaseLayerConfiguration extends LayerConfiguration implements ITraininableLayerConfiguration, Serializable, Cloneable { - - @NonNull - protected IWeightInit weightInit; - protected double biasInit = 0.0; - protected double gainInit = 0.0; - protected List regularization; - protected List regularizationBias; - protected IUpdater iUpdater; - protected IUpdater biasUpdater; - private DataType dataType; - - protected GradientNormalization gradientNormalization = GradientNormalization.None; //Clipping, rescale based on l2 norm, etc - protected double gradientNormalizationThreshold = 1.0; //Threshold for l2 and element-wise gradient clipping +@NoArgsConstructor +@SuperBuilder +public abstract class BaseLayerConfiguration extends LayerConfiguration + implements ITraininableLayerConfiguration, Serializable, Cloneable { - public BaseLayerConfiguration(Builder builder) { - super(builder); - this.layerName = builder.layerName; - this.weightInit = builder.weightInit; - this.biasInit = builder.biasInit; - this.gainInit = builder.gainInit; - this.regularization = builder.regularization; - this.regularizationBias = builder.regularizationBias; - this.iUpdater = builder.iupdater; - this.biasUpdater = builder.biasUpdater; - this.gradientNormalization = builder.gradientNormalization; - this.gradientNormalizationThreshold = builder.gradientNormalizationThreshold; - this.weightNoise = builder.weightNoise; - super.setActivationFn(builder.activationFn); + /** + * Set constraints to be applied to all layers. Default: no constraints.
+ * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm + * regularization, etc). These constraints are applied at each iteration, after the parameters + * have been updated.
+ * Note: values set by this method will be applied to all applicable layers in the network, unless + * a different value is explicitly set on a given layer. In other words: values set via this + * method are used as the default value, and can be overridden on a per-layer basis. + * + * @param constraints Constraints to apply to all bias parameters of all layers + */ + //@lombok.Builder.Default @Getter protected final List biasConstraints = new ArrayList<>(); + /** + * Set constraints to be applied to all layers. Default: no constraints.
+ * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm + * regularization, etc). These constraints are applied at each iteration, after the parameters + * have been updated.
+ * Note: values set by this method will be applied to all applicable layers in the network, unless + * a different value is explicitly set on a given layer. In other words: values set via this + * method are used as the default value, and can be overridden on a per-layer basis. + * + * @param constraints Constraints to apply to all weight parameters of all layers + */ + @lombok.Builder.Default @Getter + protected final List constrainWeights = new ArrayList<>(); + /** Weight initialization scheme to use, for initial weight values */ + @Getter @Setter + protected IWeightInit weightInit; + /** Bias initialization value, for layers with biases. Defaults to 0 */ + @Getter @Setter @Builder.Default + protected double biasInit = 0.0; + /** Gain initialization value, for layers with ILayer Normalization. Defaults to 1 */ + @Getter @Setter @Builder.Default + protected double gainInit = 0.0; + /** Regularization for the parameters (excluding biases). */ + @Builder.Default @Getter @Setter protected List regularization = new ArrayList<>(); + /** Regularization for the bias parameters only */ + @Builder.Default @Getter @Setter + protected List regularizationBias = new ArrayList<>(); + /** + * Gradient updater. For example, {@link org.nd4j.linalg.learning.config.Adam} or {@link + * org.nd4j.linalg.learning.config.Nesterovs} + */ + @Getter @Setter + protected IUpdater updater; + /** + * Gradient updater configuration, for the biases only. If not set, biases will use the updater as + * set by {@link #setUpdater(IUpdater)} + */ + @Getter @Setter + protected IUpdater biasUpdater; + /** + * Gradient normalization strategy. Used to specify gradient renormalization, gradient clipping + * etc. Defaults to {@link GradientNormalization#None}. + * + * @see GradientNormalization + */ + @Builder.Default + protected @Getter @Setter GradientNormalization gradientNormalization = + GradientNormalization.None; // Clipping, rescale based on l2 norm, etc + /** + * Threshold for gradient normalization, only used for GradientNormalization.ClipL2PerLayer, + * GradientNormalization.ClipL2PerParamType, and + * GradientNormalization.ClipElementWiseAbsoluteValue
+ * Not used otherwise.
+ * L2 threshold for first two types of clipping, or absolute value threshold for last type of + * clipping. + */ + @Builder.Default @Getter @Setter + protected double gradientNormalizationThreshold = + 1.0; // Threshold for l2 and element-wise gradient clipping +@Getter @Setter + private DataType dataType; + + /** + * Reset the learning related configs of the layer to default. When instantiated with a global + * neural network configuration the parameters specified in the neural network configuration will + * be used. For internal use with the transfer learning API. Users should not have to call this + * method directly. + */ + public void resetLayerDefaultConfig() { + // clear the learning related params for all layers in the origConf and set to defaults + this.setIUpdater(null); + this.setWeightInit(null); + this.setBiasInit(Double.NaN); + this.setGainInit(Double.NaN); + this.regularization = null; + this.regularizationBias = null; + this.setGradientNormalization(GradientNormalization.None); + this.setGradientNormalizationThreshold(1.0); + this.updater = null; + this.biasUpdater = null; + } + + @Override + public BaseLayerConfiguration clone() { + BaseLayerConfiguration clone = (BaseLayerConfiguration) super.clone(); + if (regularization != null) { + // Regularization fields are _usually_ thread safe and immutable, but let's clone to be sure + clone.regularization = new ArrayList<>(regularization.size()); + for (Regularization r : regularization) { + clone.regularization.add(r.clone()); + } + } + if (regularizationBias != null) { + clone.regularizationBias = new ArrayList<>(regularizationBias.size()); + for (Regularization r : regularizationBias) { + clone.regularizationBias.add(r.clone()); + } + } + return clone; + } + + /** + * Get the updater for the given parameter. Typically the same updater will be used for all + * parameters, but this is not necessarily the case + * + * @param paramName Parameter name + * @return IUpdater for the parameter + */ + @Override + public IUpdater getUpdaterByParam(String paramName) { + if (biasUpdater != null && initializer().isBiasParam(this, paramName)) { + return biasUpdater; + } + return updater; + } + + @Override + public List getRegularizationByParam(String paramName) { + if (initializer().isWeightParam(this, paramName)) { + return regularization; + } else if (initializer().isBiasParam(this, paramName)) { + return regularizationBias; + } + return null; + } + + /** + * Inherit setting from neural network for those settings, that are not already set or do have a + * layer(type) specific default. 
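Taken together, the fields above become ordinary builder properties on every BaseLayerConfiguration subclass. A hedged sketch of a call site, using DenseLayer purely as an illustrative subclass and assuming it is migrated to @SuperBuilder as well; the weightInit and updater overloads referenced here are the customized builder methods added further down in this file, and nIn/nOut are assumed to come from the FeedForwardLayer hierarchy.

    import org.deeplearning4j.nn.conf.GradientNormalization;
    import org.deeplearning4j.nn.conf.layers.DenseLayer;
    import org.deeplearning4j.nn.weights.WeightInit;
    import org.nd4j.linalg.learning.config.Adam;

    class BaseLayerBuilderSketch {
      static DenseLayer build() {
        return DenseLayer.builder()
            .nIn(784).nOut(256)                     // assumed FeedForwardLayer fields
            .weightInit(WeightInit.XAVIER)          // customized overload defined below
            .biasInit(0.0)                          // @Builder.Default field above
            .updater(new Adam(1e-3))                // replaces the old iUpdater/updater(IUpdater)
            .gradientNormalization(GradientNormalization.ClipL2PerLayer)
            .gradientNormalizationThreshold(1.0)
            .build();
      }
    }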
+ * + * @param conf the neural net configration to inherit parameters from + */ + @Override + public void runInheritance(@NotNull NeuralNetConfiguration conf) { + super.runInheritance(conf); + if (this.biasUpdater == null) this.biasUpdater = conf.getBiasUpdater(); + if (this.updater == null) this.updater = conf.getUpdater(); + if (this.regularizationBias == null) this.regularizationBias = conf.getRegularizationBias(); + if (this.regularization == null) this.regularization = conf.getRegularization(); + if (this.gradientNormalization == null) + this.gradientNormalization = conf.getGradientNormalization(); + // if(this.weightInit == null) this.weightInit = conf.getWeightInit(); + + } + + public static abstract class BaseLayerConfigurationBuilder< + C extends BaseLayerConfiguration, B extends BaseLayerConfigurationBuilder> + extends LayerConfigurationBuilder { + +public B updater(Updater upd) { + this.updater = upd.getIUpdaterWithDefaultConfig(); + return self(); +} + + + public B updater(IUpdater upd) { + this.updater = upd; + return self(); } /** - * Reset the learning related configs of the layer to default. When instantiated with a global neural network - * configuration the parameters specified in the neural network configuration will be used. For internal use with - * the transfer learning API. Users should not have to call this method directly. - */ - public void resetLayerDefaultConfig() { - //clear the learning related params for all layers in the origConf and set to defaults - this.setIUpdater(null); - this.setWeightInit(null); - this.setBiasInit(Double.NaN); - this.setGainInit(Double.NaN); - this.regularization = null; - this.regularizationBias = null; - this.setGradientNormalization(GradientNormalization.None); - this.setGradientNormalizationThreshold(1.0); - this.iUpdater = null; - this.biasUpdater = null; - } - - @Override - public BaseLayerConfiguration clone() { - BaseLayerConfiguration clone = (BaseLayerConfiguration) super.clone(); - if(regularization != null){ - //Regularization fields are _usually_ thread safe and immutable, but let's clone to be sure - clone.regularization = new ArrayList<>(regularization.size()); - for(Regularization r : regularization){ - clone.regularization.add(r.clone()); - } - } - if(regularizationBias != null){ - clone.regularizationBias = new ArrayList<>(regularizationBias.size()); - for(Regularization r : regularizationBias){ - clone.regularizationBias.add(r.clone()); - } - } - return clone; - } - - /** - * Get the updater for the given parameter. Typically the same updater will be used for all parameters, but this is - * not necessarily the case + * Set weight initialization scheme to random sampling via the specified distribution. 
+ * Equivalent to: {@code .weightInit(new WeightInitDistribution(distribution))} * - * @param paramName Parameter name - * @return IUpdater for the parameter + * @param dist Distribution to use for weight initialization */ - @Override - public IUpdater getUpdaterByParam(String paramName) { - if (biasUpdater != null && initializer().isBiasParam(this, paramName)) { - return biasUpdater; - } - return iUpdater; - } - - @Override - public GradientNormalization getGradientNormalization() { - return gradientNormalization; - } - - @Override - public List getRegularizationByParam(String paramName){ - if(initializer().isWeightParam(this, paramName)){ - return regularization; - } else if(initializer().isBiasParam(this, paramName)){ - return regularizationBias; - } - return null; - } - - - @SuppressWarnings("unchecked") - @Getter - @Setter - public abstract static class Builder> extends LayerConfiguration.Builder { - - /** - * Set the activation function for the layer. This overload can be used for custom {@link IActivation} - * instances - * - */ - protected IActivation activationFn = null; - - /** - * Weight initialization scheme to use, for initial weight values - * - * @see IWeightInit - */ - protected IWeightInit weightInit = null; - - /** - * Bias initialization value, for layers with biases. Defaults to 0 - * - */ - protected double biasInit = 0.0; - - /** - * Gain initialization value, for layers with ILayer Normalization. Defaults to 1 - * - */ - protected double gainInit = 1.0; - - /** - * Regularization for the parameters (excluding biases). - */ - protected List regularization = new ArrayList<>(); - /** - * Regularization for the bias parameters only - */ - protected List regularizationBias = new ArrayList<>(); - - /** - * Gradient updater. For example, {@link org.nd4j.linalg.learning.config.Adam} or {@link - * org.nd4j.linalg.learning.config.Nesterovs} - * - */ - protected IUpdater iupdater = null; - - /** - * Gradient updater configuration, for the biases only. If not set, biases will use the updater as set by {@link - * #updater(IUpdater)} - * - */ - protected IUpdater biasUpdater = null; - - /** - * Gradient normalization strategy. Used to specify gradient renormalization, gradient clipping etc. - * - * @see GradientNormalization - */ - protected GradientNormalization gradientNormalization = null; - - /** - * Threshold for gradient normalization, only used for GradientNormalization.ClipL2PerLayer, - * GradientNormalization.ClipL2PerParamType, and GradientNormalization.ClipElementWiseAbsoluteValue
Not used - * otherwise.
L2 threshold for first two types of clipping, or absolute value threshold for last type of - * clipping. - */ - protected double gradientNormalizationThreshold = Double.NaN; - - /** - * Set the weight noise (such as {@link org.deeplearning4j.nn.conf.weightnoise.DropConnect} and {@link - * org.deeplearning4j.nn.conf.weightnoise.WeightNoise}) for this layer - * - */ - protected IWeightNoise weightNoise; - - /** - * Set the activation function for the layer. This overload can be used for custom {@link IActivation} - * instances - * - * @param activationFunction Activation function to use for the layer - */ - public T activation(IActivation activationFunction) { - this.setActivationFn(activationFunction); - return (T) this; - } - - /** - * Set the activation function for the layer, from an {@link Activation} enumeration value. - * - * @param activation Activation function to use for the layer - */ - public T activation(Activation activation) { - return activation(activation.getActivationFunction()); - } - - /** - * Weight initialization scheme to use, for initial weight values - * - * @see IWeightInit - */ - public T weightInit(IWeightInit weightInit) { - this.setWeightInit(weightInit); - return (T) this; - } - - /** - * Weight initialization scheme to use, for initial weight values - * - * @see WeightInit - */ - public T weightInit(WeightInit weightInit) { - if (weightInit == WeightInit.DISTRIBUTION) { - throw new UnsupportedOperationException( - "Not supported!, Use weightInit(Distribution distribution) instead!"); - } - - this.setWeightInit(weightInit.getWeightInitFunction()); - return (T) this; - } - - /** - * Set weight initialization scheme to random sampling via the specified distribution. Equivalent to: {@code - * .weightInit(new WeightInitDistribution(distribution))} - * - * @param distribution Distribution to use for weight initialization - */ - public T weightInit(Distribution distribution) { - return weightInit(new WeightInitDistribution(distribution)); - } - - /** - * Bias initialization value, for layers with biases. Defaults to 0 - * - * @param biasInit Value to use for initializing biases - */ - public T biasInit(double biasInit) { - this.setBiasInit(biasInit); - return (T) this; - } - - /** - * Gain initialization value, for layers with ILayer Normalization. Defaults to 1 - * - * @param gainInit Value to use for initializing gain - */ - public T gainInit(double gainInit) { - this.gainInit = gainInit; - return (T) this; - } - - /** - * Distribution to sample initial weights from. Equivalent to: {@code .weightInit(new - * WeightInitDistribution(distribution))} - */ - @Deprecated - public T dist(Distribution dist) { - return weightInit(dist); - } - - /** - * L1 regularization coefficient (weights only). Use {@link #l1Bias(double)} to configure the l1 regularization - * coefficient for the bias. - */ - public T l1(double l1) { - //Check if existing L1 exists; if so, replace it - NetworkUtils.removeInstances(this.regularization, L1Regularization.class); - if(l1 > 0.0) { - this.regularization.add(new L1Regularization(l1)); - } - return (T) this; - } - - /** - * L2 regularization coefficient (weights only). Use {@link #l2Bias(double)} to configure the l2 regularization - * coefficient for the bias.
- * Note: Generally, {@link WeightDecay} (set via {@link #weightDecay(double,boolean)} should be preferred to - * L2 regularization. See {@link WeightDecay} javadoc for further details.
- */ - public T l2(double l2) { - //Check if existing L2 exists; if so, replace it. Also remove weight decay - it doesn't make sense to use both - NetworkUtils.removeInstances(this.regularization, L2Regularization.class); - if(l2 > 0.0) { - NetworkUtils.removeInstancesWithWarning(this.regularization, WeightDecay.class, "WeightDecay regularization removed: incompatible with added L2 regularization"); - this.regularization.add(new L2Regularization(l2)); - } - return (T) this; - } - - /** - * L1 regularization coefficient for the bias. Default: 0. See also {@link #l1(double)} - */ - public T l1Bias(double l1Bias) { - NetworkUtils.removeInstances(this.regularizationBias, L1Regularization.class); - if(l1Bias > 0.0) { - this.regularizationBias.add(new L1Regularization(l1Bias)); - } - return (T) this; - } - - /** - * L2 regularization coefficient for the bias. Default: 0. See also {@link #l2(double)}
- * Note: Generally, {@link WeightDecay} (set via {@link #weightDecayBias(double,boolean)} should be preferred to - * L2 regularization. See {@link WeightDecay} javadoc for further details.
- */ - public T l2Bias(double l2Bias) { - NetworkUtils.removeInstances(this.regularizationBias, L2Regularization.class); - if(l2Bias > 0.0) { - NetworkUtils.removeInstancesWithWarning(this.regularizationBias, WeightDecay.class, "WeightDecay regularization removed: incompatible with added L2 regularization"); - this.regularizationBias.add(new L2Regularization(l2Bias)); - } - return (T) this; - } - - /** - * Add weight decay regularization for the network parameters (excluding biases).
- * This applies weight decay with multiplying the learning rate - see {@link WeightDecay} for more details.
- * - * @param coefficient Weight decay regularization coefficient - * @see #weightDecay(double, boolean) - */ - public Builder weightDecay(double coefficient) { - return weightDecay(coefficient, true); - } - - /** - * Add weight decay regularization for the network parameters (excluding biases). See {@link WeightDecay} for more details.
- * - * @param coefficient Weight decay regularization coefficient - * @param applyLR Whether the learning rate should be multiplied in when performing weight decay updates. See {@link WeightDecay} for more details. - * @see #weightDecay(double, boolean) - */ - public Builder weightDecay(double coefficient, boolean applyLR) { - //Check if existing weight decay if it exists; if so, replace it. Also remove L2 - it doesn't make sense to use both - NetworkUtils.removeInstances(this.regularization, WeightDecay.class); - if(coefficient > 0.0) { - NetworkUtils.removeInstancesWithWarning(this.regularization, L2Regularization.class, "L2 regularization removed: incompatible with added WeightDecay regularization"); - this.regularization.add(new WeightDecay(coefficient, applyLR)); - } - return this; - } - - /** - * Weight decay for the biases only - see {@link #weightDecay(double)} for more details. - * This applies weight decay with multiplying the learning rate.
- * - * @param coefficient Weight decay regularization coefficient - * @see #weightDecayBias(double, boolean) - */ - public Builder weightDecayBias(double coefficient) { - return weightDecayBias(coefficient, true); - } - - /** - * Weight decay for the biases only - see {@link #weightDecay(double)} for more details
- * - * @param coefficient Weight decay regularization coefficient - */ - public Builder weightDecayBias(double coefficient, boolean applyLR) { - //Check if existing weight decay if it exists; if so, replace it. Also remove L2 - it doesn't make sense to use both - NetworkUtils.removeInstances(this.regularizationBias, WeightDecay.class); - if(coefficient > 0.0) { - NetworkUtils.removeInstancesWithWarning(this.regularizationBias, L2Regularization.class, "L2 regularization removed: incompatible with added WeightDecay regularization"); - this.regularizationBias.add(new WeightDecay(coefficient, applyLR)); - } - return this; - } - - /** - * Set the regularization for the parameters (excluding biases) - for example {@link WeightDecay}
- * - * @param regularization Regularization to apply for the network parameters/weights (excluding biases) - */ - public Builder regularization(List regularization) { - this.setRegularization(regularization); - return this; - } - - /** - * Set the regularization for the biases only - for example {@link WeightDecay}
- * - * @param regularizationBias Regularization to apply for the network biases only - */ - public Builder regularizationBias(List regularizationBias) { - this.setRegularizationBias(regularizationBias); - return this; - } - - /** - * Gradient updater. For example, SGD for standard stochastic gradient descent, NESTEROV for Nesterov momentum, - * RSMPROP for RMSProp, etc. - * - * @see Updater - */ - @Deprecated - public T updater(Updater updater) { - return updater(updater.getIUpdaterWithDefaultConfig()); - } - - /** - * Gradient updater. For example, {@link org.nd4j.linalg.learning.config.Adam} or {@link - * org.nd4j.linalg.learning.config.Nesterovs} - * - * @param updater Updater to use - */ - public T updater(IUpdater updater) { - this.setIupdater(updater); - return (T) this; - } - - /** - * Gradient updater configuration, for the biases only. If not set, biases will use the updater as set by {@link - * #updater(IUpdater)} - * - * @param biasUpdater Updater to use for bias parameters - */ - public T biasUpdater(IUpdater biasUpdater) { - this.setBiasUpdater(biasUpdater); - return (T) this; - } - - /** - * Gradient normalization strategy. Used to specify gradient renormalization, gradient clipping etc. - * - * @param gradientNormalization Type of normalization to use. Defaults to None. - * @see GradientNormalization - */ - public T gradientNormalization(GradientNormalization gradientNormalization) { - this.setGradientNormalization(gradientNormalization); - return (T) this; - } - - /** - * Threshold for gradient normalization, only used for GradientNormalization.ClipL2PerLayer, - * GradientNormalization.ClipL2PerParamType, and GradientNormalization.ClipElementWiseAbsoluteValue
Not used - * otherwise.
L2 threshold for first two types of clipping, or absolute value threshold for last type of - * clipping. - */ - public T gradientNormalizationThreshold(double threshold) { - this.setGradientNormalizationThreshold(threshold); - return (T) this; - } - - /** - * Set the weight noise (such as {@link org.deeplearning4j.nn.conf.weightnoise.DropConnect} and {@link - * org.deeplearning4j.nn.conf.weightnoise.WeightNoise}) for this layer - * - * @param weightNoise Weight noise instance to use - */ - public T weightNoise(IWeightNoise weightNoise) { - this.setWeightNoise(weightNoise); - return (T) this; - } + @Deprecated + public B dist(Distribution dist) { + this.weightInit = new WeightInitDistribution(dist); + return self(); } /** - * Inherit setting from neural network for those settings, that are not already set or do have - * a layer(type) specific default. - * @param conf the neural net configration to inherit parameters from + * Weight initialization scheme to use, for initial weight values + * + * @see WeightInit */ - @Override - public void runInheritance(@NotNull NeuralNetConfiguration conf) { - super.runInheritance(conf); - if(this.biasUpdater == null ) this.biasUpdater = conf.getBiasUpdater(); - if(this.iUpdater == null ) this.iUpdater = conf.getUpdater(); - if(this.regularizationBias == null) this.regularizationBias = conf.getRegularizationBias(); - if(this.regularization == null ) this.regularization = conf.getRegularization(); - if(this.gradientNormalization == null) this.gradientNormalization = conf.getGradientNormalization(); - if(this.weightInit == null) this.weightInit = conf.getWeightInit(); + public B weightInit(WeightInit weightInit) { + if (weightInit == WeightInit.DISTRIBUTION) { + throw new UnsupportedOperationException( + "Not supported!, Use weightInit(Distribution distribution) instead!"); + } + this.weightInit = weightInit.getWeightInitFunction(); + return self(); } -} \ No newline at end of file + public B weightInit(IWeightInit weightInit) { + if (weightInit.enumValue() == WeightInit.DISTRIBUTION) { + throw new UnsupportedOperationException( + "Not supported!, Use weightInit(Distribution distribution) instead!"); + } + this.weightInit = weightInit.enumValue().getWeightInitFunction(); + return self(); + } + + public B weightInit(Distribution dist) { + this.weightInit = new WeightInitDistribution(dist); + return self(); + } + + /** + * L1 regularization coefficient (weights only). Use {@link #l1Bias(double)} to configure the l1 + * regularization coefficient for the bias. + */ + public B l1(double l1) { + // Check if existing L1 exists; if so, replace it + NetworkUtils.removeInstances(regularization$value, L1Regularization.class); + if (l1 > 0.0) { + this.regularization$value.add(new L1Regularization(l1)); + } + regularization$set = true; + return self(); + } + + /** + * L2 regularization coefficient (weights only). Use {@link #l2Bias(double)} to configure the l2 + * regularization coefficient for the bias.
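The regularization$value / regularization$set pair used in l1() here (and in the methods that follow) is not hand-written state: for every @Builder.Default field, Lombok's @SuperBuilder stages assignments in a <field>$value backing field and records in <field>$set whether the declared default should be overridden when build() runs. A toy illustration of that mechanism, separate from this code base:

    import lombok.Builder;
    import lombok.Getter;
    import lombok.experimental.SuperBuilder;

    // Toy example of @Builder.Default semantics under @SuperBuilder.
    @SuperBuilder
    class DefaultsSketch {
      @Builder.Default @Getter private double threshold = 1.0;
    }

    class DefaultsSketchDemo {
      public static void main(String[] args) {
        double kept = DefaultsSketch.builder().build().getThreshold();               // 1.0, default applied
        double set  = DefaultsSketch.builder().threshold(2.5).build().getThreshold(); // 2.5, threshold$set == true
        System.out.println(kept + " " + set);
      }
    }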
+ * Note: Generally, {@link org.nd4j.linalg.learning.regularization.WeightDecay} (set via + * {@link #weightDecay(double,boolean)} should be preferred to L2 regularization. See {@link + * org.nd4j.linalg.learning.regularization.WeightDecay} javadoc for further details.
+ */ + public B l2(double l2) { + // Check if existing L2 exists; if so, replace it. Also remove weight decay - it doesn't make + // sense to use both + NetworkUtils.removeInstances(this.regularization$value, L2Regularization.class); + if (l2 > 0.0) { + NetworkUtils.removeInstancesWithWarning( + this.regularization$value, + WeightDecay.class, + "WeightDecay regularization removed: incompatible with added L2 regularization"); + this.regularization$value.add(new L2Regularization(l2)); + } + regularization$set = true; + return self(); + } + + /** L1 regularization coefficient for the bias. Default: 0. See also {@link #l1(double)} */ + public B l1Bias(double l1Bias) { + NetworkUtils.removeInstances(this.regularizationBias$value, L1Regularization.class); + if (l1Bias > 0.0) { + this.regularizationBias$value.add(new L1Regularization(l1Bias)); + } + regularizationBias$set = true; + return self(); + } + + /** + * L2 regularization coefficient for the bias. Default: 0. See also {@link #l2(double)}
+ * Note: Generally, {@link WeightDecay} (set via {@link #weightDecayBias(double,boolean)} + * should be preferred to L2 regularization. See {@link WeightDecay} javadoc for further + * details.
+ */ + public B l2Bias(double l2Bias) { + NetworkUtils.removeInstances(this.regularizationBias$value, L2Regularization.class); + if (l2Bias > 0.0) { + NetworkUtils.removeInstancesWithWarning( + this.regularizationBias$value, + WeightDecay.class, + "WeightDecay regularization removed: incompatible with added L2 regularization"); + this.regularizationBias$value.add(new L2Regularization(l2Bias)); + } + regularizationBias$set = true; + return self(); + } + + /** + * Add weight decay regularization for the network parameters (excluding biases).
+ * This applies weight decay multiplied by the learning rate - see {@link WeightDecay} + * for more details.
+ * + * @param coefficient Weight decay regularization coefficient + * @see #weightDecay(double, boolean) + */ + public B weightDecay(double coefficient) { + return weightDecay(coefficient, true); + } + + /** + * Add weight decay regularization for the network parameters (excluding biases). See {@link + * WeightDecay} for more details.
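As with the old Builder, the customized l2() and weightDecay() methods remain mutually exclusive: whichever is applied last removes the other from the staged regularization list and logs a warning. A minimal sketch of that behaviour, again using DenseLayer as a hypothetical @SuperBuilder-migrated subclass:

    import org.deeplearning4j.nn.conf.layers.DenseLayer;

    class WeightDecaySketch {
      static DenseLayer build() {
        return DenseLayer.builder()
            .nIn(100).nOut(10)
            .l2(1e-4)          // stages an L2Regularization entry
            .weightDecay(5e-5) // warns, drops the L2 entry, stages WeightDecay(5e-5, applyLR = true)
            .build();
        // the resulting getRegularization() list is then expected to hold only the WeightDecay entry
      }
    }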
+ * + * @param coefficient Weight decay regularization coefficient + * @param applyLR Whether the learning rate should be multiplied in when performing weight decay + * updates. See {@link WeightDecay} for more details. + * @see #weightDecay(double, boolean) + */ + public B weightDecay(double coefficient, boolean applyLR) { + // Check if existing weight decay if it exists; if so, replace it. Also remove L2 - it doesn't + // make sense to use both + NetworkUtils.removeInstances(this.regularization$value, WeightDecay.class); + if (coefficient > 0.0) { + NetworkUtils.removeInstancesWithWarning( + this.regularization$value, + L2Regularization.class, + "L2 regularization removed: incompatible with added WeightDecay regularization"); + this.regularization$value.add(new WeightDecay(coefficient, applyLR)); + } + regularization$set = true; + return self(); + } + + /** + * Set constraints to be applied to all layers. Default: no constraints.
+ * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm + * regularization, etc). These constraints are applied at each iteration, after the parameters + * have been updated.
+ * Note: values set by this method will be applied to all applicable layers in the network, + * unless a different value is explicitly set on a given layer. In other words: values set via + * this method are used as the default value, and can be overridden on a per-layer basis. + * + * @param constraints Constraints to apply to all weight parameters of all layers + */ + public B constrainWeights(LayerConstraint... constraints) { + constrainWeights$value = Arrays.asList(constraints); + constrainWeights$set = true; + return self(); + } + + /** + * Weight decay for the biases only - see {@link #weightDecay(double)} for more details. This + * applies weight decay multiplied by the learning rate.
+ * + * @param coefficient Weight decay regularization coefficient + * @see #weightDecayBias(double, boolean) + */ + public B weightDecayBias(double coefficient) { + return weightDecayBias(coefficient, true); + } + + + + /** + * Weight decay for the biases only - see {@link #weightDecay(double)} for more details
+ * + * @param coefficient Weight decay regularization coefficient + */ + public B weightDecayBias(double coefficient, boolean applyLR) { + // Check if existing weight decay if it exists; if so, replace it. Also remove L2 - it doesn't + // make sense to use both + NetworkUtils.removeInstances(this.regularizationBias$value, WeightDecay.class); + if (coefficient > 0.0) { + NetworkUtils.removeInstancesWithWarning( + this.regularizationBias$value, + L2Regularization.class, + "L2 regularization removed: incompatible with added WeightDecay regularization"); + this.regularizationBias$value.add(new WeightDecay(coefficient, applyLR)); + } + regularizationBias$set = true; + return self(); + } + } +} diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseOutputLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseOutputLayer.java index 8231f2e2f..a24720130 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseOutputLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseOutputLayer.java @@ -21,34 +21,34 @@ package org.deeplearning4j.nn.conf.layers; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.conf.memory.MemoryReport; import org.nd4j.linalg.lossfunctions.ILossFunction; -import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction; -import org.nd4j.linalg.lossfunctions.impl.LossBinaryXENT; +import org.nd4j.linalg.lossfunctions.LossFunctions; import org.nd4j.linalg.lossfunctions.impl.LossMCXENT; -import org.nd4j.linalg.lossfunctions.impl.LossMSE; -import org.nd4j.linalg.lossfunctions.impl.LossNegativeLogLikelihood; -@Data -@NoArgsConstructor + @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@NoArgsConstructor +@SuperBuilder(builderMethodName = "innerBuilder") public abstract class BaseOutputLayer extends FeedForwardLayer { - protected ILossFunction lossFn; + /** + * Loss function for the output layer + */ + @lombok.Builder.Default @Getter @Setter + protected ILossFunction lossFunction = new LossMCXENT(); + /** + * If true (default): include bias parameters in the model. False: no bias. + * + */ + @lombok.Builder.Default @Getter @Setter protected boolean hasBias = true; - protected BaseOutputLayer(Builder builder) { - super(builder); - this.lossFn = builder.lossFn; - this.hasBias = builder.hasBias; - } - public boolean hasBias() { - return hasBias; - } @Override public LayerMemoryReport getMemoryReport(InputType inputType) { @@ -60,7 +60,7 @@ public abstract class BaseOutputLayer extends FeedForwardLayer { int trainSizeFixed = 0; int trainSizeVariable = 0; - if (getIDropout() != null) { + if (getDropOut() != null) { if (false) { //TODO drop connect //Dup the weights... note that this does NOT depend on the minibatch size... 
@@ -76,68 +76,25 @@ public abstract class BaseOutputLayer extends FeedForwardLayer { // then we have 'epsilonNext' which is equivalent to input size trainSizeVariable += outputType.arrayElementsPerExample(); - return new LayerMemoryReport.Builder(layerName, OutputLayer.class, inputType, outputType) + return new LayerMemoryReport.Builder(name, OutputLayer.class, inputType, outputType) .standardMemory(numParams, updaterStateSize) .workingMemory(0, 0, trainSizeFixed, trainSizeVariable) //No additional memory (beyond activations) for inference .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching .build(); } - - @Getter - @Setter - public static abstract class Builder> extends FeedForwardLayer.Builder { - - /** - * Loss function for the output layer - */ - protected ILossFunction lossFn = new LossMCXENT(); - - /** - * If true (default): include bias parameters in the model. False: no bias. - * - */ - private boolean hasBias = true; - - public Builder() {} - - /** - * @param lossFunction Loss function for the output layer - */ - public Builder(LossFunction lossFunction) { - lossFunction(lossFunction); + public static abstract class BaseOutputLayerBuilder> extends + FeedForwardLayerBuilder { + public B lossFunction(LossFunctions.LossFunction lossFn) { + this.lossFunction$value = lossFn.getILossFunction(); + this.lossFunction$set = true; + return self(); } - /** - * @param lossFunction Loss function for the output layer - */ - public Builder(ILossFunction lossFunction) { - this.setLossFn(lossFunction); - } - - /** - * @param lossFunction Loss function for the output layer - */ - public T lossFunction(LossFunction lossFunction) { - return lossFunction(lossFunction.getILossFunction()); - } - - /** - * If true (default): include bias parameters in the model. False: no bias. 
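The lossFunction(...) overloads in this hunk keep the old Builder's convenience of accepting either the LossFunctions.LossFunction enum or an ILossFunction instance; both simply populate the lossFunction @Builder.Default field (LossMCXENT by default). A hedged call-site sketch, assuming the concrete OutputLayer is migrated to @SuperBuilder and exposes a builder() entry point, and that activation/nIn/nOut come from the parent builder hierarchy:

    import org.deeplearning4j.nn.conf.layers.OutputLayer;
    import org.nd4j.linalg.activations.Activation;
    import org.nd4j.linalg.lossfunctions.LossFunctions;

    class OutputLayerSketch {
      static OutputLayer build() {
        return OutputLayer.builder()
            .lossFunction(LossFunctions.LossFunction.MCXENT) // enum overload, converted to ILossFunction
            .nIn(256).nOut(10)                               // assumed FeedForwardLayer fields
            .hasBias(true)                                   // @Builder.Default, true by default
            .activation(Activation.SOFTMAX)                  // assumed parent builder method
            .build();
      }
    }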
- * - * @param hasBias If true: include bias parameters in this model - */ - public T hasBias(boolean hasBias) { - this.setHasBias(hasBias); - return (T) this; - } - - /** - * @param lossFunction Loss function for the output layer - */ - public T lossFunction(ILossFunction lossFunction) { - this.setLossFn(lossFunction); - return (T) this; + public B lossFunction(ILossFunction lossFn) { + this.lossFunction$value = lossFn; + this.lossFunction$set = true; + return self(); } } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BasePretrainNetwork.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BasePretrainNetwork.java index 2fdbffd07..0ea89b064 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BasePretrainNetwork.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BasePretrainNetwork.java @@ -20,52 +20,25 @@ package org.deeplearning4j.nn.conf.layers; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.params.PretrainParamInitializer; import org.nd4j.linalg.lossfunctions.LossFunctions; -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -@Data -@NoArgsConstructor @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) @JsonIgnoreProperties("pretrain") +@SuperBuilder public abstract class BasePretrainNetwork extends FeedForwardLayer { + @Builder.Default @Getter + protected LossFunctions.LossFunction lossFunction = + LossFunctions.LossFunction.RECONSTRUCTION_CROSSENTROPY; - protected LossFunctions.LossFunction lossFunction; - protected double visibleBiasInit; + @Builder.Default @Getter protected double visibleBiasInit = 0.0; - public BasePretrainNetwork(Builder builder) { - super(builder); - this.lossFunction = builder.lossFunction; - this.visibleBiasInit = builder.visibleBiasInit; - - } - - @Override - public boolean isPretrainParam(String paramName) { - return PretrainParamInitializer.VISIBLE_BIAS_KEY.equals(paramName); - } - - @Getter - @Setter - public static abstract class Builder> extends FeedForwardLayer.Builder { - - protected LossFunctions.LossFunction lossFunction = LossFunctions.LossFunction.RECONSTRUCTION_CROSSENTROPY; - - protected double visibleBiasInit = 0.0; - - public Builder() {} - - public T lossFunction(LossFunctions.LossFunction lossFunction) { - this.setLossFunction(lossFunction); - return (T) this; - } - - public T visibleBiasInit(double visibleBiasInit) { - this.setVisibleBiasInit(visibleBiasInit); - return (T) this; - } - - } + @Override + public boolean isPretrainParam(String paramName) { + return PretrainParamInitializer.VISIBLE_BIAS_KEY.equals(paramName); + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseRecurrentLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseRecurrentLayer.java index d8e55c19c..10f24c4ae 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseRecurrentLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseRecurrentLayer.java @@ -20,39 +20,57 @@ package org.deeplearning4j.nn.conf.layers; +import java.util.Arrays; +import java.util.List; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.RNNFormat; -import 
org.deeplearning4j.nn.conf.distribution.Distribution; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.weights.IWeightInit; -import org.deeplearning4j.nn.weights.WeightInit; -import org.deeplearning4j.nn.weights.WeightInitDistribution; -import java.util.Arrays; -import java.util.List; -@Data -@NoArgsConstructor @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@SuperBuilder public abstract class BaseRecurrentLayer extends FeedForwardLayer { - protected IWeightInit weightInitFnRecurrent; - protected RNNFormat rnnDataFormat; - - protected BaseRecurrentLayer(Builder builder) { - super(builder); - this.weightInitFnRecurrent = builder.weightInitFnRecurrent; - this.rnnDataFormat = builder.rnnDataFormat; - } + /** + * Set the weight initialization for the recurrent weights. Not that if this is not set explicitly, the same + * weight initialization as the layer input weights is also used for the recurrent weights. + * + */ + @Getter + protected IWeightInit weightInitRecurrent; + /** + * Set the format of data expected by the RNN. NCW = [miniBatchSize, size, timeSeriesLength], + * NWC = [miniBatchSize, timeSeriesLength, size]. Defaults to NCW. + */ + @Builder.Default @Getter @Setter + protected RNNFormat dataFormat = RNNFormat.NCW; + /** + * Set constraints to be applied to the RNN recurrent weight parameters of this layer. Default: no + * constraints.
Constraints can be used to enforce certain conditions (non-negativity of parameters, + * max-norm regularization, etc). These constraints are applied at each iteration, after the parameters have + * been updated. + */ + @Getter + protected List recurrentConstraints; + /** + * Set constraints to be applied to the RNN input weight parameters of this layer. Default: no constraints.
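With the rename from rnnDataFormat and weightInitFnRecurrent to dataFormat and weightInitRecurrent, the generated builder methods follow the new field names, and the constrainRecurrent(...) / constrainInputWeights(...) varargs helpers are re-added further down as customized builder methods. A sketch under stated assumptions: a concrete recurrent layer such as LSTM is built through @SuperBuilder, and nIn/nOut are available from the FeedForwardLayer builder hierarchy.

    import org.deeplearning4j.nn.conf.RNNFormat;
    import org.deeplearning4j.nn.conf.constraint.MaxNormConstraint;
    import org.deeplearning4j.nn.conf.layers.LSTM;
    import org.deeplearning4j.nn.weights.WeightInitXavier;

    class RecurrentBuilderSketch {
      static LSTM build() {
        return LSTM.builder()
            .nIn(64).nOut(32)
            .dataFormat(RNNFormat.NWC)                          // was rnnDataFormat in the old Builder
            .weightInitRecurrent(new WeightInitXavier())        // only the IWeightInit overload remains
            .constrainRecurrent(new MaxNormConstraint(2.0, 1))  // customized varargs method below
            .build();
      }
    }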
+ * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm regularization, + * etc). These constraints are applied at each iteration, after the parameters have been updated. + * + */ + @Getter + protected List inputWeightConstraints; @Override public InputType getOutputType(int layerIndex, InputType inputType) { if (inputType == null || inputType.getType() != InputType.Type.RNN) { throw new IllegalStateException("Invalid input for RNN layer (layer index = " + layerIndex - + ", layer name = \"" + getLayerName() + "\"): expect RNN input type with size > 0. Got: " + + ", layer name = \"" + getName() + "\"): expect RNN input type with size > 0. Got: " + inputType); } @@ -64,7 +82,7 @@ public abstract class BaseRecurrentLayer extends FeedForwardLayer { @Override public void setNIn(InputType inputType, boolean override) { if (inputType == null || inputType.getType() != InputType.Type.RNN) { - throw new IllegalStateException("Invalid input for RNN layer (layer name = \"" + getLayerName() + throw new IllegalStateException("Invalid input for RNN layer (layer name = \"" + getName() + "\"): expect RNN input type with size > 0. Got: " + inputType); } @@ -73,49 +91,17 @@ public abstract class BaseRecurrentLayer extends FeedForwardLayer { this.nIn = r.getSize(); } - if(rnnDataFormat == null || override) - this.rnnDataFormat = r.getFormat(); + if(dataFormat == null || override) + this.dataFormat = r.getFormat(); } @Override public InputPreProcessor getPreProcessorForInputType(InputType inputType) { - return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType, rnnDataFormat,getLayerName()); + return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType, dataFormat, getName()); } - @NoArgsConstructor - @Getter - @Setter - public static abstract class Builder> extends FeedForwardLayer.Builder { - - /** - * Set the format of data expected by the RNN. NCW = [miniBatchSize, size, timeSeriesLength], - * NWC = [miniBatchSize, timeSeriesLength, size]. Defaults to NCW. - */ - protected RNNFormat rnnDataFormat = RNNFormat.NCW; - - /** - * Set constraints to be applied to the RNN recurrent weight parameters of this layer. Default: no - * constraints.
Constraints can be used to enforce certain conditions (non-negativity of parameters, - * max-norm regularization, etc). These constraints are applied at each iteration, after the parameters have - * been updated. - */ - protected List recurrentConstraints; - - /** - * Set constraints to be applied to the RNN input weight parameters of this layer. Default: no constraints.
- * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm regularization, - * etc). These constraints are applied at each iteration, after the parameters have been updated. - * - */ - protected List inputWeightConstraints; - - /** - * Set the weight initialization for the recurrent weights. Not that if this is not set explicitly, the same - * weight initialization as the layer input weights is also used for the recurrent weights. - * - */ - protected IWeightInit weightInitFnRecurrent; - + public static abstract class BaseRecurrentLayerBuilder> + extends FeedForwardLayerBuilder { /** * Set constraints to be applied to the RNN recurrent weight parameters of this layer. Default: no * constraints.
Constraints can be used to enforce certain conditions (non-negativity of parameters, @@ -124,9 +110,9 @@ public abstract class BaseRecurrentLayer extends FeedForwardLayer { * * @param constraints Constraints to apply to the recurrent weight parameters of this layer */ - public T constrainRecurrent(LayerConstraint... constraints) { - this.setRecurrentConstraints(Arrays.asList(constraints)); - return (T) this; + public B constrainRecurrent(LayerConstraint... constraints) { + this.recurrentConstraints(Arrays.asList(constraints)); + return self(); } /** @@ -136,53 +122,9 @@ public abstract class BaseRecurrentLayer extends FeedForwardLayer { * * @param constraints Constraints to apply to the input weight parameters of this layer */ - public T constrainInputWeights(LayerConstraint... constraints) { - this.setInputWeightConstraints(Arrays.asList(constraints)); - return (T) this; - } - - /** - * Set the weight initialization for the recurrent weights. Not that if this is not set explicitly, the same - * weight initialization as the layer input weights is also used for the recurrent weights. - * - * @param weightInit Weight initialization for the recurrent weights only. - */ - public T weightInitRecurrent(IWeightInit weightInit) { - this.setWeightInitFnRecurrent(weightInit); - return (T) this; - } - - /** - * Set the weight initialization for the recurrent weights. Not that if this is not set explicitly, the same - * weight initialization as the layer input weights is also used for the recurrent weights. - * - * @param weightInit Weight initialization for the recurrent weights only. - */ - public T weightInitRecurrent(WeightInit weightInit) { - if (weightInit == WeightInit.DISTRIBUTION) { - throw new UnsupportedOperationException( - "Not supported!, Use weightInit(Distribution distribution) instead!"); - } - - this.setWeightInitFnRecurrent(weightInit.getWeightInitFunction()); - return (T) this; - } - - /** - * Set the weight initialization for the recurrent weights, based on the specified distribution. Not that if - * this is not set explicitly, the same weight initialization as the layer input weights is also used for the - * recurrent weights. - * - * @param dist Distribution to use for initializing the recurrent weights - */ - public T weightInitRecurrent(Distribution dist) { - this.setWeightInitFnRecurrent(new WeightInitDistribution(dist)); - return (T) this; - } - - public T dataFormat(RNNFormat rnnDataFormat){ - this.rnnDataFormat = rnnDataFormat; - return (T)this; + public B constrainInputWeights(LayerConstraint... 
constraints) { + this.inputWeightConstraints(Arrays.asList(constraints)); + return self(); } } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseUpsamplingLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseUpsamplingLayer.java index 07220f89e..d40ac019c 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseUpsamplingLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseUpsamplingLayer.java @@ -21,6 +21,7 @@ package org.deeplearning4j.nn.conf.layers; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -30,18 +31,18 @@ import org.deeplearning4j.nn.conf.inputs.InputType; * @author Max Pumperla */ -@Data -@NoArgsConstructor @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@SuperBuilder() public abstract class BaseUpsamplingLayer extends NoParamLayer { - protected int[] size; - - protected BaseUpsamplingLayer(UpsamplingBuilder builder) { - super(builder); - this.size = builder.size; - } + /** + * An int array to specify upsampling dimensions, the length of which has to equal to the number of spatial + * dimensions (e.g. 2 for Upsampling2D etc.) + * + */ + @Builder.Default @Getter + protected int[] size = new int[] {1}; @Override public BaseUpsamplingLayer clone() { @@ -52,43 +53,9 @@ public abstract class BaseUpsamplingLayer extends NoParamLayer { @Override public InputPreProcessor getPreProcessorForInputType(InputType inputType) { if (inputType == null) { - throw new IllegalStateException("Invalid input for Upsampling layer (layer name=\"" + getLayerName() + throw new IllegalStateException("Invalid input for Upsampling layer (layer name=\"" + getName() + "\"): input is null"); } - return InputTypeUtil.getPreProcessorForInputTypeCnnLayers(inputType, getLayerName()); + return InputTypeUtil.getPreProcessorForInputTypeCnnLayers(inputType, getName()); } - - - @NoArgsConstructor - @Getter - @Setter - protected static abstract class UpsamplingBuilder> extends LayerConfiguration.Builder { - - /** - * An int array to specify upsampling dimensions, the length of which has to equal to the number of spatial - * dimensions (e.g. 2 for Upsampling2D etc.) - * - */ - protected int[] size = new int[] {1}; - - /** - * A single size integer is used for upsampling in all spatial dimensions - * - * @param size int for upsampling - */ - protected UpsamplingBuilder(int size) { - this.setSize(new int[] {size}); - } - - /** - * An int array to specify upsampling dimensions, the length of which has to equal to the number of spatial - * dimensions (e.g. 2 for Upsampling2D etc.) 
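As an aside, a hypothetical usage sketch for the size field introduced above; it assumes a concrete subclass such as Upsampling2D exposes the Lombok-generated builder, which is not shown in this hunk:

    // Illustrative only: upsample feature maps by a factor of 2 in both spatial dimensions.
    // The length of the size array must match the number of spatial dimensions (2 for Upsampling2D).
    Upsampling2D upsampling = Upsampling2D.builder()
            .size(new int[] {2, 2})
            .build();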
- * - * @param size int for upsampling - */ - protected UpsamplingBuilder(int[] size) { - this.setSize(size); - } - } - } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BatchNormalization.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BatchNormalization.java index 5e266afb2..095d5b3bd 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BatchNormalization.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BatchNormalization.java @@ -20,11 +20,15 @@ package org.deeplearning4j.nn.conf.layers; +import java.util.Collection; +import java.util.List; +import java.util.Map; import lombok.*; +import lombok.experimental.SuperBuilder; +import org.deeplearning4j.nn.api.layers.LayerConstraint; import net.brutex.ai.dnn.api.LayerType; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; -import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -41,45 +45,120 @@ import org.nd4j.linalg.learning.config.IUpdater; import org.nd4j.linalg.learning.config.NoOp; import org.nd4j.linalg.learning.regularization.Regularization; -import java.util.Arrays; -import java.util.Collection; -import java.util.List; -import java.util.Map; - @Data @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@Builder +@SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder") public class BatchNormalization extends FeedForwardLayer { + + /** + * At test time: we can use a global estimate of the mean and variance, calculated using a moving average of the + * batch means/variances. This moving average is implemented as:
globalMeanEstimate = decay * + * globalMeanEstimate + (1-decay) * batchMean
globalVarianceEstimate = decay * globalVarianceEstimate + + * (1-decay) * batchVariance
+ * + * @param decay Decay value to use for global stats calculation + */ + @lombok.Builder.Default + protected double decay = 0.9; //Note: need to set defaults here in addition to builder, in case user uses no-op constructor... - protected double decay = 0.9; - protected double eps = 1e-5; - protected boolean isMinibatch = true; - protected double gamma = 1.0; - protected double beta = 0.0; - protected boolean lockGammaBeta = false; - protected boolean cudnnAllowFallback = true; - protected boolean useLogStd = false; //Default for deserialized models (1.0.0-beta3) and earlier: store variance as variance. Post 1.0.0-beta3: use log stdev instead - protected CNN2DFormat cnn2DFormat = CNN2DFormat.NCHW; //Default for deserialized models, 1.0.0-beta6 and earlier + /** + * Epsilon value for batch normalization; small floating point value added to variance (algorithm 1 in https://arxiv.org/pdf/1502.03167v3.pdf) to reduce/avoid + * underflow issues.
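To make the moving-average description above concrete, a small illustrative sketch of the update applied at each iteration (variable names and values are for illustration only):

    // Illustrative only: exponential moving average of the batch statistics,
    // as described in the decay javadoc above.
    double decay = 0.9;
    double globalMeanEstimate = 0.0;     // running estimate used at test time
    double globalVarianceEstimate = 1.0; // running estimate used at test time
    double batchMean = 0.2;              // statistics of the current minibatch
    double batchVariance = 0.8;

    globalMeanEstimate = decay * globalMeanEstimate + (1 - decay) * batchMean;
    globalVarianceEstimate = decay * globalVarianceEstimate + (1 - decay) * batchVariance;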
Default: 1e-5 + * + * @param eps Epsilon values to use + */ + @lombok.Builder.Default protected double eps = 1e-5; + /** + * If doing minibatch training or not. Default: true. Under most circumstances, this should be set to true. If + * doing full batch training (i.e., all examples in a single DataSet object - very small data sets) then this + * should be set to false. Affects how global mean/variance estimates are calculated. + * + * @param minibatch Minibatch parameter + */ + @lombok.Builder.Default protected boolean isMinibatch = true; - private BatchNormalization(Builder builder) { - super(builder); - this.setType(LayerType.BN); - this.decay = builder.decay; - this.eps = builder.eps; - this.isMinibatch = builder.isMinibatch; - this.gamma = builder.gamma; - this.beta = builder.beta; - this.lockGammaBeta = builder.lockGammaBeta; - this.cudnnAllowFallback = builder.cudnnAllowFallback; - this.useLogStd = builder.useLogStd; - this.cnn2DFormat = builder.cnn2DFormat; - initializeConstraints(builder); + /** + * Used only when 'true' is passed to {@link BatchNormalizationBuilder#lockGammaBeta(boolean)}. Value is not used otherwise.
Default: + * 1.0 + * + * @param gamma Gamma parameter for all activations, used only with locked gamma/beta configuration mode + */ + @lombok.Builder.Default protected double gamma = 1.0; + /** + * Used only when 'true' is passed to {@link BatchNormalizationBuilder#lockGammaBeta(boolean)}. Value is not used otherwise.
Default: + * 0.0 + * + * @param beta Beta parameter for all activations, used only with locked gamma/beta configuration mode + */ + @lombok.Builder.Default protected double beta = 0.0; + /** + * Set constraints to be applied to the beta parameter of this batch normalisation layer. Default: no + * constraints.
Constraints can be used to enforce certain conditions (non-negativity of parameters, + * max-norm regularization, etc). These constraints are applied at each iteration, after the parameters have + * been updated. + * + */ + protected List betaConstraints; + + /** + * Set constraints to be applied to the gamma parameter of this batch normalisation layer. Default: no + * constraints.
Constraints can be used to enforce certain conditions (non-negativity of parameters, + * max-norm regularization, etc). These constraints are applied at each iteration, after the parameters have + * been updated. + * + */ + protected List gammaConstraints; + + + /** + * When using CuDNN or MKLDNN and an error is encountered, should fallback to the non-helper implementation be allowed? + * If set to false, an exception in the helper will be propagated back to the user. If true, the built-in + * (non-MKL/CuDNN) implementation for BatchNormalizationLayer will be used + * + * @param allowFallback Whether fallback to non-CuDNN implementation should be used + */ + @lombok.Builder.Default protected boolean cudnnAllowFallback = true; + /** + * How should the moving average of variance be stored? Two different parameterizations are supported. + * useLogStd(false): equivalent to 1.0.0-beta3 and earlier. The variance "parameter" is stored directly as + * variable
useLogStd(true): (Default) variance is stored as log10(stdev)
The motivation here is for + * numerical stability (FP16 etc) and also distributed training: storing the variance directly can cause + * numerical issues. For example, a standard deviation of 1e-3 (something that could be encountered in practice) + * gives a variance of 1e-6, which can be problematic for 16-bit floating point + */ + @lombok.Builder.Default protected boolean useLogStd = false; //Default for deserialized models (1.0.0-beta3) and earlier: store variance as variance. Post 1.0.0-beta3: use log stdev instead + /** + * Set the input and output array data format. Defaults to NCHW format - i.e., channels first. + * See {@link CNN2DFormat} for more details + * @param format Format to use + */ + @lombok.Builder.Default protected CNN2DFormat dataFormat = CNN2DFormat.NCHW; //Default for deserialized models, 1.0.0-beta6 and earlier + + private boolean lockGammaBeta; + + public static BatchNormalizationBuilder builder() { + return innerBuilder(); } - public BatchNormalization() { - this(new Builder()); //Defaults from builder + public static BatchNormalizationBuilder builder(double gamma, double beta) { + return innerBuilder() + .gamma(gamma) + .beta(beta); + } + + public static BatchNormalizationBuilder builder(boolean lockGammaBeta) { + return innerBuilder() + .lockGammaBeta(lockGammaBeta); } @Override @@ -92,7 +171,7 @@ public class BatchNormalization extends FeedForwardLayer { public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { this.setNetConfiguration(conf); - LayerValidation.assertNOutSet("BatchNormalization", getLayerName(), layerIndex, getNOut()); + LayerValidation.assertNOutSet("BatchNormalization", getName(), layerIndex, getNOut()); runInheritance(); LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); @@ -112,13 +191,12 @@ public class BatchNormalization extends FeedForwardLayer { return BatchNormalizationParamInitializer.getInstance(); } - @Override public InputType getOutputType(int layerIndex, InputType inputType) { if (inputType == null) { throw new IllegalStateException( "Invalid input type: Batch norm layer expected input of type CNN, got null for layer \"" - + getLayerName() + "\""); + + getName() + "\""); } //Can handle CNN, flat CNN, CNN3D or FF input formats only @@ -132,7 +210,7 @@ public class BatchNormalization extends FeedForwardLayer { throw new IllegalStateException( "Invalid input type: Batch norm layer expected input of type CNN, CNN Flat or FF, got " + inputType + " for layer index " + layerIndex + ", layer name = " - + getLayerName()); + + getName()); } } @@ -145,7 +223,7 @@ public class BatchNormalization extends FeedForwardLayer { break; case CNN: nIn = ((InputType.InputTypeConvolutional) inputType).getChannels(); - cnn2DFormat = ((InputType.InputTypeConvolutional) inputType).getFormat(); + dataFormat = ((InputType.InputTypeConvolutional) inputType).getFormat(); break; case CNN3D: nIn = ((InputType.InputTypeConvolutional3D) inputType).getChannels(); @@ -155,7 +233,7 @@ public class BatchNormalization extends FeedForwardLayer { default: throw new IllegalStateException( "Invalid input type: Batch norm layer expected input of type CNN, CNN Flat or FF, got " - + inputType + " for layer " + getLayerName() + "\""); + + inputType + " for layer " + getName() + "\""); } nOut = nIn; } @@ -184,7 +262,7 @@ public class BatchNormalization extends FeedForwardLayer { switch (paramName) { case 
BatchNormalizationParamInitializer.BETA: case BatchNormalizationParamInitializer.GAMMA: - return iUpdater; + return getUpdater(); case BatchNormalizationParamInitializer.GLOBAL_MEAN: case BatchNormalizationParamInitializer.GLOBAL_VAR: case BatchNormalizationParamInitializer.GLOBAL_LOG_STD: @@ -216,7 +294,7 @@ public class BatchNormalization extends FeedForwardLayer { val trainWorkingSizePerExample = inferenceWorkingSize //Inference during backprop + (outputType.arrayElementsPerExample() + 2 * nOut); //Backprop gradient calculation - return new LayerMemoryReport.Builder(layerName, BatchNormalization.class, inputType, outputType) + return new LayerMemoryReport.Builder(name, BatchNormalization.class, inputType, outputType) .standardMemory(numParams, updaterStateSize) .workingMemory(0, 0, trainWorkFixed, trainWorkingSizePerExample) //No additional memory (beyond activations) for inference .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching @@ -228,268 +306,29 @@ public class BatchNormalization extends FeedForwardLayer { return false; //No pretrain params in BN } - @AllArgsConstructor - @Getter - @Setter - public static class Builder extends FeedForwardLayer.Builder { - - /** - * At test time: we can use a global estimate of the mean and variance, calculated using a moving average of the - * batch means/variances. This moving average is implemented as:
globalMeanEstimate = decay * - * globalMeanEstimate + (1-decay) * batchMean
globalVarianceEstimate = decay * globalVarianceEstimate + - * (1-decay) * batchVariance
- * - */ - protected double decay = 0.9; - - /** - * Epsilon value for batch normalization; small floating point value added to variance (algorithm 1 in https://arxiv.org/pdf/1502.03167v3.pdf) to reduce/avoid - * underflow issues.
Default: 1e-5 - */ - protected double eps = 1e-5; - - /** - * If doing minibatch training or not. Default: true. Under most circumstances, this should be set to true. If - * doing full batch training (i.e., all examples in a single DataSet object - very small data sets) then this - * should be set to false. Affects how global mean/variance estimates are calculated. - * - */ - protected boolean isMinibatch = true; // TODO auto set this if layer conf is batch - - /** - * If set to true: lock the gamma and beta parameters to the values for each activation, specified by {@link - * #gamma(double)} and {@link #beta(double)}. Default: false -> learn gamma and beta parameter values during - * network training. - * - */ - protected boolean lockGammaBeta = false; - - /** - * Used only when 'true' is passed to {@link #lockGammaBeta(boolean)}. Value is not used otherwise.
Default: - * 1.0 - * - */ - protected double gamma = 1.0; - - /** - * Used only when 'true' is passed to {@link #lockGammaBeta(boolean)}. Value is not used otherwise.
Default: - * 0.0 - * - */ - protected double beta = 0.0; - - /** - * Set constraints to be applied to the beta parameter of this batch normalisation layer. Default: no - * constraints.
Constraints can be used to enforce certain conditions (non-negativity of parameters, - * max-norm regularization, etc). These constraints are applied at each iteration, after the parameters have - * been updated. - * - */ - protected List betaConstraints; - - /** - * Set constraints to be applied to the gamma parameter of this batch normalisation layer. Default: no - * constraints.
Constraints can be used to enforce certain conditions (non-negativity of parameters, - * max-norm regularization, etc). These constraints are applied at each iteration, after the parameters have - * been updated. - * - */ - protected List gammaConstraints; - - /** - * When using CuDNN and an error is encountered, should fallback to the non-CuDNN implementatation be allowed? - * If set to false, an exception in CuDNN will be propagated back to the user. If false, the built-in - * (non-CuDNN) implementation for BatchNormalization will be used - * - */ - protected boolean cudnnAllowFallback = true; - - /** - * How should the moving average of variance be stored? Two different parameterizations are supported. - * useLogStd(false): equivalent to 1.0.0-beta3 and earlier. The variance "parameter" is stored directly as - * variable
useLogStd(true): (Default) variance is stored as log10(stdev)
The motivation here is for - * numerical stability (FP16 etc) and also distributed training: storing the variance directly can cause - * numerical issues. For example, a standard deviation of 1e-3 (something that could be encountered in practice) - * gives a variance of 1e-6, which can be problematic for 16-bit floating point - */ - protected boolean useLogStd = true; - - protected CNN2DFormat cnn2DFormat = CNN2DFormat.NCHW; //Default for deserialized models, 1.0.0-beta6 and earlier - - public Builder(double decay, boolean isMinibatch) { - this.setDecay(decay); - this.setMinibatch(isMinibatch); + public static abstract class BatchNormalizationBuilder> extends FeedForwardLayerBuilder { + public C build() { + C l = this.initBuild(); + l.setType(LayerType.BN); + l.initializeConstraints(); + return l; + } + public B helperAllowFallback(boolean b) { + this.cudnnAllowFallback$value = b; + this.cudnnAllowFallback$set = true; + return self(); } - public Builder(double gamma, double beta) { - this.setGamma(gamma); - this.setBeta(beta); + public B constrainBeta(LayerConstraint ... constraints) { + this.betaConstraints = List.of(constraints); + return self(); } - - public Builder(double gamma, double beta, boolean lockGammaBeta) { - this.setGamma(gamma); - this.setBeta(beta); - this.setLockGammaBeta(lockGammaBeta); - } - - public Builder(boolean lockGammaBeta) { - this.setLockGammaBeta(lockGammaBeta); - } - - public Builder() {} - - /** - * Set the input and output array data format. Defaults to NCHW format - i.e., channels first. - * See {@link CNN2DFormat} for more details - * @param format Format to use - */ - public Builder dataFormat(CNN2DFormat format){ - this.cnn2DFormat = format; - return this; - } - - /** - * If doing minibatch training or not. Default: true. Under most circumstances, this should be set to true. If - * doing full batch training (i.e., all examples in a single DataSet object - very small data sets) then this - * should be set to false. Affects how global mean/variance estimates are calculated. - * - * @param minibatch Minibatch parameter - */ - public Builder minibatch(boolean minibatch) { - this.setMinibatch(minibatch); - return this; - } - - /** - * Used only when 'true' is passed to {@link #lockGammaBeta(boolean)}. Value is not used otherwise.
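A minimal usage sketch of the reworked builder API above; nOut comes from the inherited FeedForwardLayer builder and NonNegativeConstraint is a standard DL4J constraint class, both of which are assumptions here rather than part of this hunk:

    // Illustrative only: the custom build() above delegates to the Lombok-generated initBuild(),
    // marks the layer as LayerType.BN and initialises the beta/gamma constraints.
    BatchNormalization bn = BatchNormalization.builder()
            .nOut(64)                                   // assumed inherited setter
            .decay(0.95)                                // moving-average decay for global stats
            .eps(1e-5)                                  // numerical-stability epsilon
            .useLogStd(true)                            // store variance as log10(stdev)
            .constrainBeta(new NonNegativeConstraint()) // custom setter defined above
            .build();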
Default: - * 1.0 - * - * @param gamma Gamma parameter for all activations, used only with locked gamma/beta configuration mode - */ - public Builder gamma(double gamma) { - this.setGamma(gamma); - return this; - } - - /** - * Used only when 'true' is passed to {@link #lockGammaBeta(boolean)}. Value is not used otherwise.
Default: - * 0.0 - * - * @param beta Beta parameter for all activations, used only with locked gamma/beta configuration mode - */ - public Builder beta(double beta) { - this.setBeta(beta); - return this; - } - - /** - * Epsilon value for batch normalization; small floating point value added to variance (algorithm 1 in https://arxiv.org/pdf/1502.03167v3.pdf) to reduce/avoid - * underflow issues.
Default: 1e-5 - * - * @param eps Epsilon values to use - */ - public Builder eps(double eps) { - this.setEps(eps); - return this; - } - - /** - * At test time: we can use a global estimate of the mean and variance, calculated using a moving average of the - * batch means/variances. This moving average is implemented as:
globalMeanEstimate = decay * - * globalMeanEstimate + (1-decay) * batchMean
globalVarianceEstimate = decay * globalVarianceEstimate + - * (1-decay) * batchVariance
- * - * @param decay Decay value to use for global stats calculation - */ - public Builder decay(double decay) { - this.setDecay(decay); - return this; - } - - /** - * If set to true: lock the gamma and beta parameters to the values for each activation, specified by {@link - * #gamma(double)} and {@link #beta(double)}. Default: false -> learn gamma and beta parameter values during - * network training. - * - * @param lockGammaBeta If true: use fixed beta/gamma values. False: learn during - */ - public Builder lockGammaBeta(boolean lockGammaBeta) { - this.setLockGammaBeta(lockGammaBeta); - return this; - } - - /** - * Set constraints to be applied to the beta parameter of this batch normalisation layer. Default: no - * constraints.
Constraints can be used to enforce certain conditions (non-negativity of parameters, - * max-norm regularization, etc). These constraints are applied at each iteration, after the parameters have - * been updated. - * - * @param constraints Constraints to apply to the beta parameter of this layer - */ - public Builder constrainBeta(LayerConstraint... constraints) { - this.setBetaConstraints(Arrays.asList(constraints)); - return this; - } - - /** - * Set constraints to be applied to the gamma parameter of this batch normalisation layer. Default: no - * constraints.
Constraints can be used to enforce certain conditions (non-negativity of parameters, - * max-norm regularization, etc). These constraints are applied at each iteration, after the parameters have - * been updated. - * - * @param constraints Constraints to apply to the gamma parameter of this layer - */ - public Builder constrainGamma(LayerConstraint... constraints) { - this.setGammaConstraints(Arrays.asList(constraints)); - return this; - } - - /** - * When using CuDNN and an error is encountered, should fallback to the non-CuDNN implementatation be allowed? - * If set to false, an exception in CuDNN will be propagated back to the user. If true, the built-in - * (non-CuDNN) implementation for BatchNormalization will be used - * - * @deprecated Use {@link #helperAllowFallback(boolean)} - * - * @param allowFallback Whether fallback to non-CuDNN implementation should be used - */ - @Deprecated - public Builder cudnnAllowFallback(boolean allowFallback) { - this.setCudnnAllowFallback(allowFallback); - return this; - } - - /** - * When using CuDNN or MKLDNN and an error is encountered, should fallback to the non-helper implementation be allowed? - * If set to false, an exception in the helper will be propagated back to the user. If true, the built-in - * (non-MKL/CuDNN) implementation for BatchNormalizationLayer will be used - * - * @param allowFallback Whether fallback to non-CuDNN implementation should be used - */ - public Builder helperAllowFallback(boolean allowFallback) { - this.cudnnAllowFallback = allowFallback; - return this; - } - - /** - * How should the moving average of variance be stored? Two different parameterizations are supported. - * useLogStd(false): equivalent to 1.0.0-beta3 and earlier. The variance "parameter" is stored directly as - * variable
useLogStd(true): (Default) variance is stored as log10(stdev)
The motivation here is for - * numerical stability (FP16 etc) and also distributed training: storing the variance directly can cause - * numerical issues. For example, a standard deviation of 1e-3 (something that could be encountered in practice) - * gives a variance of 1e-6, which can be problematic for 16-bit floating point - */ - public Builder useLogStd(boolean useLogStd) { - this.setUseLogStd(useLogStd); - return this; - } - - @Override - public BatchNormalization build() { - return new BatchNormalization(this); + public B constrainGamma(LayerConstraint ... constraints) { + this.gammaConstraints = List.of(constraints); + return self(); } } + + } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CapsuleLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CapsuleLayer.java index 05d32dc56..0d13f2b88 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CapsuleLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CapsuleLayer.java @@ -20,7 +20,9 @@ package org.deeplearning4j.nn.conf.layers; +import java.util.Map; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.inputs.InputType.InputTypeRecurrent; import org.deeplearning4j.nn.conf.inputs.InputType.Type; @@ -36,53 +38,51 @@ import org.nd4j.linalg.api.memory.MemoryWorkspace; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; -import java.util.Map; -@Data -@NoArgsConstructor @EqualsAndHashCode(callSuper = true) +@SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder") public class CapsuleLayer extends SameDiffLayer { private static final String WEIGHT_PARAM = "weight"; private static final String BIAS_PARAM = "bias"; - - private boolean hasBias = false; - private long inputCapsules = 0; - private long inputCapsuleDimensions = 0; - private int capsules; - private int capsuleDimensions; - private int routings = 3; - - public CapsuleLayer(Builder builder){ - super(builder); - this.hasBias = builder.hasBias; - this.inputCapsules = builder.inputCapsules; - this.inputCapsuleDimensions = builder.inputCapsuleDimensions; - this.capsules = builder.capsules; - this.capsuleDimensions = builder.capsuleDimensions; - this.routings = builder.routings; - - if(capsules <= 0 || capsuleDimensions <= 0 || routings <= 0){ - throw new IllegalArgumentException("Invalid configuration for Capsule ILayer (layer name = \"" - + layerName + "\"):" - + " capsules, capsuleDimensions, and routings must be > 0. Got: " - + capsules + ", " + capsuleDimensions + ", " + routings); - } - - if(inputCapsules < 0 || inputCapsuleDimensions < 0){ - throw new IllegalArgumentException("Invalid configuration for Capsule ILayer (layer name = \"" - + layerName + "\"):" - + " inputCapsules and inputCapsuleDimensions must be >= 0 if set. Got: " - + inputCapsules + ", " + inputCapsuleDimensions); - } - - } + /** + * Sets whether to use bias. False by default. + * @param hasBias + * @return + */ + @Builder.Default @Getter @Setter private boolean hasBias = false; + /** + * Usually inferred automatically. + * @param inputCapsules + * @return + */ + @Builder.Default @Getter @Setter private long inputCapsules = 0; + /** + * Usually inferred automatically. 
+ * @param inputCapsuleDimensions + * @return + */ + @Builder.Default @Getter @Setter private long inputCapsuleDimensions = 0; + /** + * Set the number of capsules to use. + * @param capsules + * @return + */ + @Getter @Setter private int capsules; + @Getter @Setter private int capsuleDimensions; + /** + * Set the number of dynamic routing iterations to use. + * The default is 3 (recommendedded in Dynamic Routing Between Capsules) + * @param routings + * @return + */ + @Builder.Default @Getter @Setter private int routings = 3; @Override public void setNIn(InputType inputType, boolean override) { if(inputType == null || inputType.getType() != Type.RNN) { throw new IllegalStateException("Invalid input for Capsule layer (layer name = \"" - + layerName + "\"): expect RNN input. Got: " + inputType); + + name + "\"): expect RNN input. Got: " + inputType); } if(inputCapsules <= 0 || inputCapsuleDimensions <= 0){ @@ -185,108 +185,65 @@ public class CapsuleLayer extends SameDiffLayer { return InputType.recurrent(capsules, capsuleDimensions); } - @Getter - @Setter - public static class Builder extends SameDiffLayer.Builder{ - - private int capsules; - private int capsuleDimensions; - - private int routings = 3; - - private boolean hasBias = false; - - private int inputCapsules = 0; - private int inputCapsuleDimensions = 0; - - public Builder(int capsules, int capsuleDimensions){ - this(capsules, capsuleDimensions, 3); - } - - public Builder(int capsules, int capsuleDimensions, int routings){ - super(); - this.setCapsules(capsules); - this.setCapsuleDimensions(capsuleDimensions); - this.setRoutings(routings); - } - - @Override - public E build() { - return (E) new CapsuleLayer(this); - } - - /** - * Set the number of capsules to use. - * @param capsules - * @return - */ - public Builder capsules(int capsules){ - this.setCapsules(capsules); - return this; - } - - /** - * Set the number dimensions of each capsule - * @param capsuleDimensions - * @return - */ - public Builder capsuleDimensions(int capsuleDimensions){ - this.setCapsuleDimensions(capsuleDimensions); - return this; - } - - /** - * Set the number of dynamic routing iterations to use. - * The default is 3 (recommendedded in Dynamic Routing Between Capsules) - * @param routings - * @return - */ - public Builder routings(int routings){ - this.setRoutings(routings); - return this; - } - - /** - * Usually inferred automatically. - * @param inputCapsules - * @return - */ - public Builder inputCapsules(int inputCapsules){ - this.setInputCapsules(inputCapsules); - return this; - } - - /** - * Usually inferred automatically. - * @param inputCapsuleDimensions - * @return - */ - public Builder inputCapsuleDimensions(int inputCapsuleDimensions){ - this.setInputCapsuleDimensions(inputCapsuleDimensions); - return this; - } - - /** - * Usually inferred automatically. - * @param inputShape - * @return - */ - public Builder inputShape(int... inputShape){ - int[] input = ValidationUtils.validate2NonNegative(inputShape, false, "inputShape"); - this.setInputCapsules(input[0]); - this.setInputCapsuleDimensions(input[1]); - return this; - } - - /** - * Sets whether to use bias. False by default. 
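For orientation, a hypothetical sketch of configuring the capsule fields above through the three-argument factory defined further below (capsules, capsuleDimensions, routings); the concrete numbers are illustrative only:

    // Illustrative only: 10 output capsules of 16 dimensions, 3 dynamic-routing iterations.
    // Input capsule shape is usually inferred, but can be given explicitly via inputShape(...).
    CapsuleLayer caps = CapsuleLayer.builder(10, 16, 3)
            .inputShape(1152, 8) // optional: usually inferred automatically
            .hasBias(false)      // default, shown for clarity
            .build();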
- * @param hasBias - * @return - */ - public Builder hasBias(boolean hasBias){ - this.setHasBias(hasBias); - return this; - } - + public static CapsuleLayerBuilder builder() { + return innerBuilder() + ; } + public static CapsuleLayerBuilder builder(int capsules, int capsulesDim, int routings) { + return innerBuilder() + .capsules(capsules) + .capsuleDimensions(capsulesDim) + .routings(routings); + } + public static abstract class CapsuleLayerBuilder< + C extends CapsuleLayer, B extends CapsuleLayerBuilder> + extends SameDiffLayerBuilder { + + + /** + * Usually inferred automatically. + * @param inputShape + * @return + */ + public B inputShape(int... inputShape){ + int[] input = ValidationUtils.validate2NonNegative(inputShape, false, "inputShape"); + this.inputCapsules$value = input[0]; + this.inputCapsuleDimensions$value = input[1]; + this.inputCapsules$set = true; + this.inputCapsuleDimensions$set = true; + return self(); + } + + + public C build() { + C l = this.initBuild(); + if (capsules <= 0 || capsuleDimensions <= 0 || routings$value <= 0) { + throw new IllegalArgumentException( + "Invalid configuration for Capsule ILayer (layer name = \"" + + l.getName() + + "\"):" + + " capsules, capsuleDimensions, and routings must be > 0. Got: " + + capsules + + ", " + + capsuleDimensions + + ", " + + routings$value); + } + + if (inputCapsules$value < 0 || inputCapsuleDimensions$value < 0) { + throw new IllegalArgumentException( + "Invalid configuration for Capsule ILayer (layer name = \"" + + l.getName() + + "\"):" + + " inputCapsules and inputCapsuleDimensions must be >= 0 if set. Got: " + + inputCapsules$value + + ", " + + inputCapsuleDimensions$value); + } + + return l; + } + } + + } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CapsuleStrengthLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CapsuleStrengthLayer.java index e702b2de1..99980c490 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CapsuleStrengthLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CapsuleStrengthLayer.java @@ -23,6 +23,7 @@ package org.deeplearning4j.nn.conf.layers; import lombok.Data; import lombok.EqualsAndHashCode; import lombok.NoArgsConstructor; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.inputs.InputType.InputTypeRecurrent; import org.deeplearning4j.nn.conf.inputs.InputType.Type; @@ -31,14 +32,10 @@ import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; @Data -@NoArgsConstructor @EqualsAndHashCode(callSuper = true) +@SuperBuilder public class CapsuleStrengthLayer extends SameDiffLambdaLayer { - public CapsuleStrengthLayer(Builder builder){ - super(); - } - @Override public SDVariable defineLayer(SameDiff SD, SDVariable layerInput) { return SD.norm2("caps_strength", layerInput, 2); @@ -49,18 +46,10 @@ public class CapsuleStrengthLayer extends SameDiffLambdaLayer { if(inputType == null || inputType.getType() != Type.RNN) { throw new IllegalStateException("Invalid input for Capsule Strength layer (layer name = \"" - + layerName + "\"): expect RNN input. Got: " + inputType); + + name + "\"): expect RNN input. 
Got: " + inputType); } InputTypeRecurrent ri = (InputTypeRecurrent) inputType; return InputType.feedForward(ri.getSize()); } - - public static class Builder extends SameDiffLambdaLayer.Builder{ - - @Override - public E build() { - return (E) new CapsuleStrengthLayer(this); - } - } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CenterLossOutputLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CenterLossOutputLayer.java index afe3fcc48..ec95558cf 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CenterLossOutputLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CenterLossOutputLayer.java @@ -21,6 +21,7 @@ package org.deeplearning4j.nn.conf.layers; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -41,30 +42,31 @@ import java.util.Collection; import java.util.Map; @Data -@NoArgsConstructor @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@SuperBuilder(buildMethodName = "initBuild") public class CenterLossOutputLayer extends BaseOutputLayer { - protected double alpha; - protected double lambda; - protected boolean gradientCheck; + @Builder.Default protected double alpha = 0.05; + @Builder.Default protected double lambda = 2e-4; + @Builder.Default protected boolean gradientCheck = false; - protected CenterLossOutputLayer(Builder builder) { - super(builder); - this.alpha = builder.alpha; - this.lambda = builder.lambda; - this.gradientCheck = builder.gradientCheck; - initializeConstraints(builder); +public static abstract class CenterLossOutputLayerBuilder> extends + BaseOutputLayerBuilder { + public C build() { + C l = initBuild(); + l.initializeConstraints(); + return l; } +} @Override public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { setNetConfiguration(conf); - LayerValidation.assertNInNOutSet("CenterLossOutputLayer", getLayerName(), layerIndex, getNIn(), getNOut()); + LayerValidation.assertNInNOutSet("CenterLossOutputLayer", getName(), layerIndex, getNIn(), getNOut()); LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); - + runInheritance(); Layer ret = new org.deeplearning4j.nn.layers.training.CenterLossOutputLayer(lconf, networkDataType); ret.addTrainingListeners(trainingListeners.toArray(new TrainingListener[]{})); ret.setIndex(layerIndex); @@ -86,16 +88,9 @@ public class CenterLossOutputLayer extends BaseOutputLayer { if (CenterLossParamInitializer.CENTER_KEY.equals(paramName)) { return new NoOp(); } - return iUpdater; + return getUpdater(); } - public double getAlpha() { - return alpha; - } - - public double getLambda() { - return lambda; - } public boolean getGradientCheck() { return gradientCheck; @@ -117,7 +112,7 @@ int trainSizeFixed = 0; int trainSizeVariable = 0; - if (getIDropout() != null) { + if (getDropOut() != null) { if (false) { //TODO drop connect //Dup the weights... note that this does NOT depend on the minibatch size... 
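A brief usage sketch for the center-loss configuration above; the nIn/nOut setters are assumed to be inherited from the BaseOutputLayer builder, which is not shown in this hunk:

    // Illustrative only: center loss for a 10-class classifier.
    CenterLossOutputLayer out = CenterLossOutputLayer.builder()
            .nIn(128)      // assumed inherited setter
            .nOut(10)      // assumed inherited setter
            .alpha(0.05)   // step size used when updating the per-class centers
            .lambda(2e-4)  // weighting of the center-loss term relative to the main loss
            .build();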
@@ -133,53 +128,13 @@ public class CenterLossOutputLayer extends BaseOutputLayer { // then we have 'epsilonNext' which is equivalent to input size trainSizeVariable += outputType.arrayElementsPerExample(); - return new LayerMemoryReport.Builder(layerName, CenterLossOutputLayer.class, inputType, outputType) + return new LayerMemoryReport.Builder(name, CenterLossOutputLayer.class, inputType, outputType) .standardMemory(numParams, updaterStateSize) .workingMemory(0, 0, trainSizeFixed, trainSizeVariable) //No additional memory (beyond activations) for inference .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching .build(); } - @Getter - @Setter - public static class Builder extends BaseOutputLayer.Builder { - protected double alpha = 0.05; - protected double lambda = 2e-4; - protected boolean gradientCheck = false; - - public Builder(){ - this.setActivationFn(new ActivationSoftmax()); - } - - public Builder(LossFunction lossFunction) { - super.lossFunction(lossFunction); - } - - public Builder(ILossFunction lossFunction) { - this.setLossFn(lossFunction); - } - - public Builder alpha(double alpha) { - this.setAlpha(alpha); - return this; - } - - public Builder lambda(double lambda) { - this.setLambda(lambda); - return this; - } - - public Builder gradientCheck(boolean isGradientCheck) { - this.setGradientCheck(isGradientCheck); - return this; - } - - @Override - @SuppressWarnings("unchecked") - public CenterLossOutputLayer build() { - return new CenterLossOutputLayer(this); - } - } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Cnn3DLossLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Cnn3DLossLayer.java index 8ae76bd41..15c31aaf2 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Cnn3DLossLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Cnn3DLossLayer.java @@ -20,7 +20,10 @@ package org.deeplearning4j.nn.conf.layers; +import java.util.Collection; +import java.util.Map; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.InputPreProcessor; @@ -30,131 +33,109 @@ import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.conf.memory.MemoryReport; import org.deeplearning4j.nn.params.EmptyParamInitializer; import org.deeplearning4j.optimize.api.TrainingListener; -import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.lossfunctions.ILossFunction; -import java.util.Collection; -import java.util.Map; -@Data -@NoArgsConstructor @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@SuperBuilder public class Cnn3DLossLayer extends FeedForwardLayer { +@Getter @Setter + protected ILossFunction lossFunction; + /** Format of the input/output data. 
See {@link Convolution3D.DataFormat} for details */ + @Getter @Setter + protected Convolution3D.DataFormat dataFormat; - protected ILossFunction lossFn; - protected Convolution3D.DataFormat dataFormat; + @Override + public Layer instantiate( + NeuralNetConfiguration conf, + Collection trainingListeners, + int layerIndex, + INDArray layerParamsView, + boolean initializeParams, + DataType networkDataType) { + setNetConfiguration(conf); + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + runInheritance(); - private Cnn3DLossLayer(Builder builder) { - super(builder); - this.lossFn = builder.lossFn; - this.dataFormat = builder.dataFormat; + org.deeplearning4j.nn.layers.convolution.Cnn3DLossLayer ret = + new org.deeplearning4j.nn.layers.convolution.Cnn3DLossLayer(lconf, networkDataType); + ret.addTrainingListeners(trainingListeners); + ret.setIndex(layerIndex); + ret.setParamsViewArray(layerParamsView); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); + ret.setParamTable(paramTable); + ret.setLayerConfiguration(lconf); + return ret; + } + + @Override + public ParamInitializer initializer() { + return EmptyParamInitializer.getInstance(); + } + + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + if (inputType == null + || (inputType.getType() != InputType.Type.CNN3D + && inputType.getType() != InputType.Type.CNNFlat)) { + throw new IllegalStateException( + "Invalid input type for CnnLossLayer (layer index = " + + layerIndex + + ", layer name=\"" + + getName() + + "\"): Expected CNN3D or CNNFlat input, got " + + inputType); + } + return inputType; + } + + @Override + public InputPreProcessor getPreProcessorForInputType(InputType inputType) { + return InputTypeUtil.getPreProcessorForInputTypeCnn3DLayers(inputType, getName()); + } + + @Override + public LayerMemoryReport getMemoryReport(InputType inputType) { + // During inference and training: dup the input array. 
But, this counts as *activations* not + // working memory + return new LayerMemoryReport.Builder(name, getClass(), inputType, inputType) + .standardMemory(0, 0) // No params + .workingMemory(0, 0, 0, 0) + .cacheMemory( + MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) // No caching + .build(); + } + + @Override + public void setNIn(InputType inputType, boolean override) { + // No op + } + + public static abstract class Cnn3DLossLayerBuilder< + C extends Cnn3DLossLayer, B extends Cnn3DLossLayerBuilder> + extends FeedForwardLayerBuilder { + + @Override + public B nIn(long nIn) { + throw new UnsupportedOperationException( + "Cnn3DLossLayer has no parameters, thus nIn will always equal nOut."); } @Override - public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, - int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { - setNetConfiguration(conf); - LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); - runInheritance(); - - org.deeplearning4j.nn.layers.convolution.Cnn3DLossLayer ret = - new org.deeplearning4j.nn.layers.convolution.Cnn3DLossLayer(lconf, networkDataType); - ret.addTrainingListeners(trainingListeners); - ret.setIndex(layerIndex); - ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(this, layerParamsView, initializeParams); - ret.setParamTable(paramTable); - ret.setLayerConfiguration(lconf); - return ret; + public B nOut(long nOut) { + throw new UnsupportedOperationException( + "Cnn3DLossLayer has no parameters, thus nIn will always equal nOut."); } - @Override - public ParamInitializer initializer() { - return EmptyParamInitializer.getInstance(); + public B nIn(int nIn) { + return nIn((Long.valueOf(nIn))); } - @Override - public InputType getOutputType(int layerIndex, InputType inputType) { - if (inputType == null || (inputType.getType() != InputType.Type.CNN3D - && inputType.getType() != InputType.Type.CNNFlat)) { - throw new IllegalStateException("Invalid input type for CnnLossLayer (layer index = " + layerIndex - + ", layer name=\"" + getLayerName() + "\"): Expected CNN3D or CNNFlat input, got " - + inputType); - } - return inputType; - } - - @Override - public InputPreProcessor getPreProcessorForInputType(InputType inputType) { - return InputTypeUtil.getPreProcessorForInputTypeCnn3DLayers(inputType, getLayerName()); - } - - @Override - public LayerMemoryReport getMemoryReport(InputType inputType) { - //During inference and training: dup the input array. But, this counts as *activations* not working memory - return new LayerMemoryReport.Builder(layerName, getClass(), inputType, inputType).standardMemory(0, 0) //No params - .workingMemory(0, 0, 0, 0) - .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching - .build(); - } - - @Override - public void setNIn(InputType inputType, boolean override) { - //No op - } - - - @Getter - @Setter - public static class Builder extends BaseOutputLayer.Builder { - - /** - * Format of the input/output data. See {@link Convolution3D.DataFormat} for details - */ - protected Convolution3D.DataFormat dataFormat; - - /** - * @param format Format of the input/output data. 
See {@link Convolution3D.DataFormat} for details - */ - public Builder(@NonNull Convolution3D.DataFormat format) { - this.setDataFormat(format); - this.setActivationFn(Activation.IDENTITY.getActivationFunction()); - } - - @Override - @SuppressWarnings("unchecked") - public Builder nIn(int nIn) { - throw new UnsupportedOperationException( - "Cnn3DLossLayer has no parameters, thus nIn will always equal nOut."); - } - - @Override - @SuppressWarnings("unchecked") - public Builder nOut(int nOut) { - throw new UnsupportedOperationException( - "Cnn3DLossLayer has no parameters, thus nIn will always equal nOut."); - } - - @Override - public void setNIn(long nIn){ - throw new UnsupportedOperationException( - "Cnn3DLossLayer has no parameters, thus nIn will always equal nOut."); - } - - @Override - public void setNOut(long nOut){ - throw new UnsupportedOperationException( - "Cnn3DLossLayer has no parameters, thus nIn will always equal nOut."); - } - - - @Override - @SuppressWarnings("unchecked") - public Cnn3DLossLayer build() { - return new Cnn3DLossLayer(this); - } + public B nOut(int nOut) { + return nOut(Long.valueOf(nOut)); } + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CnnLossLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CnnLossLayer.java index 50e917dac..8c6643d6d 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CnnLossLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CnnLossLayer.java @@ -20,10 +20,10 @@ package org.deeplearning4j.nn.conf.layers; -import lombok.Data; -import lombok.EqualsAndHashCode; -import lombok.NoArgsConstructor; -import lombok.ToString; +import java.util.Collection; +import java.util.Map; +import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.CNN2DFormat; @@ -34,29 +34,19 @@ import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.conf.memory.MemoryReport; import org.deeplearning4j.nn.params.EmptyParamInitializer; import org.deeplearning4j.optimize.api.TrainingListener; -import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.lossfunctions.ILossFunction; -import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction; - -import java.util.Collection; -import java.util.Map; @Data -@NoArgsConstructor @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@SuperBuilder public class CnnLossLayer extends FeedForwardLayer { - protected ILossFunction lossFn; - protected CNN2DFormat format = CNN2DFormat.NCHW; - - private CnnLossLayer(Builder builder) { - super(builder); - this.lossFn = builder.lossFn; - this.format = builder.format; - } + protected ILossFunction lossFunction; + @Builder.Default + protected CNN2DFormat dataFormat = CNN2DFormat.NCHW; @Override public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, @@ -87,20 +77,20 @@ public class CnnLossLayer extends FeedForwardLayer { && inputType.getType() != InputType.Type.CNNFlat)) { throw new IllegalStateException( "Invalid input type for CnnLossLayer (layer index = " + layerIndex + ", layer name=\"" - + getLayerName() + "\"): Expected CNN or CNNFlat input, got " + inputType); + + getName() + "\"): Expected CNN or CNNFlat input, got " + inputType); } return inputType; } 
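To illustrate the parameter-free loss layer above, a hypothetical configuration sketch; the lossFunction and dataFormat setters are generated by Lombok from the fields shown earlier, while the activation setter is assumed to come from the inherited builder:

    // Illustrative only: per-pixel loss over CNN activations. nIn/nOut must not be set,
    // since the layer has no weights (the builder overrides below reject them).
    CnnLossLayer lossLayer = CnnLossLayer.builder()
            .lossFunction(LossFunctions.LossFunction.XENT.getILossFunction())
            .activation(Activation.SIGMOID) // assumed inherited setter
            .dataFormat(CNN2DFormat.NCHW)
            .build();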
@Override public InputPreProcessor getPreProcessorForInputType(InputType inputType) { - return InputTypeUtil.getPreProcessorForInputTypeCnnLayers(inputType, getLayerName()); + return InputTypeUtil.getPreProcessorForInputTypeCnnLayers(inputType, getName()); } @Override public LayerMemoryReport getMemoryReport(InputType inputType) { //During inference and training: dup the input array. But, this counts as *activations* not working memory - return new LayerMemoryReport.Builder(layerName, getClass(), inputType, inputType).standardMemory(0, 0) //No params + return new LayerMemoryReport.Builder(name, getClass(), inputType, inputType).standardMemory(0, 0) //No params .workingMemory(0, 0, 0, 0) .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching .build(); @@ -109,60 +99,21 @@ public class CnnLossLayer extends FeedForwardLayer { @Override public void setNIn(InputType inputType, boolean override) { if(inputType instanceof InputType.InputTypeConvolutional){ - this.format = ((InputType.InputTypeConvolutional) inputType).getFormat(); + this.dataFormat = ((InputType.InputTypeConvolutional) inputType).getFormat(); } } - - public static class Builder extends BaseOutputLayer.Builder { - - protected CNN2DFormat format = CNN2DFormat.NCHW; - - public Builder() { - this.activationFn = Activation.IDENTITY.getActivationFunction(); - } - - public Builder(LossFunction lossFunction) { - lossFunction(lossFunction); - } - - public Builder(ILossFunction lossFunction) { - this.lossFn = lossFunction; - } - - public Builder format(CNN2DFormat format){ - this.format = format; - return this; - } - + public static abstract class CnnLossLayerBuilder> extends FeedForwardLayerBuilder { @Override - @SuppressWarnings("unchecked") - public Builder nIn(int nIn) { + public B nIn(long nIn) { throw new UnsupportedOperationException("Ths layer has no parameters, thus nIn will always equal nOut."); } @Override - @SuppressWarnings("unchecked") - public Builder nOut(int nOut) { + public B nOut(long nOut) { throw new UnsupportedOperationException("Ths layer has no parameters, thus nIn will always equal nOut."); } - - @Override - public void setNIn(long nIn){ - throw new UnsupportedOperationException( - "This layer has no parameters, thus nIn will always equal nOut."); - } - - @Override - public void setNOut(long nOut){ - throw new UnsupportedOperationException( - "This layer has no parameters, thus nIn will always equal nOut."); - } - - @Override - @SuppressWarnings("unchecked") - public CnnLossLayer build() { - return new CnnLossLayer(this); - } } + + } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1D.java index e4ab7f9ef..ec96d6aef 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1D.java @@ -24,10 +24,11 @@ import lombok.Data; import lombok.EqualsAndHashCode; import lombok.NoArgsConstructor; import lombok.ToString; +import lombok.experimental.SuperBuilder; @Data -@NoArgsConstructor @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@SuperBuilder(buildMethodName = "initBuild") public class Convolution1D extends Convolution1DLayer { } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1DLayer.java 
b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1DLayer.java index cf4fb5a1a..a82f6c7c5 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1DLayer.java @@ -20,10 +20,10 @@ package org.deeplearning4j.nn.conf.layers; -import lombok.Data; -import lombok.EqualsAndHashCode; -import lombok.NoArgsConstructor; -import lombok.ToString; +import java.util.Collection; +import java.util.Map; +import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -36,245 +36,186 @@ import org.deeplearning4j.util.ValidationUtils; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.Collection; -import java.util.Map; - +/* +//TODO: We will eventually want to NOT subclass off of ConvolutionLayer. +//Currently, we just subclass off the ConvolutionLayer and hard code the "width" dimension to 1 + * This approach treats a multivariate time series with L timesteps and + * P variables as an L x 1 x P image (L rows high, 1 column wide, P + * channels deep). The kernel should be H + * Default: NCHW + * + * @param format Format for activations (in and out) + */ + @Builder.Default + protected CNN2DFormat dataFormat = + CNN2DFormat.NCHW; // default value for legacy serialization reasons + /** + * Size of the convolution + * + * @param kernelSize the length of the kernel + */ + @Builder.Default private int[] kernelSize = new int[] {1, 1}; + /** + * Stride for the convolution. Must be > 0 + * + * @param stride Stride + */ + @Builder.Default private int[] stride = new int[] {1, 1}; + /** + * Padding value for the convolution. 
Not used with {@link + * org.deeplearning4j.nn.conf.ConvolutionMode#Same} + * + * @param padding Padding value + */ + @Builder.Default private int[] padding = new int[] {0, 0}; - private Convolution1DLayer(Builder builder) { - super(builder); - initializeConstraints(builder); - this.rnnDataFormat = builder.rnnDataFormat; + private int[] dilation; + + public static Convolution1DLayerBuilder builder() { + return innerBuilder(); + } + + public static Convolution1DLayerBuilder builder(int kernelSize) { + return innerBuilder().kernelSize(kernelSize); + } + + public static Convolution1DLayerBuilder builder(int kernelSize, int stride, int padding) { + return innerBuilder().kernelSize(kernelSize).stride(stride).padding(padding); + } + + public static Convolution1DLayerBuilder builder(int kernelSize, int stride) { + return innerBuilder().kernelSize(kernelSize).stride(stride); + } + + @Override + public org.deeplearning4j.nn.api.Layer instantiate( + NeuralNetConfiguration conf, + Collection trainingListeners, + int layerIndex, + INDArray layerParamsView, + boolean initializeParams, + DataType networkDataType) { + setNetConfiguration(conf); + LayerValidation.assertNInNOutSet( + "Convolution1DLayer", getName(), layerIndex, getNIn(), getNOut()); + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + org.deeplearning4j.nn.layers.convolution.Convolution1DLayer ret = + new org.deeplearning4j.nn.layers.convolution.Convolution1DLayer(lconf, networkDataType); + ret.addTrainingListeners(trainingListeners); + ret.setIndex(layerIndex); + ret.setParamsViewArray(layerParamsView); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); + ret.setParamTable(paramTable); + ret.setLayerConfiguration(lconf); + return ret; + } + + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + if (inputType == null || inputType.getType() != InputType.Type.RNN) { + throw new IllegalStateException( + "Invalid input for 1D CNN layer (layer index = " + + layerIndex + + ", layer name = \"" + + getName() + + "\"): expect RNN input type with size > 0. 
Got: " + + inputType); + } + InputType.InputTypeRecurrent it = (InputType.InputTypeRecurrent) inputType; + long inputTsLength = it.getTimeSeriesLength(); + long outLength; + if (inputTsLength < 0) { + // Probably: user did InputType.recurrent(x) without specifying sequence length + outLength = -1; + } else { + outLength = + Convolution1DUtils.getOutputSize( + inputTsLength, kernelSize[0], stride[0], padding[0], getConvolutionMode(), dilation[0]); } - @Override - public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, - Collection trainingListeners, int layerIndex, INDArray layerParamsView, - boolean initializeParams, DataType networkDataType) { - setNetConfiguration(conf); - LayerValidation.assertNInNOutSet("Convolution1DLayer", getLayerName(), layerIndex, getNIn(), getNOut()); - LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); - org.deeplearning4j.nn.layers.convolution.Convolution1DLayer ret = - new org.deeplearning4j.nn.layers.convolution.Convolution1DLayer(lconf, networkDataType); - ret.addTrainingListeners(trainingListeners); - ret.setIndex(layerIndex); - ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(this, layerParamsView, initializeParams); - ret.setParamTable(paramTable); - ret.setLayerConfiguration(lconf); - return ret; + return InputType.recurrent(nOut, outLength, rnnDataFormat); + } + + @Override + public void setNIn(InputType inputType, boolean override) { + if (inputType == null || inputType.getType() != InputType.Type.RNN) { + throw new IllegalStateException( + "Invalid input for 1D CNN layer (layer name = \"" + + getName() + + "\"): expect RNN input type with size > 0. Got: " + + inputType); } - @Override - public InputType getOutputType(int layerIndex, InputType inputType) { - if (inputType == null || inputType.getType() != InputType.Type.RNN) { - throw new IllegalStateException("Invalid input for 1D CNN layer (layer index = " + layerIndex - + ", layer name = \"" + getLayerName() + "\"): expect RNN input type with size > 0. Got: " - + inputType); - } - InputType.InputTypeRecurrent it = (InputType.InputTypeRecurrent) inputType; - long inputTsLength = it.getTimeSeriesLength(); - long outLength; - if (inputTsLength < 0) { - //Probably: user did InputType.recurrent(x) without specifying sequence length - outLength = -1; - } else { - outLength = Convolution1DUtils.getOutputSize(inputTsLength, kernelSize[0], stride[0], padding[0], - convolutionMode, dilation[0]); - } + InputType.InputTypeRecurrent r = (InputType.InputTypeRecurrent) inputType; + if (nIn <= 0 || override) { + this.nIn = r.getSize(); + } + if (this.rnnDataFormat == null || override) this.rnnDataFormat = r.getFormat(); - return InputType.recurrent(nOut, outLength, rnnDataFormat); + if (this.dataFormat == null || override) + this.dataFormat = rnnDataFormat == RNNFormat.NCW ? CNN2DFormat.NCHW : CNN2DFormat.NHWC; + } + + @Override + public InputPreProcessor getPreProcessorForInputType(InputType inputType) { + if (inputType == null) { + throw new IllegalStateException( + "Invalid input for Convolution1D layer (layer name=\"" + + getName() + + "\"): input is null"); } - @Override - public void setNIn(InputType inputType, boolean override) { - if (inputType == null || inputType.getType() != InputType.Type.RNN) { - throw new IllegalStateException("Invalid input for 1D CNN layer (layer name = \"" + getLayerName() - + "\"): expect RNN input type with size > 0. 
Got: " + inputType); - } + return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType, rnnDataFormat, getName()); + } - InputType.InputTypeRecurrent r = (InputType.InputTypeRecurrent) inputType; - if (nIn <= 0 || override) { - this.nIn = r.getSize(); - } - if(this.rnnDataFormat == null || override) - this.rnnDataFormat = r.getFormat(); + protected boolean allowCausal() { + return true; + } - if(this.cnn2dDataFormat == null || override) - this.cnn2dDataFormat = rnnDataFormat == RNNFormat.NCW ? CNN2DFormat.NCHW : CNN2DFormat.NHWC; + public static abstract class Convolution1DLayerBuilder< + C extends ConvolutionLayer, B extends Convolution1DLayerBuilder> + extends ConvolutionLayerBuilder { + public C build() { + C l = initBuild(); + ConvolutionUtils.validateConvolutionModePadding(l.getConvolutionMode(), padding$value); + ConvolutionUtils.validateCnnKernelStridePadding( + kernelSize$value, stride$value, padding$value); + l.initializeConstraints(); + return l; } - @Override - public InputPreProcessor getPreProcessorForInputType(InputType inputType) { - if (inputType == null) { - throw new IllegalStateException("Invalid input for Convolution1D layer (layer name=\"" + getLayerName() - + "\"): input is null"); - } - - return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType, rnnDataFormat,getLayerName()); + public B kernelSize(int @NonNull ... kernelSize) { + this.kernelSize$value[0] = ValidationUtils.validate1NonNegative(kernelSize, "kernelSize")[0]; + this.kernelSize$set = true; + return self(); } - public static class Builder extends ConvolutionLayer.BaseConvBuilder { - - private RNNFormat rnnDataFormat = RNNFormat.NCW; - - public Builder() { - this(0, 1, 0); - this.setKernelSize((int[]) null); - } - - @Override - protected boolean allowCausal() { - return true; - } - - - public Builder rnnDataFormat(RNNFormat rnnDataFormat) { - this.rnnDataFormat = rnnDataFormat; - return this; - } - /** - * @param kernelSize Kernel size - * @param stride Stride - */ - public Builder(int kernelSize, int stride) { - this(kernelSize, stride, 0); - } - - /** - * Constructor with specified kernel size, stride of 1, padding of 0 - * - * @param kernelSize Kernel size - */ - public Builder(int kernelSize) { - this(kernelSize, 1, 0); - } - - /** - * @param kernelSize Kernel size - * @param stride Stride - * @param padding Padding - */ - public Builder(int kernelSize, int stride, int padding) { - this.kernelSize = new int[] {1, 1}; - this.stride = new int[] {1, 1}; - this.padding = new int[] {0, 0}; - - this.setKernelSize(kernelSize); - this.setStride(stride); - this.setPadding(padding); - } - - /** - * Size of the convolution - * - * @param kernelSize the length of the kernel - */ - public Builder kernelSize(int kernelSize) { - this.setKernelSize(kernelSize); - return this; - } - - /** - * Stride for the convolution. Must be > 0 - * - * @param stride Stride - */ - public Builder stride(int stride) { - this.setStride(stride); - return this; - } - - /** - * Padding value for the convolution. Not used with {@link org.deeplearning4j.nn.conf.ConvolutionMode#Same} - * - * @param padding Padding value - */ - public Builder padding(int padding) { - this.setPadding(padding); - return this; - } - - @Override - public void setKernelSize(int... 
kernelSize) { - - if(kernelSize == null){ - this.kernelSize = null; - return; - } - - if(this.kernelSize == null){ - this.kernelSize = new int[] {1, 1}; - } - - this.kernelSize[0] = ValidationUtils.validate1NonNegative(kernelSize, "kernelSize")[0]; - } - - @Override - public void setStride(int... stride) { - - if(stride == null){ - this.stride = null; - return; - } - - if(this.stride == null){ - this.stride = new int[] {1, 1}; - } - - this.stride[0] = ValidationUtils.validate1NonNegative(stride, "stride")[0]; - } - - @Override - public void setPadding(int... padding) { - - if(padding == null){ - this.padding = null; - return; - } - - if(this.padding == null){ - this.padding = new int[] {0, 0}; - } - - this.padding[0] = ValidationUtils.validate1NonNegative(padding, "padding")[0]; - } - - @Override - public void setDilation(int... dilation) { - - if(dilation == null){ - this.dilation = null; - return; - } - - if(this.dilation == null){ - this.dilation = new int[] {1, 1}; - } - - this.dilation[0] = ValidationUtils.validate1NonNegative(dilation, "dilation")[0]; - } - - @Override - @SuppressWarnings("unchecked") - public Convolution1DLayer build() { - ConvolutionUtils.validateConvolutionModePadding(convolutionMode, padding); - ConvolutionUtils.validateCnnKernelStridePadding(kernelSize, stride, padding); - - return new Convolution1DLayer(this); - } + public B padding(int @NonNull ... padding) { + this.padding$value[0] = ValidationUtils.validate1NonNegative(padding, "padding")[0]; + this.padding$set = true; + return self(); } + + public B dilation(int @NonNull ... dilation) { + this.dilation[0] = ValidationUtils.validate1NonNegative(dilation, "dilation")[0]; + return self(); + } + + public B stride(int @NonNull ... stride) { + this.stride$value[0] = ValidationUtils.validate1NonNegative(stride, "stride")[0]; + this.stride$set = true; + return self(); + } + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution2D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution2D.java index 865c6b9ed..49d2750a1 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution2D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution2D.java @@ -24,10 +24,12 @@ import lombok.Data; import lombok.EqualsAndHashCode; import lombok.NoArgsConstructor; import lombok.ToString; +import lombok.experimental.SuperBuilder; @Data -@NoArgsConstructor + @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@SuperBuilder(buildMethodName = "initBuild") public class Convolution2D extends ConvolutionLayer { } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution3D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution3D.java index 99992463a..7f02d91f1 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution3D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution3D.java @@ -20,12 +20,13 @@ package org.deeplearning4j.nn.conf.layers; +import java.util.Collection; +import java.util.Map; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; -import org.deeplearning4j.nn.conf.ConvolutionMode; -import org.deeplearning4j.nn.conf.InputPreProcessor; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import 
org.deeplearning4j.nn.conf.*; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.layers.convolution.Convolution3DLayer; import org.deeplearning4j.nn.params.Convolution3DParamInitializer; @@ -36,267 +37,240 @@ import org.deeplearning4j.util.ValidationUtils; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.Collection; -import java.util.Map; - @Data -@NoArgsConstructor @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@SuperBuilder(builderMethodName = "innerBuilder", buildMethodName = "initBuild") public class Convolution3D extends ConvolutionLayer { - /** - * An optional dataFormat: "NDHWC" or "NCDHW". Defaults to "NCDHW".
The data format of the input and output - * data.
For "NCDHW" (also known as 'channels first' format), the data storage order is: [batchSize, - * inputChannels, inputDepth, inputHeight, inputWidth].
For "NDHWC" ('channels last' format), the data is stored - * in the order of: [batchSize, inputDepth, inputHeight, inputWidth, inputChannels]. - */ - public enum DataFormat { - NCDHW, NDHWC + /** + * Set dilation size for 3D convolutions in (depth, height, width) order + * + * @param dilation kernel size + */ + private int[] dilation; + /** + * Set padding size for 3D convolutions in (depth, height, width) order + * + * @param padding kernel size + */ + private int[] padding; + + private int[] stride; + + @Builder.Default + private ConvolutionMode mode = ConvolutionMode.Same; // in libnd4j: 0 - same mode, 1 - valid mode + + @Builder.Default + private DataFormat dataFormat = DataFormat.NCDHW; // in libnd4j: 1 - NCDHW, 0 - NDHWC + /** + * Set kernel size for 3D convolutions in (depth, height, width) order + * + * @param kernelSize kernel size + */ + private int[] kernelSize; + + @Builder.Default private ConvolutionMode convolutionMode = ConvolutionMode.Truncate; + + public static Convolution3DBuilder builder() { + return innerBuilder() + .kernelSize(new int[] {2, 2, 2}) + .stride(new int[] {1, 1, 1}) + .padding(new int[] {0, 0, 0}) + .dilation(new int[] {1, 1, 1}) + .convolutionDim(3); + } + + public static Convolution3DBuilder builder( + int[] kernelSize, int[] stride, int[] padding, int[] dilation) { + return innerBuilder() + .kernelSize(kernelSize) + .stride(stride) + .padding(padding) + .dilation(dilation) + .convolutionDim(3); + } + + public static Convolution3DBuilder builder( + int[] kernelSize, int[] stride, int[] padding) { + return innerBuilder() + .kernelSize(kernelSize) + .stride(stride) + .padding(padding) + .convolutionDim(3); + } + + + + public static Convolution3DBuilder builder(int[] kernelSize, int[] stride) { + return innerBuilder().kernelSize(kernelSize).stride(stride).convolutionDim(3); + } + + public static Convolution3DBuilder builder(int... 
kernelSize) { + return innerBuilder().kernelSize(kernelSize).convolutionDim(3); + } + + /** + * 3-dimensional convolutional layer configuration nIn in the input layer is the number of + * channels nOut is the number of filters to be used in the net or in other words the depth The + * builder specifies the filter/kernel size, the stride and padding The pooling layer takes the + * kernel size + */ + public boolean hasBias() { + return isHasBias(); + } + + @Override + public Convolution3D clone() { + Convolution3D clone = (Convolution3D) super.clone(); + if (clone.kernelSize != null) { + clone.kernelSize = clone.kernelSize.clone(); + } + if (clone.stride != null) { + clone.stride = clone.stride.clone(); + } + if (clone.padding != null) { + clone.padding = clone.padding.clone(); + } + if (clone.dilation != null) { + clone.dilation = clone.dilation.clone(); + } + return clone; + } + + @Override + public Layer instantiate( + NeuralNetConfiguration conf, + Collection iterationListeners, + int layerIndex, + INDArray layerParamsView, + boolean initializeParams, + DataType networkDataType) { + LayerValidation.assertNInNOutSet("Convolution3D", getName(), layerIndex, getNIn(), getNOut()); + + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + runInheritance(); + + Convolution3DLayer ret = new Convolution3DLayer(lconf, networkDataType); + ret.addTrainingListeners(iterationListeners); + ret.setIndex(layerIndex); + ret.setParamsViewArray(layerParamsView); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); + ret.setParamTable(paramTable); + ret.setLayerConfiguration(lconf); + return ret; + } + + @Override + public ParamInitializer initializer() { + return Convolution3DParamInitializer.getInstance(); + } + + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + if (inputType == null || inputType.getType() != InputType.Type.CNN3D) { + throw new IllegalStateException( + "Invalid input for Convolution3D layer (layer name=\"" + + getName() + + "\"): Expected CNN3D input, got " + + inputType); + } + return InputTypeUtil.getOutputTypeCnn3DLayers( + inputType, + dataFormat, + kernelSize, + stride, + padding, + dilation, + convolutionMode, + nOut, + layerIndex, + getName(), + Convolution3DLayer.class); + } + + @Override + public InputPreProcessor getPreProcessorForInputType(InputType inputType) { + if (inputType == null) { + throw new IllegalStateException( + "Invalid input for Convolution3D layer (layer name=\"" + + getName() + + "\"): input is null"); } - private ConvolutionMode mode = ConvolutionMode.Same; // in libnd4j: 0 - same mode, 1 - valid mode - private DataFormat dataFormat = DataFormat.NCDHW; // in libnd4j: 1 - NCDHW, 0 - NDHWC + return InputTypeUtil.getPreProcessorForInputTypeCnn3DLayers(inputType, getName()); + } - /** - * 3-dimensional convolutional layer configuration nIn in the input layer is the number of channels nOut is the - * number of filters to be used in the net or in other words the depth The builder specifies the filter/kernel size, - * the stride and padding The pooling layer takes the kernel size - */ - public Convolution3D(Builder builder) { - super(builder); - this.dataFormat = builder.dataFormat; - this.convolutionMode = builder.convolutionMode; + @Override + public void setNIn(InputType inputType, boolean override) { + if (inputType == null || inputType.getType() != InputType.Type.CNN3D) { + throw new IllegalStateException( + "Invalid input for Convolution 3D layer (layer name=\"" + + 
getName() + + "\"): Expected CNN3D input, got " + + inputType); } - public boolean hasBias() { - return hasBias; + if (nIn <= 0 || override) { + InputType.InputTypeConvolutional3D c = (InputType.InputTypeConvolutional3D) inputType; + this.nIn = c.getChannels(); + } + } + + protected boolean allowCausal() { + // Causal convolution - allowed for 1D only + return false; + } + + /** + * An optional dataFormat: "NDHWC" or "NCDHW". Defaults to "NCDHW".
+ * The data format of the input and output data.
+ * For "NCDHW" (also known as 'channels first' format), the data storage order is: [batchSize, + * inputChannels, inputDepth, inputHeight, inputWidth].
+ * For "NDHWC" ('channels last' format), the data is stored in the order of: [batchSize, + * inputDepth, inputHeight, inputWidth, inputChannels]. + */ + public enum DataFormat { + NCDHW, + NDHWC + } + + // public Builder(int[] kernelSize, int[] stride, int[] padding, int[] dilation) { + // sup/er(kernelSize, stride, padding, dilation, 3); + public static abstract class Convolution3DBuilder< + C extends Convolution3D, B extends Convolution3DBuilder> + extends ConvolutionLayer.ConvolutionLayerBuilder { + public C build() { + ConvolutionUtils.validateConvolutionModePadding(convolutionMode$value, padding); + Convolution3DUtils.validateCnn3DKernelStridePadding(kernelSize, stride, padding); + C l = initBuild(); + return l; } + @Override // TODO we can use the parent builder and do not need to redefine the variables. + // Validation can be done in override function! + public B stride(int... stride) { + this.stride = ValidationUtils.validate3NonNegative(stride, "stride"); + return self(); + } - @Override - public Convolution3D clone() { - Convolution3D clone = (Convolution3D) super.clone(); - if (clone.kernelSize != null) { - clone.kernelSize = clone.kernelSize.clone(); - } - if (clone.stride != null) { - clone.stride = clone.stride.clone(); - } - if (clone.padding != null) { - clone.padding = clone.padding.clone(); - } - if (clone.dilation != null) { - clone.dilation = clone.dilation.clone(); - } - return clone; + public B kernelSize(int... kernelSize) { + this.kernelSize = ValidationUtils.validate3NonNegative(kernelSize, "kernelSize"); + return self(); } @Override - public Layer instantiate(NeuralNetConfiguration conf, Collection iterationListeners, - int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { - LayerValidation.assertNInNOutSet("Convolution3D", getLayerName(), layerIndex, getNIn(), getNOut()); - - - LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); - Convolution3DLayer ret = new Convolution3DLayer(lconf, networkDataType); - ret.addTrainingListeners(iterationListeners); - ret.setIndex(layerIndex); - ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(this, layerParamsView, initializeParams); - ret.setParamTable(paramTable); - ret.setLayerConfiguration(lconf); - return ret; + public B dilation(int... dilation) { + this.dilation = ValidationUtils.validate3NonNegative(dilation, "dilation"); + return self(); } @Override - public ParamInitializer initializer() { - return Convolution3DParamInitializer.getInstance(); - } - - @Override - public InputType getOutputType(int layerIndex, InputType inputType) { - if (inputType == null || inputType.getType() != InputType.Type.CNN3D) { - throw new IllegalStateException("Invalid input for Convolution3D layer (layer name=\"" + getLayerName() - + "\"): Expected CNN3D input, got " + inputType); - } - return InputTypeUtil.getOutputTypeCnn3DLayers(inputType, dataFormat, kernelSize, stride, padding, dilation, convolutionMode, - nOut, layerIndex, getLayerName(), Convolution3DLayer.class); - } - - @Override - public InputPreProcessor getPreProcessorForInputType(InputType inputType) { - if (inputType == null) { - throw new IllegalStateException("Invalid input for Convolution3D layer (layer name=\"" + getLayerName() - + "\"): input is null"); - } - - return InputTypeUtil.getPreProcessorForInputTypeCnn3DLayers(inputType, getLayerName()); + public B padding(int... 
padding) { + this.padding = ValidationUtils.validate3NonNegative(padding, "padding"); + return self(); } - @Override - public void setNIn(InputType inputType, boolean override) { - if (inputType == null || inputType.getType() != InputType.Type.CNN3D) { - throw new IllegalStateException("Invalid input for Convolution 3D layer (layer name=\"" + getLayerName() - + "\"): Expected CNN3D input, got " + inputType); - } - - if (nIn <= 0 || override) { - InputType.InputTypeConvolutional3D c = (InputType.InputTypeConvolutional3D) inputType; - this.nIn = c.getChannels(); - } - } - - @AllArgsConstructor - @Getter - @Setter - public static class Builder extends ConvolutionLayer.BaseConvBuilder { - - /** - * The data format for input and output activations.
NCDHW: activations (in/out) should have shape - * [minibatch, channels, depth, height, width]
NDHWC: activations (in/out) should have shape [minibatch, - * depth, height, width, channels]
- */ - private DataFormat dataFormat = DataFormat.NCDHW; - - public Builder() { - super(new int[] {2, 2, 2}, new int[] {1, 1, 1}, new int[] {0, 0, 0}, new int[] {1, 1, 1}, 3); - } - - @Override - protected boolean allowCausal() { - //Causal convolution - allowed for 1D only - return false; - } - - public Builder(int[] kernelSize, int[] stride, int[] padding, int[] dilation) { - super(kernelSize, stride, padding, dilation, 3); - } - - public Builder(int[] kernelSize, int[] stride, int[] padding) { - this(kernelSize, stride, padding, new int[] {1, 1, 1}); - } - - public Builder(int[] kernelSize, int[] stride) { - this(kernelSize, stride, new int[] {0, 0, 0}); - } - - public Builder(int... kernelSize) { - this(kernelSize, new int[] {1, 1, 1}); - } - - /** - * Set kernel size for 3D convolutions in (depth, height, width) order - * - * @param kernelSize kernel size - * @return 3D convolution layer builder - */ - public Builder kernelSize(int... kernelSize) { - this.setKernelSize(kernelSize); - return this; - } - - /** - * Set stride size for 3D convolutions in (depth, height, width) order - * - * @param stride kernel size - * @return 3D convolution layer builder - */ - public Builder stride(int... stride) { - this.setStride(stride); - return this; - } - - /** - * Set padding size for 3D convolutions in (depth, height, width) order - * - * @param padding kernel size - * @return 3D convolution layer builder - */ - public Builder padding(int... padding) { - this.setPadding(padding); - return this; - } - - /** - * Set dilation size for 3D convolutions in (depth, height, width) order - * - * @param dilation kernel size - * @return 3D convolution layer builder - */ - public Builder dilation(int... dilation) { - this.setDilation(dilation); - return this; - } - - public Builder convolutionMode(ConvolutionMode mode) { - this.setConvolutionMode(mode); - return this; - } - - /** - * The data format for input and output activations.
NCDHW: activations (in/out) should have shape - * [minibatch, channels, depth, height, width]
NDHWC: activations (in/out) should have shape [minibatch, - * depth, height, width, channels]
- * - * @param dataFormat Data format to use for activations - */ - public Builder dataFormat(DataFormat dataFormat) { - this.setDataFormat(dataFormat); - return this; - } - - /** - * Set kernel size for 3D convolutions in (depth, height, width) order - * - * @param kernelSize kernel size - */ - @Override - public void setKernelSize(int... kernelSize) { - this.kernelSize = ValidationUtils.validate3NonNegative(kernelSize, "kernelSize"); - } - - /** - * Set stride size for 3D convolutions in (depth, height, width) order - * - * @param stride kernel size - */ - @Override - public void setStride(int... stride) { - this.stride = ValidationUtils.validate3NonNegative(stride, "stride"); - } - - /** - * Set padding size for 3D convolutions in (depth, height, width) order - * - * @param padding kernel size - */ - @Override - public void setPadding(int... padding) { - this.padding = ValidationUtils.validate3NonNegative(padding, "padding"); - } - - /** - * Set dilation size for 3D convolutions in (depth, height, width) order - * - * @param dilation kernel size - */ - @Override - public void setDilation(int... dilation) { - this.dilation = ValidationUtils.validate3NonNegative(dilation, "dilation"); - } - - - - @Override - @SuppressWarnings("unchecked") - public Convolution3D build() { - ConvolutionUtils.validateConvolutionModePadding(convolutionMode, padding); - Convolution3DUtils.validateCnn3DKernelStridePadding(kernelSize, stride, padding); - - return new Convolution3D(this); - } - } - + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java index 9ef539ae9..a5915e9ec 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java @@ -20,7 +20,13 @@ package org.deeplearning4j.nn.conf.layers; +import com.fasterxml.jackson.annotation.JsonIgnore; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; import lombok.*; +import lombok.experimental.SuperBuilder; import net.brutex.ai.dnn.api.LayerType; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; @@ -32,578 +38,420 @@ import org.deeplearning4j.nn.params.ConvolutionParamInitializer; import org.deeplearning4j.optimize.api.TrainingListener; import org.deeplearning4j.util.ConvolutionUtils; import org.deeplearning4j.util.ValidationUtils; -import org.nd4j.common.base.Preconditions; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import com.fasterxml.jackson.annotation.JsonIgnore; -import java.util.Arrays; -import java.util.Collection; -import java.util.HashMap; -import java.util.Map; - -@Data -@NoArgsConstructor +/** + * ConvolutionLayer nIn in the input layer is the number of channels nOut is the number of filters + * to be used in the net or in other words the channels The builder specifies the filter/kernel + * size, the stride and padding The pooling layer takes the kernel size + */ @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@SuperBuilder(builderMethodName = "innerBuilder", buildMethodName = "initBuild") public class ConvolutionLayer extends FeedForwardLayer { + /** + * Size of the convolution rows/columns + * + * @param kernelSize the height and width of the kernel + */ + private @Builder.Default @Getter @Setter 
int[] kernelSize = new int[] {5, 5}; // Square filter + /** If true (default): include bias parameters in the model. False: no bias. */ + @Builder.Default @Getter @Setter private boolean hasBias = true; + /** + * Set the convolution mode for the Convolution layer. See {@link ConvolutionMode} for more + * details Default is {@link ConvolutionMode}.Truncate. + */ + @Builder.Default @Getter @Setter + private ConvolutionMode convolutionMode = ConvolutionMode.Truncate; - protected boolean hasBias = true; - protected ConvolutionMode convolutionMode = ConvolutionMode.Truncate; //Default to truncate here - default for 0.6.0 and earlier networks on JSON deserialization - protected int[] dilation = new int[] {1, 1}; - protected int[] kernelSize; // Square filter - protected int[] stride; // Default is 2. Down-sample by a factor of 2 - protected int[] padding; - protected boolean cudnnAllowFallback = true; - protected CNN2DFormat cnn2dDataFormat = CNN2DFormat.NCHW; //default value for legacy serialization reasons - @JsonIgnore - @EqualsAndHashCode.Exclude - private boolean defaultValueOverriden = false; + /** + * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last). + * See {@link CNN2DFormat} for more details.
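NCHW stores activations as [minibatch, channels, height, width]; NHWC stores them as [minibatch, height, width, channels].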
+ * Default: NCHW + * + * @param format Format for activations (in and out) + */ + @Builder.Default @Getter @Setter + private CNN2DFormat convFormat = + CNN2DFormat.NCHW; // default value for legacy serialization reasons + + /** + * Kernel dilation. Default: {1, 1}, which is standard convolutions. Used for implementing dilated + * convolutions, which are also known as atrous convolutions. + * + *

For more details, see: Yu and Koltun (2014) + * and Chen et al. (2014), as well as + * http://deeplearning.net/software/theano/tutorial/conv_arithmetic.html#dilated-convolutions + *
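+ * For example, a 3x3 kernel with dilation {2, 2} covers a 5x5 receptive field while keeping only 9 weights.
+ * Illustrative builder usage (a sketch of the API defined in this class, not normative): {@code ConvolutionLayer.builder(3, 3).dilation(2, 2).nOut(64).build()}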
+ */ + @Getter @Setter private @Builder.Default int[] dilation = new int[] {1, 1}; + /** Stride of the convolution. Default: {1, 1} */ + @Getter @Setter private @Builder.Default int[] stride = new int[] {1, 1}; + + @Getter @Setter private @Builder.Default int[] padding = new int[] {0, 0}; + /** + * When using CuDNN and an error is encountered, should fallback to the non-CuDNN implementation + * be allowed? If set to false, an exception in CuDNN will be propagated back to the user. If + * true, the built-in (non-CuDNN) implementation for ConvolutionLayer will be used + */ + @Getter @Setter @Builder.Default private boolean cudnnAllowFallback = true; + + /** Defaults to "PREFER_FASTEST", but "NO_WORKSPACE" uses less memory. */ + @Getter @Setter @Builder.Default private AlgoMode cudnnAlgoMode = AlgoMode.PREFER_FASTEST; + + @Getter @Setter private FwdAlgo cudnnFwdAlgo; + @Getter @Setter private BwdFilterAlgo cudnnBwdFilterAlgo; + @Getter @Setter private BwdDataAlgo cudnnBwdDataAlgo; + @Getter @Setter @Builder.Default private int convolutionDim = 2; // 2D convolution by default + /** Causal convolution - allowed for 1D only */ + @Builder.Default private boolean allowCausal = false; + + @Builder.Default @JsonIgnore @EqualsAndHashCode.Exclude @Getter @Setter + private boolean defaultValueOverriden = false; + + public static ConvolutionLayerBuilder builder() { + return innerBuilder(); + } + + public static ConvolutionLayerBuilder builder(int... kernelSize) { + return innerBuilder().kernelSize(kernelSize); + } + + public static ConvolutionLayerBuilder builder(int[] kernelSize, int[] stride) { + return innerBuilder().kernelSize(kernelSize).stride(stride); + } + + public static ConvolutionLayerBuilder builder( + int[] kernelSize, int[] stride, int[] padding) { + return innerBuilder().kernelSize(kernelSize).stride(stride).padding(padding); + } + + public boolean hasBias() { + return hasBias; + } + + @Override + public ConvolutionLayer clone() { + ConvolutionLayer clone = (ConvolutionLayer) super.clone(); + if (clone.kernelSize != null) { + clone.kernelSize = clone.kernelSize.clone(); + } + if (clone.stride != null) { + clone.stride = clone.stride.clone(); + } + if (clone.padding != null) { + clone.padding = clone.padding.clone(); + } + return clone; + } + + @Override + public Layer instantiate( + NeuralNetConfiguration conf, + Collection trainingListeners, + int layerIndex, + INDArray layerParamsView, + boolean initializeParams, + DataType networkDataType) { + setNetConfiguration(conf); + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + runInheritance(); + + LayerValidation.assertNInNOutSet( + "ConvolutionLayer", getName(), layerIndex, getNIn(), getNOut()); + + org.deeplearning4j.nn.layers.convolution.ConvolutionLayer ret = + new org.deeplearning4j.nn.layers.convolution.ConvolutionLayer(lconf, networkDataType); + ret.addTrainingListeners(trainingListeners); + ret.setIndex(layerIndex); + ret.setParamsViewArray(layerParamsView); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); + ret.setParamTable(paramTable); + ret.setLayerConfiguration(lconf); + return ret; + } + + @Override + public ParamInitializer initializer() { + return ConvolutionParamInitializer.getInstance(); + } + + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + if (inputType == null || inputType.getType() != InputType.Type.CNN) { + throw new IllegalStateException( + "Invalid input for Convolution layer (layer name=\"" + + getName() + +
"\"): Expected CNN input, got " + + inputType); + } + + return InputTypeUtil.getOutputTypeCnnLayers( + inputType, + kernelSize, + stride, + padding, + dilation, + convolutionMode, + nOut, + layerIndex, + getName(), + convFormat, + ConvolutionLayer.class); + } + + @Override + public void setNIn(InputType inputType, boolean override) { + if (inputType == null || inputType.getType() != InputType.Type.CNN) { + throw new IllegalStateException( + "Invalid input for Convolution layer (layer name=\"" + + getName() + + "\"): Expected CNN input, got " + + inputType); + } + + if (!defaultValueOverriden || nIn <= 0 || override) { + InputType.InputTypeConvolutional c = (InputType.InputTypeConvolutional) inputType; + this.nIn = c.getChannels(); + this.convFormat = ((InputType.InputTypeConvolutional) inputType).getFormat(); + } + + if (convFormat == null || override) + this.convFormat = ((InputType.InputTypeConvolutional) inputType).getFormat(); + } + + @Override + public InputPreProcessor getPreProcessorForInputType(InputType inputType) { + if (inputType == null) { + throw new IllegalStateException( + "Invalid input for Convolution layer (layer name=\"" + getName() + "\"): input is null"); + } + + return InputTypeUtil.getPreProcessorForInputTypeCnnLayers(inputType, getName()); + } + + @Override + public LayerMemoryReport getMemoryReport(InputType inputType) { + val paramSize = initializer().numParams(this); + val updaterStateSize = (int) getIUpdater().stateSize(paramSize); + + InputType.InputTypeConvolutional c = (InputType.InputTypeConvolutional) inputType; + InputType.InputTypeConvolutional outputType = + (InputType.InputTypeConvolutional) getOutputType(-1, inputType); + + // TODO convolution helper memory use... (CuDNN etc) + + // During forward pass: im2col array, mmul (result activations), in-place broadcast add + val im2colSizePerEx = + c.getChannels() + * outputType.getHeight() + * outputType.getWidth() + * kernelSize[0] + * kernelSize[1]; + + // During training: have im2col array, in-place gradient calculation, then epsilons... + // But: im2col array may be cached... 
+ Map trainWorkingMemoryPerEx = new HashMap<>(); + Map cachedPerEx = new HashMap<>(); + + // During backprop: im2col array for forward pass (possibly cached) + the epsilon6d array + // required to calculate + // the 4d epsilons (equal size to input) + // Note that the eps6d array is same size as im2col + for (CacheMode cm : CacheMode.values()) { + long trainWorkingSizePerEx; + long cacheMemSizePerEx = 0; + if (cm == CacheMode.NONE) { + trainWorkingSizePerEx = 2 * im2colSizePerEx; + } else { + // im2col is cached, but epsNext2d/eps6d is not + cacheMemSizePerEx = im2colSizePerEx; + trainWorkingSizePerEx = im2colSizePerEx; + } + + if (getDropOut() != null) { + // Dup on the input before dropout, but only for training + trainWorkingSizePerEx += inputType.arrayElementsPerExample(); + } + + trainWorkingMemoryPerEx.put(cm, trainWorkingSizePerEx); + cachedPerEx.put(cm, cacheMemSizePerEx); + } + + return new LayerMemoryReport.Builder(name, ConvolutionLayer.class, inputType, outputType) + .standardMemory(paramSize, updaterStateSize) + // im2col caching -> only variable size caching + .workingMemory( + 0, im2colSizePerEx, MemoryReport.CACHE_MODE_ALL_ZEROS, trainWorkingMemoryPerEx) + .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, cachedPerEx) + .build(); + } + /** + * The "PREFER_FASTEST" mode will pick the fastest algorithm for the specified parameters from the + * {@link FwdAlgo}, {@link BwdFilterAlgo}, and {@link BwdDataAlgo} lists, but they may be very + * memory intensive, so if weird errors occur when using cuDNN, please try the "NO_WORKSPACE" + * mode. Alternatively, it is possible to specify the algorithm manually by setting the + * "USER_SPECIFIED" mode, but this is not recommended. + * + *

Note: Currently only supported with cuDNN. + */ + public enum AlgoMode { + NO_WORKSPACE, + PREFER_FASTEST, + USER_SPECIFIED + } + + /** + * The forward algorithm to use when {@link AlgoMode} is set to "USER_SPECIFIED". + * + *

Note: Currently only supported with cuDNN. + */ + public enum FwdAlgo { + IMPLICIT_GEMM, + IMPLICIT_PRECOMP_GEMM, + GEMM, + DIRECT, + FFT, + FFT_TILING, + WINOGRAD, + WINOGRAD_NONFUSED, + COUNT + } + + /** + * The backward filter algorithm to use when {@link AlgoMode} is set to "USER_SPECIFIED". + * + *

Note: Currently only supported with cuDNN. + */ + public enum BwdFilterAlgo { + ALGO_0, + ALGO_1, + FFT, + ALGO_3, + WINOGRAD, + WINOGRAD_NONFUSED, + FFT_TILING, + COUNT + } + + /** + * The backward data algorithm to use when {@link AlgoMode} is set to "USER_SPECIFIED". + * + *

Note: Currently only supported with cuDNN. + */ + public enum BwdDataAlgo { + ALGO_0, + ALGO_1, + FFT, + FFT_TILING, + WINOGRAD, + WINOGRAD_NONFUSED, + COUNT + } + + public abstract static class ConvolutionLayerBuilder< + C extends ConvolutionLayer, B extends ConvolutionLayerBuilder> + extends FeedForwardLayerBuilder { /** - * The "PREFER_FASTEST" mode will pick the fastest algorithm for the specified parameters from the {@link FwdAlgo}, - * {@link BwdFilterAlgo}, and {@link BwdDataAlgo} lists, but they may be very memory intensive, so if weird errors - * occur when using cuDNN, please try the "NO_WORKSPACE" mode. Alternatively, it is possible to specify the - * algorithm manually by setting the "USER_SPECIFIED" mode, but this is not recommended. - *

- * Note: Currently only supported with cuDNN. + * Set kernel size for the 2D convolution in (height, width) order + * + * @param kernelSize kernel size */ - public enum AlgoMode { - NO_WORKSPACE, PREFER_FASTEST, USER_SPECIFIED + public B kernelSize(int... kernelSize) { + this.kernelSize$value = ValidationUtils.validate2NonNegative(kernelSize, false, "kernelSize"); + this.kernelSize$set = true; + return self(); + } + /** + * Set stride for the 2D convolution in (height, width) order + * + * @param stride stride size + */ + public B stride(int... stride) { + this.stride$value = ValidationUtils.validate2NonNegative(stride, false, "stride"); + this.stride$set = true; + return self(); } /** - * The forward algorithm to use when {@link AlgoMode} is set to "USER_SPECIFIED". - *

- * Note: Currently only supported with cuDNN. + * Set padding for the 2D convolution in (height, width) order + * + * @param padding padding size */ - public enum FwdAlgo { - IMPLICIT_GEMM, IMPLICIT_PRECOMP_GEMM, GEMM, DIRECT, FFT, FFT_TILING, WINOGRAD, WINOGRAD_NONFUSED, COUNT + public B padding(int... padding) { + this.padding$value = ValidationUtils.validate2NonNegative(padding, false, "padding"); + this.padding$set = true; + return self(); + } + /** + * Set dilation for the 2D convolution in (height, width) order + * + * @param dilation dilation size + */ + public B dilation(int... dilation) { + this.dilation$value = ValidationUtils.validate2NonNegative(dilation, false, "dilation"); + this.dilation$set = true; + return self(); + } + + public C build() { + ConvolutionUtils.validateConvolutionModePadding(convolutionMode$value, padding$value); + ConvolutionUtils.validateCnnKernelStridePadding( + kernelSize$value, stride$value, padding$value); + + if (kernelSize$value.length != convolutionDim$value) { + throw new IllegalArgumentException( + "Kernel argument should be a " + + convolutionDim$value + + "d array, got " + + Arrays.toString(kernelSize$value)); + } + + if (stride$value.length != convolutionDim$value) { + throw new IllegalArgumentException( + "Strides argument should be a " + + convolutionDim$value + + "d array, got " + + Arrays.toString(stride$value)); + } + + if (padding$value.length != convolutionDim$value) { + throw new IllegalArgumentException( + "Padding argument should be a " + + convolutionDim$value + + "d array, got " + + Arrays.toString(padding$value)); + } + + if (dilation$value.length != convolutionDim$value) { + throw new IllegalArgumentException( + "Dilation argument should be a " + + convolutionDim$value + + "d array, got " + + Arrays.toString(dilation$value)); + } + + C l = initBuild(); + l.setType(LayerType.CONV); + l.initializeConstraints(); + return l; } /** - * The backward filter algorithm to use when {@link AlgoMode} is set to "USER_SPECIFIED". - *

- * Note: Currently only supported with cuDNN. + * When using CuDNN or MKLDNN and an error is encountered, should fallback to the non-helper + * implementation be allowed? If set to false, an exception in the helper will be propagated + * back to the user. If true, the built-in (non-MKL/CuDNN) implementation for ConvolutionLayer + * will be used + * + * @param allowFallback Whether fallback to non-CuDNN implementation should be used */ - public enum BwdFilterAlgo { - ALGO_0, ALGO_1, FFT, ALGO_3, WINOGRAD, WINOGRAD_NONFUSED, FFT_TILING, COUNT - } - - /** - * The backward data algorithm to use when {@link AlgoMode} is set to "USER_SPECIFIED". - *

- * Note: Currently only supported with cuDNN. - */ - public enum BwdDataAlgo { - ALGO_0, ALGO_1, FFT, FFT_TILING, WINOGRAD, WINOGRAD_NONFUSED, COUNT - } - - /** - * Defaults to "PREFER_FASTEST", but "NO_WORKSPACE" uses less memory. - */ - protected AlgoMode cudnnAlgoMode = AlgoMode.PREFER_FASTEST; - protected FwdAlgo cudnnFwdAlgo; - protected BwdFilterAlgo cudnnBwdFilterAlgo; - protected BwdDataAlgo cudnnBwdDataAlgo; - - /** - * ConvolutionLayer nIn in the input layer is the number of channels nOut is the number of filters to be used in the - * net or in other words the channels The builder specifies the filter/kernel size, the stride and padding The - * pooling layer takes the kernel size - */ - protected ConvolutionLayer(BaseConvBuilder builder) { - super(builder); - this.setType(LayerType.CONV); - int dim = builder.convolutionDim; - - this.hasBias = builder.hasBias; - this.convolutionMode = builder.convolutionMode; - this.dilation = builder.dilation; - if (builder.kernelSize.length != dim) { - throw new IllegalArgumentException("Kernel argument should be a " + dim + "d array, got " + Arrays.toString(builder.kernelSize)); - } - this.kernelSize = builder.kernelSize; - if (builder.stride.length != dim) { - throw new IllegalArgumentException("Strides argument should be a " + dim + "d array, got " + Arrays.toString(builder.stride)); - } - this.stride = builder.stride; - if (builder.padding.length != dim) { - throw new IllegalArgumentException("Padding argument should be a " + dim + "d array, got " + Arrays.toString(builder.padding)); - } - this.padding = builder.padding; - if (builder.dilation.length != dim) { - throw new IllegalArgumentException("Dilation argument should be a " + dim + "d array, got " + Arrays.toString(builder.dilation)); - } - this.dilation = builder.dilation; - this.cudnnAlgoMode = builder.cudnnAlgoMode; - this.cudnnFwdAlgo = builder.cudnnFwdAlgo; - this.cudnnBwdFilterAlgo = builder.cudnnBwdFilterAlgo; - this.cudnnBwdDataAlgo = builder.cudnnBwdDataAlgo; - this.cudnnAllowFallback = builder.cudnnAllowFallback; - if(builder instanceof Builder) { - this.cnn2dDataFormat = ((Builder)builder).dataFormat; - } - - initializeConstraints(builder); - } - - public boolean hasBias() { - return hasBias; - } - - @Override - public ConvolutionLayer clone() { - ConvolutionLayer clone = (ConvolutionLayer) super.clone(); - if (clone.kernelSize != null) { - clone.kernelSize = clone.kernelSize.clone(); - } - if (clone.stride != null) { - clone.stride = clone.stride.clone(); - } - if (clone.padding != null) { - clone.padding = clone.padding.clone(); - } - return clone; - } - - @Override - public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, - int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { - setNetConfiguration(conf); - LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); - lconf.runInheritance(); - - LayerValidation.assertNInNOutSet("ConvolutionLayer", getLayerName(), layerIndex, getNIn(), getNOut()); - - org.deeplearning4j.nn.layers.convolution.ConvolutionLayer ret = - new org.deeplearning4j.nn.layers.convolution.ConvolutionLayer(lconf, networkDataType); - ret.addTrainingListeners(trainingListeners); - ret.setIndex(layerIndex); - ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(this, layerParamsView, initializeParams); - ret.setParamTable(paramTable); - ret.setLayerConfiguration(lconf); - return ret; - } - - @Override - public ParamInitializer 
initializer() { - return ConvolutionParamInitializer.getInstance(); - } - - @Override - public InputType getOutputType(int layerIndex, InputType inputType) { - if (inputType == null || inputType.getType() != InputType.Type.CNN) { - throw new IllegalStateException("Invalid input for Convolution layer (layer name=\"" + getLayerName() - + "\"): Expected CNN input, got " + inputType); - } - - return InputTypeUtil.getOutputTypeCnnLayers(inputType, kernelSize, stride, padding, dilation, convolutionMode, - nOut, layerIndex, getLayerName(), cnn2dDataFormat, ConvolutionLayer.class); - } - - @Override - public void setNIn(InputType inputType, boolean override) { - if (inputType == null || inputType.getType() != InputType.Type.CNN) { - throw new IllegalStateException("Invalid input for Convolution layer (layer name=\"" + getLayerName() - + "\"): Expected CNN input, got " + inputType); - } - - if (!defaultValueOverriden || nIn <= 0 || override) { - InputType.InputTypeConvolutional c = (InputType.InputTypeConvolutional) inputType; - this.nIn = c.getChannels(); - this.cnn2dDataFormat = ((InputType.InputTypeConvolutional) inputType).getFormat(); - } - - if(cnn2dDataFormat == null || override) - this.cnn2dDataFormat = ((InputType.InputTypeConvolutional) inputType).getFormat(); - } - - @Override - public InputPreProcessor getPreProcessorForInputType(InputType inputType) { - if (inputType == null) { - throw new IllegalStateException("Invalid input for Convolution layer (layer name=\"" + getLayerName() - + "\"): input is null"); - } - - return InputTypeUtil.getPreProcessorForInputTypeCnnLayers(inputType, getLayerName()); - } - - @Override - public LayerMemoryReport getMemoryReport(InputType inputType) { - val paramSize = initializer().numParams(this); - val updaterStateSize = (int) getIUpdater().stateSize(paramSize); - - InputType.InputTypeConvolutional c = (InputType.InputTypeConvolutional) inputType; - InputType.InputTypeConvolutional outputType = (InputType.InputTypeConvolutional) getOutputType(-1, inputType); - - //TODO convolution helper memory use... (CuDNN etc) - - //During forward pass: im2col array, mmul (result activations), in-place broadcast add - val im2colSizePerEx = c.getChannels() * outputType.getHeight() * outputType.getWidth() * kernelSize[0] - * kernelSize[1]; - - //During training: have im2col array, in-place gradient calculation, then epsilons... - //But: im2col array may be cached... 
- Map trainWorkingMemoryPerEx = new HashMap<>(); - Map cachedPerEx = new HashMap<>(); - - //During backprop: im2col array for forward pass (possibly cached) + the epsilon6d array required to calculate - // the 4d epsilons (equal size to input) - //Note that the eps6d array is same size as im2col - for (CacheMode cm : CacheMode.values()) { - long trainWorkingSizePerEx; - long cacheMemSizePerEx = 0; - if (cm == CacheMode.NONE) { - trainWorkingSizePerEx = 2 * im2colSizePerEx; - } else { - //im2col is cached, but epsNext2d/eps6d is not - cacheMemSizePerEx = im2colSizePerEx; - trainWorkingSizePerEx = im2colSizePerEx; - } - - if (getIDropout() != null) { - //Dup on the input before dropout, but only for training - trainWorkingSizePerEx += inputType.arrayElementsPerExample(); - } - - trainWorkingMemoryPerEx.put(cm, trainWorkingSizePerEx); - cachedPerEx.put(cm, cacheMemSizePerEx); - } - - return new LayerMemoryReport.Builder(layerName, ConvolutionLayer.class, inputType, outputType) - .standardMemory(paramSize, updaterStateSize) - //im2col caching -> only variable size caching - .workingMemory(0, im2colSizePerEx, MemoryReport.CACHE_MODE_ALL_ZEROS, trainWorkingMemoryPerEx) - .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, cachedPerEx).build(); - - } - - public static class Builder extends BaseConvBuilder { - - public Builder(int[] kernelSize, int[] stride, int[] padding) { - super(kernelSize, stride, padding); - } - - public Builder(int[] kernelSize, int[] stride) { - super(kernelSize, stride); - } - - public Builder(int... kernelSize) { - super(kernelSize); - } - - public Builder() { - super(); - } - - protected CNN2DFormat dataFormat = CNN2DFormat.NCHW; - - @Override - protected boolean allowCausal() { - //Causal convolution - allowed for 1D only - return false; - } - - /** - * Size of the convolution rows/columns - * - * @param kernelSize the height and width of the kernel - */ - public Builder kernelSize(int... kernelSize) { - this.setKernelSize(kernelSize); - return this; - } - - public Builder stride(int... stride) { - this.setStride(stride); - return this; - } - - public Builder padding(int... padding) { - this.setPadding(padding); - return this; - } - - /** - * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last). - * See {@link CNN2DFormat} for more details.
- * Default: NCHW - * @param format Format for activations (in and out) - */ - public Builder dataFormat(CNN2DFormat format){ - this.dataFormat = format; - return this; - } - - @Override - @SuppressWarnings("unchecked") - public ConvolutionLayer build() { - ConvolutionUtils.validateConvolutionModePadding(convolutionMode, padding); - ConvolutionUtils.validateCnnKernelStridePadding(kernelSize, stride, padding); - - return new ConvolutionLayer(this); - } - - /** - * Set kernel size for 3D convolutions in (depth, height, width) order - * - * @param kernelSize kernel size - */ - @Override - public void setKernelSize(int... kernelSize) { - this.kernelSize = ValidationUtils.validate2NonNegative(kernelSize, false, "kernelSize"); - } - - /** - * Set stride size for 3D convolutions in (depth, height, width) order - * - * @param stride kernel size - */ - @Override - public void setStride(int... stride) { - this.stride = ValidationUtils.validate2NonNegative(stride, false, "stride"); - } - - /** - * Set padding size for 3D convolutions in (depth, height, width) order - * - * @param padding kernel size - */ - @Override - public void setPadding(int... padding) { - this.padding = ValidationUtils.validate2NonNegative(padding, false, "padding"); - } - - /** - * Set dilation size for 3D convolutions in (depth, height, width) order - * - * @param dilation kernel size - */ - @Override - public void setDilation(int... dilation) { - this.dilation = ValidationUtils.validate2NonNegative(dilation, false, "dilation"); - } - - public void setDataFormat(CNN2DFormat dataFormat){ - this.dataFormat = dataFormat; - } - } - - @Getter - @Setter - public static abstract class BaseConvBuilder> extends FeedForwardLayer.Builder { - - protected int convolutionDim = 2; // 2D convolution by default - - /** - * If true (default): include bias parameters in the model. False: no bias. - * - */ - protected boolean hasBias = true; - - /** - * Set the convolution mode for the Convolution layer. See {@link ConvolutionMode} for more details - * Default is {@link ConvolutionMode}.Truncate. - * - */ - protected ConvolutionMode convolutionMode = ConvolutionMode.Truncate; - - /** - * Kernel dilation. Default: {1, 1}, which is standard convolutions. Used for implementing dilated convolutions, - * which are also known as atrous convolutions. - *

- * For more details, see: - * Yu and Koltun (2014) and - * Chen et al. (2014), as well as - * - * http://deeplearning.net/software/theano/tutorial/conv_arithmetic.html#dilated-convolutions
- * - */ - protected int[] dilation = new int[] {1, 1}; - public int[] kernelSize = new int[] {5, 5}; - protected int[] stride = new int[] {1, 1}; - protected int[] padding = new int[] {0, 0}; - - /** - * Defaults to "PREFER_FASTEST", but "NO_WORKSPACE" uses less memory. - */ - protected AlgoMode cudnnAlgoMode = null; - protected FwdAlgo cudnnFwdAlgo; - protected BwdFilterAlgo cudnnBwdFilterAlgo; - protected BwdDataAlgo cudnnBwdDataAlgo; - - /** - * When using CuDNN and an error is encountered, should fallback to the non-CuDNN implementatation be allowed? - * If set to false, an exception in CuDNN will be propagated back to the user. If false, the built-in - * (non-CuDNN) implementation for ConvolutionLayer will be used - * - */ - protected boolean cudnnAllowFallback = true; - - - protected BaseConvBuilder(int[] kernelSize, int[] stride, int[] padding, int[] dilation, int dim) { - this.setKernelSize(kernelSize); - this.setStride(stride); - this.setPadding(padding); - this.setDilation(dilation); - this.setConvolutionDim(dim); - } - - protected BaseConvBuilder(int[] kernelSize, int[] stride, int[] padding, int[] dilation) { - this.setKernelSize(kernelSize); - this.setStride(stride); - this.setPadding(padding); - this.setDilation(dilation); - } - - protected BaseConvBuilder(int[] kernelSize, int[] stride, int[] padding, int dim) { - this.setKernelSize(kernelSize); - this.setStride(stride); - this.setPadding(padding); - this.setConvolutionDim(dim); - } - - protected BaseConvBuilder(int[] kernelSize, int[] stride, int[] padding) { - this.setKernelSize(kernelSize); - this.setStride(stride); - this.setPadding(padding); - } - - protected BaseConvBuilder(int[] kernelSize, int[] stride, int dim) { - this.setKernelSize(kernelSize); - this.setStride(stride); - this.setConvolutionDim(dim); - } - - - protected BaseConvBuilder(int[] kernelSize, int[] stride) { - this.setKernelSize(kernelSize); - this.setStride(stride); - } - - protected BaseConvBuilder(int dim, int... kernelSize) { - this.setKernelSize(kernelSize); - this.setConvolutionDim(dim); - } - - - protected BaseConvBuilder(int... kernelSize) { - this.setKernelSize(kernelSize); - } - - protected BaseConvBuilder() {} - - protected abstract boolean allowCausal(); - - protected void setConvolutionMode(ConvolutionMode convolutionMode){ - Preconditions.checkState(allowCausal() || convolutionMode != ConvolutionMode.Causal, "Causal convolution mode can only be used with 1D" + - " convolutional neural network layers"); - this.convolutionMode = convolutionMode; - } - - - /** - * If true (default): include bias parameters in the model. False: no bias. - * - * @param hasBias If true: include bias parameters in this model - */ - public T hasBias(boolean hasBias) { - this.setHasBias(hasBias); - return (T) this; - } - - /** - * Set the convolution mode for the Convolution layer. See {@link ConvolutionMode} for more details - * - * @param convolutionMode Convolution mode for layer - */ - public T convolutionMode(ConvolutionMode convolutionMode) { - this.setConvolutionMode(convolutionMode); - return (T) this; - } - - /** - * Kernel dilation. Default: {1, 1}, which is standard convolutions. Used for implementing dilated convolutions, - * which are also known as atrous convolutions. - *
- * For more details, see: - * Yu and Koltun (2014) and - * Chen et al. (2014), as well as - * - * http://deeplearning.net/software/theano/tutorial/conv_arithmetic.html#dilated-convolutions
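As a usage illustration for the dilation setting documented above, a dilated (atrous) convolution could be configured through the Lombok-generated builder roughly as follows. This is a minimal sketch: the builder() factory and the nIn/nOut/kernelSize/stride/dilation/convolutionMode method names are assumed from the fields and setters shown in this patch, and the channel counts are made-up example values.

    import org.deeplearning4j.nn.conf.ConvolutionMode;
    import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;

    // Sketch: a 3x3 kernel with dilation 2 gives an effective receptive field of
    // 3 + (3 - 1) * (2 - 1) = 5 in each spatial dimension.
    ConvolutionLayer dilated = ConvolutionLayer.builder()
            .nIn(64)                                  // example input channel count
            .nOut(128)                                // example number of filters
            .kernelSize(3, 3)
            .stride(1, 1)
            .dilation(2, 2)                           // atrous convolution
            .convolutionMode(ConvolutionMode.Truncate)
            .build();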
- * - * @param dilation Dilation for kernel - */ - public T dilation(int... dilation) { - this.setDilation(dilation); - return (T) this; - } - - public T kernelSize(int... kernelSize) { - this.setKernelSize(kernelSize); - return (T) this; - } - - public T stride(int... stride) { - this.setStride(stride); - return (T) this; - } - - public T padding(int... padding) { - this.setPadding(padding); - return (T) this; - } - - /** - * Defaults to "PREFER_FASTEST", but "NO_WORKSPACE" uses less memory. - */ - public T cudnnAlgoMode(AlgoMode cudnnAlgoMode) { - this.setCudnnAlgoMode(cudnnAlgoMode); - return (T) this; - } - - public T cudnnFwdMode(FwdAlgo cudnnFwdAlgo) { - this.setCudnnFwdAlgo(cudnnFwdAlgo); - return (T) this; - } - - public T cudnnBwdFilterMode(BwdFilterAlgo cudnnBwdFilterAlgo) { - this.setCudnnBwdFilterAlgo(cudnnBwdFilterAlgo); - return (T) this; - } - - public T cudnnBwdDataMode(BwdDataAlgo cudnnBwdDataAlgo) { - this.setCudnnBwdDataAlgo(cudnnBwdDataAlgo); - return (T) this; - } - - /** - * When using CuDNN and an error is encountered, should fallback to the non-CuDNN implementatation be allowed? - * If set to false, an exception in CuDNN will be propagated back to the user. If true, the built-in - * (non-CuDNN) implementation for ConvolutionLayer will be used - * - * @deprecated Use {@link #helperAllowFallback(boolean)} - * - * @param allowFallback Whether fallback to non-CuDNN implementation should be used - */ - @Deprecated - public T cudnnAllowFallback(boolean allowFallback) { - this.setCudnnAllowFallback(allowFallback); - return (T) this; - } - - /** - * When using CuDNN or MKLDNN and an error is encountered, should fallback to the non-helper implementation be allowed? - * If set to false, an exception in the helper will be propagated back to the user. 
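A short sketch of the fallback switch described above, assuming the helperAllowFallback method generated for the convolution builder in this patch: disabling fallback makes helper (cuDNN/MKL-DNN) failures surface as exceptions instead of silently re-running on the built-in implementation, which can help when debugging GPU setups.

    // Sketch only: propagate cuDNN/MKL-DNN errors instead of falling back.
    ConvolutionLayer strictHelper = ConvolutionLayer.builder()
            .nIn(32).nOut(32)
            .kernelSize(3, 3)
            .helperAllowFallback(false)
            .build();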
If true, the built-in - * (non-MKL/CuDNN) implementation for ConvolutionLayer will be used - * - * @param allowFallback Whether fallback to non-CuDNN implementation should be used - */ - public T helperAllowFallback(boolean allowFallback) { - this.cudnnAllowFallback = allowFallback; - return (T) this; - } + public B helperAllowFallback(boolean allowFallback) { + this.cudnnAllowFallback$value = allowFallback; + this.cudnnAllowFallback$set = true; + return self(); } + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution2D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution2D.java index d805561d0..cd9990875 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution2D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution2D.java @@ -20,10 +20,8 @@ package org.deeplearning4j.nn.conf.layers; -import lombok.Data; -import lombok.EqualsAndHashCode; -import lombok.NoArgsConstructor; -import lombok.ToString; +import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.CNN2DFormat; @@ -40,40 +38,69 @@ import org.nd4j.linalg.api.ndarray.INDArray; import java.util.Collection; import java.util.Map; +/** + * Deconvolution2D layer nIn in the input layer is the number of channels nOut is the number of filters to be used + * in the net or in other words the channels The builder specifies the filter/kernel size, the stride and padding + * The pooling layer takes the kernel size + */ @Data -@NoArgsConstructor @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuild") public class Deconvolution2D extends ConvolutionLayer { - /** - * Deconvolution2D layer nIn in the input layer is the number of channels nOut is the number of filters to be used - * in the net or in other words the channels The builder specifies the filter/kernel size, the stride and padding - * The pooling layer takes the kernel size - */ - protected Deconvolution2D(BaseConvBuilder builder) { - super(builder); - initializeConstraints(builder); - if(builder instanceof Builder){ - this.cnn2dDataFormat = ((Builder) builder).format; - } + +@Builder.Default +private CNN2DFormat format = CNN2DFormat.NCHW; + protected boolean allowCausal() { + //Causal convolution - allowed for 1D only + return false; } + public static abstract class Deconvolution2DBuilder> extends ConvolutionLayerBuilder { + public C build() { + C l = initBuild(); + l.initializeConstraints(); + return l; + } + + + @Override + public B kernelSize(int... kernelSize) { + super.kernelSize(ValidationUtils.validate2NonNegative(kernelSize, false, "kernelSize")); + return self(); + } + @Override + public B stride(int... stride) { + super.stride(ValidationUtils.validate2NonNegative(stride, false, "stride")); + return self(); + } + @Override + public B padding(int... padding) { + super.padding(ValidationUtils.validate2NonNegative(padding, false, "padding")); + return self(); + } + @Override + public B dilation(int... 
dilation) { + super.dilation(ValidationUtils.validate2NonNegative(dilation, false, "dilation")); + return self(); + } + } public boolean hasBias() { - return hasBias; + return isHasBias(); } @Override public Deconvolution2D clone() { Deconvolution2D clone = (Deconvolution2D) super.clone(); - if (clone.kernelSize != null) { - clone.kernelSize = clone.kernelSize.clone(); + if (clone.getKernelSize() != null) { + clone.setKernelSize( clone.getKernelSize().clone()); } - if (clone.stride != null) { - clone.stride = clone.stride.clone(); + if (clone.getStride() != null) { + clone.setStride( clone.getStride().clone()); } - if (clone.padding != null) { - clone.padding = clone.padding.clone(); + if (clone.getPadding() != null) { + clone.setPadding( clone.getPadding().clone()); } return clone; } @@ -82,8 +109,9 @@ public class Deconvolution2D extends ConvolutionLayer { public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { setNetConfiguration(conf); - LayerValidation.assertNInNOutSet("Deconvolution2D", getLayerName(), layerIndex, getNIn(), getNOut()); + LayerValidation.assertNInNOutSet("Deconvolution2D", getName(), layerIndex, getNIn(), getNOut()); LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + runInheritance(); org.deeplearning4j.nn.layers.convolution.Deconvolution2DLayer ret = new org.deeplearning4j.nn.layers.convolution.Deconvolution2DLayer(lconf, networkDataType); @@ -105,98 +133,13 @@ public class Deconvolution2D extends ConvolutionLayer { @Override public InputType getOutputType(int layerIndex, InputType inputType) { if (inputType == null || inputType.getType() != InputType.Type.CNN) { - throw new IllegalStateException("Invalid input for Convolution layer (layer name=\"" + getLayerName() + throw new IllegalStateException("Invalid input for Convolution layer (layer name=\"" + getName() + "\"): Expected CNN input, got " + inputType); } - return InputTypeUtil.getOutputTypeDeconvLayer(inputType, kernelSize, stride, padding, dilation, convolutionMode, - nOut, layerIndex, getLayerName(), Deconvolution2DLayer.class); + return InputTypeUtil.getOutputTypeDeconvLayer(inputType, getKernelSize(), getStride(), getPadding(), getDilation(), getConvolutionMode(), + nOut, layerIndex, getName(), Deconvolution2DLayer.class); } - public static class Builder extends BaseConvBuilder { - - public Builder(int[] kernelSize, int[] stride, int[] padding) { - super(kernelSize, stride, padding); - } - - public Builder(int[] kernelSize, int[] stride) { - super(kernelSize, stride); - } - - public Builder(int... kernelSize) { - super(kernelSize); - } - - public Builder() { - super(); - } - - private CNN2DFormat format = CNN2DFormat.NCHW; - - public Builder dataFormat(CNN2DFormat format){ - this.format = format; - return this; - } - - @Override - protected boolean allowCausal() { - //Causal convolution - allowed for 1D only - return false; - } - - /** - * Set the convolution mode for the Convolution layer. See {@link ConvolutionMode} for more details - * - * @param convolutionMode Convolution mode for layer - */ - public Builder convolutionMode(ConvolutionMode convolutionMode) { - return super.convolutionMode(convolutionMode); - } - - /** - * Size of the convolution rows/columns - * - * @param kernelSize the height and width of the kernel - */ - public Builder kernelSize(int... 
kernelSize) { - this.setKernelSize(kernelSize); - return this; - } - - public Builder stride(int... stride) { - this.setStride(stride); - return this; - } - - public Builder padding(int... padding) { - this.setPadding(padding); - return this; - } - - @Override - public void setKernelSize(int... kernelSize) { - this.kernelSize = ValidationUtils.validate2NonNegative(kernelSize, false, "kernelSize"); - } - - @Override - public void setStride(int... stride) { - this.stride = ValidationUtils.validate2NonNegative(stride, false,"stride"); - } - - @Override - public void setPadding(int... padding) { - this.padding = ValidationUtils.validate2NonNegative(padding, false, "padding"); - } - - @Override - public void setDilation(int... dilation) { - this.dilation = ValidationUtils.validate2NonNegative(dilation, false,"dilation"); - } - - @Override - public Deconvolution2D build() { - return new Deconvolution2D(this); - } - } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution3D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution3D.java index ea19c1148..e99ef284d 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution3D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution3D.java @@ -20,10 +20,10 @@ package org.deeplearning4j.nn.conf.layers; -import lombok.Data; -import lombok.EqualsAndHashCode; -import lombok.NoArgsConstructor; -import lombok.ToString; +import java.util.Collection; +import java.util.Map; +import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.ConvolutionMode; @@ -33,176 +33,154 @@ import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.layers.convolution.Deconvolution3DLayer; import org.deeplearning4j.nn.params.Deconvolution3DParamInitializer; import org.deeplearning4j.optimize.api.TrainingListener; -import org.deeplearning4j.util.ValidationUtils; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.Collection; -import java.util.Map; - +/** + * Deconvolution3D layer nIn in the input layer is the number of channels nOut is the number of + * filters to be used in the net or in other words the channels The builder specifies the + * filter/kernel size, the stride and padding The pooling layer takes the kernel size + */ @Data -@NoArgsConstructor @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder") public class Deconvolution3D extends ConvolutionLayer { + /** + * Set the convolution mode for the Convolution layer. 
See {@link ConvolutionMode} for more + * details + * + * @param convolutionMode Convolution mode for layer + */ + @lombok.Builder.Default + private Convolution3D.DataFormat dataFormat = + Convolution3D.DataFormat.NCDHW; // in libnd4j: 1 - NCDHW, 0 - NDHWC - private Convolution3D.DataFormat dataFormat = Convolution3D.DataFormat.NCDHW; // in libnd4j: 1 - NCDHW, 0 - NDHWC + protected boolean allowCausal() { + // Causal convolution - allowed for 1D only + return false; + } - /** - * Deconvolution3D layer nIn in the input layer is the number of channels nOut is the number of filters to be used - * in the net or in other words the channels The builder specifies the filter/kernel size, the stride and padding - * The pooling layer takes the kernel size - */ - protected Deconvolution3D(Builder builder) { - super(builder); - this.dataFormat = builder.dataFormat; - initializeConstraints(builder); + public boolean hasBias() { + return isHasBias(); + } + + @Override + public Deconvolution3D clone() { + Deconvolution3D clone = (Deconvolution3D) super.clone(); + if (clone.getKernelSize() != null) { + clone.setKernelSize( clone.getKernelSize().clone()); + } + if (clone.getStride() != null) { + clone.setStride( clone.getStride().clone()); + } + if (clone.getPadding() != null) { + clone.setPadding( clone.getPadding().clone()); + } + return clone; + } + + @Override + public Layer instantiate( + NeuralNetConfiguration conf, + Collection trainingListeners, + int layerIndex, + INDArray layerParamsView, + boolean initializeParams, + DataType networkDataType) { + LayerValidation.assertNInNOutSet("Deconvolution2D", getName(), layerIndex, getNIn(), getNOut()); + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + runInheritance(); + Deconvolution3DLayer ret = new Deconvolution3DLayer(lconf, networkDataType); + ret.addTrainingListeners(trainingListeners); + ret.setIndex(layerIndex); + ret.setParamsViewArray(layerParamsView); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); + ret.setParamTable(paramTable); + ret.setLayerConfiguration(lconf); + return ret; + } + + @Override + public ParamInitializer initializer() { + return Deconvolution3DParamInitializer.getInstance(); + } + + @Override + public InputPreProcessor getPreProcessorForInputType(InputType inputType) { + if (inputType == null) { + throw new IllegalStateException( + "Invalid input for Deconvolution3D layer (layer name=\"" + + getName() + + "\"): input is null"); } - public boolean hasBias() { - return hasBias; + return InputTypeUtil.getPreProcessorForInputTypeCnn3DLayers(inputType, getName()); + } + + @Override + public void setNIn(InputType inputType, boolean override) { + if (inputType == null || inputType.getType() != InputType.Type.CNN3D) { + throw new IllegalStateException( + "Invalid input for Deconvolution 3D layer (layer name=\"" + + getName() + + "\"): Expected CNN3D input, got " + + inputType); } - @Override - public Deconvolution3D clone() { - Deconvolution3D clone = (Deconvolution3D) super.clone(); - if (clone.kernelSize != null) { - clone.kernelSize = clone.kernelSize.clone(); - } - if (clone.stride != null) { - clone.stride = clone.stride.clone(); - } - if (clone.padding != null) { - clone.padding = clone.padding.clone(); - } - return clone; + if (nIn <= 0 || override) { + InputType.InputTypeConvolutional3D c = (InputType.InputTypeConvolutional3D) inputType; + this.nIn = c.getChannels(); + } + } + + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { 
+ if (inputType == null || inputType.getType() != InputType.Type.CNN3D) { + throw new IllegalStateException( + "Invalid input for Deconvolution layer (layer name=\"" + + getName() + + "\"): Expected CNN input, got " + + inputType); } - @Override - public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, - int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { - LayerValidation.assertNInNOutSet("Deconvolution2D", getLayerName(), layerIndex, getNIn(), getNOut()); - LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); - Deconvolution3DLayer ret = - new Deconvolution3DLayer(lconf, networkDataType); - ret.addTrainingListeners(trainingListeners); - ret.setIndex(layerIndex); - ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(this, layerParamsView, initializeParams); - ret.setParamTable(paramTable); - ret.setLayerConfiguration(lconf); - return ret; + return InputTypeUtil.getOutputTypeDeconv3dLayer( + inputType, + getKernelSize(), + getStride(), + getPadding(), + getDilation(), + getConvolutionMode(), + dataFormat, + nOut, + layerIndex, + getName(), + Deconvolution3DLayer.class); + } + + //private int[] kernelSize; + //private int[] stride; + //private int[] padding; + //private int[] dilation; + + public static abstract class Deconvolution3DBuilder< + C extends Deconvolution3D, B extends Deconvolution3DBuilder> + extends ConvolutionLayerBuilder { + public C build() { + C l = initBuild(); + l.initializeConstraints(); + return l; } - @Override - public ParamInitializer initializer() { - return Deconvolution3DParamInitializer.getInstance(); - } + } - @Override - public InputPreProcessor getPreProcessorForInputType(InputType inputType) { - if (inputType == null) { - throw new IllegalStateException("Invalid input for Deconvolution3D layer (layer name=\"" + getLayerName() + "\"): input is null"); - } - - return InputTypeUtil.getPreProcessorForInputTypeCnn3DLayers(inputType, getLayerName()); - } - - @Override - public void setNIn(InputType inputType, boolean override) { - if (inputType == null || inputType.getType() != InputType.Type.CNN3D) { - throw new IllegalStateException("Invalid input for Deconvolution 3D layer (layer name=\"" + getLayerName() + "\"): Expected CNN3D input, got " + inputType); - } - - if (nIn <= 0 || override) { - InputType.InputTypeConvolutional3D c = (InputType.InputTypeConvolutional3D) inputType; - this.nIn = c.getChannels(); - } - } - - @Override - public InputType getOutputType(int layerIndex, InputType inputType) { - if (inputType == null || inputType.getType() != InputType.Type.CNN3D) { - throw new IllegalStateException("Invalid input for Deconvolution layer (layer name=\"" + getLayerName() - + "\"): Expected CNN input, got " + inputType); - } - - return InputTypeUtil.getOutputTypeDeconv3dLayer(inputType, kernelSize, stride, padding, dilation, convolutionMode, - dataFormat, nOut, layerIndex, getLayerName(), Deconvolution3DLayer.class); - } - - public static class Builder extends BaseConvBuilder { - - private Convolution3D.DataFormat dataFormat = Convolution3D.DataFormat.NCDHW; // in libnd4j: 1 - NCDHW, 0 - NDHWC - - public Builder() { - super(new int[] {2, 2, 2}, new int[] {1, 1, 1}, new int[] {0, 0, 0}, new int[] {1, 1, 1}, 3); - } - - @Override - protected boolean allowCausal() { - //Causal convolution - allowed for 1D only - return false; - } - - /** - * Set the convolution mode for the Convolution layer. 
See {@link ConvolutionMode} for more details - * - * @param convolutionMode Convolution mode for layer - */ - public Builder convolutionMode(ConvolutionMode convolutionMode) { - return super.convolutionMode(convolutionMode); - } - - /** - * Size of the convolution rows/columns - * - * @param kernelSize the height and width of the kernel - */ - public Builder kernelSize(int... kernelSize) { - this.setKernelSize(kernelSize); - return this; - } - - public Builder stride(int... stride) { - this.setStride(stride); - return this; - } - - public Builder padding(int... padding) { - this.setPadding(padding); - return this; - } - - @Override - public void setKernelSize(int... kernelSize) { - this.kernelSize = ValidationUtils.validate3NonNegative(kernelSize, "kernelSize"); - } - - @Override - public void setStride(int... stride) { - this.stride = ValidationUtils.validate3NonNegative(stride, "stride"); - } - - @Override - public void setPadding(int... padding) { - this.padding = ValidationUtils.validate3NonNegative(padding, "padding"); - } - - @Override - public void setDilation(int... dilation) { - this.dilation = ValidationUtils.validate3NonNegative(dilation, "dilation"); - } - - public Builder dataFormat(Convolution3D.DataFormat dataFormat){ - this.dataFormat = dataFormat; - return this; - } - - @Override - public Deconvolution3D build() { - return new Deconvolution3D(this); - } - } + public static Deconvolution3DBuilder builder() { + return innerBuilder() + .kernelSize(new int[] {2, 2, 2}) + .stride(new int[] {1, 1, 1}) + .padding(new int[] {0, 0, 0}) + .dilation(new int[] {1, 1, 1}) + .convolutionDim(3); + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DenseLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DenseLayer.java index b1dd9856a..5a661065a 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DenseLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DenseLayer.java @@ -20,7 +20,10 @@ package org.deeplearning4j.nn.conf.layers; +import java.util.Collection; +import java.util.Map; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -33,132 +36,99 @@ import org.deeplearning4j.optimize.api.TrainingListener; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.Collection; -import java.util.Map; - -/** - * Dense Layer - * Uses WeightInitXavier as default - */ +/** Dense Layer Uses WeightInitXavier as default */ @Data -@NoArgsConstructor @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@SuperBuilder public class DenseLayer extends FeedForwardLayer { - private boolean hasLayerNorm = false; - private boolean hasBias = true; + /** If true (default = false): enable layer normalization on this layer */ + @lombok.Builder.Default private boolean hasLayerNorm = false; - private DenseLayer(Builder builder) { - super(builder); - this.hasBias = builder.hasBias; - this.hasLayerNorm = builder.hasLayerNorm; + @lombok.Builder.Default private boolean hasBias = true; - initializeConstraints(builder); + @Override + public Layer instantiate( + NeuralNetConfiguration conf, + Collection trainingListeners, + int layerIndex, + INDArray layerParamsView, + boolean initializeParams, + DataType networkDataType) { + + LayerValidation.assertNInNOutSet( + 
"DenseLayerConfiguration", getName(), layerIndex, getNIn(), getNOut()); + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + lconf.runInheritance(); + + org.deeplearning4j.nn.layers.feedforward.dense.DenseLayer ret = + new org.deeplearning4j.nn.layers.feedforward.dense.DenseLayer(lconf, networkDataType); + + if (getWeightInit() == null) setWeightInit(new WeightInitXavier()); + ret.addTrainingListeners(trainingListeners); + ret.setIndex(layerIndex); + ret.setParamsViewArray(layerParamsView); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); + ret.setParamTable(paramTable); + return ret; + } + + @Override + public ParamInitializer initializer() { + return DefaultParamInitializer.getInstance(); + } + + @Override + public LayerMemoryReport getMemoryReport(InputType inputType) { + InputType outputType = getOutputType(-1, inputType); + + val numParams = initializer().numParams(this); + val updaterStateSize = (int) getIUpdater().stateSize(numParams); + + int trainSizeFixed = 0; + int trainSizeVariable = 0; + if (getDropOut() != null) { + if (false) { + // TODO drop connect + // Dup the weights... note that this does NOT depend on the minibatch size... + trainSizeVariable += 0; // TODO + } else { + // Assume we dup the input + trainSizeVariable += inputType.arrayElementsPerExample(); + } } - @Override - public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, - int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { + // Also, during backprop: we do a preOut call -> gives us activations size equal to the output + // size + // which is modified in-place by activation function backprop + // then we have 'epsilonNext' which is equivalent to input size + trainSizeVariable += outputType.arrayElementsPerExample(); - LayerValidation.assertNInNOutSet("DenseLayerConfiguration", getLayerName(), layerIndex, getNIn(), getNOut()); - LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); - lconf.runInheritance(); + return new LayerMemoryReport.Builder(name, DenseLayer.class, inputType, outputType) + .standardMemory(numParams, updaterStateSize) + .workingMemory( + 0, + 0, + trainSizeFixed, + trainSizeVariable) // No additional memory (beyond activations) for inference + .cacheMemory( + MemoryReport.CACHE_MODE_ALL_ZEROS, + MemoryReport.CACHE_MODE_ALL_ZEROS) // No caching in DenseLayerConfiguration + .build(); + } - org.deeplearning4j.nn.layers.feedforward.dense.DenseLayer ret = - new org.deeplearning4j.nn.layers.feedforward.dense.DenseLayer(lconf, networkDataType); + public abstract static class DenseLayerBuilder< + C extends DenseLayer, B extends DenseLayerBuilder> + extends FeedForwardLayerBuilder { - if(getWeightInit() == null) setWeightInit(new WeightInitXavier()); - ret.addTrainingListeners(trainingListeners); - ret.setIndex(layerIndex); - ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(this, layerParamsView, initializeParams); - ret.setParamTable(paramTable); - return ret; + } + private static final class DenseLayerBuilderImpl extends DenseLayerBuilder { + public DenseLayer build() { + DenseLayer l = new DenseLayer(this); + l.initializeConstraints(); + return l; } + } - @Override - public ParamInitializer initializer() { - return DefaultParamInitializer.getInstance(); - } - - @Override - public LayerMemoryReport getMemoryReport(InputType inputType) { - InputType outputType = getOutputType(-1, inputType); - - val 
numParams = initializer().numParams(this); - val updaterStateSize = (int) getIUpdater().stateSize(numParams); - - int trainSizeFixed = 0; - int trainSizeVariable = 0; - if (getIDropout() != null) { - if (false) { - //TODO drop connect - //Dup the weights... note that this does NOT depend on the minibatch size... - trainSizeVariable += 0; //TODO - } else { - //Assume we dup the input - trainSizeVariable += inputType.arrayElementsPerExample(); - } - } - - //Also, during backprop: we do a preOut call -> gives us activations size equal to the output size - // which is modified in-place by activation function backprop - // then we have 'epsilonNext' which is equivalent to input size - trainSizeVariable += outputType.arrayElementsPerExample(); - - return new LayerMemoryReport.Builder(layerName, DenseLayer.class, inputType, outputType) - .standardMemory(numParams, updaterStateSize) - .workingMemory(0, 0, trainSizeFixed, trainSizeVariable) //No additional memory (beyond activations) for inference - .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching in DenseLayerConfiguration - .build(); - } - - public boolean hasBias() { - return hasBias; - } - - public boolean hasLayerNorm(){ - return hasLayerNorm; - } - - @NoArgsConstructor - @Getter - @Setter - public static class Builder extends FeedForwardLayer.Builder { - - /** - * If true (default): include bias parameters in the model. False: no bias. - * - */ - private boolean hasBias = true; - - /** - * If true (default): include bias parameters in the model. False: no bias. - * - * @param hasBias If true: include bias parameters in this model - */ - public Builder hasBias(boolean hasBias) { - this.setHasBias(hasBias); - return this; - } - - /** - * If true (default = false): enable layer normalization on this layer - * - */ - private boolean hasLayerNorm = false; - public Builder hasLayerNorm(boolean hasLayerNorm){ - this.hasLayerNorm = hasLayerNorm; - return this; - } - - - @Override - @SuppressWarnings("unchecked") - public DenseLayer build() { - return new DenseLayer(this); - } - } - -} + } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DepthwiseConvolution2D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DepthwiseConvolution2D.java index 307604ce0..394beeb73 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DepthwiseConvolution2D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DepthwiseConvolution2D.java @@ -20,7 +20,9 @@ package org.deeplearning4j.nn.conf.layers; +import java.util.*; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.CNN2DFormat; @@ -35,193 +37,152 @@ import org.nd4j.common.base.Preconditions; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.*; - @Data -@NoArgsConstructor @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@SuperBuilder(buildMethodName = "initBuild") public class DepthwiseConvolution2D extends ConvolutionLayer { + /** + * Set channels multiplier for depth-wise convolution + * + * @param depthMultiplier integer value, for each input map we get depthMultiplier outputs in + * channels-wise step. 
+ * @return Builder + */ + @Builder.Default protected int depthMultiplier = 1; + /** + * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last). + * See {@link CNN2DFormat} for more details.
+ * Default: NCHW + * + * @param format Format for activations (in and out) + */ + @Builder.Default + protected CNN2DFormat dataFormat = + CNN2DFormat.NCHW; // default value for legacy serialization reasons + /** + * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last). + * See {@link CNN2DFormat} for more details.
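To make the depthMultiplier and data-format settings above concrete, here is a hedged sketch using the builder methods declared in this patch; all values are illustrative. With nIn = 3 input channels and depthMultiplier = 2, the layer ends up with nOut = 3 * 2 = 6 output channels once the input type is propagated through setNIn(...).

    import org.deeplearning4j.nn.conf.CNN2DFormat;
    import org.deeplearning4j.nn.conf.layers.DepthwiseConvolution2D;

    // Sketch: depth-wise convolution over channels-last (NHWC) activations.
    // nOut is left unset here; it is derived as nIn * depthMultiplier.
    DepthwiseConvolution2D dw = DepthwiseConvolution2D.builder()
            .nIn(3)
            .depthMultiplier(2)
            .kernelSize(3, 3)
            .stride(1, 1)
            .dataFormat(CNN2DFormat.NHWC)   // channels-last input [minibatch, height, width, channels]
            .build();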
+ * Default: NCHW + * + * @param format Format for activations (in and out) + */ + @Builder.Default protected CNN2DFormat cnn2DFormat = CNN2DFormat.NCHW; - protected int depthMultiplier; + protected boolean allowCausal() { + // Causal convolution - allowed for 1D only + return false; + } - protected DepthwiseConvolution2D(Builder builder) { - super(builder); - Preconditions.checkState(builder.depthMultiplier > 0, "Depth multiplier must be > 0, got %s", builder.depthMultiplier); - this.depthMultiplier = builder.depthMultiplier; - this.nOut = this.nIn * this.depthMultiplier; - this.cnn2dDataFormat = builder.cnn2DFormat; + @Override + public DepthwiseConvolution2D clone() { + DepthwiseConvolution2D clone = (DepthwiseConvolution2D) super.clone(); + clone.depthMultiplier = depthMultiplier; + return clone; + } - initializeConstraints(builder); + @Override + public Layer instantiate( + NeuralNetConfiguration conf, + Collection trainingListeners, + int layerIndex, + INDArray layerParamsView, + boolean initializeParams, + DataType networkDataType) { + LayerValidation.assertNInNOutSet( + "DepthwiseConvolution2D", getName(), layerIndex, getNIn(), getNOut()); + + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + runInheritance(); + DepthwiseConvolution2DLayer ret = new DepthwiseConvolution2DLayer(lconf, networkDataType); + + ret.addTrainingListeners(trainingListeners); + ret.setIndex(layerIndex); + ret.setParamsViewArray(layerParamsView); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); + ret.setParamTable(paramTable); + ret.setLayerConfiguration(lconf); + + return ret; + } + + @Override + public ParamInitializer initializer() { + return DepthwiseConvolutionParamInitializer.getInstance(); + } + + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + if (inputType == null || inputType.getType() != InputType.Type.CNN) { + throw new IllegalStateException( + "Invalid input for depth-wise convolution layer (layer name=\"" + + getName() + + "\"): Expected CNN input, got " + + inputType); + } + + return InputTypeUtil.getOutputTypeCnnLayers( + inputType, + getKernelSize(), + getStride(), + getPadding(), + getDilation(), + getConvolutionMode(), + nOut, + layerIndex, + getName(), + dataFormat, + DepthwiseConvolution2DLayer.class); + } + + @Override + public void setNIn(InputType inputType, boolean override) { + super.setNIn(inputType, override); + + if (nOut == 0 || override) { + nOut = this.nIn * this.depthMultiplier; + } + this.dataFormat = ((InputType.InputTypeConvolutional) inputType).getFormat(); + } + + public abstract static class DepthwiseConvolution2DBuilder< + C extends DepthwiseConvolution2D, B extends DepthwiseConvolution2DBuilder> + extends ConvolutionLayerBuilder { + public C build() { + Preconditions.checkState( + depthMultiplier$value > 0, + "Depth multiplier must be > 0, got %s", + depthMultiplier$value); + C l = this.initBuild(); + ConvolutionUtils.validateConvolutionModePadding(l.getConvolutionMode(), l.getPadding()); + ConvolutionUtils.validateCnnKernelStridePadding( + l.getKernelSize(), l.getStride(), l.getPadding()); + l.initializeConstraints(); + return l; } @Override - public DepthwiseConvolution2D clone() { - DepthwiseConvolution2D clone = (DepthwiseConvolution2D) super.clone(); - clone.depthMultiplier = depthMultiplier; - return clone; - } - - - @Override - public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, - int layerIndex, INDArray layerParamsView, boolean 
initializeParams, DataType networkDataType) { - LayerValidation.assertNInNOutSet("DepthwiseConvolution2D", getLayerName(), layerIndex, getNIn(), getNOut()); - - LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); - DepthwiseConvolution2DLayer ret = new DepthwiseConvolution2DLayer(lconf, networkDataType); - - ret.addTrainingListeners(trainingListeners); - ret.setIndex(layerIndex); - ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(this, layerParamsView, initializeParams); - ret.setParamTable(paramTable); - ret.setLayerConfiguration(lconf); - - return ret; + public B kernelSize(int... kernelSize) { + super.kernelSize(ValidationUtils.validate2NonNegative(kernelSize, false, "kernelSize")); + return self(); } @Override - public ParamInitializer initializer() { - return DepthwiseConvolutionParamInitializer.getInstance(); + public B stride(int... stride) { + super.stride(ValidationUtils.validate2NonNegative(stride, false, "stride")); + return self(); } @Override - public InputType getOutputType(int layerIndex, InputType inputType) { - if (inputType == null || inputType.getType() != InputType.Type.CNN) { - throw new IllegalStateException("Invalid input for depth-wise convolution layer (layer name=\"" - + getLayerName() + "\"): Expected CNN input, got " + inputType); - } - - return InputTypeUtil.getOutputTypeCnnLayers(inputType, kernelSize, stride, padding, dilation, convolutionMode, - nOut, layerIndex, getLayerName(), cnn2dDataFormat, DepthwiseConvolution2DLayer.class); + public B padding(int... padding) { + super.padding(ValidationUtils.validate2NonNegative(padding, false, "padding")); + return self(); } @Override - public void setNIn(InputType inputType, boolean override) { - super.setNIn(inputType, override); - - if(nOut == 0 || override){ - nOut = this.nIn * this.depthMultiplier; - } - this.cnn2dDataFormat = ((InputType.InputTypeConvolutional)inputType).getFormat(); + public B dilation(int... dilation) { + super.dilation(ValidationUtils.validate2NonNegative(dilation, false, "dilation")); + return self(); } - - @Getter - @Setter - public static class Builder extends BaseConvBuilder { - - /** - * Set channels multiplier for depth-wise convolution - * - */ - protected int depthMultiplier = 1; - protected CNN2DFormat cnn2DFormat = CNN2DFormat.NCHW; - - - public Builder(int[] kernelSize, int[] stride, int[] padding) { - super(kernelSize, stride, padding); - } - - public Builder(int[] kernelSize, int[] stride) { - super(kernelSize, stride); - } - - public Builder(int... kernelSize) { - super(kernelSize); - } - - public Builder() { - super(); - } - - @Override - protected boolean allowCausal() { - //Causal convolution - allowed for 1D only - return false; - } - - /** - * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last). - * See {@link CNN2DFormat} for more details.
- * Default: NCHW - * @param format Format for activations (in and out) - */ - public Builder dataFormat(CNN2DFormat format){ - this.cnn2DFormat = format; - return this; - } - - /** - * Set channels multiplier for depth-wise convolution - * - * @param depthMultiplier integer value, for each input map we get depthMultiplier outputs in channels-wise - * step. - * @return Builder - */ - public Builder depthMultiplier(int depthMultiplier) { - this.setDepthMultiplier(depthMultiplier); - return this; - } - - /** - * Size of the convolution rows/columns - * - * @param kernelSize the height and width of the kernel - */ - public Builder kernelSize(int... kernelSize) { - this.setKernelSize(kernelSize); - return this; - } - - /** - * Stride of the convolution in rows/columns (height/width) dimensions - * - * @param stride Stride of the layer - */ - public Builder stride(int... stride) { - this.setStride(stride); - return this; - } - - /** - * Padding of the convolution in rows/columns (height/width) dimensions - * - * @param padding Padding of the layer - */ - public Builder padding(int... padding) { - this.setPadding(padding); - return this; - } - - @Override - public void setKernelSize(int... kernelSize) { - this.kernelSize = ValidationUtils.validate2NonNegative(kernelSize, false, "kernelSize"); - } - - @Override - public void setStride(int... stride) { - this.stride = ValidationUtils.validate2NonNegative(stride, false, "stride"); - } - - @Override - public void setPadding(int... padding) { - this.padding = ValidationUtils.validate2NonNegative(padding, false, "padding"); - } - - @Override - public void setDilation(int... dilation) { - this.dilation = ValidationUtils.validate2NonNegative(dilation, false, "dilation"); - } - - @Override - @SuppressWarnings("unchecked") - public DepthwiseConvolution2D build() { - ConvolutionUtils.validateConvolutionModePadding(convolutionMode, padding); - ConvolutionUtils.validateCnnKernelStridePadding(kernelSize, stride, padding); - - return new DepthwiseConvolution2D(this); - } - } - + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DropoutLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DropoutLayer.java index 521dacd23..8c8e5d63d 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DropoutLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DropoutLayer.java @@ -20,7 +20,11 @@ package org.deeplearning4j.nn.conf.layers; +import java.util.Collection; +import java.util.List; +import java.util.Map; import lombok.*; +import lombok.experimental.SuperBuilder; import net.brutex.ai.dnn.api.LayerType; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.InputPreProcessor; @@ -36,129 +40,117 @@ import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.learning.regularization.Regularization; -import java.util.Collection; -import java.util.List; -import java.util.Map; - +/** + * Create a dropout layer with standard {@link Dropout}, with the specified probability of retaining + * the input activation. 
See {@link Dropout} for the full details + */ @Data -@NoArgsConstructor + @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@SuperBuilder(builderMethodName = "innerBuilder") public class DropoutLayer extends FeedForwardLayer { - private DropoutLayer(Builder builder) { - super(builder); - setType(LayerType.DO); + public static DropoutLayerBuilder builder() { + return innerBuilder(); + } + + public static DropoutLayerBuilder builder(double dropout ) { + return innerBuilder() + .dropOut(dropout); + } + public static DropoutLayerBuilder builder(IDropout dropout ) { + return innerBuilder() + .dropOut(dropout); + } + @Override + public DropoutLayer clone() { + return (DropoutLayer) super.clone(); + } + + @Override + public org.deeplearning4j.nn.api.Layer instantiate( + NeuralNetConfiguration conf, + Collection trainingListeners, + int layerIndex, + INDArray layerParamsView, + boolean initializeParams, + DataType networkDataType) { + setNetConfiguration(conf); + + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + runInheritance(); + org.deeplearning4j.nn.layers.DropoutLayer ret = + new org.deeplearning4j.nn.layers.DropoutLayer(lconf, networkDataType); + + ret.addTrainingListeners(trainingListeners); + ret.setIndex(layerIndex); + ret.setParamsViewArray(layerParamsView); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); + ret.setParamTable(paramTable); + ret.setLayerConfiguration(lconf); + return ret; + } + + @Override + public ParamInitializer initializer() { + return EmptyParamInitializer.getInstance(); + } + + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + if (inputType == null) { + throw new IllegalStateException( + "Invalid input type: null for layer name \"" + getName() + "\""); } + return inputType; + } - public DropoutLayer(double activationRetainProb){ - this(new Builder().dropOut(activationRetainProb)); + @Override + public void setNIn(InputType inputType, boolean override) { + // No op: dropout layer doesn't have a fixed nIn value + } + + @Override + public InputPreProcessor getPreProcessorForInputType(InputType inputType) { + // No input preprocessor required; dropout applies to any input type + return null; + } + + @Override + public List getRegularizationByParam(String paramName) { + // Not applicable + return null; + } + + @Override + public boolean isPretrainParam(String paramName) { + throw new UnsupportedOperationException("Dropout layer does not contain parameters"); + } + + @Override + public LayerMemoryReport getMemoryReport(InputType inputType) { + val actElementsPerEx = inputType.arrayElementsPerExample(); + // During inference: not applied. 
During backprop: dup the input, in case it's used elsewhere + // But: this will be counted in the activations + // (technically inference memory is over-estimated as a result) + + return new LayerMemoryReport.Builder(name, DropoutLayer.class, inputType, inputType) + .standardMemory(0, 0) // No params + .workingMemory(0, 0, 0, 0) // No working mem, other than activations etc + .cacheMemory( + MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) // No caching + .build(); + } + + public static abstract class DropoutLayerBuilder< + C extends DropoutLayer, B extends DropoutLayerBuilder> + extends FeedForwardLayerBuilder { + + public B dropout(IDropout dropout) { + super.dropOut(dropout); + return self(); } - - public DropoutLayer(IDropout dropout){ - this(new Builder().dropOut(dropout)); - } - - @Override - public DropoutLayer clone() { - return (DropoutLayer) super.clone(); - } - - @Override - public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, - Collection trainingListeners, int layerIndex, INDArray layerParamsView, - boolean initializeParams, DataType networkDataType) { - setNetConfiguration(conf); - - LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); - org.deeplearning4j.nn.layers.DropoutLayer ret = new org.deeplearning4j.nn.layers.DropoutLayer(lconf, networkDataType); - - ret.addTrainingListeners(trainingListeners); - ret.setIndex(layerIndex); - ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(this, layerParamsView, initializeParams); - ret.setParamTable(paramTable); - ret.setLayerConfiguration(lconf); - return ret; - } - - @Override - public ParamInitializer initializer() { - return EmptyParamInitializer.getInstance(); - } - - @Override - public InputType getOutputType(int layerIndex, InputType inputType) { - if (inputType == null) { - throw new IllegalStateException("Invalid input type: null for layer name \"" + getLayerName() + "\""); - } - return inputType; - } - - @Override - public void setNIn(InputType inputType, boolean override) { - //No op: dropout layer doesn't have a fixed nIn value - } - - @Override - public InputPreProcessor getPreProcessorForInputType(InputType inputType) { - //No input preprocessor required; dropout applies to any input type - return null; - } - - @Override - public List getRegularizationByParam(String paramName) { - //Not applicable - return null; - } - - @Override - public boolean isPretrainParam(String paramName) { - throw new UnsupportedOperationException("Dropout layer does not contain parameters"); - } - - @Override - public LayerMemoryReport getMemoryReport(InputType inputType) { - val actElementsPerEx = inputType.arrayElementsPerExample(); - //During inference: not applied. During backprop: dup the input, in case it's used elsewhere - //But: this will be counted in the activations - //(technically inference memory is over-estimated as a result) - - return new LayerMemoryReport.Builder(layerName, DropoutLayer.class, inputType, inputType).standardMemory(0, 0) //No params - .workingMemory(0, 0, 0, 0) //No working mem, other than activations etc - .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching - .build(); - } - - - @NoArgsConstructor - public static class Builder extends FeedForwardLayer.Builder { - - /** - * Create a dropout layer with standard {@link Dropout}, with the specified probability of retaining the input - * activation. 
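For comparison with the removed Builder(double) constructor, the same dropout configuration under the static factories added in this patch might look like the following sketch; 0.9 is the probability of retaining an activation, so roughly 10% of activations are dropped during training.

    import org.deeplearning4j.nn.conf.dropout.Dropout;
    import org.deeplearning4j.nn.conf.layers.DropoutLayer;

    // Sketch: retain 90% of input activations with standard Dropout.
    DropoutLayer drop = DropoutLayer.builder(0.9).build();

    // Equivalent form, passing an explicit IDropout instance.
    DropoutLayer dropExplicit = DropoutLayer.builder(new Dropout(0.9)).build();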
See {@link Dropout} for the full details - * - * @param dropout Activation retain probability. - */ - public Builder(double dropout) { - this.dropOut(new Dropout(dropout)); - } - - /** - * @param dropout Specified {@link IDropout} instance for the dropout layer - */ - public Builder(IDropout dropout) { - this.dropOut(dropout); - } - - @Override - @SuppressWarnings("unchecked") - public DropoutLayer build() { - - return new DropoutLayer(this); - } - } - - + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingLayer.java index 36d719ddc..e18af8eb3 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingLayer.java @@ -21,6 +21,8 @@ package org.deeplearning4j.nn.conf.layers; import lombok.*; +import lombok.experimental.Accessors; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -29,10 +31,12 @@ import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.conf.memory.MemoryReport; import org.deeplearning4j.nn.params.EmbeddingLayerParamInitializer; import org.deeplearning4j.nn.weights.IWeightInit; +import org.deeplearning4j.nn.weights.WeightInit; import org.deeplearning4j.nn.weights.embeddings.ArrayEmbeddingInitializer; import org.deeplearning4j.nn.weights.embeddings.EmbeddingInitializer; import org.deeplearning4j.nn.weights.embeddings.WeightInitEmbedding; import org.deeplearning4j.optimize.api.TrainingListener; +import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.activations.impl.ActivationIdentity; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; @@ -41,25 +45,81 @@ import java.util.Collection; import java.util.Map; @Data -@NoArgsConstructor @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder") public class EmbeddingLayer extends FeedForwardLayer { + /** + * If true: include bias parameters in the layer. False (default): no bias. + * @param hasBias If true: include bias parameters in this layer + */ + @Accessors @Builder.Default + private boolean hasBias = false; - private boolean hasBias = true; //Default for pre-0.9.2 implementations - - private EmbeddingLayer(Builder builder) { - super(builder); - this.hasBias = builder.hasBias; - initializeConstraints(builder); + /** + *Default to Identity activation - i.e., don't inherit. 
+ * For example, if user sets ReLU as global default, they very likely don't intend to use it for Embedding layer also + * + */ + public static EmbeddingLayerBuilder builder() { + return innerBuilder() + .activation(Activation.IDENTITY); } + public static abstract class EmbeddingLayerBuilder> + extends FeedForwardLayerBuilder{ + public C build() { + C l = initBuild(); + l.initializeConstraints(); + return l; + } + + /** + * Weight initialization scheme to use, for initial weight values + * + * @param weightInit + * @see WeightInit + */ + @Override + public B weightInit(WeightInit weightInit) { + if(weightInit.getWeightInitFunction() instanceof WeightInitEmbedding){ + long[] shape = ((WeightInitEmbedding) weightInit.getWeightInitFunction()).shape(); + nIn(shape[0]); + nOut(shape[1]); + } + super.weightInit(weightInit); + return self(); + } + /** + * Initialize the embedding layer using values from the specified array. Note that the array should have shape + * [vocabSize, vectorSize]. After copying values from the array to initialize the network parameters, the input + * array will be discarded (so that, if necessary, it can be garbage collected) + * + * @param vectors Vectors to initialize the embedding layer with + */ + public B weightInit(INDArray vectors){ + weightInit(new ArrayEmbeddingInitializer(vectors)); + return self(); + } + + /** + * Initialize the embedding layer using the specified EmbeddingInitializer - such as a Word2Vec instance + * + * @param embeddingInitializer Source of the embedding layer weights + */ + public B weightInit(EmbeddingInitializer embeddingInitializer) { + var weightIn = new WeightInitEmbedding(embeddingInitializer); + super.weightInit(weightIn); + return self(); + } + } @Override public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); org.deeplearning4j.nn.layers.feedforward.embedding.EmbeddingLayer ret = new org.deeplearning4j.nn.layers.feedforward.embedding.EmbeddingLayer(lconf, networkDataType); + runInheritance(); ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); @@ -88,77 +148,9 @@ public class EmbeddingLayer extends FeedForwardLayer { //Inference: no working memory - just activations (pullRows) //Training: preout op, the only in-place ops on epsilon (from layer above) + assign ops - return new LayerMemoryReport.Builder(layerName, EmbeddingLayer.class, inputType, outputType) + return new LayerMemoryReport.Builder(name, EmbeddingLayer.class, inputType, outputType) .standardMemory(numParams, updaterStateSize).workingMemory(0, 0, 0, actElementsPerEx) .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching .build(); } - - public boolean hasBias() { - return hasBias; - } - - @Getter - @Setter - public static class Builder extends FeedForwardLayer.Builder { - - /** - * If true: include bias parameters in the layer. False (default): no bias. - * - */ - private boolean hasBias = false; - - public Builder(){ - //Default to Identity activation - i.e., don't inherit. - //For example, if user sets ReLU as global default, they very likely don't intend to use it for Embedding layer also - this.activationFn = new ActivationIdentity(); - } - - - /** - * If true: include bias parameters in the layer. False (default): no bias. 
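As a usage sketch for the weightInit overloads above (method names as declared in this patch; the vectors array is a hypothetical pre-trained embedding matrix), the layer can be initialized from an INDArray of shape [vocabSize, vectorSize], which also supplies the vocabulary and vector sizes the previous builder derived as nIn/nOut.

    import org.deeplearning4j.nn.conf.layers.EmbeddingLayer;
    import org.nd4j.linalg.api.ndarray.INDArray;
    import org.nd4j.linalg.factory.Nd4j;

    // Sketch: stand-in for a real pre-trained table, e.g. exported from Word2Vec.
    INDArray pretrainedVectors = Nd4j.rand(10_000, 300);   // [vocabSize, vectorSize]

    EmbeddingLayer embedding = EmbeddingLayer.builder()
            .weightInit(pretrainedVectors)   // copies the table into the layer parameters
            .hasBias(false)
            .build();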
- * - * @param hasBias If true: include bias parameters in this layer - */ - public Builder hasBias(boolean hasBias) { - this.hasBias = hasBias; - return this; - } - - @Override - public Builder weightInit(IWeightInit weightInit) { - if(weightInit instanceof WeightInitEmbedding){ - long[] shape = ((WeightInitEmbedding) weightInit).shape(); - nIn(shape[0]); - nOut(shape[1]); - } - return super.weightInit(weightInit); - } - - /** - * Initialize the embedding layer using the specified EmbeddingInitializer - such as a Word2Vec instance - * - * @param embeddingInitializer Source of the embedding layer weights - */ - public Builder weightInit(EmbeddingInitializer embeddingInitializer){ - return weightInit(new WeightInitEmbedding(embeddingInitializer)); - } - - /** - * Initialize the embedding layer using values from the specified array. Note that the array should have shape - * [vocabSize, vectorSize]. After copying values from the array to initialize the network parameters, the input - * array will be discarded (so that, if necessary, it can be garbage collected) - * - * @param vectors Vectors to initialize the embedding layer with - */ - public Builder weightInit(INDArray vectors){ - return weightInit(new ArrayEmbeddingInitializer(vectors)); - } - - @Override - @SuppressWarnings("unchecked") - public EmbeddingLayer build() { - return new EmbeddingLayer(this); - } - } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingSequenceLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingSequenceLayer.java index 16aeb1acd..09b908445 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingSequenceLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingSequenceLayer.java @@ -21,6 +21,7 @@ package org.deeplearning4j.nn.conf.layers; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.InputPreProcessor; @@ -43,29 +44,89 @@ import java.util.Collection; import java.util.Map; @Data -@NoArgsConstructor @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder") public class EmbeddingSequenceLayer extends FeedForwardLayer { + /** + * Set input sequence length for this embedding layer. + * + * @param inputLength input sequence length + * @return Builder + */ + @Builder.Default private int inputLength = 1; // By default only use one index to embed + /** + * If true: include bias parameters in the layer. False (default): no bias. + * + */ + @Builder.Default private boolean hasBias = false; - private int inputLength = 1; // By default only use one index to embed - private boolean hasBias = false; - private boolean inferInputLength = false; // use input length as provided by input data - private RNNFormat outputFormat = RNNFormat.NCW; //Default value for older deserialized models + /** + * Set input sequence inference mode for embedding layer. 
+ * + * @param inferInputLength whether to infer input length + * @return Builder + */ + @Builder.Default private boolean inferInputLength = false; // use input length as provided by input data + @Builder.Default private RNNFormat outputDataFormat = RNNFormat.NCW; //Default value for older deserialized models - private EmbeddingSequenceLayer(Builder builder) { - super(builder); - this.hasBias = builder.hasBias; - this.inputLength = builder.inputLength; - this.inferInputLength = builder.inferInputLength; - this.outputFormat = builder.outputFormat; - initializeConstraints(builder); +public static abstract class EmbeddingSequenceLayerBuilder> +extends FeedForwardLayerBuilder { + public C build() { + C l = initBuild(); + l.initializeConstraints(); + return l; } + public B weightInit(IWeightInit weightInit){ + if(weightInit instanceof WeightInitEmbedding){ + long[] shape = ((WeightInitEmbedding) weightInit).shape(); + nIn(shape[0]); + nOut(shape[1]); + } + super.weightInit( weightInit); + return self(); + } + + /** + * Initialize the embedding layer using values from the specified array. Note that the array should have shape + * [vocabSize, vectorSize]. After copying values from the array to initialize the network parameters, the input + * array will be discarded (so that, if necessary, it can be garbage collected) + * + * @param vectors Vectors to initialize the embedding layer with + */ + public B weightInit(INDArray vectors){ + weightInit(new ArrayEmbeddingInitializer(vectors)); + return self(); + } + + /** + * Initialize the embedding layer using the specified EmbeddingInitializer - such as a Word2Vec instance + * + * @param embeddingInitializer Source of the embedding layer weights + */ + public B weightInit(EmbeddingInitializer embeddingInitializer) { + var weightIn = new WeightInitEmbedding(embeddingInitializer); + super.weightInit(weightIn); + return self(); + } + +} + /** + * //Default to Identity activation - i.e., don't inherit. + * //For example, if user sets ReLU as global default, they very likely don't intend to use it for Embedding layer also + * + */ + public static EmbeddingSequenceLayerBuilder builder() { + return innerBuilder().activation(new ActivationIdentity()); + } + + @Override public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + runInheritance(); org.deeplearning4j.nn.layers.feedforward.embedding.EmbeddingSequenceLayer ret = new org.deeplearning4j.nn.layers.feedforward.embedding.EmbeddingSequenceLayer(lconf, networkDataType); @@ -82,9 +143,9 @@ public class EmbeddingSequenceLayer extends FeedForwardLayer { public InputType getOutputType(int layerIndex, InputType inputType) { if (inputType == null || (inputType.getType() != InputType.Type.FF && inputType.getType() != InputType.Type.RNN)) { throw new IllegalStateException("Invalid input for Embedding layer (layer index = " + layerIndex - + ", layer name = \"" + getLayerName() + "\"): expect FF/RNN input type. Got: " + inputType); + + ", layer name = \"" + getName() + "\"): expect FF/RNN input type. 
Got: " + inputType); } - return InputType.recurrent(nOut, inputLength, outputFormat); + return InputType.recurrent(nOut, inputLength, outputDataFormat); } @Override @@ -100,7 +161,7 @@ public class EmbeddingSequenceLayer extends FeedForwardLayer { val numParams = initializer().numParams(this); val updaterStateSize = (int) getIUpdater().stateSize(numParams); - return new LayerMemoryReport.Builder(layerName, EmbeddingSequenceLayer.class, inputType, outputType) + return new LayerMemoryReport.Builder(name, EmbeddingSequenceLayer.class, inputType, outputType) .standardMemory(numParams, updaterStateSize).workingMemory(0, 0, 0, actElementsPerEx) .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching .build(); @@ -114,7 +175,7 @@ public class EmbeddingSequenceLayer extends FeedForwardLayer { public InputPreProcessor getPreProcessorForInputType(InputType inputType) { if (inputType == null) { throw new IllegalStateException( - "Invalid input for layer (layer name = \"" + getLayerName() + "\"): input type is null"); + "Invalid input for layer (layer name = \"" + getName() + "\"): input type is null"); } if(inputType.getType() == InputType.Type.RNN){ @@ -142,115 +203,4 @@ public class EmbeddingSequenceLayer extends FeedForwardLayer { } } - - @Getter - @Setter - public static class Builder extends FeedForwardLayer.Builder { - - public Builder(){ - //Default to Identity activation - i.e., don't inherit. - //For example, if user sets ReLU as global default, they very likely don't intend to use it for Embedding layer also - this.activationFn = new ActivationIdentity(); - } - - /** - * If true: include bias parameters in the layer. False (default): no bias. - * - */ - private boolean hasBias = false; - - /** - * Set input sequence length for this embedding layer. - * - */ - private int inputLength = 1; - - /** - * Set input sequence inference mode for embedding layer. - * - */ - private boolean inferInputLength = true; - - private RNNFormat outputFormat = RNNFormat.NCW; //Default value for older deserialized models - - public Builder outputDataFormat(RNNFormat format){ - this.outputFormat = format; - return this; - } - - /** - * If true: include bias parameters in the layer. False (default): no bias. - * - * @param hasBias If true: include bias parameters in this layer - */ - public Builder hasBias(boolean hasBias) { - this.setHasBias(hasBias); - return this; - } - - /** - * Set input sequence length for this embedding layer. - * - * @param inputLength input sequence length - * @return Builder - */ - public Builder inputLength(int inputLength) { - this.setInputLength(inputLength); - return this; - } - - - /** - * Set input sequence inference mode for embedding layer. 
- * - * @param inferInputLength whether to infer input length - * @return Builder - */ - public Builder inferInputLength(boolean inferInputLength) { - this.setInferInputLength(inferInputLength); - return this; - } - - @Override - public Builder weightInit(IWeightInit weightInit) { - this.setWeightInitFn(weightInit); - return this; - } - - - public void setWeightInitFn(IWeightInit weightInit){ - if(weightInit instanceof WeightInitEmbedding){ - long[] shape = ((WeightInitEmbedding) weightInit).shape(); - nIn(shape[0]); - nOut(shape[1]); - } - this.weightInit = weightInit; - } - - /** - * Initialize the embedding layer using the specified EmbeddingInitializer - such as a Word2Vec instance - * - * @param embeddingInitializer Source of the embedding layer weights - */ - public Builder weightInit(EmbeddingInitializer embeddingInitializer){ - return weightInit(new WeightInitEmbedding(embeddingInitializer)); - } - - /** - * Initialize the embedding layer using values from the specified array. Note that the array should have shape - * [vocabSize, vectorSize]. After copying values from the array to initialize the network parameters, the input - * array will be discarded (so that, if necessary, it can be garbage collected) - * - * @param vectors Vectors to initialize the embedding layer with - */ - public Builder weightInit(INDArray vectors){ - return weightInit(new ArrayEmbeddingInitializer(vectors)); - } - - @Override - @SuppressWarnings("unchecked") - public EmbeddingSequenceLayer build() { - return new EmbeddingSequenceLayer(this); - } - } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/FeedForwardLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/FeedForwardLayer.java index de733add8..a4fba86c9 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/FeedForwardLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/FeedForwardLayer.java @@ -21,7 +21,7 @@ package org.deeplearning4j.nn.conf.layers; import lombok.*; -import net.brutex.ai.dnn.api.LayerType; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.conf.DataFormat; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -29,31 +29,54 @@ import org.deeplearning4j.nn.conf.preprocessor.Cnn3DToFeedForwardPreProcessor; import org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor; import org.deeplearning4j.nn.conf.preprocessor.RnnToFeedForwardPreProcessor; -@Data -@NoArgsConstructor @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@NoArgsConstructor +@SuperBuilder public abstract class FeedForwardLayer extends BaseLayerConfiguration { - + public static abstract class FeedForwardLayerBuilder> + extends BaseLayerConfigurationBuilder { +} + /** + * Number of inputs for the layer (usually the size of the last layer).
Note that for Convolutional layers, + * this is the input channels, otherwise is the previous layer size. + * + */ + @Getter protected long nIn; + + public void setNIn(int in) { + this.nIn = in; + } + public void setNIn(long in) { + this.nIn = in; + } + /** + * Number of inputs for the layer (usually the size of the last layer).
Note that for Convolutional layers, + * this is the input channels, otherwise is the previous layer size. + * + */ + @Getter @Setter protected long nOut; protected DataFormat timeDistributedFormat; - public FeedForwardLayer(Builder builder) { - super(builder); - this.nIn = builder.nIn; - this.nOut = builder.nOut; - setType(LayerType.FC); + protected FeedForwardLayer(FeedForwardLayerBuilder b) { + super(b); + this.nIn = b.nIn; + this.nOut = b.nOut; + this.timeDistributedFormat = b.timeDistributedFormat; } - - +// + // { //Initializer block + // setType(LayerType.FC); + //} @Override public InputType getOutputType(int layerIndex, InputType inputType) { if (inputType == null || (inputType.getType() != InputType.Type.FF && inputType.getType() != InputType.Type.CNNFlat)) { throw new IllegalStateException("Invalid input type (layer index = " + layerIndex + ", layer name=\"" - + getLayerName() + "\"): expected FeedForward input type. Got: " + inputType); + + getName() + "\"): expected FeedForward input type. Got: " + inputType); } return InputType.feedForward(nOut, timeDistributedFormat); @@ -63,7 +86,7 @@ public abstract class FeedForwardLayer extends BaseLayerConfiguration { public void setNIn(InputType inputType, boolean override) { if (inputType == null || (inputType.getType() != InputType.Type.FF && inputType.getType() != InputType.Type.CNNFlat && inputType.getType() != InputType.Type.RNN)) { - throw new IllegalStateException("Invalid input type (layer name=\"" + getLayerName() + throw new IllegalStateException("Invalid input type (layer name=\"" + getName() + "\"): expected FeedForward input type. Got: " + inputType); } @@ -90,7 +113,7 @@ public abstract class FeedForwardLayer extends BaseLayerConfiguration { public InputPreProcessor getPreProcessorForInputType(InputType inputType) { if (inputType == null) { throw new IllegalStateException( - "Invalid input for layer (layer name = \"" + getLayerName() + "\"): input type is null"); + "Invalid input for layer (layer name = \"" + getName() + "\"): input type is null"); } switch (inputType.getType()) { @@ -120,76 +143,4 @@ public abstract class FeedForwardLayer extends BaseLayerConfiguration { return false; //No pretrain params in standard FF layers } - @Getter - @Setter - public abstract static class Builder> extends BaseLayerConfiguration.Builder { - - /** - * Number of inputs for the layer (usually the size of the last layer).
Note that for Convolutional layers, - * this is the input channels, otherwise is the previous layer size. - * - */ - protected long nIn = 0; - - /** - * Number of inputs for the layer (usually the size of the last layer).
Note that for Convolutional layers, - * this is the input channels, otherwise is the previous layer size. - * - */ - protected long nOut = 0; - - /** - * Number of inputs for the layer (usually the size of the last layer).
Note that for Convolutional layers, - * this is the input channels, otherwise is the previous layer size. - * - * @param nIn Number of inputs for the layer - */ - public T nIn(int nIn) { - this.setNIn(nIn); - return (T) this; - } - - /** - * Number of inputs for the layer (usually the size of the last layer).
Note that for Convolutional layers, - * this is the input channels, otherwise is the previous layer size. - * - * @param nIn Number of inputs for the layer - */ - public T nIn(long nIn) { - this.setNIn(nIn); - return (T) this; - } - - /** - * Number of outputs - used to set the layer size (number of units/nodes for the current layer). Note that this - * is equivalent to {@link #units(int)} - * - * @param nOut Number of outputs / layer size - */ - public T nOut(int nOut) { - this.setNOut(nOut); - return (T) this; - } - - /** - * Number of outputs - used to set the layer size (number of units/nodes for the current layer). Note that this - * is equivalent to {@link #units(int)} - * - * @param nOut Number of outputs / layer size - */ - public T nOut(long nOut) { - this.setNOut((int) nOut); - return (T) this; - } - - /** - * Set the number of units / layer size for this layer.
This is equivalent to {@link #nOut(int)} - * - * @param units Size of the layer (number of units) / nOut - * @see #nOut(int) - */ - public T units(int units) { - return nOut(units); - } - } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GlobalPoolingLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GlobalPoolingLayer.java index 6d95ae93b..88b6afc3f 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GlobalPoolingLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GlobalPoolingLayer.java @@ -21,6 +21,7 @@ package org.deeplearning4j.nn.conf.layers; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.InputPreProcessor; @@ -40,36 +41,86 @@ import java.util.Map; @Data @EqualsAndHashCode(callSuper = true) +@SuperBuilder(builderMethodName = "innerBuilder") public class GlobalPoolingLayer extends NoParamLayer { - private PoolingType poolingType; + public static abstract class GlobalPoolingLayerBuilder> extends + NoParamLayerBuilder { + + /** + * Pooling dimensions. Note: most of the time, this doesn't need to be set, and the defaults can be used. + * Default for RNN data: pooling dimension 2 (time). Default for CNN data: pooling dimensions 2,3 (height and + * width) Default for CNN3D data: pooling dimensions 2,3,4 (depth, height and width) + * + * @param poolingDimensions Pooling dimensions to use + */ + public B poolingDimensions(int... poolingDimensions) { + this.poolingDimensions = poolingDimensions; + return self(); + } + + /** + * P-norm constant. Only used if using {@link PoolingType#PNORM} for the pooling type + * + * @param pnorm P-norm constant + */ + public B pnorm(int pnorm) { + if (pnorm <= 0) { + throw new IllegalArgumentException("Invalid input: p-norm value must be greater than 0. Got: " + pnorm); + } + ValidationUtils.validateNonNegative(pnorm, "pnorm"); + pnorm$value = pnorm; + pnorm$set = true; + return self(); + } + } + /** + * Pooling type for global pooling + */ + @Builder.Default private PoolingType poolingType = PoolingType.MAX; + /** + * Pooling dimensions. Note: most of the time, this doesn't need to be set, and the defaults can be used. + * Default for RNN data: pooling dimension 2 (time). Default for CNN data: pooling dimensions 2,3 (height and + * width) Default for CNN3D data: pooling dimensions 2,3,4 (depth, height and width) + * + */ private int[] poolingDimensions; - private int pnorm; - private boolean collapseDimensions = true; + /** + * P-norm constant. Only used if using {@link PoolingType#PNORM} for the pooling type + * + */ + @Builder.Default private int pnorm = 2; + /** + * Whether to collapse dimensions when pooling or not. Usually you *do* want to do this. Default: true. If + * true:
- 3d (time series) input with shape [miniBatchSize, vectorSize, timeSeriesLength] -> 2d output + * [miniBatchSize, vectorSize]
- 4d (CNN) input with shape [miniBatchSize, channels, height, width] -> 2d + * output [miniBatchSize, channels]
- 5d (CNN3D) input with shape [miniBatchSize, channels, depth, height, + * width] -> 2d output [miniBatchSize, channels]
+ * + * + * If false:
- 3d (time series) input with shape [miniBatchSize, vectorSize, timeSeriesLength] -> 3d output + * [miniBatchSize, vectorSize, 1]
- 4d (CNN) input with shape [miniBatchSize, channels, height, width] -> 4d + * output [miniBatchSize, channels, 1, 1]<br>
- 5d (CNN3D) input with shape [miniBatchSize, channels, depth, + * height, width] -> 5d output [miniBatchSize, channels, 1, 1, 1]<br>
+ * + */ + @Builder.Default private boolean collapseDimensions = true; - private GlobalPoolingLayer(Builder builder) { - super(builder); - this.poolingType = builder.poolingType; - this.poolingDimensions = builder.poolingDimensions; - this.collapseDimensions = builder.collapseDimensions; - this.pnorm = builder.pnorm; - this.layerName = builder.layerName; + public static GlobalPoolingLayerBuilder builder() { + return innerBuilder(); } - public GlobalPoolingLayer() { - this(PoolingType.MAX); + public static GlobalPoolingLayerBuilder builder(PoolingType poolingType) { + return innerBuilder() + .poolingType(poolingType); } - public GlobalPoolingLayer(PoolingType poolingType) { - this(new GlobalPoolingLayer.Builder().poolingType(poolingType)); - } - - @Override public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + runInheritance(); org.deeplearning4j.nn.layers.pooling.GlobalPoolingLayer ret = new org.deeplearning4j.nn.layers.pooling.GlobalPoolingLayer(lconf, networkDataType); @@ -181,7 +232,7 @@ public class GlobalPoolingLayer extends NoParamLayer { fwdTrainInferenceWorkingPerEx = inputType.arrayElementsPerExample(); } - return new LayerMemoryReport.Builder(layerName, GlobalPoolingLayer.class, inputType, outputType) + return new LayerMemoryReport.Builder(name, GlobalPoolingLayer.class, inputType, outputType) .standardMemory(0, 0) //No params //Train + Inference: no additional working memory (except pnorm) - the reduction is the output activations .workingMemory(0, fwdTrainInferenceWorkingPerEx, 0, fwdTrainInferenceWorkingPerEx) @@ -189,114 +240,5 @@ public class GlobalPoolingLayer extends NoParamLayer { .build(); } - @Getter - @Setter - public static class Builder extends LayerConfiguration.Builder { - /** - * Pooling type for global pooling - */ - private PoolingType poolingType = PoolingType.MAX; - - /** - * Pooling dimensions. Note: most of the time, this doesn't need to be set, and the defaults can be used. - * Default for RNN data: pooling dimension 2 (time). Default for CNN data: pooling dimensions 2,3 (height and - * width) Default for CNN3D data: pooling dimensions 2,3,4 (depth, height and width) - * - */ - private int[] poolingDimensions; - - /** - * P-norm constant. Only used if using {@link PoolingType#PNORM} for the pooling type - * - */ - private int pnorm = 2; - - /** - * Whether to collapse dimensions when pooling or not. Usually you *do* want to do this. Default: true. If - * true:
- 3d (time series) input with shape [miniBatchSize, vectorSize, timeSeriesLength] -> 2d output - * [miniBatchSize, vectorSize]
- 4d (CNN) input with shape [miniBatchSize, channels, height, width] -> 2d - * output [miniBatchSize, channels]
- 5d (CNN3D) input with shape [miniBatchSize, channels, depth, height, - * width] -> 2d output [miniBatchSize, channels]
- * - * - * If false:
- 3d (time series) input with shape [miniBatchSize, vectorSize, timeSeriesLength] -> 3d output - * [miniBatchSize, vectorSize, 1]
- 4d (CNN) input with shape [miniBatchSize, channels, height, width] -> 2d - * output [miniBatchSize, channels, 1, 1]
- 5d (CNN3D) input with shape [miniBatchSize, channels, depth, - * height, width] -> 2d output [miniBatchSize, channels, 1, 1, 1]
- * - */ - private boolean collapseDimensions = true; - - public Builder() { - - } - - public Builder(PoolingType poolingType) { - this.setPoolingType(poolingType); - } - - /** - * Pooling dimensions. Note: most of the time, this doesn't need to be set, and the defaults can be used. - * Default for RNN data: pooling dimension 2 (time). Default for CNN data: pooling dimensions 2,3 (height and - * width) Default for CNN3D data: pooling dimensions 2,3,4 (depth, height and width) - * - * @param poolingDimensions Pooling dimensions to use - */ - public Builder poolingDimensions(int... poolingDimensions) { - this.setPoolingDimensions(poolingDimensions); - return this; - } - - /** - * @param poolingType Pooling type for global pooling - */ - public Builder poolingType(PoolingType poolingType) { - this.setPoolingType(poolingType); - return this; - } - - /** - * Whether to collapse dimensions when pooling or not. Usually you *do* want to do this. Default: true. If - * true:
- 3d (time series) input with shape [miniBatchSize, vectorSize, timeSeriesLength] -> 2d output - * [miniBatchSize, vectorSize]
- 4d (CNN) input with shape [miniBatchSize, channels, height, width] -> 2d - * output [miniBatchSize, channels]
- 5d (CNN3D) input with shape [miniBatchSize, channels, depth, height, - * width] -> 2d output [miniBatchSize, channels]
- * - * - * If false:
- 3d (time series) input with shape [miniBatchSize, vectorSize, timeSeriesLength] -> 3d output - * [miniBatchSize, vectorSize, 1]
- 4d (CNN) input with shape [miniBatchSize, channels, height, width] -> 2d - * output [miniBatchSize, channels, 1, 1]
- 5d (CNN3D) input with shape [miniBatchSize, channels, depth, - * height, width] -> 2d output [miniBatchSize, channels, 1, 1, 1]
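As a usage illustration for the new GlobalPoolingLayer factory methods and validated pnorm setter shown earlier in this hunk (a hedged sketch: the poolingDimensions/collapseDimensions setters are assumed to be the Lombok-generated ones, and the wrapper class is hypothetical):

import org.deeplearning4j.nn.conf.layers.GlobalPoolingLayer;
import org.deeplearning4j.nn.conf.layers.PoolingType;

class GlobalPoolingBuilderSketch {
  static GlobalPoolingLayer sketch() {
    // builder(PoolingType) replaces the old Builder(PoolingType) constructor;
    // pnorm(...) still rejects values <= 0, and collapseDimensions defaults to true.
    return GlobalPoolingLayer.builder(PoolingType.PNORM)
        .pnorm(2)
        .poolingDimensions(2, 3)   // pool over height and width of CNN activations
        .collapseDimensions(true)
        .build();
  }
}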
- * - * @param collapseDimensions Whether to collapse the dimensions or not - */ - public Builder collapseDimensions(boolean collapseDimensions) { - this.setCollapseDimensions(collapseDimensions); - return this; - } - - /** - * P-norm constant. Only used if using {@link PoolingType#PNORM} for the pooling type - * - * @param pnorm P-norm constant - */ - public Builder pnorm(int pnorm) { - if (pnorm <= 0) { - throw new IllegalArgumentException("Invalid input: p-norm value must be greater than 0. Got: " + pnorm); - } - this.setPnorm(pnorm); - return this; - } - - public void setPnorm(int pnorm){ - ValidationUtils.validateNonNegative(pnorm, "pnorm"); - this.pnorm = pnorm; - } - - @SuppressWarnings("unchecked") - public GlobalPoolingLayer build() { - return new GlobalPoolingLayer(this); - } - } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesBidirectionalLSTM.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesBidirectionalLSTM.java index 792d735c3..1110e15a6 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesBidirectionalLSTM.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesBidirectionalLSTM.java @@ -21,6 +21,7 @@ package org.deeplearning4j.nn.conf.layers; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.api.layers.LayerConstraint; @@ -30,7 +31,6 @@ import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.layers.recurrent.LSTMHelpers; import org.deeplearning4j.nn.params.GravesBidirectionalLSTMParamInitializer; import org.deeplearning4j.optimize.api.TrainingListener; -import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.activations.impl.ActivationSigmoid; import org.nd4j.linalg.api.buffer.DataType; @@ -39,33 +39,51 @@ import org.nd4j.linalg.api.ndarray.INDArray; import java.util.*; @Data -@NoArgsConstructor @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) @Deprecated +@SuperBuilder(buildMethodName = "initBuild") public class GravesBidirectionalLSTM extends BaseRecurrentLayer { - private double forgetGateBiasInit; - private IActivation gateActivationFn = new ActivationSigmoid(); + public static abstract class GravesBidirectionalLSTMBuilder> extends BaseRecurrentLayerBuilder { + public C build() { + C l = this.initBuild(); + l.initializeConstraints(); + return l; + } + } + /** + * Set forget gate bias initalizations. Values in range 1-5 can potentially help with learning or longer-term + * dependencies. + */ + @Builder.Default + private double forgetGateBiasInit = 1.0; + /** + * Activation function for the LSTM gates. Note: This should be bounded to range 0-1: sigmoid or hard sigmoid, + * for example + * + */ + @Builder.Default + private IActivation gateActivationFunction = new ActivationSigmoid(); + /** + * When using CuDNN and an error is encountered, should fallback to the non-CuDNN implementatation be allowed? + * If set to false, an exception in CuDNN will be propagated back to the user. 
If false, the built-in + * (non-CuDNN) implementation for GravesBidirectionalLSTM will be used + * + */ + @Builder.Default protected boolean helperAllowFallback = true; - private GravesBidirectionalLSTM(Builder builder) { - super(builder); - this.forgetGateBiasInit = builder.forgetGateBiasInit; - this.gateActivationFn = builder.gateActivationFn; - this.helperAllowFallback = builder.helperAllowFallback; - - initializeConstraints(builder); - } @Override - protected void initializeConstraints(LayerConfiguration.Builder builder) { - super.initializeConstraints(builder); - if (((Builder) builder).recurrentConstraints != null) { + protected void initializeConstraints() { + super.initializeConstraints(); + if (getRecurrentConstraints() != null) { if (constraints == null) { constraints = new ArrayList<>(); } - for (LayerConstraint c : ((Builder) builder).recurrentConstraints) { + for (LayerConstraint c : getRecurrentConstraints()) { LayerConstraint c2 = c.clone(); Set s = new HashSet<>(); s.add(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_FORWARDS); @@ -81,6 +99,7 @@ public class GravesBidirectionalLSTM extends BaseRecurrentLayer { int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + runInheritance(); org.deeplearning4j.nn.layers.recurrent.GravesBidirectionalLSTM ret = new org.deeplearning4j.nn.layers.recurrent.GravesBidirectionalLSTM(lconf, networkDataType); ret.addTrainingListeners(trainingListeners); @@ -102,89 +121,5 @@ public class GravesBidirectionalLSTM extends BaseRecurrentLayer { return LSTMHelpers.getMemoryReport(this, inputType); } - @AllArgsConstructor - @NoArgsConstructor - @Getter - @Setter - public static class Builder extends BaseRecurrentLayer.Builder { - - /** - * Set forget gate bias initalizations. Values in range 1-5 can potentially help with learning or longer-term - * dependencies. - */ - private double forgetGateBiasInit = 1.0; - - /** - * Activation function for the LSTM gates. Note: This should be bounded to range 0-1: sigmoid or hard sigmoid, - * for example - * - */ - private IActivation gateActivationFn = new ActivationSigmoid(); - - /** - * When using CuDNN and an error is encountered, should fallback to the non-CuDNN implementatation be allowed? - * If set to false, an exception in CuDNN will be propagated back to the user. If false, the built-in - * (non-CuDNN) implementation for GravesBidirectionalLSTM will be used - * - */ - protected boolean helperAllowFallback = true; - - /** - * Set forget gate bias initalizations. Values in range 1-5 can potentially help with learning or longer-term - * dependencies. - */ - public Builder forgetGateBiasInit(double biasInit) { - this.setForgetGateBiasInit(biasInit); - return this; - } - - /** - * Activation function for the LSTM gates. Note: This should be bounded to range 0-1: sigmoid or hard sigmoid, - * for example - * - * @param gateActivationFn Activation function for the LSTM gates - */ - public Builder gateActivationFunction(String gateActivationFn) { - return gateActivationFunction(Activation.fromString(gateActivationFn)); - } - - /** - * Activation function for the LSTM gates. 
Note: This should be bounded to range 0-1: sigmoid or hard sigmoid, - * for example - * - * @param gateActivationFn Activation function for the LSTM gates - */ - public Builder gateActivationFunction(Activation gateActivationFn) { - return gateActivationFunction(gateActivationFn.getActivationFunction()); - } - - /** - * Activation function for the LSTM gates. Note: This should be bounded to range 0-1: sigmoid or hard sigmoid, - * for example - * - * @param gateActivationFn Activation function for the LSTM gates - */ - public Builder gateActivationFunction(IActivation gateActivationFn) { - this.setGateActivationFn(gateActivationFn); - return this; - } - - /** - * When using a helper (CuDNN or MKLDNN in some cases) and an error is encountered, should fallback to the non-helper implementation be allowed? - * If set to false, an exception in the helper will be propagated back to the user. If false, the built-in - * (non-helper) implementation for GravesBidirectionalLSTM will be used - * - * @param allowFallback Whether fallback to non-helper implementation should be used - */ - public Builder helperAllowFallback(boolean allowFallback) { - this.setHelperAllowFallback(allowFallback); - return this; - } - - @SuppressWarnings("unchecked") - public GravesBidirectionalLSTM build() { - return new GravesBidirectionalLSTM(this); - } - } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesLSTM.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesLSTM.java index 9c50ccba4..c5521f8b4 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesLSTM.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesLSTM.java @@ -20,7 +20,12 @@ package org.deeplearning4j.nn.conf.layers; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Map; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.api.layers.LayerConstraint; @@ -35,83 +40,74 @@ import org.nd4j.linalg.activations.impl.ActivationSigmoid; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.Map; - @Deprecated -@Data -@NoArgsConstructor @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@SuperBuilder(buildMethodName = "initBuild") public class GravesLSTM extends AbstractLSTM { - private double forgetGateBiasInit; - private IActivation gateActivationFn = new ActivationSigmoid(); + private double forgetGateBiasInit; + @Builder.Default @Getter private IActivation gateActivationFunction = new ActivationSigmoid(); - private GravesLSTM(Builder builder) { - super(builder); - this.forgetGateBiasInit = builder.forgetGateBiasInit; - this.gateActivationFn = builder.gateActivationFn; - - initializeConstraints(builder); + @Override + protected void initializeConstraints() { + super.initializeConstraints(); + if (getRecurrentConstraints() != null) { + if (constraints == null) { + constraints = new ArrayList<>(); + } + for (LayerConstraint c : getRecurrentConstraints()) { + LayerConstraint c2 = c.clone(); + c2.setParams(Collections.singleton(GravesLSTMParamInitializer.RECURRENT_WEIGHT_KEY)); + constraints.add(c2); + } } + } - @Override - protected void initializeConstraints(LayerConfiguration.Builder builder) 
{ - super.initializeConstraints(builder); - if (((Builder) builder).recurrentConstraints != null) { - if (constraints == null) { - constraints = new ArrayList<>(); - } - for (LayerConstraint c : ((Builder) builder).recurrentConstraints) { - LayerConstraint c2 = c.clone(); - c2.setParams(Collections.singleton(GravesLSTMParamInitializer.RECURRENT_WEIGHT_KEY)); - constraints.add(c2); - } - } + @Override + public Layer instantiate( + NeuralNetConfiguration conf, + Collection trainingListeners, + int layerIndex, + INDArray layerParamsView, + boolean initializeParams, + DataType networkDataType) { + LayerValidation.assertNInNOutSet("GravesLSTM", getName(), layerIndex, getNIn(), getNOut()); + + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + lconf.setNetConfiguration(conf); + runInheritance(); + + org.deeplearning4j.nn.layers.recurrent.GravesLSTM ret = + new org.deeplearning4j.nn.layers.recurrent.GravesLSTM(lconf, networkDataType); + + ret.addTrainingListeners(trainingListeners); + ret.setIndex(layerIndex); + ret.setParamsViewArray(layerParamsView); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); + ret.setParamTable(paramTable); + ret.setLayerConfiguration(lconf); + return ret; + } + + @Override + public ParamInitializer initializer() { + return GravesLSTMParamInitializer.getInstance(); + } + + @Override + public LayerMemoryReport getMemoryReport(InputType inputType) { + // TODO - CuDNN etc + return LSTMHelpers.getMemoryReport(this, inputType); + } + + public abstract static class GravesLSTMBuilder< + C extends GravesLSTM, B extends GravesLSTMBuilder> + extends AbstractLSTMBuilder { + public C build() { + C l = initBuild(); + l.initializeConstraints(); + return l; } - - @Override - public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, - int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { - LayerValidation.assertNInNOutSet("GravesLSTM", getLayerName(), layerIndex, getNIn(), getNOut()); - - LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); - lconf.setNetConfiguration(conf); - runInheritance(); - - org.deeplearning4j.nn.layers.recurrent.GravesLSTM ret = - new org.deeplearning4j.nn.layers.recurrent.GravesLSTM(lconf, networkDataType); - - ret.addTrainingListeners(trainingListeners); - ret.setIndex(layerIndex); - ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(this, layerParamsView, initializeParams); - ret.setParamTable(paramTable); - ret.setLayerConfiguration(lconf); - return ret; - } - - @Override - public ParamInitializer initializer() { - return GravesLSTMParamInitializer.getInstance(); - } - - @Override - public LayerMemoryReport getMemoryReport(InputType inputType) { - //TODO - CuDNN etc - return LSTMHelpers.getMemoryReport(this, inputType); - } - - @AllArgsConstructor - public static class Builder extends AbstractLSTM.Builder { - - @SuppressWarnings("unchecked") - public GravesLSTM build() { - return new GravesLSTM(this); - } - } - + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LSTM.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LSTM.java index 85c440c18..19b736993 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LSTM.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LSTM.java @@ -21,6 +21,7 @@ package org.deeplearning4j.nn.conf.layers; import lombok.*; 
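To make the migrated LSTM-family builders above concrete, a hedged sketch (GravesLSTM shown; the nIn/nOut and field-derived setter names are assumptions about what @SuperBuilder generates, and the wrapper class is hypothetical):

import org.deeplearning4j.nn.conf.layers.GravesLSTM;
import org.nd4j.linalg.activations.impl.ActivationSigmoid;

class LstmBuilderSketch {
  static GravesLSTM sketch() {
    // The custom build() in GravesLSTMBuilder calls initBuild() and then
    // initializeConstraints(), mirroring what the old private constructor used to do.
    return GravesLSTM.builder()
        .nIn(100)
        .nOut(200)
        .forgetGateBiasInit(1.0)                          // values in 1-5 can help with longer-term dependencies
        .gateActivationFunction(new ActivationSigmoid())  // gates should stay in the 0-1 range
        .build();
  }
}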
+import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.api.layers.LayerConstraint; @@ -30,8 +31,7 @@ import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.layers.recurrent.LSTMHelpers; import org.deeplearning4j.nn.params.LSTMParamInitializer; import org.deeplearning4j.optimize.api.TrainingListener; -import org.nd4j.linalg.activations.IActivation; -import org.nd4j.linalg.activations.impl.ActivationSigmoid; +import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; @@ -41,29 +41,29 @@ import java.util.Collections; import java.util.Map; @Data -@NoArgsConstructor @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@SuperBuilder(buildMethodName = "initBuild") public class LSTM extends AbstractLSTM { private double forgetGateBiasInit; - private IActivation gateActivationFn = new ActivationSigmoid(); - private LSTM(Builder builder) { - super(builder); - this.forgetGateBiasInit = builder.forgetGateBiasInit; - this.gateActivationFn = builder.gateActivationFn; - initializeConstraints(builder); + public static abstract class LSTMBuilder> extends AbstractLSTMBuilder { + @Override public C build() { + C l = this.initBuild(); + l.initializeConstraints(); + return l; + } } @Override - protected void initializeConstraints(LayerConfiguration.Builder builder) { - super.initializeConstraints(builder); - if (((Builder) builder).recurrentConstraints != null) { + protected void initializeConstraints() { + super.initializeConstraints(); + if (recurrentConstraints != null) { if (constraints == null) { constraints = new ArrayList<>(); } - for (LayerConstraint c : ((Builder) builder).recurrentConstraints) { + for (LayerConstraint c : recurrentConstraints) { LayerConstraint c2 = c.clone(); c2.setParams(Collections.singleton(LSTMParamInitializer.RECURRENT_WEIGHT_KEY)); constraints.add(c2); @@ -74,8 +74,10 @@ public class LSTM extends AbstractLSTM { @Override public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { - LayerValidation.assertNInNOutSet("LSTM", getLayerName(), layerIndex, getNIn(), getNOut()); + LayerValidation.assertNInNOutSet("LSTM", getName(), layerIndex, getNIn(), getNOut()); LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + runInheritance(); + org.deeplearning4j.nn.layers.recurrent.LSTM ret = new org.deeplearning4j.nn.layers.recurrent.LSTM(lconf, networkDataType); ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); @@ -96,15 +98,4 @@ public class LSTM extends AbstractLSTM { //TODO - CuDNN etc return LSTMHelpers.getMemoryReport(this, inputType); } - - @NoArgsConstructor - public static class Builder extends AbstractLSTM.Builder { - - - @SuppressWarnings("unchecked") - public LSTM build() { - return new LSTM(this); - } - } - } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerConfiguration.java index 394012c4f..06bcad3ef 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerConfiguration.java @@ -20,21 +20,13 @@ package 
org.deeplearning4j.nn.conf.layers; -import com.fasterxml.jackson.annotation.JsonIdentityInfo; +import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonTypeInfo; -import com.fasterxml.jackson.annotation.ObjectIdGenerators; import java.io.Serializable; import java.lang.reflect.Field; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.HashSet; -import java.util.List; -import lombok.Data; -import lombok.Getter; -import lombok.NoArgsConstructor; -import lombok.NonNull; -import lombok.Setter; +import java.util.*; +import lombok.*; +import lombok.experimental.SuperBuilder; import lombok.extern.slf4j.Slf4j; import net.brutex.ai.dnn.api.ILayerConfiguration; import net.brutex.ai.dnn.api.LayerType; @@ -48,370 +40,329 @@ import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.conf.weightnoise.IWeightNoise; import org.deeplearning4j.optimize.api.TrainingListener; +import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.learning.config.IUpdater; import org.nd4j.linalg.learning.regularization.Regularization; -/** - * A neural network layer. - * - */ - +/** A neural network layer. */ @JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, include = JsonTypeInfo.As.PROPERTY, property = "@class") -@Data +@EqualsAndHashCode @NoArgsConstructor -@JsonIdentityInfo(generator= ObjectIdGenerators.IntSequenceGenerator.class, property="@id") +// @JsonIdentityInfo(generator= ObjectIdGenerators.IntSequenceGenerator.class, property="@id") @Slf4j -public abstract class LayerConfiguration implements ILayerConfiguration, Serializable, Cloneable { // ITraininableLayerConfiguration +@SuperBuilder +public abstract class LayerConfiguration + implements ILayerConfiguration, Serializable, Cloneable { // ITrainableLayerConfiguration - protected String layerName; - @Getter - protected List variables = new ArrayList<>(); - protected List constraints; - protected IWeightNoise weightNoise; - private IDropout iDropout; - /** - * The type of the layer, basically defines the base class and its properties - */ - @Getter @Setter @NonNull - private LayerType type = LayerType.UNKNOWN; - @Getter @Setter - private NeuralNetConfiguration netConfiguration; - @Getter @Setter - private IActivation activationFn; + @Getter @Setter protected String name; + @Getter @Setter protected List allParamConstraints; + @Getter @Setter protected List weightConstraints; + @Getter @Setter protected List biasConstraints; + @Getter @Setter protected List constraints; + @Getter @Setter protected IWeightNoise weightNoise; + @Builder.Default private @Getter @Setter LinkedHashSet variables = new LinkedHashSet<>(); + @Getter @Setter private IDropout dropOut; + /** The type of the layer, basically defines the base class and its properties */ + @Builder.Default @Getter @Setter @NonNull private LayerType type = LayerType.UNKNOWN; + /** + * A reference to the neural net configuration. This field is excluded from json serialization as + * well as from equals check to avoid circular referenced. + * + *

-- GETTER -- Get the neural net configuration this layer configuration is part of + * + * @return neural net configuration + *

-- SETTER -- Set the neural net configuration for this layer + * @param neural net configuration + */ + @Getter @Setter @JsonIgnore @EqualsAndHashCode.Exclude + private NeuralNetConfiguration netConfiguration; + /** + * From an Activation, we can derive the IActivation (function) using {@link + * Activation#getActivationFunction()} but not vice versa. The default is Identity Activation. + */ + @Builder.Default @Getter @Setter private IActivation activation = Activation.IDENTITY; - public LayerConfiguration(Builder builder) { - this.layerName = builder.layerName; - this.iDropout = builder.iDropout; - } - - public void addVariable(String s) {variables.add(s);} - - public String toJson() { - throw new RuntimeException("toJson is not implemented for LayerConfiguration"); - } - - /** - * Initialize the weight constraints. Should be called last, in the outer-most constructor - */ - protected void initializeConstraints(Builder builder) { - //Note: this has to be done AFTER all constructors have finished - otherwise the required - // fields may not yet be set yet - List allConstraints = new ArrayList<>(); - if (builder.allParamConstraints != null && !initializer().paramKeys(this).isEmpty()) { - for (LayerConstraint c : builder.allParamConstraints) { - LayerConstraint c2 = c.clone(); - c2.setParams(new HashSet<>(initializer().paramKeys(this))); - allConstraints.add(c2); - } - } - - if (builder.weightConstraints != null && !initializer().weightKeys(this).isEmpty()) { - for (LayerConstraint c : builder.weightConstraints) { - LayerConstraint c2 = c.clone(); - c2.setParams(new HashSet<>(initializer().weightKeys(this))); - allConstraints.add(c2); - } - } - - if (builder.biasConstraints != null && !initializer().biasKeys(this).isEmpty()) { - for (LayerConstraint c : builder.biasConstraints) { - LayerConstraint c2 = c.clone(); - c2.setParams(new HashSet<>(initializer().biasKeys(this))); - allConstraints.add(c2); - } - } - if (!allConstraints.isEmpty()) { - this.constraints = allConstraints; - } else { - this.constraints = null; - } - this.iDropout = builder.iDropout; - } - - /** - * Reset the learning related configs of the layer to default. When instantiated with a global - * neural network configuration the parameters specified in the neural network configuration - * will be used. For internal use with the transfer learning API. Users should not have to call - * this method directly. - */ - public void resetLayerDefaultConfig() { - //clear the learning related params for all layers in the origConf and set to defaults - this.iDropout = null; - this.constraints = null; - } - - /** - * Migration workaround //TODO To be removed - * - * @return a layer configuration - */ - @Deprecated - public LayerConfiguration getLayer() { - return this; - } - - @Override - public LayerConfiguration clone() { - try { - LayerConfiguration ret = (LayerConfiguration) super.clone(); - //Let's check for any INDArray fields and dup them (in case cloned layer will be used in different threads on CUDA... 
- // we don't want it being relocated contantly between devices) - Class c = getClass(); - while (c != Object.class) { - Field[] fields = c.getDeclaredFields(); - for (Field f : fields) { - if (f.getType() == INDArray.class) { - f.setAccessible(true); - INDArray toClone; - try { - toClone = (INDArray) f.get(this); - } catch (Exception e) { - throw new RuntimeException(e); - } - if (toClone != null) { - try { - f.set(this, toClone.dup()); - } catch (Exception e) { - throw new RuntimeException(e); - } - } - } - } - - c = c.getSuperclass(); - } - - return ret; - } catch (CloneNotSupportedException e) { - throw new RuntimeException(e); - } - } - - public abstract org.deeplearning4j.nn.api.Layer instantiate( @NonNull NeuralNetConfiguration conf, - Collection trainingListeners, int layerIndex, INDArray layerParamsView, - boolean initializeParams, DataType networkDataType); - - /** - * @return The parameter initializer for this model - */ - public abstract ParamInitializer initializer(); - - /** - * For a given type of input to this layer, what is the type of the output? - * - * @param layerIndex Index of the layer - * @param inputType Type of input for the layer - * @return Type of output from the layer - * @throws IllegalStateException if input type is invalid for this layer - */ - public abstract InputType getOutputType(int layerIndex, InputType inputType); - - /** - * Set the nIn value (number of inputs, or input channels for CNNs) based on the given input - * type - * - * @param inputType Input type for this layer - * @param override If false: only set the nIn value if it's not already set. If true: set it - * regardless of whether it's already set or not. - * @throws IllegalStateException if input type is invalid for this layer - */ - public abstract void setNIn(InputType inputType, boolean override); - - /** - * For the given type of input to this layer, what preprocessor (if any) is required?
- * Returns null if no preprocessor is required, otherwise returns an appropriate {@link - * InputPreProcessor} for this layer, such as a {@link org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor} - * - * @param inputType InputType to this layer - * @return Null if no preprocessor is required, otherwise the type of preprocessor necessary for - * this layer/input combination - * @throws IllegalStateException if input type is invalid for this layer - */ - public abstract InputPreProcessor getPreProcessorForInputType(InputType inputType); - - /** - * Get the regularization types (l1/l2/weight decay) for the given parameter. Different parameters may have different - * regularization types. - * - * @param paramName Parameter name ("W", "b" etc) - * @return Regularization types (if any) for the specified parameter - */ - public abstract List getRegularizationByParam(String paramName); - - /** - * Is the specified parameter a layerwise pretraining only parameter?
For example, visible - * bias params in an autoencoder (or, decoder params in a variational autoencoder) aren't used - * during supervised backprop.
Layers (like DenseLayerConfiguration, etc) with no pretrainable parameters - * will return false for all (valid) inputs. - * - * @param paramName Parameter name/key - * @return True if the parameter is for layerwise pretraining only, false otherwise - */ - public abstract boolean isPretrainParam(String paramName); - - /** - * Get the updater for the given parameter. Typically, the same updater will be used for all - * parameters, but this is not necessarily the case - * - * @param paramName Parameter name - * @return IUpdater for the parameter - */ - public IUpdater getUpdaterByParam(String paramName) { - throw new UnsupportedOperationException( - "Not supported: all layers with parameters should override this method"); - } - - public IUpdater getIUpdater() { - throw new UnsupportedOperationException( - "Not supported: all layers with parameters should override this method"); - } - - public void setIUpdater(IUpdater iUpdater) { - log.warn("Setting an IUpdater on {} with name {} has no effect.", getClass().getSimpleName(), getLayerName()); - } - - /** - * This is a report of the estimated memory consumption for the given layer - * - * @param inputType Input type to the layer. Memory consumption is often a function of the input - * type - * @return Memory report for the layer - */ - public abstract LayerMemoryReport getMemoryReport(InputType inputType); - - public void clearVariables() { - this.variables.clear(); + /** + * Get the activation interface (function) from the activation. The activation must have been set + * prior to this call. + * + * @return activation function + */ + public IActivation getActivationFn() { + if (activation == null) + throw new RuntimeException( + "No activation is set for this Layer. Cannot return activation function."); + return activation; } - /** - * Inherit setting from neural network for those settings, that are not already set or do have - * a layer(type) specific default. This implementation does not require the neural network configuration to be - * the same as the one returned from this layers {@link #getNetConfiguration()}. - * - * @param conf a neural net configration to inherit parameters from - * - */ - public void runInheritance(@NonNull NeuralNetConfiguration conf) { - if(this.activationFn == null ) this.activationFn = conf.getActivation(); - if(this.iDropout == null ) this.iDropout = conf.getIdropOut(); - if(this.weightNoise == null) this.weightNoise = conf.getWeightNoise(); + public void addVariable(String s) { + variables.add(s); + } + + public String toJson() { + throw new RuntimeException("toJson is not implemented for LayerConfiguration"); + } + + /** Initialize the weight constraints. Should be called last, in the outer-most constructor */ + protected void initializeConstraints() { + // Note: this has to be done AFTER all constructors have finished - otherwise the required + // fields may not yet be set yet + List allConstraints = new ArrayList<>(); + if (allParamConstraints != null && !initializer().paramKeys(this).isEmpty()) { + for (LayerConstraint c : allParamConstraints) { + LayerConstraint c2 = c.clone(); + c2.setParams(new HashSet<>(initializer().paramKeys(this))); + allConstraints.add(c2); + } } - /** Runs {@link #runInheritance(NeuralNetConfiguration)} using the layers configurations embedded neural net - * configuration (the one returned from {@link #getNetConfiguration()}. 
- */ - public void runInheritance() { - runInheritance(getNetConfiguration()); + if (weightConstraints != null && !initializer().weightKeys(this).isEmpty()) { + for (LayerConstraint c : weightConstraints) { + LayerConstraint c2 = c.clone(); + c2.setParams(new HashSet<>(initializer().weightKeys(this))); + allConstraints.add(c2); + } } - @SuppressWarnings("unchecked") - @Getter - @Setter - public abstract static class Builder> { + if (biasConstraints != null && !initializer().biasKeys(this).isEmpty()) { + for (LayerConstraint c : biasConstraints) { + LayerConstraint c2 = c.clone(); + c2.setParams(new HashSet<>(initializer().biasKeys(this))); + allConstraints.add(c2); + } + } + if (!allConstraints.isEmpty()) { + this.constraints = allConstraints; + } else { + this.constraints = null; + } + } - protected String layerName; + /** + * Reset the learning related configs of the layer to default. When instantiated with a global + * neural network configuration the parameters specified in the neural network configuration will + * be used. For internal use with the transfer learning API. Users should not have to call this + * method directly. + */ + public void resetLayerDefaultConfig() { + // clear the learning related params for all layers in the origConf and set to defaults + this.dropOut = null; + this.constraints = null; + } - protected List allParamConstraints; + /** + * Migration workaround //TODO To be removed + * + * @return a layer configuration + */ + @Deprecated + @JsonIgnore + public LayerConfiguration getLayer() { + return this; + } - protected List weightConstraints; - - protected List biasConstraints; - - protected IDropout iDropout; - - /** - * ILayer name assigns layer string name. Allows easier differentiation between layers. - */ - public T name(String layerName) { - this.setLayerName(layerName); - return (T) this; - } - - /** - * Dropout probability. This is the probability of retaining each input activation - * value for a layer. dropOut(x) will keep an input activation with probability x, and set - * to 0 with probability 1-x.
dropOut(0.0) is a special value / special case - when set - * to 0.0., dropout is disabled (not applied). Note that a dropout value of 1.0 is - * functionally equivalent to no dropout: i.e., 100% probability of retaining each input - * activation.
When useDropConnect(boolean) is set to true (false by default), this - * method sets the drop connect probability instead. - *

- * Note 1: Dropout is applied at training time only - and is automatically not applied at - * test time (for evaluation, etc)
Note 2: This sets the probability per-layer. Care - * should be taken when setting lower values for complex networks (too much information may - * be lost with aggressive (very low) dropout values).
Note 3: Frequently, dropout is - * not applied to (or, has higher retain probability for) input (first layer) layers. - * Dropout is also often not applied to output layers. This needs to be handled MANUALLY by - * the user - set .dropout(0) on those layers when using global dropout setting.
Note 4: - * Implementation detail (most users can ignore): DL4J uses inverted dropout, as described - * here: - * http://cs231n.github.io/neural-networks-2/ - *

- * - * @param inputRetainProbability Dropout probability (probability of retaining each input - * activation value for a layer) - * @see #dropOut(IDropout) - */ - public T dropOut(double inputRetainProbability) { - if (inputRetainProbability == 0.0) { - return dropOut(null); + @Override + public LayerConfiguration clone() { + try { + LayerConfiguration ret = (LayerConfiguration) super.clone(); + // Let's check for any INDArray fields and dup them (in case cloned layer will be used in + // different threads on CUDA... + // we don't want it being relocated contantly between devices) + Class c = getClass(); + while (c != Object.class) { + Field[] fields = c.getDeclaredFields(); + for (Field f : fields) { + if (f.getType() == INDArray.class) { + f.setAccessible(true); + INDArray toClone; + try { + toClone = (INDArray) f.get(this); + } catch (Exception e) { + throw new RuntimeException(e); } - return dropOut(new Dropout(inputRetainProbability)); + if (toClone != null) { + try { + f.set(this, toClone.dup()); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + } } - /** - * Set the dropout for all layers in this network - * - * @param dropout Dropout, such as {@link Dropout}, {@link org.deeplearning4j.nn.conf.dropout.GaussianDropout}, - * {@link org.deeplearning4j.nn.conf.dropout.GaussianNoise} etc - */ - public T dropOut(IDropout dropout) { - this.setIDropout(dropout); - return (T) this; - } + c = c.getSuperclass(); + } - /** - * Set constraints to be applied to this layer. Default: no constraints.
Constraints can - * be used to enforce certain conditions (non-negativity of parameters, max-norm - * regularization, etc). These constraints are applied at each iteration, after the - * parameters have been updated. - * - * @param constraints Constraints to apply to all parameters of this layer - */ - public T constrainAllParameters(LayerConstraint... constraints) { - this.setAllParamConstraints(Arrays.asList(constraints)); - return (T) this; - } - - /** - * Set constraints to be applied to bias parameters of this layer. Default: no - * constraints.
Constraints can be used to enforce certain conditions (non-negativity of - * parameters, max-norm regularization, etc). These constraints are applied at each - * iteration, after the parameters have been updated. - * - * @param constraints Constraints to apply to all bias parameters of this layer - */ - public T constrainBias(LayerConstraint... constraints) { - this.setBiasConstraints(Arrays.asList(constraints)); - return (T) this; - } - - /** - * Set constraints to be applied to the weight parameters of this layer. Default: no - * constraints.
Constraints can be used to enforce certain conditions (non-negativity of - * parameters, max-norm regularization, etc). These constraints are applied at each - * iteration, after the parameters have been updated. - * - * @param constraints Constraints to apply to all weight parameters of this layer - */ - public T constrainWeights(LayerConstraint... constraints) { - this.setWeightConstraints(Arrays.asList(constraints)); - return (T) this; - } - - public abstract E build(); + return ret; + } catch (CloneNotSupportedException e) { + throw new RuntimeException(e); } + } + + public abstract org.deeplearning4j.nn.api.Layer instantiate( + @NonNull NeuralNetConfiguration conf, + Collection trainingListeners, + int layerIndex, + INDArray layerParamsView, + boolean initializeParams, + DataType networkDataType); + + /** + * @return The parameter initializer for this model + */ + public abstract ParamInitializer initializer(); + + /** + * For a given type of input to this layer, what is the type of the output? + * + * @param layerIndex Index of the layer + * @param inputType Type of input for the layer + * @return Type of output from the layer + * @throws IllegalStateException if input type is invalid for this layer + */ + public abstract InputType getOutputType(int layerIndex, InputType inputType); + + /** + * Set the nIn value (number of inputs, or input channels for CNNs) based on the given input type + * + * @param inputType Input type for this layer + * @param override If false: only set the nIn value if it's not already set. If true: set it + * regardless of whether it's already set or not. + * @throws IllegalStateException if input type is invalid for this layer + */ + public abstract void setNIn(InputType inputType, boolean override); + + /** + * For the given type of input to this layer, what preprocessor (if any) is required?
+ * Returns null if no preprocessor is required, otherwise returns an appropriate {@link + * InputPreProcessor} for this layer, such as a {@link + * org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor} + * + * @param inputType InputType to this layer + * @return Null if no preprocessor is required, otherwise the type of preprocessor necessary for + * this layer/input combination + * @throws IllegalStateException if input type is invalid for this layer + */ + public abstract InputPreProcessor getPreProcessorForInputType(InputType inputType); + + /** + * Get the regularization types (l1/l2/weight decay) for the given parameter. Different parameters + * may have different regularization types. + * + * @param paramName Parameter name ("W", "b" etc) + * @return Regularization types (if any) for the specified parameter + */ + public abstract List getRegularizationByParam(String paramName); + + /** + * Is the specified parameter a layerwise pretraining only parameter?
+ * For example, visible bias params in an autoencoder (or, decoder params in a variational + * autoencoder) aren't used during supervised backprop.
+ * Layers (like DenseLayerConfiguration, etc) with no pretrainable parameters will return false + * for all (valid) inputs. + * + * @param paramName Parameter name/key + * @return True if the parameter is for layerwise pretraining only, false otherwise + */ + public abstract boolean isPretrainParam(String paramName); + + /** + * Get the updater for the given parameter. Typically, the same updater will be used for all + * parameters, but this is not necessarily the case. + * + * @param paramName Parameter name + * @return IUpdater for the parameter + */ + public IUpdater getUpdaterByParam(String paramName) { + throw new UnsupportedOperationException( + "Not supported: all layers with parameters should override this method"); + } + + public IUpdater getIUpdater() { + throw new UnsupportedOperationException( + "Not supported: all layers with parameters should override this method"); + } + + public void setIUpdater(IUpdater iUpdater) { + log.warn( + "Setting an IUpdater on {} with name {} has no effect.", getClass().getSimpleName(), name); + } + + /** + * This is a report of the estimated memory consumption for the given layer + * + * @param inputType Input type to the layer. Memory consumption is often a function of the input + * type + * @return Memory report for the layer + */ + public abstract LayerMemoryReport getMemoryReport(InputType inputType); + + public void clearVariables() { + this.variables.clear(); + } + + /** + * Inherit settings from the neural network for those settings that are not already set or that + * have a layer(type)-specific default. This implementation does not require the neural network + * configuration to be the same as the one returned from this layer's {@link + * #getNetConfiguration()}. + * + * @param conf a neural net configuration to inherit parameters from + */ + public void runInheritance(@NonNull NeuralNetConfiguration conf) { + if (this.activation == null) this.activation = conf.getActivation(); + if (this.dropOut == null) this.dropOut = conf.getIdropOut(); + if (this.weightNoise == null) this.weightNoise = conf.getWeightNoise(); + } + + /** + * Runs {@link #runInheritance(NeuralNetConfiguration)} using the layer configuration's embedded + * neural net configuration (the one returned from {@link #getNetConfiguration()}). + */ + public void runInheritance() { + runInheritance(getNetConfiguration()); + } + + public abstract static class LayerConfigurationBuilder< + C extends LayerConfiguration, B extends LayerConfigurationBuilder> { + + public B dropOut(double d) { + this.dropOut(new Dropout(d)); + return self(); + } + + public B dropOut(IDropout d) { + this.dropOut = d; + return self(); + } + + /** + * Set constraints to be applied to all layers. Default: no constraints.
+ * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm + * regularization, etc). These constraints are applied at each iteration, after the parameters + * have been updated.
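+   * For example (illustrative): {@code constrainBias(new NonNegativeConstraint())} keeps the bias parameters non-negative after each update.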
+ * Note: values set by this method will be applied to all applicable layers in the network, + * unless a different value is explicitly set on a given layer. In other words: values set via + * this method are used as the default value, and can be overridden on a per-layer basis. + * + * @param constraints Constraints to apply to all bias parameters of all layers + */ + public B constrainBias(LayerConstraint... constraints) { + biasConstraints = Arrays.asList(constraints); + return self(); + } + + /** + * We do this to avoid Lombok bug https://github.com/projectlombok/lombok/issues/3419: some + * child classes may specify their own buildMethodName in @SuperBuilder, so "initBuild" is used + * consistently here. + * + * @return the layer configuration produced by {@link #build()} + */ + public C initBuild() { + return build(); + } + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerValidation.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerValidation.java index fcde1b127..1fd8db106 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerValidation.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerValidation.java @@ -203,9 +203,9 @@ public class LayerValidation { } } - if (bLayer.getIDropout() == null) { + if (bLayer.getDropOut() == null) { - bLayer.setIDropout(iDropout); + bLayer.setDropOut(iDropout); } } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LearnedSelfAttentionLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LearnedSelfAttentionLayer.java index f469a7f8c..a68ae9872 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LearnedSelfAttentionLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LearnedSelfAttentionLayer.java @@ -20,7 +20,9 @@ package org.deeplearning4j.nn.conf.layers; +import java.util.Map; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.RNNFormat; @@ -32,223 +34,158 @@ import org.nd4j.autodiff.samediff.SDIndex; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.common.base.Preconditions; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.memory.MemoryWorkspace; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.common.primitives.Pair; - -import java.util.Map; @Data @EqualsAndHashCode(callSuper = true) +@SuperBuilder(buildMethodName = "initBuild") public class LearnedSelfAttentionLayer extends SameDiffLayer { - private long nIn; - private long nOut; - private int nHeads; - private long headSize; - private boolean projectInput; - private int nQueries; + private static final String WEIGHT_KEY_QUERY_PROJECTION = "Wq"; + private static final String WEIGHT_KEY_KEY_PROJECTION = "Wk"; + private static final String WEIGHT_KEY_VALUE_PROJECTION = "Wv"; + private static final String WEIGHT_KEY_OUT_PROJECTION = "Wo"; + private static final String WEIGHT_QUERIES = "Q"; + /** Number of inputs to the layer (input size) */ + private int nIn; + /** Number of outputs (output size) */ + private int nOut; + /** Number of Attention Heads */ + private int nHeads; + /** Size of attention heads */ + private int headSize; + /** Project input before applying attention or not. 
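+   * If this is false, the builder requires nHeads == 1 and nIn == nOut (see the checks in the builder's build() method).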
*/ + private boolean projectInput; + /** Number of queries to learn */ + private int nQueries; - private static final String WEIGHT_KEY_QUERY_PROJECTION = "Wq"; - private static final String WEIGHT_KEY_KEY_PROJECTION = "Wk"; - private static final String WEIGHT_KEY_VALUE_PROJECTION = "Wv"; - private static final String WEIGHT_KEY_OUT_PROJECTION = "Wo"; - private static final String WEIGHT_QUERIES = "Q"; - private LearnedSelfAttentionLayer(){/*No arg constructor for serialization*/} + @Override + public InputPreProcessor getPreProcessorForInputType(InputType inputType) { + return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType, RNNFormat.NCW, getName()); + } - protected LearnedSelfAttentionLayer(Builder builder){ - super(builder); - nIn = builder.nIn; - nOut = builder.nOut; - nHeads = builder.nHeads; - headSize = builder.headSize == 0 ? nOut / nHeads : builder.headSize; - projectInput = builder.projectInput; - nQueries = builder.nQueries; + @Override + public void setNIn(InputType inputType, boolean override) { + if (inputType == null || inputType.getType() != InputType.Type.RNN) { + throw new IllegalStateException( + "Invalid input for Learned Self Attention layer (layer name = \"" + + getName() + + "\"): expect RNN input type with size > 0. Got: " + + inputType); } - @Override - public InputPreProcessor getPreProcessorForInputType(InputType inputType) { - return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType, RNNFormat.NCW,getLayerName()); + if (nIn <= 0 || override) { + InputType.InputTypeRecurrent r = (InputType.InputTypeRecurrent) inputType; + this.nIn = (int) r.getSize(); + } + } + + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + if (inputType == null || inputType.getType() != InputType.Type.RNN) { + throw new IllegalStateException( + "Invalid input for Learned Self Attention layer (layer index = " + + layerIndex + + ", layer name = \"" + + getName() + + "\"): expect RNN input type with size > 0. Got: " + + inputType); } - @Override - public void setNIn(InputType inputType, boolean override) { - if (inputType == null || inputType.getType() != InputType.Type.RNN) { - throw new IllegalStateException("Invalid input for Learned Self Attention layer (layer name = \"" + getLayerName() - + "\"): expect RNN input type with size > 0. Got: " + inputType); - } - - if (nIn <= 0 || override) { - InputType.InputTypeRecurrent r = (InputType.InputTypeRecurrent) inputType; - this.nIn = r.getSize(); - } + if (projectInput) { + return InputType.recurrent(nOut, nQueries); + } else { + return InputType.recurrent(nIn, nQueries); } + } - @Override - public InputType getOutputType(int layerIndex, InputType inputType) { - if (inputType == null || inputType.getType() != InputType.Type.RNN) { - throw new IllegalStateException("Invalid input for Learned Self Attention layer (layer index = " + layerIndex - + ", layer name = \"" + getLayerName() + "\"): expect RNN input type with size > 0. 
Got: " - + inputType); - } + @Override + public void defineParameters(SDLayerParams params) { + params.clear(); - if(projectInput){ - return InputType.recurrent(nOut, nQueries); - }else{ - return InputType.recurrent(nIn, nQueries); - } + params.addWeightParam(WEIGHT_QUERIES, 1, nIn, nQueries); + + if (projectInput) { + params.addWeightParam(WEIGHT_KEY_QUERY_PROJECTION, nHeads, headSize, nIn); + params.addWeightParam(WEIGHT_KEY_KEY_PROJECTION, nHeads, headSize, nIn); + params.addWeightParam(WEIGHT_KEY_VALUE_PROJECTION, nHeads, headSize, nIn); + params.addWeightParam(WEIGHT_KEY_OUT_PROJECTION, nHeads * headSize, nOut); } + } - @Override - public void defineParameters(SDLayerParams params) { - params.clear(); - - params.addWeightParam(WEIGHT_QUERIES, 1, nIn, nQueries); - - if(projectInput){ - params.addWeightParam(WEIGHT_KEY_QUERY_PROJECTION, nHeads, headSize, nIn); - params.addWeightParam(WEIGHT_KEY_KEY_PROJECTION, nHeads, headSize, nIn); - params.addWeightParam(WEIGHT_KEY_VALUE_PROJECTION, nHeads, headSize, nIn); - params.addWeightParam(WEIGHT_KEY_OUT_PROJECTION, nHeads * headSize, nOut); + @Override + public void initializeParameters(Map params) { + try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) { + for (Map.Entry e : params.entrySet()) { + if (e.getKey().equals(WEIGHT_KEY_OUT_PROJECTION)) { + WeightInitUtil.initWeights( + nIn, headSize, e.getValue().shape(), weightInit, null, 'c', e.getValue()); + } else if (e.getKey().equals(WEIGHT_QUERIES)) { + WeightInitUtil.initWeights( + nIn, nQueries, e.getValue().shape(), weightInit, null, 'c', e.getValue()); + } else { + WeightInitUtil.initWeights( + nHeads * headSize, nOut, e.getValue().shape(), weightInit, null, 'c', e.getValue()); } + } } + } - @Override - public void initializeParameters(Map params) { - try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) { - for (Map.Entry e : params.entrySet()) { - if(e.getKey().equals(WEIGHT_KEY_OUT_PROJECTION)){ - WeightInitUtil.initWeights(nIn, headSize, e.getValue().shape(), weightInit, null, 'c', e.getValue()); - }else if(e.getKey().equals(WEIGHT_QUERIES)){ - WeightInitUtil.initWeights(nIn, nQueries, e.getValue().shape(), weightInit, null, 'c', e.getValue()); - }else{ - WeightInitUtil.initWeights(nHeads * headSize, nOut, e.getValue().shape(), weightInit, null, 'c', e.getValue()); - } - } - } + @Override + public SDVariable defineLayer( + SameDiff sameDiff, + SDVariable layerInput, + Map paramTable, + SDVariable mask) { + val baseQueries = paramTable.get(WEIGHT_QUERIES); + val batchSize = layerInput.shape().get(SDIndex.point(0)); + val tileAxis = + sameDiff.scatterUpdate( + sameDiff.onesLike(layerInput.shape()), sameDiff.constant(0), batchSize); + + val queries = sameDiff.tile(baseQueries, tileAxis); + + if (projectInput) { + val Wq = paramTable.get(WEIGHT_KEY_QUERY_PROJECTION); + val Wk = paramTable.get(WEIGHT_KEY_KEY_PROJECTION); + val Wv = paramTable.get(WEIGHT_KEY_VALUE_PROJECTION); + val Wo = paramTable.get(WEIGHT_KEY_OUT_PROJECTION); + + return sameDiff.nn.multiHeadDotProductAttention( + getName(), queries, layerInput, layerInput, Wq, Wk, Wv, Wo, mask, true); + } else { + return sameDiff.nn.dotProductAttention( + getName(), queries, layerInput, layerInput, mask, true); } + } + @Override + public Pair feedForwardMaskArray( + INDArray maskArray, MaskState currentMaskState, int minibatchSize) { + // No further mask propagation here, as the results have taken any mask into account, like in a + // global pooling layer + return null; + } - @Override - 
public SDVariable defineLayer(SameDiff sameDiff, SDVariable layerInput, Map paramTable, SDVariable mask) { - val baseQueries = paramTable.get(WEIGHT_QUERIES); - val batchSize = layerInput.shape().get(SDIndex.point(0)); - val tileAxis = sameDiff.scatterUpdate(sameDiff.onesLike(layerInput.shape()), sameDiff.constant(0), batchSize); + public static abstract class LearnedSelfAttentionLayerBuilder< + C extends LearnedSelfAttentionLayer, B extends LearnedSelfAttentionLayerBuilder> + extends SameDiffLayerBuilder { + public C build() { + Preconditions.checkArgument( + this.projectInput || this.nHeads == 1, "projectInput must be true when nHeads != 1"); + Preconditions.checkArgument( + this.projectInput || nIn == nOut, "nIn must be equal to nOut when projectInput is false"); + Preconditions.checkArgument( + !this.projectInput || nOut != 0, "nOut must be specified when projectInput is true"); + Preconditions.checkArgument( + this.nOut % nHeads == 0 || headSize > 0, + "nOut isn't divided by nHeads cleanly. Specify the headSize manually."); + Preconditions.checkArgument(this.nQueries > 0, "You must set numQueries."); - val queries = sameDiff.tile(baseQueries, tileAxis); - - if(projectInput){ - val Wq = paramTable.get(WEIGHT_KEY_QUERY_PROJECTION); - val Wk = paramTable.get(WEIGHT_KEY_KEY_PROJECTION); - val Wv = paramTable.get(WEIGHT_KEY_VALUE_PROJECTION); - val Wo = paramTable.get(WEIGHT_KEY_OUT_PROJECTION); - - return sameDiff.nn.multiHeadDotProductAttention(getLayerName(), queries, layerInput, layerInput, Wq, Wk, Wv, Wo, mask, true); - }else{ - return sameDiff.nn.dotProductAttention(getLayerName(), queries, layerInput, layerInput, mask, true); - } - } - - @Override - public Pair feedForwardMaskArray(INDArray maskArray, MaskState currentMaskState, int minibatchSize) { - // No further mask propagation here, as the results have taken any mask into account, like in a global pooling layer - return null; - } - - @Getter - @Setter - public static class Builder extends SameDiffLayer.Builder { - - /** - * Number of inputs to the layer (input size) - */ - private int nIn; - - /** - * Number of outputs (output size) - */ - private int nOut; - - /** - * Number of Attention Heads - */ - private int nHeads; - - /** - * Size of attention heads - */ - private int headSize; - - /** - * Project input before applying attention or not. - */ - private boolean projectInput; - - - /** - * Number of queries to learn - */ - private int nQueries; - - /** - * @param nIn Number of inputs to the layer (input size) - */ - public Builder nIn(int nIn) { - this.nIn = nIn; - return this; - } - - /** - * @param nOut Number of outputs (output size) - */ - public Builder nOut(int nOut) { - this.nOut = nOut; - return this; - } - - /** - * Number of Attention Heads - */ - public Builder nHeads(int nHeads){ - this.nHeads = nHeads; - return this; - } - - /** - * Size of attention heads - */ - public Builder headSize(int headSize){ - this.headSize = headSize; - return this; - } - - /** - * Project input before applying attention or not. 
- */ - public Builder projectInput(boolean projectInput){ - this.projectInput = projectInput; - return this; - } - - /** - * Number of queries to learn - */ - public Builder nQueries(int nQueries){ - this.nQueries = nQueries; - return this; - } - - @Override - @SuppressWarnings("unchecked") - public LearnedSelfAttentionLayer build() { - Preconditions.checkArgument(this.projectInput || this.nHeads == 1, "projectInput must be true when nHeads != 1"); - Preconditions.checkArgument(this.projectInput || nIn == nOut, "nIn must be equal to nOut when projectInput is false"); - Preconditions.checkArgument(!this.projectInput || nOut != 0, "nOut must be specified when projectInput is true"); - Preconditions.checkArgument(this.nOut % nHeads == 0 || headSize > 0, "nOut isn't divided by nHeads cleanly. Specify the headSize manually."); - Preconditions.checkArgument(this.nQueries > 0, "You must set numQueries."); - - return new LearnedSelfAttentionLayer(this); - } + return initBuild(); } + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocalResponseNormalization.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocalResponseNormalization.java index 75397400b..1dbf9a9c5 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocalResponseNormalization.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocalResponseNormalization.java @@ -20,7 +20,11 @@ package org.deeplearning4j.nn.conf.layers; +import java.util.Collection; +import java.util.List; +import java.util.Map; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.InputPreProcessor; @@ -35,248 +39,148 @@ import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.learning.regularization.Regularization; -import java.util.Collection; -import java.util.List; -import java.util.Map; - @Data -@NoArgsConstructor @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@SuperBuilder(builderMethodName = "innerBuilder") public class LocalResponseNormalization extends LayerConfiguration { - // Defaults as per http://www.cs.toronto.edu/~fritz/absps/imagenet.pdf - //Set defaults here as well as in builder, in case users use no-arg constructor instead of builder - protected double n = 5; // # adjacent kernal maps - protected double k = 2; // constant (e.g. scale) - protected double beta = 0.75; // decay rate - protected double alpha = 1e-4; // decay rate - protected boolean cudnnAllowFallback = true; - protected CNN2DFormat dataFormat = CNN2DFormat.NCHW; + /** Number of adjacent kernel maps to use when doing LRN. default: 5 */ + protected @lombok.Builder.Default double n = 5; // # adjacent kernal maps + /** LRN scaling constant k. Default: 2 */ + protected @lombok.Builder.Default double k = 2; // constant (e.g. scale) + /** Scaling constant beta. Default: 0.75 */ + protected @lombok.Builder.Default double beta = 0.75; // decay rate + /** LRN scaling constant alpha. 
Default: 1e-4 */ + protected @lombok.Builder.Default double alpha = 1e-4; // decay rate - private LocalResponseNormalization(Builder builder) { - super(builder); - this.k = builder.k; - this.n = builder.n; - this.alpha = builder.alpha; - this.beta = builder.beta; - this.cudnnAllowFallback = builder.cudnnAllowFallback; - this.dataFormat = builder.dataFormat; + // Defaults as per http://www.cs.toronto.edu/~fritz/absps/imagenet.pdf + // Set defaults here as well as in builder, in case users use no-arg constructor instead of + // builder + // defaults based on AlexNet model + /** + * When using CuDNN and an error is encountered, should fallback to the non-CuDNN implementation + * be allowed? If set to false, an exception in CuDNN will be propagated back to the user. If + * true, the built-in (non-CuDNN) implementation for LocalResponseNormalization will be used. + */ + protected @lombok.Builder.Default boolean cudnnAllowFallback = true; + + protected @lombok.Builder.Default CNN2DFormat dataFormat = CNN2DFormat.NCHW; + + public static LocalResponseNormalizationBuilder builder() { + return innerBuilder(); + } + + public static LocalResponseNormalizationBuilder builder( + double k, double n, double alpha, double beta) { + return innerBuilder().k(k).n(n).alpha(alpha).beta(beta); + } + + public static LocalResponseNormalizationBuilder builder( + double k, double alpha, double beta) { + return innerBuilder().k(k).alpha(alpha).beta(beta); + } + + @Override + public LocalResponseNormalization clone() { + LocalResponseNormalization clone = (LocalResponseNormalization) super.clone(); + return clone; + } + + @Override + public org.deeplearning4j.nn.api.Layer instantiate( + NeuralNetConfiguration conf, + Collection trainingListeners, + int layerIndex, + INDArray layerParamsView, + boolean initializeParams, + DataType networkDataType) { + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + runInheritance(); + org.deeplearning4j.nn.layers.normalization.LocalResponseNormalization ret = + new org.deeplearning4j.nn.layers.normalization.LocalResponseNormalization( + lconf, networkDataType); + + ret.addTrainingListeners(trainingListeners); + ret.setIndex(layerIndex); + ret.setParamsViewArray(layerParamsView); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); + ret.setParamTable(paramTable); + ret.setLayerConfiguration(lconf); + return ret; + } + + @Override + public ParamInitializer initializer() { + return EmptyParamInitializer.getInstance(); + } + + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + if (inputType == null || inputType.getType() != InputType.Type.CNN) { + throw new IllegalStateException( + "Invalid input type for LRN layer (layer index = " + + layerIndex + + ", layer name = \"" + + getName() + + "\"): Expected input of type CNN, got " + + inputType); + } + return inputType; + } + + @Override + public void setNIn(InputType inputType, boolean override) { + Preconditions.checkState( + inputType.getType() == InputType.Type.CNN, + "Only CNN input types can be used with LocalResponseNormalisation, got %s", + inputType); + this.dataFormat = ((InputType.InputTypeConvolutional) inputType).getFormat(); + } + + @Override + public InputPreProcessor getPreProcessorForInputType(InputType inputType) { + if (inputType == null) { + throw new IllegalStateException( + "Invalid input type for LRN layer (layer name = \"" + getName() + "\"): null"); } - @Override - public LocalResponseNormalization clone() { - 
LocalResponseNormalization clone = (LocalResponseNormalization) super.clone(); - return clone; + return InputTypeUtil.getPreProcessorForInputTypeCnnLayers(inputType, getName()); + } + + @Override + public List getRegularizationByParam(String paramName) { + return null; + } + + @Override + public boolean isPretrainParam(String paramName) { + return false; // No params in LRN + } + + @Override + public LayerMemoryReport getMemoryReport(InputType inputType) { + val actElementsPerEx = inputType.arrayElementsPerExample(); + + // Forward pass: 3x input size as working memory, in addition to output activations + // Backward pass: 2x input size as working memory, in addition to epsilons + + return new LayerMemoryReport.Builder(name, DenseLayer.class, inputType, inputType) + .standardMemory(0, 0) + .workingMemory(0, 2 * actElementsPerEx, 0, 3 * actElementsPerEx) + .cacheMemory( + MemoryReport.CACHE_MODE_ALL_ZEROS, + MemoryReport.CACHE_MODE_ALL_ZEROS) // No caching in DenseLayerConfiguration + .build(); + } + + public static abstract class LocalResponseNormalizationBuilder< + C extends LocalResponseNormalization, B extends LocalResponseNormalizationBuilder> + extends LayerConfigurationBuilder { + public B helperAllowFallback(boolean b) { + this.cudnnAllowFallback$value = b; + this.cudnnAllowFallback$set = true; + return self(); } - - @Override - public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, - Collection trainingListeners, int layerIndex, INDArray layerParamsView, - boolean initializeParams, DataType networkDataType) { - LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); - org.deeplearning4j.nn.layers.normalization.LocalResponseNormalization ret = - new org.deeplearning4j.nn.layers.normalization.LocalResponseNormalization(lconf, networkDataType); - - ret.addTrainingListeners(trainingListeners); - ret.setIndex(layerIndex); - ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(this, layerParamsView, initializeParams); - ret.setParamTable(paramTable); - ret.setLayerConfiguration(lconf); - return ret; - } - - @Override - public ParamInitializer initializer() { - return EmptyParamInitializer.getInstance(); - } - - - @Override - public InputType getOutputType(int layerIndex, InputType inputType) { - if (inputType == null || inputType.getType() != InputType.Type.CNN) { - throw new IllegalStateException( - "Invalid input type for LRN layer (layer index = " + layerIndex + ", layer name = \"" - + getLayerName() + "\"): Expected input of type CNN, got " + inputType); - } - return inputType; - } - - @Override - public void setNIn(InputType inputType, boolean override) { - Preconditions.checkState(inputType.getType() == InputType.Type.CNN, "Only CNN input types can be used with LocalResponseNormalisation, got %s", inputType); - this.dataFormat = ((InputType.InputTypeConvolutional)inputType).getFormat(); - } - - @Override - public InputPreProcessor getPreProcessorForInputType(InputType inputType) { - if (inputType == null) { - throw new IllegalStateException( - "Invalid input type for LRN layer (layer name = \"" + getLayerName() + "\"): null"); - } - - return InputTypeUtil.getPreProcessorForInputTypeCnnLayers(inputType, getLayerName()); - } - - @Override - public List getRegularizationByParam(String paramName) { - return null; - } - - @Override - public boolean isPretrainParam(String paramName) { - return false; //No params in LRN - } - - @Override - public LayerMemoryReport getMemoryReport(InputType inputType) { - val 
actElementsPerEx = inputType.arrayElementsPerExample(); - - //Forward pass: 3x input size as working memory, in addition to output activations - //Backward pass: 2x input size as working memory, in addition to epsilons - - return new LayerMemoryReport.Builder(layerName, DenseLayer.class, inputType, inputType).standardMemory(0, 0) - .workingMemory(0, 2 * actElementsPerEx, 0, 3 * actElementsPerEx) - .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching in DenseLayerConfiguration - .build(); - } - - @AllArgsConstructor - @Getter - @Setter - public static class Builder extends LayerConfiguration.Builder { - - // defaults based on AlexNet model - - /** - * LRN scaling constant k. Default: 2 - * - */ - private double k = 2; - - /** - * Number of adjacent kernel maps to use when doing LRN. default: 5 - * - */ - private double n = 5; - - /** - * LRN scaling constant alpha. Default: 1e-4 - * - */ - private double alpha = 1e-4; - - /** - * Scaling constant beta. Default: 0.75 - * - */ - private double beta = 0.75; - - /** - * When using CuDNN and an error is encountered, should fallback to the non-CuDNN implementatation be allowed? - * If set to false, an exception in CuDNN will be propagated back to the user. If false, the built-in - * (non-CuDNN) implementation for BatchNormalization will be used - * - */ - protected boolean cudnnAllowFallback = true; - - protected CNN2DFormat dataFormat = CNN2DFormat.NCHW; - - public Builder(double k, double n, double alpha, double beta) { - this(k, n, alpha, beta, true, CNN2DFormat.NCHW); - } - - public Builder(double k, double alpha, double beta) { - this.setK(k); - this.setAlpha(alpha); - this.setBeta(beta); - } - - public Builder() {} - - /** - * LRN scaling constant k. Default: 2 - * - * @param k Scaling constant - */ - public Builder k(double k) { - this.setK(k); - return this; - } - - /** - * Number of adjacent kernel maps to use when doing LRN. default: 5 - * - * @param n Number of adjacent kernel maps - */ - public Builder n(double n) { - this.setN(n); - return this; - } - - /** - * LRN scaling constant alpha. Default: 1e-4 - * - * @param alpha Scaling constant - */ - public Builder alpha(double alpha) { - this.setAlpha(alpha); - return this; - } - - /** - * Scaling constant beta. Default: 0.75 - * - * @param beta Scaling constant - */ - public Builder beta(double beta) { - this.setBeta(beta); - return this; - } - - /** - * When using CuDNN and an error is encountered, should fallback to the non-CuDNN implementatation be allowed? - * If set to false, an exception in CuDNN will be propagated back to the user. If true, the built-in - * (non-CuDNN) implementation for BatchNormalization will be used - * - * @deprecated Use {@link #helperAllowFallback(boolean)} - * - * @param allowFallback Whether fallback to non-CuDNN implementation should be used - */ - @Deprecated - public Builder cudnnAllowFallback(boolean allowFallback) { - this.setCudnnAllowFallback(allowFallback); - return this; - } - - /** - * When using CuDNN or MKLDNN and an error is encountered, should fallback to the non-helper implementation be allowed? - * If set to false, an exception in the helper will be propagated back to the user. 
If true, the built-in - * (non-MKL/CuDNN) implementation for LocalResponseNormalizationLayer will be used - * - * @param allowFallback Whether fallback to non-CuDNN implementation should be used - */ - public Builder helperAllowFallback(boolean allowFallback) { - this.cudnnAllowFallback = allowFallback; - return this; - } - - /** - * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last). - * See {@link CNN2DFormat} for more details.
- * Default: NCHW - * @param dataFormat Format for activations (in and out) - */ - public Builder dataFormat(CNN2DFormat dataFormat){ - this.dataFormat = dataFormat; - return this; - } - - @Override - public LocalResponseNormalization build() { - return new LocalResponseNormalization(this); - } - } - + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected1D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected1D.java index ea679c9d4..bb4883b35 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected1D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected1D.java @@ -20,7 +20,10 @@ package org.deeplearning4j.nn.conf.layers; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import java.util.*; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -41,332 +44,232 @@ import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.api.memory.MemoryWorkspace; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; - -import java.util.*; @Data @EqualsAndHashCode(callSuper = true) @JsonIgnoreProperties({"paramShapes"}) +@SuperBuilder(buildMethodName = "initBuild") public class LocallyConnected1D extends SameDiffLayer { - private static final List WEIGHT_KEYS = Collections.singletonList(ConvolutionParamInitializer.WEIGHT_KEY); - private static final List BIAS_KEYS = Collections.singletonList(ConvolutionParamInitializer.BIAS_KEY); - private static final List PARAM_KEYS = - Arrays.asList(ConvolutionParamInitializer.BIAS_KEY, ConvolutionParamInitializer.WEIGHT_KEY); - - private long nIn; - private long nOut; - private Activation activation; - private int kernel; - private int stride; - private int padding; - private int paddingR; //Right/bottom padding - private ConvolutionMode cm; - private int dilation; - private boolean hasBias; - private int inputSize; - private int outputSize; - private int featureDim; - - protected LocallyConnected1D(Builder builder) { - super(builder); - this.nIn = builder.nIn; - this.nOut = builder.nOut; - this.activation = builder.activation; - this.kernel = builder.kernel; - this.stride = builder.stride; - this.padding = builder.padding; - this.cm = builder.cm; - this.dilation = builder.dilation; - this.hasBias = builder.hasBias; - this.inputSize = builder.inputSize; - this.featureDim = kernel * (int) nIn; + public static abstract class LocallyConnected1DBuilder> extends + SameDiffLayerBuilder { + public C build() { + Convolution1DUtils.validateConvolutionModePadding(convolutionMode$value, padding$value); + Convolution1DUtils.validateCnn1DKernelStridePadding(kernelSize$value, stride$value, padding$value); + C l = initBuild(); + return l; } + } - private LocallyConnected1D() { - //No arg constructor for Jackson/JSON serialization + private static final List WEIGHT_KEYS = + Collections.singletonList(ConvolutionParamInitializer.WEIGHT_KEY); + private static final List BIAS_KEYS = + Collections.singletonList(ConvolutionParamInitializer.BIAS_KEY); + private static final List PARAM_KEYS = + Arrays.asList(ConvolutionParamInitializer.BIAS_KEY, ConvolutionParamInitializer.WEIGHT_KEY); + /** + * @param nIn Number of inputs to the 
layer (input size) + */ + private long nIn; + /** + * @param nOut Number of outputs (output size) + */ + private long nOut; + /** + * @param activation Activation function for the layer + */ + @Builder.Default private Activation activation = Activation.TANH; + /** Kernel size for the layer */ + @Builder.Default private int kernelSize = 2; + /** Stride for the layer */ + @Builder.Default private int stride = 1; + /** Padding for the layer. Not used if {@link ConvolutionMode#Same} is set */ + @Builder.Default private int padding = 0; + + private int paddingR; // Right/bottom padding + /** Convolution mode for the layer. See {@link ConvolutionMode} for details */ + @Builder.Default private ConvolutionMode convolutionMode = ConvolutionMode.Same; + + /** Dilation for the layer */ + @Builder.Default private int dilation = 1; + + /** If true (default is false) the layer will have a bias */ + @Builder.Default private boolean hasBias = true; + + private int inputSize; + private int outputSize; + private int featureDim; + + public void computeOutputSize() { + int nIn = (int) getNIn(); + if (inputSize == 0) { + throw new IllegalArgumentException("Input size has to be set for Locally connected layers"); } + int[] inputShape = new int[] {1, nIn, inputSize}; + INDArray dummyInputForShapeInference = Nd4j.ones(inputShape); - public void computeOutputSize() { - int nIn = (int) getNIn(); - if (inputSize == 0) { - throw new IllegalArgumentException("Input size has to be set for Locally connected layers"); - } - int[] inputShape = new int[] {1, nIn, inputSize}; - INDArray dummyInputForShapeInference = Nd4j.ones(inputShape); + if (convolutionMode == ConvolutionMode.Same) { + this.outputSize = + Convolution1DUtils.getOutputSize( + dummyInputForShapeInference, kernelSize, stride, 0, convolutionMode, dilation); + this.padding = + Convolution1DUtils.getSameModeTopLeftPadding( + outputSize, inputSize, kernelSize, stride, dilation); + this.paddingR = + Convolution1DUtils.getSameModeBottomRightPadding( + outputSize, inputSize, kernelSize, stride, dilation); + } else { + this.outputSize = + Convolution1DUtils.getOutputSize( + dummyInputForShapeInference, kernelSize, stride, padding, convolutionMode, dilation); + } + } - if (cm == ConvolutionMode.Same) { - this.outputSize = Convolution1DUtils.getOutputSize(dummyInputForShapeInference, kernel, stride, 0, cm, - dilation); - this.padding = Convolution1DUtils.getSameModeTopLeftPadding(outputSize, inputSize, kernel, stride, dilation); - this.paddingR = Convolution1DUtils.getSameModeBottomRightPadding(outputSize, inputSize, kernel, stride, dilation); + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + if (inputType == null || inputType.getType() != InputType.Type.RNN) { + throw new IllegalArgumentException( + "Provided input type for locally connected 1D layers has to be " + + "of CNN1D/RNN type, got: " + + inputType); + } + // dynamically compute input size from input type + InputType.InputTypeRecurrent rnnType = (InputType.InputTypeRecurrent) inputType; + this.inputSize = (int) rnnType.getTimeSeriesLength(); + computeOutputSize(); + + return InputTypeUtil.getOutputTypeCnn1DLayers( + inputType, + kernelSize, + stride, + padding, + 1, + convolutionMode, + nOut, + layerIndex, + getName(), + LocallyConnected1D.class); + } + + @Override + public void setNIn(InputType inputType, boolean override) { + if (nIn <= 0 || override) { + InputType.InputTypeRecurrent c = (InputType.InputTypeRecurrent) inputType; + this.nIn = c.getSize(); + } + if 
(featureDim <= 0 || override) { + InputType.InputTypeRecurrent c = (InputType.InputTypeRecurrent) inputType; + this.featureDim = kernelSize * (int) c.getSize(); + } + } + + @Override + public InputPreProcessor getPreProcessorForInputType(InputType inputType) { + return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType, RNNFormat.NCW, getName()); + } + + @Override + public void defineParameters(SDLayerParams params) { + Preconditions.checkState( + featureDim > 0, "Cannot initialize layer: Feature dimension is set to %s", featureDim); + params.clear(); + val weightsShape = new long[] {outputSize, featureDim, nOut}; + params.addWeightParam(ConvolutionParamInitializer.WEIGHT_KEY, weightsShape); + if (hasBias) { + val biasShape = new long[] {nOut}; + params.addBiasParam(ConvolutionParamInitializer.BIAS_KEY, biasShape); + } + } + + @Override + public void initializeParameters(Map params) { + try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) { + for (Map.Entry e : params.entrySet()) { + if (ConvolutionParamInitializer.BIAS_KEY.equals(e.getKey())) { + e.getValue().assign(0); } else { - this.outputSize = Convolution1DUtils.getOutputSize(dummyInputForShapeInference, kernel, stride, padding, cm, - dilation); + double fanIn = nIn * kernelSize; + double fanOut = nOut * kernelSize / ((double) stride); + WeightInitUtil.initWeights( + fanIn, fanOut, e.getValue().shape(), weightInit, null, 'c', e.getValue()); } + } + } + } + + @Override + public SDVariable defineLayer( + SameDiff sameDiff, + SDVariable layerInput, + Map paramTable, + SDVariable mask) { + SDVariable w = + paramTable.get(ConvolutionParamInitializer.WEIGHT_KEY); // (outH, featureDim, nOut) + + int outH = outputSize; + int sH = stride; + int kH = kernelSize; + + if (padding > 0 || (convolutionMode == ConvolutionMode.Same && paddingR > 0)) { + // Note: for same mode, bottom/right padding can be 1 more than top/left padding + // NCW format. 
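+          // For example (illustrative numbers): inputSize=10, kernelSize=3, stride=1 in Same mode gives + // outputSize=10 with padding=1 and paddingR=1; with kernelSize=4 the total padding of 3 is split + // as padding=1 (left) and paddingR=2 (right), hence the separate paddingR value.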
+ if (convolutionMode == ConvolutionMode.Same) { + layerInput = + sameDiff + .nn() + .pad( + layerInput, + sameDiff.constant( + Nd4j.createFromArray(new int[][] {{0, 0}, {0, 0}, {padding, paddingR}})), + PadMode.CONSTANT, + 0); + } else { + layerInput = + sameDiff + .nn() + .pad( + layerInput, + sameDiff.constant( + Nd4j.createFromArray(new int[][] {{0, 0}, {0, 0}, {padding, padding}})), + PadMode.CONSTANT, + 0); + } } - @Override - public InputType getOutputType(int layerIndex, InputType inputType) { - if (inputType == null || inputType.getType() != InputType.Type.RNN) { - throw new IllegalArgumentException("Provided input type for locally connected 1D layers has to be " - + "of CNN1D/RNN type, got: " + inputType); - } - // dynamically compute input size from input type - InputType.InputTypeRecurrent rnnType = (InputType.InputTypeRecurrent) inputType; - this.inputSize = (int) rnnType.getTimeSeriesLength(); - computeOutputSize(); - - return InputTypeUtil.getOutputTypeCnn1DLayers(inputType, kernel, stride, padding, 1, cm, nOut, layerIndex, - getLayerName(), LocallyConnected1D.class); + SDVariable[] inputArray = new SDVariable[outH]; + for (int i = 0; i < outH; i++) { + SDVariable slice = + layerInput.get( + SDIndex.all(), // miniBatch + SDIndex.all(), // nIn + SDIndex.interval(i * sH, i * sH + kH) // kernel + ); + inputArray[i] = sameDiff.reshape(slice, 1, -1, featureDim); } + SDVariable concatOutput = sameDiff.concat(0, inputArray); // (outH, miniBatch, featureDim) - @Override - public void setNIn(InputType inputType, boolean override) { - if (nIn <= 0 || override) { - InputType.InputTypeRecurrent c = (InputType.InputTypeRecurrent) inputType; - this.nIn = c.getSize(); - } - if(featureDim <= 0 || override){ - InputType.InputTypeRecurrent c = (InputType.InputTypeRecurrent) inputType; - this.featureDim = kernel * (int) c.getSize(); - } + SDVariable mmulResult = sameDiff.mmul(concatOutput, w); // (outH, miniBatch, nOut) + + SDVariable result = sameDiff.permute(mmulResult, 1, 2, 0); // (miniBatch, nOut, outH) + + if (hasBias) { + SDVariable b = paramTable.get(ConvolutionParamInitializer.BIAS_KEY); + SDVariable biasAddedResult = sameDiff.nn().biasAdd(result, b, true); + return activation.asSameDiff("out", sameDiff, biasAddedResult); + } else { + return activation.asSameDiff("out", sameDiff, result); } + } - @Override - public InputPreProcessor getPreProcessorForInputType(InputType inputType) { - return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType, RNNFormat.NCW, getLayerName()); + @Override + public void applyGlobalConfigToLayer( + NeuralNetConfiguration.NeuralNetConfigurationBuilder globalConfig) { + NeuralNetConfiguration global_conf = globalConfig.build(); + if (activation == null) { + activation = SameDiffLayerUtils.fromIActivation(global_conf.getActivation()); } - - @Override - public void defineParameters(SDLayerParams params) { - Preconditions.checkState(featureDim > 0, "Cannot initialize layer: Feature dimension is set to %s", featureDim); - params.clear(); - val weightsShape = new long[] {outputSize, featureDim, nOut}; - params.addWeightParam(ConvolutionParamInitializer.WEIGHT_KEY, weightsShape); - if (hasBias) { - val biasShape = new long[] {nOut}; - params.addBiasParam(ConvolutionParamInitializer.BIAS_KEY, biasShape); - } - } - - @Override - public void initializeParameters(Map params) { - try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) { - for (Map.Entry e : params.entrySet()) { - if 
(ConvolutionParamInitializer.BIAS_KEY.equals(e.getKey())) { - e.getValue().assign(0); - } else { - double fanIn = nIn * kernel; - double fanOut = nOut * kernel / ((double) stride); - WeightInitUtil.initWeights(fanIn, fanOut, e.getValue().shape(), weightInit, null, 'c', - e.getValue()); - } - } - } - } - - @Override - public SDVariable defineLayer(SameDiff sameDiff, SDVariable layerInput, Map paramTable, SDVariable mask) { - SDVariable w = paramTable.get(ConvolutionParamInitializer.WEIGHT_KEY); // (outH, featureDim, nOut) - - int outH = outputSize; - int sH = stride; - int kH = kernel; - - if(padding > 0 || (cm == ConvolutionMode.Same && paddingR > 0)){ - //Note: for same mode, bottom/right padding can be 1 more than top/left padding - //NCW format. - if(cm == ConvolutionMode.Same) { - layerInput = sameDiff.nn().pad(layerInput, - sameDiff.constant(Nd4j.createFromArray(new int[][]{{0, 0}, {0, 0}, {padding, paddingR}})), PadMode.CONSTANT, 0); - } else { - layerInput = sameDiff.nn().pad(layerInput, - sameDiff.constant(Nd4j.createFromArray(new int[][]{{0, 0}, {0, 0}, {padding, padding}})), PadMode.CONSTANT, 0); - } - } - - SDVariable[] inputArray = new SDVariable[outH]; - for (int i = 0; i < outH; i++) { - SDVariable slice = layerInput.get(SDIndex.all(), // miniBatch - SDIndex.all(), // nIn - SDIndex.interval(i * sH, i * sH + kH) // kernel - ); - inputArray[i] = sameDiff.reshape(slice, 1, -1, featureDim); - } - SDVariable concatOutput = sameDiff.concat(0, inputArray); // (outH, miniBatch, featureDim) - - SDVariable mmulResult = sameDiff.mmul(concatOutput, w); // (outH, miniBatch, nOut) - - SDVariable result = sameDiff.permute(mmulResult, 1, 2, 0); // (miniBatch, nOut, outH) - - if (hasBias) { - SDVariable b = paramTable.get(ConvolutionParamInitializer.BIAS_KEY); - SDVariable biasAddedResult = sameDiff.nn().biasAdd(result, b, true); - return activation.asSameDiff("out", sameDiff, biasAddedResult); - } else { - return activation.asSameDiff("out", sameDiff, result); - } - - } - - @Override - public void applyGlobalConfigToLayer(NeuralNetConfiguration.NeuralNetConfigurationBuilder globalConfig) { - NeuralNetConfiguration global_conf = globalConfig.build(); - if (activation == null) { - activation = SameDiffLayerUtils.fromIActivation(global_conf.getActivation()); - } - if (cm == null) { - cm = global_conf.getConvolutionMode(); - } - } - - @Getter - @Setter - public static class Builder extends SameDiffLayer.Builder { - - /** - * Number of inputs to the layer (input size) - */ - private int nIn; - - /** - * Number of outputs (output size) - */ - private int nOut; - - /** - * Activation function for the layer - */ - private Activation activation = Activation.TANH; - - /** - * Kernel size for the layer - */ - private int kernel = 2; - - /** - * Stride for the layer - */ - private int stride = 1; - - /** - * Padding for the layer. Not used if {@link ConvolutionMode#Same} is set - */ - private int padding = 0; - - /** - * Dilation for the layer - */ - private int dilation = 1; - - /** - * Input filter size for this locally connected 1D layer - * - */ - @Setter(AccessLevel.NONE) - private int inputSize; - - /** - * Convolution mode for the layer. 
See {@link ConvolutionMode} for details - */ - private ConvolutionMode cm = ConvolutionMode.Same; - - /** - * If true (default is false) the layer will have a bias - */ - private boolean hasBias = true; - - /** - * @param nIn Number of inputs to the layer (input size) - */ - public Builder nIn(int nIn) { - this.setNIn(nIn); - return this; - } - - /** - * @param nOut Number of outputs (output size) - */ - public Builder nOut(int nOut) { - this.setNOut(nOut); - return this; - } - - /** - * @param activation Activation function for the layer - */ - public Builder activation(Activation activation) { - this.setActivation(activation); - return this; - } - - /** - * @param k Kernel size for the layer - */ - public Builder kernelSize(int k) { - this.setKernel(k); - return this; - } - - /** - * @param s Stride for the layer - */ - public Builder stride(int s) { - this.setStride(s); - return this; - } - - /** - * @param p Padding for the layer. Not used if {@link ConvolutionMode#Same} is set - */ - public Builder padding(int p) { - this.setPadding(p); - return this; - } - - /** - * @param cm Convolution mode for the layer. See {@link ConvolutionMode} for details - */ - public Builder convolutionMode(ConvolutionMode cm) { - this.setCm(cm); - return this; - } - - /** - * @param d Dilation for the layer - */ - public Builder dilation(int d) { - this.setDilation(d); - return this; - } - - /** - * @param hasBias If true (default is false) the layer will have a bias - */ - public Builder hasBias(boolean hasBias) { - this.setHasBias(hasBias); - return this; - } - - /** - * Set input filter size for this locally connected 1D layer - * - * @param inputSize height of the input filters - * @return Builder - */ - public Builder setInputSize(int inputSize) { - this.inputSize = inputSize; - return this; - } - - @Override - @SuppressWarnings("unchecked") - public LocallyConnected1D build() { - Convolution1DUtils.validateConvolutionModePadding(cm, padding); - Convolution1DUtils.validateCnn1DKernelStridePadding(kernel, stride, padding); - return new LocallyConnected1D(this); - } + if (convolutionMode == null) { + convolutionMode = global_conf.getConvolutionMode(); } + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected2D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected2D.java index 5dd5ec62e..95cb18054 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected2D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected2D.java @@ -20,14 +20,17 @@ package org.deeplearning4j.nn.conf.layers; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import java.util.*; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayer; import org.deeplearning4j.nn.conf.layers.samediff.SDLayerParams; +import org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayer; import org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayerUtils; import org.deeplearning4j.nn.params.ConvolutionParamInitializer; import org.deeplearning4j.nn.weights.WeightInitUtil; @@ -38,398 +41,319 @@ import org.nd4j.autodiff.samediff.SDVariable; import 
org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.enums.PadMode; import org.nd4j.linalg.activations.Activation; +import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.api.memory.MemoryWorkspace; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; - -import java.util.*; @Data @EqualsAndHashCode(callSuper = true) @JsonIgnoreProperties({"paramShapes"}) +@SuperBuilder(buildMethodName = "initBuild") public class LocallyConnected2D extends SameDiffLayer { - private static final List WEIGHT_KEYS = Collections.singletonList(ConvolutionParamInitializer.WEIGHT_KEY); - private static final List BIAS_KEYS = Collections.singletonList(ConvolutionParamInitializer.BIAS_KEY); - private static final List PARAM_KEYS = - Arrays.asList(ConvolutionParamInitializer.BIAS_KEY, ConvolutionParamInitializer.WEIGHT_KEY); + private static final List WEIGHT_KEYS = + Collections.singletonList(ConvolutionParamInitializer.WEIGHT_KEY); + private static final List BIAS_KEYS = + Collections.singletonList(ConvolutionParamInitializer.BIAS_KEY); + private static final List PARAM_KEYS = + Arrays.asList(ConvolutionParamInitializer.BIAS_KEY, ConvolutionParamInitializer.WEIGHT_KEY); + /** + * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last). + * See {@link CNN2DFormat} for more details.
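+   * For example (illustrative), a channels-last pipeline would set {@code dataFormat(CNN2DFormat.NHWC)} on the generated builder.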
+ * Default: NCHW + * + * @param format Format for activations (in and out) + */ + @Builder.Default protected CNN2DFormat dataFormat = CNN2DFormat.NCHW; + /** Number of inputs to the layer (input size) */ + private int nIn; + /** Number of outputs (output size) */ + private int nOut; + /** Activation function for the layer */ + @Builder.Default private Activation activation = Activation.TANH; + /** Kernel size for the layer. Must be 2 values (height/width) */ + @Builder.Default private int[] kernel = new int[] {2, 2}; + /** Stride for the layer. Must be 2 values (height/width) */ + @Builder.Default private int[] stride = new int[] {1, 1}; + /** + * Padding for the layer. Not used if {@link ConvolutionMode#Same} is set. Must be 2 values + * (height/width) + */ + @Builder.Default private int[] padding = new int[] {0, 0}; - private long nIn; - private long nOut; - private Activation activation; - private int[] kernel; - private int[] stride; - private int[] padding; - private int[] paddingBr; - private ConvolutionMode cm; - private int[] dilation; - private boolean hasBias; - private int[] inputSize; - private int[] outputSize; - private int featureDim; - protected CNN2DFormat format = CNN2DFormat.NCHW; + private int[] paddingBr; + /** Convolution mode for the layer. See {@link ConvolutionMode} for details */ + @Builder.Default private ConvolutionMode convolutionMode = ConvolutionMode.Same; + /** Dilation for the layer. Must be 2 values (height/width) */ + @Builder.Default private int[] dilation = new int[] {1, 1}; + /** + * @param hasBias If true (default is false) the layer will have a bias + */ + @Builder.Default private boolean hasBias = false; + /** + * Set input filter size (h,w) for this locally connected 2D layer + * + * @param inputSize pair of height and width of the input filters to this layer + * @return Builder + */ + private int[] inputSize; - protected LocallyConnected2D(Builder builder) { - super(builder); - this.nIn = builder.nIn; - this.nOut = builder.nOut; - this.activation = builder.activation; - this.kernel = builder.kernel; - this.stride = builder.stride; - this.padding = builder.padding; - this.cm = builder.cm; - this.dilation = builder.dilation; - this.hasBias = builder.hasBias; - this.inputSize = builder.inputSize; - this.featureDim = kernel[0] * kernel[1] * (int) nIn; - this.format = builder.format; + private int[] outputSize; + + private int featureDim; + + public void computeOutputSize() { + int nIn = (int) getNIn(); + + if (inputSize == null) { + throw new IllegalArgumentException( + "Input size has to be specified for locally connected layers."); } - private LocallyConnected2D() { - //No arg constructor for Jackson/JSON serialization + boolean nchw = dataFormat == CNN2DFormat.NCHW; + + int[] inputShape = + nchw + ? 
new int[] {1, nIn, inputSize[0], inputSize[1]} + : new int[] {1, inputSize[0], inputSize[1], nIn}; + INDArray dummyInputForShapeInference = Nd4j.ones(inputShape); + + if (convolutionMode == ConvolutionMode.Same) { + this.outputSize = + ConvolutionUtils.getOutputSize( + dummyInputForShapeInference, + kernel, + stride, + null, + convolutionMode, + dilation, + dataFormat); + this.padding = + ConvolutionUtils.getSameModeTopLeftPadding( + outputSize, inputSize, kernel, stride, dilation); + this.paddingBr = + ConvolutionUtils.getSameModeBottomRightPadding( + outputSize, inputSize, kernel, stride, dilation); + } else { + this.outputSize = + ConvolutionUtils.getOutputSize( + dummyInputForShapeInference, + kernel, + stride, + padding, + convolutionMode, + dilation, + dataFormat); } + } - public void computeOutputSize() { - int nIn = (int) getNIn(); + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + if (inputType == null || inputType.getType() != InputType.Type.CNN) { + throw new IllegalArgumentException( + "Provided input type for locally connected 2D layers has to be " + + "of CNN type, got: " + + inputType); + } + // dynamically compute input size from input type + InputType.InputTypeConvolutional cnnType = (InputType.InputTypeConvolutional) inputType; + this.inputSize = new int[] {(int) cnnType.getHeight(), (int) cnnType.getWidth()}; + computeOutputSize(); - if (inputSize == null) { - throw new IllegalArgumentException("Input size has to be specified for locally connected layers."); - } + return InputTypeUtil.getOutputTypeCnnLayers( + inputType, + kernel, + stride, + padding, + new int[] {1, 1}, + convolutionMode, + nOut, + layerIndex, + getName(), + dataFormat, + LocallyConnected2D.class); + } - boolean nchw = format == CNN2DFormat.NCHW; + @Override + public void setNIn(InputType inputType, boolean override) { + if (nIn <= 0 || override) { + InputType.InputTypeConvolutional c = (InputType.InputTypeConvolutional) inputType; + this.nIn = (int) c.getChannels(); + this.featureDim = kernel[0] * kernel[1] * (int) nIn; + } + this.dataFormat = ((InputType.InputTypeConvolutional) inputType).getFormat(); + } - int[] inputShape = nchw ? 
new int[] {1, nIn, inputSize[0], inputSize[1]} : new int[] {1, inputSize[0], inputSize[1], nIn}; - INDArray dummyInputForShapeInference = Nd4j.ones(inputShape); + @Override + public InputPreProcessor getPreProcessorForInputType(InputType inputType) { + return InputTypeUtil.getPreProcessorForInputTypeCnnLayers(inputType, getName()); + } - if (cm == ConvolutionMode.Same) { - this.outputSize = ConvolutionUtils.getOutputSize(dummyInputForShapeInference, kernel, stride, null, cm, - dilation, format); - this.padding = ConvolutionUtils.getSameModeTopLeftPadding(outputSize, inputSize, kernel, stride, dilation); - this.paddingBr = ConvolutionUtils.getSameModeBottomRightPadding(outputSize, inputSize, kernel, stride, dilation); + @Override + public void defineParameters(SDLayerParams params) { + params.clear(); + val weightsShape = new long[] {(long) outputSize[0] * outputSize[1], featureDim, nOut}; + params.addWeightParam(ConvolutionParamInitializer.WEIGHT_KEY, weightsShape); + if (hasBias) { + val biasShape = new long[] {nOut}; + params.addBiasParam(ConvolutionParamInitializer.BIAS_KEY, biasShape); + } + } + + @Override + public void initializeParameters(Map params) { + try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) { + for (Map.Entry e : params.entrySet()) { + if (ConvolutionParamInitializer.BIAS_KEY.equals(e.getKey())) { + e.getValue().assign(0); } else { - this.outputSize = ConvolutionUtils.getOutputSize(dummyInputForShapeInference, kernel, stride, padding, cm, - dilation, format); + double fanIn = nIn * kernel[0] * kernel[1]; + double fanOut = nOut * kernel[0] * kernel[1] / ((double) stride[0] * stride[1]); + WeightInitUtil.initWeights( + fanIn, fanOut, e.getValue().shape(), weightInit, null, 'c', e.getValue()); } + } + } + } + + @Override + public SDVariable defineLayer( + SameDiff sameDiff, + SDVariable layerInput, + Map paramTable, + SDVariable mask) { + + SDVariable w = paramTable.get(ConvolutionParamInitializer.WEIGHT_KEY); + + long[] inputShape = layerInput.getShape(); + long miniBatch = inputShape[0]; + int outH = outputSize[0]; + int outW = outputSize[1]; + int sH = stride[0]; + int sW = stride[1]; + int kH = kernel[0]; + int kW = kernel[1]; + + boolean nchw = dataFormat == CNN2DFormat.NCHW; + if (!nchw) layerInput = layerInput.permute(0, 3, 1, 2); // NHWC to NCHW + + if (padding[0] > 0 + || padding[1] > 0 + || (convolutionMode == ConvolutionMode.Same && (paddingBr[0] > 0 || paddingBr[1] > 0))) { + // Note: for same mode, bottom/right padding can be 1 more than top/left padding + // NCHW format + if (convolutionMode == ConvolutionMode.Same) { + layerInput = + sameDiff + .nn() + .pad( + layerInput, + sameDiff.constant( + Nd4j.createFromArray( + new int[][] { + {0, 0}, {0, 0}, {padding[0], paddingBr[0]}, {padding[1], paddingBr[1]} + })), + PadMode.CONSTANT, + 0.0); + } else { + layerInput = + sameDiff + .nn() + .pad( + layerInput, + sameDiff.constant( + Nd4j.createFromArray( + new int[][] { + {0, 0}, {0, 0}, {padding[0], padding[0]}, {padding[1], padding[1]} + })), + PadMode.CONSTANT, + 0.0); + } } - @Override - public InputType getOutputType(int layerIndex, InputType inputType) { - if (inputType == null || inputType.getType() != InputType.Type.CNN) { - throw new IllegalArgumentException("Provided input type for locally connected 2D layers has to be " - + "of CNN type, got: " + inputType); - } - // dynamically compute input size from input type - InputType.InputTypeConvolutional cnnType = (InputType.InputTypeConvolutional) inputType; - this.inputSize = new 
int[] {(int) cnnType.getHeight(), (int) cnnType.getWidth()}; - computeOutputSize(); - - return InputTypeUtil.getOutputTypeCnnLayers(inputType, kernel, stride, padding, new int[] {1, 1}, cm, nOut, - layerIndex, getLayerName(), format, LocallyConnected2D.class); - } - - @Override - public void setNIn(InputType inputType, boolean override) { - if (nIn <= 0 || override) { - InputType.InputTypeConvolutional c = (InputType.InputTypeConvolutional) inputType; - this.nIn = c.getChannels(); - this.featureDim = kernel[0] * kernel[1] * (int) nIn; - } - this.format = ((InputType.InputTypeConvolutional)inputType).getFormat(); - } - - @Override - public InputPreProcessor getPreProcessorForInputType(InputType inputType) { - return InputTypeUtil.getPreProcessorForInputTypeCnnLayers(inputType, getLayerName()); - } - - @Override - public void defineParameters(SDLayerParams params) { - params.clear(); - val weightsShape = new long[] {(long) outputSize[0] * outputSize[1], featureDim, nOut}; - params.addWeightParam(ConvolutionParamInitializer.WEIGHT_KEY, weightsShape); - if (hasBias) { - val biasShape = new long[] {nOut}; - params.addBiasParam(ConvolutionParamInitializer.BIAS_KEY, biasShape); - } - } - - @Override - public void initializeParameters(Map params) { - try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) { - for (Map.Entry e : params.entrySet()) { - if (ConvolutionParamInitializer.BIAS_KEY.equals(e.getKey())) { - e.getValue().assign(0); - } else { - double fanIn = nIn * kernel[0] * kernel[1]; - double fanOut = nOut * kernel[0] * kernel[1] / ((double) stride[0] * stride[1]); - WeightInitUtil.initWeights(fanIn, fanOut, e.getValue().shape(), weightInit, null, 'c', - e.getValue()); - } - } - } - } - - @Override - public SDVariable defineLayer(SameDiff sameDiff, SDVariable layerInput, Map paramTable, SDVariable mask) { - - SDVariable w = paramTable.get(ConvolutionParamInitializer.WEIGHT_KEY); - - long[] inputShape = layerInput.getShape(); - long miniBatch = inputShape[0]; - int outH = outputSize[0]; - int outW = outputSize[1]; - int sH = stride[0]; - int sW = stride[1]; - int kH = kernel[0]; - int kW = kernel[1]; - - boolean nchw = format == CNN2DFormat.NCHW; - if(!nchw) - layerInput = layerInput.permute(0,3,1,2); //NHWC to NCHW - - if(padding[0] > 0 || padding[1] > 0 || (cm == ConvolutionMode.Same && (paddingBr[0] > 0 || paddingBr[1] > 0))){ - //Note: for same mode, bottom/right padding can be 1 more than top/left padding - //NCHW format - if(cm == ConvolutionMode.Same){ - layerInput = sameDiff.nn().pad(layerInput, - sameDiff.constant(Nd4j.createFromArray(new int[][]{{0,0},{0,0},{padding[0], paddingBr[0]}, {padding[1], paddingBr[1]}})), PadMode.CONSTANT, 0.0); - } else { - layerInput = sameDiff.nn().pad(layerInput, - sameDiff.constant(Nd4j.createFromArray(new int[][]{{0,0},{0,0},{padding[0], padding[0]}, {padding[1], padding[1]}})), PadMode.CONSTANT, 0.0); - } - } - - SDVariable[] inputArray = new SDVariable[outH * outW]; - for (int i = 0; i < outH; i++) { - for (int j = 0; j < outW; j++) { - SDVariable slice = layerInput.get(SDIndex.all(), // miniBatch - SDIndex.all(), // nIn - SDIndex.interval(i * sH, i * sH + kH), // kernel height - SDIndex.interval(j * sW, j * sW + kW) // kernel width + SDVariable[] inputArray = new SDVariable[outH * outW]; + for (int i = 0; i < outH; i++) { + for (int j = 0; j < outW; j++) { + SDVariable slice = + layerInput.get( + SDIndex.all(), // miniBatch + SDIndex.all(), // nIn + SDIndex.interval(i * sH, i * sH + kH), // kernel height + 
SDIndex.interval(j * sW, j * sW + kW) // kernel width ); - inputArray[i * outH + j] = sameDiff.reshape(slice, 1, miniBatch, featureDim); - } - } - SDVariable concatOutput = sameDiff.concat(0, inputArray); // (outH * outW, miniBatch, featureDim) + inputArray[i * outH + j] = sameDiff.reshape(slice, 1, miniBatch, featureDim); + } + } + SDVariable concatOutput = + sameDiff.concat(0, inputArray); // (outH * outW, miniBatch, featureDim) - SDVariable mmulResult = sameDiff.mmul(concatOutput, w); // (outH * outW, miniBatch, nOut) + SDVariable mmulResult = sameDiff.mmul(concatOutput, w); // (outH * outW, miniBatch, nOut) - SDVariable reshapeResult = sameDiff.reshape(mmulResult, outH, outW, miniBatch, nOut); + SDVariable reshapeResult = sameDiff.reshape(mmulResult, outH, outW, miniBatch, nOut); - SDVariable permutedResult = nchw ? reshapeResult.permute(2, 3, 0, 1) : reshapeResult.permute(2, 0, 1, 3); // (mb, nOut, outH, outW) or (mb, outH, outW, nOut) + SDVariable permutedResult = + nchw + ? reshapeResult.permute(2, 3, 0, 1) + : reshapeResult.permute(2, 0, 1, 3); // (mb, nOut, outH, outW) or (mb, outH, outW, nOut) - if (hasBias) { - SDVariable b = paramTable.get(ConvolutionParamInitializer.BIAS_KEY); - SDVariable biasAddedResult = sameDiff.nn().biasAdd(permutedResult, b, nchw); - return activation.asSameDiff("out", sameDiff, biasAddedResult); - } else { - return activation.asSameDiff("out", sameDiff, permutedResult); - } + if (hasBias) { + SDVariable b = paramTable.get(ConvolutionParamInitializer.BIAS_KEY); + SDVariable biasAddedResult = sameDiff.nn().biasAdd(permutedResult, b, nchw); + return activation.asSameDiff("out", sameDiff, biasAddedResult); + } else { + return activation.asSameDiff("out", sameDiff, permutedResult); + } + } + + @Override + public void applyGlobalConfigToLayer( + NeuralNetConfiguration.NeuralNetConfigurationBuilder globalConfig) { + NeuralNetConfiguration gconf = globalConfig.build(); + if (activation == null) { + activation = SameDiffLayerUtils.fromIActivation(gconf.getActivation()); + } + if (convolutionMode == null) { + convolutionMode = gconf.getConvolutionMode(); + } + } + + public static abstract class LocallyConnected2DBuilder< + C extends LocallyConnected2D, B extends LocallyConnected2DBuilder> + extends SameDiffLayerBuilder { + public C build() { + featureDim(kernel$value[0] * kernel$value[1] * (int) nIn); + C l = initBuild(); + return l; } - @Override - public void applyGlobalConfigToLayer(NeuralNetConfiguration.NeuralNetConfigurationBuilder globalConfig) { - NeuralNetConfiguration gconf = globalConfig.build(); - if (activation == null) { - activation = SameDiffLayerUtils.fromIActivation(gconf.getActivation()); - } - if (cm == null) { - cm = gconf.getConvolutionMode(); - } + public B kernelSize(int ... kernel) { + this.kernel$value = ValidationUtils.validate2NonNegative(kernel, false, "kernel"); + this.kernel$set = true; + return self(); } - @Getter - @Setter - public static class Builder extends SameDiffLayer.Builder { - - /** - * Number of inputs to the layer (input size) - */ - private int nIn; - - /** - * Number of outputs (output size) - */ - private int nOut; - - /** - * Activation function for the layer - */ - private Activation activation = Activation.TANH; - - /** - * Kernel size for the layer. Must be 2 values (height/width) - */ - @Setter(AccessLevel.NONE) - private int[] kernel = new int[] {2, 2}; - - /** - * Stride for the layer. 
Must be 2 values (height/width) - */ - @Setter(AccessLevel.NONE) - private int[] stride = new int[] {1, 1}; - - /** - * Padding for the layer. Not used if {@link ConvolutionMode#Same} is set. Must be 2 values (height/width) - */ - @Setter(AccessLevel.NONE) - private int[] padding = new int[] {0, 0}; - - /** - * Dilation for the layer. Must be 2 values (height/width) - */ - @Setter(AccessLevel.NONE) - private int[] dilation = new int[] {1, 1}; - - /** - * Set input filter size (h,w) for this locally connected 2D layer - * - */ - @Setter(AccessLevel.NONE) - private int[] inputSize; - - /** - * Convolution mode for the layer. See {@link ConvolutionMode} for details - */ - private ConvolutionMode cm = ConvolutionMode.Same; - - /** - * If true (default is false) the layer will have a bias - */ - private boolean hasBias = true; - - protected CNN2DFormat format = CNN2DFormat.NCHW; - - - /** - * @param kernel Kernel size for the layer. Must be 2 values (height/width) - */ - public void setKernel(int... kernel) { - this.kernel = ValidationUtils.validate2NonNegative(kernel, false, "kernel"); - } - - /** - * @param stride Stride for the layer. Must be 2 values (height/width) - */ - public void setStride(int... stride) { - this.stride = ValidationUtils.validate2NonNegative(stride, false, "stride"); - } - - /** - * @param padding Padding for the layer. Not used if {@link ConvolutionMode#Same} is set. Must be 2 values (height/width) - */ - public void setPadding(int... padding) { - this.padding = ValidationUtils.validate2NonNegative(padding, false, "padding"); - } - - /** - * @param dilation Dilation for the layer. Must be 2 values (height/width) - */ - public void setDilation(int... dilation) { - this.dilation = ValidationUtils.validate2NonNegative(dilation, false, "dilation"); - } - - /** - * @param nIn Number of inputs to the layer (input size) - */ - public Builder nIn(int nIn) { - this.setNIn(nIn); - return this; - } - - /** - * @param nOut Number of outputs (output size) - */ - public Builder nOut(int nOut) { - this.setNOut(nOut); - return this; - } - - /** - * @param activation Activation function for the layer - */ - public Builder activation(Activation activation) { - this.setActivation(activation); - return this; - } - - /** - * @param k Kernel size for the layer. Must be 2 values (height/width) - */ - public Builder kernelSize(int... k) { - this.setKernel(k); - return this; - } - - /** - * @param s Stride for the layer. Must be 2 values (height/width) - */ - public Builder stride(int... s) { - this.setStride(s); - return this; - } - - /** - * @param p Padding for the layer. Not used if {@link ConvolutionMode#Same} is set. Must be 2 values (height/width) - */ - public Builder padding(int... p) { - this.setPadding(p); - return this; - } - - /** - * @param cm Convolution mode for the layer. See {@link ConvolutionMode} for details - */ - public Builder convolutionMode(ConvolutionMode cm) { - this.setCm(cm); - return this; - } - - /** - * @param d Dilation for the layer. Must be 2 values (height/width) - */ - public Builder dilation(int... d) { - this.setDilation(d); - return this; - } - - /** - * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last). - * See {@link CNN2DFormat} for more details.
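For orientation, a rough sketch of how a locally connected layer would be configured once this change lands. It is hypothetical and not part of the patch: kernelSize(...) is the customizer added above, stride/padding/dilation/inputSize are the customizers added just below, and builder(), nIn, nOut and convolutionMode are assumed to be the setters generated by the @SuperBuilder setup this patch introduces.

    // Hypothetical usage sketch; not part of this patch.
    LocallyConnected2D layer = LocallyConnected2D.builder()
            .nIn(3)                            // input channels (assumed generated setter)
            .nOut(16)                          // output channels (assumed generated setter)
            .kernelSize(3, 3)                  // validated via ValidationUtils.validate2NonNegative
            .stride(1, 1)
            .padding(0, 0)
            .dilation(1, 1)
            .inputSize(28, 28)                 // input height/width, needed for output-size inference
            .convolutionMode(ConvolutionMode.Same)
            .build();                          // build() pre-computes featureDim from kernel and nIn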
- * Default: NCHW - * @param format Format for activations (in and out) - */ - public Builder dataFormat(CNN2DFormat format){ - this.format = format; - return this; - } - - /** - * @param hasBias If true (default is false) the layer will have a bias - */ - public Builder hasBias(boolean hasBias) { - this.setHasBias(hasBias); - return this; - } - - /** - * Set input filter size (h,w) for this locally connected 2D layer - * - * @param inputSize pair of height and width of the input filters to this layer - * @return Builder - */ - public Builder setInputSize(int... inputSize) { - this.inputSize = ValidationUtils.validate2(inputSize, false, "inputSize"); - return this; - } - - @Override - @SuppressWarnings("unchecked") - public LocallyConnected2D build() { - ConvolutionUtils.validateConvolutionModePadding(cm, padding); - ConvolutionUtils.validateCnnKernelStridePadding(kernel, stride, padding); - return new LocallyConnected2D(this); - } + public B inputSize(int ... size) { + this.inputSize = size; + return self(); } + + public B stride(int ... stride) { + this.stride$value = ValidationUtils.validate2NonNegative(stride, false, "stride"); + this.stride$set = true; + return self(); + } + + public B padding(int ... padding) { + this.padding$value = ValidationUtils.validate2NonNegative(padding, false, "padding"); + this.padding$set = true; + return self(); + } + + public B dilation(int ... dilation) { + this.dilation$value = ValidationUtils.validate2NonNegative(dilation, false, "dilation"); + this.dilation$set = true; + return self(); + } + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LossLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LossLayer.java index 226d3255d..2384f506c 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LossLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LossLayer.java @@ -20,10 +20,13 @@ package org.deeplearning4j.nn.conf.layers; +import java.util.Collection; +import java.util.Map; import lombok.Data; import lombok.EqualsAndHashCode; import lombok.NoArgsConstructor; import lombok.ToString; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -32,100 +35,75 @@ import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.conf.memory.MemoryReport; import org.deeplearning4j.nn.params.EmptyParamInitializer; import org.deeplearning4j.optimize.api.TrainingListener; -import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.lossfunctions.ILossFunction; -import org.nd4j.linalg.lossfunctions.LossFunctions; - -import java.util.Collection; -import java.util.Map; @Data -@NoArgsConstructor @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@SuperBuilder public class LossLayer extends FeedForwardLayer { - protected ILossFunction lossFn; + protected ILossFunction lossFunction; - protected LossLayer(Builder builder) { - super(builder); - this.lossFn = builder.lossFn; + @Override + public Layer instantiate( + NeuralNetConfiguration conf, + Collection trainingListeners, + int layerIndex, + INDArray layerParamsView, + boolean initializeParams, + DataType networkDataType) { + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + 
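With the constructor-based Builder of LossLayer removed, configuration goes through the generated builder instead. A rough sketch, not part of the patch; the lossFunction(...) setter is assumed to be generated from the renamed lossFunction field:

    // Hypothetical usage sketch; not part of this patch.
    LossLayer loss = LossLayer.builder()
            .lossFunction(LossFunctions.LossFunction.MCXENT.getILossFunction())
            .build();

Note that, unlike the old Builder constructors, nothing pre-sets an identity activation here, and nIn(...)/nOut(...) are blocked in the builder overrides further down because the layer has no parameters.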
runInheritance(); + org.deeplearning4j.nn.layers.LossLayer ret = + new org.deeplearning4j.nn.layers.LossLayer(lconf, networkDataType); + + ret.addTrainingListeners(trainingListeners); + ret.setIndex(layerIndex); + ret.setParamsViewArray(layerParamsView); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); + ret.setParamTable(paramTable); + ret.setLayerConfiguration(lconf); + return ret; + } + + @Override + public boolean isPretrainParam(String paramName) { + throw new UnsupportedOperationException("LossLayer does not contain parameters"); + } + + @Override + public LayerMemoryReport getMemoryReport(InputType inputType) { + // During inference and training: dup the input array. But, this counts as *activations* not + // working memory + return new LayerMemoryReport.Builder(name, LossLayer.class, inputType, inputType) + .standardMemory(0, 0) // No params + .workingMemory(0, 0, 0, 0) + .cacheMemory( + MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) // No caching + .build(); + } + + @Override + public ParamInitializer initializer() { + return EmptyParamInitializer.getInstance(); + } + + public static abstract class LossLayerBuilder< + C extends LossLayer, B extends LossLayerBuilder> + extends FeedForwardLayerBuilder { + @Override + public B nIn(long nIn) { + throw new UnsupportedOperationException( + "Ths layer has no parameters, thus nIn will always equal nOut."); } @Override - public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, - int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { - LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); - org.deeplearning4j.nn.layers.LossLayer ret = new org.deeplearning4j.nn.layers.LossLayer(lconf, networkDataType); - - ret.addTrainingListeners(trainingListeners); - ret.setIndex(layerIndex); - ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(this, layerParamsView, initializeParams); - ret.setParamTable(paramTable); - ret.setLayerConfiguration(lconf); - return ret; - } - - @Override - public boolean isPretrainParam(String paramName) { - throw new UnsupportedOperationException("LossLayer does not contain parameters"); - } - - @Override - public LayerMemoryReport getMemoryReport(InputType inputType) { - //During inference and training: dup the input array. 
But, this counts as *activations* not working memory - return new LayerMemoryReport.Builder(layerName, LossLayer.class, inputType, inputType).standardMemory(0, 0) //No params - .workingMemory(0, 0, 0, 0) - .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching - .build(); - } - - @Override - public ParamInitializer initializer() { - return EmptyParamInitializer.getInstance(); - } - - public static class Builder extends BaseOutputLayer.Builder { - - public Builder() { - this.activation(Activation.IDENTITY); - } - - /** - * @param lossFunction Loss function for the loss layer - */ - public Builder(LossFunctions.LossFunction lossFunction) { - lossFunction(lossFunction); - this.activation(Activation.IDENTITY); - } - - /** - * @param lossFunction Loss function for the loss layer - */ - public Builder(ILossFunction lossFunction) { - this.setLossFn(lossFunction); - this.activation(Activation.IDENTITY); - } - - @Override - @SuppressWarnings("unchecked") - public Builder nIn(int nIn) { - throw new UnsupportedOperationException("Ths layer has no parameters, thus nIn will always equal nOut."); - } - - @Override - @SuppressWarnings("unchecked") - public Builder nOut(int nOut) { - throw new UnsupportedOperationException("Ths layer has no parameters, thus nIn will always equal nOut."); - } - - @Override - @SuppressWarnings("unchecked") - public LossLayer build() { - return new LossLayer(this); - } + public B nOut(long nOut) { + throw new UnsupportedOperationException( + "Ths layer has no parameters, thus nIn will always equal nOut."); } + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/NoParamLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/NoParamLayer.java index 57a58f42c..78ae44d93 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/NoParamLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/NoParamLayer.java @@ -20,54 +20,50 @@ package org.deeplearning4j.nn.conf.layers; -import lombok.NoArgsConstructor; +import java.util.List; +import lombok.experimental.SuperBuilder; import net.brutex.ai.dnn.api.LayerType; import org.deeplearning4j.nn.api.ParamInitializer; -import org.deeplearning4j.nn.conf.GradientNormalization; import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.params.EmptyParamInitializer; import org.nd4j.linalg.learning.config.IUpdater; import org.nd4j.linalg.learning.regularization.Regularization; -import java.util.List; - -@NoArgsConstructor +@SuperBuilder public abstract class NoParamLayer extends LayerConfiguration { - protected NoParamLayer(Builder builder) { + @Override + public ParamInitializer initializer() { + return EmptyParamInitializer.getInstance(); + } - super(builder); - setType(LayerType.POOL); - } + @Override + public void setNIn(InputType inputType, boolean override) { + // No op in most no param layers + } - @Override - public ParamInitializer initializer() { - return EmptyParamInitializer.getInstance(); - } + @Override + public List getRegularizationByParam(String paramName) { + // No parameters -> no regularization of parameters + return null; + } - @Override - public void setNIn(InputType inputType, boolean override) { - //No op in most no param layers - } + @Override + public boolean isPretrainParam(String paramName) { + throw new UnsupportedOperationException( + getClass().getSimpleName() + " does not contain 
parameters"); + } - @Override - public List getRegularizationByParam(String paramName){ - //No parameters -> no regularization of parameters - return null; - } - - @Override - public boolean isPretrainParam(String paramName) { - throw new UnsupportedOperationException(getClass().getSimpleName() + " does not contain parameters"); - } - -/** -* + /** * @return -*/ - @Override - public IUpdater getIUpdater() { - return Updater.NONE.getIUpdaterWithDefaultConfig(); - } + */ + @Override + public IUpdater getIUpdater() { + return Updater.NONE.getIUpdaterWithDefaultConfig(); + } + + public static abstract class NoParamLayerBuilder> + extends LayerConfigurationBuilder + {} } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/OutputLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/OutputLayer.java index 2884ac424..48e4b9f1a 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/OutputLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/OutputLayer.java @@ -20,87 +20,77 @@ package org.deeplearning4j.nn.conf.layers; -import lombok.Data; -import lombok.EqualsAndHashCode; -import lombok.NoArgsConstructor; -import lombok.ToString; +import java.util.Collection; +import java.util.Map; + +import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.params.DefaultParamInitializer; import org.deeplearning4j.optimize.api.TrainingListener; -import org.nd4j.linalg.activations.impl.ActivationSoftmax; +import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.lossfunctions.ILossFunction; -import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction; - -import java.util.Collection; -import java.util.Map; +import org.nd4j.linalg.lossfunctions.LossFunctions; @Data -@NoArgsConstructor @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder") public class OutputLayer extends BaseOutputLayer { - protected OutputLayer(Builder builder) { - super(builder); - initializeConstraints(builder); + { // Set default activation function to softmax (to match default loss function MCXENT) + setActivation(Activation.SOFTMAX.getActivationFunction()); + } + + public static OutputLayerBuilder builder() { + return innerBuilder(); + } + + public static OutputLayerBuilder builder(@NonNull LossFunctions.LossFunction lossFn) { + return innerBuilder().lossFunction(lossFn); + } + + @Override + public Layer instantiate( + NeuralNetConfiguration conf, + Collection trainingListeners, + int layerIndex, + INDArray layerParamsView, + boolean initializeParams, + DataType networkDataType) { + LayerValidation.assertNInNOutSet("OutputLayer", getName(), layerIndex, getNIn(), getNOut()); + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + runInheritance(); + + org.deeplearning4j.nn.layers.OutputLayer ret = + new org.deeplearning4j.nn.layers.OutputLayer(lconf, networkDataType); + ret.addTrainingListeners(trainingListeners); + ret.setIndex(layerIndex); + ret.setParamsViewArray(layerParamsView); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); + ret.setParamTable(paramTable); + 
ret.setLayerConfiguration(lconf); + return ret; + } + + @Override + public ParamInitializer initializer() { + return DefaultParamInitializer.getInstance(); + } + + public static abstract class OutputLayerBuilder< + C extends OutputLayer, B extends OutputLayerBuilder> + extends BaseOutputLayerBuilder { + public C build() { + C l = this.initBuild(); + l.initializeConstraints(); + return l; } - @Override - public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, - int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { - LayerValidation.assertNInNOutSet("OutputLayer", getLayerName(), layerIndex, getNIn(), getNOut()); - LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); - runInheritance(); - org.deeplearning4j.nn.layers.OutputLayer ret = new org.deeplearning4j.nn.layers.OutputLayer(lconf, networkDataType); - ret.addTrainingListeners(trainingListeners); - ret.setIndex(layerIndex); - ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(this, layerParamsView, initializeParams); - ret.setParamTable(paramTable); - ret.setLayerConfiguration(lconf); - return ret; - } - - @Override - public ParamInitializer initializer() { - return DefaultParamInitializer.getInstance(); - } - - public static class Builder extends BaseOutputLayer.Builder { - - public Builder() { - //Set default activation function to softmax (to match default loss function MCXENT) - this.setActivationFn(new ActivationSoftmax()); - } - - /** - * @param lossFunction Loss function for the output layer - */ - public Builder(LossFunction lossFunction) { - super.lossFunction(lossFunction); - //Set default activation function to softmax (for consistent behaviour with no-arg constructor) - this.setActivationFn(new ActivationSoftmax()); - } - - /** - * @param lossFunction Loss function for the output layer - */ - public Builder(ILossFunction lossFunction) { - this.setLossFn(lossFunction); - //Set default activation function to softmax (for consistent behaviour with no-arg constructor) - this.setActivationFn(new ActivationSoftmax()); - } - - @Override - @SuppressWarnings("unchecked") - public OutputLayer build() { - return new OutputLayer(this); - } - } + } } - diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PReLULayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PReLULayer.java index 249339df9..d48d88708 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PReLULayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PReLULayer.java @@ -20,7 +20,10 @@ package org.deeplearning4j.nn.conf.layers; +import java.util.Collection; +import java.util.Map; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.InputPreProcessor; @@ -34,138 +37,133 @@ import org.deeplearning4j.optimize.api.TrainingListener; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.Collection; -import java.util.Map; - @Data -@NoArgsConstructor @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder") public class PReLULayer extends BaseLayerConfiguration { + /** + * Explicitly set input shape of incoming activations so that parameters can be initialized + * 
properly. This explicitly excludes the mini-batch dimension. + */ + @Builder.Default private long[] inputShape = null; + /** + * Set the broadcasting axes of PReLU's alpha parameter. + * + *

For instance, given input data of shape [mb, channels, height, width], setting axes to [2,3] + * will set alpha to shape [channels, 1, 1] and broadcast alpha across height and width dimensions + * of each channel. + */ + @Builder.Default private long[] sharedAxes = null; - private long[] inputShape = null; - private long[] sharedAxes = null; + private int nIn; + private int nOut; - private int nIn; - private int nOut; + public static PReLULayerBuilder builder() { + return innerBuilder() + .weightInit(new WeightInitConstant(0)); // Default to 0s, and don't inherit global default + } - private PReLULayer(Builder builder) { - super(builder); - this.inputShape = builder.inputShape; - this.sharedAxes = builder.sharedAxes; - initializeConstraints(builder); + @Override + public Layer instantiate( + NeuralNetConfiguration conf, + Collection trainingListeners, + int layerIndex, + INDArray layerParamsView, + boolean initializeParams, + DataType networkDataType) { + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + runInheritance(); + org.deeplearning4j.nn.layers.feedforward.PReLU ret = + new org.deeplearning4j.nn.layers.feedforward.PReLU(lconf, networkDataType); + ret.addTrainingListeners(trainingListeners); + ret.setIndex(layerIndex); + ret.setParamsViewArray(layerParamsView); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); + ret.setParamTable(paramTable); + ret.setLayerConfiguration(lconf); + return ret; + } + + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + if (inputType == null) { + throw new IllegalStateException( + "Invalid input type: null for layer name \"" + getName() + "\""); + } + return inputType; + } + + @Override + public void setNIn(InputType inputType, boolean override) { + // not needed + } + + @Override + public InputPreProcessor getPreProcessorForInputType(InputType inputType) { + // None needed + return null; + } + + @Override + public boolean isPretrainParam(String paramName) { + return false; + } + + @Override + public ParamInitializer initializer() { + return PReLUParamInitializer.getInstance(inputShape, sharedAxes); + } + + @Override + public LayerMemoryReport getMemoryReport(InputType inputType) { + InputType outputType = getOutputType(-1, inputType); + + val numParams = initializer().numParams(this); + val updaterStateSize = (int) getIUpdater().stateSize(numParams); + + return new LayerMemoryReport.Builder(name, PReLULayer.class, inputType, outputType) + .standardMemory(numParams, updaterStateSize) + .workingMemory(0, 0, 0, 0) + .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) + .build(); + } + + public static abstract class PReLULayerBuilder< + C extends PReLULayer, B extends PReLULayerBuilder> + extends BaseLayerConfigurationBuilder { + public C build() { + C l = initBuild(); + l.initializeConstraints(); + return l; } - @Override - public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, - int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { - LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); - org.deeplearning4j.nn.layers.feedforward.PReLU ret = new org.deeplearning4j.nn.layers.feedforward.PReLU(lconf, networkDataType); - ret.addTrainingListeners(trainingListeners); - ret.setIndex(layerIndex); - ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(this, layerParamsView, initializeParams); - 
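Taken together with the inputShape(...) and sharedAxes(...) customizers defined a little further down, a PReLU configuration under the new builder would look roughly like this (hypothetical shapes, not part of the patch):

    // Hypothetical usage sketch; not part of this patch.
    PReLULayer prelu = PReLULayer.builder()    // builder() pre-sets WeightInitConstant(0)
            .inputShape(64, 32, 32)            // channels, height, width (no minibatch dimension)
            .sharedAxes(2, 3)                  // broadcast alpha across height and width
            .build();                          // build() also calls initializeConstraints()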
ret.setParamTable(paramTable); - ret.setLayerConfiguration(lconf); - return ret; + /** + * Explicitly set input shape of incoming activations so that parameters can be initialized + * properly. This explicitly excludes the mini-batch dimension. + * + * @param shape shape of input data + */ + public B inputShape(long... shape) { + this.inputShape$value = shape; + this.inputShape$set = true; + return self(); } - @Override - public InputType getOutputType(int layerIndex, InputType inputType) { - if (inputType == null) { - throw new IllegalStateException("Invalid input type: null for layer name \"" + getLayerName() + "\""); - } - return inputType; + /** + * Set the broadcasting axes of PReLU's alpha parameter. + * + *

For instance, given input data of shape [mb, channels, height, width], setting axes to + * [2,3] will set alpha to shape [channels, 1, 1] and broadcast alpha across height and width + * dimensions of each channel. + * + * @param axes shared/broadcasting axes + * @return Builder + */ + public B sharedAxes(long... axes) { + this.sharedAxes$value = axes; + this.sharedAxes$set = true; + return self(); } - - @Override - public void setNIn(InputType inputType, boolean override) { - // not needed - } - - @Override - public InputPreProcessor getPreProcessorForInputType(InputType inputType) { - // None needed - return null; - } - - @Override - public boolean isPretrainParam(String paramName) { - return false; - } - - @Override - public ParamInitializer initializer() { - return PReLUParamInitializer.getInstance(inputShape, sharedAxes); - } - - @Override - public LayerMemoryReport getMemoryReport(InputType inputType) { - InputType outputType = getOutputType(-1, inputType); - - val numParams = initializer().numParams(this); - val updaterStateSize = (int) getIUpdater().stateSize(numParams); - - return new LayerMemoryReport.Builder(layerName, PReLULayer.class, inputType, outputType) - .standardMemory(numParams, updaterStateSize).workingMemory(0, 0, 0, 0) - .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS).build(); - } - - @Getter - @Setter - public static class Builder extends FeedForwardLayer.Builder { - - public Builder(){ - //Default to 0s, and don't inherit global default - this.weightInit = new WeightInitConstant(0); - } - - /** - * Explicitly set input shape of incoming activations so that parameters can be initialized properly. This - * explicitly excludes the mini-batch dimension. - * - */ - private long[] inputShape = null; - - /** - * Set the broadcasting axes of PReLU's alpha parameter. - * - * For instance, given input data of shape [mb, channels, height, width], setting axes to [2,3] will set alpha - * to shape [channels, 1, 1] and broadcast alpha across height and width dimensions of each channel. - * - */ - private long[] sharedAxes = null; - - /** - * Explicitly set input shape of incoming activations so that parameters can be initialized properly. This - * explicitly excludes the mini-batch dimension. - * - * @param shape shape of input data - */ - public Builder inputShape(long... shape) { - this.setInputShape(shape); - return this; - } - - /** - * Set the broadcasting axes of PReLU's alpha parameter. - * - * For instance, given input data of shape [mb, channels, height, width], setting axes to [2,3] will set alpha - * to shape [channels, 1, 1] and broadcast alpha across height and width dimensions of each channel. - * - * @param axes shared/broadcasting axes - * @return Builder - */ - public Builder sharedAxes(long... 
axes) { - this.setSharedAxes(axes); - return this; - } - - @Override - @SuppressWarnings("unchecked") - public PReLULayer build() { - return new PReLULayer(this); - } - } - + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Pooling1D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Pooling1D.java index e06f825dc..3ad05de09 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Pooling1D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Pooling1D.java @@ -24,6 +24,7 @@ import lombok.Data; import lombok.EqualsAndHashCode; import lombok.NoArgsConstructor; import lombok.ToString; +import lombok.experimental.SuperBuilder; /** * 1D Pooling (subsampling) layer. Equivalent to {@link Subsampling1DLayer} @@ -31,9 +32,9 @@ import lombok.ToString; * Supports the following pooling types: MAX, AVG, SUM, PNORM, NONE * @author Max Pumperla */ -@Data -@NoArgsConstructor + @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@SuperBuilder(buildMethodName = "initBuild") public class Pooling1D extends Subsampling1DLayer { } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Pooling2D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Pooling2D.java index 87bdd59e0..ddc3f837d 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Pooling2D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Pooling2D.java @@ -24,6 +24,7 @@ import lombok.Data; import lombok.EqualsAndHashCode; import lombok.NoArgsConstructor; import lombok.ToString; +import lombok.experimental.SuperBuilder; /** * 2D Pooling (subsampling) layer. Equivalent to {@link SubsamplingLayer} @@ -31,9 +32,9 @@ import lombok.ToString; * Supports the following pooling types: MAX, AVG, SUM, PNORM, NONE * @author Max Pumperla */ -@Data -@NoArgsConstructor + @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@SuperBuilder(buildMethodName = "initBuild") public class Pooling2D extends SubsamplingLayer { } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PrimaryCapsules.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PrimaryCapsules.java index fc0c256f7..ce8101738 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PrimaryCapsules.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PrimaryCapsules.java @@ -20,7 +20,9 @@ package org.deeplearning4j.nn.conf.layers; +import java.util.Map; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.inputs.InputType.InputTypeConvolutional; @@ -37,388 +39,386 @@ import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.impl.layers.convolution.config.Conv2DConfig; import org.nd4j.linalg.factory.Nd4j; -import java.util.Map; - @Data -@NoArgsConstructor @EqualsAndHashCode(callSuper = true) +@SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder") public class PrimaryCapsules extends SameDiffLayer { - private int[] kernelSize; - private int[] stride; - private int[] padding; - private int[] dilation; - private int inputChannels; - private int channels; + private static final String WEIGHT_PARAM = "weight"; + private static final String BIAS_PARAM = 
"bias"; + /** + * Sets the kernel size of the 2d convolution + * + * @param kernelSize + * @return + */ + @Builder.Default private int[] kernelSize = new int[] {9, 9}; + /** + * Sets the stride of the 2d convolution + * + * @param stride + * @return + */ + @Builder.Default private int[] stride = new int[] {2, 2}; + /** + * Sets the padding of the 2d convolution + * + * @param padding + * @return + */ + @Builder.Default private int[] padding = new int[] {0, 0}; + /** + * Sets the dilation of the 2d convolution + * + * @param dilation + * @return + */ + @Builder.Default private int[] dilation = new int[] {1, 1}; - private boolean hasBias; + private int inputChannels; + /** + * Sets the number of channels to use in the 2d convolution. + * + *

Note that the actual number of channels is channels * capsuleDimensions + * + *

Does the same thing as nOut() + * + * @param channels + * @return + */ + @Builder.Default private int channels = 32; - private int capsules; - private int capsuleDimensions; + @Builder.Default private boolean hasBias = true; + /** + * Usually inferred automatically. + * + * @param capsules + * @return + */ + private int capsules; + /** + * Sets the number of dimensions to use in the capsules. + * + * @param capsuleDimensions + * @return + */ + private int capsuleDimensions; + /** + * The convolution mode to use in the 2d convolution + * + * @param convolutionMode + * @return + */ + @Builder.Default private ConvolutionMode convolutionMode = ConvolutionMode.Truncate; + /** + * Whether to use a ReLU activation on the 2d convolution + * + * @param useRelu + * @return + */ + @Builder.Default private boolean useRelU = false; + /** + * Use a LeakyReLU activation on the 2d convolution + * + * @param leak the alpha value for the LeakyReLU activation. + * @return + */ + @Builder.Default private double useLeakyReLU = 0; - private ConvolutionMode convolutionMode = ConvolutionMode.Truncate; + public static PrimaryCapsulesBuilder builder() { + return innerBuilder(); + } - private boolean useRelu = false; - private double leak = 0; + public static PrimaryCapsulesBuilder builder( + int capsuleDimensions, + int channels, + int[] kernelSize, + int[] stride, + int[] padding, + int[] dilation, + ConvolutionMode convolutionMode) { + return innerBuilder() + .capsuleDimensions(capsuleDimensions) + .channels(channels) + .kernelSize(kernelSize) + .stride(stride) + .padding(padding) + .dilation(dilation) + .convolutionMode(convolutionMode); + } - private static final String WEIGHT_PARAM = "weight"; - private static final String BIAS_PARAM = "bias"; + public static PrimaryCapsulesBuilder builder( + int capsuleDimensions, + int channels, + int[] kernelSize, + int[] stride, + int[] padding, + int[] dilation) { + return innerBuilder() + .capsuleDimensions(capsuleDimensions) + .channels(channels) + .kernelSize(kernelSize) + .stride(stride) + .padding(padding) + .dilation(dilation); + } - public PrimaryCapsules(Builder builder){ - super(builder); + public static PrimaryCapsulesBuilder builder( + int capsuleDimensions, int channels, int[] kernelSize, int[] stride, int[] padding) { + return innerBuilder() + .capsuleDimensions(capsuleDimensions) + .channels(channels) + .kernelSize(kernelSize) + .stride(stride) + .padding(padding); + } - this.kernelSize = builder.kernelSize; - this.stride = builder.stride; - this.padding = builder.padding; - this.dilation = builder.dilation; - this.channels = builder.channels; - this.hasBias = builder.hasBias; - this.capsules = builder.capsules; - this.capsuleDimensions = builder.capsuleDimensions; - this.convolutionMode = builder.convolutionMode; - this.useRelu = builder.useRelu; - this.leak = builder.leak; + public static PrimaryCapsulesBuilder builder( + int capsuleDimensions, int channels, int[] kernelSize, int[] stride) { + return innerBuilder() + .capsuleDimensions(capsuleDimensions) + .channels(channels) + .kernelSize(kernelSize) + .stride(stride); + } - if(capsuleDimensions <= 0 || channels <= 0){ - throw new IllegalArgumentException("Invalid configuration for Primary Capsules (layer name = \"" - + layerName + "\"):" - + " capsuleDimensions and channels must be > 0. 
Got: " - + capsuleDimensions + ", " + channels); - } + public static PrimaryCapsulesBuilder builder( + int capsuleDimensions, int channels, int[] kernelSize) { + return innerBuilder() + .capsuleDimensions(capsuleDimensions) + .channels(channels) + .kernelSize(kernelSize); + } - if(capsules < 0){ - throw new IllegalArgumentException("Invalid configuration for Capsule ILayer (layer name = \"" - + layerName + "\"):" - + " capsules must be >= 0 if set. Got: " - + capsules); - } + public static PrimaryCapsulesBuilder builder(int capsuleDimensions, int channels) { + return innerBuilder().capsuleDimensions(capsuleDimensions).channels(channels); + } + @Override + public SDVariable defineLayer( + SameDiff SD, SDVariable input, Map paramTable, SDVariable mask) { + Conv2DConfig conf = + Conv2DConfig.builder() + .kH(kernelSize[0]) + .kW(kernelSize[1]) + .sH(stride[0]) + .sW(stride[1]) + .pH(padding[0]) + .pW(padding[1]) + .dH(dilation[0]) + .dW(dilation[1]) + .isSameMode(convolutionMode == ConvolutionMode.Same) + .build(); + + SDVariable conved; + + if (hasBias) { + conved = SD.cnn.conv2d(input, paramTable.get(WEIGHT_PARAM), paramTable.get(BIAS_PARAM), conf); + } else { + conved = SD.cnn.conv2d(input, paramTable.get(WEIGHT_PARAM), conf); } - @Override - public SDVariable defineLayer(SameDiff SD, SDVariable input, Map paramTable, SDVariable mask) { - Conv2DConfig conf = Conv2DConfig.builder() - .kH(kernelSize[0]).kW(kernelSize[1]) - .sH(stride[0]).sW(stride[1]) - .pH(padding[0]).pW(padding[1]) - .dH(dilation[0]).dW(dilation[1]) - .isSameMode(convolutionMode == ConvolutionMode.Same) - .build(); - - SDVariable conved; - - if(hasBias){ - conved = SD.cnn.conv2d(input, paramTable.get(WEIGHT_PARAM), paramTable.get(BIAS_PARAM), conf); - } else { - conved = SD.cnn.conv2d(input, paramTable.get(WEIGHT_PARAM), conf); - } - - if(useRelu){ - if(leak == 0) { - conved = SD.nn.relu(conved, 0); - } else { - conved = SD.nn.leakyRelu(conved, leak); - } - } - - SDVariable reshaped = conved.reshape(-1, capsules, capsuleDimensions); - return CapsuleUtils.squash(SD, reshaped, 2); + if (useRelU) { + if (useLeakyReLU == 0) { + conved = SD.nn.relu(conved, 0); + } else { + conved = SD.nn.leakyRelu(conved, useLeakyReLU); + } } - @Override - public void defineParameters(SDLayerParams params) { - params.clear(); - params.addWeightParam(WEIGHT_PARAM, - kernelSize[0], kernelSize[1], inputChannels, (long) capsuleDimensions * channels); + SDVariable reshaped = conved.reshape(-1, capsules, capsuleDimensions); + return CapsuleUtils.squash(SD, reshaped, 2); + } - if(hasBias){ - params.addBiasParam(BIAS_PARAM, (long) capsuleDimensions * channels); + @Override + public void defineParameters(SDLayerParams params) { + params.clear(); + params.addWeightParam( + WEIGHT_PARAM, + kernelSize[0], + kernelSize[1], + inputChannels, + (long) capsuleDimensions * channels); + + if (hasBias) { + params.addBiasParam(BIAS_PARAM, (long) capsuleDimensions * channels); + } + } + + @Override + public void initializeParameters(Map params) { + try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) { + for (Map.Entry e : params.entrySet()) { + if (BIAS_PARAM.equals(e.getKey())) { + e.getValue().assign(0); + } else if (WEIGHT_PARAM.equals(e.getKey())) { + double fanIn = inputChannels * kernelSize[0] * kernelSize[1]; + double fanOut = + capsuleDimensions + * channels + * kernelSize[0] + * kernelSize[1] + / ((double) stride[0] * stride[1]); + WeightInitUtil.initWeights( + fanIn, fanOut, e.getValue().shape(), weightInit, null, 'c', 
e.getValue()); } + } + } + } + + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + if (inputType == null || inputType.getType() != Type.CNN) { + throw new IllegalStateException( + "Invalid input for Primary Capsules layer (layer name = \"" + + name + + "\"): expect CNN input. Got: " + + inputType); } - @Override - public void initializeParameters(Map params) { - try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) { - for (Map.Entry e : params.entrySet()) { - if (BIAS_PARAM.equals(e.getKey())) { - e.getValue().assign(0); - } else if(WEIGHT_PARAM.equals(e.getKey())){ - double fanIn = inputChannels * kernelSize[0] * kernelSize[1]; - double fanOut = capsuleDimensions * channels * kernelSize[0] * kernelSize[1] / ((double) stride[0] * stride[1]); - WeightInitUtil.initWeights(fanIn, fanOut, e.getValue().shape(), weightInit, null, 'c', - e.getValue()); - } - } - } + if (capsules > 0) { + return InputType.recurrent(capsules, capsuleDimensions); + } else { + + InputTypeConvolutional out = + (InputTypeConvolutional) + InputTypeUtil.getOutputTypeCnnLayers( + inputType, + kernelSize, + stride, + padding, + dilation, + convolutionMode, + (long) capsuleDimensions * channels, + -1, + getName(), + PrimaryCapsules.class); + + return InputType.recurrent( + (int) (out.getChannels() * out.getHeight() * out.getWidth() / capsuleDimensions), + capsuleDimensions); + } + } + + @Override + public void setNIn(InputType inputType, boolean override) { + if (inputType == null || inputType.getType() != Type.CNN) { + throw new IllegalStateException( + "Invalid input for Primary Capsules layer (layer name = \"" + + name + + "\"): expect CNN input. Got: " + + inputType); } - @Override - public InputType getOutputType(int layerIndex, InputType inputType) { - if (inputType == null || inputType.getType() != Type.CNN) { - throw new IllegalStateException("Invalid input for Primary Capsules layer (layer name = \"" - + layerName + "\"): expect CNN input. Got: " + inputType); - } + InputTypeConvolutional ci = (InputTypeConvolutional) inputType; - if(capsules > 0){ - return InputType.recurrent(capsules, capsuleDimensions); - } else { + this.inputChannels = (int) ci.getChannels(); - InputTypeConvolutional out = (InputTypeConvolutional) InputTypeUtil - .getOutputTypeCnnLayers(inputType, kernelSize, stride, padding, dilation, convolutionMode, - (long) capsuleDimensions * channels, -1, getLayerName(), PrimaryCapsules.class); + if (capsules <= 0 || override) { - return InputType.recurrent((int) (out.getChannels() * out.getHeight() * out.getWidth() / capsuleDimensions), - capsuleDimensions); - } + InputTypeConvolutional out = + (InputTypeConvolutional) + InputTypeUtil.getOutputTypeCnnLayers( + inputType, + kernelSize, + stride, + padding, + dilation, + convolutionMode, + (long) capsuleDimensions * channels, + -1, + getName(), + PrimaryCapsules.class); + + this.capsules = + (int) (out.getChannels() * out.getHeight() * out.getWidth() / capsuleDimensions); + } + } + + public static abstract class PrimaryCapsulesBuilder< + C extends PrimaryCapsules, B extends PrimaryCapsulesBuilder> + extends SameDiffLayerBuilder { + + public B kernelSize(int... 
kernelSize) { + this.kernelSize$value = ValidationUtils.validate2NonNegative(kernelSize, true, "kernelSize"); + this.kernelSize$set = true; + return self(); } - @Override - public void setNIn(InputType inputType, boolean override) { - if (inputType == null || inputType.getType() != Type.CNN) { - throw new IllegalStateException("Invalid input for Primary Capsules layer (layer name = \"" - + layerName + "\"): expect CNN input. Got: " + inputType); - } - - InputTypeConvolutional ci = (InputTypeConvolutional) inputType; - - this.inputChannels = (int) ci.getChannels(); - - if(capsules <= 0 || override) { - - InputTypeConvolutional out = (InputTypeConvolutional) InputTypeUtil - .getOutputTypeCnnLayers(inputType, kernelSize, stride, padding, dilation, convolutionMode, - (long) capsuleDimensions * channels, -1, getLayerName(), PrimaryCapsules.class); - - this.capsules = (int) (out.getChannels() * out.getHeight() * out.getWidth() / capsuleDimensions); - } + public B stride(int... stride) { + this.stride$value = ValidationUtils.validate2NonNegative(stride, true, "stride"); + this.stride$set = true; + return self(); } - @Getter - @Setter - public static class Builder extends SameDiffLayer.Builder{ - - @Setter(AccessLevel.NONE) - private int[] kernelSize = new int[]{9, 9}; - - @Setter(AccessLevel.NONE) - private int[] stride = new int[]{2, 2}; - - @Setter(AccessLevel.NONE) - private int[] padding = new int[]{0, 0}; - - @Setter(AccessLevel.NONE) - private int[] dilation = new int[]{1, 1}; - - private int channels = 32; - - private boolean hasBias = true; - - private int capsules; - private int capsuleDimensions; - - private ConvolutionMode convolutionMode = ConvolutionMode.Truncate; - - private boolean useRelu = false; - private double leak = 0; - - - public void setKernelSize(int... kernelSize){ - this.kernelSize = ValidationUtils.validate2NonNegative(kernelSize, true, "kernelSize"); - } - - public void setStride(int... stride){ - this.stride = ValidationUtils.validate2NonNegative(stride, true, "stride"); - } - - public void setPadding(int... padding){ - this.padding = ValidationUtils.validate2NonNegative(padding, true, "padding"); - } - - public void setDilation(int... 
dilation){ - this.dilation = ValidationUtils.validate2NonNegative(dilation, true, "dilation"); - } - - - public Builder(int capsuleDimensions, int channels, - int[] kernelSize, int[] stride, int[] padding, int[] dilation, - ConvolutionMode convolutionMode){ - this.capsuleDimensions = capsuleDimensions; - this.channels = channels; - this.setKernelSize(kernelSize); - this.setStride(stride); - this.setPadding(padding); - this.setDilation(dilation); - this.convolutionMode = convolutionMode; - } - - public Builder(int capsuleDimensions, int channels, - int[] kernelSize, int[] stride, int[] padding, int[] dilation){ - this(capsuleDimensions, channels, kernelSize, stride, padding, dilation, ConvolutionMode.Truncate); - } - - public Builder(int capsuleDimensions, int channels, - int[] kernelSize, int[] stride, int[] padding){ - this(capsuleDimensions, channels, kernelSize, stride, padding, new int[]{1, 1}, ConvolutionMode.Truncate); - } - - public Builder(int capsuleDimensions, int channels, - int[] kernelSize, int[] stride){ - this(capsuleDimensions, channels, kernelSize, stride, new int[]{0, 0}, new int[]{1, 1}, ConvolutionMode.Truncate); - } - - public Builder(int capsuleDimensions, int channels, - int[] kernelSize){ - this(capsuleDimensions, channels, kernelSize, new int[]{2, 2}, new int[]{0, 0}, new int[]{1, 1}, ConvolutionMode.Truncate); - } - - public Builder(int capsuleDimensions, int channels){ - this(capsuleDimensions, channels, new int[]{9, 9}, new int[]{2, 2}, new int[]{0, 0}, new int[]{1, 1}, ConvolutionMode.Truncate); - } - - /** - * Sets the kernel size of the 2d convolution - * - * @see ConvolutionLayer.Builder#kernelSize(int...) - * @param kernelSize - * @return - */ - public Builder kernelSize(int... kernelSize){ - this.setKernelSize(kernelSize); - return this; - } - - /** - * Sets the stride of the 2d convolution - * - * @see ConvolutionLayer.Builder#stride(int...) - * @param stride - * @return - */ - public Builder stride(int... stride){ - this.setStride(stride); - return this; - } - - /** - * Sets the padding of the 2d convolution - * - * @see ConvolutionLayer.Builder#padding(int...) - * @param padding - * @return - */ - public Builder padding(int... padding){ - this.setPadding(padding); - return this; - } - - /** - * Sets the dilation of the 2d convolution - * - * @see ConvolutionLayer.Builder#dilation(int...) - * @param dilation - * @return - */ - public Builder dilation(int... dilation){ - this.setDilation(dilation); - return this; - } - - /** - * Sets the number of channels to use in the 2d convolution. - * - * Note that the actual number of channels is channels * capsuleDimensions - * - * Does the same thing as nOut() - * - * @param channels - * @return - */ - public Builder channels(int channels){ - this.channels = channels; - return this; - } - - /** - * Sets the number of channels to use in the 2d convolution. - * - * Note that the actual number of channels is channels * capsuleDimensions - * - * Does the same thing as channels() - * - * @param nOut - * @return - */ - public Builder nOut(int nOut){ - return channels(nOut); - } - - /** - * Sets the number of dimensions to use in the capsules. - * @param capsuleDimensions - * @return - */ - public Builder capsuleDimensions(int capsuleDimensions){ - this.capsuleDimensions = capsuleDimensions; - return this; - } - - /** - * Usually inferred automatically. 
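The constructor-style Builder being removed here corresponds to the static builder(...) overloads introduced above; a minimal sketch of the equivalent call under the new API (hypothetical values, not part of the patch):

    // Hypothetical usage sketch; not part of this patch.
    PrimaryCapsules caps = PrimaryCapsules.builder(8, 16)   // capsuleDimensions, channels
            .kernelSize(9, 9)
            .stride(2, 2)
            .useLeakyReLU(0.01)   // also switches useRelU on, see the customizer below
            .build();             // build() re-applies the validation from the removed constructor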
- * @param capsules - * @return - */ - public Builder capsules(int capsules){ - this.capsules = capsules; - return this; - } - - public Builder hasBias(boolean hasBias){ - this.hasBias = hasBias; - return this; - } - - /** - * The convolution mode to use in the 2d convolution - * @param convolutionMode - * @return - */ - public Builder convolutionMode(ConvolutionMode convolutionMode){ - this.convolutionMode = convolutionMode; - return this; - } - - /** - * Whether to use a ReLU activation on the 2d convolution - * @param useRelu - * @return - */ - public Builder useReLU(boolean useRelu){ - this.useRelu = useRelu; - return this; - } - - /** - * Use a ReLU activation on the 2d convolution - * @return - */ - public Builder useReLU(){ - return useReLU(true); - } - - /** - * Use a LeakyReLU activation on the 2d convolution - * @param leak the alpha value for the LeakyReLU activation. - * @return - */ - public Builder useLeakyReLU(double leak){ - this.useRelu = true; - this.leak = leak; - return this; - } - - @Override - public E build() { - return (E) new PrimaryCapsules(this); - } + public B padding(int... padding) { + this.padding$value = ValidationUtils.validate2NonNegative(padding, true, "padding"); + this.padding$set = true; + return self(); } + + public B dilation(int... dilation) { + this.dilation$value = ValidationUtils.validate2NonNegative(dilation, true, "dilation"); + this.dilation$set = true; + return self(); + } + /** + * Sets the number of channels to use in the 2d convolution. + * + *

Note that the actual number of channels is channels * capsuleDimensions + * + *

Does the same thing as channels() + * + * @param nOut + * @return + */ + public B nOut(int nOut) { + return channels(nOut); + } + /** + * Use a ReLU activation on the 2d convolution + * + * @return + */ + public B useReLU() { + return useRelU(true); + } + + /** + * Use a LeakyReLU activation on the 2d convolution. Implies {@link #useReLU()} set true. + * + * @param leak the alpha value for the LeakyReLU activation. + * @return + */ + public B useLeakyReLU(double leak) { + this.useRelU(true); + this.useLeakyReLU$value = leak; + this.useLeakyReLU$set = true; + return self(); + } + + public C build() { + C l = initBuild(); + if (capsuleDimensions <= 0 || channels$value <= 0) { + throw new IllegalArgumentException( + "Invalid configuration for Primary Capsules (layer name = \"" + + l.getName() + + "\"):" + + " capsuleDimensions and channels must be > 0. Got: " + + capsuleDimensions + + ", " + + channels$value); + } + + if (capsules < 0) { + throw new IllegalArgumentException( + "Invalid configuration for Capsule ILayer (layer name = \"" + + l.getName() + + "\"):" + + " capsules must be >= 0 if set. Got: " + + capsules); + } + return l; + } + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RecurrentAttentionLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RecurrentAttentionLayer.java index a1bbd9f83..73f71ff83 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RecurrentAttentionLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RecurrentAttentionLayer.java @@ -21,6 +21,7 @@ package org.deeplearning4j.nn.conf.layers; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.RNNFormat; @@ -42,14 +43,61 @@ import java.util.Map; @Data @EqualsAndHashCode(callSuper = true) +@SuperBuilder(buildMethodName = "initBuild") public class RecurrentAttentionLayer extends SameDiffLayer { - private long nIn; - private long nOut; + + public static abstract class RecurrentAttentionLayerBuilder> + extends SameDiffLayerBuilder { + + public C build() { + Preconditions.checkArgument(this.projectInput$value || this.nHeads == 1, "projectInput must be true when nHeads != 1"); + Preconditions.checkArgument(this.projectInput$value || nIn == nOut, "nIn must be equal to nOut when projectInput is false"); + Preconditions.checkArgument(!this.projectInput$value || nOut != 0, "nOut must be specified when projectInput is true"); + Preconditions.checkArgument(this.nOut % nHeads == 0 || headSize > 0, "nOut isn't divided by nHeads cleanly. Specify the headSize manually."); + + C l = initBuild(); + return l; + } + } + + /** + * Number of inputs to the layer (input size) + */ + private int nIn; + + /** + * Number of outputs (output size) + */ + private int nOut; + + /** + * Number of Attention Heads + */ private int nHeads; - private long headSize; - private boolean projectInput; - private Activation activation; - private boolean hasBias; + + /** + * Size of attention heads + */ + private int headSize; + + /** + * Project input before applying attention or not. 
+ */ + @Builder.Default + private boolean projectInput = true; + + /** + * If true (default is true) the layer will have a bias + */ + @Builder.Default + private boolean hasBias = true; + + /** + * Activation function for the layer + */ + @Builder.Default + private Activation activation = Activation.TANH; + private static final String WEIGHT_KEY_QUERY_PROJECTION = "Wq"; private static final String WEIGHT_KEY_KEY_PROJECTION = "Wk"; @@ -60,34 +108,23 @@ public class RecurrentAttentionLayer extends SameDiffLayer { private static final String RECURRENT_WEIGHT_KEY = SimpleRnnParamInitializer.RECURRENT_WEIGHT_KEY; private int timeSteps; - private RecurrentAttentionLayer(){/*No arg constructor for serialization*/} - protected RecurrentAttentionLayer(Builder builder){ - super(builder); - nIn = builder.nIn; - nOut = builder.nOut; - nHeads = builder.nHeads; - headSize = builder.headSize == 0 ? nOut / nHeads : builder.headSize; - projectInput = builder.projectInput; - activation = builder.activation; - hasBias = builder.hasBias; - } @Override public InputPreProcessor getPreProcessorForInputType(InputType inputType) { - return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType, RNNFormat.NCW, getLayerName()); + return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType, RNNFormat.NCW, getName()); } @Override public void setNIn(InputType inputType, boolean override) { if (inputType == null || inputType.getType() != InputType.Type.RNN) { - throw new IllegalStateException("Invalid input for Recurrent Attention layer (layer name = \"" + getLayerName() + throw new IllegalStateException("Invalid input for Recurrent Attention layer (layer name = \"" + getName() + "\"): expect RNN input type with size > 0. Got: " + inputType); } if (nIn <= 0 || override) { InputType.InputTypeRecurrent r = (InputType.InputTypeRecurrent) inputType; - this.nIn = r.getSize(); + this.nIn = (int) r.getSize(); } } @@ -95,7 +132,7 @@ public class RecurrentAttentionLayer extends SameDiffLayer { public InputType getOutputType(int layerIndex, InputType inputType) { if (inputType == null || inputType.getType() != InputType.Type.RNN) { throw new IllegalStateException("Invalid input for Recurrent Attention layer (layer index = " + layerIndex - + ", layer name = \"" + getLayerName() + "\"): expect RNN input type with size > 0. Got: " + + ", layer name = \"" + getName() + "\"): expect RNN input type with size > 0. 
Got: " + inputType); } @@ -189,9 +226,9 @@ public class RecurrentAttentionLayer extends SameDiffLayer { val Wv = paramTable.get(WEIGHT_KEY_VALUE_PROJECTION); val Wo = paramTable.get(WEIGHT_KEY_OUT_PROJECTION); - attn = sameDiff.nn.multiHeadDotProductAttention(getLayerName()+"_attention_"+i, prev, layerInput, layerInput, Wq, Wk, Wv, Wo, mask, true); + attn = sameDiff.nn.multiHeadDotProductAttention(getName()+"_attention_"+i, prev, layerInput, layerInput, Wq, Wk, Wv, Wo, mask, true); }else{ - attn = sameDiff.nn.dotProductAttention(getLayerName()+"_attention_"+i, prev, layerInput, layerInput, mask, true); + attn = sameDiff.nn.dotProductAttention(getName()+"_attention_"+i, prev, layerInput, layerInput, mask, true); } attn = sameDiff.squeeze(attn, 2); @@ -206,109 +243,5 @@ public class RecurrentAttentionLayer extends SameDiffLayer { return sameDiff.concat(2, outputSlices); } - @Getter - @Setter - public static class Builder extends SameDiffLayer.Builder { - /** - * Number of inputs to the layer (input size) - */ - private int nIn; - - /** - * Number of outputs (output size) - */ - private int nOut; - - /** - * Number of Attention Heads - */ - private int nHeads; - - /** - * Size of attention heads - */ - private int headSize; - - /** - * Project input before applying attention or not. - */ - private boolean projectInput = true; - - /** - * If true (default is true) the layer will have a bias - */ - private boolean hasBias = true; - - /** - * Activation function for the layer - */ - private Activation activation = Activation.TANH; - - /** - * @param nIn Number of inputs to the layer (input size) - */ - public Builder nIn(int nIn) { - this.nIn = nIn; - return this; - } - - /** - * @param nOut Number of outputs (output size) - */ - public Builder nOut(int nOut) { - this.nOut = nOut; - return this; - } - - /** - * Number of Attention Heads - */ - public Builder nHeads(int nHeads){ - this.nHeads = nHeads; - return this; - } - - /** - * Size of attention heads - */ - public Builder headSize(int headSize){ - this.headSize = headSize; - return this; - } - - /** - * Project input before applying attention or not. - */ - public Builder projectInput(boolean projectInput){ - this.projectInput = projectInput; - return this; - } - - /** - * @param hasBias If true (default is true) the layer will have a bias - */ - public Builder hasBias(boolean hasBias) { - this.hasBias = hasBias; - return this; - } - - /** - * @param activation Activation function for the layer - */ - public Builder activation(Activation activation) { - this.activation = activation; - return this; - } - - @Override - @SuppressWarnings("unchecked") - public RecurrentAttentionLayer build() { - Preconditions.checkArgument(this.projectInput || this.nHeads == 1, "projectInput must be true when nHeads != 1"); - Preconditions.checkArgument(this.projectInput || nIn == nOut, "nIn must be equal to nOut when projectInput is false"); - Preconditions.checkArgument(!this.projectInput || nOut != 0, "nOut must be specified when projectInput is true"); - Preconditions.checkArgument(this.nOut % nHeads == 0 || headSize > 0, "nOut isn't divided by nHeads cleanly. 
Specify the headSize manually."); - return new RecurrentAttentionLayer(this); - } - } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnLossLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnLossLayer.java index e7db009ed..860a43a9d 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnLossLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnLossLayer.java @@ -20,10 +20,10 @@ package org.deeplearning4j.nn.conf.layers; -import lombok.Data; -import lombok.EqualsAndHashCode; -import lombok.NoArgsConstructor; -import lombok.ToString; +import java.util.Collection; +import java.util.Map; +import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.InputPreProcessor; @@ -39,133 +39,109 @@ import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.lossfunctions.ILossFunction; import org.nd4j.linalg.lossfunctions.LossFunctions; -import java.util.Collection; -import java.util.Map; - @Data -@NoArgsConstructor @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@SuperBuilder public class RnnLossLayer extends FeedForwardLayer { - private RNNFormat rnnDataFormat = RNNFormat.NCW; - protected ILossFunction lossFn; + /** + * @param lossFunction Loss function for the loss layer + */ + protected ILossFunction lossFunction; + /** + * @param rnnDataFormat Data format expected by the layer. NCW = [miniBatchSize, size, + * timeSeriesLength], NWC = [miniBatchSize, timeSeriesLength, size]. Defaults to NCW. + */ + @lombok.Builder.Default private RNNFormat dataFormat = RNNFormat.NCW; - private RnnLossLayer(Builder builder) { - super(builder); - this.setLossFn(builder.lossFn); - this.rnnDataFormat = builder.rnnDataFormat; + @Override + public Layer instantiate( + NeuralNetConfiguration conf, + Collection trainingListeners, + int layerIndex, + INDArray layerParamsView, + boolean initializeParams, + DataType networkDataType) { + + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + lconf.setNetConfiguration(conf); + runInheritance(); + + org.deeplearning4j.nn.layers.recurrent.RnnLossLayer ret = + new org.deeplearning4j.nn.layers.recurrent.RnnLossLayer(lconf, networkDataType); + ret.addTrainingListeners(trainingListeners); + ret.setIndex(layerIndex); + ret.setParamsViewArray(layerParamsView); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); + ret.setParamTable(paramTable); + ret.setLayerConfiguration(lconf); + return ret; + } + + @Override + public ParamInitializer initializer() { + return EmptyParamInitializer.getInstance(); + } + + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + if (inputType == null || inputType.getType() != InputType.Type.RNN) { + throw new IllegalStateException( + "Invalid input type for RnnLossLayer (layer index = " + + layerIndex + + ", layer name=\"" + + getName() + + "\"): Expected RNN input, got " + + inputType); + } + return inputType; + } + + @Override + public InputPreProcessor getPreProcessorForInputType(InputType inputType) { + return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType, RNNFormat.NCW, getName()); + } + + @Override + public LayerMemoryReport getMemoryReport(InputType inputType) { + // During inference and training: dup the input array. 
But, this counts as *activations* not + // working memory + return new LayerMemoryReport.Builder(name, LossLayer.class, inputType, inputType) + .standardMemory(0, 0) // No params + .workingMemory(0, 0, 0, 0) + .cacheMemory( + MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) // No caching + .build(); + } + + @Override + public void setNIn(InputType inputType, boolean override) { + // No op + } + + public static abstract class RnnLossLayerBuilder< + C extends RnnLossLayer, B extends RnnLossLayerBuilder> + extends FeedForwardLayerBuilder { + @Override + public B nIn(long nIn) { + throw new UnsupportedOperationException( + "Ths layer has no parameters, thus nIn will always equal nOut."); } @Override - public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, - int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { - - LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); - lconf.setNetConfiguration(conf); - runInheritance(); - - org.deeplearning4j.nn.layers.recurrent.RnnLossLayer ret = - new org.deeplearning4j.nn.layers.recurrent.RnnLossLayer(lconf, networkDataType); - ret.addTrainingListeners(trainingListeners); - ret.setIndex(layerIndex); - ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(this, layerParamsView, initializeParams); - ret.setParamTable(paramTable); - ret.setLayerConfiguration(lconf); - return ret; + public B nOut(long nOut) { + throw new UnsupportedOperationException( + "Ths layer has no parameters, thus nIn will always equal nOut."); } - @Override - public ParamInitializer initializer() { - return EmptyParamInitializer.getInstance(); + public B lossFunction(ILossFunction lossFunction) { + this.lossFunction = lossFunction; + return self(); } - @Override - public InputType getOutputType(int layerIndex, InputType inputType) { - if (inputType == null || inputType.getType() != InputType.Type.RNN) { - throw new IllegalStateException("Invalid input type for RnnLossLayer (layer index = " + layerIndex - + ", layer name=\"" + getLayerName() + "\"): Expected RNN input, got " + inputType); - } - return inputType; - } - - @Override - public InputPreProcessor getPreProcessorForInputType(InputType inputType) { - return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType, RNNFormat.NCW, getLayerName()); - } - - @Override - public LayerMemoryReport getMemoryReport(InputType inputType) { - //During inference and training: dup the input array. 
But, this counts as *activations* not working memory - return new LayerMemoryReport.Builder(layerName, LossLayer.class, inputType, inputType).standardMemory(0, 0) //No params - .workingMemory(0, 0, 0, 0) - .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching - .build(); - } - - @Override - public void setNIn(InputType inputType, boolean override) { - //No op - } - - - public static class Builder extends BaseOutputLayer.Builder { - - private RNNFormat rnnDataFormat = RNNFormat.NCW; - - public Builder() { - } - - /** - * @param lossFunction Loss function for the loss layer - */ - public Builder(LossFunctions.LossFunction lossFunction) { - lossFunction(lossFunction); - } - - /** - * @param lossFunction Loss function for the loss layer - */ - public Builder(ILossFunction lossFunction) { - this.setLossFn(lossFunction); - } - - @Override - @SuppressWarnings("unchecked") - public Builder nIn(int nIn) { - throw new UnsupportedOperationException("Ths layer has no parameters, thus nIn will always equal nOut."); - } - - @Override - @SuppressWarnings("unchecked") - public Builder nOut(int nOut) { - throw new UnsupportedOperationException("Ths layer has no parameters, thus nIn will always equal nOut."); - } - - @Override - public void setNIn(long nIn){ - throw new UnsupportedOperationException( - "This layer has no parameters, thus nIn will always equal nOut."); - } - - @Override - public void setNOut(long nOut){ - throw new UnsupportedOperationException( - "This layer has no parameters, thus nIn will always equal nOut."); - } - - /** - * @param rnnDataFormat Data format expected by the layer. NCW = [miniBatchSize, size, timeSeriesLength], - * NWC = [miniBatchSize, timeSeriesLength, size]. Defaults to NCW. - */ - public Builder dataFormat(RNNFormat rnnDataFormat){ - this.rnnDataFormat = rnnDataFormat; - return this; - } - @Override - @SuppressWarnings("unchecked") - public RnnLossLayer build() { - return new RnnLossLayer(this); - } + public B lossFunction(LossFunctions.LossFunction lossFunction) { + this.lossFunction = lossFunction.getILossFunction(); + return self(); } + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnOutputLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnOutputLayer.java index 5b59c5399..9d774293c 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnOutputLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnOutputLayer.java @@ -20,10 +20,13 @@ package org.deeplearning4j.nn.conf.layers; +import java.util.Collection; +import java.util.Map; import lombok.Data; import lombok.EqualsAndHashCode; import lombok.NoArgsConstructor; import lombok.ToString; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.InputPreProcessor; @@ -32,33 +35,38 @@ import org.deeplearning4j.nn.conf.RNNFormat; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.params.DefaultParamInitializer; import org.deeplearning4j.optimize.api.TrainingListener; -import org.nd4j.linalg.activations.impl.ActivationSoftmax; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.lossfunctions.ILossFunction; -import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction; - -import java.util.Collection; -import java.util.Map; +import 
org.nd4j.linalg.lossfunctions.LossFunctions; @Data -@NoArgsConstructor @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder") public class RnnOutputLayer extends BaseOutputLayer { - private RNNFormat rnnDataFormat; + /** + * @param rnnDataFormat Data format expected by the layer. NCW = [miniBatchSize, size, timeSeriesLength], + * NWC = [miniBatchSize, timeSeriesLength, size]. Defaults to NCW. + */ + private RNNFormat dataFormat; - private RnnOutputLayer(Builder builder) { - super(builder); - initializeConstraints(builder); - this.rnnDataFormat = builder.rnnDataFormat; + public static RnnOutputLayerBuilder builder() { + return innerBuilder(); + } + + /** + * @param lossFn Loss function for the output layer + */ + public static RnnOutputLayerBuilder builder(LossFunctions.LossFunction lossFn) { + return innerBuilder() + .lossFunction(lossFn); } @Override public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { - LayerValidation.assertNInNOutSet("RnnOutputLayer", getLayerName(), layerIndex, getNIn(), getNOut()); + LayerValidation.assertNInNOutSet("RnnOutputLayer", getName(), layerIndex, getNIn(), getNOut()); LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); org.deeplearning4j.nn.layers.recurrent.RnnOutputLayer ret = @@ -81,7 +89,7 @@ public class RnnOutputLayer extends BaseOutputLayer { public InputType getOutputType(int layerIndex, InputType inputType) { if (inputType == null || inputType.getType() != InputType.Type.RNN) { throw new IllegalStateException("Invalid input type for RnnOutputLayer (layer index = " + layerIndex - + ", layer name=\"" + getLayerName() + "\"): Expected RNN input, got " + inputType); + + ", layer name=\"" + getName() + "\"): Expected RNN input, got " + inputType); } InputType.InputTypeRecurrent itr = (InputType.InputTypeRecurrent) inputType; @@ -91,13 +99,13 @@ public class RnnOutputLayer extends BaseOutputLayer { @Override public void setNIn(InputType inputType, boolean override) { if (inputType == null || inputType.getType() != InputType.Type.RNN) { - throw new IllegalStateException("Invalid input type for RnnOutputLayer (layer name=\"" + getLayerName() + throw new IllegalStateException("Invalid input type for RnnOutputLayer (layer name=\"" + getName() + "\"): Expected RNN input, got " + inputType); } InputType.InputTypeRecurrent r = (InputType.InputTypeRecurrent) inputType; - if(rnnDataFormat == null || override) { - this.rnnDataFormat = r.getFormat(); + if(dataFormat == null || override) { + this.dataFormat = r.getFormat(); } if (nIn <= 0 || override) { @@ -107,49 +115,15 @@ public class RnnOutputLayer extends BaseOutputLayer { @Override public InputPreProcessor getPreProcessorForInputType(InputType inputType) { - return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType, rnnDataFormat, getLayerName()); + return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType, dataFormat, getName()); } - - public static class Builder extends BaseOutputLayer.Builder { - - private RNNFormat rnnDataFormat; - public Builder() { - //Set default activation function to softmax (to match default loss function MCXENT) - this.setActivationFn(new ActivationSoftmax()); - } - - /** - * @param lossFunction Loss function for the output layer - */ - public Builder(LossFunction lossFunction) { - lossFunction(lossFunction); - //Set 
default activation function to softmax (for consistent behaviour with no-arg constructor) - this.setActivationFn(new ActivationSoftmax()); - } - - /** - * @param lossFunction Loss function for the output layer - */ - public Builder(ILossFunction lossFunction) { - this.setLossFn(lossFunction); - //Set default activation function to softmax (for consistent behaviour with no-arg constructor) - this.setActivationFn(new ActivationSoftmax()); - } - - @Override - @SuppressWarnings("unchecked") - public RnnOutputLayer build() { - return new RnnOutputLayer(this); - } - - /** - * @param rnnDataFormat Data format expected by the layer. NCW = [miniBatchSize, size, timeSeriesLength], - * NWC = [miniBatchSize, timeSeriesLength, size]. Defaults to NCW. - */ - public Builder dataFormat(RNNFormat rnnDataFormat){ - this.rnnDataFormat = rnnDataFormat; - return this; + public static abstract class RnnOutputLayerBuilder> extends BaseOutputLayerBuilder { + public C build() { + C l = this.initBuild(); + l.initializeConstraints(); + return l; } } + } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SelfAttentionLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SelfAttentionLayer.java index 3bf4453c5..40153688e 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SelfAttentionLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SelfAttentionLayer.java @@ -20,7 +20,9 @@ package org.deeplearning4j.nn.conf.layers; +import java.util.Map; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.RNNFormat; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -34,186 +36,129 @@ import org.nd4j.linalg.api.memory.MemoryWorkspace; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; -import java.util.Map; - @Data @EqualsAndHashCode(callSuper = true) +@SuperBuilder(buildMethodName = "initBuild") public class SelfAttentionLayer extends SameDiffLayer { - private long nIn; - private long nOut; - private int nHeads; - private long headSize; - private boolean projectInput; + private static final String WEIGHT_KEY_QUERY_PROJECTION = "Wq"; + private static final String WEIGHT_KEY_KEY_PROJECTION = "Wk"; + private static final String WEIGHT_KEY_VALUE_PROJECTION = "Wv"; + private static final String WEIGHT_KEY_OUT_PROJECTION = "Wo"; + /** Number of inputs to the layer (input size) */ + private int nIn; + /** Number of outputs (output size) */ + private int nOut; + /** Number of Attention Heads */ + private int nHeads; + /** Size of attention heads */ + private int headSize; + /** Project input before applying attention or not. */ + private boolean projectInput; - private static final String WEIGHT_KEY_QUERY_PROJECTION = "Wq"; - private static final String WEIGHT_KEY_KEY_PROJECTION = "Wk"; - private static final String WEIGHT_KEY_VALUE_PROJECTION = "Wv"; - private static final String WEIGHT_KEY_OUT_PROJECTION = "Wo"; + @Override + public InputPreProcessor getPreProcessorForInputType(InputType inputType) { + return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType, RNNFormat.NCW, getName()); + } - private SelfAttentionLayer(){/*No arg constructor for serialization*/} - - protected SelfAttentionLayer(Builder builder){ - super(builder); - nIn = builder.nIn; - nOut = builder.nOut; - nHeads = builder.nHeads; - headSize = builder.headSize == 0 ? 
nOut / nHeads : builder.headSize; - projectInput = builder.projectInput; + @Override + public void setNIn(InputType inputType, boolean override) { + if (inputType == null || inputType.getType() != InputType.Type.RNN) { + throw new IllegalStateException( + "Invalid input for Self Attention layer (layer name = \"" + + getName() + + "\"): expect RNN input type with size > 0. Got: " + + inputType); } - @Override - public InputPreProcessor getPreProcessorForInputType(InputType inputType) { - return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType, RNNFormat.NCW,getLayerName()); + if (nIn <= 0 || override) { + InputType.InputTypeRecurrent r = (InputType.InputTypeRecurrent) inputType; + this.nIn = (int) r.getSize(); + } + } + + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + if (inputType == null || inputType.getType() != InputType.Type.RNN) { + throw new IllegalStateException( + "Invalid input for Self Attention layer (layer index = " + + layerIndex + + ", layer name = \"" + + getName() + + "\"): expect RNN input type with size > 0. Got: " + + inputType); } - @Override - public void setNIn(InputType inputType, boolean override) { - if (inputType == null || inputType.getType() != InputType.Type.RNN) { - throw new IllegalStateException("Invalid input for Self Attention layer (layer name = \"" + getLayerName() - + "\"): expect RNN input type with size > 0. Got: " + inputType); - } + InputType.InputTypeRecurrent itr = (InputType.InputTypeRecurrent) inputType; - if (nIn <= 0 || override) { - InputType.InputTypeRecurrent r = (InputType.InputTypeRecurrent) inputType; - this.nIn = r.getSize(); - } + if (projectInput) { + return InputType.recurrent(nOut, itr.getTimeSeriesLength()); + } else { + return InputType.recurrent(nIn, itr.getTimeSeriesLength()); } + } - @Override - public InputType getOutputType(int layerIndex, InputType inputType) { - if (inputType == null || inputType.getType() != InputType.Type.RNN) { - throw new IllegalStateException("Invalid input for Self Attention layer (layer index = " + layerIndex - + ", layer name = \"" + getLayerName() + "\"): expect RNN input type with size > 0. 
Got: " - + inputType); - } + @Override + public void defineParameters(SDLayerParams params) { + params.clear(); - InputType.InputTypeRecurrent itr = (InputType.InputTypeRecurrent) inputType; - - if(projectInput){ - return InputType.recurrent(nOut, itr.getTimeSeriesLength()); - }else{ - return InputType.recurrent(nIn, itr.getTimeSeriesLength()); - } + if (projectInput) { + params.addWeightParam(WEIGHT_KEY_QUERY_PROJECTION, nHeads, headSize, nIn); + params.addWeightParam(WEIGHT_KEY_KEY_PROJECTION, nHeads, headSize, nIn); + params.addWeightParam(WEIGHT_KEY_VALUE_PROJECTION, nHeads, headSize, nIn); + params.addWeightParam(WEIGHT_KEY_OUT_PROJECTION, nHeads * headSize, nOut); } + } - @Override - public void defineParameters(SDLayerParams params) { - params.clear(); - - if(projectInput){ - params.addWeightParam(WEIGHT_KEY_QUERY_PROJECTION, nHeads, headSize, nIn); - params.addWeightParam(WEIGHT_KEY_KEY_PROJECTION, nHeads, headSize, nIn); - params.addWeightParam(WEIGHT_KEY_VALUE_PROJECTION, nHeads, headSize, nIn); - params.addWeightParam(WEIGHT_KEY_OUT_PROJECTION, nHeads * headSize, nOut); + @Override + public void initializeParameters(Map params) { + try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) { + for (Map.Entry e : params.entrySet()) { + if (e.getKey().equals(WEIGHT_KEY_OUT_PROJECTION)) { + WeightInitUtil.initWeights( + nIn, headSize, e.getValue().shape(), weightInit, null, 'c', e.getValue()); + } else { + WeightInitUtil.initWeights( + nHeads * headSize, nOut, e.getValue().shape(), weightInit, null, 'c', e.getValue()); } + } } + } - @Override - public void initializeParameters(Map params) { - try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) { - for (Map.Entry e : params.entrySet()) { - if(e.getKey().equals(WEIGHT_KEY_OUT_PROJECTION)){ - WeightInitUtil.initWeights(nIn, headSize, e.getValue().shape(), weightInit, null, 'c', e.getValue()); - }else{ - WeightInitUtil.initWeights(nHeads * headSize, nOut, e.getValue().shape(), weightInit, null, 'c', e.getValue()); - } - } - } + @Override + public SDVariable defineLayer( + SameDiff sameDiff, + SDVariable layerInput, + Map paramTable, + SDVariable mask) { + if (projectInput) { + val Wq = paramTable.get(WEIGHT_KEY_QUERY_PROJECTION); + val Wk = paramTable.get(WEIGHT_KEY_KEY_PROJECTION); + val Wv = paramTable.get(WEIGHT_KEY_VALUE_PROJECTION); + val Wo = paramTable.get(WEIGHT_KEY_OUT_PROJECTION); + + return sameDiff.nn.multiHeadDotProductAttention( + getName(), layerInput, layerInput, layerInput, Wq, Wk, Wv, Wo, mask, true); + } else { + return sameDiff.nn.dotProductAttention( + getName(), layerInput, layerInput, layerInput, mask, true); } + } + public static abstract class SelfAttentionLayerBuilder< + C extends SelfAttentionLayer, B extends SelfAttentionLayerBuilder> + extends SameDiffLayerBuilder { + public C build() { + Preconditions.checkArgument( + this.projectInput || this.nHeads == 1, "projectInput must be true when nHeads != 1"); + Preconditions.checkArgument( + this.projectInput || nIn == nOut, "nIn must be equal to nOut when projectInput is false"); + Preconditions.checkArgument( + !this.projectInput || nOut != 0, "nOut must be specified when projectInput is true"); + Preconditions.checkArgument( + this.nOut % nHeads == 0 || headSize > 0, + "nOut isn't divided by nHeads cleanly. 
Specify the headSize manually."); - @Override - public SDVariable defineLayer(SameDiff sameDiff, SDVariable layerInput, Map paramTable, SDVariable mask) { - if(projectInput){ - val Wq = paramTable.get(WEIGHT_KEY_QUERY_PROJECTION); - val Wk = paramTable.get(WEIGHT_KEY_KEY_PROJECTION); - val Wv = paramTable.get(WEIGHT_KEY_VALUE_PROJECTION); - val Wo = paramTable.get(WEIGHT_KEY_OUT_PROJECTION); - - return sameDiff.nn.multiHeadDotProductAttention(getLayerName(), layerInput, layerInput, layerInput, Wq, Wk, Wv, Wo, mask, true); - }else{ - return sameDiff.nn.dotProductAttention(getLayerName(), layerInput, layerInput, layerInput, mask, true); - } - } - - - @Getter - @Setter - public static class Builder extends SameDiffLayer.Builder { - - /** - * Number of inputs to the layer (input size) - */ - private int nIn; - - /** - * Number of outputs (output size) - */ - private int nOut; - - /** - * Number of Attention Heads - */ - private int nHeads; - - /** - * Size of attention heads - */ - private int headSize; - - /** - * Project input before applying attention or not. - */ - private boolean projectInput; - - /** - * @param nIn Number of inputs to the layer (input size) - */ - public Builder nIn(int nIn) { - this.nIn = nIn; - return this; - } - - /** - * @param nOut Number of outputs (output size) - */ - public Builder nOut(int nOut) { - this.nOut = nOut; - return this; - } - - /** - * Number of Attention Heads - */ - public Builder nHeads(int nHeads){ - this.nHeads = nHeads; - return this; - } - - /** - * Size of attention heads - */ - public Builder headSize(int headSize){ - this.headSize = headSize; - return this; - } - - /** - * Project input before applying attention or not. - */ - public Builder projectInput(boolean projectInput){ - this.projectInput = projectInput; - return this; - } - - @Override - @SuppressWarnings("unchecked") - public SelfAttentionLayer build() { - Preconditions.checkArgument(this.projectInput || this.nHeads == 1, "projectInput must be true when nHeads != 1"); - Preconditions.checkArgument(this.projectInput || nIn == nOut, "nIn must be equal to nOut when projectInput is false"); - Preconditions.checkArgument(!this.projectInput || nOut != 0, "nOut must be specified when projectInput is true"); - Preconditions.checkArgument(this.nOut % nHeads == 0 || headSize > 0, "nOut isn't divided by nHeads cleanly. 
Specify the headSize manually."); - return new SelfAttentionLayer(this); - } + return initBuild(); } + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SeparableConvolution2D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SeparableConvolution2D.java index 924c4cc2a..5d7d25066 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SeparableConvolution2D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SeparableConvolution2D.java @@ -20,7 +20,9 @@ package org.deeplearning4j.nn.conf.layers; +import java.util.*; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.api.layers.LayerConstraint; @@ -30,256 +32,203 @@ import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.layers.convolution.SeparableConvolution2DLayer; import org.deeplearning4j.nn.params.SeparableConvolutionParamInitializer; import org.deeplearning4j.optimize.api.TrainingListener; -import org.deeplearning4j.util.ConvolutionUtils; import org.deeplearning4j.util.ValidationUtils; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.*; - +/** + * SeparableConvolution2D layer nIn in the input layer is the number of channels nOut is the number + * of filters to be used in the net or in other words the channels The builder specifies the + * filter/kernel size, the stride and padding The pooling layer takes the kernel size + */ @Data -@NoArgsConstructor @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder") public class SeparableConvolution2D extends ConvolutionLayer { + /** + * Set constraints to be applied to the point-wise convolution weight parameters of this layer. + * Default: no constraints.
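+   * An illustrative usage sketch, not part of this patch ({@code NonNegativeConstraint} is an
+   * existing {@code LayerConstraint} implementation; the other setters are assumed to be
+   * generated by this builder):
+   * {@code SeparableConvolution2D.builder(3, 3).constrainPointWise(new NonNegativeConstraint()).build()}<br>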
+ * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm + * regularization, etc). These constraints are applied at each iteration, after the parameters + * have been updated. + */ + protected List pointWiseConstraints; + /** + * Set channels multiplier of channels-wise step in separable convolution + * + * @param depthMultiplier integer value, for each input map we get depthMultipler outputs in + * channels-wise step. + * @return Builder + */ + @Builder.Default private int depthMultiplier = 1; + /** + * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last). + * See {@link CNN2DFormat} for more details.
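+   * An illustrative usage sketch, not part of this patch (setter names follow the fields and
+   * builder methods declared in this class; the inherited {@code nIn}/{@code nOut} setters are assumed):
+   * {@code SeparableConvolution2D.builder(3, 3).depthMultiplier(2).dataFormat(CNN2DFormat.NHWC).nIn(16).nOut(32).build()}<br>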
+ * Default: NCHW + * + * @param format Format for activations (in and out) + */ + @Builder.Default + protected CNN2DFormat dataFormat = + CNN2DFormat.NCHW; // default value for legacy serialization reasons + public static SeparableConvolution2DBuilder builder() { + return innerBuilder(); + } - int depthMultiplier; + public static SeparableConvolution2DBuilder builder(int... kernelSize) { + return innerBuilder().kernelSize(kernelSize); + } + + protected boolean allowCausal() { + // Causal convolution - allowed for 1D only + return false; + } + + @Override + protected void initializeConstraints() { + super.initializeConstraints(); + if (pointWiseConstraints != null) { + if (constraints == null) { + constraints = new ArrayList<>(); + } + for (LayerConstraint constraint : pointWiseConstraints) { + LayerConstraint clonedConstraint = constraint.clone(); + clonedConstraint.setParams( + Collections.singleton(SeparableConvolutionParamInitializer.POINT_WISE_WEIGHT_KEY)); + constraints.add(clonedConstraint); + } + } + } + + public boolean hasBias() { + return isHasBias(); + } + + @Override + public SeparableConvolution2D clone() { + SeparableConvolution2D clone = (SeparableConvolution2D) super.clone(); + if (clone.getKernelSize() != null) { + clone.setKernelSize( clone.getKernelSize().clone()); + } + if (clone.getStride() != null) { + clone.setStride( clone.getStride().clone()); + } + if (clone.getPadding() != null) { + clone.setPadding( clone.getPadding().clone()); + } + return clone; + } + + @Override + public Layer instantiate( + NeuralNetConfiguration conf, + Collection trainingListeners, + int layerIndex, + INDArray layerParamsView, + boolean initializeParams, + DataType networkDataType) { + LayerValidation.assertNInNOutSet( + "SeparableConvolution2D", getName(), layerIndex, getNIn(), getNOut()); + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + runInheritance(); + org.deeplearning4j.nn.layers.convolution.SeparableConvolution2DLayer ret = + new org.deeplearning4j.nn.layers.convolution.SeparableConvolution2DLayer( + lconf, networkDataType); + ret.addTrainingListeners(trainingListeners); + ret.setIndex(layerIndex); + ret.setParamsViewArray(layerParamsView); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); + ret.setParamTable(paramTable); + ret.setLayerConfiguration(lconf); + + return ret; + } + + @Override + public ParamInitializer initializer() { + return SeparableConvolutionParamInitializer.getInstance(); + } + + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + if (inputType == null || inputType.getType() != InputType.Type.CNN) { + throw new IllegalStateException( + "Invalid input for Convolution layer (layer name=\"" + + getName() + + "\"): Expected CNN input, got " + + inputType); + } + + CNN2DFormat format = ((InputType.InputTypeConvolutional) inputType).getFormat(); + + return InputTypeUtil.getOutputTypeCnnLayers( + inputType, + getKernelSize(), + getStride(), + getPadding(), + getDilation(), + getConvolutionMode(), + nOut, + layerIndex, + getName(), + format, + SeparableConvolution2DLayer.class); + } + + public static abstract class SeparableConvolution2DBuilder< + C extends SeparableConvolution2D, B extends SeparableConvolution2DBuilder> + extends ConvolutionLayerBuilder { + public C build() { + C l = this.initBuild(); + if (l.getKernelSize().length != 2) { + throw new IllegalArgumentException("Kernel size of should be rows x columns (a 2d array)"); + } + + if (l.getStride().length != 2) 
{ + throw new IllegalArgumentException( + "Stride should include stride for rows and columns (a 2d array)"); + } + + if (l.getPadding().length != 2) { + throw new IllegalArgumentException( + "Padding should include padding for rows and columns (a 2d array)"); + } + l.initializeConstraints(); + return l; + } /** - * SeparableConvolution2D layer nIn in the input layer is the number of channels nOut is the number of filters to be - * used in the net or in other words the channels The builder specifies the filter/kernel size, the stride and - * padding The pooling layer takes the kernel size + * Set constraints to be applied to the point-wise convolution weight parameters of this layer. + * Default: no constraints.
+ * Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm + * regularization, etc). These constraints are applied at each iteration, after the parameters + * have been updated. + * + * @param constraints Constraints to apply to the point-wise convolution parameters of this + * layer */ - protected SeparableConvolution2D(Builder builder) { - super(builder); - this.hasBias = builder.hasBias; - this.depthMultiplier = builder.depthMultiplier; - this.convolutionMode = builder.convolutionMode; - this.dilation = builder.dilation; - if (builder.kernelSize.length != 2) { - throw new IllegalArgumentException("Kernel size of should be rows x columns (a 2d array)"); - } - this.kernelSize = builder.kernelSize; - if (builder.stride.length != 2) { - throw new IllegalArgumentException("Stride should include stride for rows and columns (a 2d array)"); - } - this.stride = builder.stride; - if (builder.padding.length != 2) { - throw new IllegalArgumentException("Padding should include padding for rows and columns (a 2d array)"); - } - this.padding = builder.padding; - this.cudnnAlgoMode = builder.cudnnAlgoMode; - this.cudnnFwdAlgo = builder.cudnnFwdAlgo; - this.cudnnBwdFilterAlgo = builder.cudnnBwdFilterAlgo; - this.cudnnBwdDataAlgo = builder.cudnnBwdDataAlgo; - this.cnn2dDataFormat = builder.dataFormat; - - - initializeConstraints(builder); + public B constrainPointWise(LayerConstraint... constraints) { + this.pointWiseConstraints = Arrays.asList(constraints); + return self(); } @Override - protected void initializeConstraints(LayerConfiguration.Builder builder) { - super.initializeConstraints(builder); - if (((Builder) builder).pointWiseConstraints != null) { - if (constraints == null) { - constraints = new ArrayList<>(); - } - for (LayerConstraint constraint : ((Builder) builder).pointWiseConstraints) { - LayerConstraint clonedConstraint = constraint.clone(); - clonedConstraint.setParams( - Collections.singleton(SeparableConvolutionParamInitializer.POINT_WISE_WEIGHT_KEY)); - constraints.add(clonedConstraint); - } - } - } - - public boolean hasBias() { - return hasBias; + public B kernelSize(int... kernelSize) { + super.kernelSize(ValidationUtils.validate2NonNegative(kernelSize, false, "kernelSize")); + return self(); } @Override - public SeparableConvolution2D clone() { - SeparableConvolution2D clone = (SeparableConvolution2D) super.clone(); - if (clone.kernelSize != null) { - clone.kernelSize = clone.kernelSize.clone(); - } - if (clone.stride != null) { - clone.stride = clone.stride.clone(); - } - if (clone.padding != null) { - clone.padding = clone.padding.clone(); - } - return clone; + public B stride(int... 
stride) {
+    super.stride(ValidationUtils.validate2NonNegative(stride, false, "stride"));
+    return self();
  }
 
  @Override
-    public Layer instantiate(NeuralNetConfiguration conf, Collection<TrainingListener> trainingListeners,
-                    int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) {
-        LayerValidation.assertNInNOutSet("SeparableConvolution2D", getLayerName(), layerIndex, getNIn(), getNOut());
-        LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
-
-        org.deeplearning4j.nn.layers.convolution.SeparableConvolution2DLayer ret =
-                        new org.deeplearning4j.nn.layers.convolution.SeparableConvolution2DLayer(lconf, networkDataType);
-        ret.addTrainingListeners(trainingListeners);
-        ret.setIndex(layerIndex);
-        ret.setParamsViewArray(layerParamsView);
-        Map<String, INDArray> paramTable = initializer().init(this, layerParamsView, initializeParams);
-        ret.setParamTable(paramTable);
-        ret.setLayerConfiguration(lconf);
-
-        return ret;
+  public B padding(int... padding) {
+    super.padding(ValidationUtils.validate2NonNegative(padding, false, "padding"));
+    return self();
  }
-
-    @Override
-    public ParamInitializer initializer() {
-        return SeparableConvolutionParamInitializer.getInstance();
-    }
-
-    @Override
-    public InputType getOutputType(int layerIndex, InputType inputType) {
-        if (inputType == null || inputType.getType() != InputType.Type.CNN) {
-            throw new IllegalStateException("Invalid input for Convolution layer (layer name=\"" + getLayerName()
-                            + "\"): Expected CNN input, got " + inputType);
-        }
-
-        CNN2DFormat format = ((InputType.InputTypeConvolutional) inputType).getFormat();
-
-        return InputTypeUtil.getOutputTypeCnnLayers(inputType, kernelSize, stride, padding, dilation, convolutionMode,
-                        nOut, layerIndex, getLayerName(), format, SeparableConvolution2DLayer.class);
-    }
-
-
-    @Getter
-    @Setter
-    public static class Builder extends BaseConvBuilder<Builder> {
-
-        /**
-         * Set channels multiplier of channels-wise step in separable convolution
-         *
-         */
-        protected int depthMultiplier = 1;
-        protected CNN2DFormat dataFormat;
-
-        public Builder(int[] kernelSize, int[] stride, int[] padding) {
-            super(kernelSize, stride, padding);
-        }
-
-        public Builder(int[] kernelSize, int[] stride) {
-            super(kernelSize, stride);
-        }
-
-        public Builder(int... kernelSize) {
-            super(kernelSize);
-        }
-
-        public Builder() {
-            super();
-        }
-
-        @Override
-        protected boolean allowCausal() {
-            //Causal convolution - allowed for 1D only
-            return false;
-        }
-
-        /**
-         * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last).
-         * See {@link CNN2DFormat} for more details.<br>
- * Default: NCHW - * @param format Format for activations (in and out) - */ - public Builder dataFormat(CNN2DFormat format){ - this.dataFormat = format; - return this; - } - - /** - * Set channels multiplier of channels-wise step in separable convolution - * - * @param depthMultiplier integer value, for each input map we get depthMultipler outputs in channels-wise - * step. - * @return Builder - */ - public Builder depthMultiplier(int depthMultiplier) { - this.setDepthMultiplier(depthMultiplier); - return this; - } - - /** - * Set constraints to be applied to the point-wise convolution weight parameters of this layer. Default: no - * constraints.
Constraints can be used to enforce certain conditions (non-negativity of parameters, - * max-norm regularization, etc). These constraints are applied at each iteration, after the parameters have - * been updated. - */ - protected List pointWiseConstraints; - - /** - * Set constraints to be applied to the point-wise convolution weight parameters of this layer. Default: no - * constraints.
Constraints can be used to enforce certain conditions (non-negativity of parameters, - * max-norm regularization, etc). These constraints are applied at each iteration, after the parameters have - * been updated. - * - * @param constraints Constraints to apply to the point-wise convolution parameters of this layer - */ - public Builder constrainPointWise(LayerConstraint... constraints) { - this.setPointWiseConstraints(Arrays.asList(constraints)); - return this; - } - - /** - * Size of the convolution rows/columns (height/width) - * - * @param kernelSize the height and width of the kernel - */ - public Builder kernelSize(int... kernelSize) { - this.setKernelSize(kernelSize); - return this; - } - - /** - * Stride of the convolution rows/columns (height/width) - * - * @param stride the stride of the kernel (in h/w dimensions) - */ - public Builder stride(int... stride) { - this.setStride(stride); - return this; - } - - /** - * Padding - rows/columns (height/width) - * - * @param padding the padding in h/w dimensions - */ - public Builder padding(int... padding) { - this.setPadding(padding); - return this; - } - - @Override - public void setKernelSize(int... kernelSize){ - this.kernelSize = ValidationUtils.validate2NonNegative(kernelSize, false, "kernelSize"); - } - - @Override - public void setStride(int... stride){ - this.stride = ValidationUtils.validate2NonNegative(stride, false, "stride"); - } - - @Override - public void setPadding(int... padding){ - this.padding = ValidationUtils.validate2NonNegative(padding, false, "padding"); - } - - @Override - @SuppressWarnings("unchecked") - public SeparableConvolution2D build() { - ConvolutionUtils.validateConvolutionModePadding(convolutionMode, padding); - ConvolutionUtils.validateCnnKernelStridePadding(kernelSize, stride, padding); - - return new SeparableConvolution2D(this); - } - } - + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToBatchLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToBatchLayer.java index 50f91781b..85d1b11e4 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToBatchLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToBatchLayer.java @@ -20,7 +20,10 @@ package org.deeplearning4j.nn.conf.layers; +import java.util.Collection; +import java.util.Map; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.InputPreProcessor; @@ -35,195 +38,159 @@ import org.nd4j.common.base.Preconditions; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.Collection; -import java.util.Map; - @Data -@NoArgsConstructor @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@SuperBuilder(builderMethodName = "innerBuilder") public class SpaceToBatchLayer extends NoParamLayer { - // TODO: throw error when block and padding dims don't match + /** + * Block size for SpaceToBatch layer. Should be a length 2 array for the height and width + * dimensions + */ + protected int[] blockSize; + /** A 2d array, with format [[padTop, padBottom], [padLeft, padRight]] */ + @Builder.Default protected int[][] padding = new int[][] {{0, 0}, {0, 0}}; + /** + * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last). + * See {@link CNN2DFormat} for more details.
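+   * An illustrative usage sketch, not part of this patch (uses the static {@code builder(int[])}
+   * factory and the setters defined in this class):
+   * {@code SpaceToBatchLayer.builder(new int[] {2, 2}).padding(new int[][] {{0, 0}, {0, 0}}).dataFormat(CNN2DFormat.NCHW).build()}<br>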
+ * Default: NCHW + * + * @param format Format for activations (in and out) + */ + @Builder.Default protected CNN2DFormat dataFormat = CNN2DFormat.NCHW; - protected int[] blocks; - protected int[][] padding; - protected CNN2DFormat format = CNN2DFormat.NCHW; + public static SpaceToBatchLayerBuilder builder() { + return innerBuilder(); + } + // TODO: throw error when block and padding dims don't match + /** + * @param blocks Block size for SpaceToBatch layer. Should be a length 2 array for the height and + * width dimensions + */ + public static SpaceToBatchLayerBuilder builder(int[] blocks) { + return innerBuilder().blockSize(blocks); + } - protected SpaceToBatchLayer(Builder builder) { - super(builder); - this.blocks = builder.blocks; - this.padding = builder.padding; - this.format = builder.format; + /** + * @param blocks Block size for SpaceToBatch layer. Should be a length 2 array for the height and + * width dimensions + * @param padding Padding - should be a 2d array, with format [[padTop, padBottom], [padLeft, + * padRight]] + */ + public static SpaceToBatchLayerBuilder builder(int[] blocks, int[][] padding) { + return innerBuilder().blockSize(blocks).padding(padding); + } + + @Override + public SpaceToBatchLayer clone() { + return (SpaceToBatchLayer) super.clone(); + } + + @Override + public org.deeplearning4j.nn.api.Layer instantiate( + NeuralNetConfiguration conf, + Collection trainingListeners, + int layerIndex, + INDArray layerParamsView, + boolean initializeParams, + DataType networkDataType) { + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + + org.deeplearning4j.nn.layers.convolution.SpaceToBatch ret = + new org.deeplearning4j.nn.layers.convolution.SpaceToBatch(lconf, networkDataType); + ret.addTrainingListeners(trainingListeners); + ret.setIndex(layerIndex); + ret.setParamsViewArray(layerParamsView); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); + ret.setParamTable(paramTable); + ret.setLayerConfiguration(lconf); + return ret; + } + + @Override + public LayerMemoryReport getMemoryReport(InputType inputType) { + InputType.InputTypeConvolutional c = (InputType.InputTypeConvolutional) inputType; + InputType.InputTypeConvolutional outputType = + (InputType.InputTypeConvolutional) getOutputType(-1, inputType); + + return new LayerMemoryReport.Builder(name, SpaceToBatchLayer.class, inputType, outputType) + .standardMemory(0, 0) // No params + .cacheMemory( + MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) // No caching + .build(); + } + + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + if (inputType == null || inputType.getType() != InputType.Type.CNN) { + throw new IllegalStateException( + "Invalid input for Subsampling layer (layer name=\"" + + getName() + + "\"): Expected CNN input, got " + + inputType); + } + InputType.InputTypeConvolutional i = (InputType.InputTypeConvolutional) inputType; + return InputType.convolutional( + (i.getHeight() + padding[0][0] + padding[0][1]) / blockSize[0], + (i.getWidth() + padding[1][0] + padding[1][1]) / blockSize[1], + i.getChannels(), + i.getFormat()); + } + + @Override + public ParamInitializer initializer() { + return EmptyParamInitializer.getInstance(); + } + + @Override + public void setNIn(InputType inputType, boolean override) { + Preconditions.checkState( + inputType.getType() == InputType.Type.CNN, + "Only CNN input types can be used with SpaceToBatchLayer, got %s", + inputType); + this.dataFormat = 
((InputType.InputTypeConvolutional) inputType).getFormat(); + } + + @Override + public InputPreProcessor getPreProcessorForInputType(InputType inputType) { + if (inputType == null) { + throw new IllegalStateException( + "Invalid input for space to batch layer (layer name=\"" + + getName() + + "\"): input is null"); + } + return InputTypeUtil.getPreProcessorForInputTypeCnnLayers(inputType, getName()); + } + + @Override + public boolean isPretrainParam(String paramName) { + throw new UnsupportedOperationException("SpaceToBatchLayer does not contain parameters"); + } + + public static abstract class SpaceToBatchLayerBuilder< + C extends SpaceToBatchLayer, B extends SpaceToBatchLayerBuilder> + extends NoParamLayerBuilder { + /** + * @param blocks Block size for SpaceToBatch layer. Should be a length 2 array for the height + * and width dimensions + * @return + */ + public B blockSize(int... blocks) { + this.blockSize = ValidationUtils.validate2NonNegative(blocks, false, "blocks"); + return self(); } - @Override - public SpaceToBatchLayer clone() { - return (SpaceToBatchLayer) super.clone(); + /** + * @param padding Padding - should be a 2d array, with format [[padTop, padBottom], [padLeft, + * padRight]] + * @return + */ + public B padding(int[][] padding) { + this.padding$value = ValidationUtils.validate2x2NonNegative(padding, "padding"); + this.padding$set = true; + return self(); } - - @Override - public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, - Collection trainingListeners, int layerIndex, INDArray layerParamsView, - boolean initializeParams, DataType networkDataType) { - LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); - - org.deeplearning4j.nn.layers.convolution.SpaceToBatch ret = - new org.deeplearning4j.nn.layers.convolution.SpaceToBatch(lconf, networkDataType); - ret.addTrainingListeners(trainingListeners); - ret.setIndex(layerIndex); - ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(this, layerParamsView, initializeParams); - ret.setParamTable(paramTable); - ret.setLayerConfiguration(lconf); - return ret; - } - - @Override - public LayerMemoryReport getMemoryReport(InputType inputType) { - InputType.InputTypeConvolutional c = (InputType.InputTypeConvolutional) inputType; - InputType.InputTypeConvolutional outputType = (InputType.InputTypeConvolutional) getOutputType(-1, inputType); - - return new LayerMemoryReport.Builder(layerName, SpaceToBatchLayer.class, inputType, outputType) - .standardMemory(0, 0) //No params - .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching - .build(); - } - - @Override - public InputType getOutputType(int layerIndex, InputType inputType) { - if (inputType == null || inputType.getType() != InputType.Type.CNN) { - throw new IllegalStateException("Invalid input for Subsampling layer (layer name=\"" + getLayerName() - + "\"): Expected CNN input, got " + inputType); - } - InputType.InputTypeConvolutional i = (InputType.InputTypeConvolutional) inputType; - return InputType.convolutional((i.getHeight() + padding[0][0] + padding[0][1]) / blocks[0], - (i.getWidth() + padding[1][0] + padding[1][1]) / blocks[1], i.getChannels(), i.getFormat()); - } - - @Override - public ParamInitializer initializer() { - return EmptyParamInitializer.getInstance(); - } - - - @Override - public void setNIn(InputType inputType, boolean override) { - Preconditions.checkState(inputType.getType() == InputType.Type.CNN, "Only CNN input types can be 
used with SpaceToBatchLayer, got %s", inputType); - this.format = ((InputType.InputTypeConvolutional)inputType).getFormat(); - } - - @Override - public InputPreProcessor getPreProcessorForInputType(InputType inputType) { - if (inputType == null) { - throw new IllegalStateException("Invalid input for space to batch layer (layer name=\"" + getLayerName() - + "\"): input is null"); - } - return InputTypeUtil.getPreProcessorForInputTypeCnnLayers(inputType, getLayerName()); - } - - @Override - public boolean isPretrainParam(String paramName) { - throw new UnsupportedOperationException("SpaceToBatchLayer does not contain parameters"); - } - - - @NoArgsConstructor - @Getter - @Setter - public static class Builder> extends LayerConfiguration.Builder { - - /** - * Block size for SpaceToBatch layer. Should be a length 2 array for the height and width - * dimensions - */ - @Setter(AccessLevel.NONE) - protected int[] blocks; - - /** - * A 2d array, with format [[padTop, padBottom], [padLeft, padRight]] - */ - protected int[][] padding; - - protected CNN2DFormat format = CNN2DFormat.NCHW; - - /** - * @param blocks Block size for SpaceToBatch layer. Should be a length 2 array for the height and width - * dimensions - */ - public void setBlocks(int... blocks) { - this.blocks = ValidationUtils.validate2NonNegative(blocks, false, "blocks"); - } - - /** - * @param padding Padding - should be a 2d array, with format [[padTop, padBottom], [padLeft, padRight]] - */ - public void setPadding(int[][] padding) { - this.padding = ValidationUtils.validate2x2NonNegative(padding, "padding"); - } - - - /** - * @param blocks Block size for SpaceToBatch layer. Should be a length 2 array for the height and width - * dimensions - */ - public Builder(int[] blocks) { - this.setBlocks(blocks); - this.setPadding(new int[][] {{0, 0}, {0, 0}}); - } - - /** - * @param blocks Block size for SpaceToBatch layer. Should be a length 2 array for the height and width - * dimensions - * @param padding Padding - should be a 2d array, with format [[padTop, padBottom], [padLeft, padRight]] - */ - public Builder(int[] blocks, int[][] padding) { - this.setBlocks(blocks); - this.setPadding(padding); - } - - /** - * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last). - * See {@link CNN2DFormat} for more details.
- * Default: NCHW - * @param format Format for activations (in and out) - */ - public T dataFormat(CNN2DFormat format){ - this.format = format; - return (T)this; - } - - /** - * @param blocks Block size for SpaceToBatch layer. Should be a length 2 array for the height and width - * dimensions - */ - public T blocks(int... blocks) { - this.setBlocks(blocks); - return (T) this; - } - - /** - * @param padding Padding - should be a 2d array, with format [[padTop, padBottom], [padLeft, padRight]] - */ - public T padding(int[][] padding) { - this.setPadding(padding); - return (T) this; - } - - @Override - public T name(String layerName) { - this.setLayerName(layerName); - return (T) this; - } - - @Override - @SuppressWarnings("unchecked") - public SpaceToBatchLayer build() { - if(padding == null) - setPadding(new int[][] {{0, 0}, {0, 0}}); - return new SpaceToBatchLayer(this); - } - } - + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToDepthLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToDepthLayer.java index 462f3ab5e..1c3c625a1 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToDepthLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToDepthLayer.java @@ -21,6 +21,7 @@ package org.deeplearning4j.nn.conf.layers; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.InputPreProcessor; @@ -37,9 +38,9 @@ import java.util.Collection; import java.util.Map; @Data -@NoArgsConstructor @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@SuperBuilder public class SpaceToDepthLayer extends NoParamLayer { /** @@ -53,16 +54,20 @@ public class SpaceToDepthLayer extends NoParamLayer { return this == NCHW ? CNN2DFormat.NCHW : CNN2DFormat.NHWC; } } - + /** + * @param blockSize Block size + */ protected int blockSize; - protected CNN2DFormat dataFormat; + /** + * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last). + * See {@link CNN2DFormat} for more details.
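+   * An illustrative usage sketch, not part of this patch ({@code builder()}, {@code blockSize(int)}
+   * and {@code dataFormat(CNN2DFormat)} should be generated by Lombok from the fields of this class):
+   * {@code SpaceToDepthLayer.builder().blockSize(2).dataFormat(CNN2DFormat.NHWC).build()}<br>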
+ * Default: NCHW + * @param dataFormat Format for activations (in and out) + */ + @Builder.Default + protected CNN2DFormat dataFormat = CNN2DFormat.NCHW; - protected SpaceToDepthLayer(Builder builder) { - super(builder); - this.setBlockSize(builder.blockSize); - this.setDataFormat(builder.dataFormat); - } @Override public SpaceToDepthLayer clone() { @@ -74,7 +79,7 @@ public class SpaceToDepthLayer extends NoParamLayer { Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); - +runInheritance(); org.deeplearning4j.nn.layers.convolution.SpaceToDepth ret = new org.deeplearning4j.nn.layers.convolution.SpaceToDepth(lconf, networkDataType); ret.addTrainingListeners(trainingListeners); @@ -90,7 +95,7 @@ public class SpaceToDepthLayer extends NoParamLayer { public LayerMemoryReport getMemoryReport(InputType inputType) { InputType.InputTypeConvolutional outputType = (InputType.InputTypeConvolutional) getOutputType(-1, inputType); - return new LayerMemoryReport.Builder(layerName, SpaceToDepthLayer.class, inputType, outputType) + return new LayerMemoryReport.Builder(name, SpaceToDepthLayer.class, inputType, outputType) .standardMemory(0, 0) //No params .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching .build(); @@ -99,7 +104,7 @@ public class SpaceToDepthLayer extends NoParamLayer { @Override public InputType getOutputType(int layerIndex, InputType inputType) { if (inputType == null || inputType.getType() != InputType.Type.CNN) { - throw new IllegalStateException("Invalid input for space to channels layer (layer name=\"" + getLayerName() + throw new IllegalStateException("Invalid input for space to channels layer (layer name=\"" + getName() + "\"): Expected CNN input, got " + inputType); } InputType.InputTypeConvolutional i = (InputType.InputTypeConvolutional) inputType; @@ -121,10 +126,10 @@ public class SpaceToDepthLayer extends NoParamLayer { @Override public InputPreProcessor getPreProcessorForInputType(InputType inputType) { if (inputType == null) { - throw new IllegalStateException("Invalid input for space to channels layer (layer name=\"" + getLayerName() + throw new IllegalStateException("Invalid input for space to channels layer (layer name=\"" + getName() + "\"): input is null"); } - return InputTypeUtil.getPreProcessorForInputTypeCnnLayers(inputType, getLayerName()); + return InputTypeUtil.getPreProcessorForInputTypeCnnLayers(inputType, getName()); } @Override @@ -133,78 +138,5 @@ public class SpaceToDepthLayer extends NoParamLayer { } - @NoArgsConstructor - @Getter - @Setter - public static class Builder> extends LayerConfiguration.Builder { - - protected int blockSize; - - /** - * Data format for input activations. Note DL4J uses NCHW in most cases - */ - protected CNN2DFormat dataFormat = CNN2DFormat.NCHW; - - /** - * @param blockSize Block size - */ - public Builder(int blockSize) { - this.setBlockSize(blockSize); - } - - /** - * @param blockSize Block size - * @param dataFormat Data format for input activations. 
Note DL4J uses NCHW in most cases - */ - @Deprecated - public Builder(int blockSize, DataFormat dataFormat) { - this(blockSize, dataFormat.toFormat()); - } - - public Builder(int blockSize, CNN2DFormat dataFormat) { - this.setBlockSize(blockSize); - this.setDataFormat(dataFormat); - } - - /** - * @param blockSize Block size - */ - public T blocks(int blockSize) { - this.setBlockSize(blockSize); - return (T) this; - } - - /** - * @param dataFormat Data format for input activations. Note DL4J uses NCHW in most cases - * @deprecated Use {@link #dataFormat(CNN2DFormat)} - */ - @Deprecated - public T dataFormat(DataFormat dataFormat) { - return dataFormat(dataFormat.toFormat()); - } - - /** - * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last). - * See {@link CNN2DFormat} for more details.
- * Default: NCHW - * @param dataFormat Format for activations (in and out) - */ - public T dataFormat(CNN2DFormat dataFormat) { - this.setDataFormat(dataFormat); - return (T) this; - } - - @Override - public T name(String layerName) { - this.setLayerName(layerName); - return (T) this; - } - - @Override - @SuppressWarnings("unchecked") - public SpaceToDepthLayer build() { - return new SpaceToDepthLayer(this); - } - } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling1DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling1DLayer.java index be544fb2f..a64db7447 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling1DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling1DLayer.java @@ -20,10 +20,13 @@ package org.deeplearning4j.nn.conf.layers; +import java.util.Collection; +import java.util.Map; import lombok.Data; import lombok.EqualsAndHashCode; import lombok.NoArgsConstructor; import lombok.ToString; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -36,241 +39,155 @@ import org.deeplearning4j.util.ValidationUtils; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.Collection; -import java.util.Map; +/* + * Currently, we just subclass off the SubsamplingLayer and hard code the "width" dimension to 1. + * TODO: We will eventually want to NOT subclass off of SubsamplingLayer. + * This approach treats a multivariate time series with L timesteps and + * P variables as an L x 1 x P image (L rows high, 1 column wide, P + * channels deep). 
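+ * For example (purely illustrative numbers): a multivariate series with L = 100 timesteps and
+ * P = 8 variables becomes a 100 x 1 x 8 image, so a pooling kernel of height k and width 1
+ * pools over k consecutive timesteps within each of the 8 channels.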
The kernel should be H trainingListeners, + int layerIndex, + INDArray layerParamsView, + boolean initializeParams, + DataType networkDataType) { + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + runInheritance(); + org.deeplearning4j.nn.layers.convolution.subsampling.Subsampling1DLayer ret = + new org.deeplearning4j.nn.layers.convolution.subsampling.Subsampling1DLayer( + lconf, networkDataType); + ret.addTrainingListeners(trainingListeners); + ret.setIndex(layerIndex); + ret.setParamsViewArray(layerParamsView); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); + ret.setParamTable(paramTable); + ret.setLayerConfiguration(lconf); + return ret; + } + + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + if (inputType == null || inputType.getType() != InputType.Type.RNN) { + throw new IllegalStateException( + "Invalid input for Subsampling1D layer (layer name=\"" + + getName() + + "\"): Expected RNN input, got " + + inputType); + } + InputType.InputTypeRecurrent r = (InputType.InputTypeRecurrent) inputType; + long inputTsLength = r.getTimeSeriesLength(); + long outLength; + if (inputTsLength < 0) { + // Probably: user did InputType.recurrent(x) without specifying sequence length + outLength = -1; + } else { + outLength = + Convolution1DUtils.getOutputSize( + inputTsLength, kernelSize[0], stride[0], padding[0], convolutionMode, getDilation()[0]); + } + return InputType.recurrent(r.getSize(), outLength, r.getFormat()); + } + + @Override + public void setNIn(InputType inputType, boolean override) { + // No op: subsampling layer doesn't have nIn value + if (dataFormat == null || override) { + if (inputType.getType() == InputType.Type.RNN) { + InputType.InputTypeRecurrent inputTypeConvolutional = + (InputType.InputTypeRecurrent) inputType; + this.dataFormat = + inputTypeConvolutional.getFormat() == RNNFormat.NCW + ? 
CNN2DFormat.NCHW + : CNN2DFormat.NHWC; + + } else if (inputType.getType() == InputType.Type.CNN) { + InputType.InputTypeConvolutional inputTypeConvolutional = + (InputType.InputTypeConvolutional) inputType; + this.dataFormat = inputTypeConvolutional.getFormat(); + } + } + } + + @Override + public InputPreProcessor getPreProcessorForInputType(InputType inputType) { + if (inputType == null) { + throw new IllegalStateException( + "Invalid input for Subsampling1D layer (layer name=\"" + + getName() + + "\"): input is null"); + } + + return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType, RNNFormat.NCW, getName()); + } + + @Override + public Subsampling1DLayer clone() { + Subsampling1DLayer clone = (Subsampling1DLayer) super.clone(); + + if (clone.kernelSize != null) { + clone.kernelSize = clone.kernelSize.clone(); + } + if (clone.stride != null) { + clone.stride = clone.stride.clone(); + } + if (clone.padding != null) { + clone.padding = clone.padding.clone(); + } + if (clone.getDilation() != null) { + clone.setDilation( clone.getDilation().clone()); + } + return clone; + } + + protected boolean allowCausal() { + return true; + } + + public static abstract class Subsampling1DLayerBuilder> extends + SubsamplingLayerBuilder { + + public C build() { + C l = this.initBuild(); + if (l.getPoolingType() == org.deeplearning4j.nn.conf.layers.PoolingType.PNORM && l.getPnorm() <= 0) { + throw new IllegalStateException( + "Incorrect Subsampling config: p-norm must be set when using PoolingType.PNORM"); + } + ConvolutionUtils.validateConvolutionModePadding(l.getConvolutionMode(), l.getPadding()); + ConvolutionUtils.validateCnnKernelStridePadding(l.getKernelSize(), l.getStride(), l.getPadding()); + return l; + } + +/** +* + * @param kernelSize + * @return +*/ + @Override + public B kernelSize(int... kernelSize) { + super.kernelSize( ValidationUtils.validate1NonNegative(kernelSize, "kernelSize")[0]); + return self(); } @Override - public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, - Collection trainingListeners, int layerIndex, INDArray layerParamsView, - boolean initializeParams, DataType networkDataType) { - LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); - - org.deeplearning4j.nn.layers.convolution.subsampling.Subsampling1DLayer ret = - new org.deeplearning4j.nn.layers.convolution.subsampling.Subsampling1DLayer(lconf, networkDataType); - ret.addTrainingListeners(trainingListeners); - ret.setIndex(layerIndex); - ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(this, layerParamsView, initializeParams); - ret.setParamTable(paramTable); - ret.setLayerConfiguration(lconf); - return ret; + public B stride(int... 
stride) { + super.stride( ValidationUtils.validate1NonNegative(stride, "stride")[0]); + return self(); } @Override - public InputType getOutputType(int layerIndex, InputType inputType) { - if (inputType == null || inputType.getType() != InputType.Type.RNN) { - throw new IllegalStateException("Invalid input for Subsampling1D layer (layer name=\"" + getLayerName() - + "\"): Expected RNN input, got " + inputType); - } - InputType.InputTypeRecurrent r = (InputType.InputTypeRecurrent) inputType; - long inputTsLength = r.getTimeSeriesLength(); - long outLength; - if (inputTsLength < 0) { - //Probably: user did InputType.recurrent(x) without specifying sequence length - outLength = -1; - } else { - outLength = Convolution1DUtils.getOutputSize(inputTsLength, kernelSize[0], stride[0], padding[0], - convolutionMode, dilation[0]); - } - return InputType.recurrent(r.getSize(), outLength, r.getFormat()); - } - - @Override - public void setNIn(InputType inputType, boolean override) { - //No op: subsampling layer doesn't have nIn value - if(cnn2dDataFormat == null || override) { - if(inputType.getType() == InputType.Type.RNN) { - InputType.InputTypeRecurrent inputTypeConvolutional = (InputType.InputTypeRecurrent) inputType; - this.cnn2dDataFormat = inputTypeConvolutional.getFormat() == RNNFormat.NCW ? CNN2DFormat.NCHW : CNN2DFormat.NHWC; - - } else if(inputType.getType() == InputType.Type.CNN) { - InputType.InputTypeConvolutional inputTypeConvolutional = (InputType.InputTypeConvolutional) inputType; - this.cnn2dDataFormat = inputTypeConvolutional.getFormat(); - } - - } - } - - @Override - public InputPreProcessor getPreProcessorForInputType(InputType inputType) { - if (inputType == null) { - throw new IllegalStateException("Invalid input for Subsampling1D layer (layer name=\"" + getLayerName() - + "\"): input is null"); - } - - return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType, RNNFormat.NCW, getLayerName()); - } - - @Override - public Subsampling1DLayer clone() { - Subsampling1DLayer clone = (Subsampling1DLayer) super.clone(); - - if (clone.kernelSize != null) { - clone.kernelSize = clone.kernelSize.clone(); - } - if (clone.stride != null) { - clone.stride = clone.stride.clone(); - } - if (clone.padding != null) { - clone.padding = clone.padding.clone(); - } - if (clone.dilation != null) { - clone.dilation = clone.dilation.clone(); - } - return clone; - } - - public static class Builder extends SubsamplingLayer.BaseSubsamplingBuilder { - - private static final org.deeplearning4j.nn.conf.layers.PoolingType DEFAULT_POOLING = - org.deeplearning4j.nn.conf.layers.PoolingType.MAX; - private static final int DEFAULT_KERNEL = 2; - private static final int DEFAULT_STRIDE = 1; - private static final int DEFAULT_PADDING = 0; - - public Builder(PoolingType poolingType, int kernelSize, int stride) { - this(poolingType, kernelSize, stride, DEFAULT_PADDING); - } - - public Builder(PoolingType poolingType, int kernelSize) { - this(poolingType, kernelSize, DEFAULT_STRIDE, DEFAULT_PADDING); - } - - public Builder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType, int kernelSize) { - this(poolingType, kernelSize, DEFAULT_STRIDE, DEFAULT_PADDING); - } - - public Builder(int kernelSize, int stride, int padding) { - this(DEFAULT_POOLING, kernelSize, stride, padding); - } - - public Builder(int kernelSize, int stride) { - this(DEFAULT_POOLING, kernelSize, stride, DEFAULT_PADDING); - } - - public Builder(int kernelSize) { - this(DEFAULT_POOLING, kernelSize, DEFAULT_STRIDE, DEFAULT_PADDING); - } - - 
public Builder(PoolingType poolingType) { - this(poolingType, DEFAULT_KERNEL, DEFAULT_STRIDE, DEFAULT_PADDING); - } - - public Builder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType) { - this(poolingType, DEFAULT_KERNEL, DEFAULT_STRIDE, DEFAULT_PADDING); - } - - @Override - protected boolean allowCausal() { - return true; - } - - public Builder() { - this(DEFAULT_POOLING, DEFAULT_KERNEL, DEFAULT_STRIDE, DEFAULT_PADDING); - } - - public Builder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType, int kernelSize, int stride, - int padding) { - setKernelSize(kernelSize); - setPadding(padding); - setStride(stride); - } - - public Builder(PoolingType poolingType, int kernelSize, int stride, int padding) { - this.poolingType = poolingType.toPoolingType(); - setKernelSize(kernelSize); - setStride(stride); - setPadding(padding); - } - - @SuppressWarnings("unchecked") - public Subsampling1DLayer build() { - if (poolingType == org.deeplearning4j.nn.conf.layers.PoolingType.PNORM && pnorm <= 0) { - throw new IllegalStateException( - "Incorrect Subsampling config: p-norm must be set when using PoolingType.PNORM"); - } - ConvolutionUtils.validateConvolutionModePadding(convolutionMode, padding); - ConvolutionUtils.validateCnnKernelStridePadding(kernelSize, stride, padding); - - return new Subsampling1DLayer(this); - } - - /** - * Kernel size - * - * @param kernelSize kernel size - */ - public Subsampling1DLayer.Builder kernelSize(int kernelSize) { - this.setKernelSize(kernelSize); - return this; - } - - /** - * Stride - * - * @param stride stride value - */ - public Subsampling1DLayer.Builder stride(int stride) { - this.setStride(stride); - return this; - } - - /** - * Padding - * - * @param padding padding value - */ - public Subsampling1DLayer.Builder padding(int padding) { - this.setPadding(padding); - return this; - } - - /** - * Kernel size - * - * @param kernelSize kernel size - */ - @Override - public void setKernelSize(int... kernelSize) { - this.kernelSize[0] = ValidationUtils.validate1NonNegative(kernelSize, "kernelSize")[0]; - } - - /** - * Stride - * - * @param stride stride value - */ - @Override - public void setStride(int... stride) { - this.stride[0] = ValidationUtils.validate1NonNegative(stride, "stride")[0]; - } - - /** - * Padding - * - * @param padding padding value - */ - @Override - public void setPadding(int... padding) { - this.padding[0] = ValidationUtils.validate1NonNegative(padding, "padding")[0]; - } + public B padding(int... 
padding) { + super.padding( ValidationUtils.validate1NonNegative(padding, "padding")[0]); + return self(); } + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling3DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling3DLayer.java index 123df419b..324503c3d 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling3DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling3DLayer.java @@ -20,7 +20,11 @@ package org.deeplearning4j.nn.conf.layers; +import java.util.Collection; +import java.util.List; +import java.util.Map; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.InputPreProcessor; @@ -33,460 +37,285 @@ import org.deeplearning4j.optimize.api.TrainingListener; import org.deeplearning4j.util.Convolution3DUtils; import org.deeplearning4j.util.ConvolutionUtils; import org.deeplearning4j.util.ValidationUtils; -import org.nd4j.common.base.Preconditions; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.exception.ND4JArraySizeException; import org.nd4j.linalg.learning.regularization.Regularization; -import java.util.Collection; -import java.util.List; -import java.util.Map; - @Data -@NoArgsConstructor @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@SuperBuilder(builderMethodName = "innerBuilder", buildMethodName = "initBuild") public class Subsampling3DLayer extends NoParamLayer { - protected ConvolutionMode convolutionMode = ConvolutionMode.Truncate; - protected org.deeplearning4j.nn.conf.layers.PoolingType poolingType; - protected int[] kernelSize; - protected int[] stride; - protected int[] padding; - protected int[] dilation; - protected boolean cudnnAllowFallback = true; - protected Convolution3D.DataFormat dataFormat = Convolution3D.DataFormat.NCDHW; //Default for 1.0.0-beta3 and earlier (before config added) + @Builder.Default protected ConvolutionMode convolutionMode = ConvolutionMode.Truncate; + protected org.deeplearning4j.nn.conf.layers.PoolingType poolingType; + /** + * Kernel size + * + * @param kernelSize kernel size in height and width dimensions + */ + protected int[] kernelSize; + /** + * Stride + * + * @param stride stride in height and width dimensions + */ + protected int[] stride; + /** + * Padding + * + * @param padding padding in the height and width dimensions + */ + protected int[] padding; - public enum PoolingType { - MAX, AVG; + protected int[] dilation; + @Builder.Default protected boolean cudnnAllowFallback = true; + /** + * The data format for input and output activations.
+   * NCDHW: activations (in/out) should have shape [minibatch, channels, depth, height, width]<br>
+   * NDHWC: activations (in/out) should have shape [minibatch, depth, height, width, channels]<br>
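+   * <br>
+   * For example (illustrative shapes): a minibatch of 8 single-channel 16x32x32 volumes has shape
+   * [8, 1, 16, 32, 32] in NCDHW and [8, 16, 32, 32, 1] in NDHWC.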
+ */ + @Builder.Default + protected Convolution3D.DataFormat dataFormat = + Convolution3D.DataFormat.NCDHW; // Default for 1.0.0-beta3 and earlier (before config added) - public org.deeplearning4j.nn.conf.layers.PoolingType toPoolingType() { - switch (this) { - case MAX: - return org.deeplearning4j.nn.conf.layers.PoolingType.MAX; - case AVG: - return org.deeplearning4j.nn.conf.layers.PoolingType.AVG; - } - throw new UnsupportedOperationException("Unknown/not supported pooling type: " + this); - } + public static Subsampling3DLayerBuilder builder() { + return innerBuilder(); + } + + public static Subsampling3DLayerBuilder builder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType) { + return innerBuilder() + .poolingType(poolingType); + } + + public static Subsampling3DLayerBuilder builder( + org.deeplearning4j.nn.conf.layers.PoolingType poolingType, + int[] kernelSize, + int[] stride, + int[] padding) { + return innerBuilder() + .poolingType(poolingType) + .kernelSize(kernelSize) + .stride(stride) + .padding(padding); + } + + public static Subsampling3DLayerBuilder builder( + int[] kernelSize, int[] stride, int[] padding) { + return innerBuilder().kernelSize(kernelSize).stride(stride).padding(padding); + } + + public static Subsampling3DLayerBuilder builder( + PoolingType poolingType, int[] kernelSize, int[] stride, int[] padding) { + return innerBuilder() + .poolingType(poolingType.toPoolingType()) + .kernelSize(kernelSize) + .stride(stride) + .padding(padding); + } + + public static Subsampling3DLayerBuilder builder(PoolingType poolingType, int[] kernelSize) { + return innerBuilder().poolingType(poolingType.toPoolingType()).kernelSize(kernelSize); + } + + public static Subsampling3DLayerBuilder builder( + org.deeplearning4j.nn.conf.layers.PoolingType poolingType, int[] kernelSize) { + return innerBuilder().poolingType(poolingType).kernelSize(kernelSize); + } + + public static Subsampling3DLayerBuilder builder(int[] kernelSize, int[] stride) { + return innerBuilder().kernelSize(kernelSize).stride(stride); + } + + public static Subsampling3DLayerBuilder builder(int... 
kernelSize) { + return innerBuilder().kernelSize(kernelSize); + } + + @Override + public Subsampling3DLayer clone() { + Subsampling3DLayer clone = (Subsampling3DLayer) super.clone(); + + if (clone.kernelSize != null) { + clone.kernelSize = clone.kernelSize.clone(); + } + if (clone.stride != null) { + clone.stride = clone.stride.clone(); + } + if (clone.padding != null) { + clone.padding = clone.padding.clone(); + } + if (clone.dilation != null) { + clone.dilation = clone.dilation.clone(); + } + return clone; + } + + @Override + public org.deeplearning4j.nn.api.Layer instantiate( + NeuralNetConfiguration conf, + Collection iterationListeners, + int layerIndex, + INDArray layerParamsView, + boolean initializeParams, + DataType networkDataType) { + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + runInheritance(); + org.deeplearning4j.nn.layers.convolution.subsampling.Subsampling3DLayer ret = + new org.deeplearning4j.nn.layers.convolution.subsampling.Subsampling3DLayer( + lconf, networkDataType); + ret.addTrainingListeners(iterationListeners); + ret.setIndex(layerIndex); + ret.setParamsViewArray(layerParamsView); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); + ret.setParamTable(paramTable); + ret.setLayerConfiguration(lconf); + return ret; + } + + @Override + public ParamInitializer initializer() { + return EmptyParamInitializer.getInstance(); + } + + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + if (inputType == null || inputType.getType() != InputType.Type.CNN3D) { + throw new IllegalStateException( + "Invalid input for Subsampling 3D layer (layer name=\"" + + getName() + + "\"): Expected CNN input, got " + + inputType); } - protected Subsampling3DLayer(Builder builder) { - super(builder); - this.poolingType = builder.poolingType; - if (builder.kernelSize.length != 3) { - throw new IllegalArgumentException("Kernel size must be length 3"); - } - this.kernelSize = builder.kernelSize; - if (builder.stride.length != 3) { - throw new IllegalArgumentException("Invalid stride, must be length 3"); - } - this.stride = builder.stride; - this.padding = builder.padding; - this.dilation = builder.dilation; - this.convolutionMode = builder.convolutionMode; - this.cudnnAllowFallback = builder.cudnnAllowFallback; - this.dataFormat = builder.dataFormat; + long inChannels = ((InputType.InputTypeConvolutional3D) inputType).getChannels(); + if (inChannels > Integer.MAX_VALUE) throw new ND4JArraySizeException(); + return InputTypeUtil.getOutputTypeCnn3DLayers( + inputType, + dataFormat, + kernelSize, + stride, + padding, + new int[] {1, 1, 1}, // no dilation + convolutionMode, + (int) inChannels, + layerIndex, + getName(), + Subsampling3DLayer.class); + } + + @Override + public void setNIn(InputType inputType, boolean override) { + // No op: subsampling layer doesn't have nIn value + } + + @Override + public InputPreProcessor getPreProcessorForInputType(InputType inputType) { + if (inputType == null) { + throw new IllegalStateException( + "Invalid input for Subsampling 3D layer (layer name=\"" + + getName() + + "\"): input is null"); } - @Override - public Subsampling3DLayer clone() { - Subsampling3DLayer clone = (Subsampling3DLayer) super.clone(); + return InputTypeUtil.getPreProcessorForInputTypeCnn3DLayers(inputType, getName()); + } - if (clone.kernelSize != null) { - clone.kernelSize = clone.kernelSize.clone(); - } - if (clone.stride != null) { - clone.stride = clone.stride.clone(); - } - if 
(clone.padding != null) { - clone.padding = clone.padding.clone(); - } - if (clone.dilation != null) { - clone.dilation = clone.dilation.clone(); - } - return clone; + @Override + public List getRegularizationByParam(String paramName) { + // Not applicable + return null; + } + + @Override + public boolean isPretrainParam(String paramName) { + throw new UnsupportedOperationException("SubsamplingLayer does not contain parameters"); + } + + @Override + public LayerMemoryReport getMemoryReport(InputType inputType) { + InputType.InputTypeConvolutional3D c = (InputType.InputTypeConvolutional3D) inputType; + InputType.InputTypeConvolutional3D outputType = + (InputType.InputTypeConvolutional3D) getOutputType(-1, inputType); + val actElementsPerEx = outputType.arrayElementsPerExample(); + + // During forward pass: im2col array + reduce. Reduce is counted as activations, so only im2col + // is working mem + val im2colSizePerEx = + c.getChannels() + * outputType.getHeight() + * outputType.getWidth() + * outputType.getDepth() + * kernelSize[0] + * kernelSize[1]; + + // Current implementation does NOT cache im2col etc... which means: it's recalculated on each + // backward pass + long trainingWorkingSizePerEx = im2colSizePerEx; + if (getDropOut() != null) { + // Dup on the input before dropout, but only for training + trainingWorkingSizePerEx += inputType.arrayElementsPerExample(); } - @Override - public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, - Collection iterationListeners, int layerIndex, INDArray layerParamsView, - boolean initializeParams, DataType networkDataType) { - LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + return new LayerMemoryReport.Builder(name, Subsampling3DLayer.class, inputType, outputType) + .standardMemory(0, 0) // No params + .workingMemory(0, im2colSizePerEx, 0, trainingWorkingSizePerEx) + .cacheMemory( + MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) // No caching + .build(); + } - org.deeplearning4j.nn.layers.convolution.subsampling.Subsampling3DLayer ret = - new org.deeplearning4j.nn.layers.convolution.subsampling.Subsampling3DLayer(lconf, networkDataType); - ret.addTrainingListeners(iterationListeners); - ret.setIndex(layerIndex); - ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(this, layerParamsView, initializeParams); - ret.setParamTable(paramTable); - ret.setLayerConfiguration(lconf); - return ret; + public enum PoolingType { + MAX, + AVG; + + public org.deeplearning4j.nn.conf.layers.PoolingType toPoolingType() { + switch (this) { + case MAX: + return org.deeplearning4j.nn.conf.layers.PoolingType.MAX; + case AVG: + return org.deeplearning4j.nn.conf.layers.PoolingType.AVG; + } + throw new UnsupportedOperationException("Unknown/not supported pooling type: " + this); + } + } + + public static abstract class Subsampling3DLayerBuilder< + C extends Subsampling3DLayer, B extends Subsampling3DLayerBuilder> + extends NoParamLayerBuilder { + + public B kernelSize(int... kernelSize) { + this.kernelSize = ValidationUtils.validate3NonNegative(kernelSize, "kernelSize"); + return self(); } - @Override - public ParamInitializer initializer() { - return EmptyParamInitializer.getInstance(); + public B stride(int... 
stride) { + this.stride = ValidationUtils.validate3NonNegative(stride, "stride"); + return self(); } - @Override - public InputType getOutputType(int layerIndex, InputType inputType) { - if (inputType == null || inputType.getType() != InputType.Type.CNN3D) { - throw new IllegalStateException("Invalid input for Subsampling 3D layer (layer name=\"" + getLayerName() - + "\"): Expected CNN input, got " + inputType); - } - - long inChannels = ((InputType.InputTypeConvolutional3D) inputType).getChannels(); - if (inChannels > Integer.MAX_VALUE) - throw new ND4JArraySizeException(); - return InputTypeUtil.getOutputTypeCnn3DLayers(inputType, dataFormat, kernelSize, stride, padding, new int[] {1, 1, 1}, // no dilation - convolutionMode, (int) inChannels, - layerIndex, getLayerName(), Subsampling3DLayer.class); + public B padding(int... padding) { + this.padding = ValidationUtils.validate3NonNegative(padding, "padding"); + return self(); } - @Override - public void setNIn(InputType inputType, boolean override) { - //No op: subsampling layer doesn't have nIn value + public B dilation(int... padding) { + this.dilation = ValidationUtils.validate3NonNegative(padding, "dilation"); + return self(); } - @Override - public InputPreProcessor getPreProcessorForInputType(InputType inputType) { - if (inputType == null) { - throw new IllegalStateException("Invalid input for Subsampling 3D layer (layer name=\"" + getLayerName() - + "\"): input is null"); - } + public C build() { + if (kernelSize.length != 3) { + throw new IllegalArgumentException("Kernel size must be length 3"); + } - return InputTypeUtil.getPreProcessorForInputTypeCnn3DLayers(inputType, getLayerName()); + if (stride.length != 3) { + throw new IllegalArgumentException("Invalid stride, must be length 3"); + } + C l = this.initBuild(); + ConvolutionUtils.validateConvolutionModePadding(l.getConvolutionMode(), padding); + Convolution3DUtils.validateCnn3DKernelStridePadding(kernelSize, stride, padding); + return l; } - - @Override - public List getRegularizationByParam(String paramName) { - //Not applicable - return null; - } - - @Override - public boolean isPretrainParam(String paramName) { - throw new UnsupportedOperationException("SubsamplingLayer does not contain parameters"); - } - - @Override - public LayerMemoryReport getMemoryReport(InputType inputType) { - InputType.InputTypeConvolutional3D c = (InputType.InputTypeConvolutional3D) inputType; - InputType.InputTypeConvolutional3D outputType = - (InputType.InputTypeConvolutional3D) getOutputType(-1, inputType); - val actElementsPerEx = outputType.arrayElementsPerExample(); - - //During forward pass: im2col array + reduce. Reduce is counted as activations, so only im2col is working mem - val im2colSizePerEx = c.getChannels() * outputType.getHeight() * outputType.getWidth() * outputType.getDepth() - * kernelSize[0] * kernelSize[1]; - - //Current implementation does NOT cache im2col etc... 
which means: it's recalculated on each backward pass - long trainingWorkingSizePerEx = im2colSizePerEx; - if (getIDropout() != null) { - //Dup on the input before dropout, but only for training - trainingWorkingSizePerEx += inputType.arrayElementsPerExample(); - } - - return new LayerMemoryReport.Builder(layerName, Subsampling3DLayer.class, inputType, outputType) - .standardMemory(0, 0) //No params - .workingMemory(0, im2colSizePerEx, 0, trainingWorkingSizePerEx) - .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching - .build(); - } - - @NoArgsConstructor - @Getter - @Setter - public static class Builder extends BaseSubsamplingBuilder { - - /** - * The data format for input and output activations.
NCDHW: activations (in/out) should have shape - * [minibatch, channels, depth, height, width]<br>
NDHWC: activations (in/out) should have shape [minibatch, - * depth, height, width, channels]<br>
- */ - protected Convolution3D.DataFormat dataFormat = Convolution3D.DataFormat.NCDHW; - - public Builder(PoolingType poolingType, int[] kernelSize, int[] stride) { - super(poolingType, kernelSize, stride); - } - - public Builder(PoolingType poolingType, int[] kernelSize) { - super(poolingType, kernelSize); - } - - public Builder(PoolingType poolingType, int[] kernelSize, int[] stride, int[] padding) { - super(poolingType, kernelSize, stride, padding); - } - - public Builder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType, int[] kernelSize) { - super(poolingType, kernelSize); - } - - public Builder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType, int[] kernelSize, int[] stride, - int[] padding) { - super(poolingType, kernelSize, stride, padding); - } - - public Builder(int[] kernelSize, int[] stride, int[] padding) { - super(kernelSize, stride, padding); - } - - public Builder(int[] kernelSize, int[] stride) { - super(kernelSize, stride); - } - - public Builder(int... kernelSize) { - super(kernelSize); - } - - public Builder(PoolingType poolingType) { - super(poolingType); - } - - public Builder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType) { - super(poolingType); - } - - /** - * Kernel size - * - * @param kernelSize kernel size in height and width dimensions - */ - public Builder kernelSize(int... kernelSize) { - this.setKernelSize(kernelSize); - return this; - } - - /** - * Stride - * - * @param stride stride in height and width dimensions - */ - public Builder stride(int... stride) { - this.setStride(stride); - return this; - } - - /** - * Padding - * - * @param padding padding in the height and width dimensions - */ - public Builder padding(int... padding) { - this.setPadding(padding); - return this; - } - - /** - * The data format for input and output activations.
NCDHW: activations (in/out) should have shape - * [minibatch, channels, depth, height, width]<br>
NDHWC: activations (in/out) should have shape [minibatch, - * depth, height, width, channels]<br>
- * - * @param dataFormat Data format to use for activations - */ - public Builder dataFormat(Convolution3D.DataFormat dataFormat) { - this.setDataFormat(dataFormat); - return this; - } - - @Override - @SuppressWarnings("unchecked") - public Subsampling3DLayer build() { - ConvolutionUtils.validateConvolutionModePadding(convolutionMode, padding); - Convolution3DUtils.validateCnn3DKernelStridePadding(kernelSize, stride, padding); - return new Subsampling3DLayer(this); - } - - @Override - public void setKernelSize(int... kernelSize) { - this.kernelSize = ValidationUtils.validate3NonNegative(kernelSize, "kernelSize"); - } - - /** - * Stride - * - * @param stride stride in height and width dimensions - */ - @Override - public void setStride(int... stride) { - this.stride = ValidationUtils.validate3NonNegative(stride, "stride"); - } - - /** - * Padding - * - * @param padding padding in the height and width dimensions - */ - @Override - public void setPadding(int... padding) { - this.padding = ValidationUtils.validate3NonNegative(padding, "padding"); - } - - /** - * Dilation - * - * @param dilation padding in the height and width dimensions - */ - @Override - public void setDilation(int... dilation) { - this.dilation = ValidationUtils.validate3NonNegative(dilation, "dilation"); - } - } - - @Getter - @Setter - @NoArgsConstructor - protected static abstract class BaseSubsamplingBuilder> - extends LayerConfiguration.Builder { - - protected org.deeplearning4j.nn.conf.layers.PoolingType poolingType = - org.deeplearning4j.nn.conf.layers.PoolingType.MAX; - - protected int[] kernelSize = new int[] {1, 1, 1}; - protected int[] stride = new int[] {2, 2, 2}; - protected int[] padding = new int[] {0, 0, 0}; - - @Setter(AccessLevel.NONE) - protected int[] dilation = new int[] {1, 1, 1}; - - /** - * Set the convolution mode for the Convolution layer. See {@link ConvolutionMode} for more details - * - */ - protected ConvolutionMode convolutionMode = ConvolutionMode.Same; - - /** - * When using CuDNN and an error is encountered, should fallback to the non-CuDNN implementatation be allowed? - * If set to false, an exception in CuDNN will be propagated back to the user. If false, the built-in - * (non-CuDNN) implementation for ConvolutionLayer will be used - */ - protected boolean cudnnAllowFallback = true; - - public void setDilation(int... 
dilation) { - Preconditions.checkArgument(dilation.length == 1 || dilation.length == 3, - "Must have 1 or 3 dilation values - got %s", dilation); - - if (dilation.length == 1) { - dilation(dilation[0], dilation[0], dilation[0]); - } else { - dilation(dilation[0], dilation[1], dilation[2]); - } - } - - protected BaseSubsamplingBuilder(PoolingType poolingType, int[] kernelSize, int[] stride) { - this.setPoolingType(poolingType.toPoolingType()); - this.setKernelSize(kernelSize); - this.setStride(stride); - } - - protected BaseSubsamplingBuilder(PoolingType poolingType, int[] kernelSize) { - this.setPoolingType(poolingType.toPoolingType()); - this.setKernelSize(kernelSize); - } - - protected BaseSubsamplingBuilder(PoolingType poolingType, int[] kernelSize, int[] stride, int[] padding) { - this.setPoolingType(poolingType.toPoolingType()); - this.setKernelSize(kernelSize); - this.setStride(stride); - this.setPadding(padding); - } - - protected BaseSubsamplingBuilder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType, int[] kernelSize) { - this.setPoolingType(poolingType); - this.setKernelSize(kernelSize); - } - - protected BaseSubsamplingBuilder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType, int[] kernelSize, - int[] stride, int[] padding) { - this.setPoolingType(poolingType); - this.setKernelSize(kernelSize); - this.setStride(stride); - this.setPadding(padding); - } - - protected BaseSubsamplingBuilder(int[] kernelSize, int[] stride, int[] padding) { - this.setKernelSize(kernelSize); - this.setStride(stride); - this.setPadding(padding); - } - - protected BaseSubsamplingBuilder(int[] kernelSize, int[] stride) { - this.setKernelSize(kernelSize); - this.setStride(stride); - } - - protected BaseSubsamplingBuilder(int... kernelSize) { - this.setKernelSize(kernelSize); - } - - protected BaseSubsamplingBuilder(PoolingType poolingType) { - this.setPoolingType(poolingType.toPoolingType()); - } - - protected BaseSubsamplingBuilder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType) { - this.setPoolingType(poolingType); - } - - protected void setConvolutionMode(ConvolutionMode convolutionMode){ - Preconditions.checkState(convolutionMode != ConvolutionMode.Causal, "Causal convolution mode can only be used with 1D" + - " convolutional neural network layers"); - this.convolutionMode = convolutionMode; - } - - /** - * Set the convolution mode for the Convolution layer. See {@link ConvolutionMode} for more details - * - * @param convolutionMode Convolution mode for layer - */ - public T convolutionMode(ConvolutionMode convolutionMode) { - this.setConvolutionMode(convolutionMode); - return (T) this; - } - - public T poolingType(PoolingType poolingType) { - this.setPoolingType(poolingType.toPoolingType()); - return (T) this; - } - - public T poolingType(org.deeplearning4j.nn.conf.layers.PoolingType poolingType){ - this.setPoolingType(poolingType); - return (T) this; - } - - public T dilation(int dDepth, int dHeight, int dWidth) { - this.setDilation(dDepth, dHeight, dWidth); - return (T) this; - } - - /** - * When using CuDNN and an error is encountered, should fallback to the non-CuDNN implementatation be allowed? - * If set to false, an exception in CuDNN will be propagated back to the user. 
If true, the built-in - * (non-CuDNN) implementation for ConvolutionLayer will be used - * - * @deprecated Use {@link #helperAllowFallback(boolean)} - * - * @param allowFallback Whether fallback to non-CuDNN implementation should be used - */ - @Deprecated - public T cudnnAllowFallback(boolean allowFallback) { - this.setCudnnAllowFallback(allowFallback); - return (T) this; - } - - /** - * When using CuDNN or MKLDNN and an error is encountered, should fallback to the non-helper implementation be allowed? - * If set to false, an exception in the helper will be propagated back to the user. If true, the built-in - * (non-MKL/CuDNN) implementation for Subsampling3DLayer will be used - * - * @param allowFallback Whether fallback to non-CuDNN implementation should be used - */ - public T helperAllowFallback(boolean allowFallback) { - this.cudnnAllowFallback = allowFallback; - return (T) this; - } - } - + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java index 55e766133..d110e90a3 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java @@ -20,7 +20,12 @@ package org.deeplearning4j.nn.conf.layers; +import com.fasterxml.jackson.annotation.JsonIgnore; +import java.util.Collection; +import java.util.Map; import lombok.*; +import lombok.experimental.Accessors; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.ConvolutionMode; @@ -36,542 +41,422 @@ import org.deeplearning4j.util.ValidationUtils; import org.nd4j.common.base.Preconditions; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import com.fasterxml.jackson.annotation.JsonIgnore; - -import java.util.Collection; -import java.util.Map; @Data -@NoArgsConstructor @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder") public class SubsamplingLayer extends NoParamLayer { - protected ConvolutionMode convolutionMode = ConvolutionMode.Truncate; //Default to truncate here - default for 0.6.0 and earlier networks on JSON deserialization - protected org.deeplearning4j.nn.conf.layers.PoolingType poolingType; - protected int[] kernelSize; // Same as filter size from the last conv layer - protected int[] stride; // Default is 2. Down-sample by a factor of 2 - protected int[] padding; - protected int[] dilation = new int[] {1, 1}; - protected int pnorm; - protected double eps; - protected boolean cudnnAllowFallback = true; - protected CNN2DFormat cnn2dDataFormat = CNN2DFormat.NCHW; //default value for legacy reasons - public final static CNN2DFormat DEFAULT_FORMAT = CNN2DFormat.NCHW; - @JsonIgnore - @EqualsAndHashCode.Exclude - private boolean defaultValueOverridden = false; + public static final CNN2DFormat DEFAULT_FORMAT = CNN2DFormat.NCHW; + /** + * Only conv1d/subsampling1d can use causal mode + */ + @Builder.Default + protected boolean allowCausal = false; + /** + * Set the convolution mode for the Convolution layer. 
See {@link ConvolutionMode} for more + * details + * + * @param convolutionMode Convolution mode for layer + */ + @Builder.Default protected ConvolutionMode convolutionMode = ConvolutionMode.Truncate; + @Builder.Default + protected org.deeplearning4j.nn.conf.layers.PoolingType poolingType = + org.deeplearning4j.nn.conf.layers.PoolingType.MAX; + /** + * Kernel size + * + * @param kernelSize kernel size in height and width dimensions + */ + @Builder.Default + protected int[] kernelSize = new int[] {1, 1}; // Same as filter size from the last conv layer + /** + * Stride + * + * @param stride stride in height and width dimensions + */ + @Builder.Default + protected int[] stride = new int[] {2, 2}; // Default is 2. Down-sample by a factor of 2 + /** + * Padding + * + * @param padding padding in the height and width dimensions + */ + @Builder.Default protected int[] padding = new int[] {0, 0}; + protected int pnorm; + @Builder.Default protected double eps = 1e-8; + /** + * When using CuDNN or MKLDNN and an error is encountered, should fallback to the non-helper + * implementation be allowed? If set to false, an exception in the helper will be propagated back + * to the user. If true, the built-in (non-MKL/CuDNN) implementation for SubsamplingLayer will be + * used + * + * @param allowFallback Whether fallback to non-CuDNN implementation should be used + */ + @Builder.Default protected boolean cudnnAllowFallback = true; + /** + * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last). + * See {@link CNN2DFormat} for more details.
+   * Default: NCHW
+   *
+   * @param dataFormat Format for activations (in and out)
+   */
+  protected @Builder.Default CNN2DFormat dataFormat =
+      CNN2DFormat.NCHW; // default value for legacy reasons
+  /**
+   * When doing average pooling, should the padding values be included in the divisor or not?<br>
+   * Not applicable for max and p-norm pooling.<br>
+   * Users should not usually set this - instead, leave it as the default (false). It is included
+   * mainly for backward compatibility of older models<br>
+   * Consider the following 2x2 segment along the right side of the image:<br>
+   *
+   * <pre>
+   * [A, P]
+   * [B, P]
+   * </pre>
+   *
+   * Where A and B are actual values, and P is padding (0).<br>
+   * With avgPoolIncludePadInDivisor = true, we have: out = (A+B+0+0)/4<br>
+   * With avgPoolIncludePadInDivisor = false, we have: out = (A+B+0+0)/2<br>
+   * <br>
+   * Earlier versions of DL4J originally included padding in the count, newer versions exclude it.
+   * <br>
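+   * For instance, with illustrative values A = 4 and B = 2: avgPoolIncludePadInDivisor = true
+   * gives out = (4+2+0+0)/4 = 1.5, while false gives out = (4+2+0+0)/2 = 3.0.<br>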
+ * + * @param avgPoolIncludePadInDivisor Whether the divisor should include or exclude padding for + * average pooling + */ + @Builder.Default protected boolean avgPoolIncludePadInDivisor = true; + /** + * Kernel dilation. Default: {1, 1}, which is standard convolutions. Used for implementing dilated + * convolutions, which are also known as atrous convolutions.
+   * NOTE: Kernel dilation is less common in practice for subsampling layers, compared to
+   * convolutional layers.
+   *
+   * <br>
+   * For more details, see: Yu and Koltun (2014)
+   * and Chen et al. (2014), as well as
+   * http://deeplearning.net/software/theano/tutorial/conv_arithmetic.html#dilated-convolutions
+   * <br>
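+   * For example (illustrative sizes): a 2x2 pooling window with dilation {2, 2} spans a 3x3
+   * region of the input, since the effective size per dimension is k + (k-1)*(d-1).<br>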
+ * Dilation for kernel + */ + private @Builder.Default int[] dilation = new int[] {1, 1}; + @JsonIgnore @EqualsAndHashCode.Exclude + private @Builder.Default boolean defaultValueOverridden = false; - /* - Default here for JSON deserialization of 1.0.0-beta4 and earlier models. New models default to false via builder. - This impacts average pooling only - whether the divisor should include or exclude padding along image edges. - DL4J originally included padding in the count, versions after 1.0.0-beta4 will exclude it by default. - */ - protected boolean avgPoolIncludePadInDivisor = true; + public static SubsamplingLayerBuilder builder() { + return innerBuilder(); + } - public enum PoolingType { - MAX, AVG, SUM, PNORM; + public static SubsamplingLayerBuilder builder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType) { + return innerBuilder() + .poolingType(poolingType); + } - public org.deeplearning4j.nn.conf.layers.PoolingType toPoolingType() { - switch (this) { - case MAX: - return org.deeplearning4j.nn.conf.layers.PoolingType.MAX; - case AVG: - return org.deeplearning4j.nn.conf.layers.PoolingType.AVG; - case SUM: - return org.deeplearning4j.nn.conf.layers.PoolingType.SUM; - case PNORM: - return org.deeplearning4j.nn.conf.layers.PoolingType.PNORM; - } - throw new UnsupportedOperationException("Unknown/not supported pooling type: " + this); - } + public static SubsamplingLayerBuilder builder(PoolingType poolingType) { + return innerBuilder() + .poolingType(poolingType.toPoolingType()); + } + + public static SubsamplingLayerBuilder builder(int... kernelSize) { + return innerBuilder() + .kernelSize(kernelSize); + } + + public static SubsamplingLayerBuilder builder(int[] kernelSize, int[] stride) { + return innerBuilder() + .kernelSize(kernelSize) + .stride(stride); + } + + public static SubsamplingLayerBuilder builder(int[] kernelSize, int[] stride, int[] padding) { + return innerBuilder() + .kernelSize(kernelSize) + .stride(stride) + .padding(padding); + } + + public static SubsamplingLayerBuilder builder(PoolingType poolingType, int[] kernelSize, int[] stride, int[] padding) { + return innerBuilder() + .poolingType(poolingType.toPoolingType()) + .kernelSize(kernelSize) + .stride(stride) + .padding(padding) + ; + } + + public static SubsamplingLayerBuilder builder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType, int[] kernelSize, int[] stride, int[] padding) { + return innerBuilder() + .poolingType(poolingType) + .kernelSize(kernelSize) + .stride(stride) + .padding(padding) + ; + } + + public static SubsamplingLayerBuilder builder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType, int[] kernelSize) { + return innerBuilder() + .poolingType(poolingType) + .kernelSize(kernelSize) + ; + } + + public static SubsamplingLayerBuilder builder(PoolingType poolingType, int[] kernelSize) { + return innerBuilder() + .poolingType(poolingType.toPoolingType()) + .kernelSize(kernelSize) + ; + } + + + public static SubsamplingLayerBuilder builder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType, int[] kernelSize, int[] stride) { + return innerBuilder() + .poolingType(poolingType) + .kernelSize(kernelSize) + .stride(stride) + ; + } + + public static SubsamplingLayerBuilder builder(PoolingType poolingType, int[] kernelSize, int[] stride) { + return innerBuilder() + .poolingType(poolingType.toPoolingType()) + .kernelSize(kernelSize) + .stride(stride) + ; + } + + @Override + public SubsamplingLayer clone() { + SubsamplingLayer clone = (SubsamplingLayer) super.clone(); + + if 
(clone.kernelSize != null) { + clone.kernelSize = clone.kernelSize.clone(); + } + if (clone.stride != null) { + clone.stride = clone.stride.clone(); + } + if (clone.padding != null) { + clone.padding = clone.padding.clone(); + } + if (clone.dilation != null) { + clone.dilation = clone.dilation.clone(); } - protected SubsamplingLayer(BaseSubsamplingBuilder builder) { - super(builder); - this.poolingType = builder.poolingType; - if (builder.kernelSize.length != 2) { - throw new IllegalArgumentException("Kernel size of should be rows x columns (a 2d array)"); - } - this.kernelSize = builder.kernelSize; - if (builder.stride.length != 2) { - throw new IllegalArgumentException("Invalid stride, must be length 2"); - } - this.stride = builder.stride; - this.padding = builder.padding; - this.convolutionMode = builder.convolutionMode; - if (builder instanceof Builder) { - this.dilation = ((Builder) builder).dilation; - this.cnn2dDataFormat = ((Builder) builder).dataFormat; - } - this.pnorm = builder.pnorm; - this.eps = builder.eps; - this.cudnnAllowFallback = builder.cudnnAllowFallback; - this.avgPoolIncludePadInDivisor = builder.avgPoolIncludePadInDivisor; + return clone; + } + + @Override + public org.deeplearning4j.nn.api.Layer instantiate( + NeuralNetConfiguration conf, + Collection trainingListeners, + int layerIndex, + INDArray layerParamsView, + boolean initializeParams, + DataType networkDataType) { + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + runInheritance(); + + org.deeplearning4j.nn.layers.convolution.subsampling.SubsamplingLayer ret = + new org.deeplearning4j.nn.layers.convolution.subsampling.SubsamplingLayer( + lconf, networkDataType); + ret.addTrainingListeners(trainingListeners); + ret.setIndex(layerIndex); + ret.setParamsViewArray(layerParamsView); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); + ret.setParamTable(paramTable); + ret.setLayerConfiguration(lconf); + return ret; + } + + @Override + public ParamInitializer initializer() { + return EmptyParamInitializer.getInstance(); + } + + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + if (inputType == null || inputType.getType() != InputType.Type.CNN) { + throw new IllegalStateException( + "Invalid input for Subsampling layer (layer name=\"" + + getName() + + "\"): Expected CNN input, got " + + inputType); } - @Override - public SubsamplingLayer clone() { - SubsamplingLayer clone = (SubsamplingLayer) super.clone(); + return InputTypeUtil.getOutputTypeCnnLayers( + inputType, + kernelSize, + stride, + padding, + dilation, + convolutionMode, + ((InputType.InputTypeConvolutional) inputType).getChannels(), + layerIndex, + getName(), + dataFormat, + SubsamplingLayer.class); + } - if (clone.kernelSize != null) { - clone.kernelSize = clone.kernelSize.clone(); - } - if (clone.stride != null) { - clone.stride = clone.stride.clone(); - } - if (clone.padding != null) { - clone.padding = clone.padding.clone(); - } - if (clone.dilation != null) { - clone.dilation = clone.dilation.clone(); - } + @Override + public void setNIn(InputType inputType, boolean override) { + // No op: subsampling layer doesn't have nIn value + if (!defaultValueOverridden || override) { + this.dataFormat = ((InputType.InputTypeConvolutional) inputType).getFormat(); + defaultValueOverridden = true; + } + } - return clone; + @Override + public InputPreProcessor getPreProcessorForInputType(InputType inputType) { + if (inputType == null) { + throw new 
IllegalStateException( + "Invalid input for Subsampling layer (layer name=\"" + getName() + "\"): input is null"); } - @Override - public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, - Collection trainingListeners, int layerIndex, INDArray layerParamsView, - boolean initializeParams, DataType networkDataType) { - LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); - runInheritance(); + return InputTypeUtil.getPreProcessorForInputTypeCnnLayers(inputType, getName()); + } - org.deeplearning4j.nn.layers.convolution.subsampling.SubsamplingLayer ret = - new org.deeplearning4j.nn.layers.convolution.subsampling.SubsamplingLayer(lconf, networkDataType); - ret.addTrainingListeners(trainingListeners); - ret.setIndex(layerIndex); - ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(this, layerParamsView, initializeParams); - ret.setParamTable(paramTable); - ret.setLayerConfiguration(lconf); - return ret; + @Override + public boolean isPretrainParam(String paramName) { + throw new UnsupportedOperationException("SubsamplingLayer does not contain parameters"); + } + + @Override + public LayerMemoryReport getMemoryReport(InputType inputType) { + InputType.InputTypeConvolutional c = (InputType.InputTypeConvolutional) inputType; + InputType.InputTypeConvolutional outputType = + (InputType.InputTypeConvolutional) getOutputType(-1, inputType); + val actElementsPerEx = outputType.arrayElementsPerExample(); + + // TODO Subsampling helper memory use... (CuDNN etc) + + // During forward pass: im2col array + reduce. Reduce is counted as activations, so only im2col + // is working mem + val im2colSizePerEx = + c.getChannels() + * outputType.getHeight() + * outputType.getWidth() + * kernelSize[0] + * kernelSize[1]; + + // Current implementation does NOT cache im2col etc... 
which means: it's recalculated on each + // backward pass + long trainingWorkingSizePerEx = im2colSizePerEx; + if (getDropOut() != null) { + // Dup on the input before dropout, but only for training + trainingWorkingSizePerEx += inputType.arrayElementsPerExample(); } - @Override - public ParamInitializer initializer() { - return EmptyParamInitializer.getInstance(); + return new LayerMemoryReport.Builder(name, SubsamplingLayer.class, inputType, outputType) + .standardMemory(0, 0) // No params + .workingMemory(0, im2colSizePerEx, 0, trainingWorkingSizePerEx) + .cacheMemory( + MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) // No caching + .build(); + } + + public int getPnorm() { + return pnorm; + } + + public double getEps() { + return eps; + } + + public enum PoolingType { + MAX, + AVG, + SUM, + PNORM; + + public org.deeplearning4j.nn.conf.layers.PoolingType toPoolingType() { + switch (this) { + case MAX: + return org.deeplearning4j.nn.conf.layers.PoolingType.MAX; + case AVG: + return org.deeplearning4j.nn.conf.layers.PoolingType.AVG; + case SUM: + return org.deeplearning4j.nn.conf.layers.PoolingType.SUM; + case PNORM: + return org.deeplearning4j.nn.conf.layers.PoolingType.PNORM; + } + throw new UnsupportedOperationException("Unknown/not supported pooling type: " + this); + } + } + + public static abstract class SubsamplingLayerBuilder< + C extends SubsamplingLayer, B extends SubsamplingLayerBuilder> + extends NoParamLayerBuilder { + + public B pnorm(int pnorm) { + ValidationUtils.validateNonNegative(pnorm, "pnorm"); + this.pnorm = pnorm; + return self(); } - @Override - public InputType getOutputType(int layerIndex, InputType inputType) { - if (inputType == null || inputType.getType() != InputType.Type.CNN) { - throw new IllegalStateException("Invalid input for Subsampling layer (layer name=\"" + getLayerName() - + "\"): Expected CNN input, got " + inputType); - } - - return InputTypeUtil.getOutputTypeCnnLayers(inputType, kernelSize, stride, padding, dilation, convolutionMode, - ((InputType.InputTypeConvolutional) inputType).getChannels(), layerIndex, getLayerName(), - cnn2dDataFormat, SubsamplingLayer.class); + public B eps(int eps) { + ValidationUtils.validateNonNegative(eps, "eps"); + this.eps$value = eps; + this.eps$set = true; + return self(); } - @Override - public void setNIn(InputType inputType, boolean override) { - //No op: subsampling layer doesn't have nIn value - if(!defaultValueOverridden || override) { - this.cnn2dDataFormat = ((InputType.InputTypeConvolutional) inputType).getFormat(); - defaultValueOverridden = true; - } + public B kernelSize(int... kernelSize) { + this.kernelSize$value = ValidationUtils.validate2NonNegative(kernelSize, false, "kernelSize"); + this.kernelSize$set = true; + return self(); } - @Override - public InputPreProcessor getPreProcessorForInputType(InputType inputType) { - if (inputType == null) { - throw new IllegalStateException("Invalid input for Subsampling layer (layer name=\"" + getLayerName() - + "\"): input is null"); - } - - return InputTypeUtil.getPreProcessorForInputTypeCnnLayers(inputType, getLayerName()); + public B stride(int... stride) { + this.stride$value = ValidationUtils.validate2NonNegative(stride, false, "stride"); + this.stride$set = true; + return self(); } - @Override - public boolean isPretrainParam(String paramName) { - throw new UnsupportedOperationException("SubsamplingLayer does not contain parameters"); + public B padding(int... 
padding) { + this.padding$value = ValidationUtils.validate2NonNegative(padding, false, "padding"); + this.padding$set = true; + return self(); } - @Override - public LayerMemoryReport getMemoryReport(InputType inputType) { - InputType.InputTypeConvolutional c = (InputType.InputTypeConvolutional) inputType; - InputType.InputTypeConvolutional outputType = (InputType.InputTypeConvolutional) getOutputType(-1, inputType); - val actElementsPerEx = outputType.arrayElementsPerExample(); - - //TODO Subsampling helper memory use... (CuDNN etc) - - //During forward pass: im2col array + reduce. Reduce is counted as activations, so only im2col is working mem - val im2colSizePerEx = c.getChannels() * outputType.getHeight() * outputType.getWidth() * kernelSize[0] - * kernelSize[1]; - - //Current implementation does NOT cache im2col etc... which means: it's recalculated on each backward pass - long trainingWorkingSizePerEx = im2colSizePerEx; - if (getIDropout() != null) { - //Dup on the input before dropout, but only for training - trainingWorkingSizePerEx += inputType.arrayElementsPerExample(); - } - - return new LayerMemoryReport.Builder(layerName, SubsamplingLayer.class, inputType, outputType) - .standardMemory(0, 0) //No params - .workingMemory(0, im2colSizePerEx, 0, trainingWorkingSizePerEx) - .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching - .build(); + public B dilation(int... dilation) { + this.dilation$value = ValidationUtils.validate2NonNegative(dilation, false, "dilation"); + this.dilation$set = true; + return self(); } - public int getPnorm() { - return pnorm; + public C build() { + if (kernelSize$value.length != 2) { + throw new IllegalArgumentException("Kernel size of should be rows x columns (a 2d array)"); + } + + if (stride$value.length != 2) { + throw new IllegalArgumentException("Invalid stride, must be length 2"); + } + if (poolingType$value == org.deeplearning4j.nn.conf.layers.PoolingType.PNORM && pnorm <= 0) { + throw new IllegalStateException( + "Incorrect Subsampling config: p-norm must be set when using PoolingType.PNORM"); + } + ConvolutionUtils.validateConvolutionModePadding(convolutionMode$value, padding$value); + ConvolutionUtils.validateCnnKernelStridePadding( + kernelSize$value, stride$value, padding$value); + + C l = initBuild(); + return l; } - public double getEps() { - return eps; + public B setConvolutionMode(ConvolutionMode convolutionMode){ + Preconditions.checkState(allowCausal$value || convolutionMode$value != ConvolutionMode.Causal, "Causal convolution mode can only be used with 1D" + + " convolutional neural network layers"); + this.convolutionMode$value = convolutionMode; + this.convolutionMode$set = true; + return self(); } - @NoArgsConstructor - @Getter - @Setter - public static class Builder extends BaseSubsamplingBuilder { - - /** - * Kernel dilation. Default: {1, 1}, which is standard convolutions. Used for implementing dilated convolutions, - * which are also known as atrous convolutions.
NOTE: Kernel dilation is less common in practice for - * subsampling layers, compared to convolutional layers. - * - * For more details, see: - * Yu and Koltun (2014) and - * Chen et al. (2014), as well as - * - * http://deeplearning.net/software/theano/tutorial/conv_arithmetic.html#dilated-convolutions
- * - * Dilation for kernel - */ - private int[] dilation = new int[] {1, 1}; - protected CNN2DFormat dataFormat = CNN2DFormat.NCHW; - - public Builder(PoolingType poolingType, int[] kernelSize, int[] stride) { - super(poolingType, kernelSize, stride); - } - - public Builder(PoolingType poolingType, int[] kernelSize) { - super(poolingType, kernelSize); - } - - public Builder(PoolingType poolingType, int[] kernelSize, int[] stride, int[] padding) { - super(poolingType, kernelSize, stride, padding); - } - - public Builder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType, int[] kernelSize) { - super(poolingType, kernelSize); - } - - public Builder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType, int[] kernelSize, int[] stride, - int[] padding) { - super(poolingType, kernelSize, stride, padding); - } - - public Builder(int[] kernelSize, int[] stride, int[] padding) { - super(kernelSize, stride, padding); - } - - public Builder(int[] kernelSize, int[] stride) { - super(kernelSize, stride); - } - - public Builder(int... kernelSize) { - super(kernelSize); - } - - public Builder(PoolingType poolingType) { - super(poolingType); - } - - public Builder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType) { - super(poolingType); - } - - @Override - protected boolean allowCausal() { - //Only conv1d/subsampling1d can use causal mode - return false; - } - - /** - * Kernel size - * - * @param kernelSize kernel size in height and width dimensions - */ - public Builder kernelSize(int... kernelSize) { - this.setKernelSize(kernelSize); - return this; - } - - /** - * Stride - * - * @param stride stride in height and width dimensions - */ - public Builder stride(int... stride) { - this.setStride(stride); - return this; - } - - /** - * Padding - * - * @param padding padding in the height and width dimensions - */ - public Builder padding(int... padding) { - this.setPadding(padding); - return this; - } - - /** - * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last). - * See {@link CNN2DFormat} for more details.
- * Default: NCHW - * @param format Format for activations (in and out) - */ - public Builder dataFormat(CNN2DFormat format){ - this.dataFormat = format; - return this; - } - - /** - * Kernel dilation. Default: {1, 1}, which is standard convolutions. Used for implementing dilated convolutions, - * which are also known as atrous convolutions.
NOTE: Kernel dilation is less common in practice for - * subsampling layers, compared to convolutional layers. - * - * For more details, see: - * Yu and Koltun (2014) and - * Chen et al. (2014), as well as - * - * http://deeplearning.net/software/theano/tutorial/conv_arithmetic.html#dilated-convolutions
- * - * @param dilation Dilation for kernel - */ - public Builder dilation(int... dilation) { - this.setDilation(dilation); - return this; - } - - - @Override - @SuppressWarnings("unchecked") - public SubsamplingLayer build() { - if (poolingType == org.deeplearning4j.nn.conf.layers.PoolingType.PNORM && pnorm <= 0) { - throw new IllegalStateException( - "Incorrect Subsampling config: p-norm must be set when using PoolingType.PNORM"); - } - ConvolutionUtils.validateConvolutionModePadding(convolutionMode, padding); - ConvolutionUtils.validateCnnKernelStridePadding(kernelSize, stride, padding); - - return new SubsamplingLayer(this); - } - - @Override - public void setKernelSize(int... kernelSize) { - this.kernelSize = ValidationUtils.validate2NonNegative(kernelSize,false, "kernelSize"); - } - - @Override - public void setStride(int... stride) { - this.stride = ValidationUtils.validate2NonNegative(stride, false, "stride"); - } - - @Override - public void setPadding(int... padding) { - this.padding = ValidationUtils.validate2NonNegative(padding,false, "padding"); - } - - - public void setDilation(int[] dilation) { - this.dilation = ValidationUtils.validate2NonNegative(dilation, false, "dilation"); - } - - public void setDataFormat(CNN2DFormat format){ - this.dataFormat = format; - } + public B helperAllowFallback(boolean b) { + this.cudnnAllowFallback$value = b; + this.cudnnAllowFallback$set = true; + return self(); } - - @NoArgsConstructor - @Getter - @Setter - protected static abstract class BaseSubsamplingBuilder> - extends LayerConfiguration.Builder { - - protected org.deeplearning4j.nn.conf.layers.PoolingType poolingType = - org.deeplearning4j.nn.conf.layers.PoolingType.MAX; - - protected int[] kernelSize = new int[] {1, 1}; // Same as filter size from the last conv layer - protected int[] stride = new int[] {2, 2}; // Default is 2. Down-sample by a factor of 2 - protected int[] padding = new int[] {0, 0}; - - /** - * Set the convolution mode for the Convolution layer. See {@link ConvolutionMode} for more details - * - * Convolution mode for layer - */ - protected ConvolutionMode convolutionMode = null; - protected int pnorm; - protected double eps = 1e-8; - - /** - * When using CuDNN and an error is encountered, should fallback to the non-CuDNN implementatation be allowed? - * If set to false, an exception in CuDNN will be propagated back to the user. 
If false, the built-in - * (non-CuDNN) implementation for ConvolutionLayer will be used - * - * Whether fallback to non-CuDNN implementation should be used - */ - protected boolean cudnnAllowFallback = true; - protected boolean avgPoolIncludePadInDivisor = false; - - protected BaseSubsamplingBuilder(PoolingType poolingType, int[] kernelSize, int[] stride) { - this.setPoolingType(poolingType.toPoolingType()); - this.setKernelSize(kernelSize); - this.setStride(stride); - } - - protected BaseSubsamplingBuilder(PoolingType poolingType, int[] kernelSize) { - this.setPoolingType(poolingType.toPoolingType()); - this.setKernelSize(kernelSize); - } - - protected BaseSubsamplingBuilder(PoolingType poolingType, int[] kernelSize, int[] stride, int[] padding) { - this.setPoolingType(poolingType.toPoolingType()); - this.setKernelSize(kernelSize); - this.setStride(stride); - this.setPadding(padding); - } - - protected BaseSubsamplingBuilder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType, int[] kernelSize) { - this.setPoolingType(poolingType); - this.setKernelSize(kernelSize); - } - - protected BaseSubsamplingBuilder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType, int[] kernelSize, - int[] stride, int[] padding) { - this.setPoolingType(poolingType); - this.setKernelSize(kernelSize); - this.setStride(stride); - this.setPadding(padding); - } - - protected BaseSubsamplingBuilder(int[] kernelSize, int[] stride, int[] padding) { - this.setKernelSize(kernelSize); - this.setStride(stride); - this.setPadding(padding); - } - - protected BaseSubsamplingBuilder(int[] kernelSize, int[] stride) { - this.setKernelSize(kernelSize); - this.setStride(stride); - } - - protected BaseSubsamplingBuilder(int... kernelSize) { - this.setKernelSize(kernelSize); - } - - protected BaseSubsamplingBuilder(PoolingType poolingType) { - this.setPoolingType(poolingType.toPoolingType()); - } - - protected BaseSubsamplingBuilder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType) { - this.setPoolingType(poolingType); - } - - public void setPnorm(int pnorm){ - ValidationUtils.validateNonNegative(pnorm, "pnorm"); - this.pnorm = pnorm; - } - - public void setEps(double eps){ - ValidationUtils.validateNonNegative(eps, "eps"); - this.eps = eps; - } - - protected abstract boolean allowCausal(); - - public void setConvolutionMode(ConvolutionMode convolutionMode){ - Preconditions.checkState(allowCausal() || convolutionMode != ConvolutionMode.Causal, "Causal convolution mode can only be used with 1D" + - " convolutional neural network layers"); - this.convolutionMode = convolutionMode; - } - - /** - * Set the convolution mode for the Convolution layer. See {@link ConvolutionMode} for more details - * - * @param convolutionMode Convolution mode for layer - */ - public T convolutionMode(ConvolutionMode convolutionMode) { - this.setConvolutionMode(convolutionMode); - return (T) this; - } - - public T poolingType(PoolingType poolingType) { - this.setPoolingType(poolingType.toPoolingType()); - return (T) this; - } - - public T poolingType(org.deeplearning4j.nn.conf.layers.PoolingType poolingType){ - this.setPoolingType(poolingType); - return (T) this; - } - - public T pnorm(int pnorm) { - this.setPnorm(pnorm); - return (T) this; - } - - public T eps(double eps) { - this.setEps(eps); - return (T) this; - } - - /** - * When using CuDNN or MKLDNN and an error is encountered, should fallback to the non-helper implementation be allowed? - * If set to false, an exception in the helper will be propagated back to the user. 
If true, the built-in - * (non-MKL/CuDNN) implementation for ConvolutionLayer will be used - * - * @deprecated Use {@link #helperAllowFallback(boolean)} - * - * @param allowFallback Whether fallback to non-CuDNN implementation should be used - */ - @Deprecated - public T cudnnAllowFallback(boolean allowFallback) { - this.cudnnAllowFallback = allowFallback; - return (T) this; - } - - /** - * When using CuDNN or MKLDNN and an error is encountered, should fallback to the non-helper implementation be allowed? - * If set to false, an exception in the helper will be propagated back to the user. If true, the built-in - * (non-MKL/CuDNN) implementation for SubsamplingLayer will be used - * - * @param allowFallback Whether fallback to non-CuDNN implementation should be used - */ - public T helperAllowFallback(boolean allowFallback) { - this.cudnnAllowFallback = allowFallback; - return (T) this; - } - - /** - * When doing average pooling, should the padding values be included in the divisor or not?
- * Not applicable for max and p-norm pooling.<br>
- * Users should not usually set this - instead, leave it as the default (false). It is included mainly for backward
- * compatibility of older models<br>
- * Consider the following 2x2 segment along the right side of the image:<br>
- * <pre>
- * [A, P]
- * [B, P]
- * </pre>
- * Where A and B are actual values, and P is padding (0).<br>
- * With avgPoolIncludePadInDivisor = true, we have: out = (A+B+0+0)/4<br>
- * With avgPoolIncludePadInDivisor = false, we have: out = (A+B+0+0)/2<br>
- * <br>
- * Earlier versions of DL4J originally included padding in the count, newer versions exclude it.<br>
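A minimal sketch of how a subsampling layer is configured once this change is applied: the kernelSize, stride, padding and pnorm setters and the build() validation appear in the diff above, while the static builder() factory, the poolingType(...) setter and the avgPoolIncludePadInDivisor(...) setter are assumed to be generated by @SuperBuilder from the corresponding fields, by analogy with the other layers in this patch.

    // assumed imports:
    // import org.deeplearning4j.nn.conf.layers.PoolingType;
    // import org.deeplearning4j.nn.conf.layers.SubsamplingLayer;

    SubsamplingLayer pool = SubsamplingLayer.builder()   // builder() factory assumed, as for the other layers in this patch
            .poolingType(PoolingType.AVG)                // MAX, AVG, SUM or PNORM
            .kernelSize(2, 2)                            // validated as non-negative, length-2 array
            .stride(2, 2)
            .padding(0, 0)
            .avgPoolIncludePadInDivisor(false)           // assumed Lombok-generated setter; false = padding excluded from the divisor
            .build();                                    // build() re-checks kernel/stride/padding and the PNORM p-norm

This call chain takes the place of the removed inner-Builder construction (new SubsamplingLayer.Builder(...)...build()).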
- * - * @param avgPoolIncludePadInDivisor Whether the divisor should include or exclude padding for average pooling - */ - public T avgPoolIncludePadInDivisor(boolean avgPoolIncludePadInDivisor){ - this.avgPoolIncludePadInDivisor = avgPoolIncludePadInDivisor; - return (T) this; - } - } - + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling1D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling1D.java index a2d3c4fb8..cf83a6164 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling1D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling1D.java @@ -25,6 +25,7 @@ import lombok.Data; import lombok.EqualsAndHashCode; import lombok.NoArgsConstructor; import lombok.ToString; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -40,24 +41,36 @@ import java.util.Collection; import java.util.Map; @Data -@NoArgsConstructor @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@SuperBuilder(builderMethodName = "innerBuilder") public class Upsampling1D extends BaseUpsamplingLayer { - + /** + * Upsampling size int array with a single element. Array must be length 1 + * + * @param size upsampling size in single spatial dimension of this 1D layer + */ protected int[] size; - protected Upsampling1D(UpsamplingBuilder builder) { - super(builder); - this.size = builder.size; + public static Upsampling1DBuilder builder() { + return innerBuilder(); + } + public static Upsampling1DBuilder builder(int size) { + return innerBuilder() + .size(size); } + public static Upsampling1DBuilder builder(int[] size) { + return innerBuilder() + .size(size); + } @Override public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + runInheritance(); org.deeplearning4j.nn.layers.convolution.upsampling.Upsampling1D ret = new org.deeplearning4j.nn.layers.convolution.upsampling.Upsampling1D(lconf, networkDataType); @@ -80,7 +93,7 @@ public class Upsampling1D extends BaseUpsamplingLayer { public InputType getOutputType(int layerIndex, InputType inputType) { if (inputType == null || inputType.getType() != InputType.Type.RNN) { throw new IllegalStateException("Invalid input for 1D Upsampling layer (layer index = " + layerIndex - + ", layer name = \"" + getLayerName() + "\"): expect RNN input type with size > 0. Got: " + + ", layer name = \"" + getName() + "\"): expect RNN input type with size > 0. 
Got: " + inputType); } InputType.InputTypeRecurrent recurrent = (InputType.InputTypeRecurrent) inputType; @@ -94,10 +107,10 @@ public class Upsampling1D extends BaseUpsamplingLayer { @Override public InputPreProcessor getPreProcessorForInputType(InputType inputType) { if (inputType == null) { - throw new IllegalStateException("Invalid input for Upsampling layer (layer name=\"" + getLayerName() + throw new IllegalStateException("Invalid input for Upsampling layer (layer name=\"" + getName() + "\"): input is null"); } - return InputTypeUtil.getPreProcessorForInputTypeCnnLayers(inputType, getLayerName()); + return InputTypeUtil.getPreProcessorForInputTypeCnnLayers(inputType, getName()); } @Override @@ -107,67 +120,42 @@ public class Upsampling1D extends BaseUpsamplingLayer { long im2colSizePerEx = recurrent.getSize() * outputType.getTimeSeriesLength() * size[0]; long trainingWorkingSizePerEx = im2colSizePerEx; - if (getIDropout() != null) { + if (getDropOut() != null) { trainingWorkingSizePerEx += inputType.arrayElementsPerExample(); } - return new LayerMemoryReport.Builder(layerName, Upsampling1D.class, inputType, outputType).standardMemory(0, 0) //No params + return new LayerMemoryReport.Builder(name, Upsampling1D.class, inputType, outputType).standardMemory(0, 0) //No params .workingMemory(0, im2colSizePerEx, 0, trainingWorkingSizePerEx) .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching .build(); } - @NoArgsConstructor - public static class Builder extends UpsamplingBuilder { + public static abstract class Upsampling1DBuilder< + C extends Upsampling1D, B extends Upsampling1DBuilder> + extends BaseUpsamplingLayerBuilder { - public Builder(int size) { - super(new int[] {size, size}); + public B size(int size) { + size(size, size); + return self(); + } + public B size(int... size) { + if (size.length == 2) { + if (size[0] == size[1]) { + size(size[0]); + return self(); + } else { + Preconditions.checkArgument( + false, + "When given a length 2 array for size, " + + "the values must be equal. Got: " + + Arrays.toString(size)); } + } - /** - * Upsampling size - * - * @param size upsampling size in single spatial dimension of this 1D layer - */ - public Builder size(int size) { - - this.setSize(size); - return this; - } - - /** - * Upsampling size int array with a single element. Array must be length 1 - * - * @param size upsampling size in single spatial dimension of this 1D layer - */ - public Builder size(int[] size) { - this.setSize(size); - return this; - } - - @Override - @SuppressWarnings("unchecked") - public Upsampling1D build() { - return new Upsampling1D(this); - } - - @Override - public void setSize(int... size) { - - if(size.length == 2){ - if(size[0] == size[1]) { - setSize(size[0]); - return; - } else { - Preconditions.checkArgument(false, - "When given a length 2 array for size, " - + "the values must be equal. 
Got: " + Arrays.toString(size)); - } - } - - int[] temp = ValidationUtils.validate1NonNegative(size, "size"); - this.size = new int[]{temp[0], temp[0]}; - } + int[] temp = ValidationUtils.validate1NonNegative(size, "size"); + size(new int[] {temp[0], temp[0]}); + return self(); } + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling2D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling2D.java index 48e86c848..febaa0fb1 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling2D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling2D.java @@ -21,6 +21,7 @@ package org.deeplearning4j.nn.conf.layers; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -38,19 +39,23 @@ import java.util.Collection; import java.util.Map; @Data -@NoArgsConstructor @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@SuperBuilder(builderMethodName = "innerBuilder") public class Upsampling2D extends BaseUpsamplingLayer { @JsonDeserialize(using = LegacyIntArrayDeserializer.class) protected int[] size; - protected CNN2DFormat format = CNN2DFormat.NCHW; + @lombok.Builder.Default + protected CNN2DFormat dataFormat = CNN2DFormat.NCHW; - protected Upsampling2D(UpsamplingBuilder builder) { - super(builder); - this.size = builder.size; - this.format = ((Builder)builder).format; + public static Upsampling2DBuilder builder() { + return innerBuilder(); + } + + public static Upsampling2DBuilder builder(int size) { + return innerBuilder() + .size(size, size); } @Override @@ -64,6 +69,7 @@ public class Upsampling2D extends BaseUpsamplingLayer { Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + runInheritance(); org.deeplearning4j.nn.layers.convolution.upsampling.Upsampling2D ret = new org.deeplearning4j.nn.layers.convolution.upsampling.Upsampling2D(lconf, networkDataType); @@ -79,7 +85,7 @@ public class Upsampling2D extends BaseUpsamplingLayer { @Override public InputType getOutputType(int layerIndex, InputType inputType) { if (inputType == null || inputType.getType() != InputType.Type.CNN) { - throw new IllegalStateException("Invalid input for Upsampling 2D layer (layer name=\"" + getLayerName() + throw new IllegalStateException("Invalid input for Upsampling 2D layer (layer name=\"" + getName() + "\"): Expected CNN input, got " + inputType); } InputType.InputTypeConvolutional i = (InputType.InputTypeConvolutional) inputType; @@ -93,10 +99,10 @@ public class Upsampling2D extends BaseUpsamplingLayer { @Override public InputPreProcessor getPreProcessorForInputType(InputType inputType) { if (inputType == null) { - throw new IllegalStateException("Invalid input for Upsampling 2D layer (layer name=\"" + getLayerName() + throw new IllegalStateException("Invalid input for Upsampling 2D layer (layer name=\"" + getName() + "\"): input is null"); } - return InputTypeUtil.getPreProcessorForInputTypeCnnLayers(inputType, getLayerName()); + return InputTypeUtil.getPreProcessorForInputTypeCnnLayers(inputType, getName()); } @Override @@ -110,12 +116,12 @@ public class Upsampling2D extends BaseUpsamplingLayer { // Current implementation does NOT cache 
im2col etc... which means: it's recalculated on each backward pass long trainingWorkingSizePerEx = im2colSizePerEx; - if (getIDropout() != null) { + if (getDropOut() != null) { //Dup on the input before dropout, but only for training trainingWorkingSizePerEx += inputType.arrayElementsPerExample(); } - return new LayerMemoryReport.Builder(layerName, Upsampling2D.class, inputType, outputType).standardMemory(0, 0) //No params + return new LayerMemoryReport.Builder(name, Upsampling2D.class, inputType, outputType).standardMemory(0, 0) //No params .workingMemory(0, im2colSizePerEx, 0, trainingWorkingSizePerEx) .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching .build(); @@ -124,64 +130,29 @@ public class Upsampling2D extends BaseUpsamplingLayer { @Override public void setNIn(InputType inputType, boolean override) { if (inputType == null || inputType.getType() != InputType.Type.CNN) { - throw new IllegalStateException("Invalid input for Upsampling 2D layer (layer name=\"" + getLayerName() + throw new IllegalStateException("Invalid input for Upsampling 2D layer (layer name=\"" + getName() + "\"): Expected CNN input, got " + inputType); } - this.format = ((InputType.InputTypeConvolutional)inputType).getFormat(); + this.dataFormat = ((InputType.InputTypeConvolutional)inputType).getFormat(); } - @NoArgsConstructor - public static class Builder extends UpsamplingBuilder { - - protected CNN2DFormat format = CNN2DFormat.NCHW; - - public Builder(int size) { - super(new int[] {size, size}); - } - - /** - * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last). - * See {@link CNN2DFormat} for more details.
- * Default: NCHW - * @param format Format for activations (in and out) - */ - public Builder dataFormat(CNN2DFormat format){ - this.format = format; - return this; - } - - /** - * Upsampling size int, used for both height and width - * - * @param size upsampling size in height and width dimensions - */ - public Builder size(int size) { - - this.setSize(size, size); - return this; - } + public static abstract class Upsampling2DBuilder> extends + BaseUpsamplingLayerBuilder { - /** - * Upsampling size array - * - * @param size upsampling size in height and width dimensions - */ - public Builder size(int[] size) { - this.setSize(size); - return this; - } - - @Override - @SuppressWarnings("unchecked") - public Upsampling2D build() { - return new Upsampling2D(this); - } - - @Override - public void setSize(int... size) { - this.size = ValidationUtils.validate2NonNegative(size, false, "size"); - } - } + /** + * Upsampling size int, used for both height and width + * + * @param size upsampling size in height and width dimensions + */ + public B size(int size) { + this.size(size, size); + return self(); + } + public B size(int... size) { + this.size = ValidationUtils.validate2NonNegative(size, false, "size"); + return self(); + } + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling3D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling3D.java index 4d629e2fd..4aba279ae 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling3D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling3D.java @@ -21,6 +21,7 @@ package org.deeplearning4j.nn.conf.layers; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -36,21 +37,46 @@ import java.util.Collection; import java.util.Map; @Data -@NoArgsConstructor @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@SuperBuilder(builderMethodName = "innerBuilder") public class Upsampling3D extends BaseUpsamplingLayer { - protected int[] size; + /** + * Sets the DataFormat. 
See {@link Convolution3D.DataFormat} for more details + */ + @lombok.Builder.Default protected Convolution3D.DataFormat dataFormat = Convolution3D.DataFormat.NCDHW; //Default to NCDHW for 1.0.0-beta4 and earlier, when no config existed (NCDHW only) - - - protected Upsampling3D(Builder builder) { - super(builder); - this.size = builder.size; - this.dataFormat = builder.dataFormat; + private int[] size; + public static Upsampling3DBuilder builder() { + return innerBuilder(); } + public static Upsampling3DBuilder builder(int[] size) { + return innerBuilder() + .size(size); + } + + /** + * Data format - see {@link Convolution3D.DataFormat} for more details + * @param size Upsampling layer size (most common value: 2) + */ + public static Upsampling3DBuilder builder(Convolution3D.DataFormat format, int[] size) { + return innerBuilder() + .dataFormat(format) + .size(size); + } + public static Upsampling3DBuilder builder(Convolution3D.DataFormat format, int size) { + return innerBuilder() + .dataFormat(format) + .size(size, size, size); + } + + public static Upsampling3DBuilder builder(int size) { + return innerBuilder() + .size(size, size, size); + } + @Override public Upsampling3D clone() { @@ -62,6 +88,7 @@ public class Upsampling3D extends BaseUpsamplingLayer { Collection iterationListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + runInheritance(); org.deeplearning4j.nn.layers.convolution.upsampling.Upsampling3D ret = new org.deeplearning4j.nn.layers.convolution.upsampling.Upsampling3D(lconf, networkDataType); @@ -79,7 +106,7 @@ public class Upsampling3D extends BaseUpsamplingLayer { @Override public InputType getOutputType(int layerIndex, InputType inputType) { if (inputType == null || inputType.getType() != InputType.Type.CNN3D) { - throw new IllegalStateException("Invalid input for Upsampling 3D layer (layer name=\"" + getLayerName() + throw new IllegalStateException("Invalid input for Upsampling 3D layer (layer name=\"" + getName() + "\"): Expected CNN3D input, got " + inputType); } InputType.InputTypeConvolutional3D i = (InputType.InputTypeConvolutional3D) inputType; @@ -95,10 +122,10 @@ public class Upsampling3D extends BaseUpsamplingLayer { @Override public InputPreProcessor getPreProcessorForInputType(InputType inputType) { if (inputType == null) { - throw new IllegalStateException("Invalid input for Upsampling 3D layer (layer name=\"" + getLayerName() + throw new IllegalStateException("Invalid input for Upsampling 3D layer (layer name=\"" + getName() + "\"): input is null"); } - return InputTypeUtil.getPreProcessorForInputTypeCnn3DLayers(inputType, getLayerName()); + return InputTypeUtil.getPreProcessorForInputTypeCnn3DLayers(inputType, getName()); } @Override @@ -113,79 +140,32 @@ public class Upsampling3D extends BaseUpsamplingLayer { // Current implementation does NOT cache im2col etc... 
which means: it's recalculated on each backward pass long trainingWorkingSizePerEx = im2colSizePerEx; - if (getIDropout() != null) { + if (getDropOut() != null) { //Dup on the input before dropout, but only for training trainingWorkingSizePerEx += inputType.arrayElementsPerExample(); } - return new LayerMemoryReport.Builder(layerName, Upsampling3D.class, inputType, outputType).standardMemory(0, 0) //No params + return new LayerMemoryReport.Builder(name, Upsampling3D.class, inputType, outputType).standardMemory(0, 0) //No params .workingMemory(0, im2colSizePerEx, 0, trainingWorkingSizePerEx) .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching .build(); } + public static abstract class Upsampling3DBuilder> + extends BaseUpsamplingLayerBuilder { - @NoArgsConstructor - public static class Builder extends UpsamplingBuilder { - - protected Convolution3D.DataFormat dataFormat = Convolution3D.DataFormat.NCDHW; - - /** - * @param size Upsampling layer size (most common value: 2) - */ - public Builder(int size) { - super(new int[] {size, size, size}); - } - - /** - * @param dataFormat Data format - see {@link Convolution3D.DataFormat} for more details - * @param size Upsampling layer size (most common value: 2) - */ - public Builder(@NonNull Convolution3D.DataFormat dataFormat, int size){ - super(new int[]{size, size, size}); - this.dataFormat = dataFormat; - } - - /** - * Sets the DataFormat. See {@link Convolution3D.DataFormat} for more details - */ - public Builder dataFormat(@NonNull Convolution3D.DataFormat dataFormat){ - this.dataFormat = dataFormat; - return this; - } - - /** - * Upsampling size as int, so same upsampling size is used for depth, width and height - * - * @param size upsampling size in height, width and depth dimensions - */ - public Builder size(int size) { - - this.setSize(size, size, size); - return this; - } - - /** - * Upsampling size as int, so same upsampling size is used for depth, width and height - * - * @param size upsampling size in height, width and depth dimensions - */ - public Builder size(int[] size) { + public B size(int... size) { Preconditions.checkArgument(size.length == 3); - this.setSize(size); - return this; - } - - @Override - @SuppressWarnings("unchecked") - public Upsampling3D build() { - return new Upsampling3D(this); - } - - @Override - public void setSize(int... 
size) { this.size = ValidationUtils.validate3NonNegative(size, "size"); + return self(); } + + public B size(int size) { + this.size(size, size, size); + return self(); + } + + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPadding1DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPadding1DLayer.java index 43f6e4ed1..a641a16f7 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPadding1DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPadding1DLayer.java @@ -20,7 +20,10 @@ package org.deeplearning4j.nn.conf.layers; +import java.util.Collection; +import java.util.Map; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -34,147 +37,113 @@ import org.deeplearning4j.util.ValidationUtils; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.Arrays; -import java.util.Collection; -import java.util.Map; - @Data -@NoArgsConstructor @EqualsAndHashCode(callSuper = true) +@SuperBuilder(builderMethodName = "innerBuilder") public class ZeroPadding1DLayer extends NoParamLayer { + /** Padding value for left and right. Must be length 2 array */ + @Builder.Default private int[] padding = new int[] {0, 0}; // Padding: left, right - private int[] padding; // [padLeft, padRight] + /** + * @param padding Padding for both the left and right + */ + public static ZeroPadding1DLayerBuilder builder(int padding) { + return innerBuilder().padding(padding, padding); + } - private ZeroPadding1DLayer(Builder builder) { - super(builder); - this.padding = builder.padding; + public static ZeroPadding1DLayerBuilder builder() { + return innerBuilder(); + } + /** + * @param paddingLeft Padding value for left + * @param paddingRight Padding value for right + */ + public static ZeroPadding1DLayerBuilder builder(int paddingLeft, int paddingRight) { + return innerBuilder().padding(paddingLeft, paddingRight); + } + + @Override + public org.deeplearning4j.nn.api.Layer instantiate( + NeuralNetConfiguration conf, + Collection trainingListeners, + int layerIndex, + INDArray layerParamsView, + boolean initializeParams, + DataType networkDataType) { + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + runInheritance(); + org.deeplearning4j.nn.layers.convolution.ZeroPadding1DLayer ret = + new org.deeplearning4j.nn.layers.convolution.ZeroPadding1DLayer(lconf, networkDataType); + ret.addTrainingListeners(trainingListeners); + ret.setIndex(layerIndex); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); + ret.setParamTable(paramTable); + ret.setLayerConfiguration(lconf); + return ret; + } + + @Override + public ParamInitializer initializer() { + return EmptyParamInitializer.getInstance(); + } + + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + if (inputType == null || inputType.getType() != InputType.Type.RNN) { + throw new IllegalStateException( + "Invalid input for 1D CNN layer (layer index = " + + layerIndex + + ", layer name = \"" + + getName() + + "\"): expect RNN input type with size > 0. 
Got: " + + inputType); + } + InputType.InputTypeRecurrent recurrent = (InputType.InputTypeRecurrent) inputType; + return InputType.recurrent( + recurrent.getSize(), recurrent.getTimeSeriesLength() + padding[0] + padding[1]); + } + + @Override + public void setNIn(InputType inputType, boolean override) { + // No op + } + + @Override + public InputPreProcessor getPreProcessorForInputType(InputType inputType) { + if (inputType == null) { + throw new IllegalStateException( + "Invalid input for ZeroPadding1DLayer layer (layer name=\"" + + getName() + + "\"): input is null"); } - public ZeroPadding1DLayer(int padding) { - this(new Builder(padding)); - } - - public ZeroPadding1DLayer(int padLeft, int padRight) { - this(new Builder(padLeft, padRight)); - } - - public ZeroPadding1DLayer(int[] padding) { - this(new Builder(padding)); - } - - @Override - public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, - Collection trainingListeners, int layerIndex, INDArray layerParamsView, - boolean initializeParams, DataType networkDataType) { - LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); - - org.deeplearning4j.nn.layers.convolution.ZeroPadding1DLayer ret = - new org.deeplearning4j.nn.layers.convolution.ZeroPadding1DLayer(lconf, networkDataType); - ret.addTrainingListeners(trainingListeners); - ret.setIndex(layerIndex); - Map paramTable = initializer().init(this, layerParamsView, initializeParams); - ret.setParamTable(paramTable); - ret.setLayerConfiguration(lconf); - return ret; - } - - @Override - public ParamInitializer initializer() { - return EmptyParamInitializer.getInstance(); - } - - @Override - public InputType getOutputType(int layerIndex, InputType inputType) { - if (inputType == null || inputType.getType() != InputType.Type.RNN) { - throw new IllegalStateException("Invalid input for 1D CNN layer (layer index = " + layerIndex - + ", layer name = \"" + getLayerName() + "\"): expect RNN input type with size > 0. Got: " - + inputType); - } - InputType.InputTypeRecurrent recurrent = (InputType.InputTypeRecurrent) inputType; - return InputType.recurrent(recurrent.getSize(), recurrent.getTimeSeriesLength() + padding[0] + padding[1]); - } - - @Override - public void setNIn(InputType inputType, boolean override) { - //No op - } - - @Override - public InputPreProcessor getPreProcessorForInputType(InputType inputType) { - if (inputType == null) { - throw new IllegalStateException("Invalid input for ZeroPadding1DLayer layer (layer name=\"" + getLayerName() - + "\"): input is null"); - } - - return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType, RNNFormat.NCW, getLayerName()); - } - - @Override - public boolean isPretrainParam(String paramName) { - throw new UnsupportedOperationException("ZeroPaddingLayer does not contain parameters"); - } - - @Override - public LayerMemoryReport getMemoryReport(InputType inputType) { - InputType outputType = getOutputType(-1, inputType); - - return new LayerMemoryReport.Builder(layerName, ZeroPaddingLayer.class, inputType, outputType) - .standardMemory(0, 0) //No params - .workingMemory(0, 0, MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) - .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching - .build(); - } - - @Getter - @Setter - public static class Builder extends LayerConfiguration.Builder { - - /** - * Padding value for left and right. 
Must be length 2 array - */ - @Setter(AccessLevel.NONE) - private int[] padding = new int[] {0, 0}; //Padding: left, right - - /** - * @param padding Padding value for left and right. Must be length 1 or 2 array. - */ - public void setPadding(int... padding) { - this.padding = ValidationUtils.validate2NonNegative(padding, false, "padding"); - } - - - /** - * @param padding Padding for both the left and right - */ - public Builder(int padding) { - this(padding, padding); - } - - /** - * @param padLeft Padding value for left - * @param padRight Padding value for right - */ - public Builder(int padLeft, int padRight) { - this(new int[] {padLeft, padRight}); - } - - /** - * @param padding Padding value for left and right. Must be length 1 or 2 array - */ - public Builder(@NonNull int... padding) { - this.setPadding(padding); - } - - @Override - @SuppressWarnings("unchecked") - public ZeroPadding1DLayer build() { - for (int p : padding) { - if (p < 0) { - throw new IllegalStateException("Invalid zero padding layer config: padding [left, right]" - + " must be > 0 for all elements. Got: " + Arrays.toString(padding)); - } - } - return new ZeroPadding1DLayer(this); - } + return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType, RNNFormat.NCW, getName()); + } + + @Override + public boolean isPretrainParam(String paramName) { + throw new UnsupportedOperationException("ZeroPaddingLayer does not contain parameters"); + } + + @Override + public LayerMemoryReport getMemoryReport(InputType inputType) { + InputType outputType = getOutputType(-1, inputType); + + return new LayerMemoryReport.Builder(name, ZeroPaddingLayer.class, inputType, outputType) + .standardMemory(0, 0) // No params + .workingMemory(0, 0, MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) + .cacheMemory( + MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) // No caching + .build(); + } + + public static abstract class ZeroPadding1DLayerBuilder< + C extends ZeroPadding1DLayer, B extends ZeroPadding1DLayerBuilder> + extends NoParamLayerBuilder { + public B padding(int... 
padding) { + this.padding$value = ValidationUtils.validate2NonNegative(padding, false, "padding"); + this.padding$set = true; + return self(); } + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPadding3DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPadding3DLayer.java index cdabe2788..02701dbb8 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPadding3DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPadding3DLayer.java @@ -21,6 +21,7 @@ package org.deeplearning4j.nn.conf.layers; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -38,17 +39,69 @@ import java.util.Collection; import java.util.Map; @Data -@NoArgsConstructor @EqualsAndHashCode(callSuper = true) +@SuperBuilder(builderMethodName = "innerBuilder") public class ZeroPadding3DLayer extends NoParamLayer { + /** + * [padLeftD, padRightD, padLeftH, padRightH, padLeftW, padRightW] + */ + @Builder.Default + private int[] padding = new int[] {0, 0, 0, 0, 0, 0};// [padLeftD, padRightD, padLeftH, padRightH, padLeftW, padRightW] - private int[] padding; // [padLeftD, padRightD, padLeftH, padRightH, padLeftW, padRightW] - - private ZeroPadding3DLayer(Builder builder) { - super(builder); - this.padding = builder.padding; + public static ZeroPadding3DLayerBuilder builder() { + return innerBuilder(); } + /** + * @param padding Padding for both the left and right in all three spatial dimensions + */ + public static ZeroPadding3DLayerBuilder builder(int padding) { + return innerBuilder() + .padding(padding, padding, padding, padding, padding, padding); + } + /** + * @param padding Padding for both the left and right in all three spatial dimensions + */ + public static ZeroPadding3DLayerBuilder builder(int[] padding) { + return innerBuilder() + .padding(padding); + } + /** + * Use same padding for left and right boundaries in depth, height and width. + * + * @param padDepth padding used for both depth boundaries + * @param padHeight padding used for both height boundaries + * @param padWidth padding used for both width boudaries + */ + public static ZeroPadding3DLayerBuilder builder(int padDepth, int padHeight, int padWidth) { + return innerBuilder() + .padding(padDepth, padDepth, padHeight, padHeight, padWidth, padWidth); + } + /** + * Explicit padding of left and right boundaries in depth, height and width dimensions + * + * @param padLeftD Depth padding left + * @param padRightD Depth padding right + * @param padLeftH Height padding left + * @param padRightH Height padding right + * @param padLeftW Width padding left + * @param padRightW Width padding right + */ + public static ZeroPadding3DLayerBuilder builder(int padLeftD, int padRightD, int padLeftH, int padRightH, int padLeftW, int padRightW) { + return innerBuilder() + .padding(padLeftD, padRightD, padLeftH, padRightH, padLeftW, padRightW); + } +public static abstract class ZeroPadding3DLayerBuilder> extends + NoParamLayerBuilder { + /** + * [padLeftD, padRightD, padLeftH, padRightH, padLeftW, padRightW] + */ + public B padding(int... 
padding) { + this.padding$value = ValidationUtils.validate6NonNegative(padding, "padding"); + this.padding$set = true; + return self(); + } +} @Override public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, Collection iterationListeners, int layerIndex, INDArray layerParamsView, @@ -74,7 +127,7 @@ public class ZeroPadding3DLayer extends NoParamLayer { public InputType getOutputType(int layerIndex, InputType inputType) { if (inputType == null || inputType.getType() != InputType.Type.CNN3D) { throw new IllegalStateException("Invalid input for 3D CNN layer (layer index = " + layerIndex - + ", layer name = \"" + getLayerName() + "\"): expect CNN3D input type with size > 0. Got: " + + ", layer name = \"" + getName() + "\"): expect CNN3D input type with size > 0. Got: " + inputType); } InputType.InputTypeConvolutional3D c = (InputType.InputTypeConvolutional3D) inputType; @@ -91,11 +144,11 @@ public class ZeroPadding3DLayer extends NoParamLayer { @Override public InputPreProcessor getPreProcessorForInputType(InputType inputType) { if (inputType == null) { - throw new IllegalStateException("Invalid input for ZeroPadding3DLayer layer (layer name=\"" + getLayerName() + throw new IllegalStateException("Invalid input for ZeroPadding3DLayer layer (layer name=\"" + getName() + "\"): input is null"); } - return InputTypeUtil.getPreProcessorForInputTypeCnn3DLayers(inputType, getLayerName()); + return InputTypeUtil.getPreProcessorForInputTypeCnn3DLayers(inputType, getName()); } @Override @@ -107,77 +160,12 @@ public class ZeroPadding3DLayer extends NoParamLayer { public LayerMemoryReport getMemoryReport(InputType inputType) { InputType outputType = getOutputType(-1, inputType); - return new LayerMemoryReport.Builder(layerName, ZeroPadding3DLayer.class, inputType, outputType) + return new LayerMemoryReport.Builder(name, ZeroPadding3DLayer.class, inputType, outputType) .standardMemory(0, 0) //No params .workingMemory(0, 0, MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching .build(); } - @Getter - @Setter - public static class Builder extends LayerConfiguration.Builder { - /** - * [padLeftD, padRightD, padLeftH, padRightH, padLeftW, padRightW] - */ - @Setter(AccessLevel.NONE) - private int[] padding = new int[] {0, 0, 0, 0, 0, 0}; - - /** - * [padLeftD, padRightD, padLeftH, padRightH, padLeftW, padRightW] - */ - public void setPadding(int... padding) { - this.padding = ValidationUtils.validate6NonNegative(padding, "padding"); - } - - /** - * @param padding Padding for both the left and right in all three spatial dimensions - */ - public Builder(int padding) { - this(padding, padding, padding, padding, padding, padding); - } - - - /** - * Use same padding for left and right boundaries in depth, height and width. 
- * - * @param padDepth padding used for both depth boundaries - * @param padHeight padding used for both height boundaries - * @param padWidth padding used for both width boudaries - */ - public Builder(int padDepth, int padHeight, int padWidth) { - this(padDepth, padDepth, padHeight, padHeight, padWidth, padWidth); - } - - /** - * Explicit padding of left and right boundaries in depth, height and width dimensions - * - * @param padLeftD Depth padding left - * @param padRightD Depth padding right - * @param padLeftH Height padding left - * @param padRightH Height padding right - * @param padLeftW Width padding left - * @param padRightW Width padding right - */ - public Builder(int padLeftD, int padRightD, int padLeftH, int padRightH, int padLeftW, int padRightW) { - this(new int[] {padLeftD, padRightD, padLeftH, padRightH, padLeftW, padRightW}); - } - - public Builder(int[] padding) { - this.setPadding(padding); - } - - @Override - @SuppressWarnings("unchecked") - public ZeroPadding3DLayer build() { - for (int p : padding) { - if (p < 0) { - throw new IllegalStateException("Invalid zero padding layer config: padding [left, right]" - + " must be > 0 for all elements. Got: " + Arrays.toString(padding)); - } - } - return new ZeroPadding3DLayer(this); - } - } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPaddingLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPaddingLayer.java index 4582f42c5..064a55123 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPaddingLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPaddingLayer.java @@ -21,6 +21,7 @@ package org.deeplearning4j.nn.conf.layers; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -39,39 +40,80 @@ import java.util.Collection; import java.util.Map; @Data -@NoArgsConstructor @EqualsAndHashCode(callSuper = true) +@SuperBuilder(builderMethodName = "innerBuilder", buildMethodName = "initBuild") public class ZeroPaddingLayer extends NoParamLayer { + /** + * @param padding Padding value for top, bottom, left, and right. 
Must be length 4 array + */ + @Builder.Default + private int[] padding = new int[] {0, 0, 0, 0}; //Padding: top, bottom, left, right - private int[] padding; - private CNN2DFormat dataFormat = CNN2DFormat.NCHW; - - public ZeroPaddingLayer(int padTopBottom, int padLeftRight) { - this(new Builder(padTopBottom, padLeftRight)); + public static ZeroPaddingLayerBuilder builder() { + return innerBuilder(); } - public ZeroPaddingLayer(int padTop, int padBottom, int padLeft, int padRight) { - this(new Builder(padTop, padBottom, padLeft, padRight)); + /** + * @param padHeight Padding for both the top and bottom + * @param padWidth Padding for both the left and right + */ + public static ZeroPaddingLayerBuilder builder(int padHeight, int padWidth) { + return innerBuilder() + .padding(padHeight, padHeight, padWidth, padWidth); + } + /** + * @param padTop Top padding value + * @param padBottom Bottom padding value + * @param padLeft Left padding value + * @param padRight Right padding value + */ + public static ZeroPaddingLayerBuilder builder(int padTop, int padBottom, int padLeft, int padRight) { + return innerBuilder() + .padding(padTop, padBottom, padLeft, padRight); } - private ZeroPaddingLayer(Builder builder) { - super(builder); - if (builder.padding == null || builder.padding.length != 4) { - throw new IllegalArgumentException( - "Invalid padding values: must have exactly 4 values [top, bottom, left, right]." + " Got: " - + (builder.padding == null ? null : Arrays.toString(builder.padding))); + public static ZeroPaddingLayerBuilder builder(int[] padding) { + return innerBuilder() + .padding(padding); + } + + public static abstract class ZeroPaddingLayerBuilder> + extends NoParamLayerBuilder { + public C build() { + if (padding$value == null || padding$value.length != 4) { + throw new IllegalArgumentException( + "Invalid padding values: must have exactly 4 values [top, bottom, left, right]." + " Got: " + + (padding$value == null ? null : Arrays.toString(padding$value))); + } + + C l = initBuild(); + l.initializeConstraints(); + return l; } - this.padding = builder.padding; - this.dataFormat = builder.cnn2DFormat; + public B padding(int ... padding) { + this.padding$value = ValidationUtils.validate4NonNegative(padding, "padding"); + this.padding$set = true; + return self(); + } } + /** + * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last). + * See {@link CNN2DFormat} for more details.
+ * Default: NCHW + * @param format Format for activations (in and out) + */ + @Builder.Default + private CNN2DFormat dataFormat = CNN2DFormat.NCHW; + + @Override public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); - +runInheritance(); org.deeplearning4j.nn.layers.convolution.ZeroPaddingLayer ret = new org.deeplearning4j.nn.layers.convolution.ZeroPaddingLayer(lconf, networkDataType); ret.addTrainingListeners(trainingListeners); @@ -96,15 +138,15 @@ public class ZeroPaddingLayer extends NoParamLayer { @Override public InputPreProcessor getPreProcessorForInputType(InputType inputType) { Preconditions.checkArgument(inputType != null, "Invalid input for ZeroPaddingLayer layer (layer name=\"" - + getLayerName() + "\"): InputType is null"); - return InputTypeUtil.getPreProcessorForInputTypeCnnLayers(inputType, getLayerName()); + + getName() + "\"): InputType is null"); + return InputTypeUtil.getPreProcessorForInputTypeCnnLayers(inputType, getName()); } @Override public LayerMemoryReport getMemoryReport(InputType inputType) { InputType outputType = getOutputType(-1, inputType); - return new LayerMemoryReport.Builder(layerName, ZeroPaddingLayer.class, inputType, outputType) + return new LayerMemoryReport.Builder(name, ZeroPaddingLayer.class, inputType, outputType) .standardMemory(0, 0) //No params //Inference and training is same - just output activations, no working memory in addition to that .workingMemory(0, 0, MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) @@ -118,76 +160,5 @@ public class ZeroPaddingLayer extends NoParamLayer { this.dataFormat = c.getFormat(); } - @Getter - @Setter - public static class Builder extends LayerConfiguration.Builder { - /** - * Padding value for top, bottom, left, and right. Must be length 4 array - */ - @Setter(AccessLevel.NONE) - private int[] padding = new int[] {0, 0, 0, 0}; //Padding: top, bottom, left, right - - private CNN2DFormat cnn2DFormat = CNN2DFormat.NCHW; - - /** - * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last). - * See {@link CNN2DFormat} for more details.
- * Default: NCHW - * @param format Format for activations (in and out) - */ - public Builder dataFormat(CNN2DFormat format){ - this.cnn2DFormat = format; - return this; - } - - /** - * @param padding Padding value for top, bottom, left, and right. Must be length 4 array - */ - public void setPadding(int... padding) { - this.padding = ValidationUtils.validate4NonNegative(padding, "padding"); - } - - /** - * @param padHeight Padding for both the top and bottom - * @param padWidth Padding for both the left and right - */ - public Builder(int padHeight, int padWidth) { - this(padHeight, padHeight, padWidth, padWidth); - } - - /** - * @param padTop Top padding value - * @param padBottom Bottom padding value - * @param padLeft Left padding value - * @param padRight Right padding value - */ - public Builder(int padTop, int padBottom, int padLeft, int padRight) { - this(new int[] {padTop, padBottom, padLeft, padRight}); - } - - /** - * @param padding Must be a length 1 array with values [paddingAll], a length 2 array with values - * [padTopBottom, padLeftRight], or a length 4 array with - * values [padTop, padBottom, padLeft, padRight] - */ - public Builder(int[] padding) { - this.setPadding(padding); - } - - @Override - @SuppressWarnings("unchecked") - public ZeroPaddingLayer build() { - for (int p : padding) { - if (p < 0) { - throw new IllegalStateException( - "Invalid zero padding layer config: padding [top, bottom, left, right]" - + " must be > 0 for all elements. Got: " - + Arrays.toString(padding)); - } - } - - return new ZeroPaddingLayer(this); - } - } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping1D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping1D.java index ef3cedabe..1e43e3031 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping1D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping1D.java @@ -20,7 +20,10 @@ package org.deeplearning4j.nn.conf.layers.convolutional; +import java.util.Collection; +import java.util.Map; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -35,128 +38,93 @@ import org.nd4j.common.base.Preconditions; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.Collection; -import java.util.Map; - +/** Amount of cropping to apply to both the top and the bottom of the input activations */ @Data -@NoArgsConstructor @EqualsAndHashCode(callSuper = true) +@SuperBuilder(builderMethodName = "innerBuilder") public class Cropping1D extends NoParamLayer { + /** + * Cropping amount for top/bottom (in that order). Must be length 1 or 2 array. + * + * @param cropping Cropping amount for top/bottom (in that order). Must be length 1 or 2 array. 
+ */ + @Builder.Default private int[] cropping = new int[] {0, 0}; - private int[] cropping; + public static Cropping1DBuilder builder() { + return innerBuilder(); + } + /** + * @param cropTopBottom Amount of cropping to apply to both the top and the bottom of the input + * activations + */ + public static Cropping1DBuilder builder(int cropTopBottom) { + return innerBuilder().cropping(cropTopBottom, cropTopBottom); + } + /** + * @param cropTop Amount of cropping to apply to the top of the input activations + * @param cropBottom Amount of cropping to apply to the bottom of the input activations + */ + public static Cropping1DBuilder builder(int cropTop, int cropBottom) { + return innerBuilder().cropping(cropTop, cropBottom); + } - /** - * @param cropTopBottom Amount of cropping to apply to both the top and the bottom of the input activations - */ - public Cropping1D(int cropTopBottom) { - this(cropTopBottom, cropTopBottom); + @Override + public org.deeplearning4j.nn.api.Layer instantiate( + NeuralNetConfiguration conf, + Collection trainingListeners, + int layerIndex, + INDArray layerParamsView, + boolean initializeParams, + DataType networkDataType) { + setNetConfiguration(conf); + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + runInheritance(); + Cropping1DLayer ret = new Cropping1DLayer(lconf, networkDataType); + ret.addTrainingListeners(trainingListeners); + ret.setIndex(layerIndex); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); + ret.setParamTable(paramTable); + ret.setLayerConfiguration(lconf); + return ret; + } + + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + if (inputType == null || inputType.getType() != InputType.Type.RNN) { + throw new IllegalStateException( + "Invalid input for 1D Cropping layer (layer index = " + + layerIndex + + ", layer name = \"" + + getName() + + "\"): expect RNN input type with size > 0. 
Got: " + + inputType); } + InputType.InputTypeRecurrent cnn1d = (InputType.InputTypeRecurrent) inputType; + val length = cnn1d.getTimeSeriesLength(); + val outLength = length - cropping[0] - cropping[1]; + return InputType.recurrent(cnn1d.getSize(), outLength); + } - /** - * @param cropTop Amount of cropping to apply to the top of the input activations - * @param cropBottom Amount of cropping to apply to the bottom of the input activations - */ - public Cropping1D(int cropTop, int cropBottom) { - this(new Builder(cropTop, cropBottom)); - } - - /** - * @param cropping Cropping as a length 2 array, with values {@code [cropTop, cropBottom]} - */ - public Cropping1D(int[] cropping) { - this(new Builder(cropping)); - } - - protected Cropping1D(Builder builder) { - super(builder); - this.cropping = builder.cropping; - } - - @Override - public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, - Collection trainingListeners, int layerIndex, INDArray layerParamsView, - boolean initializeParams, DataType networkDataType) { - setNetConfiguration(conf); - LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); - Cropping1DLayer ret = new Cropping1DLayer(lconf, networkDataType); - ret.addTrainingListeners(trainingListeners); - ret.setIndex(layerIndex); - Map paramTable = initializer().init(this, layerParamsView, initializeParams); - ret.setParamTable(paramTable); - ret.setLayerConfiguration(lconf); - return ret; - } - - @Override - public InputType getOutputType(int layerIndex, InputType inputType) { - if (inputType == null || inputType.getType() != InputType.Type.RNN) { - throw new IllegalStateException("Invalid input for 1D Cropping layer (layer index = " + layerIndex - + ", layer name = \"" + getLayerName() + "\"): expect RNN input type with size > 0. Got: " - + inputType); - } - InputType.InputTypeRecurrent cnn1d = (InputType.InputTypeRecurrent) inputType; - val length = cnn1d.getTimeSeriesLength(); - val outLength = length - cropping[0] - cropping[1]; - return InputType.recurrent(cnn1d.getSize(), outLength); - } - - @Override - public InputPreProcessor getPreProcessorForInputType(InputType inputType) { - Preconditions.checkArgument(inputType != null, "Invalid input for Cropping1D layer (layer name=\"" - + getLayerName() + "\"): InputType is null"); - return InputTypeUtil.getPreProcessorForInputTypeCnnLayers(inputType, getLayerName()); - } - - @Override - public LayerMemoryReport getMemoryReport(InputType inputType) { - return null; - } - - - @Getter - @Setter - public static class Builder extends LayerConfiguration.Builder { - /** - * Cropping amount for top/bottom (in that order). Must be length 1 or 2 array. - */ - @Setter(AccessLevel.NONE) - private int[] cropping = new int[] {0, 0}; - - /** - * @param cropping Cropping amount for top/bottom (in that order). Must be length 1 or 2 array. - */ - public void setCropping(int... cropping) { - this.cropping = ValidationUtils.validate2NonNegative(cropping, true,"cropping"); - } - - public Builder() { - - } - - /** - * @param cropping Cropping amount for top/bottom (in that order). Must be length 1 or 2 array. 
- */ - public Builder(@NonNull int[] cropping) { - this.setCropping(cropping); - } - - /** - * @param cropTopBottom Amount of cropping to apply to both the top and the bottom of the input activations - */ - public Builder(int cropTopBottom) { - this(cropTopBottom, cropTopBottom); - } - - /** - * @param cropTop Amount of cropping to apply to the top of the input activations - * @param cropBottom Amount of cropping to apply to the bottom of the input activations - */ - public Builder(int cropTop, int cropBottom) { - this.setCropping(cropTop, cropBottom); - } - - public Cropping1D build() { - return new Cropping1D(this); - } + @Override + public InputPreProcessor getPreProcessorForInputType(InputType inputType) { + Preconditions.checkArgument( + inputType != null, + "Invalid input for Cropping1D layer (layer name=\"" + getName() + "\"): InputType is null"); + return InputTypeUtil.getPreProcessorForInputTypeCnnLayers(inputType, getName()); + } + + @Override + public LayerMemoryReport getMemoryReport(InputType inputType) { + return null; + } + + public static abstract class Cropping1DBuilder< + C extends Cropping1D, B extends Cropping1DBuilder> + extends NoParamLayerBuilder { + public B cropping(int... cropping) { + this.cropping$value = ValidationUtils.validate2NonNegative(cropping, true, "cropping"); + this.cropping$set = true; + return self(); } + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping2D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping2D.java index d73d33950..cf3f6bcb6 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping2D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping2D.java @@ -20,7 +20,10 @@ package org.deeplearning4j.nn.conf.layers.convolutional; +import java.util.Collection; +import java.util.Map; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -37,161 +40,111 @@ import org.nd4j.common.base.Preconditions; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.Collection; -import java.util.Map; - @Data -@NoArgsConstructor @EqualsAndHashCode(callSuper = true) +@SuperBuilder(builderMethodName = "innerBuilder") public class Cropping2D extends NoParamLayer { + /** Cropping amount for top/bottom/left/right (in that order). A length 4 array. */ + @Builder.Default private int[] cropping = new int[] {0, 0, 0, 0}; + /** + * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last). + * See {@link CNN2DFormat} for more details.
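Illustration only, not part of the diff: a minimal usage sketch of the Cropping1D builder API introduced above, assuming Lombok's @SuperBuilder generates the usual fluent build() chain for the fields shown.

    // assumes: import org.deeplearning4j.nn.conf.layers.convolutional.Cropping1D;
    Cropping1D cropSym  = Cropping1D.builder(3).build();               // 3 cropped from both top and bottom
    Cropping1D cropAsym = Cropping1D.builder(2, 4).build();            // cropTop = 2, cropBottom = 4
    Cropping1D cropVar  = Cropping1D.builder().cropping(2, 4).build(); // varargs setter, validated non-negative

These static builder overloads replace the removed Cropping1D(int...) constructors one for one.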
+ * Default: NCHW + * + * @param format Format for activations (in and out) + */ + @Builder.Default private CNN2DFormat dataFormat = CNN2DFormat.NCHW; - private int[] cropping; - private CNN2DFormat dataFormat = CNN2DFormat.NCHW; + public static Cropping2DBuilder builder() { + return innerBuilder(); + } - /** - * @param cropTopBottom Amount of cropping to apply to both the top and the bottom of the input activations - * @param cropLeftRight Amount of cropping to apply to both the left and the right of the input activations - */ - public Cropping2D(int cropTopBottom, int cropLeftRight) { - this(cropTopBottom, cropTopBottom, cropLeftRight, cropLeftRight); - } - - public Cropping2D(CNN2DFormat dataFormat, int cropTopBottom, int cropLeftRight) { - this(dataFormat, cropTopBottom, cropTopBottom, cropLeftRight, cropLeftRight); - } - - /** - * @param cropTop Amount of cropping to apply to the top of the input activations - * @param cropBottom Amount of cropping to apply to the bottom of the input activations - * @param cropLeft Amount of cropping to apply to the left of the input activations - * @param cropRight Amount of cropping to apply to the right of the input activations - */ - public Cropping2D(int cropTop, int cropBottom, int cropLeft, int cropRight) { - this(CNN2DFormat.NCHW, cropTop, cropBottom, cropLeft, cropRight); - } - - public Cropping2D(CNN2DFormat format, int cropTop, int cropBottom, int cropLeft, int cropRight) { - this(new Builder(cropTop, cropBottom, cropLeft, cropRight).dataFormat(format)); - } - - /** - * @param cropping Cropping as either a length 2 array, with values {@code [cropTopBottom, cropLeftRight]}, or as a - * length 4 array, with values {@code [cropTop, cropBottom, cropLeft, cropRight]} - */ - public Cropping2D(int[] cropping) { - this(new Builder(cropping)); - } - - protected Cropping2D(Builder builder) { - super(builder); - this.cropping = builder.cropping; - this.dataFormat = builder.cnn2DFormat; - } - - @Override - public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, - Collection trainingListeners, int layerIndex, INDArray layerParamsView, - boolean initializeParams, DataType networkDataType) { - setNetConfiguration(conf); - LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); - Cropping2DLayer ret = new Cropping2DLayer(lconf, networkDataType); - ret.addTrainingListeners(trainingListeners); - ret.setIndex(layerIndex); - Map paramTable = initializer().init(this, layerParamsView, initializeParams); - ret.setParamTable(paramTable); - ret.setLayerConfiguration(lconf); - return ret; - } - - @Override - public InputType getOutputType(int layerIndex, InputType inputType) { - int[] hwd = ConvolutionUtils.getHWDFromInputType(inputType); - int outH = hwd[0] - cropping[0] - cropping[1]; - int outW = hwd[1] - cropping[2] - cropping[3]; - - InputType.InputTypeConvolutional c = (InputType.InputTypeConvolutional)inputType; - - return InputType.convolutional(outH, outW, hwd[2], c.getFormat()); - } - - @Override - public InputPreProcessor getPreProcessorForInputType(InputType inputType) { - Preconditions.checkArgument(inputType != null, "Invalid input for Cropping2D layer (layer name=\"" - + getLayerName() + "\"): InputType is null"); - return InputTypeUtil.getPreProcessorForInputTypeCnnLayers(inputType, getLayerName()); - } - - @Override - public LayerMemoryReport getMemoryReport(InputType inputType) { - return null; - } - - @Override - public void setNIn(InputType inputType, boolean override) { - this.dataFormat = 
((InputType.InputTypeConvolutional)inputType).getFormat(); - } - - @Getter - @Setter - public static class Builder extends LayerConfiguration.Builder { - - /** - * Cropping amount for top/bottom/left/right (in that order). A length 4 array. - */ - @Setter(AccessLevel.NONE) - private int[] cropping = new int[] {0, 0, 0, 0}; - - private CNN2DFormat cnn2DFormat = CNN2DFormat.NCHW; - - /** - * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last). - * See {@link CNN2DFormat} for more details.
- * Default: NCHW - * @param format Format for activations (in and out) - */ - public Builder dataFormat(CNN2DFormat format){ - this.cnn2DFormat = format; - return this; - } - - /** - * @param cropping Cropping amount for top/bottom/left/right (in that order). Must be length 1, 2, or 4 array. - */ - public void setCropping(int... cropping) { - this.cropping = ValidationUtils.validate4NonNegative(cropping, "cropping"); - } - - public Builder() { - - } - - /** - * @param cropping Cropping amount for top/bottom/left/right (in that order). Must be length 4 array. - */ - public Builder(@NonNull int[] cropping) { - this.setCropping(cropping); - } - - /** - * @param cropTopBottom Amount of cropping to apply to both the top and the bottom of the input activations - * @param cropLeftRight Amount of cropping to apply to both the left and the right of the input activations - */ - public Builder(int cropTopBottom, int cropLeftRight) { - this(cropTopBottom, cropTopBottom, cropLeftRight, cropLeftRight); - } - - /** - * @param cropTop Amount of cropping to apply to the top of the input activations - * @param cropBottom Amount of cropping to apply to the bottom of the input activations - * @param cropLeft Amount of cropping to apply to the left of the input activations - * @param cropRight Amount of cropping to apply to the right of the input activations - */ - public Builder(int cropTop, int cropBottom, int cropLeft, int cropRight) { - this.setCropping(cropTop, cropBottom, cropLeft, cropRight); - } - - public Cropping2D build() { - return new Cropping2D(this); - } + /** + * @param cropping Cropping amount for top/bottom/left/right (in that order). Must be length 4 + * array. + */ + public static Cropping2DBuilder builder(int... cropping) { + return innerBuilder().cropping(cropping); + } + + /** + * @param cropTopBottom Amount of cropping to apply to both the top and the bottom of the input + * activations + * @param cropLeftRight Amount of cropping to apply to both the left and the right of the input + * activations + */ + public static Cropping2DBuilder builder(int cropTopBottom, int cropLeftRight) { + return innerBuilder().cropping(cropTopBottom, cropTopBottom, cropLeftRight, cropLeftRight); + } + + /** + * @param cropTop Amount of cropping to apply to the top of the input activations + * @param cropBottom Amount of cropping to apply to the bottom of the input activations + * @param cropLeft Amount of cropping to apply to the left of the input activations + * @param cropRight Amount of cropping to apply to the right of the input activations + */ + public static Cropping2DBuilder builder( + int cropTop, int cropBottom, int cropLeft, int cropRight) { + return innerBuilder().cropping(cropTop, cropBottom, cropLeft, cropRight); + } + + @Override + public org.deeplearning4j.nn.api.Layer instantiate( + NeuralNetConfiguration conf, + Collection trainingListeners, + int layerIndex, + INDArray layerParamsView, + boolean initializeParams, + DataType networkDataType) { + setNetConfiguration(conf); + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + runInheritance(); + + Cropping2DLayer ret = new Cropping2DLayer(lconf, networkDataType); + ret.addTrainingListeners(trainingListeners); + ret.setIndex(layerIndex); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); + ret.setParamTable(paramTable); + ret.setLayerConfiguration(lconf); + return ret; + } + + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + int[] hwd = 
ConvolutionUtils.getHWDFromInputType(inputType); + int outH = hwd[0] - cropping[0] - cropping[1]; + int outW = hwd[1] - cropping[2] - cropping[3]; + + InputType.InputTypeConvolutional c = (InputType.InputTypeConvolutional) inputType; + + return InputType.convolutional(outH, outW, hwd[2], c.getFormat()); + } + + @Override + public InputPreProcessor getPreProcessorForInputType(InputType inputType) { + Preconditions.checkArgument( + inputType != null, + "Invalid input for Cropping2D layer (layer name=\"" + getName() + "\"): InputType is null"); + return InputTypeUtil.getPreProcessorForInputTypeCnnLayers(inputType, getName()); + } + + @Override + public LayerMemoryReport getMemoryReport(InputType inputType) { + return null; + } + + @Override + public void setNIn(InputType inputType, boolean override) { + this.dataFormat = ((InputType.InputTypeConvolutional) inputType).getFormat(); + } + + public static abstract class Cropping2DBuilder< + C extends Cropping2D, B extends Cropping2DBuilder> + extends NoParamLayerBuilder { + public B cropping(int... cropping) { + this.cropping$value = ValidationUtils.validate4NonNegative(cropping, "cropping"); + this.cropping$set = true; + return self(); } + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping3D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping3D.java index a950ed633..9c4e70196 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping3D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping3D.java @@ -21,6 +21,7 @@ package org.deeplearning4j.nn.conf.layers.convolutional; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -39,19 +40,20 @@ import java.util.Collection; import java.util.Map; @Data -@NoArgsConstructor @EqualsAndHashCode(callSuper = true) +@SuperBuilder(builderMethodName = "innerBuilder") public class Cropping3D extends NoParamLayer { - - private int[] cropping; +@Builder.Default + private int[] cropping = new int[] {0, 0, 0, 0, 0, 0};; /** * @param cropDepth Amount of cropping to apply to both depth boundaries of the input activations * @param cropHeight Amount of cropping to apply to both height boundaries of the input activations * @param cropWidth Amount of cropping to apply to both width boundaries of the input activations */ - public Cropping3D(int cropDepth, int cropHeight, int cropWidth) { - this(cropDepth, cropDepth, cropHeight, cropHeight, cropWidth, cropWidth); + public static Cropping3DBuilder builder(int cropDepth, int cropHeight, int cropWidth) { + return innerBuilder() + .cropping(cropDepth, cropDepth, cropHeight, cropHeight, cropWidth, cropWidth); } /** @@ -62,8 +64,9 @@ public class Cropping3D extends NoParamLayer { * @param cropLeftW Amount of cropping to apply to the left of the width dimension * @param cropRightW Amount of cropping to apply to the right of the width dimension */ - public Cropping3D(int cropLeftD, int cropRightD, int cropLeftH, int cropRightH, int cropLeftW, int cropRightW) { - this(new Builder(cropLeftD, cropRightD, cropLeftH, cropRightH, cropLeftW, cropRightW)); + public static Cropping3DBuilder builder(int cropLeftD, int cropRightD, int cropLeftH, int cropRightH, int cropLeftW, int cropRightW) { + return innerBuilder() + 
.cropping(cropLeftD, cropRightD, cropLeftH, cropRightH, cropLeftW, cropRightW); } /** @@ -71,14 +74,19 @@ public class Cropping3D extends NoParamLayer { * as a length 4 array, with values {@code [cropLeftDepth, cropRightDepth, cropLeftHeight, cropRightHeight, * cropLeftWidth, cropRightWidth]} */ - public Cropping3D(int[] cropping) { - this(new Builder(cropping)); + public static Cropping3DBuilder builder(int[] cropping) { + return innerBuilder() + .cropping(cropping); } - protected Cropping3D(Builder builder) { - super(builder); - this.cropping = builder.cropping; - } +public static abstract class Cropping3DBuilder> extends + NoParamLayerBuilder { + public B cropping(int ... cropping) { + this.cropping$value = ValidationUtils.validate6NonNegative(cropping, "cropping"); + this.cropping$set = true; + return self(); + } +} @Override public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, @@ -86,6 +94,7 @@ public class Cropping3D extends NoParamLayer { boolean initializeParams, DataType networkDataType) { setNetConfiguration(conf); LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + runInheritance(); Cropping3DLayer ret = new Cropping3DLayer(lconf, networkDataType); ret.addTrainingListeners(iterationListeners); ret.setIndex(layerIndex); @@ -99,7 +108,7 @@ public class Cropping3D extends NoParamLayer { public InputType getOutputType(int layerIndex, InputType inputType) { if (inputType == null || inputType.getType() != InputType.Type.CNN3D) { throw new IllegalStateException("Invalid input for 3D cropping layer (layer index = " + layerIndex - + ", layer name = \"" + getLayerName() + "\"): expect CNN3D input type with size > 0. Got: " + + ", layer name = \"" + getName() + "\"): expect CNN3D input type with size > 0. Got: " + inputType); } InputType.InputTypeConvolutional3D c = (InputType.InputTypeConvolutional3D) inputType; @@ -111,8 +120,8 @@ public class Cropping3D extends NoParamLayer { @Override public InputPreProcessor getPreProcessorForInputType(InputType inputType) { Preconditions.checkArgument(inputType != null, "Invalid input for Cropping3D " + "layer (layer name=\"" - + getLayerName() + "\"): InputType is null"); - return InputTypeUtil.getPreProcessorForInputTypeCnn3DLayers(inputType, getLayerName()); + + getName() + "\"): InputType is null"); + return InputTypeUtil.getPreProcessorForInputTypeCnn3DLayers(inputType, getName()); } @Override @@ -121,59 +130,5 @@ public class Cropping3D extends NoParamLayer { } - @Getter - @Setter - public static class Builder extends LayerConfiguration.Builder { - /** - * Cropping amount, a length 6 array, i.e. crop left depth, crop right depth, crop left height, crop right height, crop left width, crop right width - */ - @Setter(AccessLevel.NONE) - private int[] cropping = new int[] {0, 0, 0, 0, 0, 0}; - - /** - * @param cropping Cropping amount, must be length 1, 3, or 6 array, i.e. either all values, crop depth, crop height, crop width - * or crop left depth, crop right depth, crop left height, crop right height, crop left width, crop right width - */ - public void setCropping(int... cropping) { - this.cropping = ValidationUtils.validate6NonNegative(cropping, "cropping"); - } - - public Builder() { - - } - - /** - * @param cropping Cropping amount, must be length 3 or 6 array, i.e. 
either crop depth, crop height, crop width - * or crop left depth, crop right depth, crop left height, crop right height, crop left width, crop right width - */ - public Builder(@NonNull int[] cropping) { - this.setCropping(cropping); - } - - /** - * @param cropDepth Amount of cropping to apply to both depth boundaries of the input activations - * @param cropHeight Amount of cropping to apply to both height boundaries of the input activations - * @param cropWidth Amount of cropping to apply to both width boundaries of the input activations - */ - public Builder(int cropDepth, int cropHeight, int cropWidth) { - this(cropDepth, cropDepth, cropHeight, cropHeight, cropWidth, cropWidth); - } - - /** - * @param cropLeftD Amount of cropping to apply to the left of the depth dimension - * @param cropRightD Amount of cropping to apply to the right of the depth dimension - * @param cropLeftH Amount of cropping to apply to the left of the height dimension - * @param cropRightH Amount of cropping to apply to the right of the height dimension - * @param cropLeftW Amount of cropping to apply to the left of the width dimension - * @param cropRightW Amount of cropping to apply to the right of the width dimension - */ - public Builder(int cropLeftD, int cropRightD, int cropLeftH, int cropRightH, int cropLeftW, int cropRightW) { - this.setCropping(cropLeftD, cropRightD, cropLeftH, cropRightH, cropLeftW, cropRightW); - } - - public Cropping3D build() { - return new Cropping3D(this); - } - } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/ElementWiseMultiplicationLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/ElementWiseMultiplicationLayer.java index 703d95cea..65c47b7f5 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/ElementWiseMultiplicationLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/ElementWiseMultiplicationLayer.java @@ -21,6 +21,7 @@ package org.deeplearning4j.nn.conf.layers.misc; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -41,14 +42,9 @@ import java.util.Map; @Data @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@SuperBuilder public class ElementWiseMultiplicationLayer extends org.deeplearning4j.nn.conf.layers.FeedForwardLayer { - // We have to add an empty constructor for custom layers otherwise we will have errors when loading the model - protected ElementWiseMultiplicationLayer() {} - - protected ElementWiseMultiplicationLayer(Builder builder) { - super(builder); - } @Override public ElementWiseMultiplicationLayer clone() { @@ -65,6 +61,7 @@ public class ElementWiseMultiplicationLayer extends org.deeplearning4j.nn.conf.l + nIn + ", nOut=" + nOut); } LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + runInheritance(); org.deeplearning4j.nn.layers.feedforward.elementwise.ElementWiseMultiplicationLayer ret = new org.deeplearning4j.nn.layers.feedforward.elementwise.ElementWiseMultiplicationLayer(lconf, networkDataType); @@ -98,7 +95,7 @@ public class ElementWiseMultiplicationLayer extends org.deeplearning4j.nn.conf.l int trainSizeFixed = 0; int trainSizeVariable = 0; - if (getIDropout() != null) { + if (getDropOut() != null) { if (false) { //TODO drop connect //Dup the weights... 
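For reviewers (not part of the patch): the same pattern applies to the 2D and 3D cropping layers above; dataFormat(...) is the setter Lombok generates for the @Builder.Default field, so omitting it keeps the NCHW default.

    Cropping2D crop2d = Cropping2D.builder(2, 3)             // top/bottom = 2, left/right = 3
            .dataFormat(CNN2DFormat.NHWC)                    // optional; defaults to NCHW
            .build();
    Cropping3D crop3d = Cropping3D.builder(1, 2, 2).build(); // depth = 1, height = 2, width = 2 on both boundaries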
note that this does NOT depend on the minibatch size... @@ -114,7 +111,7 @@ public class ElementWiseMultiplicationLayer extends org.deeplearning4j.nn.conf.l // then we have 'epsilonNext' which is equivalent to input size trainSizeVariable += outputType.arrayElementsPerExample(); - return new LayerMemoryReport.Builder(layerName, ElementWiseMultiplicationLayer.class, inputType, outputType) + return new LayerMemoryReport.Builder(name, ElementWiseMultiplicationLayer.class, inputType, outputType) .standardMemory(numParams, updaterStateSize) .workingMemory(0, 0, trainSizeFixed, trainSizeVariable) //No additional memory (beyond activations) for inference .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching in DenseLayerConfiguration @@ -122,13 +119,4 @@ public class ElementWiseMultiplicationLayer extends org.deeplearning4j.nn.conf.l } - @AllArgsConstructor - public static class Builder extends FeedForwardLayer.Builder { - - @Override - @SuppressWarnings("unchecked") - public ElementWiseMultiplicationLayer build() { - return new ElementWiseMultiplicationLayer(this); - } - } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/FrozenLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/FrozenLayer.java index eb15350dc..83f66b966 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/FrozenLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/FrozenLayer.java @@ -20,12 +20,15 @@ package org.deeplearning4j.nn.conf.layers.misc; +import java.util.Collection; +import java.util.List; +import java.util.Set; import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.Setter; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.api.layers.LayerConstraint; -import org.deeplearning4j.nn.conf.GradientNormalization; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -37,126 +40,111 @@ import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.learning.config.IUpdater; import org.nd4j.linalg.learning.regularization.Regularization; -import com.fasterxml.jackson.annotation.JsonProperty; - -import java.util.Collection; -import java.util.List; @EqualsAndHashCode(callSuper = false) +@SuperBuilder(builderMethodName = "innerBuilder") public class FrozenLayer extends LayerConfiguration { - /** - * A layer configuration, only if this layer config has been created from another one - */ - @Getter @Setter - private LayerConfiguration innerConfiguration; + /** A layer configuration, only if this layer config has been created from another one */ + @Getter @Setter private LayerConfiguration innerConfiguration; - private FrozenLayer(Builder builder) { - super(builder); - this.innerConfiguration = builder.layer; + public static FrozenLayerBuilder builder() { + return innerBuilder(); + } + + public static FrozenLayerBuilder builder(LayerConfiguration innerConfiguration) { + return innerBuilder().innerConfiguration(innerConfiguration); + } + + @Override + public LayerConfiguration clone() { + FrozenLayer l = (FrozenLayer) super.clone(); + l.innerConfiguration = innerConfiguration.clone(); + return l; + } + + @Override + public org.deeplearning4j.nn.api.Layer instantiate( + NeuralNetConfiguration conf, + 
Collection trainingListeners, + int layerIndex, + INDArray layerParamsView, + boolean initializeParams, + DataType networkDataType) { + + // Need to be able to instantiate a layer, from a config - for JSON -> net type situations + org.deeplearning4j.nn.api.Layer underlying = + innerConfiguration.instantiate( + getNetConfiguration(), + trainingListeners, + layerIndex, + layerParamsView, + initializeParams, + networkDataType); + + NeuralNetConfiguration nncUnderlying = underlying.getNetConfiguration(); + if (nncUnderlying.getNetWideVariables() != null) { + Set vars = nncUnderlying.getNetWideVariables(true); + nncUnderlying.clearNetWideVariable(); + conf.clearNetWideVariable(); + for (String s : vars) { + conf.getNetWideVariables(false).add(s); + nncUnderlying.getNetWideVariables(false).add(s); + } } - public FrozenLayer(@JsonProperty("layer") LayerConfiguration layer) { - this.innerConfiguration = layer; - } + return new org.deeplearning4j.nn.layers.FrozenLayer(underlying); + } - @Override - public LayerConfiguration clone() { - FrozenLayer l = (FrozenLayer) super.clone(); - l.innerConfiguration = innerConfiguration.clone(); - return l; - } + @Override + public ParamInitializer initializer() { + return FrozenLayerParamInitializer.getInstance(); + } - @Override - public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, - Collection trainingListeners, int layerIndex, INDArray layerParamsView, - boolean initializeParams, DataType networkDataType) { + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + return innerConfiguration.getOutputType(layerIndex, inputType); + } - //Need to be able to instantiate a layer, from a config - for JSON -> net type situations - org.deeplearning4j.nn.api.Layer underlying = innerConfiguration.instantiate(getNetConfiguration(), trainingListeners, - layerIndex, layerParamsView, initializeParams, networkDataType); + @Override + public void setNIn(InputType inputType, boolean override) { + innerConfiguration.setNIn(inputType, override); + } - NeuralNetConfiguration nncUnderlying = underlying.getNetConfiguration(); - if (nncUnderlying.netWideVariables() != null) { - List vars = nncUnderlying.netWideVariables(true); - nncUnderlying.clearNetWideVariable(); - conf.clearNetWideVariable(); - for (String s : vars) { - conf.netWideVariables(false).add(s); - nncUnderlying.netWideVariables(false).add(s); - } - } + @Override + public InputPreProcessor getPreProcessorForInputType(InputType inputType) { + return innerConfiguration.getPreProcessorForInputType(inputType); + } - return new org.deeplearning4j.nn.layers.FrozenLayer(underlying); - } + @Override + public List getRegularizationByParam(String param) { + return null; + } - @Override - public ParamInitializer initializer() { - return FrozenLayerParamInitializer.getInstance(); - } + @Override + public boolean isPretrainParam(String paramName) { + return false; + } - @Override - public InputType getOutputType(int layerIndex, InputType inputType) { - return innerConfiguration.getOutputType(layerIndex, inputType); - } + @Override + public IUpdater getUpdaterByParam(String paramName) { + return null; + } - @Override - public void setNIn(InputType inputType, boolean override) { - innerConfiguration.setNIn(inputType, override); - } + @Override + public LayerMemoryReport getMemoryReport(InputType inputType) { + return innerConfiguration.getMemoryReport(inputType); + } - @Override - public InputPreProcessor getPreProcessorForInputType(InputType inputType) { - return 
innerConfiguration.getPreProcessorForInputType(inputType); - } + @Override + public void setName(String layerName) { + super.setName(layerName); + innerConfiguration.setName(layerName); + } - @Override - public List getRegularizationByParam(String param){ - return null; - } - - @Override - public boolean isPretrainParam(String paramName) { - return false; - } - - @Override - public IUpdater getUpdaterByParam(String paramName) { - return null; - } - - @Override - public LayerMemoryReport getMemoryReport(InputType inputType) { - return innerConfiguration.getMemoryReport(inputType); - } - - @Override - public void setLayerName(String layerName) { - super.setLayerName(layerName); - innerConfiguration.setLayerName(layerName); - } - - @Override - public void setConstraints(List constraints) { - this.constraints = constraints; - this.innerConfiguration.setConstraints(constraints); - } - - @Getter - @Setter - public static class Builder extends LayerConfiguration.Builder { - - private LayerConfiguration layer; - - public Builder layer(LayerConfiguration layer) { - this.setLayer(layer); - return this; - } - - @Override - @SuppressWarnings("unchecked") - public FrozenLayer build() { - return new FrozenLayer(this); - } - } + @Override + public void setConstraints(List constraints) { + this.constraints = constraints; + this.innerConfiguration.setConstraints(constraints); + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/FrozenLayerWithBackprop.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/FrozenLayerWithBackprop.java index 6abf467d3..ff9a66c2b 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/FrozenLayerWithBackprop.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/FrozenLayerWithBackprop.java @@ -22,6 +22,7 @@ package org.deeplearning4j.nn.conf.layers.misc; import lombok.Data; import lombok.EqualsAndHashCode; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -37,20 +38,28 @@ import com.fasterxml.jackson.annotation.JsonProperty; import java.util.Collection; import java.util.List; +import java.util.Set; + -@Data @EqualsAndHashCode(callSuper = false) +@SuperBuilder(builderMethodName = "innerBuilder") public class FrozenLayerWithBackprop extends BaseWrapperLayerConfiguration { + + public static FrozenLayerWithBackpropBuilder builder() { + return innerBuilder(); + } /** * Create a new Frozen Layer, that wraps another layer with backpropagation enabled. 
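Usage sketch (not part of the diff): FrozenLayer keeps its wrap-a-layer semantics, now via a static builder overload. innerDense below is a placeholder for any already-configured LayerConfiguration.

    FrozenLayer frozen = FrozenLayer.builder(innerDense).build();
    // equivalent long form through the generated field setter:
    FrozenLayer frozen2 = FrozenLayer.builder().innerConfiguration(innerDense).build();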
* - * @param layer configuration of the layer to wrap + * @param innerConfiguration configuration of the layer to wrap */ - public FrozenLayerWithBackprop(@JsonProperty("layer") LayerConfiguration layer) { - super(layer); + public static FrozenLayerWithBackpropBuilder builder(LayerConfiguration innerConfiguration) { + return innerBuilder().underlying(innerConfiguration); + } + public static FrozenLayerWithBackpropBuilder builder(LayerConfigurationBuilder innerConfiguration) { + return innerBuilder().underlying(innerConfiguration.build()); } - public NeuralNetConfiguration getInnerConf(NeuralNetConfiguration conf) { NeuralNetConfiguration nnc = conf.clone(); nnc.getLayerConfigurations().add(0, underlying); @@ -76,13 +85,13 @@ public class FrozenLayerWithBackprop extends BaseWrapperLayerConfiguration { newUnderlyingLayer.setLayerConfiguration(underlying); //Fix a problem, where the embedded layer gets the conf of the frozen layer, rather than its own NeuralNetConfiguration nncUnderlying = underlying.getNetConfiguration(); - if (nncUnderlying.netWideVariables() != null) { - List vars = nncUnderlying.netWideVariables(true); + if (nncUnderlying.getNetWideVariables() != null) { + Set vars = nncUnderlying.getNetWideVariables(true); nncUnderlying.clearNetWideVariable(); conf.clearNetWideVariable(); for (String s : vars) { - conf.netWideVariables(false).add(s); - nncUnderlying.netWideVariables(false).add(s); + conf.getNetWideVariables(false).add(s); + nncUnderlying.getNetWideVariables(false).add(s); } } @@ -111,9 +120,9 @@ public class FrozenLayerWithBackprop extends BaseWrapperLayerConfiguration { } @Override - public void setLayerName(String layerName) { - super.setLayerName(layerName); - underlying.setLayerName(layerName); + public void setName(String layerName) { + super.setName(layerName); + underlying.setName(layerName); } @Override diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/RepeatVector.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/RepeatVector.java index 541c26914..62736066f 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/RepeatVector.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/RepeatVector.java @@ -21,6 +21,7 @@ package org.deeplearning4j.nn.conf.layers.misc; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.RNNFormat; @@ -38,19 +39,20 @@ import java.util.Collection; import java.util.Map; @Data -@NoArgsConstructor @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@SuperBuilder public class RepeatVector extends FeedForwardLayer { - - private int n = 1; + /** + * Set repetition factor for RepeatVector layer + * + * @param n upsampling size in height and width dimensions + */ + @lombok.Builder.Default + private int repetitionFactor = 1; // no repetition by default + @lombok.Builder.Default private RNNFormat dataFormat = RNNFormat.NCW; - protected RepeatVector(Builder builder) { - super(builder); - this.n = builder.n; - this.dataFormat = builder.dataFormat; - } @Override public RepeatVector clone() { @@ -67,7 +69,7 @@ public class RepeatVector extends FeedForwardLayer { Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { LayerConfiguration lconf = 
conf.getFlattenedLayerConfigurations().get(layerIndex); - +runInheritance(); org.deeplearning4j.nn.layers.RepeatVector ret = new org.deeplearning4j.nn.layers.RepeatVector(lconf, networkDataType); ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); @@ -81,18 +83,18 @@ public class RepeatVector extends FeedForwardLayer { @Override public InputType getOutputType(int layerIndex, InputType inputType) { if (inputType == null || inputType.getType() != InputType.Type.FF) { - throw new IllegalStateException("Invalid input for RepeatVector layer (layer name=\"" + getLayerName() + throw new IllegalStateException("Invalid input for RepeatVector layer (layer name=\"" + getName() + "\"): Expected FF input, got " + inputType); } InputType.InputTypeFeedForward ffInput = (InputType.InputTypeFeedForward) inputType; - return InputType.recurrent(ffInput.getSize(), n, this.dataFormat); + return InputType.recurrent(ffInput.getSize(), repetitionFactor, this.dataFormat); } @Override public LayerMemoryReport getMemoryReport(InputType inputType) { InputType outputType = getOutputType(-1, inputType); - return new LayerMemoryReport.Builder(layerName, RepeatVector.class, inputType, outputType).standardMemory(0, 0) + return new LayerMemoryReport.Builder(name, RepeatVector.class, inputType, outputType).standardMemory(0, 0) .workingMemory(0, 0, 0, 0) .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS).build(); } @@ -102,58 +104,4 @@ public class RepeatVector extends FeedForwardLayer { throw new UnsupportedOperationException("UpsamplingLayer does not contain parameters"); } - - - @NoArgsConstructor - @Getter - @Setter - public static class Builder> extends FeedForwardLayer.Builder { - - private int n = 1; // no repetition by default - private RNNFormat dataFormat = RNNFormat.NCW; - /** - * Set repetition factor for RepeatVector layer - */ - public int getRepetitionFactor() { - return n; - } - - public RNNFormat getDataFormat(){ - return dataFormat; - } - - public Builder dataFormat(RNNFormat dataFormat){ - this.dataFormat = dataFormat; - return this; - } - - /** - * Set repetition factor for RepeatVector layer - * - * @param n upsampling size in height and width dimensions - */ - public void setRepetitionFactor(int n) { - this.setN(n); - } - - public Builder(int n) { - this.setN(n); - } - - /** - * Set repetition factor for RepeatVector layer - * - * @param n upsampling size in height and width dimensions - */ - public Builder repetitionFactor(int n) { - this.setN(n); - return this; - } - - @Override - @SuppressWarnings("unchecked") - public RepeatVector build() { - return new RepeatVector(this); - } - } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/Yolo2OutputLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/Yolo2OutputLayer.java index 70bd048e6..031869ab6 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/Yolo2OutputLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/Yolo2OutputLayer.java @@ -20,10 +20,14 @@ package org.deeplearning4j.nn.conf.layers.objdetect; -import lombok.Data; -import lombok.EqualsAndHashCode; -import lombok.Getter; -import lombok.Setter; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.Map; 
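Sketch of the corresponding call sites for FrozenLayerWithBackprop and RepeatVector (illustration only, assuming the Lombok-generated setters for the fields shown above; innerDense again stands in for a configured LayerConfiguration).

    FrozenLayerWithBackprop flwb = FrozenLayerWithBackprop.builder(innerDense).build();
    RepeatVector rv = RepeatVector.builder()
            .repetitionFactor(3)   // replaces the old Builder field "n" / repetitionFactor(int)
            .build();              // dataFormat keeps its NCW default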
+import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.CNN2DFormat; @@ -41,218 +45,136 @@ import org.nd4j.linalg.learning.regularization.Regularization; import org.nd4j.linalg.lossfunctions.ILossFunction; import org.nd4j.linalg.lossfunctions.impl.LossL2; import org.nd4j.serde.jackson.shaded.NDArrayTextSerializer; -import com.fasterxml.jackson.databind.annotation.JsonDeserialize; -import com.fasterxml.jackson.databind.annotation.JsonSerialize; -import java.util.Arrays; -import java.util.Collection; -import java.util.List; -import java.util.Map; -@Data @EqualsAndHashCode(callSuper = false) +@SuperBuilder(buildMethodName = "initBuild") public class Yolo2OutputLayer extends LayerConfiguration { - private double lambdaCoord; - private double lambdaNoObj; - private ILossFunction lossPositionScale; - private ILossFunction lossClassPredictions; - @JsonSerialize(using = NDArrayTextSerializer.class) - @JsonDeserialize(using = BoundingBoxesDeserializer.class) - private INDArray boundingBoxes; + /** + * Loss function coefficient for position and size/scale components of the loss function. Default + * (as per paper): 5 + */ + @Builder.Default @Getter private double lambdaCoord = 5; + /** + * Loss function coefficient for the "no object confidence" components of the loss function. + * Default (as per paper): 0.5 + */ + @Builder.Default @Getter private double lambdaNoObj = 0.5; + /** Loss function for position/scale component of the loss function */ + @Builder.Default @Getter private ILossFunction lossPositionScale = new LossL2(); + /** + * Loss function for the class predictions - defaults to L2 loss (i.e., sum of squared errors, as + * per the paper), however Loss MCXENT could also be used (which is more common for + * classification). + */ + @Builder.Default @Getter private ILossFunction lossClassPredictions = new LossL2(); + ; + /** + * Bounding box priors dimensions [width, height]. For N bounding boxes, input has shape [rows, + * columns] = [N, 2] Note that dimensions should be specified as fraction of grid size. For + * example, a network with 13x13 output, a value of 1.0 would correspond to one grid cell; a value + * of 13 would correspond to the entire image. 
+ */ + @JsonSerialize(using = NDArrayTextSerializer.class) + @JsonDeserialize(using = BoundingBoxesDeserializer.class) + @Getter + private INDArray boundingBoxes; - private CNN2DFormat format = CNN2DFormat.NCHW; //Default for serialization of old formats + @Builder.Default @Getter + private CNN2DFormat format = CNN2DFormat.NCHW; // Default for serialization of old formats - private Yolo2OutputLayer() { - //No-arg constructor for Jackson JSON - } - private Yolo2OutputLayer(Builder builder) { - super(builder); - this.lambdaCoord = builder.lambdaCoord; - this.lambdaNoObj = builder.lambdaNoObj; - this.lossPositionScale = builder.lossPositionScale; - this.lossClassPredictions = builder.lossClassPredictions; - this.boundingBoxes = builder.boundingBoxes; - } + @Override + public Layer instantiate( + NeuralNetConfiguration conf, + Collection trainingListeners, + int layerIndex, + INDArray layerParamsView, + boolean initializeParams, + DataType networkDataType) { + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); - @Override - public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, - int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { - LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + org.deeplearning4j.nn.layers.objdetect.Yolo2OutputLayer ret = + new org.deeplearning4j.nn.layers.objdetect.Yolo2OutputLayer(lconf, networkDataType); + ret.addTrainingListeners(trainingListeners); + ret.setIndex(layerIndex); + ret.setParamsViewArray(layerParamsView); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); + ret.setParamTable(paramTable); + ret.setLayerConfiguration(lconf); + return ret; + } - org.deeplearning4j.nn.layers.objdetect.Yolo2OutputLayer ret = - new org.deeplearning4j.nn.layers.objdetect.Yolo2OutputLayer(lconf, networkDataType); - ret.addTrainingListeners(trainingListeners); - ret.setIndex(layerIndex); - ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(this, layerParamsView, initializeParams); - ret.setParamTable(paramTable); - ret.setLayerConfiguration(lconf); - return ret; - } + @Override + public ParamInitializer initializer() { + return EmptyParamInitializer.getInstance(); + } - @Override - public ParamInitializer initializer() { - return EmptyParamInitializer.getInstance(); - } + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + return inputType; // Same shape output as input + } - @Override - public InputType getOutputType(int layerIndex, InputType inputType) { - return inputType; //Same shape output as input - } + @Override + public void setNIn(InputType inputType, boolean override) { + InputType.InputTypeConvolutional c = (InputType.InputTypeConvolutional) inputType; + this.format = c.getFormat(); + } - @Override - public void setNIn(InputType inputType, boolean override) { - InputType.InputTypeConvolutional c = (InputType.InputTypeConvolutional) inputType; - this.format = c.getFormat(); - } - - @Override - public InputPreProcessor getPreProcessorForInputType(InputType inputType) { - switch (inputType.getType()) { - case FF: - case RNN: - throw new UnsupportedOperationException("Cannot use FF or RNN input types"); - case CNN: - return null; - case CNNFlat: - InputType.InputTypeConvolutionalFlat cf = (InputType.InputTypeConvolutionalFlat) inputType; - return new FeedForwardToCnnPreProcessor(cf.getHeight(), cf.getWidth(), cf.getDepth()); - default: - return 
null; - } - } - - @Override - public List getRegularizationByParam(String paramName) { - //Not applicable + @Override + public InputPreProcessor getPreProcessorForInputType(InputType inputType) { + switch (inputType.getType()) { + case FF: + case RNN: + throw new UnsupportedOperationException("Cannot use FF or RNN input types"); + case CNN: + return null; + case CNNFlat: + InputType.InputTypeConvolutionalFlat cf = (InputType.InputTypeConvolutionalFlat) inputType; + return new FeedForwardToCnnPreProcessor(cf.getHeight(), cf.getWidth(), cf.getDepth()); + default: return null; } + } - @Override - public boolean isPretrainParam(String paramName) { - return false; //No params - } - @Override - public LayerMemoryReport getMemoryReport(InputType inputType) { - long numValues = inputType.arrayElementsPerExample(); - - //This is a VERY rough estimate... - return new LayerMemoryReport.Builder(layerName, Yolo2OutputLayer.class, inputType, inputType) - .standardMemory(0, 0) //No params - .workingMemory(0, numValues, 0, 6 * numValues).cacheMemory(0, 0) //No cache - .build(); - } - - @Getter - @Setter - public static class Builder extends LayerConfiguration.Builder { - - /** - * Loss function coefficient for position and size/scale components of the loss function. Default (as per - * paper): 5 - * - */ - private double lambdaCoord = 5; - - /** - * Loss function coefficient for the "no object confidence" components of the loss function. Default (as per - * paper): 0.5 - * - */ - private double lambdaNoObj = 0.5; - - /** - * Loss function for position/scale component of the loss function - * - */ - private ILossFunction lossPositionScale = new LossL2(); - - /** - * Loss function for the class predictions - defaults to L2 loss (i.e., sum of squared errors, as per the - * paper), however Loss MCXENT could also be used (which is more common for classification). - * - */ - private ILossFunction lossClassPredictions = new LossL2(); - - /** - * Bounding box priors dimensions [width, height]. For N bounding boxes, input has shape [rows, columns] = [N, - * 2] Note that dimensions should be specified as fraction of grid size. For example, a network with 13x13 - * output, a value of 1.0 would correspond to one grid cell; a value of 13 would correspond to the entire - * image. - * - */ - private INDArray boundingBoxes; - - /** - * Loss function coefficient for position and size/scale components of the loss function. Default (as per - * paper): 5 - * - * @param lambdaCoord Lambda value for size/scale component of loss function - */ - public Builder lambdaCoord(double lambdaCoord) { - this.setLambdaCoord(lambdaCoord); - return this; - } - - /** - * Loss function coefficient for the "no object confidence" components of the loss function. Default (as per - * paper): 0.5 - * - * @param lambdaNoObj Lambda value for no-object (confidence) component of the loss function - */ - public Builder lambdaNoObj(double lambdaNoObj) { - this.setLambdaNoObj(lambdaNoObj); - return this; - } - - /** - * Loss function for position/scale component of the loss function - * - * @param lossPositionScale Loss function for position/scale - */ - public Builder lossPositionScale(ILossFunction lossPositionScale) { - this.setLossPositionScale(lossPositionScale); - return this; - } - - /** - * Loss function for the class predictions - defaults to L2 loss (i.e., sum of squared errors, as per the - * paper), however Loss MCXENT could also be used (which is more common for classification). 
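Not part of the diff: with @SuperBuilder, the old boundingBoxPriors(INDArray) builder method becomes the generated boundingBoxes(...) setter, while the paper defaults (lambdaCoord = 5, lambdaNoObj = 0.5, L2 losses) are carried by @Builder.Default. The priors array below is a made-up example of shape [nBoxes, 2]; the custom build() further down in this diff rejects null or wrongly shaped priors.

    // assumes: import org.nd4j.linalg.factory.Nd4j;
    INDArray priors = Nd4j.create(new double[][] {{1.0, 1.0}, {2.5, 2.5}}); // [width, height] per box, as grid fractions
    Yolo2OutputLayer yolo = Yolo2OutputLayer.builder()
            .boundingBoxes(priors)   // required by the builder's validation
            .build();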
- * - * @param lossClassPredictions Loss function for the class prediction error component of the YOLO loss function - */ - public Builder lossClassPredictions(ILossFunction lossClassPredictions) { - this.setLossClassPredictions(lossClassPredictions); - return this; - } - - /** - * Bounding box priors dimensions [width, height]. For N bounding boxes, input has shape [rows, columns] = [N, - * 2] Note that dimensions should be specified as fraction of grid size. For example, a network with 13x13 - * output, a value of 1.0 would correspond to one grid cell; a value of 13 would correspond to the entire - * image. - * - * @param boundingBoxes Bounding box prior dimensions (width, height) - */ - public Builder boundingBoxPriors(INDArray boundingBoxes) { - this.setBoundingBoxes(boundingBoxes); - return this; - } - - @Override - public Yolo2OutputLayer build() { - if (boundingBoxes == null) { - throw new IllegalStateException("Bounding boxes have not been set"); - } - - if (boundingBoxes.rank() != 2 || boundingBoxes.size(1) != 2) { - throw new IllegalStateException("Bounding box priors must have shape [nBoxes, 2]. Has shape: " - + Arrays.toString(boundingBoxes.shape())); - } - - return new Yolo2OutputLayer(this); - } + @Override + public List getRegularizationByParam(String paramName) { + // Not applicable + return null; + } + + @Override + public boolean isPretrainParam(String paramName) { + return false; // No params + } + + @Override + public LayerMemoryReport getMemoryReport(InputType inputType) { + long numValues = inputType.arrayElementsPerExample(); + + // This is a VERY rough estimate... + return new LayerMemoryReport.Builder(name, Yolo2OutputLayer.class, inputType, inputType) + .standardMemory(0, 0) // No params + .workingMemory(0, numValues, 0, 6 * numValues) + .cacheMemory(0, 0) // No cache + .build(); + } + + public static abstract class Yolo2OutputLayerBuilder< + C extends Yolo2OutputLayer, B extends Yolo2OutputLayerBuilder> + extends LayerConfigurationBuilder { + public C build() { + if (boundingBoxes == null) { + throw new IllegalStateException("Bounding boxes have not been set"); + } + + if (boundingBoxes.rank() != 2 || boundingBoxes.size(1) != 2) { + throw new IllegalStateException( + "Bounding box priors must have shape [nBoxes, 2]. 
Has shape: " + + Arrays.toString(boundingBoxes.shape())); + } + return initBuild(); } + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/Bidirectional.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/Bidirectional.java index 573492f3a..26e9e12ec 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/Bidirectional.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/Bidirectional.java @@ -21,8 +21,8 @@ package org.deeplearning4j.nn.conf.layers.recurrent; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.ParamInitializer; -import org.deeplearning4j.nn.conf.GradientNormalization; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.RNNFormat; @@ -48,12 +48,27 @@ import java.util.Map; import static org.nd4j.linalg.indexing.NDArrayIndex.interval; -@NoArgsConstructor @Data @EqualsAndHashCode(callSuper = true, exclude = {"initializer"}) @JsonIgnoreProperties({"initializer"}) +@SuperBuilder(builderMethodName = "innerBuilder") public class Bidirectional extends LayerConfiguration { + public static abstract class BidirectionalBuilder> + extends LayerConfigurationBuilder { + + public B rnnLayer(LayerConfiguration layer) { + if (!(layer instanceof BaseRecurrentLayer || layer instanceof LastTimeStep + || layer instanceof BaseWrapperLayerConfiguration)) { + throw new IllegalArgumentException("Cannot wrap a non-recurrent layer: " + + "config must extend BaseRecurrentLayer or LastTimeStep " + "Got class: " + + layer.getClass()); + } + this.fwd = layer; + this.bwd = layer.clone(); + return self(); + } + } /** * This Mode enumeration defines how the activations for the forward and backward networks should be combined.
* ADD: out = forward + backward (elementwise addition)
MUL: out = forward * backward (elementwise @@ -69,39 +84,31 @@ public class Bidirectional extends LayerConfiguration { private LayerConfiguration fwd; private LayerConfiguration bwd; - private Mode mode; + @Builder.Default + private Mode mode = Mode.CONCAT; private transient BidirectionalParamInitializer initializer; - private Bidirectional(Bidirectional.Builder builder) { - super(builder); - } + /** * Create a Bidirectional wrapper, with the default Mode (CONCAT) for the specified layer * - * @param layer layer to wrap + * @param conf layer to wrap */ - public Bidirectional(@NonNull LayerConfiguration layer) { - this(Mode.CONCAT, layer); + public static BidirectionalBuilder builder(@NonNull LayerConfiguration conf) { + return innerBuilder() + .rnnLayer(conf); } - /** - * Create a Bidirectional wrapper for the specified layer - * - * @param mode Mode to use to combine activations. See {@link Mode} for details - * @param layer layer to wrap - */ - public Bidirectional(@NonNull Mode mode, @NonNull LayerConfiguration layer) { - if (!(layer instanceof BaseRecurrentLayer || layer instanceof LastTimeStep - || layer instanceof BaseWrapperLayerConfiguration)) { - throw new IllegalArgumentException("Cannot wrap a non-recurrent layer: " - + "config must extend BaseRecurrentLayer or LastTimeStep " + "Got class: " - + layer.getClass()); - } - this.fwd = layer; - this.bwd = layer.clone(); - this.mode = mode; + public static BidirectionalBuilder builder(@NonNull Mode mode, @NonNull LayerConfiguration conf) { + return innerBuilder() + .rnnLayer(conf) + .mode(mode); } + public static BidirectionalBuilder builder() { + return innerBuilder(); + } + public long getNOut() { if (this.fwd instanceof LastTimeStep) { @@ -128,6 +135,7 @@ public class Bidirectional extends LayerConfiguration { Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + runInheritance(); NeuralNetConfiguration c1 = conf.clone(); NeuralNetConfiguration c2 = conf.clone(); c1.setLayer(fwd); @@ -212,10 +220,10 @@ public class Bidirectional extends LayerConfiguration { } @Override - public void setLayerName(String layerName) { - this.layerName = layerName; - fwd.setLayerName(layerName); - bwd.setLayerName(layerName); + public void setName(String layerName) { + this.name = layerName; + fwd.setName(layerName); + bwd.setName(layerName); } @Override @@ -225,37 +233,5 @@ public class Bidirectional extends LayerConfiguration { return lmr; } - @AllArgsConstructor - @Getter - @Setter - public static class Builder extends LayerConfiguration.Builder { - private Mode mode; - private LayerConfiguration layer; - - public void setLayer(LayerConfiguration layer) { - rnnLayer(layer); - } - - public Builder mode(Mode mode) { - this.setMode(mode); - return this; - } - - public Builder rnnLayer(LayerConfiguration layer) { - if (!(layer instanceof BaseRecurrentLayer || layer instanceof LastTimeStep - || layer instanceof BaseWrapperLayerConfiguration)) { - throw new IllegalArgumentException("Cannot wrap a non-recurrent layer: " - + "config must extend BaseRecurrentLayer or LastTimeStep " + "Got class: " - + layer.getClass()); - } - this.setLayer(layer); - return this; - } - - @SuppressWarnings("unchecked") - public Bidirectional build() { - return new Bidirectional(this); - } - } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/LastTimeStep.java 
b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/LastTimeStep.java index a5dff218f..6abc8b3c2 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/LastTimeStep.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/LastTimeStep.java @@ -20,6 +20,8 @@ package org.deeplearning4j.nn.conf.layers.recurrent; +import lombok.experimental.SuperBuilder; +import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; @@ -30,14 +32,24 @@ import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import java.util.Collection; - +@SuperBuilder(builderMethodName = "innerBuilder") public class LastTimeStep extends BaseWrapperLayerConfiguration { - private LastTimeStep() {} + public static LastTimeStepBuilder builder() { + return innerBuilder(); + } - public LastTimeStep(LayerConfiguration underlying) { - super(underlying); - this.layerName = underlying.getLayerName(); // needed for keras import to match names + + public static LastTimeStepBuilder builder(LayerConfiguration underlying) { + return innerBuilder() + .underlying(underlying) + .name(underlying.getName()); + } + + public static LastTimeStepBuilder builder(Layer underlying) { + return innerBuilder() + .underlying(underlying.getLayerConfiguration()) + .name(underlying.getLayerConfiguration().getName()); } public LayerConfiguration getUnderlying() { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/SimpleRnn.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/SimpleRnn.java index 1d4c182aa..832d48034 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/SimpleRnn.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/SimpleRnn.java @@ -21,6 +21,8 @@ package org.deeplearning4j.nn.conf.layers.recurrent; import lombok.*; +import lombok.experimental.Accessors; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -37,26 +39,24 @@ import org.nd4j.linalg.api.ndarray.INDArray; import java.util.Collection; import java.util.Map; -@Data -@EqualsAndHashCode(callSuper = false) -public class SimpleRnn extends BaseRecurrentLayer { +@EqualsAndHashCode(callSuper = false) +@SuperBuilder +public class SimpleRnn extends BaseRecurrentLayer { + /** + * If true (default = false): enable layer normalization on this layer + * + */ + @lombok.Builder.Default @Accessors @Getter private boolean hasLayerNorm = false; - protected SimpleRnn(Builder builder) { - super(builder); - this.hasLayerNorm = builder.hasLayerNorm; - } - - private SimpleRnn() { - - } @Override public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { - LayerValidation.assertNInNOutSet("SimpleRnn", getLayerName(), layerIndex, getNIn(), getNOut()); + LayerValidation.assertNInNOutSet("SimpleRnn", getName(), layerIndex, getNIn(), getNOut()); LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + runInheritance(); org.deeplearning4j.nn.layers.recurrent.SimpleRnn ret = new 
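A hedged usage sketch of the new LastTimeStep factory methods shown above; the wrapped layer's own builder (here an LSTM) is an assumption, not part of this hunk.

    // The convenience overload copies the wrapped layer's name, matching the old constructor behaviour.
    LayerConfiguration lstm = LSTM.builder().nIn(32).nOut(64).name("lstm_1").build();  // assumed builder
    LastTimeStep lts = LastTimeStep.builder(lstm).build();
    // Spelled out through the plain builder():
    LastTimeStep same = LastTimeStep.builder()
        .underlying(lstm)
        .name(lstm.getName())
        .build();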
org.deeplearning4j.nn.layers.recurrent.SimpleRnn(lconf, networkDataType); @@ -78,30 +78,4 @@ public class SimpleRnn extends BaseRecurrentLayer { public LayerMemoryReport getMemoryReport(InputType inputType) { return null; } - - public boolean hasLayerNorm(){ - return hasLayerNorm; - } - - @NoArgsConstructor - @Getter - @Setter - public static class Builder extends BaseRecurrentLayer.Builder { - - - @Override - public SimpleRnn build() { - return new SimpleRnn(this); - } - - /** - * If true (default = false): enable layer normalization on this layer - * - */ - private boolean hasLayerNorm = false; - public SimpleRnn.Builder hasLayerNorm(boolean hasLayerNorm){ - this.hasLayerNorm = hasLayerNorm; - return this; - } - } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/TimeDistributed.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/TimeDistributed.java index 73cddbf14..d78b4acf3 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/TimeDistributed.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/TimeDistributed.java @@ -20,9 +20,9 @@ package org.deeplearning4j.nn.conf.layers.recurrent; -import lombok.Data; -import lombok.EqualsAndHashCode; -import lombok.NonNull; +import java.util.Collection; +import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.RNNFormat; @@ -33,66 +33,67 @@ import org.deeplearning4j.nn.layers.recurrent.TimeDistributedLayer; import org.deeplearning4j.optimize.api.TrainingListener; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import com.fasterxml.jackson.annotation.JsonProperty; -import java.util.Collection; - -@Data @EqualsAndHashCode(callSuper = true) +@SuperBuilder public class TimeDistributed extends BaseWrapperLayerConfiguration { + @Getter @Setter private RNNFormat rnnDataFormat = RNNFormat.NCW; - private RNNFormat rnnDataFormat = RNNFormat.NCW; + @Override + public org.deeplearning4j.nn.api.Layer instantiate( + NeuralNetConfiguration conf, + Collection trainingListeners, + int layerIndex, + INDArray layerParamsView, + boolean initializeParams, + DataType networkDataType) { + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); - /** - * @param underlying Underlying (internal) layer - should be a feed forward type such as DenseLayerConfiguration - */ - public TimeDistributed(@JsonProperty("underlying") @NonNull LayerConfiguration underlying, @JsonProperty("rnnDataFormat") RNNFormat rnnDataFormat) { - super(underlying); - this.rnnDataFormat = rnnDataFormat; + NeuralNetConfiguration conf2 = conf.clone(); + conf2.setLayer(((TimeDistributed) lconf).getUnderlying()); + return new TimeDistributedLayer( + underlying.instantiate( + conf2, + trainingListeners, + layerIndex, + layerParamsView, + initializeParams, + networkDataType), + rnnDataFormat); + } + + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + if (inputType.getType() != InputType.Type.RNN) { + throw new IllegalStateException( + "Only RNN input type is supported as input to TimeDistributed layer (layer #" + + layerIndex + + ")"); } - public TimeDistributed(LayerConfiguration underlying){ - super(underlying); + InputType.InputTypeRecurrent rnn = (InputType.InputTypeRecurrent) inputType; + InputType 
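A minimal sketch of configuring SimpleRnn through the generated @SuperBuilder; the nIn/nOut setters are assumed to be inherited from the parent feed-forward builder, and the sizes are illustrative only.

    SimpleRnn rnn = SimpleRnn.builder()
        .nIn(10)
        .nOut(20)
        .hasLayerNorm(true)   // layer normalization; defaults to false via @Builder.Default
        .build();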
ff = InputType.feedForward(rnn.getSize()); + InputType ffOut = underlying.getOutputType(layerIndex, ff); + return InputType.recurrent( + ffOut.arrayElementsPerExample(), rnn.getTimeSeriesLength(), rnnDataFormat); + } + + @Override + public void setNIn(InputType inputType, boolean override) { + if (inputType.getType() != InputType.Type.RNN) { + throw new IllegalStateException( + "Only RNN input type is supported as input to TimeDistributed layer"); } - @Override - public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, - int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { - LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + InputType.InputTypeRecurrent rnn = (InputType.InputTypeRecurrent) inputType; + InputType ff = InputType.feedForward(rnn.getSize()); + this.rnnDataFormat = rnn.getFormat(); + underlying.setNIn(ff, override); + } - NeuralNetConfiguration conf2 = conf.clone(); - conf2.setLayer(((TimeDistributed) lconf).getUnderlying()); - return new TimeDistributedLayer(underlying.instantiate(conf2, trainingListeners, layerIndex, layerParamsView, - initializeParams, networkDataType), rnnDataFormat); - } - - @Override - public InputType getOutputType(int layerIndex, InputType inputType) { - if (inputType.getType() != InputType.Type.RNN) { - throw new IllegalStateException("Only RNN input type is supported as input to TimeDistributed layer (layer #" + layerIndex + ")"); - } - - InputType.InputTypeRecurrent rnn = (InputType.InputTypeRecurrent) inputType; - InputType ff = InputType.feedForward(rnn.getSize()); - InputType ffOut = underlying.getOutputType(layerIndex, ff); - return InputType.recurrent(ffOut.arrayElementsPerExample(), rnn.getTimeSeriesLength(), rnnDataFormat); - } - - @Override - public void setNIn(InputType inputType, boolean override) { - if (inputType.getType() != InputType.Type.RNN) { - throw new IllegalStateException("Only RNN input type is supported as input to TimeDistributed layer"); - } - - InputType.InputTypeRecurrent rnn = (InputType.InputTypeRecurrent) inputType; - InputType ff = InputType.feedForward(rnn.getSize()); - this.rnnDataFormat = rnn.getFormat(); - underlying.setNIn(ff, override); - } - - @Override - public InputPreProcessor getPreProcessorForInputType(InputType inputType) { - //No preprocessor - the wrapper layer operates as the preprocessor - return null; - } + @Override + public InputPreProcessor getPreProcessorForInputType(InputType inputType) { + // No preprocessor - the wrapper layer operates as the preprocessor + return null; + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java index 0d05a9486..70cf38d07 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java @@ -20,10 +20,11 @@ package org.deeplearning4j.nn.conf.layers.samediff; -import lombok.Data; -import lombok.EqualsAndHashCode; -import lombok.Getter; -import lombok.Setter; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import lombok.*; +import lombok.experimental.SuperBuilder; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.nn.api.ParamInitializer; import 
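A hedged sketch of the TimeDistributed wrapper with its new builder; DenseLayer.builder() and the inherited underlying(..) setter are assumptions based on the rest of this patch.

    TimeDistributed td = TimeDistributed.builder()
        .underlying(DenseLayer.builder().nIn(128).nOut(64).build())  // assumed DenseLayer builder
        .rnnDataFormat(RNNFormat.NCW)                                // NCW is the @Builder.Default
        .build();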
org.deeplearning4j.nn.conf.GradientNormalization; @@ -46,362 +47,334 @@ import org.nd4j.linalg.learning.regularization.L2Regularization; import org.nd4j.linalg.learning.regularization.Regularization; import org.nd4j.linalg.learning.regularization.WeightDecay; -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; -import java.util.Map; - @Slf4j -@Data @EqualsAndHashCode(callSuper = true, doNotUseGetters = true) +@NoArgsConstructor +@SuperBuilder public abstract class AbstractSameDiffLayer extends LayerConfiguration { - protected List regularization; - protected List regularizationBias; - protected IUpdater updater; - protected IUpdater biasUpdater; - protected GradientNormalization gradientNormalization; - protected double gradientNormalizationThreshold = Double.NaN; + /** + * The regularization for the parameters (excluding biases) - for example {@link WeightDecay} + * + *

-- SETTER -- Set the regularization for the parameters (excluding biases) - for example + * {@link WeightDecay} + * + * @param regularization Regularization to apply for the network parameters/weights (excluding + * biases) + */ + @Getter + protected List regularization; + /** + * The regularization for the biases only - for example {@link WeightDecay} -- SETTER -- Set the + * regularization for the biases only - for example {@link WeightDecay} + * + * @param regularizationBias Regularization to apply for the network biases only + */ + @Getter + protected List regularizationBias; + /** + * Gradient updater. For example, {@link org.nd4j.linalg.learning.config.Adam} or {@link + * org.nd4j.linalg.learning.config.Nesterovs} + * + * @param updater Updater to use + */ + protected @Getter @Setter IUpdater updater; + /** + * Gradient updater configuration, for the biases only. If not set, biases will use the updater as + * set by {@link #setUpdater(IUpdater)} + * + * @param biasUpdater Updater to use for bias parameters + */ + protected @Getter @Setter IUpdater biasUpdater; +@Getter @Setter + protected GradientNormalization gradientNormalization; +@Getter @Setter + protected double gradientNormalizationThreshold = Double.NaN; +@Getter @Setter + private SDLayerParams layerParams; - private SDLayerParams layerParams; + @Override + public List getRegularizationByParam(String paramName) { + if (layerParams.isWeightParam(paramName)) { + return regularization; + } else if (layerParams.isBiasParam(paramName)) { + return regularizationBias; + } + return null; + } - @Override - public List getRegularizationByParam(String paramName) { - if(layerParams.isWeightParam(paramName)){ - return regularization; - } else if(layerParams.isBiasParam(paramName)){ - return regularizationBias; - } - return null; + public SDLayerParams getLayerParams() { + if (layerParams == null) { + layerParams = new SDLayerParams(); + defineParameters(layerParams); + } + return layerParams; + } + + @Override + public void setNIn(InputType inputType, boolean override) { + // Default implementation: no-op + } + + @Override + public InputPreProcessor getPreProcessorForInputType(InputType inputType) { + // Default implementation: no-op + return null; + } + + public void applyGlobalConfigToLayer( + NeuralNetConfiguration.NeuralNetConfigurationBuilder globalConfig) { + // Default implementation: no op + } + + /** + * Define the parameters for the network. 
Use {@link SDLayerParams#addWeightParam(String, + * long...)} and {@link SDLayerParams#addBiasParam(String, long...)} + * + * @param params Object used to set parameters for this layer + */ + public abstract void defineParameters(SDLayerParams params); + + /** + * Set the initial parameter values for this layer, if required + * + * @param params Parameter arrays that may be initialized + */ + public abstract void initializeParameters(Map params); + + @Override + public abstract org.deeplearning4j.nn.api.Layer instantiate( + NeuralNetConfiguration conf, + Collection trainingListeners, + int layerIndex, + INDArray layerParamsView, + boolean initializeParams, + DataType networkDataType); + + // ================================================================================================================== + + @Override + public ParamInitializer initializer() { + return SameDiffParamInitializer.getInstance(); + } + + @Override + public IUpdater getUpdaterByParam(String paramName) { + if (biasUpdater != null && initializer().isBiasParam(this, paramName)) { + return biasUpdater; + } else if (initializer().isBiasParam(this, paramName) + || initializer().isWeightParam(this, paramName)) { + return updater; + } + throw new IllegalStateException("Unknown parameter key: " + paramName); + } + + @Override + public boolean isPretrainParam(String paramName) { + return false; + } + + @Override + public LayerMemoryReport getMemoryReport(InputType inputType) { + return new LayerMemoryReport(); // TODO + } + + /** + * Returns the memory layout ('c' or 'f' order - i.e., row/column major) of the parameters. In + * most cases, this can/should be left + * + * @param param Name of the parameter + * @return Memory layout ('c' or 'f') of the parameter + */ + public char paramReshapeOrder(String param) { + return 'c'; + } + + protected void initWeights(int fanIn, int fanOut, WeightInit weightInit, INDArray array) { + WeightInitUtil.initWeights( + fanIn, fanOut, array.shape(), weightInit, null, paramReshapeOrder(null), array); + } + + public void applyGlobalConfig(NeuralNetConfiguration.NeuralNetConfigurationBuilder b) { + NeuralNetConfiguration bConf = b.build(); + if (regularization == null || regularization.isEmpty()) { + regularization = bConf.getRegularization(); + } + if (regularizationBias == null || regularizationBias.isEmpty()) { + regularizationBias = bConf.getRegularizationBias(); + } + if (updater == null) { + updater = bConf.getUpdater(); + } + if (biasUpdater == null) { + biasUpdater = bConf.getBiasUpdater(); + } + if (gradientNormalization == null) { + gradientNormalization = bConf.getGradientNormalization(); + } + if (Double.isNaN(gradientNormalizationThreshold)) { + gradientNormalizationThreshold = bConf.getGradientNormalizationThreshold(); } - protected AbstractSameDiffLayer(Builder builder) { - super(builder); - this.regularization = builder.regularization; - this.regularizationBias = builder.regularizationBias; - this.updater = builder.updater; - this.biasUpdater = builder.biasUpdater; + applyGlobalConfigToLayer(b); + } - //Check that this class has a no-arg constructor for JSON: better to detect this now provide useful information - // to pre-empt a failure later for users, which will have a more difficult to understand message - try { - getClass().getDeclaredConstructor(); - } catch (NoSuchMethodException e) { - log.warn("***SameDiff layer {} does not have a zero argument (no-arg) constructor.***\nA no-arg constructor " - + "is required for JSON deserialization, which is used for both model 
saving and distributed (Spark) " - + "training.\nA no-arg constructor (private, protected or public) as well as setters (or simply a " - + "Lombok @Data annotation) should be added to avoid JSON errors later.", - getClass().getName()); - } catch (SecurityException e) { - //Ignore - } + /** + * This method generates an "all ones" mask array for use in the SameDiff model when none is + * provided. + * + * @param input Input to the layer + * @return A mask array - should be same datatype as the input (usually) + */ + public INDArray onesMaskForInput(INDArray input) { + if (input.rank() == 2) { + return Nd4j.ones(input.dataType(), input.size(0), 1); + } else if (input.rank() == 3) { + return Nd4j.ones( + input.dataType(), + input.size(0), + input.size(2)); // mask: [mb, length] vs. input [mb, nIn, length] + } else if (input.rank() == 4) { + // CNN style - return [mb, 1, 1, 1] for broadcast... + return Nd4j.ones(input.dataType(), input.size(0), 1, 1, 1); + } else if (input.rank() == 5) { + // CNN3D style - return [mb, 1, 1, 1, 1] for broadcast... + return Nd4j.ones(input.dataType(), input.size(0), 1, 1, 1, 1); + } else { + throw new IllegalStateException( + "When using masking with rank 1 or 6+ inputs, the onesMaskForInput method must be implemented, " + + "in order to determine the correct mask shape for this layer"); } + } - protected AbstractSameDiffLayer() { - //No op constructor for Jackson - } + public abstract static class AbstractSameDiffLayerBuilder< + C extends AbstractSameDiffLayer, B extends AbstractSameDiffLayerBuilder> + extends LayerConfigurationBuilder { - public SDLayerParams getLayerParams() { - if (layerParams == null) { - layerParams = new SDLayerParams(); - defineParameters(layerParams); - } - return layerParams; - } - - @Override - public void setNIn(InputType inputType, boolean override) { - //Default implementation: no-op - } - - @Override - public InputPreProcessor getPreProcessorForInputType(InputType inputType) { - //Default implementation: no-op - return null; - } - - - public void applyGlobalConfigToLayer(NeuralNetConfiguration.NeuralNetConfigurationBuilder globalConfig) { - //Default implementation: no op + /** + * L1 regularization coefficient (weights only). Use {@link #l1Bias(double)} to configure the l1 + * regularization coefficient for the bias. + */ + public B l1(double l1) { + // Check if existing L1 exists; if so, replace it + NetworkUtils.removeInstances(this.regularization, L1Regularization.class); + if (l1 > 0.0) { + this.regularization.add(new L1Regularization(l1)); + } + return self(); } /** - * Define the parameters for the network. Use {@link SDLayerParams#addWeightParam(String, long...)} and {@link - * SDLayerParams#addBiasParam(String, long...)} + * L2 regularization coefficient (weights only). Use {@link #l2Bias(double)} to configure the l2 + * regularization coefficient for the bias.
+ * Note: Generally, {@link WeightDecay} (set via {@link #weightDecay(double,boolean)}) + * should be preferred to L2 regularization. See {@link WeightDecay} javadoc for further + * details.
+ */ + public B l2(double l2) { + // Check if existing L2 exists; if so, replace it. Also remove weight decay - it doesn't make + // sense to use both + NetworkUtils.removeInstances(this.regularization, L2Regularization.class); + if (l2 > 0.0) { + NetworkUtils.removeInstancesWithWarning( + this.regularization, + WeightDecay.class, + "WeightDecay regularization removed: incompatible with added L2 regularization"); + this.regularization.add(new L2Regularization(l2)); + } + return self(); + } + + /** L1 regularization coefficient for the bias. Default: 0. See also {@link #l1(double)} */ + public B l1Bias(double l1Bias) { + NetworkUtils.removeInstances(this.regularizationBias, L1Regularization.class); + if (l1Bias > 0.0) { + this.regularizationBias.add(new L1Regularization(l1Bias)); + } + return self(); + } + + /** + * L2 regularization coefficient for the bias. Default: 0. See also {@link #l2(double)}
+ * Note: Generally, {@link WeightDecay} (set via {@link #weightDecayBias(double,boolean)}) + * should be preferred to L2 regularization. See {@link WeightDecay} javadoc for further + * details.
+ */ + public B l2Bias(double l2Bias) { + NetworkUtils.removeInstances(this.regularizationBias, L2Regularization.class); + if (l2Bias > 0.0) { + NetworkUtils.removeInstancesWithWarning( + this.regularizationBias, + WeightDecay.class, + "WeightDecay bias regularization removed: incompatible with added L2 regularization"); + this.regularizationBias.add(new L2Regularization(l2Bias)); + } + return self(); + } + + /** + * Add weight decay regularization for the network parameters (excluding biases).
+ * This applies weight decay with the learning rate multiplied in - see {@link WeightDecay} + * for more details.
* - * @param params Object used to set parameters for this layer + * @param coefficient Weight decay regularization coefficient + * @see #weightDecay(double, boolean) */ - public abstract void defineParameters(SDLayerParams params); + public B weightDecay(double coefficient) { + return weightDecay(coefficient, true); + } /** - * Set the initial parameter values for this layer, if required + * Add weight decay regularization for the network parameters (excluding biases). See {@link + * WeightDecay} for more details.
* - * @param params Parameter arrays that may be initialized + * @param coefficient Weight decay regularization coefficient + * @param applyLR Whether the learning rate should be multiplied in when performing weight decay + * updates. See {@link WeightDecay} for more details. + * @see #weightDecay(double, boolean) */ - public abstract void initializeParameters(Map params); - - @Override - public abstract org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, - Collection trainingListeners, int layerIndex, INDArray layerParamsView, - boolean initializeParams, DataType networkDataType); - - //================================================================================================================== - - @Override - public ParamInitializer initializer() { - return SameDiffParamInitializer.getInstance(); - } - - @Override - public IUpdater getUpdaterByParam(String paramName) { - if (biasUpdater != null && initializer().isBiasParam(this, paramName)) { - return biasUpdater; - } else if (initializer().isBiasParam(this, paramName) || initializer().isWeightParam(this, paramName)) { - return updater; - } - throw new IllegalStateException("Unknown parameter key: " + paramName); - } - - @Override - public boolean isPretrainParam(String paramName) { - return false; - } - - @Override - public LayerMemoryReport getMemoryReport(InputType inputType) { - return new LayerMemoryReport(); //TODO + public B weightDecay(double coefficient, boolean applyLR) { + // Check if existing weight decay if it exists; if so, replace it. Also remove L2 - it doesn't + // make sense to use both + NetworkUtils.removeInstances(this.regularization, WeightDecay.class); + if (coefficient > 0.0) { + NetworkUtils.removeInstancesWithWarning( + this.regularization, + L2Regularization.class, + "L2 regularization removed: incompatible with added WeightDecay regularization"); + this.regularization.add(new WeightDecay(coefficient, applyLR)); + } + return self(); } /** - * Returns the memory layout ('c' or 'f' order - i.e., row/column major) of the parameters. In most cases, this - * can/should be left + * Weight decay for the biases only - see {@link #weightDecay(double)} for more details. This + * applies weight decay with multiplying the learning rate.
* - * @param param Name of the parameter - * @return Memory layout ('c' or 'f') of the parameter + * @param coefficient Weight decay regularization coefficient + * @see #weightDecayBias(double, boolean) */ - public char paramReshapeOrder(String param) { - return 'c'; - } - - protected void initWeights(int fanIn, int fanOut, WeightInit weightInit, INDArray array) { - WeightInitUtil.initWeights(fanIn, fanOut, array.shape(), weightInit, null, paramReshapeOrder(null), array); - } - - public void applyGlobalConfig(NeuralNetConfiguration.NeuralNetConfigurationBuilder b) { - NeuralNetConfiguration bConf = b.build(); - if (regularization == null || regularization.isEmpty()) { - regularization = bConf.getRegularization(); - } - if (regularizationBias == null || regularizationBias.isEmpty()) { - regularizationBias = bConf.getRegularizationBias(); - } - if (updater == null) { - updater = bConf.getUpdater(); - } - if (biasUpdater == null) { - biasUpdater = bConf.getBiasUpdater(); - } - if (gradientNormalization == null) { - gradientNormalization = bConf.getGradientNormalization(); - } - if (Double.isNaN(gradientNormalizationThreshold)) { - gradientNormalizationThreshold = bConf.getGradientNormalizationThreshold(); - } - - applyGlobalConfigToLayer(b); + public B weightDecayBias(double coefficient) { + return weightDecayBias(coefficient, true); } /** - * This method generates an "all ones" mask array for use in the SameDiff model when none is provided. - * @param input Input to the layer - * @return A mask array - should be same datatype as the input (usually) + * Weight decay for the biases only - see {@link #weightDecay(double)} for more details
+ * + * @param coefficient Weight decay regularization coefficient */ - public INDArray onesMaskForInput(INDArray input){ - if(input.rank() == 2){ - return Nd4j.ones(input.dataType(), input.size(0), 1); - } else if(input.rank() == 3){ - return Nd4j.ones(input.dataType(), input.size(0), input.size(2)); //mask: [mb, length] vs. input [mb, nIn, length] - } else if(input.rank() == 4){ - //CNN style - return [mb, 1, 1, 1] for broadcast... - return Nd4j.ones(input.dataType(), input.size(0), 1, 1, 1); - } else if(input.rank() == 5){ - //CNN3D style - return [mb, 1, 1, 1, 1] for broadcast... - return Nd4j.ones(input.dataType(), input.size(0), 1, 1, 1, 1); - } else { - throw new IllegalStateException("When using masking with rank 1 or 6+ inputs, the onesMaskForInput method must be implemented, " + - "in order to determine the correct mask shape for this layer"); - } - } - - @Getter - @Setter - public static abstract class Builder> extends LayerConfiguration.Builder { - - protected List regularization = new ArrayList<>(); - protected List regularizationBias = new ArrayList<>(); - - /** - * Gradient updater. For example, {@link org.nd4j.linalg.learning.config.Adam} or {@link - * org.nd4j.linalg.learning.config.Nesterovs} - * - */ - protected IUpdater updater = null; - - /** - * Gradient updater configuration, for the biases only. If not set, biases will use the updater as set by {@link - * #updater(IUpdater)} - * - */ - protected IUpdater biasUpdater = null; - - /** - * L1 regularization coefficient (weights only). Use {@link #l1Bias(double)} to configure the l1 regularization - * coefficient for the bias. - */ - public T l1(double l1) { - //Check if existing L1 exists; if so, replace it - NetworkUtils.removeInstances(this.regularization, L1Regularization.class); - if(l1 > 0.0) { - this.regularization.add(new L1Regularization(l1)); - } - return (T) this; - } - - /** - * L2 regularization coefficient (weights only). Use {@link #l2Bias(double)} to configure the l2 regularization - * coefficient for the bias.
- * Note: Generally, {@link WeightDecay} (set via {@link #weightDecay(double,boolean)} should be preferred to - * L2 regularization. See {@link WeightDecay} javadoc for further details.
- */ - public T l2(double l2) { - //Check if existing L2 exists; if so, replace it. Also remove weight decay - it doesn't make sense to use both - NetworkUtils.removeInstances(this.regularization, L2Regularization.class); - if(l2 > 0.0) { - NetworkUtils.removeInstancesWithWarning(this.regularization, WeightDecay.class, "WeightDecay regularization removed: incompatible with added L2 regularization"); - this.regularization.add(new L2Regularization(l2)); - } - return (T) this; - } - - /** - * L1 regularization coefficient for the bias. Default: 0. See also {@link #l1(double)} - */ - public T l1Bias(double l1Bias) { - NetworkUtils.removeInstances(this.regularizationBias, L1Regularization.class); - if(l1Bias > 0.0) { - this.regularizationBias.add(new L1Regularization(l1Bias)); - } - return (T) this; - } - - /** - * L2 regularization coefficient for the bias. Default: 0. See also {@link #l2(double)}
- * Note: Generally, {@link WeightDecay} (set via {@link #weightDecayBias(double,boolean)} should be preferred to - * L2 regularization. See {@link WeightDecay} javadoc for further details.
- */ - public T l2Bias(double l2Bias) { - NetworkUtils.removeInstances(this.regularizationBias, L2Regularization.class); - if(l2Bias > 0.0) { - NetworkUtils.removeInstancesWithWarning(this.regularizationBias, WeightDecay.class, "WeightDecay bias regularization removed: incompatible with added L2 regularization"); - this.regularizationBias.add(new L2Regularization(l2Bias)); - } - return (T) this; - } - - /** - * Add weight decay regularization for the network parameters (excluding biases).
- * This applies weight decay with multiplying the learning rate - see {@link WeightDecay} for more details.
- * - * @param coefficient Weight decay regularization coefficient - * @see #weightDecay(double, boolean) - */ - public Builder weightDecay(double coefficient) { - return weightDecay(coefficient, true); - } - - /** - * Add weight decay regularization for the network parameters (excluding biases). See {@link WeightDecay} for more details.
- * - * @param coefficient Weight decay regularization coefficient - * @param applyLR Whether the learning rate should be multiplied in when performing weight decay updates. See {@link WeightDecay} for more details. - * @see #weightDecay(double, boolean) - */ - public Builder weightDecay(double coefficient, boolean applyLR) { - //Check if existing weight decay if it exists; if so, replace it. Also remove L2 - it doesn't make sense to use both - NetworkUtils.removeInstances(this.regularization, WeightDecay.class); - if(coefficient > 0.0) { - NetworkUtils.removeInstancesWithWarning(this.regularization, L2Regularization.class, "L2 regularization removed: incompatible with added WeightDecay regularization"); - this.regularization.add(new WeightDecay(coefficient, applyLR)); - } - return this; - } - - /** - * Weight decay for the biases only - see {@link #weightDecay(double)} for more details. - * This applies weight decay with multiplying the learning rate.
- * - * @param coefficient Weight decay regularization coefficient - * @see #weightDecayBias(double, boolean) - */ - public Builder weightDecayBias(double coefficient) { - return weightDecayBias(coefficient, true); - } - - /** - * Weight decay for the biases only - see {@link #weightDecay(double)} for more details
- * - * @param coefficient Weight decay regularization coefficient - */ - public Builder weightDecayBias(double coefficient, boolean applyLR) { - //Check if existing weight decay if it exists; if so, replace it. Also remove L2 - it doesn't make sense to use both - NetworkUtils.removeInstances(this.regularizationBias, WeightDecay.class); - if(coefficient > 0.0) { - NetworkUtils.removeInstancesWithWarning(this.regularizationBias, L2Regularization.class, "L2 bias regularization removed: incompatible with added WeightDecay regularization"); - this.regularizationBias.add(new WeightDecay(coefficient, applyLR)); - } - return this; - } - - /** - * Set the regularization for the parameters (excluding biases) - for example {@link WeightDecay}
- * - * @param regularization Regularization to apply for the network parameters/weights (excluding biases) - */ - public Builder regularization(List regularization) { - this.setRegularization(regularization); - return this; - } - - /** - * Set the regularization for the biases only - for example {@link WeightDecay}
- * - * @param regularizationBias Regularization to apply for the network biases only - */ - public Builder regularizationBias(List regularizationBias) { - this.setRegularizationBias(regularizationBias); - return this; - } - - /** - * Gradient updater. For example, {@link org.nd4j.linalg.learning.config.Adam} or {@link - * org.nd4j.linalg.learning.config.Nesterovs} - * - * @param updater Updater to use - */ - public T updater(IUpdater updater) { - this.setUpdater(updater); - return (T) this; - } - - /** - * Gradient updater configuration, for the biases only. If not set, biases will use the updater as set by {@link - * #updater(IUpdater)} - * - * @param biasUpdater Updater to use for bias parameters - */ - public T biasUpdater(IUpdater biasUpdater) { - this.setBiasUpdater(biasUpdater); - return (T) this; - } + public B weightDecayBias(double coefficient, boolean applyLR) { + // Check if existing weight decay if it exists; if so, replace it. Also remove L2 - it doesn't + // make sense to use both + NetworkUtils.removeInstances(this.regularizationBias, WeightDecay.class); + if (coefficient > 0.0) { + NetworkUtils.removeInstancesWithWarning( + this.regularizationBias, + L2Regularization.class, + "L2 bias regularization removed: incompatible with added WeightDecay regularization"); + this.regularizationBias.add(new WeightDecay(coefficient, applyLR)); + } + return self(); } + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffLambdaLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffLambdaLayer.java index 0b68bf649..83f41e663 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffLambdaLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffLambdaLayer.java @@ -20,13 +20,16 @@ package org.deeplearning4j.nn.conf.layers.samediff; +import lombok.NoArgsConstructor; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.conf.inputs.InputType; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.linalg.api.ndarray.INDArray; import java.util.Map; - +@NoArgsConstructor +@SuperBuilder public abstract class SameDiffLambdaLayer extends SameDiffLayer { /** diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffLayer.java index cb16d2f26..cd53db1fd 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffLayer.java @@ -20,7 +20,11 @@ package org.deeplearning4j.nn.conf.layers.samediff; -import lombok.*; +import lombok.Builder; +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.NoArgsConstructor; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -30,30 +34,29 @@ import org.deeplearning4j.nn.weights.WeightInit; import org.deeplearning4j.optimize.api.TrainingListener; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import 
org.nd4j.common.primitives.Pair; import java.util.Collection; import java.util.HashMap; import java.util.Map; -@Data + @EqualsAndHashCode(callSuper = true) +@Data +@NoArgsConstructor +@SuperBuilder public abstract class SameDiffLayer extends AbstractSameDiffLayer { - protected WeightInit weightInit; - protected Map paramWeightInit; + /** + * WeightInit, default is XAVIER. + */ + @Builder.Default + protected WeightInit weightInit = WeightInit.XAVIER; + @Builder.Default + protected Map paramWeightInit = new HashMap<>(); - protected SameDiffLayer(Builder builder) { - super(builder); - this.weightInit = builder.weightInit; - this.paramWeightInit = builder.paramWeightInit; - } - - protected SameDiffLayer() { - //No op constructor for Jackson - } /** * Define the layer @@ -87,6 +90,7 @@ public abstract class SameDiffLayer extends AbstractSameDiffLayer { Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + runInheritance(); org.deeplearning4j.nn.layers.samediff.SameDiffLayer ret = new org.deeplearning4j.nn.layers.samediff.SameDiffLayer(lconf, networkDataType); @@ -97,29 +101,4 @@ public abstract class SameDiffLayer extends AbstractSameDiffLayer { ret.setLayerConfiguration(lconf); return ret; } - - - @SuppressWarnings("unchecked") - @Getter - @Setter - public static abstract class Builder> extends AbstractSameDiffLayer.Builder { - - protected WeightInit weightInit = WeightInit.XAVIER; - protected Map paramWeightInit; - - /** - * @param weightInit Weight initialization to use for the layer - */ - public T weightInit(WeightInit weightInit) { - this.setWeightInit(weightInit); - return (T) this; - } - - public T weightInit(@NonNull String param, @NonNull IWeightInit weightInit){ - if(paramWeightInit == null) - paramWeightInit = new HashMap<>(); - paramWeightInit.put(param, weightInit); - return (T) this; - } - } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffOutputLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffOutputLayer.java index 8fa7fd4d0..7e8e0834a 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffOutputLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffOutputLayer.java @@ -20,6 +20,8 @@ package org.deeplearning4j.nn.conf.layers.samediff; +import lombok.NoArgsConstructor; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.optimize.api.TrainingListener; @@ -30,13 +32,11 @@ import org.nd4j.linalg.api.ndarray.INDArray; import java.util.Collection; import java.util.Map; - +@NoArgsConstructor +@SuperBuilder public abstract class SameDiffOutputLayer extends AbstractSameDiffLayer { - protected SameDiffOutputLayer() { - //No op constructor for Jackson - } /** * Define the output layer diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffVertex.java index accc675d0..9d6144c90 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffVertex.java +++ 
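To illustrate the regularization, updater and weight-init setters on the new AbstractSameDiffLayerBuilder/SameDiffLayer builders above, a sketch against a hypothetical custom layer ("MySameDiffDense") extending SameDiffLayer; the subclass is an assumption for illustration only, and Adam refers to org.nd4j.linalg.learning.config.Adam.

    // weightDecay(..) and l2(..) are mutually exclusive: adding one removes the other with a warning.
    MySameDiffDense layer = MySameDiffDense.builder()   // hypothetical SameDiffLayer subclass
        .weightDecay(1e-4)                              // weight decay, learning rate multiplied in
        .l1Bias(1e-5)
        .updater(new Adam(1e-3))
        .weightInit(WeightInit.XAVIER)                  // XAVIER is the @Builder.Default on SameDiffLayer
        .build();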
b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffVertex.java @@ -176,7 +176,7 @@ public abstract class SameDiffVertex extends GraphVertex implements ITraininable } @Override - public String getLayerName() { + public String getName() { return name; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/util/MaskLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/util/MaskLayer.java index bd39eb828..50b71e837 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/util/MaskLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/util/MaskLayer.java @@ -21,6 +21,7 @@ package org.deeplearning4j.nn.conf.layers.util; import lombok.NoArgsConstructor; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -38,7 +39,7 @@ import java.util.Collection; import java.util.List; import java.util.Map; -@NoArgsConstructor +@SuperBuilder public class MaskLayer extends NoParamLayer { @Override public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/util/MaskZeroLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/util/MaskZeroLayer.java index 18f9cadc1..3161bfb02 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/util/MaskZeroLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/util/MaskZeroLayer.java @@ -21,6 +21,7 @@ package org.deeplearning4j.nn.conf.layers.util; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -34,26 +35,17 @@ import com.fasterxml.jackson.annotation.JsonProperty; import java.util.Collection; -@Data -@EqualsAndHashCode(callSuper = false) -public class MaskZeroLayer extends BaseWrapperLayerConfiguration { +@EqualsAndHashCode(callSuper = false) +@SuperBuilder +public class MaskZeroLayer extends BaseWrapperLayerConfiguration { +@Builder.Default @Getter @Setter private double maskingValue = 0.0; private static final long serialVersionUID = 9074525846200921839L; - public MaskZeroLayer(Builder builder) { - super(builder); - this.underlying = builder.underlying; - this.maskingValue = builder.maskValue; - } - public MaskZeroLayer(@JsonProperty("underlying") LayerConfiguration underlying, @JsonProperty("maskingValue") double maskingValue) { - this.underlying = underlying; - this.maskingValue = maskingValue; - } - @Override public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, @@ -99,39 +91,6 @@ public class MaskZeroLayer extends BaseWrapperLayerConfiguration { } - @NoArgsConstructor - @Getter - @Setter - public static class Builder extends LayerConfiguration.Builder { - private LayerConfiguration underlying; - private double maskValue; - - public Builder setUnderlying(LayerConfiguration underlying) { - this.underlying = underlying; - return this; - } - - public Builder setMaskValue(double maskValue) { - this.maskValue = maskValue; - return this; - } - - public Builder underlying(LayerConfiguration underlying){ - setUnderlying(underlying); - return this; - } - - public Builder 
maskValue(double maskValue){ - setMaskValue(maskValue); - return this; - } - - @Override - @SuppressWarnings("unchecked") - public MaskZeroLayer build() { - return new MaskZeroLayer(this); - } - } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/VariationalAutoencoder.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/VariationalAutoencoder.java index a4cf67c79..53dd8dffb 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/VariationalAutoencoder.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/VariationalAutoencoder.java @@ -20,7 +20,10 @@ package org.deeplearning4j.nn.conf.layers.variational; +import java.util.Collection; +import java.util.Map; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -32,6 +35,7 @@ import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.conf.memory.MemoryReport; import org.deeplearning4j.nn.params.VariationalAutoencoderParamInitializer; import org.deeplearning4j.optimize.api.TrainingListener; +import org.nd4j.common.util.ArrayUtil; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.activations.impl.ActivationIdentity; @@ -39,316 +43,255 @@ import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.lossfunctions.ILossFunction; import org.nd4j.linalg.lossfunctions.LossFunctions; -import org.nd4j.common.util.ArrayUtil; - -import java.util.Collection; -import java.util.Map; @Data -@NoArgsConstructor @EqualsAndHashCode(callSuper = true) +@SuperBuilder public class VariationalAutoencoder extends BasePretrainNetwork { - private int[] encoderLayerSizes; - private int[] decoderLayerSizes; - private ReconstructionDistribution outputDistribution; - private IActivation pzxActivationFn; - private int numSamples; + /** + * Size of the encoder layers, in units. Each encoder layer is functionally equivalent to a {@link + * org.deeplearning4j.nn.conf.layers.DenseLayer}. Typically the number and size of the decoder + * layers (set via {@link VariationalAutoencoderBuilder#encoderLayerSizes(int...)} is similar to the encoder layers. + */ + @Builder.Default private int[] encoderLayerSizes = new int[] {100}; + /** + * Size of the decoder layers, in units. Each decoder layer is functionally equivalent to a {@link + * org.deeplearning4j.nn.conf.layers.DenseLayer}. Typically the number and size of the decoder + * layers is similar to the encoder layers (set via {@link VariationalAutoencoderBuilder#encoderLayerSizes(int...)}. + */ + @Builder.Default private int[] decoderLayerSizes = new int[] {100}; + /** + * The reconstruction distribution for the data given the hidden state - i.e., P(data|Z).
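A short sketch of the rebuilt MaskZeroLayer configuration above; the wrapped LSTM builder is assumed, and maskingValue keeps its 0.0 default through @Builder.Default.

    MaskZeroLayer masked = MaskZeroLayer.builder()
        .underlying(LSTM.builder().nIn(10).nOut(10).build())  // assumed LSTM builder
        .maskingValue(0.0)                                    // timesteps equal to this value are masked
        .build();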
+ * This should be selected carefully based on the type of data being modelled. For example:
+ * - {@link GaussianReconstructionDistribution} + {identity or tanh} for real-valued (Gaussian) + * data
+ * - {@link BernoulliReconstructionDistribution} + sigmoid for binary-valued (0 or 1) data
+ */ + @Builder.Default + private ReconstructionDistribution outputDistribution = + new GaussianReconstructionDistribution(Activation.TANH); + /** + * Activation function for the input to P(z|data).
+ * Care should be taken with this, as some activation functions (relu, etc) are not suitable due + * to being bounded in range [0,infinity). + */ + @Builder.Default private IActivation pzxActivationFunction = new ActivationIdentity(); + /** + * Set the number of samples per data point (from VAE state Z) used when doing pretraining. + * Default value: 1. + * + *

This is parameter L from Kingma and Welling: "In our experiments we found that the number of + * samples L per datapoint can be set to 1 as long as the minibatch size M was large enough, e.g. + * M = 100." + */ + @Builder.Default private int numSamples = 1; - private VariationalAutoencoder(Builder builder) { - super(builder); - this.encoderLayerSizes = builder.encoderLayerSizes; - this.decoderLayerSizes = builder.decoderLayerSizes; - this.outputDistribution = builder.outputDistribution; - this.pzxActivationFn = builder.pzxActivationFn; - this.numSamples = builder.numSamples; + @Override + public Layer instantiate( + NeuralNetConfiguration conf, + Collection trainingListeners, + int layerIndex, + INDArray layerParamsView, + boolean initializeParams, + DataType networkDataType) { + LayerValidation.assertNInNOutSet( + "VariationalAutoencoder", getName(), layerIndex, getNIn(), getNOut()); + + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + runInheritance(); + org.deeplearning4j.nn.layers.variational.VariationalAutoencoder ret = + new org.deeplearning4j.nn.layers.variational.VariationalAutoencoder(lconf, networkDataType); + lconf.runInheritance(); + + ret.addTrainingListeners(trainingListeners); + ret.setIndex(layerIndex); + ret.setParamsViewArray(layerParamsView); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); + ret.setParamTable(paramTable); + ret.setLayerConfiguration(lconf); + return ret; + } + + @Override + public ParamInitializer initializer() { + return VariationalAutoencoderParamInitializer.getInstance(); + } + + @Override + public boolean isPretrainParam(String paramName) { + if (paramName.startsWith(VariationalAutoencoderParamInitializer.DECODER_PREFIX)) { + return true; + } + if (paramName.startsWith(VariationalAutoencoderParamInitializer.PZX_LOGSTD2_PREFIX)) { + return true; + } + return paramName.startsWith(VariationalAutoencoderParamInitializer.PXZ_PREFIX); + } + + @Override + public LayerMemoryReport getMemoryReport(InputType inputType) { + // For training: we'll assume unsupervised pretraining, as this has higher memory requirements + + InputType outputType = getOutputType(-1, inputType); + + val actElementsPerEx = outputType.arrayElementsPerExample(); + val numParams = initializer().numParams(this); + int updaterStateSize = (int) getIUpdater().stateSize(numParams); + + int inferenceWorkingMemSizePerEx = 0; + // Forward pass size through the encoder: + for (int i = 1; i < encoderLayerSizes.length; i++) { + inferenceWorkingMemSizePerEx += encoderLayerSizes[i]; } - @Override - public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, - int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { - LayerValidation.assertNInNOutSet("VariationalAutoencoder", getLayerName(), layerIndex, getNIn(), getNOut()); + // Forward pass size through the decoder, during training + // p(Z|X) mean and stdev; pzxSigmaSquared, pzxSigma -> all size equal to nOut + long decoderFwdSizeWorking = 4 * nOut; + // plus, nSamples * decoder size + // For each decoding: random sample (nOut), z (nOut), activations for each decoder layer + decoderFwdSizeWorking += numSamples * (2 * nOut + ArrayUtil.sum(getDecoderLayerSizes())); + // Plus, component of score + decoderFwdSizeWorking += nOut; - LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); - org.deeplearning4j.nn.layers.variational.VariationalAutoencoder ret = - new 
org.deeplearning4j.nn.layers.variational.VariationalAutoencoder(lconf, networkDataType); - lconf.runInheritance(); + // Backprop size through the decoder and decoder: approx. 2x forward pass size + long trainWorkingMemSize = 2 * (inferenceWorkingMemSizePerEx + decoderFwdSizeWorking); - ret.addTrainingListeners(trainingListeners); - ret.setIndex(layerIndex); - ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(this, layerParamsView, initializeParams); - ret.setParamTable(paramTable); - ret.setLayerConfiguration(lconf); - return ret; + if (getDropOut() != null) { + if (false) { + // TODO drop connect + // Dup the weights... note that this does NOT depend on the minibatch size... + } else { + // Assume we dup the input + trainWorkingMemSize += inputType.arrayElementsPerExample(); + } } - @Override - public ParamInitializer initializer() { - return VariationalAutoencoderParamInitializer.getInstance(); + return new LayerMemoryReport.Builder(name, VariationalAutoencoder.class, inputType, outputType) + .standardMemory(numParams, updaterStateSize) + .workingMemory(0, inferenceWorkingMemSizePerEx, 0, trainWorkingMemSize) + .cacheMemory( + MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) // No caching + .build(); + } + + public static abstract class VariationalAutoencoderBuilder< + C extends VariationalAutoencoder, B extends VariationalAutoencoderBuilder> + extends BasePretrainNetworkBuilder { + + /** + * Size of the encoder layers, in units. Each encoder layer is functionally equivalent to a + * {@link org.deeplearning4j.nn.conf.layers.DenseLayer}. Typically the number and size of the + * decoder layers (set via {@link #decoderLayerSizes(int...)} is similar to the encoder layers. + * + * @param encoderLayerSizes Size of each encoder layer in the variational autoencoder + */ + public B encoderLayerSizes(int... encoderLayerSizes) { + if (encoderLayerSizes == null || encoderLayerSizes.length < 1) { + throw new IllegalArgumentException("Encoder layer sizes array must have length > 0"); + } + this.encoderLayerSizes$value = encoderLayerSizes; + this.encoderLayerSizes$set = true; + return self(); } - @Override - public boolean isPretrainParam(String paramName) { - if (paramName.startsWith(VariationalAutoencoderParamInitializer.DECODER_PREFIX)) { - return true; - } - if (paramName.startsWith(VariationalAutoencoderParamInitializer.PZX_LOGSTD2_PREFIX)) { - return true; - } - return paramName.startsWith(VariationalAutoencoderParamInitializer.PXZ_PREFIX); + /** + * Size of the decoder layers, in units. Each decoder layer is functionally equivalent to a + * {@link org.deeplearning4j.nn.conf.layers.DenseLayer}. Typically the number and size of the + * decoder layers is similar to the encoder layers (set via {@link #encoderLayerSizes(int...)}. + * + * @param decoderLayerSizes Size of each deccoder layer in the variational autoencoder + */ + public B decoderLayerSizes(int... 
decoderLayerSizes) { + if (decoderLayerSizes == null || decoderLayerSizes.length < 1) { + throw new IllegalArgumentException("Decoder layer sizes array must have length > 0"); + } + this.decoderLayerSizes$value = decoderLayerSizes; + this.decoderLayerSizes$set = true; + return self(); } - @Override - public LayerMemoryReport getMemoryReport(InputType inputType) { - //For training: we'll assume unsupervised pretraining, as this has higher memory requirements - - InputType outputType = getOutputType(-1, inputType); - - val actElementsPerEx = outputType.arrayElementsPerExample(); - val numParams = initializer().numParams(this); - int updaterStateSize = (int) getIUpdater().stateSize(numParams); - - int inferenceWorkingMemSizePerEx = 0; - //Forward pass size through the encoder: - for (int i = 1; i < encoderLayerSizes.length; i++) { - inferenceWorkingMemSizePerEx += encoderLayerSizes[i]; - } - - //Forward pass size through the decoder, during training - //p(Z|X) mean and stdev; pzxSigmaSquared, pzxSigma -> all size equal to nOut - long decoderFwdSizeWorking = 4 * nOut; - //plus, nSamples * decoder size - //For each decoding: random sample (nOut), z (nOut), activations for each decoder layer - decoderFwdSizeWorking += numSamples * (2 * nOut + ArrayUtil.sum(getDecoderLayerSizes())); - //Plus, component of score - decoderFwdSizeWorking += nOut; - - //Backprop size through the decoder and decoder: approx. 2x forward pass size - long trainWorkingMemSize = 2 * (inferenceWorkingMemSizePerEx + decoderFwdSizeWorking); - - if (getIDropout() != null) { - if (false) { - //TODO drop connect - //Dup the weights... note that this does NOT depend on the minibatch size... - } else { - //Assume we dup the input - trainWorkingMemSize += inputType.arrayElementsPerExample(); - } - } - - return new LayerMemoryReport.Builder(layerName, VariationalAutoencoder.class, inputType, outputType) - .standardMemory(numParams, updaterStateSize) - .workingMemory(0, inferenceWorkingMemSizePerEx, 0, trainWorkingMemSize) - .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching - .build(); + /** + * The reconstruction distribution for the data given the hidden state - i.e., P(data|Z).
+ * This should be selected carefully based on the type of data being modelled. For example:
+ * - {@link GaussianReconstructionDistribution} + {identity or tanh} for real-valued (Gaussian) + * data
+ * - {@link BernoulliReconstructionDistribution} + sigmoid for binary-valued (0 or 1) data
+ * + * @param distribution Reconstruction distribution + */ + public B reconstructionDistribution(ReconstructionDistribution distribution) { + this.outputDistribution(distribution); + return self(); } - @Getter - @Setter - public static class Builder extends BasePretrainNetwork.Builder { - - /** - * Size of the encoder layers, in units. Each encoder layer is functionally equivalent to a {@link - * org.deeplearning4j.nn.conf.layers.DenseLayer}. Typically the number and size of the decoder layers (set via - * {@link #decoderLayerSizes(int...)} is similar to the encoder layers. - * - */ - private int[] encoderLayerSizes = new int[] {100}; - - /** - * Size of the decoder layers, in units. Each decoder layer is functionally equivalent to a {@link - * org.deeplearning4j.nn.conf.layers.DenseLayer}. Typically the number and size of the decoder layers is similar - * to the encoder layers (set via {@link #encoderLayerSizes(int...)}. - * - */ - private int[] decoderLayerSizes = new int[] {100}; - /** - * The reconstruction distribution for the data given the hidden state - i.e., P(data|Z).
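A hedged sketch of the new VariationalAutoencoderBuilder in use; the nIn/nOut setters are assumed to come from the inherited feed-forward builder hierarchy, and the layer sizes and distribution are illustrative only.

    VariationalAutoencoder vae = VariationalAutoencoder.builder()
        .nIn(784)
        .nOut(32)
        .encoderLayerSizes(256, 256)          // default is {100}
        .decoderLayerSizes(256, 256)          // default is {100}
        .reconstructionDistribution(new BernoulliReconstructionDistribution(Activation.SIGMOID))
        .numSamples(1)                        // parameter L from Kingma and Welling, default 1
        .build();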
This should be - * selected carefully based on the type of data being modelled. For example:<br>
- {@link - * GaussianReconstructionDistribution} + {identity or tanh} for real-valued (Gaussian) data<br>
- {@link - * BernoulliReconstructionDistribution} + sigmoid for binary-valued (0 or 1) data<br>
- * - */ - private ReconstructionDistribution outputDistribution = new GaussianReconstructionDistribution(Activation.TANH); - - /** - * Activation function for the input to P(z|data).<br>
Care should be taken with this, as some activation - * functions (relu, etc) are not suitable due to being bounded in range [0,infinity). - * - */ - private IActivation pzxActivationFn = new ActivationIdentity(); - - /** - * Set the number of samples per data point (from VAE state Z) used when doing pretraining. Default value: 1. - * <br>

- * This is parameter L from Kingma and Welling: "In our experiments we found that the number of samples L per - * datapoint can be set to 1 as long as the minibatch size M was large enough, e.g. M = 100." - * - */ - private int numSamples = 1; - - - /** - * Size of the encoder layers, in units. Each encoder layer is functionally equivalent to a {@link - * org.deeplearning4j.nn.conf.layers.DenseLayer}. Typically the number and size of the decoder layers (set via - * {@link #decoderLayerSizes(int...)} is similar to the encoder layers. - * - * @param encoderLayerSizes Size of each encoder layer in the variational autoencoder - */ - public Builder encoderLayerSizes(int... encoderLayerSizes) { - this.setEncoderLayerSizes(encoderLayerSizes); - return this; - } - - - /** - * Size of the encoder layers, in units. Each encoder layer is functionally equivalent to a {@link - * org.deeplearning4j.nn.conf.layers.DenseLayer}. Typically the number and size of the decoder layers (set via - * {@link #decoderLayerSizes(int...)} is similar to the encoder layers. - * - * @param encoderLayerSizes Size of each encoder layer in the variational autoencoder - */ - public void setEncoderLayerSizes(int... encoderLayerSizes) { - if (encoderLayerSizes == null || encoderLayerSizes.length < 1) { - throw new IllegalArgumentException("Encoder layer sizes array must have length > 0"); - } - this.encoderLayerSizes = encoderLayerSizes; - } - - - /** - * Size of the decoder layers, in units. Each decoder layer is functionally equivalent to a {@link - * org.deeplearning4j.nn.conf.layers.DenseLayer}. Typically the number and size of the decoder layers is similar - * to the encoder layers (set via {@link #encoderLayerSizes(int...)}. - * - * @param decoderLayerSizes Size of each deccoder layer in the variational autoencoder - */ - public Builder decoderLayerSizes(int... decoderLayerSizes) { - this.setDecoderLayerSizes(decoderLayerSizes); - return this; - } - - /** - * Size of the decoder layers, in units. Each decoder layer is functionally equivalent to a {@link - * org.deeplearning4j.nn.conf.layers.DenseLayer}. Typically the number and size of the decoder layers is similar - * to the encoder layers (set via {@link #encoderLayerSizes(int...)}. - * - * @param decoderLayerSizes Size of each deccoder layer in the variational autoencoder - */ - public void setDecoderLayerSizes(int... decoderLayerSizes) { - if (decoderLayerSizes == null || decoderLayerSizes.length < 1) { - throw new IllegalArgumentException("Decoder layer sizes array must have length > 0"); - } - this.decoderLayerSizes = decoderLayerSizes; - } - - - /** - * The reconstruction distribution for the data given the hidden state - i.e., P(data|Z).
This should be - * selected carefully based on the type of data being modelled. For example:<br>
- {@link - * GaussianReconstructionDistribution} + {identity or tanh} for real-valued (Gaussian) data<br>
- {@link - * BernoulliReconstructionDistribution} + sigmoid for binary-valued (0 or 1) data<br>
- * - * @param distribution Reconstruction distribution - */ - public Builder reconstructionDistribution(ReconstructionDistribution distribution) { - this.setOutputDistribution(distribution); - return this; - } - - /** - * Configure the VAE to use the specified loss function for the reconstruction, instead of a - * ReconstructionDistribution. Note that this is NOT following the standard VAE design (as per Kingma & - * Welling), which assumes a probabilistic output - i.e., some p(x|z). It is however a valid network - * configuration, allowing for optimization of more traditional objectives such as mean squared error.
Note: - * clearly, setting the loss function here will override any previously set recontruction distribution - * - * @param outputActivationFn Activation function for the output/reconstruction - * @param lossFunction Loss function to use - */ - public Builder lossFunction(IActivation outputActivationFn, LossFunctions.LossFunction lossFunction) { - return lossFunction(outputActivationFn, lossFunction.getILossFunction()); - } - - /** - * Configure the VAE to use the specified loss function for the reconstruction, instead of a - * ReconstructionDistribution. Note that this is NOT following the standard VAE design (as per Kingma & - * Welling), which assumes a probabilistic output - i.e., some p(x|z). It is however a valid network - * configuration, allowing for optimization of more traditional objectives such as mean squared error.
Note: - * clearly, setting the loss function here will override any previously set recontruction distribution - * - * @param outputActivationFn Activation function for the output/reconstruction - * @param lossFunction Loss function to use - */ - public Builder lossFunction(Activation outputActivationFn, LossFunctions.LossFunction lossFunction) { - return lossFunction(outputActivationFn.getActivationFunction(), lossFunction.getILossFunction()); - } - - /** - * Configure the VAE to use the specified loss function for the reconstruction, instead of a - * ReconstructionDistribution. Note that this is NOT following the standard VAE design (as per Kingma & - * Welling), which assumes a probabilistic output - i.e., some p(x|z). It is however a valid network - * configuration, allowing for optimization of more traditional objectives such as mean squared error.
Note: - * clearly, setting the loss function here will override any previously set recontruction distribution - * - * @param outputActivationFn Activation function for the output/reconstruction - * @param lossFunction Loss function to use - */ - public Builder lossFunction(IActivation outputActivationFn, ILossFunction lossFunction) { - return reconstructionDistribution(new LossFunctionWrapper(outputActivationFn, lossFunction)); - } - - /** - * Activation function for the input to P(z|data).
Care should be taken with this, as some activation - * functions (relu, etc) are not suitable due to being bounded in range [0,infinity). - * - * @param activationFunction Activation function for p(z|x) - */ - public Builder pzxActivationFn(IActivation activationFunction) { - this.setPzxActivationFn(activationFunction); - return this; - } - - /** - * Activation function for the input to P(z|data).
Care should be taken with this, as some activation - * functions (relu, etc) are not suitable due to being bounded in range [0,infinity). - * - * @param activation Activation function for p(z|x) - */ - public Builder pzxActivationFunction(Activation activation) { - return pzxActivationFn(activation.getActivationFunction()); - } - - /** - * Set the size of the VAE state Z. This is the output size during standard forward pass, and the size of the - * distribution P(Z|data) during pretraining. - * - * @param nOut Size of P(Z|data) and output size - */ - @Override - public Builder nOut(int nOut) { - super.nOut(nOut); - return this; - } - - /** - * Set the number of samples per data point (from VAE state Z) used when doing pretraining. Default value: 1. - *

- * This is parameter L from Kingma and Welling: "In our experiments we found that the number of samples L per - * datapoint can be set to 1 as long as the minibatch size M was large enough, e.g. M = 100." - * - * @param numSamples Number of samples per data point for pretraining - */ - public Builder numSamples(int numSamples) { - this.setNumSamples(numSamples); - return this; - } - - @Override - @SuppressWarnings("unchecked") - public VariationalAutoencoder build() { - return new VariationalAutoencoder(this); - } + /** + * Configure the VAE to use the specified loss function for the reconstruction, instead of a + * ReconstructionDistribution. Note that this is NOT following the standard VAE design (as per + * Kingma & Welling), which assumes a probabilistic output - i.e., some p(x|z). It is however a + * valid network configuration, allowing for optimization of more traditional objectives such as + * mean squared error.
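+ * <p>
+ * A minimal usage sketch (editor's addition, not part of the original patch), assuming the
+ * Lombok-generated {@code builder()} entry point and terminal {@code build()}; {@code lossFunction}
+ * and {@code nOut} are the builder methods in this diff, while {@code nIn} is assumed to be
+ * inherited from the parent builder:
+ * <pre>{@code
+ * // Hypothetical: optimize plain mean squared error instead of a probabilistic p(x|z)
+ * VariationalAutoencoder vae = VariationalAutoencoder.builder()
+ *     .nIn(784)
+ *     .nOut(32)
+ *     .lossFunction(Activation.IDENTITY, LossFunctions.LossFunction.MSE)
+ *     .build();
+ * }</pre>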
+ * Note: clearly, setting the loss function here will override any previously set reconstruction + * distribution + * + * @param outputActivationFn Activation function for the output/reconstruction + * @param lossFunction Loss function to use + */ + public B lossFunction(IActivation outputActivationFn, LossFunctions.LossFunction lossFunction) { + return lossFunction(outputActivationFn, lossFunction.getILossFunction()); } + + /** + * Configure the VAE to use the specified loss function for the reconstruction, instead of a + * ReconstructionDistribution. Note that this is NOT following the standard VAE design (as per + * Kingma & Welling), which assumes a probabilistic output - i.e., some p(x|z). It is however a + * valid network configuration, allowing for optimization of more traditional objectives such as + * mean squared error.<br>
+ * Note: clearly, setting the loss function here will override any previously set reconstruction + * distribution + * + * @param outputActivationFn Activation function for the output/reconstruction + * @param lossFunction Loss function to use + */ + public B lossFunction(Activation outputActivationFn, LossFunctions.LossFunction lossFunction) { + return lossFunction( + outputActivationFn.getActivationFunction(), lossFunction.getILossFunction()); + } + + /** + * Configure the VAE to use the specified loss function for the reconstruction, instead of a + * ReconstructionDistribution. Note that this is NOT following the standard VAE design (as per + * Kingma & Welling), which assumes a probabilistic output - i.e., some p(x|z). It is however a + * valid network configuration, allowing for optimization of more traditional objectives such as + * mean squared error.<br>
+ * Note: clearly, setting the loss function here will override any previously set recontruction + * distribution + * + * @param outputActivationFn Activation function for the output/reconstruction + * @param lossFunction Loss function to use + */ + public B lossFunction(IActivation outputActivationFn, ILossFunction lossFunction) { + return reconstructionDistribution(new LossFunctionWrapper(outputActivationFn, lossFunction)); + } + + /** + * Set the size of the VAE state Z. This is the output size during standard forward pass, and the size of the + * distribution P(Z|data) during pretraining. + * + * @param nOut Size of P(Z|data) and output size + */ + public B nOut(int nOut) { + super.nOut(nOut); + return self(); + } + + public B pzxActivationFunction(IActivation activation) { + this.pzxActivationFunction$value = activation; + this.pzxActivationFunction$set = true; + return self(); + } + public B pzxActivationFunction(Activation activation) { + return this.pzxActivationFunction(activation.getActivationFunction()); + } + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/wrapper/BaseWrapperLayerConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/wrapper/BaseWrapperLayerConfiguration.java index 74b71de1f..bfa864c29 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/wrapper/BaseWrapperLayerConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/wrapper/BaseWrapperLayerConfiguration.java @@ -21,10 +21,11 @@ package org.deeplearning4j.nn.conf.layers.wrapper; import java.util.List; -import lombok.Data; import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.Setter; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.ParamInitializer; -import org.deeplearning4j.nn.conf.GradientNormalization; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.dropout.IDropout; @@ -37,27 +38,14 @@ import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.learning.config.IUpdater; import org.nd4j.linalg.learning.regularization.Regularization; -@Data @EqualsAndHashCode(callSuper = false) +@SuperBuilder(builderMethodName = "innerBuilder") public abstract class BaseWrapperLayerConfiguration extends LayerConfiguration { - /** - * The configuration to of another layer to wrap - */ + /** The configuration to of another layer to wrap */ + @Getter @Setter protected LayerConfiguration underlying; - protected BaseWrapperLayerConfiguration(Builder builder) { - super(builder); - } - - protected BaseWrapperLayerConfiguration() { - } - - public BaseWrapperLayerConfiguration(LayerConfiguration underlying) { - this.underlying = underlying; - this.setNetConfiguration(underlying.getNetConfiguration()); - } - /** * Set the net configuration for this configuration as well as for the underlying layer (if not * null there) @@ -68,8 +56,8 @@ public abstract class BaseWrapperLayerConfiguration extends LayerConfiguration { public void setNetConfiguration(NeuralNetConfiguration netConfiguration) { super.setNetConfiguration(netConfiguration); if (underlying.getNetConfiguration() == null) { - underlying.setNetConfiguration( - netConfiguration); //also set netconf for underlying if not set + underlying.setNetConfiguration( + netConfiguration); // also set netconf for underlying if not set } } @@ -85,24 +73,24 @@ public abstract class 
BaseWrapperLayerConfiguration extends LayerConfiguration { * @return */ @Override - public IDropout getIDropout() { - return underlying.getIDropout(); - } - - /** - * @param activationFn - */ - @Override - public void setActivationFn(IActivation activationFn) { - underlying.setActivationFn(activationFn); + public IDropout getDropOut() { + return underlying.getDropOut(); } /** * @param iDropout */ @Override - public void setIDropout(IDropout iDropout) { - underlying.setIDropout(iDropout); + public void setDropOut(IDropout iDropout) { + underlying.setDropOut(iDropout); + } + + /** + * @param activationFn + */ + @Override + public void setActivation(IActivation activationFn) { + underlying.setActivation(activationFn); } /** @@ -133,14 +121,6 @@ public abstract class BaseWrapperLayerConfiguration extends LayerConfiguration { return underlying.getUpdaterByParam(paramName); } - /** - * @param iUpdater - */ - @Override - public void setIUpdater(IUpdater iUpdater) { - underlying.setIUpdater(iUpdater); - } - /** * @return */ @@ -149,6 +129,14 @@ public abstract class BaseWrapperLayerConfiguration extends LayerConfiguration { return underlying.getIUpdater(); } + /** + * @param iUpdater + */ + @Override + public void setIUpdater(IUpdater iUpdater) { + underlying.setIUpdater(iUpdater); + } + @Override public ParamInitializer initializer() { return WrapperLayerParamInitializer.getInstance(); @@ -185,12 +173,11 @@ public abstract class BaseWrapperLayerConfiguration extends LayerConfiguration { } @Override - public void setLayerName(String layerName) { - super.setLayerName(layerName); + public void setName(String layerName) { + super.setName(layerName); if (underlying != null) { - //May be null at some points during JSON deserialization - underlying.setLayerName(layerName); + // May be null at some points during JSON deserialization + underlying.setName(layerName); } } - } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/misc/DummyConfig.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/misc/DummyConfig.java index c2e149c25..9e9f123bc 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/misc/DummyConfig.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/misc/DummyConfig.java @@ -35,7 +35,7 @@ public class DummyConfig implements ITraininableLayerConfiguration { private final String name; @Override - public String getLayerName() { + public String getName() { return name; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/ocnn/OCNNOutputLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/ocnn/OCNNOutputLayer.java index 34a888303..fb7757e87 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/ocnn/OCNNOutputLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/ocnn/OCNNOutputLayer.java @@ -20,7 +20,12 @@ package org.deeplearning4j.nn.conf.ocnn; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import java.util.Collection; +import java.util.List; +import java.util.Map; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -29,241 +34,102 @@ import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.LayerValidation; import org.deeplearning4j.nn.layers.ocnn.OCNNParamInitializer; import 
org.deeplearning4j.optimize.api.TrainingListener; -import org.nd4j.linalg.activations.IActivation; -import org.nd4j.linalg.activations.impl.ActivationIdentity; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.learning.regularization.Regularization; -import org.nd4j.linalg.lossfunctions.ILossFunction; -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import com.fasterxml.jackson.annotation.JsonProperty; -import java.util.Collection; -import java.util.List; -import java.util.Map; - -@Data -@NoArgsConstructor @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) @JsonIgnoreProperties("lossFn") +@SuperBuilder public class OCNNOutputLayer extends BaseOutputLayer { - //embedded hidden layer size - //aka "K" - private int hiddenSize; + /** + * The hidden layer size for the one class neural network. Note this would be nOut on a dense + * layer. NOut in this neural net is always set to 1 though. + */ + @Builder.Default @Getter private int hiddenLayerSize; // embedded hidden layer size aka "K" + /** For nu definition see the paper */ + @Builder.Default @Getter private double nu = 0.04; + /** + * The number of examples to use for computing the quantile for the r value update. This value + * should generally be the same as the number of examples in the dataset + */ + @Builder.Default @Getter private int windowSize = 10000; + /** + * The initial r value to use for ocnn for definition, see the paper, note this is only active + * when {@link #configureR} is specified as true + */ + @Builder.Default @Getter private double initialRValue = 0.1; + /** + * Whether to use the specified {@link #initialRValue} or use the weight initialization with the + * neural network for the r value + */ + @Builder.Default @Getter private boolean configureR = true; + /** + * Psuedo code from keras: start_time = time.time() for epoch in range(100): # Train with each + * example sess.run(updates, feed_dict={X: train_X,r:rvalue}) rvalue = nnScore(train_X, w_1, w_2, + * g) with sess.as_default(): rvalue = rvalue.eval() rvalue = np.percentile(rvalue,q=100*nu) + * print("Epoch = %d, r = %f" % (epoch + 1,rvalue)) + */ + @Builder.Default @Getter @Setter private int lastEpochSinceRUpdated = 0; - private double nu = 0.04; - - private int windowSize = 10000; - - private double initialRValue = 0.1; - - private boolean configureR = true; - - /** - * Psuedo code from keras: start_time = time.time() for epoch in range(100): # Train with each example - * sess.run(updates, feed_dict={X: train_X,r:rvalue}) rvalue = nnScore(train_X, w_1, w_2, g) with sess.as_default(): - * rvalue = rvalue.eval() rvalue = np.percentile(rvalue,q=100*nu) print("Epoch = %d, r = %f" % (epoch + 1,rvalue)) - */ - private int lastEpochSinceRUpdated = 0; - - public OCNNOutputLayer(Builder builder) { - super(builder); - this.hiddenSize = builder.hiddenLayerSize; - this.nu = builder.nu; - setActivationFn( builder.activation) ; - this.windowSize = builder.windowSize; - this.initialRValue = builder.initialRValue; - this.configureR = builder.configureR; + @Override + public Layer instantiate( + NeuralNetConfiguration conf, + Collection trainingListeners, + int layerIndex, + INDArray layerParamsView, + boolean initializeParams, + DataType networkDataType) { + LayerValidation.assertNInNOutSet("OCNNOutputLayer", getName(), layerIndex, getNIn(), getNOut()); + LayerConfiguration lconf = 
conf.getFlattenedLayerConfigurations().get(layerIndex); + runInheritance(); + org.deeplearning4j.nn.layers.ocnn.OCNNOutputLayer ret = + new org.deeplearning4j.nn.layers.ocnn.OCNNOutputLayer(lconf, networkDataType); + ret.addTrainingListeners(trainingListeners); + ret.setIndex(layerIndex); + ret.setParamsViewArray(layerParamsView); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); + ret.setParamTable(paramTable); + ret.setLayerConfiguration(lconf); + ret.setActivation(getActivationFn()); + if (lastEpochSinceRUpdated == 0 && configureR) { + paramTable.get(OCNNParamInitializer.R_KEY).putScalar(0, initialRValue); } + return ret; + } - @JsonCreator - @SuppressWarnings("unused") - public OCNNOutputLayer(@JsonProperty("hiddenSize") int hiddenSize, @JsonProperty("nu") double nu, - @JsonProperty("activation") IActivation activation, @JsonProperty("windowSize") int windowSize, - @JsonProperty("initialRValue") double initialRValue, - @JsonProperty("configureR") boolean configureR) { - this.hiddenSize = hiddenSize; - this.nu = nu; - setActivationFn( activation); - this.windowSize = windowSize; - this.initialRValue = initialRValue; - this.configureR = configureR; + @Override + public long getNOut() { + // we don't change number of outputs here + return 1L; + } + + @Override + public ParamInitializer initializer() { + return OCNNParamInitializer.getInstance(); + } + + @Override + public List getRegularizationByParam(String paramName) { + // Not applicable + return null; + } + + public static abstract class OCNNOutputLayerBuilder< + C extends OCNNOutputLayer, B extends OCNNOutputLayerBuilder> + extends BaseOutputLayerBuilder { + + public B nOut(int nOut) { + return nOut(Long.valueOf(nOut)); } @Override - public ILossFunction getLossFn() { - return lossFn; - } - - @Override - public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, - int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { - LayerValidation.assertNInNOutSet("OCNNOutputLayer", getLayerName(), layerIndex, getNIn(), getNOut()); - LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); - - org.deeplearning4j.nn.layers.ocnn.OCNNOutputLayer ret = - new org.deeplearning4j.nn.layers.ocnn.OCNNOutputLayer(lconf, networkDataType); - ret.addTrainingListeners(trainingListeners); - ret.setIndex(layerIndex); - ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(this, layerParamsView, initializeParams); - ret.setParamTable(paramTable); - ret.setLayerConfiguration(lconf); - ret.setActivation(getActivationFn()); - if (lastEpochSinceRUpdated == 0 && configureR) { - paramTable.get(OCNNParamInitializer.R_KEY).putScalar(0, initialRValue); - } - return ret; - } - - @Override - public long getNOut() { - //we don't change number of outputs here - return 1L; - } - - @Override - public ParamInitializer initializer() { - return OCNNParamInitializer.getInstance(); - } - - - @Override - public List getRegularizationByParam(String paramName) { - //Not applicable - return null; - } - - @Getter - @Setter - @NoArgsConstructor - public static class Builder extends BaseOutputLayer.Builder { - - /** - * The hidden layer size for the one class neural network. Note this would be nOut on a dense layer. NOut in - * this neural net is always set to 1 though. 
- * - */ - protected int hiddenLayerSize; - - /** - * For nu definition see the paper - * - */ - protected double nu = 0.04; - - /** - * The number of examples to use for computing the quantile for the r value update. This value should generally - * be the same as the number of examples in the dataset - * - */ - protected int windowSize = 10000; - - /** - * The activation function to use with ocnn - * - */ - protected IActivation activation = new ActivationIdentity(); - - /** - * The initial r value to use for ocnn for definition, see the paper, note this is only active when {@link - * #configureR} is specified as true - */ - protected double initialRValue = 0.1; - - /** - * Whether to use the specified {@link #initialRValue} or use the weight initialization with the neural network - * for the r value - */ - protected boolean configureR = true; - - /** - * Whether to use the specified {@link #initialRValue} or use the weight initialization with the neural network - * for the r value - * - * @param configureR true if we should use the initial {@link #initialRValue} - */ - public Builder configureR(boolean configureR) { - this.setConfigureR(configureR); - return this; - } - - - /** - * The initial r value to use for ocnn for definition, see the paper, note this is only active when {@link - * #configureR} is specified as true - * - * @param initialRValue the int - */ - public Builder initialRValue(double initialRValue) { - this.setInitialRValue(initialRValue); - return this; - } - - /** - * The number of examples to use for computing the quantile for the r value update. This value should generally - * be the same as the number of examples in the dataset - * - * @param windowSize the number of examples to use for computing the quantile of the dataset for the r value - * update - */ - public Builder windowSize(int windowSize) { - this.setWindowSize(windowSize); - return this; - } - - - /** - * For nu definition see the paper - * - * @param nu the nu for ocnn - */ - public Builder nu(double nu) { - this.setNu(nu); - return this; - } - - /** - * The activation function to use with ocnn - * - * @param activation the activation function to sue - */ - public Builder activation(IActivation activation) { - this.setActivation(activation); - return this; - } - - /** - * The hidden layer size for the one class neural network. Note this would be nOut on a dense layer. NOut in - * this neural net is always set to 1 though. - * - * @param hiddenLayerSize the hidden layer size to use with ocnn - */ - public Builder hiddenLayerSize(int hiddenLayerSize) { - this.setHiddenLayerSize(hiddenLayerSize); - return this; - } - - @Override - public Builder nOut(int nOut) { - throw new UnsupportedOperationException( - "Unable to specify number of outputs with ocnn. Outputs are fixed to 1."); - } - - @Override - public void setNOut(long nOut){ - throw new UnsupportedOperationException( - "Unable to specify number of outputs with ocnn. Outputs are fixed to 1."); - } - - @Override - @SuppressWarnings("unchecked") - public OCNNOutputLayer build() { - return new OCNNOutputLayer(this); - } + public B nOut(long nOut) { + throw new UnsupportedOperationException( + "Unable to specify number of outputs with ocnn. 
Outputs are fixed to 1."); } + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/BaseNetConfigDeserializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/BaseNetConfigDeserializer.java index 24a17c263..3099ffc00 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/BaseNetConfigDeserializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/BaseNetConfigDeserializer.java @@ -78,7 +78,7 @@ public abstract class BaseNetConfigDeserializer extends StdDeserializer im protected boolean requiresDropoutFromLegacy(LayerConfiguration[] layers){ for(LayerConfiguration l : layers){ - if(l.getIDropout() != null){ + if(l.getDropOut() != null){ return false; } } @@ -117,7 +117,7 @@ public abstract class BaseNetConfigDeserializer extends StdDeserializer im protected boolean requiresLegacyLossHandling(LayerConfiguration[] layers){ for(LayerConfiguration l : layers){ - if(l instanceof BaseOutputLayer && ((BaseOutputLayer)l).getLossFn() == null){ + if(l instanceof BaseOutputLayer && ((BaseOutputLayer)l).getLossFunction() == null){ return true; } } @@ -277,13 +277,13 @@ public abstract class BaseNetConfigDeserializer extends StdDeserializer im | InvocationTargetException instantiationException){ log.error(instantiationException.getMessage()); } - baseLayerConfiguration.setActivationFn(a); + baseLayerConfiguration.setActivation(a); } } //0.5.0 and earlier: loss function was an enum like "lossFunction" : "NEGATIVELOGLIKELIHOOD", protected void handleLossBackwardCompatibility(BaseOutputLayer baseLayer, ObjectNode on){ - if(baseLayer.getLossFn() == null && on.has("activationFunction")) { + if(baseLayer.getLossFunction() == null && on.has("activationFunction")) { String lfn = on.get("lossFunction").asText(); ILossFunction loss = null; switch (lfn) { @@ -302,7 +302,7 @@ public abstract class BaseNetConfigDeserializer extends StdDeserializer im case "XENT": loss = new LossBinaryXENT(); } - baseLayer.setLossFn(loss); + baseLayer.setLossFunction(loss); } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/ComputationGraphConfigurationDeserializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/ComputationGraphConfigurationDeserializer.java index 9f93c43e0..e3a6ba1cd 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/ComputationGraphConfigurationDeserializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/ComputationGraphConfigurationDeserializer.java @@ -42,10 +42,7 @@ import com.fasterxml.jackson.databind.node.ObjectNode; import java.io.IOException; import java.io.StringReader; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; -import java.util.Map; +import java.util.*; public class ComputationGraphConfigurationDeserializer @@ -138,11 +135,11 @@ public class ComputationGraphConfigurationDeserializer handleActivationBackwardCompatibility((BaseLayerConfiguration)layers[layerIdx], (ObjectNode)next); } - if(requiresLegacyLossHandling && layers[layerIdx] instanceof BaseOutputLayer && ((BaseOutputLayer)layers[layerIdx]).getLossFn() == null){ + if(requiresLegacyLossHandling && layers[layerIdx] instanceof BaseOutputLayer && ((BaseOutputLayer)layers[layerIdx]).getLossFunction() == null){ handleLossBackwardCompatibility((BaseOutputLayer) layers[layerIdx], (ObjectNode)next); } - if(layers[layerIdx].getIDropout() == null){ + 
if(layers[layerIdx].getDropOut() == null){ //Check for legacy dropout if(next.has("dropOut")){ double d = next.get("dropOut").asDouble(); @@ -152,7 +149,7 @@ public class ComputationGraphConfigurationDeserializer && confNode.get("useDropConnect").asBoolean(false)){ ((BaseLayerConfiguration)layers[layerIdx]).setWeightNoise(new DropConnect(d)); } else { - layers[layerIdx].setIDropout(new Dropout(d)); + layers[layerIdx].setDropOut(new Dropout(d)); } } } @@ -178,7 +175,7 @@ public class ComputationGraphConfigurationDeserializer for(GraphVertex gv : conf.getVertices().values()){ if(gv instanceof LayerVertex && ((LayerVertex) gv).getLayerConfiguration() instanceof BatchNormalization){ BatchNormalization bn = (BatchNormalization) ((LayerVertex) gv).getLayerConfiguration(); - List vars = ((LayerVertex) gv).getNetConfiguration().getNetWideVariables(); + Set vars = ((LayerVertex) gv).getNetConfiguration().getNetWideVariables(); boolean isVariance = vars.contains(BatchNormalizationParamInitializer.GLOBAL_VAR); bn.setUseLogStd(!isVariance); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/JsonMappers.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/JsonMappers.java index 0b6871524..4f7ac3b78 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/JsonMappers.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/JsonMappers.java @@ -68,6 +68,7 @@ public class JsonMappers { ret.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); ret.configure(SerializationFeature.FAIL_ON_EMPTY_BEANS, false); ret.configure(MapperFeature.SORT_PROPERTIES_ALPHABETICALLY, true); + //ret.configure(SerializationFeature.ORDER_MAP_ENTRIES_BY_KEYS, false); ret.enable(SerializationFeature.INDENT_OUTPUT); SimpleModule customDeserializerModule = new SimpleModule(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/NeuralNetConfigurationDeserializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/NeuralNetConfigurationDeserializer.java index 7863aca02..e396df456 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/NeuralNetConfigurationDeserializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/NeuralNetConfigurationDeserializer.java @@ -40,7 +40,7 @@ import com.fasterxml.jackson.databind.node.ObjectNode; import java.io.IOException; import java.io.StringReader; -import java.util.List; +import java.util.Set; public class NeuralNetConfigurationDeserializer extends BaseNetConfigDeserializer { @@ -101,7 +101,7 @@ public class NeuralNetConfigurationDeserializer extends BaseNetConfigDeserialize } if(attemptIUpdaterFromLegacy) { - if (layers[i].getIDropout() == null) { + if (layers[i].getDropOut() == null) { //Check for legacy dropout/dropconnect if (on.has("dropOut")) { double d = on.get("dropOut").asDouble(); @@ -113,7 +113,7 @@ public class NeuralNetConfigurationDeserializer extends BaseNetConfigDeserialize ((BaseLayerConfiguration) layers[i]).setWeightNoise(new DropConnect(d)); } else { if (d > 0.0) { - layers[i].setIDropout(new Dropout(d)); + layers[i].setDropOut(new Dropout(d)); } } } @@ -150,7 +150,7 @@ public class NeuralNetConfigurationDeserializer extends BaseNetConfigDeserialize handleActivationBackwardCompatibility((BaseLayerConfiguration) layers[i], on); } - if(requiresLegacyLossHandling && layers[i] instanceof BaseOutputLayer && ((BaseOutputLayer)layers[i]).getLossFn() 
== null){ + if(requiresLegacyLossHandling && layers[i] instanceof BaseOutputLayer && ((BaseOutputLayer)layers[i]).getLossFunction() == null){ handleLossBackwardCompatibility((BaseOutputLayer) layers[i], on); } } @@ -168,7 +168,7 @@ public class NeuralNetConfigurationDeserializer extends BaseNetConfigDeserialize LayerConfiguration l = nnc.getLayerConfigurations().get(0); if(l instanceof BatchNormalization){ BatchNormalization bn = (BatchNormalization)l; - List vars = nnc.getNetWideVariables(); + Set vars = nnc.getNetWideVariables(); boolean isVariance = vars.contains(BatchNormalizationParamInitializer.GLOBAL_VAR); bn.setUseLogStd(!isVariance); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/gradient/DefaultGradient.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/gradient/DefaultGradient.java index 23d1651f5..3f147df0a 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/gradient/DefaultGradient.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/gradient/DefaultGradient.java @@ -24,10 +24,7 @@ import lombok.Setter; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; -import java.util.ArrayList; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; +import java.util.*; public class DefaultGradient implements Gradient { public static final char DEFAULT_FLATTENING_ORDER = 'f'; @@ -48,7 +45,7 @@ public class DefaultGradient implements Gradient { } @Override - public INDArray gradient(List order) { + public INDArray gradient(Set order) { List toFlatten = new ArrayList<>(); if (flatteningOrders == null) { for (String s : order) { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/gradient/Gradient.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/gradient/Gradient.java index 0fb15d3ab..f09ae540d 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/gradient/Gradient.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/gradient/Gradient.java @@ -25,6 +25,7 @@ import org.nd4j.linalg.api.ndarray.INDArray; import java.io.Serializable; import java.util.List; import java.util.Map; +import java.util.Set; public interface Gradient extends Serializable { @@ -40,7 +41,7 @@ public interface Gradient extends Serializable { * * @return */ - INDArray gradient(List order); + INDArray gradient(Set order); /** * The full gradient as one flat vector diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java index cc0c13506..e041ca68f 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java @@ -565,7 +565,7 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali int numLayers = 0; List tempLayerList = new ArrayList<>(); defaultConfiguration.clearNetWideVariable(); - List variables = defaultConfiguration.netWideVariables(false); + Set variables = defaultConfiguration.getNetWideVariables(false); i = computationGraphConfiguration.getNetworkInputs().size(); for(; i layerVariables = l.getNetConfiguration().netWideVariables(); + Set layerVariables = l.getNetConfiguration().getNetWideVariables(); if (layerVariables != null) { for (String s : layerVariables) { variables.add(gv.getVertexName() + "_" + s); @@ -711,7 +711,7 @@ 
public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali } for(Layer l : layers){ - String layerName = l.getLayerConfiguration().getLayerName(); + String layerName = l.getLayerConfiguration().getName(); List inputs = computationGraphConfiguration.getVertexInputs().get(layerName); String in = inputs.get(0); //For now: layers should have exactly 1 input @@ -3582,7 +3582,7 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali * @return Hidden state, or null if layer is not an RNN layer */ public Map rnnGetPreviousState(int layer) { - return rnnGetPreviousState(layers[layer].getLayerConfiguration().getLayerName()); + return rnnGetPreviousState(layers[layer].getLayerConfiguration().getName()); } /** @@ -3615,7 +3615,7 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali l = ((org.deeplearning4j.nn.layers.wrapper.BaseWrapperLayer)l).getUnderlying(); } if (l instanceof RecurrentLayer) { - states.put(l.getLayerConfiguration().getLayerName(), ((RecurrentLayer) l).rnnGetPreviousState()); + states.put(l.getLayerConfiguration().getName(), ((RecurrentLayer) l).rnnGetPreviousState()); } } return states; @@ -3628,7 +3628,7 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali * @param state The state to set the specified layer to */ public void rnnSetPreviousState(int layer, Map state) { - rnnSetPreviousState(layers[layer].getLayerConfiguration().getLayerName(), state); + rnnSetPreviousState(layers[layer].getLayerConfiguration().getName(), state); } /** @@ -4409,7 +4409,7 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali catch (Exception e) { // Some layers, like PReLU, are just BaseLayers (but have parameters) } } - List paraNames = currentLayer.getNetConfiguration().netWideVariables(); + Set paraNames = currentLayer.getNetConfiguration().getNetWideVariables(); for (String aP : paraNames) { String paramS = ArrayUtils.toString(currentLayer.getParamTable().get(aP).shape()); paramShape += aP + ":" + paramS + ", "; @@ -4741,7 +4741,7 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali throw new IllegalArgumentException("Invalid layer index: " + layer + ". ILayer index must be between 0 and " + (layers.length - 1) + " inclusive"); } - return layerSize(layers[layer].getLayerConfiguration().getLayerName()); + return layerSize(layers[layer].getLayerConfiguration().getName()); } /** @@ -4760,7 +4760,7 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali throw new IllegalArgumentException("Invalid layer index: " + layer + ". 
ILayer index must be between 0 and " + (layers.length - 1) + " inclusive"); } - return layerInputSize(layers[layer].getLayerConfiguration().getLayerName()); + return layerInputSize(layers[layer].getLayerConfiguration().getName()); } /** diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/LayerVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/LayerVertex.java index a0df3e1bb..d053e27b1 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/LayerVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/LayerVertex.java @@ -85,7 +85,7 @@ public class LayerVertex extends BaseGraphVertex { return; this.layer = new FrozenLayer(this.layer); - this.layer.getLayerConfiguration().setLayerName(vertexName); + this.layer.getLayerConfiguration().setName(vertexName); } @Override diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java index d14f20d85..5d8141549 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java @@ -24,8 +24,8 @@ import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.Map; -import java.util.stream.Collectors; +import com.fasterxml.jackson.annotation.JsonIgnore; import lombok.*; import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.nn.api.ITraininableLayerConfiguration; @@ -51,6 +51,8 @@ import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator; /** A layer with input and output, no parameters or gradients */ @NoArgsConstructor(force = true) +//@JsonIdentityInfo(generator = ObjectIdGenerators.IntSequenceGenerator.class, property = "@id") +//@JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, property = "__class") public abstract class AbstractLayer implements Layer { private final @Getter List variables = new ArrayList<>(); @@ -77,8 +79,9 @@ public abstract class AbstractLayer impl protected DataType dataType; protected @Getter @Setter int iterationCount; protected @Getter @Setter int epochCount; + @JsonIgnore private @Getter @Setter IModel net; - + @JsonIgnore @Getter @Setter @NonNull private NeuralNetConfiguration netConfiguration; public AbstractLayer(@NonNull LayerConfiguration layerConf, @NonNull DataType dataType) { @@ -367,7 +370,7 @@ public abstract class AbstractLayer impl } protected String layerId() { - String name = this.layerConfiguration.getLayerName(); + String name = this.layerConfiguration.getName(); return "(layer name: " + (name == null ? 
"\"\"" : name) + ", layer index: " @@ -419,13 +422,13 @@ public abstract class AbstractLayer impl input = null; maskArray = null; maskState = null; - if (getTypedLayerConfiguration().getIDropout() != null) { - getTypedLayerConfiguration().getIDropout().clear(); + if (getTypedLayerConfiguration().getDropOut() != null) { + getTypedLayerConfiguration().getDropOut().clear(); } } protected void applyDropOutIfNecessary(boolean training, LayerWorkspaceMgr workspaceMgr) { - if (training && !dropoutApplied && getTypedLayerConfiguration().getIDropout() != null) { + if (training && !dropoutApplied && getTypedLayerConfiguration().getDropOut() != null) { INDArray result; if (inputModificationAllowed) { result = input; @@ -437,16 +440,16 @@ public abstract class AbstractLayer impl input = getTypedLayerConfiguration() - .getIDropout() + .getDropOut() .applyDropout(input, result, getIterationCount(), getEpochCount(), workspaceMgr); dropoutApplied = true; } } protected INDArray backpropDropOutIfPresent(INDArray epsilon) { - if (getTypedLayerConfiguration().getIDropout() != null) { + if (getTypedLayerConfiguration().getDropOut() != null) { getTypedLayerConfiguration() - .getIDropout() + .getDropOut() .backprop(epsilon, epsilon, getIterationCount(), getEpochCount()); } return epsilon; @@ -467,6 +470,7 @@ public abstract class AbstractLayer impl @Override public int getInputMiniBatchSize() { + if(input==null) return 0; return (int) input.size(0); } @@ -560,6 +564,7 @@ public abstract class AbstractLayer impl */ @Override public void setParamTable(Map paramTable) { + throw new RuntimeException("Not implemented"); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java index 1a055c528..8c092b4b1 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java @@ -262,7 +262,7 @@ public abstract class BaseLayer INDArray preNorm = zAndPreNorm.getSecond(); INDArray delta = getTypedLayerConfiguration() - .getActivationFn() + .getActivation() .backprop(z, epsilon) .getFirst(); // TODO handle activation function params @@ -371,12 +371,12 @@ public abstract class BaseLayer if (params == null) { log.trace( "setParams(INDArray params, char order): params is null. 
Skipping setParams in Layer {}[{}] at index {}", - getLayerConfiguration().getLayerName(), + getLayerConfiguration().getName(), getClass().getSimpleName(), getIndex()); return; } - List parameterList = layerConfiguration.getVariables(); // netWideVariables(); + Set parameterList = layerConfiguration.getVariables(); // netWideVariables(); int length = 0; for (String s : parameterList) { length += getParam(s).length(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseOutputLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseOutputLayer.java index 1450507f7..5fc9bfde7 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseOutputLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseOutputLayer.java @@ -75,7 +75,7 @@ public abstract class BaseOutputLayer getGradientsAndDelta(INDArray preOut, LayerWorkspaceMgr workspaceMgr) { - ILossFunction lossFunction = getTypedLayerConfiguration().getLossFn(); + ILossFunction lossFunction = getTypedLayerConfiguration().getLossFunction(); INDArray labels2d = getLabels2d(workspaceMgr, ArrayType.BP_WORKING_MEM); //INDArray delta = lossFunction.computeGradient(labels2d, preOut, layerConf().getActivationFunction(), maskArray); - INDArray delta = lossFunction.computeGradient(labels2d, preOut, getTypedLayerConfiguration().getActivationFn(), maskArray); + INDArray delta = lossFunction.computeGradient(labels2d, preOut, getTypedLayerConfiguration().getActivation(), maskArray); Gradient gradient = new DefaultGradient(); @@ -349,6 +349,6 @@ public abstract class BaseOutputLayer @@ -108,7 +109,7 @@ public abstract class BasePretrainNetwork parameterList = layerConfiguration.getVariables(); + Set parameterList = layerConfiguration.getVariables(); long paramLength = 0; for (String s : parameterList) { val len = getParam(s).length(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/DropoutLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/DropoutLayer.java index 9cc4c82e9..6f079e1c7 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/DropoutLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/DropoutLayer.java @@ -71,7 +71,7 @@ public class DropoutLayer extends BaseLayer getGradientsAndDelta(INDArray preOut, LayerWorkspaceMgr workspaceMgr) { // delta calculation - ILossFunction lossFunction = getTypedLayerConfiguration().getLossFn(); + ILossFunction lossFunction = getTypedLayerConfiguration().getLossFunction(); INDArray delta = lossFunction.computeGradient(getLabels2d(), preOut, getTypedLayerConfiguration().getActivationFn(), maskArray); // grab the empty gradient diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/RepeatVector.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/RepeatVector.java index 166cbd896..7a8f48b92 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/RepeatVector.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/RepeatVector.java @@ -75,7 +75,7 @@ public class RepeatVector extends AbstractLayer p = preOutput4d(true, true, workspaceMgr); INDArray z = p.getFirst(); - CNN2DFormat f = getTypedLayerConfiguration().getCnn2dDataFormat(); + CNN2DFormat f = getTypedLayerConfiguration().getConvFormat(); if(f != CNN2DFormat.NCHW){ z = z.permute(0,3,1,2); //NHWC to NCHW } @@ -159,7 +159,7 @@ public class ConvolutionLayer 
extends BaseLayer { boolean ncdhw = getTypedLayerConfiguration().getDataFormat() == Convolution3D.DataFormat.NCDHW; int chDim = ncdhw ? 1 : 4; if (input.size(chDim) != getTypedLayerConfiguration().getNIn() ) { - String layerName = getLayerConfiguration().getLayerName(); + String layerName = getLayerConfiguration().getName(); if (layerName == null) layerName = "(not named)"; throw new DL4JInvalidInputException("Cannot do forward pass in Deconvolution3D layer (layer name = " + layerName diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/DepthwiseConvolution2DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/DepthwiseConvolution2DLayer.java index 2b39f70d2..8b7d2daa2 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/DepthwiseConvolution2DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/DepthwiseConvolution2DLayer.java @@ -59,12 +59,12 @@ public class DepthwiseConvolution2DLayer extends ConvolutionLayer { @Override public Pair backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { assertInputSet(true); - CNN2DFormat format = getTypedLayerConfiguration().getCnn2dDataFormat(); + CNN2DFormat format = getTypedLayerConfiguration().getConvFormat(); boolean nchw = format == CNN2DFormat.NCHW; if (input.rank() != 4) { throw new DL4JInvalidInputException("Got rank " + input.rank() + " array as input to Convolution layer with shape " + Arrays.toString(input.shape()) - + ". Expected rank 4 array with shape " + getTypedLayerConfiguration().getCnn2dDataFormat().dimensionNames() + ". " + + ". Expected rank 4 array with shape " + getTypedLayerConfiguration().getConvFormat().dimensionNames() + ". " + layerId()); } INDArray bias; @@ -152,13 +152,13 @@ public class DepthwiseConvolution2DLayer extends ConvolutionLayer { getParamWithNoise(DepthwiseConvolutionParamInitializer.WEIGHT_KEY, training, workspaceMgr); if (input.rank() != 4) { - String layerName = layerConfiguration.getLayerName(); + String layerName = layerConfiguration.getName(); if (layerName == null) layerName = "(not named)"; throw new DL4JInvalidInputException("Got rank " + input.rank() + " array as input to DepthwiseConvolution2D (layer name = " + layerName + ", layer index = " + index + ") with shape " + Arrays.toString(input.shape()) + ". " - + "Expected rank 4 array with shape " + getTypedLayerConfiguration().getCnn2dDataFormat().dimensionNames() + "." + + "Expected rank 4 array with shape " + getTypedLayerConfiguration().getConvFormat().dimensionNames() + "." + (input.rank() == 2 ? " (Wrong input type (see InputType.convolutionalFlat()) or wrong data type?)" : "") + " " + layerId()); @@ -166,7 +166,7 @@ public class DepthwiseConvolution2DLayer extends ConvolutionLayer { INDArray input = this.input.castTo(dataType); //no-op if correct dtype - CNN2DFormat format = getTypedLayerConfiguration().getCnn2dDataFormat(); + CNN2DFormat format = getTypedLayerConfiguration().getConvFormat(); boolean nchw = format == CNN2DFormat.NCHW; long inDepth = depthWiseWeights.size(2); @@ -174,7 +174,7 @@ public class DepthwiseConvolution2DLayer extends ConvolutionLayer { long outDepth = depthMultiplier * inDepth; if (input.size(nchw ? 
1 : 3) != inDepth) { - String layerName = layerConfiguration.getLayerName(); + String layerName = layerConfiguration.getName(); if (layerName == null) layerName = "(not named)"; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SeparableConvolution2DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SeparableConvolution2DLayer.java index dc660bfc8..60533ee2a 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SeparableConvolution2DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SeparableConvolution2DLayer.java @@ -63,7 +63,7 @@ public class SeparableConvolution2DLayer extends ConvolutionLayer { if (input.rank() != 4) { throw new DL4JInvalidInputException("Got rank " + input.rank() + " array as input to SubsamplingLayer with shape " + Arrays.toString(input.shape()) - + ". Expected rank 4 array with shape " + getTypedLayerConfiguration().getCnn2dDataFormat().dimensionNames() + ". " + + ". Expected rank 4 array with shape " + getTypedLayerConfiguration().getConvFormat().dimensionNames() + ". " + layerId()); } INDArray bias; @@ -74,7 +74,7 @@ public class SeparableConvolution2DLayer extends ConvolutionLayer { INDArray input = this.input.castTo(dataType); - CNN2DFormat format = getTypedLayerConfiguration().getCnn2dDataFormat(); + CNN2DFormat format = getTypedLayerConfiguration().getConvFormat(); boolean nchw = format == CNN2DFormat.NCHW; long miniBatch = input.size(0); @@ -167,7 +167,7 @@ public class SeparableConvolution2DLayer extends ConvolutionLayer { getParamWithNoise(SeparableConvolutionParamInitializer.POINT_WISE_WEIGHT_KEY, training, workspaceMgr); INDArray input = this.input.castTo(dataType); - if(getTypedLayerConfiguration().getCnn2dDataFormat() == CNN2DFormat.NHWC) { + if(getTypedLayerConfiguration().getConvFormat() == CNN2DFormat.NHWC) { input = input.permute(0,3,1,2).dup(); } @@ -176,13 +176,13 @@ public class SeparableConvolution2DLayer extends ConvolutionLayer { int wIdx = 3; if (input.rank() != 4) { - String layerName = getLayerConfiguration().getLayerName(); + String layerName = getLayerConfiguration().getName(); if (layerName == null) layerName = "(not named)"; throw new DL4JInvalidInputException("Got rank " + input.rank() + " array as input to SeparableConvolution2D (layer name = " + layerName + ", layer index = " + index + ") with shape " + Arrays.toString(input.shape()) + ". " - + "Expected rank 4 array with shape " + getTypedLayerConfiguration().getCnn2dDataFormat().dimensionNames() + "." + + "Expected rank 4 array with shape " + getTypedLayerConfiguration().getConvFormat().dimensionNames() + "." + (input.rank() == 2 ? 
" (Wrong input type (see InputType.convolutionalFlat()) or wrong data type?)" : "") @@ -193,13 +193,13 @@ public class SeparableConvolution2DLayer extends ConvolutionLayer { long outDepth = pointWiseWeights.size(0); if (input.size(chIdx) != inDepth) { - String layerName = getLayerConfiguration().getLayerName(); + String layerName = getLayerConfiguration().getName(); if (layerName == null) layerName = "(not named)"; String s = "Cannot do forward pass in SeparableConvolution2D layer (layer name = " + layerName + ", layer index = " + index + "): input array channels does not match CNN layer configuration" - + " (data format = " + getTypedLayerConfiguration().getCnn2dDataFormat() + ", data input channels = " + input.size(1) + ", [minibatch,inputDepth,height,width]=" + + " (data format = " + getTypedLayerConfiguration().getConvFormat() + ", data input channels = " + input.size(1) + ", [minibatch,inputDepth,height,width]=" + Arrays.toString(input.shape()) + "; expected" + " input channels = " + inDepth + ") " + layerId(); @@ -287,7 +287,7 @@ public class SeparableConvolution2DLayer extends ConvolutionLayer { .build(); Nd4j.getExecutioner().exec(op); - if(getTypedLayerConfiguration().getCnn2dDataFormat() == CNN2DFormat.NHWC) { + if(getTypedLayerConfiguration().getConvFormat() == CNN2DFormat.NHWC) { output = output.permute(0,2,3,1); //NCHW to NHWC } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SpaceToBatch.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SpaceToBatch.java index 1e5c7b270..371511075 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SpaceToBatch.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SpaceToBatch.java @@ -47,7 +47,7 @@ public class SpaceToBatch extends AbstractLayer { @@ -72,7 +73,7 @@ public class SubsamplingLayer extends AbstractLayer getParamTable() { + return null; + } + + /** + * * The AbstractLayer does not implement Params, ParamTable and GradientView. A RuntimeException + * * will be triggered when calling this. + * + * @param isBackprop + * @return + */ + @Override + public Map getParamTable(boolean isBackprop) { + return null; + } + + /** + * * The AbstractLayer does not implement Params, ParamTable and GradientView. A RuntimeException + * * will be triggered when calling this. 
+ * + * @param paramTable + */ + @Override + public void setParamTable(Map paramTable) { + //-no-op + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling2D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling2D.java index cf9da710e..12ef14e80 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling2D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling2D.java @@ -91,7 +91,7 @@ public class Upsampling2D extends AbstractLayer [minibatch, nOut, seqLen] i.e., NWC -> NCW } return workspaceMgr.leverageTo(ArrayType.ACTIVATIONS, ret); @@ -185,7 +185,7 @@ public class EmbeddingSequenceLayer extends BaseLayer { @Setter @Getter private IActivation activation = new ActivationReLU(); + private static final IActivation relu = new ActivationReLU(); @@ -64,7 +68,7 @@ public class OCNNOutputLayer extends BaseOutputLayer getGradientsAndDelta(INDArray preOut, LayerWorkspaceMgr workspaceMgr) { - ILossFunction lossFunction = getTypedLayerConfiguration().getLossFn(); + ILossFunction lossFunction = getTypedLayerConfiguration().getLossFunction(); INDArray labels2d = getLabels2d(workspaceMgr, ArrayType.BP_WORKING_MEM); INDArray delta = lossFunction.computeGradient(labels2d, preOut, getTypedLayerConfiguration().getActivationFn(), maskArray); org.deeplearning4j.nn.conf.ocnn.OCNNOutputLayer conf = ( org.deeplearning4j.nn.conf.ocnn.OCNNOutputLayer) getLayerConfiguration(); @@ -177,7 +181,7 @@ public class OCNNOutputLayer extends BaseOutputLayer params = Collections.synchronizedMap(new LinkedHashMap()); val nIn = ocnnOutputLayer.getNIn(); - int hiddenLayer = ocnnOutputLayer.getHiddenSize(); + int hiddenLayer = ocnnOutputLayer.getHiddenLayerSize(); Preconditions.checkState(hiddenLayer > 0, "OCNNOutputLayer hidden layer state: must be non-zero."); val firstLayerWeightLength = hiddenLayer; @@ -130,7 +130,7 @@ public class OCNNParamInitializer extends DefaultParamInitializer { org.deeplearning4j.nn.conf.ocnn.OCNNOutputLayer ocnnOutputLayer = ( org.deeplearning4j.nn.conf.ocnn.OCNNOutputLayer) conf; Map params = Collections.synchronizedMap(new LinkedHashMap()); val nIn = ocnnOutputLayer.getNIn(); - val hiddenLayer = ocnnOutputLayer.getHiddenSize(); + val hiddenLayer = ocnnOutputLayer.getHiddenLayerSize(); val firstLayerWeightLength = hiddenLayer; val secondLayerLength = nIn * hiddenLayer; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/pooling/GlobalPoolingLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/pooling/GlobalPoolingLayer.java index 5eee50a92..f4269e52e 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/pooling/GlobalPoolingLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/pooling/GlobalPoolingLayer.java @@ -170,6 +170,7 @@ public class GlobalPoolingLayer extends AbstractLayer forwardsGradient = LSTMHelpers.backpropGradientHelper(this, this.layerConfiguration.getNetConfiguration(), - this.getTypedLayerConfiguration().getGateActivationFn(), permuteIfNWC(this.input), + this.getTypedLayerConfiguration().getGateActivationFunction(), permuteIfNWC(this.input), getParam(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_FORWARDS), getParam(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS), permuteIfNWC(epsilon), truncatedBPTT, tbpttBackwardLength, 
fwdPass, true, @@ -90,7 +90,7 @@ public class GravesBidirectionalLSTM final Pair backwardsGradient = LSTMHelpers.backpropGradientHelper(this, this.layerConfiguration.getNetConfiguration(), - this.getTypedLayerConfiguration().getGateActivationFn(), permuteIfNWC(this.input), + this.getTypedLayerConfiguration().getGateActivationFunction(), permuteIfNWC(this.input), getParam(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_BACKWARDS), getParam(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_BACKWARDS), permuteIfNWC(epsilon), truncatedBPTT, tbpttBackwardLength, backPass, false, @@ -155,7 +155,7 @@ public class GravesBidirectionalLSTM cachedPassForward = null; } else { - forwardsEval = LSTMHelpers.activateHelper(this, this.layerConfiguration.getNetConfiguration(), this.getTypedLayerConfiguration().getGateActivationFn(), + forwardsEval = LSTMHelpers.activateHelper(this, this.layerConfiguration.getNetConfiguration(), this.getTypedLayerConfiguration().getGateActivationFunction(), permuteIfNWC(this.input), getParam(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_FORWARDS), getParam(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS), getParam(GravesBidirectionalLSTMParamInitializer.BIAS_KEY_FORWARDS), training, null, null, @@ -163,7 +163,7 @@ public class GravesBidirectionalLSTM GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS, maskArray, true, null, forBackprop ? cacheMode : CacheMode.NONE, workspaceMgr, getTypedLayerConfiguration().isHelperAllowFallback()); - backwardsEval = LSTMHelpers.activateHelper(this, this.layerConfiguration.getNetConfiguration(), this.getTypedLayerConfiguration().getGateActivationFn(), + backwardsEval = LSTMHelpers.activateHelper(this, this.layerConfiguration.getNetConfiguration(), this.getTypedLayerConfiguration().getGateActivationFunction(), permuteIfNWC(this.input), getParam(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_BACKWARDS), getParam(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_BACKWARDS), @@ -215,7 +215,7 @@ public class GravesBidirectionalLSTM biasKey = GravesBidirectionalLSTMParamInitializer.BIAS_KEY_BACKWARDS; } - FwdPassReturn ret = LSTMHelpers.activateHelper(this, this.layerConfiguration.getNetConfiguration(), this.getTypedLayerConfiguration().getGateActivationFn(), permuteIfNWC(this.input), + FwdPassReturn ret = LSTMHelpers.activateHelper(this, this.layerConfiguration.getNetConfiguration(), this.getTypedLayerConfiguration().getGateActivationFunction(), permuteIfNWC(this.input), getParam(recurrentKey), getParam(inputKey), getParam(biasKey), training, prevOutputActivations, prevMemCellState, forBackprop, forwards, inputKey, maskArray, true, null, forBackprop ? 
cacheMode : CacheMode.NONE, workspaceMgr, getTypedLayerConfiguration().isHelperAllowFallback()); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTM.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTM.java index 6626e927e..91d820b37 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTM.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTM.java @@ -83,7 +83,7 @@ public class GravesLSTM extends BaseRecurrentLayer p = LSTMHelpers.backpropGradientHelper(this, - this.layerConfiguration.getNetConfiguration(), this.getTypedLayerConfiguration().getGateActivationFn(), permuteIfNWC(this.input), + this.layerConfiguration.getNetConfiguration(), this.getTypedLayerConfiguration().getGateActivationFunction(), permuteIfNWC(this.input), recurrentWeights, inputWeights, permuteIfNWC(epsilon), truncatedBPTT, tbpttBackwardLength, fwdPass, true, GravesLSTMParamInitializer.INPUT_WEIGHT_KEY, GravesLSTMParamInitializer.RECURRENT_WEIGHT_KEY, GravesLSTMParamInitializer.BIAS_KEY, gradientViews, maskArray, true, null, @@ -128,7 +128,7 @@ public class GravesLSTM extends BaseRecurrentLayer { - public static final String STATE_KEY_PREV_ACTIVATION = "prevAct"; - public static final String STATE_KEY_PREV_MEMCELL = "prevMem"; - protected LSTMHelper helper = null; - protected FwdPassReturn cachedFwdPass; + public static final String STATE_KEY_PREV_ACTIVATION = "prevAct"; + public static final String STATE_KEY_PREV_MEMCELL = "prevMem"; + protected LSTMHelper helper = null; + protected FwdPassReturn cachedFwdPass; - public LSTM(LayerConfiguration conf, DataType dataType) { - super(conf, dataType); - initializeHelper(); + public LSTM(LayerConfiguration conf, DataType dataType) { + super(conf, dataType); + initializeHelper(); + } + + void initializeHelper() { + String backend = Nd4j.getExecutioner().getEnvironmentInformation().getProperty("backend"); + if ("CUDA".equalsIgnoreCase(backend)) { + helper = + DL4JClassLoading.createNewInstance( + "org.deeplearning4j.cuda.recurrent.CudnnLSTMHelper", LSTMHelper.class, dataType); + log.debug("CudnnLSTMHelper successfully initialized"); + if (!helper.checkSupported( + getTypedLayerConfiguration().getGateActivationFunction(), + getTypedLayerConfiguration().getActivationFn(), + false)) { + helper = null; + } } - - void initializeHelper() { - String backend = Nd4j.getExecutioner().getEnvironmentInformation().getProperty("backend"); - if("CUDA".equalsIgnoreCase(backend)) { - helper = DL4JClassLoading.createNewInstance( - "org.deeplearning4j.cuda.recurrent.CudnnLSTMHelper", - LSTMHelper.class, - dataType); - log.debug("CudnnLSTMHelper successfully initialized"); - if (!helper.checkSupported(getTypedLayerConfiguration().getGateActivationFn(), getTypedLayerConfiguration().getActivationFn(), false)) { - helper = null; - } + /* + //Disabled pending: https://github.com/eclipse/deeplearning4j/issues/8331 + else if ("CPU".equalsIgnoreCase(backend) && BaseMKLDNNHelper.mklDnnEnabled()){ + helper = new MKLDNNLSTMHelper(); + log.debug("MKLDNNLSTMHelper successfully initialized"); + if (!helper.checkSupported(layerConf().getGateActivationFn(), layerConf().getActivationFn(), false)) { + helper = null; } - /* - //Disabled pending: https://github.com/eclipse/deeplearning4j/issues/8331 - else if ("CPU".equalsIgnoreCase(backend) && BaseMKLDNNHelper.mklDnnEnabled()){ - helper = new MKLDNNLSTMHelper(); - log.debug("MKLDNNLSTMHelper 
successfully initialized"); - if (!helper.checkSupported(layerConf().getGateActivationFn(), layerConf().getActivationFn(), false)) { - helper = null; - } - } - */ + } + */ + } + + @Override + public Gradient gradient() { + throw new UnsupportedOperationException( + "gradient() method for layerwise pretraining: not supported for LSTMs (pretraining not possible) " + + layerId()); + } + + @Override + public Pair backpropGradient( + INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { + return backpropGradientHelper(epsilon, false, -1, workspaceMgr); + } + + @Override + public Pair tbpttBackpropGradient( + INDArray epsilon, int tbpttBackwardLength, LayerWorkspaceMgr workspaceMgr) { + return backpropGradientHelper(epsilon, true, tbpttBackwardLength, workspaceMgr); + } + + private Pair backpropGradientHelper( + final INDArray epsilon, + final boolean truncatedBPTT, + final int tbpttBackwardLength, + LayerWorkspaceMgr workspaceMgr) { + assertInputSet(true); + + final INDArray inputWeights = + getParamWithNoise(LSTMParamInitializer.INPUT_WEIGHT_KEY, true, workspaceMgr); + final INDArray recurrentWeights = + getParamWithNoise( + LSTMParamInitializer.RECURRENT_WEIGHT_KEY, + true, + workspaceMgr); // Shape: [hiddenLayerSize,4*hiddenLayerSize+3]; order: + // [wI,wF,wO,wG,wFF,wOO,wGG] + + // First: Do forward pass to get gate activations, zs etc. + FwdPassReturn fwdPass; + if (truncatedBPTT) { + fwdPass = + activateHelper( + true, + stateMap.get(STATE_KEY_PREV_ACTIVATION), + stateMap.get(STATE_KEY_PREV_MEMCELL), + true, + workspaceMgr); + // Store last time step of output activations and memory cell state in tBpttStateMap + tBpttStateMap.put(STATE_KEY_PREV_ACTIVATION, fwdPass.lastAct.detach()); + tBpttStateMap.put(STATE_KEY_PREV_MEMCELL, fwdPass.lastMemCell.detach()); + } else { + fwdPass = activateHelper(true, null, null, true, workspaceMgr); + } + fwdPass.fwdPassOutput = permuteIfNWC(fwdPass.fwdPassOutput); + Pair p = + LSTMHelpers.backpropGradientHelper( + this, + getNetConfiguration(), + this.getTypedLayerConfiguration().getGateActivationFunction(), + permuteIfNWC(this.input), + recurrentWeights, + inputWeights, + permuteIfNWC(epsilon), + truncatedBPTT, + tbpttBackwardLength, + fwdPass, + true, + LSTMParamInitializer.INPUT_WEIGHT_KEY, + LSTMParamInitializer.RECURRENT_WEIGHT_KEY, + LSTMParamInitializer.BIAS_KEY, + gradientViews, + null, + false, + helper, + workspaceMgr, + getTypedLayerConfiguration().isHelperAllowFallback()); + + weightNoiseParams.clear(); + p.setSecond(permuteIfNWC(backpropDropOutIfPresent(p.getSecond()))); + return p; + } + + @Override + public INDArray activate(INDArray input, boolean training, LayerWorkspaceMgr workspaceMgr) { + setInput(input, workspaceMgr); + return activateHelper(training, null, null, false, workspaceMgr).fwdPassOutput; + } + + @Override + public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) { + return activateHelper(training, null, null, false, workspaceMgr).fwdPassOutput; + } + + private FwdPassReturn activateHelper( + final boolean training, + final INDArray prevOutputActivations, + final INDArray prevMemCellState, + boolean forBackprop, + LayerWorkspaceMgr workspaceMgr) { + assertInputSet(false); + Preconditions.checkState( + input.rank() == 3, "3D input expected to RNN layer expected, got " + input.rank()); + + boolean nwc = + TimeSeriesUtils.getFormatFromRnnLayer(getTypedLayerConfiguration()) == RNNFormat.NWC; + + INDArray origInput = input; + if (nwc) { + input = permuteIfNWC(input); } - @Override - public Gradient gradient() { - 
throw new UnsupportedOperationException( - "gradient() method for layerwise pretraining: not supported for LSTMs (pretraining not possible) " - + layerId()); + applyDropOutIfNecessary(training, workspaceMgr); + + // TODO LSTM cache mode is disabled for now - not passing all tests + cacheMode = CacheMode.NONE; + + if (forBackprop && cachedFwdPass != null) { + FwdPassReturn ret = cachedFwdPass; + cachedFwdPass = null; + return ret; } - @Override - public Pair backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { - return backpropGradientHelper(epsilon, false, -1, workspaceMgr); + final INDArray recurrentWeights = + getParamWithNoise( + LSTMParamInitializer.RECURRENT_WEIGHT_KEY, + training, + workspaceMgr); // Shape: [hiddenLayerSize,4*hiddenLayerSize+3]; order: + // [wI,wF,wO,wG,wFF,wOO,wGG] + final INDArray inputWeights = + getParamWithNoise( + LSTMParamInitializer.INPUT_WEIGHT_KEY, + training, + workspaceMgr); // Shape: [n^(L-1),4*hiddenLayerSize]; order: [wi,wf,wo,wg] + final INDArray biases = + getParamWithNoise( + LSTMParamInitializer.BIAS_KEY, + training, + workspaceMgr); // by row: IFOG //Shape: [4,hiddenLayerSize]; order: [bi,bf,bo,bg]^T + FwdPassReturn fwd = + LSTMHelpers.activateHelper( + this, + getNetConfiguration(), + this.getTypedLayerConfiguration().getGateActivationFunction(), + input, + recurrentWeights, + inputWeights, + biases, + training, + prevOutputActivations, + prevMemCellState, + (training && cacheMode != CacheMode.NONE) || forBackprop, + true, + LSTMParamInitializer.INPUT_WEIGHT_KEY, + maskArray, + false, + helper, + forBackprop ? cacheMode : CacheMode.NONE, + workspaceMgr, + getTypedLayerConfiguration().isHelperAllowFallback()); + + fwd.fwdPassOutput = permuteIfNWC(fwd.fwdPassOutput); + + if (training && cacheMode != CacheMode.NONE) { + cachedFwdPass = fwd; } - @Override - public Pair tbpttBackpropGradient(INDArray epsilon, int tbpttBackwardLength, LayerWorkspaceMgr workspaceMgr) { - return backpropGradientHelper(epsilon, true, tbpttBackwardLength, workspaceMgr); + if (nwc) { + input = origInput; } + return fwd; + } - private Pair backpropGradientHelper(final INDArray epsilon, final boolean truncatedBPTT, - final int tbpttBackwardLength, LayerWorkspaceMgr workspaceMgr) { - assertInputSet(true); + @Override + public Type type() { + return Type.RECURRENT; + } - final INDArray inputWeights = getParamWithNoise(LSTMParamInitializer.INPUT_WEIGHT_KEY, true, workspaceMgr); - final INDArray recurrentWeights = getParamWithNoise(LSTMParamInitializer.RECURRENT_WEIGHT_KEY, true, workspaceMgr); //Shape: [hiddenLayerSize,4*hiddenLayerSize+3]; order: [wI,wF,wO,wG,wFF,wOO,wGG] + @Override + public boolean isPretrainLayer() { + return false; + } - //First: Do forward pass to get gate activations, zs etc. 
- FwdPassReturn fwdPass; - if (truncatedBPTT) { - fwdPass = activateHelper(true, stateMap.get(STATE_KEY_PREV_ACTIVATION), - stateMap.get(STATE_KEY_PREV_MEMCELL), true, workspaceMgr); - //Store last time step of output activations and memory cell state in tBpttStateMap - tBpttStateMap.put(STATE_KEY_PREV_ACTIVATION, fwdPass.lastAct.detach()); - tBpttStateMap.put(STATE_KEY_PREV_MEMCELL, fwdPass.lastMemCell.detach()); - } else { - fwdPass = activateHelper(true, null, null, true, workspaceMgr); - } - fwdPass.fwdPassOutput = permuteIfNWC(fwdPass.fwdPassOutput); - Pair p = LSTMHelpers.backpropGradientHelper(this, - getNetConfiguration(), this.getTypedLayerConfiguration().getGateActivationFn(), permuteIfNWC(this.input), - recurrentWeights, inputWeights, permuteIfNWC(epsilon), truncatedBPTT, tbpttBackwardLength, fwdPass, true, - LSTMParamInitializer.INPUT_WEIGHT_KEY, LSTMParamInitializer.RECURRENT_WEIGHT_KEY, - LSTMParamInitializer.BIAS_KEY, gradientViews, null, false, helper, workspaceMgr, - getTypedLayerConfiguration().isHelperAllowFallback()); + @Override + public Pair feedForwardMaskArray( + INDArray maskArray, MaskState currentMaskState, int minibatchSize) { + // LSTM (standard, not bi-directional) don't make any changes to the data OR the mask arrays + // Any relevant masking occurs during backprop + // They also set the current mask array as inactive: this is for situations like the following: + // in -> dense -> lstm -> dense -> lstm + // The first dense should be masked using the input array, but the second shouldn't. If + // necessary, the second + // dense will be masked via the output layer mask - weightNoiseParams.clear(); - p.setSecond(permuteIfNWC(backpropDropOutIfPresent(p.getSecond()))); - return p; + return new Pair<>(maskArray, MaskState.Passthrough); + } + + @Override + public INDArray rnnTimeStep(INDArray input, LayerWorkspaceMgr workspaceMgr) { + setInput(input, workspaceMgr); + FwdPassReturn fwdPass = + activateHelper( + false, + stateMap.get(STATE_KEY_PREV_ACTIVATION), + stateMap.get(STATE_KEY_PREV_MEMCELL), + false, + workspaceMgr); + INDArray outAct = fwdPass.fwdPassOutput; + // Store last time step of output activations and memory cell state for later use: + stateMap.put(STATE_KEY_PREV_ACTIVATION, fwdPass.lastAct.detach()); + stateMap.put(STATE_KEY_PREV_MEMCELL, fwdPass.lastMemCell.detach()); + + return outAct; + } + + @Override + public INDArray rnnActivateUsingStoredState( + INDArray input, boolean training, boolean storeLastForTBPTT, LayerWorkspaceMgr workspaceMgr) { + setInput(input, workspaceMgr); + FwdPassReturn fwdPass = + activateHelper( + training, + tBpttStateMap.get(STATE_KEY_PREV_ACTIVATION), + tBpttStateMap.get(STATE_KEY_PREV_MEMCELL), + false, + workspaceMgr); + INDArray outAct = fwdPass.fwdPassOutput; + if (storeLastForTBPTT) { + // Store last time step of output activations and memory cell state in tBpttStateMap + tBpttStateMap.put(STATE_KEY_PREV_ACTIVATION, fwdPass.lastAct.detach()); + tBpttStateMap.put(STATE_KEY_PREV_MEMCELL, fwdPass.lastMemCell.detach()); } - @Override - public INDArray activate(INDArray input, boolean training, LayerWorkspaceMgr workspaceMgr) { - setInput(input, workspaceMgr); - return activateHelper(training, null, null, false, workspaceMgr).fwdPassOutput; - } + return outAct; + } - @Override - public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) { - return activateHelper(training, null, null, false, workspaceMgr).fwdPassOutput; - } - - private FwdPassReturn activateHelper(final boolean training, final INDArray 
prevOutputActivations, - final INDArray prevMemCellState, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) { - assertInputSet(false); - Preconditions.checkState(input.rank() == 3, - "3D input expected to RNN layer expected, got " + input.rank()); - - boolean nwc = TimeSeriesUtils.getFormatFromRnnLayer(getTypedLayerConfiguration()) == RNNFormat.NWC; - - INDArray origInput = input; - if(nwc){ - input = permuteIfNWC(input); - } - - applyDropOutIfNecessary(training, workspaceMgr); - - //TODO LSTM cache mode is disabled for now - not passing all tests - cacheMode = CacheMode.NONE; - - if (forBackprop && cachedFwdPass != null) { - FwdPassReturn ret = cachedFwdPass; - cachedFwdPass = null; - return ret; - } - - final INDArray recurrentWeights = getParamWithNoise(LSTMParamInitializer.RECURRENT_WEIGHT_KEY, training, workspaceMgr); //Shape: [hiddenLayerSize,4*hiddenLayerSize+3]; order: [wI,wF,wO,wG,wFF,wOO,wGG] - final INDArray inputWeights = getParamWithNoise(LSTMParamInitializer.INPUT_WEIGHT_KEY, training, workspaceMgr); //Shape: [n^(L-1),4*hiddenLayerSize]; order: [wi,wf,wo,wg] - final INDArray biases = getParamWithNoise(LSTMParamInitializer.BIAS_KEY, training, workspaceMgr); //by row: IFOG //Shape: [4,hiddenLayerSize]; order: [bi,bf,bo,bg]^T - FwdPassReturn fwd = LSTMHelpers.activateHelper(this, getNetConfiguration(), this.getTypedLayerConfiguration().getGateActivationFn(), - input, recurrentWeights, inputWeights, biases, training, prevOutputActivations, - prevMemCellState, (training && cacheMode != CacheMode.NONE) || forBackprop, true, - LSTMParamInitializer.INPUT_WEIGHT_KEY, maskArray, false, helper, - forBackprop ? cacheMode : CacheMode.NONE, workspaceMgr, getTypedLayerConfiguration().isHelperAllowFallback()); - - fwd.fwdPassOutput = permuteIfNWC(fwd.fwdPassOutput); - - if (training && cacheMode != CacheMode.NONE) { - cachedFwdPass = fwd; - } - - if(nwc){ - input = origInput; - } - - return fwd; - } - - @Override - public Type type() { - return Type.RECURRENT; - } - - @Override - public boolean isPretrainLayer() { - return false; - } - - @Override - public Pair feedForwardMaskArray(INDArray maskArray, MaskState currentMaskState, - int minibatchSize) { - //LSTM (standard, not bi-directional) don't make any changes to the data OR the mask arrays - //Any relevant masking occurs during backprop - //They also set the current mask array as inactive: this is for situations like the following: - // in -> dense -> lstm -> dense -> lstm - // The first dense should be masked using the input array, but the second shouldn't. 
If necessary, the second - // dense will be masked via the output layer mask - - return new Pair<>(maskArray, MaskState.Passthrough); - } - - @Override - public INDArray rnnTimeStep(INDArray input, LayerWorkspaceMgr workspaceMgr) { - setInput(input, workspaceMgr); - FwdPassReturn fwdPass = activateHelper(false, stateMap.get(STATE_KEY_PREV_ACTIVATION), - stateMap.get(STATE_KEY_PREV_MEMCELL), false, workspaceMgr); - INDArray outAct = fwdPass.fwdPassOutput; - //Store last time step of output activations and memory cell state for later use: - stateMap.put(STATE_KEY_PREV_ACTIVATION, fwdPass.lastAct.detach()); - stateMap.put(STATE_KEY_PREV_MEMCELL, fwdPass.lastMemCell.detach()); - - return outAct; - } - - - - @Override - public INDArray rnnActivateUsingStoredState(INDArray input, boolean training, boolean storeLastForTBPTT, LayerWorkspaceMgr workspaceMgr) { - setInput(input, workspaceMgr); - FwdPassReturn fwdPass = activateHelper(training, tBpttStateMap.get(STATE_KEY_PREV_ACTIVATION), - tBpttStateMap.get(STATE_KEY_PREV_MEMCELL), false, workspaceMgr); - INDArray outAct = fwdPass.fwdPassOutput; - if (storeLastForTBPTT) { - //Store last time step of output activations and memory cell state in tBpttStateMap - tBpttStateMap.put(STATE_KEY_PREV_ACTIVATION, fwdPass.lastAct.detach()); - tBpttStateMap.put(STATE_KEY_PREV_MEMCELL, fwdPass.lastMemCell.detach()); - } - - return outAct; - } - - @Override - public LayerHelper getHelper() { - return helper; - } + @Override + public LayerHelper getHelper() { + return helper; + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTMHelpers.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTMHelpers.java index 0c77739c7..2ee627263 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTMHelpers.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTMHelpers.java @@ -751,7 +751,7 @@ public class LSTMHelpers { cacheVar.put(cm, 2 * r.getCacheModeMemVariablePerEx().get(cm)); } - return new LayerMemoryReport.Builder(r.getLayerName(), r.getClass(), r.getInputType(), r.getOutputType()) + return new LayerMemoryReport.Builder(r.getName(), r.getClass(), r.getInputType(), r.getOutputType()) .standardMemory(2 * r.getParameterSize(), 2 * r.getUpdaterStateSize()) .workingMemory(2 * r.getWorkingMemoryFixedInference(), 2 * r.getWorkingMemoryVariableInference(), fixedTrain, varTrain) diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/RnnLossLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/RnnLossLayer.java index e734212e8..f58e054ea 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/RnnLossLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/RnnLossLayer.java @@ -63,7 +63,7 @@ public class RnnLossLayer extends BaseLayer getGradientsAndDelta(INDArray preOut, LayerWorkspaceMgr workspaceMgr) { - ILossFunction lossFunction = getTypedLayerConfiguration().getLossFn(); + ILossFunction lossFunction = getTypedLayerConfiguration().getLossFunction(); INDArray labels2d = getLabels2d(workspaceMgr, ArrayType.BP_WORKING_MEM); if (labels2d.size(1) != preOut.size(1)) { throw new DL4JInvalidInputException( diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/util/IdentityLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/util/IdentityLayer.java 
index 565542ab5..19695f550 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/util/IdentityLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/util/IdentityLayer.java @@ -21,19 +21,21 @@ package org.deeplearning4j.nn.layers.util; import lombok.NoArgsConstructor; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.samediff.SameDiffLambdaLayer; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; -import org.nd4j.linalg.learning.regularization.Regularization; -import java.util.List; - -@NoArgsConstructor +@SuperBuilder(builderMethodName = "innerBuilder") public class IdentityLayer extends SameDiffLambdaLayer { - public IdentityLayer(String name) { - this.layerName = name; + public static IdentityLayerBuilder builder() { + return innerBuilder(); + } + public static IdentityLayerBuilder builder(String name) { + return innerBuilder() + .name(name); } @Override diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/variational/VariationalAutoencoder.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/variational/VariationalAutoencoder.java index bf21a6dc8..6f3d7c3b0 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/variational/VariationalAutoencoder.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/variational/VariationalAutoencoder.java @@ -108,7 +108,7 @@ public class VariationalAutoencoder implements Layer { ((org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder) layerConfiguration) .getOutputDistribution(); this.pzxActivationFn = ((org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder) layerConfiguration) - .getPzxActivationFn(); + .getPzxActivationFunction(); this.numSamples = ((org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder) layerConfiguration) .getNumSamples(); } @@ -177,7 +177,7 @@ public class VariationalAutoencoder implements Layer { } protected String layerId() { - String name = this.getLayerConfiguration().getLayerName(); + String name = this.getLayerConfiguration().getName(); return "(layer name: " + (name == null ? 
"\"\"" : name) + ", layer index: " + index + ")"; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java index 2b27c0179..bca8e7e68 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java @@ -20,16 +20,14 @@ package org.deeplearning4j.nn.multilayer; -import com.fasterxml.jackson.annotation.JsonIdentityInfo; -import com.fasterxml.jackson.annotation.ObjectIdGenerators; +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import java.io.*; import java.util.*; import java.util.stream.Collectors; -import lombok.Getter; -import lombok.NonNull; -import lombok.Setter; + +import lombok.*; import lombok.extern.slf4j.Slf4j; -import lombok.val; import net.brutex.ai.dnn.api.IModel; import net.brutex.ai.dnn.networks.ArtificialNeuralNetwork; import org.apache.commons.lang3.ArrayUtils; @@ -51,7 +49,6 @@ import org.deeplearning4j.nn.conf.layers.recurrent.Bidirectional; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.graph.ComputationGraph; -import org.deeplearning4j.nn.layers.BaseLayer; import org.deeplearning4j.nn.layers.FrozenLayer; import org.deeplearning4j.nn.layers.FrozenLayerWithBackprop; import org.deeplearning4j.nn.layers.LayerHelper; @@ -120,7 +117,8 @@ import org.nd4j.linalg.workspace.WorkspaceUtils; * the weights (or parameters) so that predictions get more accurate. */ @Slf4j -@JsonIdentityInfo(generator = ObjectIdGenerators.IntSequenceGenerator.class, property = "@id") +// @JsonIdentityInfo(generator = ObjectIdGenerators.IntSequenceGenerator.class, property = "@id") +@JsonIgnoreProperties({"helper", "net", "initCalled", "iupdater", "activationFn"}) public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serializable, Classifier, Layer, ITrainableLayer { @@ -287,19 +285,21 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork } /** - * Return the configuration of this layer + * Return the configuration of this layer getLayerConfiguration cannot be called on a + * MultiLayerNetwork. This function is here because of inheritance from Layer (which should be + * fixed). * * @return the configuration */ @Override public LayerConfiguration getLayerConfiguration() { // TODO - throw new RuntimeException( - "getLayerConfiguration cannot be called on a MultiLayerNetwork. This function is here because of inheritance from Layer (which should be fixed)."); + // no op + return null; } /** - * Set a new layer configuration, new init() needs to be called afterwards. + * Set a new layer configuration, new init() needs to be called afterward. * * @param lconf layer configuration */ @@ -567,7 +567,8 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork * @param backpropParamsOnly If true, return backprop params only. 
If false: return all params * @return Parameters for the network */ - public Map paramTable(boolean backpropParamsOnly) { + @Override + public Map getParamTable(boolean backpropParamsOnly) { // Get all parameters from all layers Map allParams = new LinkedHashMap<>(); for (int i = 0; i < layers.length; i++) { @@ -586,7 +587,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork int idx = paramName.indexOf('_'); int layerIdx = Integer.parseInt(paramName.substring(0, idx)); String subName = paramName.substring(idx + 1); - return ((BaseLayer) getLayer(layerIdx)).updaterDivideByMinibatch(subName); + return getLayer(layerIdx).updaterDivideByMinibatch(subName); } /** @@ -637,12 +638,16 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork getClass().getSimpleName()); return; } + //First clear the current layers array, as we will instantiate fresh from configuration + layers = null; + //clear model parameters, as these will also be recalculated + flattenedParams = null; flattenedGradients = null; /** * Initialize the neural network configuration first. This also triggers inheritance of * configuration setting where needed. */ - getNetConfiguration().setNeuralNet(this); + getNetConfiguration().setNet(this); getNetConfiguration() .init(); // we cannot do this in constructor, as the config might be attached later. @@ -781,7 +786,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork // Set parameters in MultiLayerNetwork.getNetConfiguration() for later use in // BaseOptimizer.setupSearchState() etc getNetConfiguration().clearNetWideVariable(); - List variables = getNetConfiguration().netWideVariables(false); + Set variables = getNetConfiguration().getNetWideVariables(false); for (int i = 0; i < layers.length; i++) { if (layers[i] == null) { throw new IllegalStateException( @@ -1080,7 +1085,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork try { mgr.validateArrayLocation(arrayType, array, false, layerIdx > 0); } catch (ND4JWorkspaceException e) { - String layerName = layers[layerIdx].getLayerConfiguration().getLayerName(); + String layerName = layers[layerIdx].getLayerConfiguration().getName(); String clazz; if (isPreprocessor) { clazz = getNetConfiguration().getInputPreProcess(layerIdx).getClass().getName(); @@ -1361,7 +1366,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork if (layers[i].input() == null) { log.error( "Input for layer {} at index {} cannot be null.", - layers[i].getLayerConfiguration().getLayerName(), + layers[i].getLayerConfiguration().getName(), i); throw new RuntimeException("Layer input is null."); } @@ -1856,6 +1861,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork } @Override + @JsonIgnore public ITraininableLayerConfiguration getTrainingConfig() { throw new UnsupportedOperationException("Not supported"); } @@ -1877,8 +1883,9 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork * @return 1d parameter vector */ @Override + @JsonIgnore public INDArray getParams() { - throw new RuntimeException("Calling getParams on the MultiLazerNetwork !?"); + throw new RuntimeException("Calling getParams on the MultiLayerNetwork !?"); } /** @@ -2243,7 +2250,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork ArrayType.INPUT, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) // Usually not required here. 
Exception: OutputLayer - // dropout + // dropout .with(ArrayType.ACTIVATION_GRAD, WS_LAYER_ACT_1, WS_LAYER_ACT_X_CONFIG) .with(ArrayType.FF_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) .with(ArrayType.BP_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) @@ -2266,7 +2273,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork ArrayType.INPUT, WS_ALL_LAYERS_ACT, WS_ALL_LAYERS_ACT_CONFIG) // Usually not required here. Exception: OutputLayer - // dropout + // dropout .with(ArrayType.ACTIVATION_GRAD, WS_LAYER_ACT_2, WS_LAYER_ACT_X_CONFIG) .with(ArrayType.FF_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) .with(ArrayType.BP_WORKING_MEM, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG) @@ -2522,7 +2529,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork long nSubsets = timeSeriesLength / fwdLen; if (timeSeriesLength % fwdLen != 0) { nSubsets++; // Example: 100 fwdLen with timeSeriesLength=120 -> want 2 subsets (1 of size 100, - // 1 of size 20) + // 1 of size 20) } rnnClearPreviousState(); @@ -3454,14 +3461,14 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork public Layer getLayer(@NotNull String name) { return Arrays.stream(layers) - .filter(l -> !l.getLayerConfiguration().getLayerName().equals(name)) + .filter(l -> !l.getLayerConfiguration().getName().equals(name)) .findFirst() .get(); } public List getLayerNames() { return Arrays.stream(layers) - .map(l -> l.getLayerConfiguration().getLayerName()) + .map(l -> l.getLayerConfiguration().getName()) .collect(Collectors.toList()); } @@ -4102,7 +4109,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork long nSubsets = tsLength / fwdLen; if (tsLength % fwdLen != 0) { nSubsets++; // Example: 100 fwdLen with timeSeriesLength=120 -> want 2 subsets (1 of size - // 100, 1 of size 20) + // 100, 1 of size 20) } for (int i = 0; i < nSubsets; i++) { val startTimeIdx = i * fwdLen; @@ -4311,7 +4318,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork int frozenParams = 0; for (org.deeplearning4j.nn.api.Layer currentLayer : getLayers()) { - String name = currentLayer.getLayerConfiguration().getLayerName(); + String name = currentLayer.getLayerConfiguration().getName(); if (name == null) { name = String.valueOf(currentLayer.getIndex()); } @@ -4748,6 +4755,11 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork ModelSerializer.writeModel(this, oos, true); } + @Override + public Map getParamTable() { + return getParamTable(false); + } + private void readObject(ObjectInputStream ois) throws ClassNotFoundException, IOException { val mln = ModelSerializer.restoreMultiLayerNetwork(ois, true); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/BidirectionalParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/BidirectionalParamInitializer.java index 27905d60f..be4f1ad30 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/BidirectionalParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/BidirectionalParamInitializer.java @@ -20,21 +20,15 @@ package org.deeplearning4j.nn.params; +import static org.nd4j.linalg.indexing.NDArrayIndex.interval; + +import java.util.*; import lombok.val; import org.deeplearning4j.nn.api.AbstractParamInitializer; -import org.deeplearning4j.nn.api.ParamInitializer; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import 
org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.recurrent.Bidirectional; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.ArrayList; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; - -import static org.nd4j.linalg.indexing.NDArrayIndex.interval; - public class BidirectionalParamInitializer extends AbstractParamInitializer { public static final String FORWARD_PREFIX = "f"; public static final String BACKWARD_PREFIX = "b"; @@ -97,21 +91,21 @@ public class BidirectionalParamInitializer extends AbstractParamInitializer { } @Override - public Map init(LayerConfiguration conf, INDArray paramsView, boolean initializeParams) { + public Map init(LayerConfiguration lconf, INDArray paramsView, boolean initializeParams) { val n = paramsView.length()/2; INDArray forwardView = paramsView.get(interval(0,0,true), interval(0, n)); INDArray backwardView = paramsView.get(interval(0,0,true), interval(n, 2*n)); - conf.clearVariables(); + lconf.clearVariables(); - LayerConfiguration c1 = conf.clone(); - LayerConfiguration c2 = conf.clone(); + LayerConfiguration c1 = lconf.clone(); + LayerConfiguration c2 = lconf.clone(); //c1.setLayer(underlying); //c2.setLayer(underlying); Map origFwd = underlying.initializer().init(c1, forwardView, initializeParams); Map origBwd = underlying.initializer().init(c2, backwardView, initializeParams); - List variables = addPrefixes(c1.getVariables(), c2.getVariables()); - conf.setVariables(variables); + LinkedHashSet variables = addPrefixes(c1.getVariables(), c2.getVariables()); + lconf.setVariables(variables); Map out = new LinkedHashMap<>(); for( Map.Entry e : origFwd.entrySet()){ @@ -136,8 +130,8 @@ public class BidirectionalParamInitializer extends AbstractParamInitializer { return out; } - private List addPrefixes(List fwd, List bwd){ - List out = new ArrayList<>(); + private LinkedHashSet addPrefixes(Set fwd, Set bwd){ + LinkedHashSet out = new LinkedHashSet<>(); for(String s : fwd){ out.add(FORWARD_PREFIX + s); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DefaultParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DefaultParamInitializer.java index a7f444c91..2278af04a 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DefaultParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DefaultParamInitializer.java @@ -199,7 +199,7 @@ public class DefaultParamInitializer extends AbstractParamInitializer { if( layerConf.getWeightInit() == null) { // set a default and set warning layerConf.setWeightInit(new WeightInitXavier()); - log.warn("Weight Initializer function was not set on layer {} of class {}, it will default to {}", conf.getLayerName(), + log.warn("Weight Initializer function was not set on layer {} of class {}, it will default to {}", conf.getName(), conf.getClass().getSimpleName(), WeightInitXavier.class.getSimpleName()); } return createWeightMatrix(layerConf.getNIn(), layerConf.getNOut(), layerConf.getWeightInit(), @@ -226,11 +226,11 @@ public class DefaultParamInitializer extends AbstractParamInitializer { protected boolean hasBias(LayerConfiguration layer){ if(layer instanceof BaseOutputLayer ) { - return ((BaseOutputLayer) layer).hasBias(); + return ((BaseOutputLayer) layer).isHasBias(); } else if(layer instanceof DenseLayer){ - return ((DenseLayer)layer).hasBias(); + return ((DenseLayer)layer).isHasBias(); } else if(layer instanceof 
EmbeddingLayer){ - return ((EmbeddingLayer)layer).hasBias(); + return ((EmbeddingLayer)layer).isHasBias(); } else if(layer instanceof EmbeddingSequenceLayer){ return ((EmbeddingSequenceLayer)layer).hasBias(); } @@ -239,7 +239,7 @@ public class DefaultParamInitializer extends AbstractParamInitializer { protected boolean hasLayerNorm(LayerConfiguration layer){ if(layer instanceof DenseLayer){ - return ((DenseLayer) layer).hasLayerNorm(); + return ((DenseLayer) layer).isHasLayerNorm(); } return false; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/GravesLSTMParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/GravesLSTMParamInitializer.java index 265027812..c770ab7d2 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/GravesLSTMParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/GravesLSTMParamInitializer.java @@ -119,8 +119,8 @@ public class GravesLSTMParamInitializer extends AbstractParamInitializer { val recurrentWShape = new long[] {nL, 4 * nL + 3}; IWeightInit rwInit; - if(layerConf.getWeightInitFnRecurrent() != null){ - rwInit = layerConf.getWeightInitFnRecurrent(); + if(layerConf.getWeightInitRecurrent() != null){ + rwInit = layerConf.getWeightInitRecurrent(); } else { rwInit = layerConf.getWeightInit(); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/LSTMParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/LSTMParamInitializer.java index 040822a8a..c59d0c603 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/LSTMParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/LSTMParamInitializer.java @@ -128,8 +128,8 @@ public class LSTMParamInitializer extends AbstractParamInitializer { val recurrentWShape = new long[] {nL, 4 * nL}; IWeightInit rwInit; - if(layerConf.getWeightInitFnRecurrent() != null){ - rwInit = layerConf.getWeightInitFnRecurrent(); + if(layerConf.getWeightInitRecurrent() != null){ + rwInit = layerConf.getWeightInitRecurrent(); } else { rwInit = layerConf.getWeightInit(); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java index 0846e0bf5..5ea7b5000 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java @@ -23,8 +23,6 @@ package org.deeplearning4j.nn.params; import lombok.extern.slf4j.Slf4j; import lombok.val; import org.deeplearning4j.nn.api.AbstractParamInitializer; -import org.deeplearning4j.nn.api.ParamInitializer; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.samediff.AbstractSameDiffLayer; import org.deeplearning4j.nn.conf.layers.samediff.SameDiffVertex; @@ -117,7 +115,7 @@ public class SameDiffParamInitializer extends AbstractParamInitializer { public Map subsetAndReshape(List params, Map paramShapes, INDArray view, AbstractSameDiffLayer sdl, SameDiffVertex sdv){ Class clazz = (sdl != null ? sdl.getClass() : sdv.getClass()); - String layerName = (sdl != null ? sdl.getLayerName() : ""); //TODO + String layerName = (sdl != null ? 
sdl.getName() : ""); //TODO Map out = new LinkedHashMap<>(); int soFar = 0; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/SimpleRnnParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/SimpleRnnParamInitializer.java index 488c00396..5b1deb4d8 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/SimpleRnnParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/SimpleRnnParamInitializer.java @@ -104,8 +104,8 @@ public class SimpleRnnParamInitializer extends AbstractParamInitializer { m.put(WEIGHT_KEY, w); IWeightInit rwInit; - if (c.getWeightInitFnRecurrent() != null) { - rwInit = c.getWeightInitFnRecurrent(); + if (c.getWeightInitRecurrent() != null) { + rwInit = c.getWeightInitRecurrent(); } else { rwInit = c.getWeightInit(); } @@ -167,7 +167,7 @@ public class SimpleRnnParamInitializer extends AbstractParamInitializer { protected boolean hasLayerNorm(LayerConfiguration layer){ if(layer instanceof SimpleRnn){ - return ((SimpleRnn) layer).hasLayerNorm(); + return ((SimpleRnn) layer).isHasLayerNorm(); } return false; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/FineTuneConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/FineTuneConfiguration.java index b62e77e83..a0a4f059d 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/FineTuneConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/FineTuneConfiguration.java @@ -69,7 +69,7 @@ import org.nd4j.linalg.learning.regularization.WeightDecay; @Data public class FineTuneConfiguration { - protected IActivation activationFn; + protected IActivation activation; protected IWeightInit weightInitFn; protected Double biasInit; protected List regularization; @@ -151,7 +151,7 @@ public class FineTuneConfiguration { if (d != null) { d = d.clone(); //Clone to avoid shared state between layers } - layerConfiguration.setIDropout(d); + layerConfiguration.setDropOut(d); } if (constraints != null) { layerConfiguration.setConstraints(constraints.orElse(null)); @@ -160,8 +160,8 @@ public class FineTuneConfiguration { if (layerConfiguration != null && layerConfiguration instanceof BaseLayerConfiguration) { BaseLayerConfiguration bl = (BaseLayerConfiguration) layerConfiguration; - if (activationFn != null) { - bl.setActivationFn(activationFn); + if (activation != null) { + bl.setActivation(activation); } if (weightInitFn != null) { bl.setWeightInit(weightInitFn); @@ -241,7 +241,7 @@ public class FineTuneConfiguration { //Perform validation if (layerConfiguration != null) { - LayerValidation.generalValidation(layerConfiguration.getLayerName(), layerConfiguration, get(dropout), regularization, + LayerValidation.generalValidation(layerConfiguration.getName(), layerConfiguration, get(dropout), regularization, regularizationBias, get(constraints), null, null); } @@ -263,8 +263,8 @@ public class FineTuneConfiguration { public NeuralNetConfiguration appliedNeuralNetConfigurationBuilder() { NeuralNetConfiguration.NeuralNetConfigurationBuilder confBuilder = NeuralNetConfiguration.builder(); - if (activationFn != null) { - confBuilder.activation(activationFn); + if (activation != null) { + confBuilder.activation(activation); } if (weightInitFn != null) { confBuilder.weightInit(weightInitFn); diff --git 
a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearning.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearning.java index 708568d19..5a35a492c 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearning.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearning.java @@ -372,7 +372,7 @@ public class TransferLearning { layers[i] = new FrozenLayer(layers[i]); if (origNNC.getVariables() != null) { - List vars = origNNC.getVariables(); + Set vars = origNNC.getVariables(); origNNC.clearVariables(); layerNNC.clearVariables(); for (String s : vars) { @@ -382,8 +382,8 @@ public class TransferLearning { } LayerConfiguration origLayerConf = editedModel.getNetConfiguration().getFlattenedLayerConfigurations().get(i); - LayerConfiguration newLayerConf = new org.deeplearning4j.nn.conf.layers.misc.FrozenLayer(origLayerConf); - newLayerConf.setLayerName(origLayerConf.getLayerName()); + LayerConfiguration newLayerConf = org.deeplearning4j.nn.conf.layers.misc.FrozenLayer.builder().innerConfiguration(origLayerConf).build(); + newLayerConf.setName(origLayerConf.getName()); editedModel.getNetConfiguration().getNetConfigurations().get(i).setLayer(newLayerConf); } editedModel.setLayers(layers); @@ -533,8 +533,8 @@ public class TransferLearning { //Set default layer names, if not set - as per NeuralNetConfiguration.ListBuilder.build() for (int i = 0; i < allConfs.size(); i++) { - if (allConfs.get(i).getLayerName() == null) { - allConfs.get(i).setLayerName("layer" + i); + if (allConfs.get(i).getName() == null) { + allConfs.get(i).setName("layer" + i); } } @@ -592,7 +592,7 @@ public class TransferLearning { NeuralNetConfiguration nnc = lv.getNetConfiguration().clone(); fineTuneConfiguration.applyToLayerConfiguration(lv.getLayerConfiguration()); vertices.put(gv.getKey(), new LayerVertex(nnc, lv.getPreProcessor())); - lv.getLayerConfiguration().setLayerName(gv.getKey()); + lv.getLayerConfiguration().setName(gv.getKey()); } } @@ -1009,8 +1009,8 @@ public class TransferLearning { String layerName = gv.getVertexName(); LayerVertex currLayerVertex = (LayerVertex) newConfig.getVertices().get(layerName); LayerConfiguration origLayerConf = currLayerVertex.getLayerConfiguration(); - LayerConfiguration newLayerConf = new org.deeplearning4j.nn.conf.layers.misc.FrozenLayer(origLayerConf); - newLayerConf.setLayerName(origLayerConf.getLayerName()); + LayerConfiguration newLayerConf = org.deeplearning4j.nn.conf.layers.misc.FrozenLayer.builder().innerConfiguration(origLayerConf).build(); + newLayerConf.setName(origLayerConf.getName()); //Complication here(and reason for clone on next line): inner LayerConfiguration (implementation) // NeuralNetConfiguration.layer (config) should keep the original layer config. 
While network // NNC should have the frozen layer @@ -1019,10 +1019,10 @@ public class TransferLearning { currLayerVertex.getNetConfiguration().setLayer(newLayerConf); //Make sure the underlying layer doesn't change: - List vars = currLayerVertex.getNetConfiguration().netWideVariables(true); + Set vars = currLayerVertex.getNetConfiguration().getNetWideVariables(true); currLayerVertex.getNetConfiguration().clearNetWideVariable(); for (String s : vars) { - newNNC.netWideVariables(false).add(s); + newNNC.getNetWideVariables(false).add(s); } //We also need to place the layer in the CompGraph LayerConfiguration[] (replacing the old one) diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/BaseMultiLayerUpdater.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/BaseMultiLayerUpdater.java index e7a74999c..e8c7c1e6c 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/BaseMultiLayerUpdater.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/BaseMultiLayerUpdater.java @@ -88,7 +88,7 @@ public abstract class BaseMultiLayerUpdater implements Updater String var = variables.get(j); long paramSizeThisVariable = layerParamTable.get(var).length(); IUpdater u = layers[i].getTrainingConfig().getUpdaterByParam(var); - Preconditions.checkNotNull(u, "Updater for parameter %s, layer \"%s\" was null", var, layers[i].getTrainingConfig().getLayerName()); + Preconditions.checkNotNull(u, "Updater for parameter %s, layer \"%s\" was null", var, layers[i].getTrainingConfig().getName()); int updaterStateSizeThisVariable = (int) u.stateSize(paramSizeThisVariable); INDArray gradientViewSubset = null; @@ -238,7 +238,8 @@ public abstract class BaseMultiLayerUpdater implements Updater */ public synchronized INDArray getStateViewArrayCopy(){ Nd4j.getExecutioner().commit(); - return updaterStateViewArray.dup(); + if(updaterStateViewArray!=null) return updaterStateViewArray.dup(); + return Nd4j.empty(); } @Override @@ -269,7 +270,7 @@ public abstract class BaseMultiLayerUpdater implements Updater ITrainableLayer[] layers = getOrderedLayers(); if (layers.length == 1 && isSingleLayerUpdater()) { - layerGradients.put(layers[0].getTrainingConfig().getLayerName(), gradient); + layerGradients.put(layers[0].getTrainingConfig().getName(), gradient); } else { for (Map.Entry gradientPair : gradient.gradientForVariable().entrySet()) { String key = gradientPair.getKey(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/LayerUpdater.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/LayerUpdater.java index 3dafbb3f9..1dfe4a7b9 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/LayerUpdater.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/LayerUpdater.java @@ -42,7 +42,7 @@ public class LayerUpdater extends BaseMultiLayerUpdater { } layersByName = new HashMap<>(); - layersByName.put(layer.getLayerConfiguration().getLayerName(), layer); + layersByName.put(layer.getLayerConfiguration().getName()+"_"+layer.getIndex(), layer); } @Override diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/graph/ComputationGraphUpdater.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/graph/ComputationGraphUpdater.java index 952258bcf..45ffa6159 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/graph/ComputationGraphUpdater.java +++ 
b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/graph/ComputationGraphUpdater.java @@ -43,7 +43,7 @@ public class ComputationGraphUpdater extends BaseMultiLayerUpdater(); ITrainableLayer[] layers = getOrderedLayers(); for (ITrainableLayer l : layers) { - layersByName.put(l.getTrainingConfig().getLayerName(), l); + layersByName.put(l.getTrainingConfig().getName(), l); } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/IWeightInit.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/IWeightInit.java index d0c524c22..676589d87 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/IWeightInit.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/IWeightInit.java @@ -44,4 +44,6 @@ public interface IWeightInit extends Serializable { * @param paramView View of parameters to initialize (and reshape) */ INDArray init(double fanIn, double fanOut, long[] shape, char order, INDArray paramView); + + WeightInit enumValue(); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInit.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInit.java index 50d03d7aa..a588a7236 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInit.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInit.java @@ -21,73 +21,109 @@ package org.deeplearning4j.nn.weights; import org.deeplearning4j.nn.conf.distribution.Distribution; +import org.deeplearning4j.nn.weights.embeddings.EmbeddingInitializer; +import org.deeplearning4j.nn.weights.embeddings.WeightInitEmbedding; public enum WeightInit { - DISTRIBUTION, ZERO, ONES, SIGMOID_UNIFORM, NORMAL, LECUN_NORMAL, UNIFORM, XAVIER, XAVIER_UNIFORM, XAVIER_FAN_IN, XAVIER_LEGACY, RELU, - RELU_UNIFORM, IDENTITY, LECUN_UNIFORM, VAR_SCALING_NORMAL_FAN_IN, VAR_SCALING_NORMAL_FAN_OUT, VAR_SCALING_NORMAL_FAN_AVG, - VAR_SCALING_UNIFORM_FAN_IN, VAR_SCALING_UNIFORM_FAN_OUT, VAR_SCALING_UNIFORM_FAN_AVG; + DISTRIBUTION, + ZERO, + ONES, + SIGMOID_UNIFORM, + NORMAL, + LECUN_NORMAL, + UNIFORM, + XAVIER, + XAVIER_UNIFORM, + XAVIER_FAN_IN, + XAVIER_LEGACY, + RELU, + RELU_UNIFORM, + IDENTITY, + LECUN_UNIFORM, + VAR_SCALING_NORMAL_FAN_IN, + VAR_SCALING_NORMAL_FAN_OUT, + VAR_SCALING_NORMAL_FAN_AVG, + CONSTANT, + EMBEDDING, + VAR_SCALING_UNIFORM_FAN_IN, + VAR_SCALING_UNIFORM_FAN_OUT, + VAR_SCALING_UNIFORM_FAN_AVG; - - /** - * Create an instance of the weight initialization function - * - * @return a new {@link IWeightInit} instance - */ - public IWeightInit getWeightInitFunction() { - return getWeightInitFunction(null); + /** + * Create an instance of the weight initialization function + * + * @return a new {@link IWeightInit} instance + */ + public IWeightInit getWeightInitFunction(Distribution distribution) { + switch (this) { + case DISTRIBUTION: + return new WeightInitDistribution(distribution); + default: + return getWeightInitFunction(); } + } - /** - * Create an instance of the weight initialization function - * - * @param distribution Distribution of the weights (Only used in case DISTRIBUTION) - * @return a new {@link IWeightInit} instance - */ - public IWeightInit getWeightInitFunction(Distribution distribution) { - switch (this) { - case ZERO: - return new WeightInitConstant(0.0); - case ONES: - return new WeightInitConstant(1.0); - case DISTRIBUTION: - return new WeightInitDistribution(distribution); - case SIGMOID_UNIFORM: - return new 
WeightInitSigmoidUniform(); - case LECUN_NORMAL: //Fall through: these 3 are equivalent - case XAVIER_FAN_IN: - case NORMAL: - return new WeightInitNormal(); - case UNIFORM: - return new WeightInitUniform(); - case XAVIER: - return new WeightInitXavier(); - case XAVIER_UNIFORM: - return new WeightInitXavierUniform(); - case XAVIER_LEGACY: - return new WeightInitXavierLegacy(); - case RELU: - return new WeightInitRelu(); - case RELU_UNIFORM: - return new WeightInitReluUniform(); - case IDENTITY: - return new WeightInitIdentity(); - case LECUN_UNIFORM: - return new WeightInitLecunUniform(); - case VAR_SCALING_NORMAL_FAN_IN: - return new WeightInitVarScalingNormalFanIn(); - case VAR_SCALING_NORMAL_FAN_OUT: - return new WeightInitVarScalingNormalFanOut(); - case VAR_SCALING_NORMAL_FAN_AVG: - return new WeightInitVarScalingNormalFanAvg(); - case VAR_SCALING_UNIFORM_FAN_IN: - return new WeightInitVarScalingUniformFanIn(); - case VAR_SCALING_UNIFORM_FAN_OUT: - return new WeightInitVarScalingUniformFanOut(); - case VAR_SCALING_UNIFORM_FAN_AVG: - return new WeightInitVarScalingUniformFanAvg(); - - default: - throw new UnsupportedOperationException("Unknown or not supported weight initialization function: " + this); - } + public IWeightInit getWeightInitFunction( + EmbeddingInitializer initializer) { // EmbeddingInitializer + switch (this) { + case EMBEDDING: + return new WeightInitEmbedding(initializer); + default: + return getWeightInitFunction(); } + } + /** + * Create an instance of the weight initialization function + * + * @return a new {@link IWeightInit} instance + */ + public IWeightInit getWeightInitFunction() { + switch (this) { + case CONSTANT: + return new WeightInitConstant(); + case ZERO: + return new WeightInitConstant(0.0); + case ONES: + return new WeightInitConstant(1.0); + + case SIGMOID_UNIFORM: + return new WeightInitSigmoidUniform(); + case LECUN_NORMAL: // Fall through: these 3 are equivalent + case XAVIER_FAN_IN: + case NORMAL: + return new WeightInitNormal(); + case UNIFORM: + return new WeightInitUniform(); + case XAVIER: + return new WeightInitXavier(); + case XAVIER_UNIFORM: + return new WeightInitXavierUniform(); + case XAVIER_LEGACY: + return new WeightInitXavierLegacy(); + case RELU: + return new WeightInitRelu(); + case RELU_UNIFORM: + return new WeightInitReluUniform(); + case IDENTITY: + return new WeightInitIdentity(); + case LECUN_UNIFORM: + return new WeightInitLecunUniform(); + case VAR_SCALING_NORMAL_FAN_IN: + return new WeightInitVarScalingNormalFanIn(); + case VAR_SCALING_NORMAL_FAN_OUT: + return new WeightInitVarScalingNormalFanOut(); + case VAR_SCALING_NORMAL_FAN_AVG: + return new WeightInitVarScalingNormalFanAvg(); + case VAR_SCALING_UNIFORM_FAN_IN: + return new WeightInitVarScalingUniformFanIn(); + case VAR_SCALING_UNIFORM_FAN_OUT: + return new WeightInitVarScalingUniformFanOut(); + case VAR_SCALING_UNIFORM_FAN_AVG: + return new WeightInitVarScalingUniformFanAvg(); + + default: + throw new UnsupportedOperationException( + "Unknown or not supported weight initialization function: " + this); + } + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitConstant.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitConstant.java index 40ba9755c..b91989d63 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitConstant.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitConstant.java @@ -42,4 +42,13 @@ public class 
WeightInitConstant implements IWeightInit { paramView.assign(value); return paramView.reshape(order, shape); } + +/** +* + * @return +*/ + @Override + public WeightInit enumValue() { + return WeightInit.CONSTANT; + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitDistribution.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitDistribution.java index 296305e17..e9da0c596 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitDistribution.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitDistribution.java @@ -51,4 +51,13 @@ public class WeightInitDistribution implements IWeightInit { } return paramView.reshape(order, shape); } + +/** +* + * @return +*/ + @Override + public WeightInit enumValue() { + return WeightInit.DISTRIBUTION; + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitIdentity.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitIdentity.java index 2bb8a9ba1..35e15b380 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitIdentity.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitIdentity.java @@ -59,6 +59,15 @@ public class WeightInitIdentity implements IWeightInit { } } +/** +* + * @return +*/ + @Override + public WeightInit enumValue() { + return WeightInit.IDENTITY; + } + private INDArray setIdentity2D(long[] shape, char order, INDArray paramView) { INDArray ret; if (order == Nd4j.order()) { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitLecunUniform.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitLecunUniform.java index 05bb63c0e..bd57e78be 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitLecunUniform.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitLecunUniform.java @@ -38,4 +38,13 @@ public class WeightInitLecunUniform implements IWeightInit { Nd4j.rand(paramView, Nd4j.getDistributions().createUniform(-b, b)); return paramView.reshape(order, shape); } + +/** +* + * @return +*/ + @Override + public WeightInit enumValue() { + return WeightInit.LECUN_UNIFORM; + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitNormal.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitNormal.java index 76a635749..116fe27da 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitNormal.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitNormal.java @@ -38,4 +38,13 @@ public class WeightInitNormal implements IWeightInit { Nd4j.randn(paramView).divi(FastMath.sqrt(fanIn)); return paramView.reshape(order, shape); } + +/** +* + * @return +*/ + @Override + public WeightInit enumValue() { + return WeightInit.NORMAL; + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitRelu.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitRelu.java index 340d0626b..37f5f5256 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitRelu.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitRelu.java @@ -33,4 +33,13 @@ public class WeightInitRelu implements IWeightInit { 
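A minimal sketch of the reworked WeightInit factory shown above: the no-argument getWeightInitFunction() covers every constant except DISTRIBUTION and EMBEDDING, the Distribution overload only differs for DISTRIBUTION, and the new enumValue() maps an IWeightInit instance back to its constant. The harness class and the concrete distribution are illustrative:

    import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
    import org.deeplearning4j.nn.weights.IWeightInit;
    import org.deeplearning4j.nn.weights.WeightInit;

    public class WeightInitSketch {
        public static void main(String[] args) {
            // Plain enum constant -> concrete initializer instance
            IWeightInit xavier = WeightInit.XAVIER.getWeightInitFunction();

            // DISTRIBUTION is the only constant that consumes the Distribution argument
            IWeightInit fromDist =
                    WeightInit.DISTRIBUTION.getWeightInitFunction(new NormalDistribution(0.0, 1.0));

            // enumValue() recovers the constant from the function instance
            System.out.println(xavier.enumValue());    // XAVIER
            System.out.println(fromDist.enumValue());  // DISTRIBUTION
        }
    }

Since enumValue() is now part of IWeightInit, each implementation touched below returns its own constant, presumably so that an initializer instance can be mapped back to its enum constant, e.g. during configuration serialization.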
Nd4j.randn(paramView).muli(FastMath.sqrt(2.0 / fanIn)); //N(0, 2/nIn) return paramView.reshape(order, shape); } + +/** +* + * @return +*/ + @Override + public WeightInit enumValue() { + return WeightInit.RELU; + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitReluUniform.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitReluUniform.java index 9801e20b8..de60d65f9 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitReluUniform.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitReluUniform.java @@ -35,4 +35,13 @@ public class WeightInitReluUniform implements IWeightInit { Nd4j.rand(paramView, Nd4j.getDistributions().createUniform(-u, u)); //U(-sqrt(6/fanIn), sqrt(6/fanIn) return paramView.reshape(order, shape); } + +/** +* + * @return +*/ + @Override + public WeightInit enumValue() { + return WeightInit.RELU_UNIFORM; + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitSigmoidUniform.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitSigmoidUniform.java index 4b401cbc2..07eb609d0 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitSigmoidUniform.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitSigmoidUniform.java @@ -35,4 +35,13 @@ public class WeightInitSigmoidUniform implements IWeightInit { Nd4j.rand(paramView, Nd4j.getDistributions().createUniform(-r, r)); return paramView.reshape(order, shape); } + +/** +* + * @return +*/ + @Override + public WeightInit enumValue() { + return WeightInit.SIGMOID_UNIFORM; + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitUniform.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitUniform.java index 71d2be216..3669a104f 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitUniform.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitUniform.java @@ -34,4 +34,13 @@ public class WeightInitUniform implements IWeightInit { Nd4j.rand(paramView, Nd4j.getDistributions().createUniform(-a, a)); return paramView.reshape(order, shape); } + +/** +* + * @return +*/ + @Override + public WeightInit enumValue() { + return WeightInit.UNIFORM; + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingNormalFanAvg.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingNormalFanAvg.java index 47167cc89..0c7458689 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingNormalFanAvg.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingNormalFanAvg.java @@ -50,4 +50,13 @@ public class WeightInitVarScalingNormalFanAvg implements IWeightInit { Nd4j.exec(new TruncatedNormalDistribution(paramView, 0.0, std)); return paramView.reshape(order, shape); } + +/** +* + * @return +*/ + @Override + public WeightInit enumValue() { + return WeightInit.VAR_SCALING_NORMAL_FAN_AVG; + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingNormalFanIn.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingNormalFanIn.java index f19667bb4..5b25fa03d 100644 --- 
a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingNormalFanIn.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingNormalFanIn.java @@ -48,4 +48,13 @@ public class WeightInitVarScalingNormalFanIn implements IWeightInit { Nd4j.exec(new TruncatedNormalDistribution(paramView, 0.0, std)); return paramView.reshape(order, shape); } + +/** +* + * @return +*/ + @Override + public WeightInit enumValue() { + return WeightInit.VAR_SCALING_NORMAL_FAN_IN; + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingNormalFanOut.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingNormalFanOut.java index 300601b68..b559db308 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingNormalFanOut.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingNormalFanOut.java @@ -50,4 +50,13 @@ public class WeightInitVarScalingNormalFanOut implements IWeightInit { Nd4j.exec(new TruncatedNormalDistribution(paramView, 0.0, std)); return paramView.reshape(order, shape); } + +/** +* + * @return +*/ + @Override + public WeightInit enumValue() { + return WeightInit.VAR_SCALING_NORMAL_FAN_OUT; + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingUniformFanAvg.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingUniformFanAvg.java index 87036d2c5..7bbd172a4 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingUniformFanAvg.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingUniformFanAvg.java @@ -50,4 +50,13 @@ public class WeightInitVarScalingUniformFanAvg implements IWeightInit { Nd4j.rand(paramView, Nd4j.getDistributions().createUniform(-scalingFanAvg, scalingFanAvg)); return paramView.reshape(order, shape); } + +/** +* + * @return +*/ + @Override + public WeightInit enumValue() { + return WeightInit.VAR_SCALING_UNIFORM_FAN_AVG; + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingUniformFanIn.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingUniformFanIn.java index 0d47c80e9..95d0dee3a 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingUniformFanIn.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingUniformFanIn.java @@ -45,4 +45,13 @@ public class WeightInitVarScalingUniformFanIn implements IWeightInit { Nd4j.rand(paramView, Nd4j.getDistributions().createUniform(-scalingFanIn, scalingFanIn)); return paramView.reshape(order, shape); } + +/** +* + * @return +*/ + @Override + public WeightInit enumValue() { + return WeightInit.VAR_SCALING_UNIFORM_FAN_IN; + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingUniformFanOut.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingUniformFanOut.java index c853cb631..f062b5e5e 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingUniformFanOut.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingUniformFanOut.java @@ -44,4 +44,13 @@ public class 
WeightInitVarScalingUniformFanOut implements IWeightInit { Nd4j.rand(paramView, Nd4j.getDistributions().createUniform(-scalingFanOut, scalingFanOut)); return paramView.reshape(order, shape); } + +/** +* + * @return +*/ + @Override + public WeightInit enumValue() { + return WeightInit.VAR_SCALING_UNIFORM_FAN_OUT; + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitXavier.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitXavier.java index 8d9cbf8f6..532dbbe32 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitXavier.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitXavier.java @@ -33,4 +33,13 @@ public class WeightInitXavier implements IWeightInit { Nd4j.randn(paramView).muli(FastMath.sqrt(2.0 / (fanIn + fanOut))); return paramView.reshape(order, shape); } + +/** +* + * @return +*/ + @Override + public WeightInit enumValue() { + return WeightInit.XAVIER; + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitXavierLegacy.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitXavierLegacy.java index e2b544bc5..4c721eeff 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitXavierLegacy.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitXavierLegacy.java @@ -38,4 +38,13 @@ public class WeightInitXavierLegacy implements IWeightInit { Nd4j.randn(paramView).divi(FastMath.sqrt(shape[0] + shape[1])); return paramView.reshape(order, shape); } + +/** +* + * @return +*/ + @Override + public WeightInit enumValue() { + return WeightInit.XAVIER_LEGACY; + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitXavierUniform.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitXavierUniform.java index a18bf5f46..0d5492370 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitXavierUniform.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitXavierUniform.java @@ -40,4 +40,13 @@ public class WeightInitXavierUniform implements IWeightInit { Nd4j.rand(paramView, Nd4j.getDistributions().createUniform(-s, s)); return paramView.reshape(order, shape); } + +/** +* + * @return +*/ + @Override + public WeightInit enumValue() { + return WeightInit.XAVIER_UNIFORM; + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/embeddings/WeightInitEmbedding.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/embeddings/WeightInitEmbedding.java index 4ca7f2635..70227b8ca 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/embeddings/WeightInitEmbedding.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/embeddings/WeightInitEmbedding.java @@ -23,6 +23,7 @@ package org.deeplearning4j.nn.weights.embeddings; import lombok.EqualsAndHashCode; import lombok.NonNull; import org.deeplearning4j.nn.weights.IWeightInit; +import org.deeplearning4j.nn.weights.WeightInit; import org.nd4j.common.base.Preconditions; import org.nd4j.linalg.api.ndarray.INDArray; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; @@ -66,6 +67,15 @@ public class WeightInitEmbedding implements IWeightInit { return reshaped; } +/** +* + * @return +*/ + @Override + public WeightInit enumValue() { + return 
WeightInit.EMBEDDING; + } + public long[] shape(){ if(serializableInit != null){ return new long[]{serializableInit.vocabSize(), serializableInit.vectorSize()}; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/BaseOptimizer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/BaseOptimizer.java index 0cd76263c..d5731f04d 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/BaseOptimizer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/BaseOptimizer.java @@ -315,7 +315,7 @@ public abstract class BaseOptimizer implements ConvexOptimizer { */ @Override public void setupSearchState(Pair pair) { - INDArray gradient = pair.getFirst().gradient(conf.netWideVariables()); + INDArray gradient = pair.getFirst().gradient(conf.getNetWideVariables()); INDArray params = model.getModelParams().dup(); //Need dup here: params returns an array that isn't a copy (hence changes to this are problematic for line search methods) searchState.put(GRADIENT_KEY, gradient); searchState.put(SCORE_KEY, pair.getSecond()); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/Convolution1DUtils.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/Convolution1DUtils.java index cd3bd3f2c..c5305dbc0 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/Convolution1DUtils.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/Convolution1DUtils.java @@ -78,10 +78,10 @@ public class Convolution1DUtils { * @return the format for the layer */ public static RNNFormat getRnnFormatFromLayer(LayerConfiguration layer) { - Preconditions.checkState(hasRnnDataFormat(layer),"ILayer of type " + layer.getClass().getName() + " and name " + layer.getLayerName() + " does not have an RNNFormat"); + Preconditions.checkState(hasRnnDataFormat(layer),"ILayer of type " + layer.getClass().getName() + " and name " + layer.getName() + " does not have an RNNFormat"); if(layer instanceof SimpleRnn) { SimpleRnn simpleRnn = (SimpleRnn) layer; - return simpleRnn.getRnnDataFormat(); + return simpleRnn.getDataFormat(); } else if(layer instanceof Convolution1D) { Convolution1D convolution1D = (Convolution1D) layer; return convolution1D.getRnnDataFormat(); @@ -90,16 +90,16 @@ public class Convolution1DUtils { return convolution1DLayer.getRnnDataFormat(); } else if(layer instanceof Subsampling1DLayer) { Subsampling1DLayer subsampling1DLayer = (Subsampling1DLayer) layer; - return subsampling1DLayer.getCnn2dDataFormat() == CNN2DFormat.NCHW ? RNNFormat.NCW : RNNFormat.NWC; + return subsampling1DLayer.getDataFormat() == CNN2DFormat.NCHW ? 
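The EMBEDDING constant pairs with the new getWeightInitFunction(EmbeddingInitializer) overload and the WeightInitEmbedding wrapper above. A hedged sketch; how the EmbeddingInitializer is obtained (for example from pretrained vectors) is outside this patch:

    import org.deeplearning4j.nn.weights.IWeightInit;
    import org.deeplearning4j.nn.weights.WeightInit;
    import org.deeplearning4j.nn.weights.embeddings.EmbeddingInitializer;

    public class EmbeddingInitSketch {
        // Wraps a pretrained initializer; EMBEDDING is the only constant that uses the argument
        static IWeightInit wrap(EmbeddingInitializer pretrained) {
            IWeightInit init = WeightInit.EMBEDDING.getWeightInitFunction(pretrained);
            System.out.println(init.enumValue());  // EMBEDDING
            return init;
        }
    }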
RNNFormat.NCW : RNNFormat.NWC; } else if(layer instanceof LSTM) { LSTM lstm = (LSTM) layer; - return lstm.getRnnDataFormat(); + return lstm.getDataFormat(); } else if(layer instanceof EmbeddingSequenceLayer) { EmbeddingSequenceLayer embeddingSequenceLayer = (EmbeddingSequenceLayer) layer; - return embeddingSequenceLayer.getOutputFormat(); + return embeddingSequenceLayer.getOutputDataFormat(); } else { - throw new IllegalArgumentException("Illegal layer type " + layer.getClass().getName() + " and name " + layer.getLayerName()); + throw new IllegalArgumentException("Illegal layer type " + layer.getClass().getName() + " and name " + layer.getName()); } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/ConvolutionUtils.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/ConvolutionUtils.java index e7adaa86a..887b1e0a7 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/ConvolutionUtils.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/ConvolutionUtils.java @@ -27,7 +27,6 @@ import org.deeplearning4j.exception.DL4JInvalidConfigException; import org.deeplearning4j.exception.DL4JInvalidInputException; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.ConvolutionMode; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.*; import org.deeplearning4j.nn.conf.layers.convolutional.Cropping2D; @@ -202,19 +201,19 @@ public class ConvolutionUtils { public static CNN2DFormat getFormatForLayer(LayerConfiguration layer) { if(layer instanceof Convolution1DLayer) { Convolution1DLayer convolution1DLayer = (Convolution1DLayer) layer; - return convolution1DLayer.getCnn2dDataFormat(); + return convolution1DLayer.getDataFormat(); } else if(layer instanceof ConvolutionLayer) { ConvolutionLayer convolutionLayer = (ConvolutionLayer) layer; - return convolutionLayer.getCnn2dDataFormat(); + return convolutionLayer.getConvFormat(); } else if(layer instanceof SubsamplingLayer) { SubsamplingLayer subsamplingLayer = (SubsamplingLayer) layer; - return subsamplingLayer.getCnn2dDataFormat(); + return subsamplingLayer.getDataFormat(); } else if(layer instanceof SpaceToBatchLayer) { SpaceToBatchLayer spaceToBatchLayer = (SpaceToBatchLayer) layer; - return spaceToBatchLayer.getFormat(); + return spaceToBatchLayer.getDataFormat(); } else if(layer instanceof Upsampling2D) { Upsampling2D upsampling2D = (Upsampling2D) layer; - return upsampling2D.getFormat(); + return upsampling2D.getDataFormat(); } else if(layer instanceof SpaceToDepthLayer) { SpaceToDepthLayer spaceToDepthLayer = (SpaceToDepthLayer) layer; return spaceToDepthLayer.getDataFormat(); @@ -223,13 +222,13 @@ public class ConvolutionUtils { return zeroPaddingLayer.getDataFormat(); } else if(layer instanceof SeparableConvolution2D) { SeparableConvolution2D separableConvolution2D = (SeparableConvolution2D) layer; - return separableConvolution2D.getCnn2dDataFormat(); + return separableConvolution2D.getDataFormat(); } else if(layer instanceof Deconvolution2D) { Deconvolution2D deconvolution2D = (Deconvolution2D) layer; - return deconvolution2D.getCnn2dDataFormat(); + return deconvolution2D.getConvFormat(); } else if(layer instanceof DepthwiseConvolution2D) { DepthwiseConvolution2D depthwiseConvolution2D = (DepthwiseConvolution2D) layer; - return depthwiseConvolution2D.getCnn2dDataFormat(); + return depthwiseConvolution2D.getDataFormat(); } else if(layer 
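Convolution1DUtils and ConvolutionUtils now read the renamed format getters (getDataFormat()/getConvFormat() instead of getRnnDataFormat()/getCnn2dDataFormat()). A minimal sketch of querying a layer's format through these utilities; the layer sizes are arbitrary and the builder calls mirror other hunks in this patch:

    import org.deeplearning4j.nn.conf.CNN2DFormat;
    import org.deeplearning4j.nn.conf.RNNFormat;
    import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
    import org.deeplearning4j.nn.conf.layers.LSTM;
    import org.deeplearning4j.util.Convolution1DUtils;
    import org.deeplearning4j.util.ConvolutionUtils;

    public class FormatLookupSketch {
        public static void main(String[] args) {
            // RNN side: resolved via LSTM.getDataFormat() internally
            LSTM lstm = LSTM.builder().nIn(8).nOut(16).build();
            RNNFormat rnnFormat = Convolution1DUtils.getRnnFormatFromLayer(lstm);

            // CNN side: resolved via ConvolutionLayer.getConvFormat() internally
            ConvolutionLayer conv = ConvolutionLayer.builder(5, 5).nIn(1).nOut(20).build();
            CNN2DFormat cnnFormat = ConvolutionUtils.getFormatForLayer(conv);

            System.out.println(rnnFormat + " / " + cnnFormat);
        }
    }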
instanceof Cropping2D) { Cropping2D cropping2D = (Cropping2D) layer; return cropping2D.getDataFormat(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/CrashReportingUtil.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/CrashReportingUtil.java index b2e10ece5..ea39214a1 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/CrashReportingUtil.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/CrashReportingUtil.java @@ -470,7 +470,7 @@ public class CrashReportingUtil { sb.append(String.format(format, "Idx", "Name", "ILayer Type", "ILayer # Parameters", "ILayer Parameter Memory")).append("\n"); for(Layer layer : layers){ long numParams = layer.numParams(); - sb.append(String.format(format, layer.getIndex(), layer.getLayerConfiguration().getLayerName(), + sb.append(String.format(format, layer.getIndex(), layer.getLayerConfiguration().getName(), layer.getClass().getSimpleName(), numParams, fBytes(numParams * bytesPerElement))).append("\n"); } @@ -503,7 +503,7 @@ public class CrashReportingUtil { } int idx = l.getIndex(); - String layerName = l.getLayerConfiguration().getLayerName(); + String layerName = l.getLayerConfiguration().getName(); if(layerName == null) layerName = String.valueOf(idx); @@ -567,7 +567,7 @@ public class CrashReportingUtil { bytes = 0; } totalActivationBytes += bytes; - sb.append(String.format(format, i, layers[i].getLayerConfiguration().getLayerName(), layers[i].getClass().getSimpleName(), + sb.append(String.format(format, i, layers[i].getLayerConfiguration().getName(), layers[i].getClass().getSimpleName(), inputTypes.get(i), Arrays.toString(shape), (numElements < 0 ? "" : String.valueOf(numElements)), fBytes(bytes))).append("\n"); last = bytes; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/NetworkUtils.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/NetworkUtils.java index f19dd8a47..61cd18690 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/NetworkUtils.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/NetworkUtils.java @@ -237,7 +237,7 @@ public class NetworkUtils { private static void setLearningRate(ComputationGraph net, double newLr, ISchedule lrSchedule) { org.deeplearning4j.nn.api.Layer[] layers = net.getLayers(); for (int i = 0; i < layers.length; i++) { - setLearningRate(net, layers[i].getLayerConfiguration().getLayerName(), newLr, lrSchedule, false); + setLearningRate(net, layers[i].getLayerConfiguration().getName(), newLr, lrSchedule, false); } refreshUpdater(net); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/OutputLayerUtil.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/OutputLayerUtil.java index 1829fbd40..f22fc2159 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/OutputLayerUtil.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/OutputLayerUtil.java @@ -78,21 +78,21 @@ public class OutputLayerUtil { boolean isLossLayer = false; if (layer instanceof BaseOutputLayer && !(layer instanceof OCNNOutputLayer)) { activation = ((BaseOutputLayer) layer).getActivationFn(); - loss = ((BaseOutputLayer) layer).getLossFn(); + loss = ((BaseOutputLayer) layer).getLossFunction(); nOut = ((BaseOutputLayer) layer).getNOut(); } else if (layer instanceof LossLayer) { activation = ((LossLayer) layer).getActivationFn(); - loss = ((LossLayer) layer).getLossFn(); + loss = ((LossLayer) 
layer).getLossFunction(); nOut = ((LossLayer) layer).getNOut(); isLossLayer = true; } else if (layer instanceof RnnLossLayer) { activation = ((RnnLossLayer) layer).getActivationFn(); - loss = ((RnnLossLayer) layer).getLossFn(); + loss = ((RnnLossLayer) layer).getLossFunction(); nOut = ((RnnLossLayer) layer).getNOut(); isLossLayer = true; } else if (layer instanceof CnnLossLayer) { activation = ((CnnLossLayer) layer).getActivationFn(); - loss = ((CnnLossLayer) layer).getLossFn(); + loss = ((CnnLossLayer) layer).getLossFunction(); nOut = ((CnnLossLayer) layer).getNOut(); isLossLayer = true; } else { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/TimeSeriesUtils.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/TimeSeriesUtils.java index 4723211b9..a72a10f5d 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/TimeSeriesUtils.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/TimeSeriesUtils.java @@ -444,7 +444,7 @@ public class TimeSeriesUtils { */ public static RNNFormat getFormatFromRnnLayer(LayerConfiguration layer){ if(layer instanceof BaseRecurrentLayer){ - return ((BaseRecurrentLayer) layer).getRnnDataFormat(); + return ((BaseRecurrentLayer) layer).getDataFormat(); } else if(layer instanceof MaskZeroLayer){ return getFormatFromRnnLayer(((MaskZeroLayer) layer).getUnderlying()); } else if(layer instanceof Bidirectional){ diff --git a/cavis-dnn/cavis-dnn-nn/src/main/resources/simplelogger.properties b/cavis-dnn/cavis-dnn-nn/src/main/resources/simplelogger.properties index 51c081db4..f642801f7 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/resources/simplelogger.properties +++ b/cavis-dnn/cavis-dnn-nn/src/main/resources/simplelogger.properties @@ -19,7 +19,8 @@ # # -org.slf4j.simpleLogger.defaultLogLevel = debug +org.slf4j.simpleLogger.defaultLogLevel = info org.slf4j.simpleLogger.log.org.deeplearning4j.optimize.listeners = info -org.slf4j.simplelogger.log.org.nd4j.linalg.dataset = info \ No newline at end of file +org.slf4j.simplelogger.log.org.nd4j.linalg.dataset = info +org.slf4j.simplelogger.log.org.nd4j.jita.workspace = info diff --git a/cavis-dnn/cavis-dnn-nn/src/test/java/net/brutex/ai/dnn/api/dnnTest.java b/cavis-dnn/cavis-dnn-nn/src/test/java/net/brutex/ai/dnn/api/dnnTest.java index 4529cc26e..73cc5ab35 100644 --- a/cavis-dnn/cavis-dnn-nn/src/test/java/net/brutex/ai/dnn/api/dnnTest.java +++ b/cavis-dnn/cavis-dnn-nn/src/test/java/net/brutex/ai/dnn/api/dnnTest.java @@ -51,8 +51,8 @@ class dnnTest { @Test void testFFLayer() { int numFeatures = 600; - int batchSize = 5000; - int numRows = 10000; + int batchSize = 50000; + int numRows = 100000; AtomicInteger cnt = new AtomicInteger(0); FloatsDataSetIterator iterator = new FloatsDataSetIterator(floatIterable(numRows, numFeatures), batchSize); @@ -72,7 +72,7 @@ class dnnTest { /** * new * DenseLayer.Builder().nIn(INPUT).nOut(X_DIM*Y_DIM*CHANNELS).weightInit(WeightInit.NORMAL).build(), - * new ActivationLayer.Builder(new ActivationLReLU(0.2)).build(), new + * ActivationLayer.builder(new ActivationLReLU(0.2)).build(), new * DenseLayer.Builder().nIn(X_DIM*Y_DIM*CHANNELS).nOut(X_DIM*Y_DIM).build(), new * ActivationLayer.Builder(new ActivationLReLU(0.2)).build(), new * DenseLayer.Builder().nIn(X_DIM*Y_DIM).nOut(X_DIM*Y_DIM).build(), new @@ -86,19 +86,19 @@ class dnnTest { .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer) .gradientNormalizationThreshold(100) .weightInit(new WeightInitXavier()) - .activation(new ActivationSigmoid()) + 
.activation(Activation.SIGMOID) .weightNoise(new WeightNoise(new NormalDistribution(0.5, 0.7))) // .inputType(InputType.convolutional(28, 28, 1)) - .layer(new DenseLayer.Builder().nIn(numFeatures).nOut(20).build()) - .layer(new ActivationLayer.Builder(new ActivationLReLU(0.2)).build()) - .layer(new DenseLayer.Builder().nIn(20).nOut(40).build()) - .layer(new ActivationLayer.Builder(new ActivationLReLU(0.2)).build()) - .layer(new DenseLayer.Builder().nIn(40).nOut(12).build()) - .layer(new ActivationLayer.Builder(new ActivationLReLU(0.2)).build()) - .layer(new DenseLayer.Builder().nIn(12).nOut(8).build()) - .layer(new ActivationLayer.Builder(new ActivationLReLU(0.2)).build()) + .layer(DenseLayer.builder().nIn(numFeatures).nOut(20).build()) + .layer(ActivationLayer.builder(new ActivationLReLU(0.2)).build()) + .layer(DenseLayer.builder().nIn(20).nOut(40).build()) + .layer(ActivationLayer.builder(new ActivationLReLU(0.2)).build()) + .layer(DenseLayer.builder().nIn(40).nOut(12).build()) + .layer(ActivationLayer.builder(new ActivationLReLU(0.2)).build()) + .layer(DenseLayer.builder().nIn(12).nOut(8).build()) + .layer(ActivationLayer.builder(new ActivationLReLU(0.2)).build()) .layer( - new OutputLayer.Builder(LossFunctions.LossFunction.SQUARED_LOSS) + OutputLayer.builder(LossFunctions.LossFunction.SQUARED_LOSS) .activation(Activation.SIGMOID) .nOut(numFeatures) .build()) @@ -108,7 +108,7 @@ class dnnTest { net.addTrainingListeners(new ScoreToChartListener("dnnTest")); FloatsDataSetIterator dset = new FloatsDataSetIterator(floatIterable(numRows, numFeatures), batchSize); - for (int i = 0; i < 20000000; i++) { + for (int i = 0; i < 20; i++) { net.fit(dset); System.out.println("Score: " + net.getScore()); } diff --git a/cavis-dnn/cavis-dnn-parallelwrapper-parameterserver/src/test/java/org/deeplearning4j/parallelism/parameterserver/ParameterServerParallelWrapperTest.java b/cavis-dnn/cavis-dnn-parallelwrapper-parameterserver/src/test/java/org/deeplearning4j/parallelism/parameterserver/ParameterServerParallelWrapperTest.java index cfeaf0821..5e503ad9b 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper-parameterserver/src/test/java/org/deeplearning4j/parallelism/parameterserver/ParameterServerParallelWrapperTest.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper-parameterserver/src/test/java/org/deeplearning4j/parallelism/parameterserver/ParameterServerParallelWrapperTest.java @@ -60,18 +60,18 @@ public class ParameterServerParallelWrapperTest extends BaseDL4JTest { .l2(0.0005) .weightInit(WeightInit.XAVIER) .updater(new Nesterovs(0.01, 0.9)).list() - .layer(0, new ConvolutionLayer.Builder(5, 5) + .layer(0, ConvolutionLayer.builder(5, 5) //nIn and nOut specify channels. 
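The dnnTest hunk above is representative of the patch-wide move from new XyzLayer.Builder() to the generated static builder() methods, and from new ActivationSigmoid() to the Activation enum. A condensed, hedged sketch of the same configuration style; the class name and layer sizes are illustrative:

    import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
    import org.deeplearning4j.nn.conf.layers.ActivationLayer;
    import org.deeplearning4j.nn.conf.layers.DenseLayer;
    import org.deeplearning4j.nn.conf.layers.OutputLayer;
    import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
    import org.nd4j.linalg.activations.Activation;
    import org.nd4j.linalg.activations.impl.ActivationLReLU;
    import org.nd4j.linalg.lossfunctions.LossFunctions;

    public class BuilderStyleSketch {
        public static void main(String[] args) {
            NeuralNetConfiguration conf = NeuralNetConfiguration.builder()
                    .activation(Activation.SIGMOID)   // enum instead of new ActivationSigmoid()
                    .layer(DenseLayer.builder().nIn(600).nOut(20).build())
                    .layer(ActivationLayer.builder(new ActivationLReLU(0.2)).build())
                    .layer(OutputLayer.builder(LossFunctions.LossFunction.SQUARED_LOSS)
                            .activation(Activation.SIGMOID)
                            .nOut(600)
                            .build())
                    .build();

            MultiLayerNetwork net = new MultiLayerNetwork(conf);
            net.init();
        }
    }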
nIn here is the nChannels and nOut is the number of filters to be applied .nIn(nChannels).stride(1, 1).nOut(20).activation(Activation.IDENTITY).build()) - .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2) + .layer(1, SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2) .stride(2, 2).build()) - .layer(2, new ConvolutionLayer.Builder(5, 5) + .layer(2, ConvolutionLayer.builder(5, 5) //Note that nIn needed be specified in later layers .stride(1, 1).nOut(50).activation(Activation.IDENTITY).build()) - .layer(3, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2) + .layer(3, SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2) .stride(2, 2).build()) - .layer(4, new DenseLayer.Builder().activation(Activation.RELU).nOut(500).build()) - .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) + .layer(4, DenseLayer.builder().activation(Activation.RELU).nOut(500).build()) + .layer(5, OutputLayer.builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .nOut(outputNum).activation(Activation.SOFTMAX).build()) .inputType(InputType.convolutionalFlat(28, 28, 1)); diff --git a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/InplaceParallelInferenceTest.java b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/InplaceParallelInferenceTest.java index d952ebf4a..585f53c0d 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/InplaceParallelInferenceTest.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/InplaceParallelInferenceTest.java @@ -43,8 +43,8 @@ public class InplaceParallelInferenceTest extends BaseDL4JTest { val conf = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") - .layer("out0", new OutputLayer.Builder().nIn(nIn).nOut(4).activation(Activation.SOFTMAX).build(), "in") - .layer("out1", new OutputLayer.Builder().nIn(nIn).nOut(6).activation(Activation.SOFTMAX).build(), "in") + .layer("out0", OutputLayer.builder().nIn(nIn).nOut(4).activation(Activation.SOFTMAX).build(), "in") + .layer("out1", OutputLayer.builder().nIn(nIn).nOut(6).activation(Activation.SOFTMAX).build(), "in") .setOutputs("out0", "out1") .build(); @@ -71,9 +71,9 @@ public class InplaceParallelInferenceTest extends BaseDL4JTest { val conf2 = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") - .layer("out0", new OutputLayer.Builder().nIn(nIn).nOut(4).activation(Activation.SOFTMAX).build(), "in") - .layer("out1", new OutputLayer.Builder().nIn(nIn).nOut(6).activation(Activation.SOFTMAX).build(), "in") - .layer("out2", new OutputLayer.Builder().nIn(nIn).nOut(8).activation(Activation.SOFTMAX).build(), "in") + .layer("out0", OutputLayer.builder().nIn(nIn).nOut(4).activation(Activation.SOFTMAX).build(), "in") + .layer("out1", OutputLayer.builder().nIn(nIn).nOut(6).activation(Activation.SOFTMAX).build(), "in") + .layer("out2", OutputLayer.builder().nIn(nIn).nOut(8).activation(Activation.SOFTMAX).build(), "in") .setOutputs("out0", "out1", "out2") .build(); @@ -104,8 +104,8 @@ public class InplaceParallelInferenceTest extends BaseDL4JTest { val conf = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") - .layer("out0", new OutputLayer.Builder().nIn(nIn).nOut(4).activation(Activation.SOFTMAX).build(), "in") - .layer("out1", new OutputLayer.Builder().nIn(nIn).nOut(6).activation(Activation.SOFTMAX).build(), "in") + 
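The LeNet-style test configuration just above gets the same treatment; a hedged sketch of that configuration as a reusable method, assuming the 28x28 MNIST-shaped input those tests use:

    import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
    import org.deeplearning4j.nn.conf.inputs.InputType;
    import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
    import org.deeplearning4j.nn.conf.layers.DenseLayer;
    import org.deeplearning4j.nn.conf.layers.OutputLayer;
    import org.deeplearning4j.nn.conf.layers.SubsamplingLayer;
    import org.nd4j.linalg.activations.Activation;
    import org.nd4j.linalg.lossfunctions.LossFunctions;

    public class LeNetBuilderSketch {
        static NeuralNetConfiguration lenet(int nChannels, int outputNum) {
            return NeuralNetConfiguration.builder()
                    .layer(0, ConvolutionLayer.builder(5, 5)
                            .nIn(nChannels).stride(1, 1).nOut(20).activation(Activation.IDENTITY).build())
                    .layer(1, SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX)
                            .kernelSize(2, 2).stride(2, 2).build())
                    .layer(2, ConvolutionLayer.builder(5, 5)
                            .stride(1, 1).nOut(50).activation(Activation.IDENTITY).build())
                    .layer(3, SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX)
                            .kernelSize(2, 2).stride(2, 2).build())
                    .layer(4, DenseLayer.builder().activation(Activation.RELU).nOut(500).build())
                    .layer(5, OutputLayer.builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
                            .nOut(outputNum).activation(Activation.SOFTMAX).build())
                    .inputType(InputType.convolutionalFlat(28, 28, nChannels))
                    .build();
        }
    }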
.layer("out0", OutputLayer.builder().nIn(nIn).nOut(4).activation(Activation.SOFTMAX).build(), "in") + .layer("out1", OutputLayer.builder().nIn(nIn).nOut(6).activation(Activation.SOFTMAX).build(), "in") .setOutputs("out0", "out1") .build(); @@ -137,8 +137,8 @@ public class InplaceParallelInferenceTest extends BaseDL4JTest { val conf = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") - .layer("out0", new OutputLayer.Builder().nIn(nIn).nOut(4).activation(Activation.SOFTMAX).build(), "in") - .layer("out1", new OutputLayer.Builder().nIn(nIn).nOut(6).activation(Activation.SOFTMAX).build(), "in") + .layer("out0", OutputLayer.builder().nIn(nIn).nOut(4).activation(Activation.SOFTMAX).build(), "in") + .layer("out1", OutputLayer.builder().nIn(nIn).nOut(6).activation(Activation.SOFTMAX).build(), "in") .setOutputs("out0", "out1") .build(); diff --git a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/ParallelInferenceTest.java b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/ParallelInferenceTest.java index cdf908911..b9b8848be 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/ParallelInferenceTest.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/ParallelInferenceTest.java @@ -417,8 +417,8 @@ public class ParallelInferenceTest extends BaseDL4JTest { .activation(Activation.TANH) .seed(12345) .list() - .layer(new LSTM.Builder().nIn(nIn).nOut(5).build()) - .layer(new RnnOutputLayer.Builder().nIn(5).nOut(5).activation(Activation.SOFTMAX).build()) + .layer(LSTM.builder().nIn(nIn).nOut(5).build()) + .layer(RnnOutputLayer.builder().nIn(5).nOut(5).activation(Activation.SOFTMAX).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -462,8 +462,8 @@ public class ParallelInferenceTest extends BaseDL4JTest { .activation(Activation.TANH) .seed(12345) .list() - .layer(new LSTM.Builder().nIn(nIn).nOut(5).build()) - .layer(new RnnOutputLayer.Builder().nIn(5).nOut(5).activation(Activation.SOFTMAX).build()) + .layer(LSTM.builder().nIn(nIn).nOut(5).build()) + .layer(RnnOutputLayer.builder().nIn(5).nOut(5).activation(Activation.SOFTMAX).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -530,8 +530,8 @@ public class ParallelInferenceTest extends BaseDL4JTest { .activation(Activation.TANH) .seed(12345) .list() - .layer(new ConvolutionLayer.Builder().nIn(nIn).nOut(5).build()) - .layer(new CnnLossLayer.Builder().activation(Activation.SOFTMAX).build()) + .layer(ConvolutionLayer.builder().nIn(nIn).nOut(5).build()) + .layer(CnnLossLayer.builder().activation(Activation.SOFTMAX).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -579,8 +579,8 @@ public class ParallelInferenceTest extends BaseDL4JTest { .seed(12345) .convolutionMode(ConvolutionMode.Same) .list() - .layer(new ConvolutionLayer.Builder().nIn(nIn).nOut(5).build()) - .layer(new CnnLossLayer.Builder().activation(Activation.SOFTMAX).build()) + .layer(ConvolutionLayer.builder().nIn(nIn).nOut(5).build()) + .layer(CnnLossLayer.builder().activation(Activation.SOFTMAX).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -628,8 +628,8 @@ public class ParallelInferenceTest extends BaseDL4JTest { .activation(Activation.TANH) .seed(12345) .list() - .layer(new DenseLayer.Builder().nIn(nIn).nOut(5).build()) - .layer(new OutputLayer.Builder().nIn(5).nOut(5).activation(Activation.SOFTMAX).build()) + 
.layer(DenseLayer.builder().nIn(nIn).nOut(5).build()) + .layer(OutputLayer.builder().nIn(5).nOut(5).activation(Activation.SOFTMAX).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -692,9 +692,9 @@ public class ParallelInferenceTest extends BaseDL4JTest { .activation(Activation.TANH) .seed(12345) .list() - .layer(new LSTM.Builder().nIn(nIn).nOut(5).build()) - .layer(new GlobalPoolingLayer(PoolingType.AVG)) - .layer(new OutputLayer.Builder().nIn(5).nOut(5).activation(Activation.SOFTMAX).build()) + .layer(LSTM.builder().nIn(nIn).nOut(5).build()) + .layer(GlobalPoolingLayer.builder(PoolingType.AVG).build()) + .layer(OutputLayer.builder().nIn(5).nOut(5).activation(Activation.SOFTMAX).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -759,8 +759,8 @@ public class ParallelInferenceTest extends BaseDL4JTest { val conf = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") - .layer("out0", new OutputLayer.Builder().nIn(nIn).nOut(4).activation(Activation.SOFTMAX).build(), "in") - .layer("out1", new OutputLayer.Builder().nIn(nIn).nOut(6).activation(Activation.SOFTMAX).build(), "in") + .layer("out0", OutputLayer.builder().nIn(nIn).nOut(4).activation(Activation.SOFTMAX).build(), "in") + .layer("out1", OutputLayer.builder().nIn(nIn).nOut(6).activation(Activation.SOFTMAX).build(), "in") .setOutputs("out0", "out1") .build(); @@ -801,9 +801,9 @@ public class ParallelInferenceTest extends BaseDL4JTest { val conf2 = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") - .layer("out0", new OutputLayer.Builder().nIn(nIn).nOut(4).build(), "in") - .layer("out1", new OutputLayer.Builder().nIn(nIn).nOut(6).build(), "in") - .layer("out2", new OutputLayer.Builder().nIn(nIn).nOut(8).build(), "in") + .layer("out0", OutputLayer.builder().nIn(nIn).nOut(4).build(), "in") + .layer("out1", OutputLayer.builder().nIn(nIn).nOut(6).build(), "in") + .layer("out2", OutputLayer.builder().nIn(nIn).nOut(8).build(), "in") .setOutputs("out0", "out1", "out2") .build(); @@ -832,8 +832,8 @@ public class ParallelInferenceTest extends BaseDL4JTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") - .layer("out0", new OutputLayer.Builder().nIn(nIn).nOut(4).activation(Activation.SOFTMAX).build(), "in") - .layer("out1", new OutputLayer.Builder().nIn(nIn).nOut(6).activation(Activation.SOFTMAX).build(), "in") + .layer("out0", OutputLayer.builder().nIn(nIn).nOut(4).activation(Activation.SOFTMAX).build(), "in") + .layer("out1", OutputLayer.builder().nIn(nIn).nOut(6).activation(Activation.SOFTMAX).build(), "in") .setOutputs("out0", "out1") .build(); diff --git a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/ParallelWrapperTest.java b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/ParallelWrapperTest.java index 471cafbfd..e6c853e28 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/ParallelWrapperTest.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/ParallelWrapperTest.java @@ -74,18 +74,18 @@ public class ParallelWrapperTest extends BaseDL4JTest { //.learningRateDecayPolicy(LearningRatePolicy.Inverse).lrPolicyDecayRate(0.001).lrPolicyPower(0.75) .weightInit(WeightInit.XAVIER) .updater(new Nesterovs(0.01, 0.9)).list() - .layer(0, new ConvolutionLayer.Builder(5, 5) + .layer(0, ConvolutionLayer.builder(5, 5) //nIn and nOut specify depth. 
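For the graph-based variants, graphBuilder() takes named layers built the same way; a hedged sketch of the two-output graph used in the inplace parallel-inference tests above, with an arbitrary input width:

    import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
    import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
    import org.deeplearning4j.nn.conf.layers.OutputLayer;
    import org.deeplearning4j.nn.graph.ComputationGraph;
    import org.nd4j.linalg.activations.Activation;

    public class TwoHeadGraphSketch {
        public static void main(String[] args) {
            int nIn = 10;  // arbitrary input width for the sketch
            ComputationGraphConfiguration conf = NeuralNetConfiguration.builder()
                    .graphBuilder()
                    .addInputs("in")
                    .layer("out0", OutputLayer.builder().nIn(nIn).nOut(4).activation(Activation.SOFTMAX).build(), "in")
                    .layer("out1", OutputLayer.builder().nIn(nIn).nOut(6).activation(Activation.SOFTMAX).build(), "in")
                    .setOutputs("out0", "out1")
                    .build();

            ComputationGraph net = new ComputationGraph(conf);
            net.init();
        }
    }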
nIn here is the nChannels and nOut is the number of filters to be applied .nIn(nChannels).stride(1, 1).nOut(20).activation(Activation.IDENTITY).build()) - .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2) + .layer(1, SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2) .stride(2, 2).build()) - .layer(2, new ConvolutionLayer.Builder(5, 5) + .layer(2, ConvolutionLayer.builder(5, 5) //Note that nIn needed be specified in later layers .stride(1, 1).nOut(50).activation(Activation.IDENTITY).build()) - .layer(3, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2) + .layer(3, SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2) .stride(2, 2).build()) - .layer(4, new DenseLayer.Builder().activation(Activation.RELU).nOut(500).build()) - .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) + .layer(4, DenseLayer.builder().activation(Activation.RELU).nOut(500).build()) + .layer(5, OutputLayer.builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .nOut(outputNum).activation(Activation.SOFTMAX).build()) .inputType(InputType.convolutionalFlat(28, 28, nChannels)); diff --git a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestListeners.java b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestListeners.java index 4389d8f68..21c0cf976 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestListeners.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestListeners.java @@ -56,7 +56,7 @@ public class TestListeners extends BaseDL4JTest { TestListener.clearCounts(); NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().list().layer(0, - new OutputLayer.Builder(LossFunctions.LossFunction.MSE).nIn(10).nOut(10) + OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(10) .activation(Activation.TANH).build()); NeuralNetConfiguration conf = builder.build(); @@ -72,7 +72,7 @@ public class TestListeners extends BaseDL4JTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().graphBuilder() .addInputs("in").addLayer("0", - new OutputLayer.Builder(LossFunctions.LossFunction.MSE).nIn(10).nOut(10) + OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(10) .activation(Activation.TANH).build(), "in") .setOutputs("0").build(); @@ -88,7 +88,7 @@ public class TestListeners extends BaseDL4JTest { TestListener.clearCounts(); NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().list().layer(0, - new OutputLayer.Builder(LossFunctions.LossFunction.MSE).nIn(10).nOut(10) + OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(10) .activation(Activation.TANH).build()); NeuralNetConfiguration conf = builder.build(); @@ -110,7 +110,7 @@ public class TestListeners extends BaseDL4JTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().graphBuilder() .addInputs("in").addLayer("0", - new OutputLayer.Builder(LossFunctions.LossFunction.MSE).nIn(10).nOut(10) + OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(10) .activation(Activation.TANH).build(), "in") .setOutputs("0").build(); diff --git a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestParallelEarlyStopping.java 
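Across these test hunks the loss function is supplied either as an argument to builder(...) or through an explicit lossFunction(...) call, replacing the old Builder constructor argument. A hedged sketch of the two forms used in this patch; sizes are arbitrary:

    import org.deeplearning4j.nn.conf.layers.OutputLayer;
    import org.nd4j.linalg.activations.Activation;
    import org.nd4j.linalg.lossfunctions.LossFunctions;

    public class LossConfigSketch {
        public static void main(String[] args) {
            // Explicit setter, as in TestListeners and the early-stopping tests
            OutputLayer a = OutputLayer.builder()
                    .lossFunction(LossFunctions.LossFunction.MSE)
                    .nIn(10).nOut(10)
                    .activation(Activation.TANH)
                    .build();

            // Factory argument, as in dnnTest and the LeNet-style tests
            OutputLayer b = OutputLayer.builder(LossFunctions.LossFunction.MSE)
                    .nIn(10).nOut(10)
                    .activation(Activation.TANH)
                    .build();
        }
    }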
b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestParallelEarlyStopping.java index a003d99fb..2c957c389 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestParallelEarlyStopping.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestParallelEarlyStopping.java @@ -60,7 +60,7 @@ public class TestParallelEarlyStopping extends BaseDL4JTest { // .updater(Updater.SGD) // .weightInit(WeightInit.XAVIER) // .list() - // .layer(0,new OutputLayer.Builder().nIn(4).nOut(3).lossFunction(LossFunctions.LossFunction.MCXENT).build()) + // .layer(0,OutputLayer.builder().nIn(4).nOut(3).lossFunction(LossFunctions.LossFunction.MCXENT).build()) // // .build(); // MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -103,7 +103,7 @@ public class TestParallelEarlyStopping extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd()).weightInit(WeightInit.XAVIER).list() - .layer(0, new OutputLayer.Builder().nIn(4).nOut(3) + .layer(0, OutputLayer.builder().nIn(4).nOut(3) .lossFunction(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -136,7 +136,7 @@ public class TestParallelEarlyStopping extends BaseDL4JTest { .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(1.0)) //Intentionally huge LR .weightInit(WeightInit.XAVIER).list() - .layer(0, new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX) + .layer(0, OutputLayer.builder().nIn(4).nOut(3).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); diff --git a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestParallelEarlyStoppingUI.java b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestParallelEarlyStoppingUI.java index 66a9b76c4..663fcef15 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestParallelEarlyStoppingUI.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/TestParallelEarlyStoppingUI.java @@ -58,8 +58,8 @@ public class TestParallelEarlyStoppingUI extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd()).weightInit(WeightInit.XAVIER).list() - .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).build()) - .layer(1, new OutputLayer.Builder().nIn(3).nOut(3) + .layer(0, DenseLayer.builder().nIn(4).nOut(3).build()) + .layer(1, OutputLayer.builder().nIn(3).nOut(3) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); diff --git a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/factory/DefaultTrainerContextTest.java b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/factory/DefaultTrainerContextTest.java index 306338b11..398a48258 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/factory/DefaultTrainerContextTest.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/factory/DefaultTrainerContextTest.java @@ -54,18 
+54,18 @@ public class DefaultTrainerContextTest extends BaseDL4JTest { //.learningRateDecayPolicy(LearningRatePolicy.Inverse).lrPolicyDecayRate(0.001).lrPolicyPower(0.75) .weightInit(WeightInit.XAVIER) .updater(new Nesterovs(0.01, 0.9)) - .layer(0, new ConvolutionLayer.Builder(5, 5) + .layer(0, ConvolutionLayer.builder(5, 5) //nIn and nOut specify depth. nIn here is the nChannels and nOut is the number of filters to be applied .nIn(nChannels).stride(1, 1).nOut(20).activation(Activation.IDENTITY).build()) - .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2) + .layer(1, SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2) .stride(2, 2).build()) - .layer(2, new ConvolutionLayer.Builder(5, 5) + .layer(2, ConvolutionLayer.builder(5, 5) //Note that nIn needed be specified in later layers .stride(1, 1).nOut(50).activation(Activation.IDENTITY).build()) - .layer(3, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2) + .layer(3, SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2) .stride(2, 2).build()) - .layer(4, new DenseLayer.Builder().activation(Activation.RELU).nOut(500).build()) - .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) + .layer(4, DenseLayer.builder().activation(Activation.RELU).nOut(500).build()) + .layer(5, OutputLayer.builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .nOut(outputNum).activation(Activation.SOFTMAX).build()) .inputType(InputType.convolutionalFlat(28, 28, nChannels)); diff --git a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/factory/SymmetricTrainerContextTest.java b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/factory/SymmetricTrainerContextTest.java index b61f820f7..8e8c34cdf 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/factory/SymmetricTrainerContextTest.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/factory/SymmetricTrainerContextTest.java @@ -55,18 +55,18 @@ public class SymmetricTrainerContextTest extends BaseDL4JTest { //.learningRateDecayPolicy(LearningRatePolicy.Inverse).lrPolicyDecayRate(0.001).lrPolicyPower(0.75) .weightInit(WeightInit.XAVIER) .updater(new Nesterovs(0.01, 0.9)).list() - .layer(0, new ConvolutionLayer.Builder(5, 5) + .layer(0, ConvolutionLayer.builder(5, 5) //nIn and nOut specify depth. 
nIn here is the nChannels and nOut is the number of filters to be applied .nIn(nChannels).stride(1, 1).nOut(20).activation(Activation.IDENTITY).build()) - .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2) + .layer(1, SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2) .stride(2, 2).build()) - .layer(2, new ConvolutionLayer.Builder(5, 5) + .layer(2, ConvolutionLayer.builder(5, 5) //Note that nIn needed be specified in later layers .stride(1, 1).nOut(50).activation(Activation.IDENTITY).build()) - .layer(3, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2) + .layer(3, SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2) .stride(2, 2).build()) - .layer(4, new DenseLayer.Builder().activation(Activation.RELU).nOut(500).build()) - .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) + .layer(4, DenseLayer.builder().activation(Activation.RELU).nOut(500).build()) + .layer(5, OutputLayer.builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .nOut(outputNum).activation(Activation.SOFTMAX).build()) .inputType(InputType.convolutionalFlat(28, 28, nChannels)); diff --git a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/main/ParallelWrapperMainTest.java b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/main/ParallelWrapperMainTest.java index da27c4c63..f67f63f88 100644 --- a/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/main/ParallelWrapperMainTest.java +++ b/cavis-dnn/cavis-dnn-parallelwrapper/src/test/java/org/deeplearning4j/parallelism/main/ParallelWrapperMainTest.java @@ -70,18 +70,18 @@ public class ParallelWrapperMainTest extends BaseDL4JTest { .l2(0.0005) .weightInit(WeightInit.XAVIER) .updater(new Nesterovs(0.01, 0.9)).list() - .layer(0, new ConvolutionLayer.Builder(5, 5) + .layer(0, ConvolutionLayer.builder(5, 5) //nIn and nOut specify channels. 
nIn here is the nChannels and nOut is the number of filters to be applied .nIn(nChannels).stride(1, 1).nOut(20).activation(Activation.IDENTITY).build()) - .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2) + .layer(1, SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2) .stride(2, 2).build()) - .layer(2, new ConvolutionLayer.Builder(5, 5) + .layer(2, ConvolutionLayer.builder(5, 5) //Note that nIn needed be specified in later layers .stride(1, 1).nOut(50).activation(Activation.IDENTITY).build()) - .layer(3, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2) + .layer(3, SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2) .stride(2, 2).build()) - .layer(4, new DenseLayer.Builder().activation(Activation.RELU).nOut(500).build()) - .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) + .layer(4, DenseLayer.builder().activation(Activation.RELU).nOut(500).build()) + .layer(5, OutputLayer.builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .nOut(outputNum).activation(Activation.SOFTMAX).build()) .inputType(InputType.convolutionalFlat(28, 28, nChannels)); diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/BaseSparkTest.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/BaseSparkTest.java index e8412455a..17a1c7335 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/BaseSparkTest.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/BaseSparkTest.java @@ -124,9 +124,9 @@ public abstract class BaseSparkTest extends BaseDL4JTest implements Serializable protected NeuralNetConfiguration getBasicConf() { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(123) .updater(new Nesterovs(0.1, 0.9)).list() - .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(nIn).nOut(3) + .layer(0, org.deeplearning4j.nn.conf.layers.DenseLayer.builder().nIn(nIn).nOut(3) .activation(Activation.TANH).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .layer(1, org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).nIn(3).nOut(nOut) .activation(Activation.SOFTMAX).build()) .build(); diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSpark.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSpark.java index b3ed152dc..fe67e3d3c 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSpark.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSpark.java @@ -70,7 +70,7 @@ public class TestEarlyStoppingSpark extends BaseSparkTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd()).weightInit(WeightInit.XAVIER).list() - .layer(0, new OutputLayer.Builder().nIn(4).nOut(3) + .layer(0, OutputLayer.builder().nIn(4).nOut(3) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -126,7 +126,7 @@ public class TestEarlyStoppingSpark extends BaseSparkTest { 
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(10.0)) //Intentionally huge LR .weightInit(WeightInit.XAVIER).list() - .layer(0, new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.IDENTITY) + .layer(0, OutputLayer.builder().nIn(4).nOut(3).activation(Activation.IDENTITY) .lossFunction(LossFunctions.LossFunction.MSE).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -165,7 +165,7 @@ public class TestEarlyStoppingSpark extends BaseSparkTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(1e-6)).weightInit(WeightInit.XAVIER).list() - .layer(0, new OutputLayer.Builder().nIn(4).nOut(3) + .layer(0, OutputLayer.builder().nIn(4).nOut(3) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -211,7 +211,7 @@ public class TestEarlyStoppingSpark extends BaseSparkTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(0.0)).weightInit(WeightInit.XAVIER).list() - .layer(0, new OutputLayer.Builder().nIn(4).nOut(3) + .layer(0, OutputLayer.builder().nIn(4).nOut(3) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -248,7 +248,7 @@ public class TestEarlyStoppingSpark extends BaseSparkTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd()).weightInit(WeightInit.XAVIER).list() - .layer(0, new OutputLayer.Builder().nIn(4).nOut(3) + .layer(0, OutputLayer.builder().nIn(4).nOut(3) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSparkCompGraph.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSparkCompGraph.java index 1a196af4f..4c17adff0 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSparkCompGraph.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/TestEarlyStoppingSparkCompGraph.java @@ -74,7 +74,7 @@ public class TestEarlyStoppingSparkCompGraph extends BaseSparkTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd()).weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in") - .addLayer("0", new OutputLayer.Builder().nIn(4).nOut(3) + .addLayer("0", OutputLayer.builder().nIn(4).nOut(3) .lossFunction(LossFunctions.LossFunction.MCXENT).build(), "in") .setOutputs("0").build(); ComputationGraph net = new ComputationGraph(conf); @@ -128,7 +128,7 @@ public class TestEarlyStoppingSparkCompGraph extends BaseSparkTest { .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(2.0)) //Intentionally huge LR .weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in") - .addLayer("0", new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.IDENTITY) + .addLayer("0", OutputLayer.builder().nIn(4).nOut(3).activation(Activation.IDENTITY) 
.lossFunction(LossFunctions.LossFunction.MSE).build(), "in") .setOutputs("0").build(); ComputationGraph net = new ComputationGraph(conf); @@ -169,7 +169,7 @@ public class TestEarlyStoppingSparkCompGraph extends BaseSparkTest { .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(1e-6)).weightInit(WeightInit.XAVIER).graphBuilder() .addInputs("in") - .addLayer("0", new OutputLayer.Builder().nIn(4).nOut(3) + .addLayer("0", OutputLayer.builder().nIn(4).nOut(3) .lossFunction(LossFunctions.LossFunction.MCXENT).build(), "in") .setOutputs("0").build(); ComputationGraph net = new ComputationGraph(conf); @@ -217,7 +217,7 @@ public class TestEarlyStoppingSparkCompGraph extends BaseSparkTest { .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(0.0)).weightInit(WeightInit.XAVIER).graphBuilder() .addInputs("in") - .addLayer("0", new OutputLayer.Builder().nIn(4).nOut(3) + .addLayer("0", OutputLayer.builder().nIn(4).nOut(3) .lossFunction(LossFunctions.LossFunction.MCXENT).build(), "in") .setOutputs("0").build(); ComputationGraph net = new ComputationGraph(conf); @@ -256,7 +256,7 @@ public class TestEarlyStoppingSparkCompGraph extends BaseSparkTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd()).weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in") - .addLayer("0", new OutputLayer.Builder().nIn(4).nOut(3) + .addLayer("0", OutputLayer.builder().nIn(4).nOut(3) .lossFunction(LossFunctions.LossFunction.MCXENT).build(), "in") .setOutputs("0").build(); ComputationGraph net = new ComputationGraph(conf); diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/TestKryo.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/TestKryo.java index 7815303f0..6bb3bd8b5 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/TestKryo.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/TestKryo.java @@ -68,7 +68,7 @@ public class TestKryo extends BaseSparkKryoTest { m.put(0, 0.5); m.put(10, 0.1); NeuralNetConfiguration mlc = NeuralNetConfiguration.builder() - .updater(new Nadam(new MapSchedule(ScheduleType.ITERATION,m))).layer(0, new OutputLayer.Builder().nIn(10).nOut(10).build()) + .updater(new Nadam(new MapSchedule(ScheduleType.ITERATION,m))).layer(0, OutputLayer.builder().nIn(10).nOut(10).build()) .build(); testSerialization(mlc, si); @@ -78,23 +78,23 @@ public class TestKryo extends BaseSparkKryoTest { .dist(new UniformDistribution(-1, 1)) .updater(new Adam(new MapSchedule(ScheduleType.ITERATION,m)))) .graphBuilder() - .addInputs("in").addLayer("out", new OutputLayer.Builder().nIn(10).nOut(10).build(), "in") + .addInputs("in").addLayer("out", OutputLayer.builder().nIn(10).nOut(10).build(), "in") .setOutputs("out").build(); testSerialization(cgc, si); //Check main layers: - LayerConfiguration[] layers = new LayerConfiguration[] {new OutputLayer.Builder().nIn(10).nOut(10).build(), - new RnnOutputLayer.Builder().nIn(10).nOut(10).build(), new LossLayer.Builder().build(), - new CenterLossOutputLayer.Builder().nIn(10).nOut(10).build(), - new DenseLayer.Builder().nIn(10).nOut(10).build(), - new ConvolutionLayer.Builder().nIn(10).nOut(10).build(), new SubsamplingLayer.Builder().build(), - new Convolution1DLayer.Builder(2, 2).nIn(10).nOut(10).build(), - new 
ActivationLayer.Builder().activation(Activation.TANH).build(), - new GlobalPoolingLayer.Builder().build(), new GravesLSTM.Builder().nIn(10).nOut(10).build(), - new LSTM.Builder().nIn(10).nOut(10).build(), new DropoutLayer.Builder(0.5).build(), - new BatchNormalization.Builder().build(), new LocalResponseNormalization.Builder().build()}; + LayerConfiguration[] layers = new LayerConfiguration[] {OutputLayer.builder().nIn(10).nOut(10).build(), + RnnOutputLayer.builder().nIn(10).nOut(10).build(), LossLayer.builder().build(), + CenterLossOutputLayer.builder().nIn(10).nOut(10).build(), + DenseLayer.builder().nIn(10).nOut(10).build(), + ConvolutionLayer.builder().nIn(10).nOut(10).build(), SubsamplingLayer.builder().build(), + Convolution1DLayer.builder(2, 2).nIn(10).nOut(10).build(), + ActivationLayer.builder().activation(Activation.TANH).build(), + GlobalPoolingLayer.builder().build(), GravesLSTM.builder().nIn(10).nOut(10).build(), + LSTM.builder().nIn(10).nOut(10).build(), DropoutLayer.builder(0.5).build(), + BatchNormalization.builder().build(), LocalResponseNormalization.builder().build()}; for (LayerConfiguration l : layers) { testSerialization(l, si); diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/datavec/TestPreProcessedData.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/datavec/TestPreProcessedData.java index cc32d9723..97b53a077 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/datavec/TestPreProcessedData.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/datavec/TestPreProcessedData.java @@ -85,9 +85,9 @@ public class TestPreProcessedData extends BaseSparkTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(Updater.RMSPROP.getIUpdaterWithDefaultConfig()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() - .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(4).nOut(3) + .layer(0, org.deeplearning4j.nn.conf.layers.DenseLayer.builder().nIn(4).nOut(3) .activation(Activation.TANH).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .layer(1, org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).nIn(3).nOut(3).activation(Activation.SOFTMAX) .build()) .build(); @@ -136,9 +136,9 @@ public class TestPreProcessedData extends BaseSparkTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().updater(Updater.RMSPROP.getIUpdaterWithDefaultConfig()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .graphBuilder().addInputs("in") - .addLayer("0", new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(4).nOut(3) + .addLayer("0", org.deeplearning4j.nn.conf.layers.DenseLayer.builder().nIn(4).nOut(3) .activation(Activation.TANH).build(), "in") - .addLayer("1", new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .addLayer("1", org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).nIn(3).nOut(3).activation(Activation.SOFTMAX) .build(), "0") @@ -190,9 +190,9 @@ public class TestPreProcessedData extends BaseSparkTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().updater(Updater.RMSPROP.getIUpdaterWithDefaultConfig()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .graphBuilder().addInputs("in") - .addLayer("0", new 
org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(4).nOut(3) + .addLayer("0", org.deeplearning4j.nn.conf.layers.DenseLayer.builder().nIn(4).nOut(3) .activation(Activation.TANH).build(), "in") - .addLayer("1", new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .addLayer("1", org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).nIn(3).nOut(3).activation(Activation.SOFTMAX) .build(), "0") diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/TestKryoWarning.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/TestKryoWarning.java index 402ecb46a..397a04d4e 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/TestKryoWarning.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/TestKryoWarning.java @@ -40,7 +40,7 @@ public class TestKryoWarning { try { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().list() - .layer(0, new OutputLayer.Builder().nIn(10).nOut(10).build()) + .layer(0, OutputLayer.builder().nIn(10).nOut(10).build()) .build(); TrainingMaster tm = new ParameterAveragingTrainingMaster.Builder(1).build(); @@ -57,7 +57,7 @@ public class TestKryoWarning { try { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().graphBuilder().addInputs("in") - .addLayer("0", new OutputLayer.Builder().nIn(10).nOut(10).build(), "in").setOutputs("0") + .addLayer("0", OutputLayer.builder().nIn(10).nOut(10).build(), "in").setOutputs("0") .build(); TrainingMaster tm = new ParameterAveragingTrainingMaster.Builder(1).build(); diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/customlayer/TestCustomLayer.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/customlayer/TestCustomLayer.java index d8b0ddb0a..78023db6a 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/customlayer/TestCustomLayer.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/customlayer/TestCustomLayer.java @@ -52,9 +52,9 @@ public class TestCustomLayer extends BaseSparkTest { //Custom layers are tested more extensively in dl4j core NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new Sgd(0.1)).list() - .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()) + .layer(0, DenseLayer.builder().nIn(10).nOut(10).build()) .layer(1, new CustomLayer(3.14159)).layer(2, - new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .nIn(10).nOut(10).build()) .build(); diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/graph/TestSparkComputationGraph.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/graph/TestSparkComputationGraph.java index 20727ed03..bd2a3b808 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/graph/TestSparkComputationGraph.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/graph/TestSparkComputationGraph.java @@ -78,8 +78,8 @@ public class TestSparkComputationGraph extends BaseSparkTest { ComputationGraphConfiguration conf = 
NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER) .graphBuilder().addInputs("in") - .addLayer("l0", new DenseLayer.Builder().nIn(4).nOut(10).build(), "in") - .addLayer("l1", new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .addLayer("l0", DenseLayer.builder().nIn(4).nOut(10).build(), "in") + .addLayer("l1", OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(10).nOut(2).build(), "l0") .setOutputs("l1").build(); @@ -106,8 +106,8 @@ public class TestSparkComputationGraph extends BaseSparkTest { ComputationGraphConfiguration config = NeuralNetConfiguration.builder() .updater(new Sgd(0.1)) .graphBuilder().addInputs("in") - .addLayer("dense", new DenseLayer.Builder().nIn(4).nOut(2).build(), "in").addLayer("out", - new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(2).nOut(3) + .addLayer("dense", DenseLayer.builder().nIn(4).nOut(2).build(), "in").addLayer("out", + OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT).nIn(2).nOut(3) .build(), "dense") .setOutputs("out").build(); @@ -140,9 +140,9 @@ public class TestSparkComputationGraph extends BaseSparkTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().l1(0.1).l2(0.1) .seed(123).updater(new Nesterovs(0.1, 0.9)).graphBuilder() .addInputs("in") - .addLayer("0", new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(nIn).nOut(3) + .addLayer("0", org.deeplearning4j.nn.conf.layers.DenseLayer.builder().nIn(nIn).nOut(3) .activation(Activation.TANH).build(), "in") - .addLayer("1", new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .addLayer("1", org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).nIn(3).nOut(nOut) .activation(Activation.SOFTMAX).build(), "0") @@ -219,9 +219,9 @@ public class TestSparkComputationGraph extends BaseSparkTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345).updater(Updater.RMSPROP.getIUpdaterWithDefaultConfig()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in") - .addLayer("0", new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(4).nOut(4) + .addLayer("0", org.deeplearning4j.nn.conf.layers.DenseLayer.builder().nIn(4).nOut(4) .activation(Activation.TANH).build(), "in") - .addLayer("1", new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .addLayer("1", org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).nIn(4).nOut(3).activation(Activation.SOFTMAX) .build(), "0") @@ -420,8 +420,8 @@ public class TestSparkComputationGraph extends BaseSparkTest { .graphBuilder() .addInputs("input1", "input2") .addVertex("avg",new ElementWiseVertex(ElementWiseVertex.Op.Average),"input1","input2") - .addLayer("dense",new DenseLayer.Builder().dropOut(0.9).nIn(featSize).nOut(featSize / 2).build(),"avg") - .addLayer("output",new OutputLayer.Builder().nIn(featSize / 2).nOut(2).lossFunction(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).hasBias(false).build(),"dense") + .addLayer("dense",DenseLayer.builder().dropOut(0.9).nIn(featSize).nOut(featSize / 2).build(),"avg") + .addLayer("output",OutputLayer.builder().nIn(featSize / 2).nOut(2).lossFunction(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).hasBias(false).build(),"dense") .setOutputs("output") .build(); diff --git 
a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/misc/TestFrozenLayers.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/misc/TestFrozenLayers.java index 688135888..9563a98eb 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/misc/TestFrozenLayers.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/misc/TestFrozenLayers.java @@ -23,7 +23,6 @@ package org.deeplearning4j.spark.impl.misc; import org.apache.spark.api.java.JavaRDD; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.DenseLayer; -import org.deeplearning4j.nn.conf.layers.DenseLayer.Builder; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.layers.FrozenLayer; @@ -56,7 +55,7 @@ public class TestFrozenLayers extends BaseSparkTest { @Test public void testSparkFrozenLayers() { - NeuralNetConfiguration.NeuralNetConfigurationBuilder overallConf = NeuralNetConfiguration.builder().updater(new Sgd(0.1)) + var overallConf = NeuralNetConfiguration.builder().updater(new Sgd(0.1)) .activation(Activation.TANH); FineTuneConfiguration finetune = new FineTuneConfiguration.Builder().updater(new Sgd(0.1)).build(); @@ -65,10 +64,10 @@ public class TestFrozenLayers extends BaseSparkTest { int nOut = 3; MultiLayerNetwork origModel = new MultiLayerNetwork((NeuralNetConfiguration) overallConf.clone() - .layer(0, new Builder().nIn(6).nOut(5).build()) - .layer(1, new Builder().nIn(5).nOut(4).build()) - .layer(2, new Builder().nIn(4).nOut(3).build()) - .layer(3, new OutputLayer.Builder( + .layer(0, DenseLayer.builder().nIn(6).nOut(5).build()) + .layer(1, DenseLayer.builder().nIn(5).nOut(4).build()) + .layer(2, DenseLayer.builder().nIn(4).nOut(3).build()) + .layer(3, OutputLayer.builder( LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(3).nOut(3) .build()) .build()); @@ -141,10 +140,10 @@ public class TestFrozenLayers extends BaseSparkTest { ComputationGraph origModel = new ComputationGraph(NeuralNetConfiguration.builder().updater(new Sgd(0.1)) .activation(Activation.TANH).graphBuilder().addInputs("in") - .addLayer("0", new DenseLayer.Builder().nIn(6).nOut(5).build(), "in") - .addLayer("1", new DenseLayer.Builder().nIn(5).nOut(4).build(), "0") - .addLayer("2", new DenseLayer.Builder().nIn(4).nOut(3).build(), "1") - .addLayer("3", new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .addLayer("0", DenseLayer.builder().nIn(6).nOut(5).build(), "in") + .addLayer("1", DenseLayer.builder().nIn(5).nOut(4).build(), "0") + .addLayer("2", DenseLayer.builder().nIn(4).nOut(3).build(), "1") + .addLayer("3", org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(3).nOut(3) .build(), "2") diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/multilayer/TestMiscFunctions.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/multilayer/TestMiscFunctions.java index 7a638199c..b4e24d459 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/multilayer/TestMiscFunctions.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/multilayer/TestMiscFunctions.java @@ -57,8 
+57,8 @@ public class TestMiscFunctions extends BaseSparkTest { public void testFeedForwardWithKey() { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER).list() - .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).build()) - .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(3).nOut(3) + .layer(0, DenseLayer.builder().nIn(4).nOut(3).build()) + .layer(1, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT).nIn(3).nOut(3) .activation(Activation.SOFTMAX).build()) .build(); @@ -108,9 +108,9 @@ public class TestMiscFunctions extends BaseSparkTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER) .list() - .layer( new LSTM.Builder().nIn(4).nOut(3).build()) - .layer(new GlobalPoolingLayer(PoolingType.AVG)) - .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(3).nOut(3) + .layer( LSTM.builder().nIn(4).nOut(3).build()) + .layer(GlobalPoolingLayer.builder(PoolingType.AVG).build()) + .layer(OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT).nIn(3).nOut(3) .activation(Activation.SOFTMAX).build()) .build(); @@ -163,9 +163,9 @@ public class TestMiscFunctions extends BaseSparkTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER) .graphBuilder().addInputs("in1", "in2") - .addLayer("0", new DenseLayer.Builder().nIn(4).nOut(3).build(), "in1") - .addLayer("1", new DenseLayer.Builder().nIn(4).nOut(3).build(), "in2").addLayer("2", - new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(6).nOut(3) + .addLayer("0", DenseLayer.builder().nIn(4).nOut(3).build(), "in1") + .addLayer("1", DenseLayer.builder().nIn(4).nOut(3).build(), "in2").addLayer("2", + OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT).nIn(6).nOut(3) .activation(Activation.SOFTMAX).build(), "0", "1") .setOutputs("2").build(); @@ -219,8 +219,8 @@ public class TestMiscFunctions extends BaseSparkTest { int nIn = 10; - NeuralNetConfiguration mlc = NeuralNetConfiguration.builder().list() - .layer(0, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder() + NeuralNetConfiguration mlc = NeuralNetConfiguration.builder() + .layer(0, org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.builder() .reconstructionDistribution( new GaussianReconstructionDistribution(Activation.IDENTITY)) .nIn(nIn).nOut(5).encoderLayerSizes(12).decoderLayerSizes(13).build()) @@ -259,8 +259,8 @@ public class TestMiscFunctions extends BaseSparkTest { int nIn = 10; NeuralNetConfiguration mlc = NeuralNetConfiguration.builder() - .list().layer(0, - new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder() + .layer(0, + org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.builder() .reconstructionDistribution(new LossFunctionWrapper( Activation.IDENTITY, new LossMSE())) .nIn(nIn).nOut(5).encoderLayerSizes(12).decoderLayerSizes(13) diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/multilayer/TestSparkDl4jMultiLayer.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/multilayer/TestSparkDl4jMultiLayer.java index 7de0dc285..a819df930 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/multilayer/TestSparkDl4jMultiLayer.java +++ 
b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/multilayer/TestSparkDl4jMultiLayer.java @@ -103,9 +103,9 @@ public class TestSparkDl4jMultiLayer extends BaseSparkTest { .updater(new Adam(1e-3)) .l2(1e-5) .list() - .layer(0, new DenseLayer.Builder().nIn(28 * 28).nOut(500).build()) - .layer(1, new DenseLayer.Builder().nIn(500).nOut(100).build()) - .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) + .layer(0, DenseLayer.builder().nIn(28 * 28).nOut(500).build()) + .layer(1, DenseLayer.builder().nIn(500).nOut(100).build()) + .layer(2, OutputLayer.builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .activation(Activation.SOFTMAX).nIn(100).nOut(10).build()) .build(); diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestCompareParameterAveragingSparkVsSingleMachine.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestCompareParameterAveragingSparkVsSingleMachine.java index 7ba980f62..746ecfc0c 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestCompareParameterAveragingSparkVsSingleMachine.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestCompareParameterAveragingSparkVsSingleMachine.java @@ -66,7 +66,7 @@ public class TestCompareParameterAveragingSparkVsSingleMachine { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(WeightInit.XAVIER).updater(updater).seed(seed).list() - .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()).layer(1, new OutputLayer.Builder() + .layer(0, DenseLayer.builder().nIn(10).nOut(10).build()).layer(1, OutputLayer.builder() .lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(10).build()) .build(); return conf; @@ -77,11 +77,11 @@ public class TestCompareParameterAveragingSparkVsSingleMachine { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(WeightInit.XAVIER).updater(updater).seed(seed).list() - .layer(0, new ConvolutionLayer.Builder().nOut(3).kernelSize(2, 2).stride(1, 1).padding(0, 0) + .layer(0, ConvolutionLayer.builder().nOut(3).kernelSize(2, 2).stride(1, 1).padding(0, 0) .activation(Activation.TANH).build()) - .layer(1, new ConvolutionLayer.Builder().nOut(3).kernelSize(2, 2).stride(1, 1).padding(0, 0) + .layer(1, ConvolutionLayer.builder().nOut(3).kernelSize(2, 2).stride(1, 1).padding(0, 0) .activation(Activation.TANH).build()) - .layer(1, new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nOut(10) + .layer(1, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nOut(10) .build()) .inputType(InputType.convolutional(10, 10, 3)).build(); return conf; @@ -93,8 +93,8 @@ public class TestCompareParameterAveragingSparkVsSingleMachine { .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(WeightInit.XAVIER).updater(updater).seed(seed).graphBuilder() .addInputs("in") - .addLayer("0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in").addLayer("1", - new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(10) + .addLayer("0", DenseLayer.builder().nIn(10).nOut(10).build(), "in").addLayer("1", + 
OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(10) .nOut(10).build(), "0") .setOutputs("1").build(); @@ -107,11 +107,11 @@ public class TestCompareParameterAveragingSparkVsSingleMachine { .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(WeightInit.XAVIER).updater(updater).seed(seed).graphBuilder() .addInputs("in") - .addLayer("0", new ConvolutionLayer.Builder().nOut(3).kernelSize(2, 2).stride(1, 1) + .addLayer("0", ConvolutionLayer.builder().nOut(3).kernelSize(2, 2).stride(1, 1) .padding(0, 0).activation(Activation.TANH).build(), "in") - .addLayer("1", new ConvolutionLayer.Builder().nOut(3).kernelSize(2, 2).stride(1, 1) + .addLayer("1", ConvolutionLayer.builder().nOut(3).kernelSize(2, 2).stride(1, 1) .padding(0, 0).activation(Activation.TANH).build(), "0") - .addLayer("2", new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nOut(10) + .addLayer("2", OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nOut(10) .build(), "1") .setOutputs("2").setInputTypes(InputType.convolutional(10, 10, 3)) .build(); diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestSparkMultiLayerParameterAveraging.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestSparkMultiLayerParameterAveraging.java index e4a720a51..c26a1b7cd 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestSparkMultiLayerParameterAveraging.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/paramavg/TestSparkMultiLayerParameterAveraging.java @@ -122,9 +122,9 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { DataSet d = new IrisDataSetIterator(150, 150).next(); NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(123) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() - .layer(0, new DenseLayer.Builder().nIn(4).nOut(100).weightInit(WeightInit.XAVIER) + .layer(0, DenseLayer.builder().nIn(4).nOut(100).weightInit(WeightInit.XAVIER) .activation(Activation.RELU).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .layer(1, org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).nIn(100).nOut(3) .activation(Activation.SOFTMAX).weightInit(WeightInit.XAVIER) .build()) @@ -160,9 +160,9 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { .updater(new Adam(1e-6)) .weightInit(WeightInit.XAVIER) .list() - .layer(new BatchNormalization.Builder().nIn(4).nOut(4).build()) - .layer(new DenseLayer.Builder().nIn(4).nOut(32).activation(Activation.RELU).build()) - .layer(new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(32).nOut(3) + .layer(BatchNormalization.builder().nIn(4).nOut(4).build()) + .layer(DenseLayer.builder().nIn(4).nOut(32).activation(Activation.RELU).build()) + .layer(org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT).nIn(32).nOut(3) .activation(Activation.SOFTMAX).build()) .build(); @@ -270,9 +270,9 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { } NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new RmsProp()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() - .layer(0, new 
org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(nIn).nOut(3) + .layer(0, org.deeplearning4j.nn.conf.layers.DenseLayer.builder().nIn(nIn).nOut(3) .activation(Activation.TANH).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .layer(1, org.deeplearning4j.nn.conf.layers.OutputLayer.builder( LossFunctions.LossFunction.MSE).nIn(3).nOut(nOut).activation(Activation.SOFTMAX) .build()) .build(); @@ -295,9 +295,9 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().l1(0.1).l2(0.1) .seed(123).updater(new Nesterovs(0.1, 0.9)).list() - .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(nIn).nOut(3) + .layer(0, org.deeplearning4j.nn.conf.layers.DenseLayer.builder().nIn(nIn).nOut(3) .activation(Activation.TANH).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .layer(1, org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).nIn(3).nOut(nOut) .activation(Activation.SOFTMAX).build()) .build(); @@ -384,9 +384,9 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new RmsProp()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() - .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(28 * 28).nOut(50) + .layer(0, org.deeplearning4j.nn.conf.layers.DenseLayer.builder().nIn(28 * 28).nOut(50) .activation(Activation.TANH).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .layer(1, org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).nIn(50).nOut(10) .activation(Activation.SOFTMAX).build()) .build(); @@ -448,9 +448,9 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new RmsProp()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() - .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(28 * 28).nOut(50) + .layer(0, org.deeplearning4j.nn.conf.layers.DenseLayer.builder().nIn(28 * 28).nOut(50) .activation(Activation.TANH).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .layer(1, org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).nIn(50).nOut(10) .activation(Activation.SOFTMAX).build()) .build(); @@ -518,9 +518,9 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new RmsProp()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() - .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(28 * 28).nOut(50) + .layer(0, org.deeplearning4j.nn.conf.layers.DenseLayer.builder().nIn(28 * 28).nOut(50) .activation(Activation.TANH).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .layer(1, org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).nIn(50).nOut(10) .activation(Activation.SOFTMAX).build()) .build(); @@ -607,9 +607,9 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().updater(new RmsProp()) 
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .graphBuilder().addInputs("in") - .addLayer("0", new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(28 * 28).nOut(50) + .addLayer("0", org.deeplearning4j.nn.conf.layers.DenseLayer.builder().nIn(28 * 28).nOut(50) .activation(Activation.TANH).build(), "in") - .addLayer("1", new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .addLayer("1", org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).nIn(50).nOut(10) .activation(Activation.SOFTMAX).build(), "0") @@ -680,9 +680,9 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345).updater(new RmsProp()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(WeightInit.XAVIER).list() - .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(4).nOut(4) + .layer(0, org.deeplearning4j.nn.conf.layers.DenseLayer.builder().nIn(4).nOut(4) .activation(Activation.TANH).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .layer(1, org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).nIn(4).nOut(3).activation(Activation.SOFTMAX) .build()) .build(); @@ -764,9 +764,9 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new RmsProp()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() - .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(28 * 28).nOut(50) + .layer(0, org.deeplearning4j.nn.conf.layers.DenseLayer.builder().nIn(28 * 28).nOut(50) .activation(Activation.TANH).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .layer(1, org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).nIn(50).nOut(10) .activation(Activation.SOFTMAX).build()) .build(); @@ -815,9 +815,9 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().updater(new RmsProp()) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .graphBuilder().addInputs("in") - .addLayer("0", new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(28 * 28).nOut(50) + .addLayer("0", org.deeplearning4j.nn.conf.layers.DenseLayer.builder().nIn(28 * 28).nOut(50) .activation(Activation.TANH).build(), "in") - .addLayer("1", new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .addLayer("1", org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).nIn(50).nOut(10) .activation(Activation.SOFTMAX).build(), "0") @@ -855,7 +855,7 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { Nd4j.getRandom().setSeed(12345); NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345).updater(new RmsProp()) .weightInit(WeightInit.XAVIER).list() - .layer(0, new VariationalAutoencoder.Builder().nIn(8).nOut(10).encoderLayerSizes(12) + .layer(0, VariationalAutoencoder.builder().nIn(8).nOut(10).encoderLayerSizes(12) .decoderLayerSizes(13).reconstructionDistribution( new GaussianReconstructionDistribution(Activation.IDENTITY)) .build()) @@ -891,7 +891,7 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { 
Nd4j.getRandom().setSeed(12345); ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345).updater(new RmsProp()) .weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in") - .addLayer("0", new VariationalAutoencoder.Builder().nIn(8).nOut(10).encoderLayerSizes(12) + .addLayer("0", VariationalAutoencoder.builder().nIn(8).nOut(10).encoderLayerSizes(12) .decoderLayerSizes(13).reconstructionDistribution( new GaussianReconstructionDistribution(Activation.IDENTITY)) .build(), "in") @@ -931,8 +931,8 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER).list() - .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(layerSize).build()) - .layer(1, new OutputLayer.Builder().nIn(layerSize).nOut(nOut) + .layer(0, DenseLayer.builder().nIn(nIn).nOut(layerSize).build()) + .layer(1, OutputLayer.builder().nIn(layerSize).nOut(nOut) .activation(Activation.SOFTMAX).lossFunction( LossFunctions.LossFunction.MCXENT) .build()) @@ -986,8 +986,8 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().weightInit(WeightInit.XAVIER).list() - .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(layerSize).build()) - .layer(1, new OutputLayer.Builder().nIn(layerSize).nOut(nOut) + .layer(0, DenseLayer.builder().nIn(nIn).nOut(layerSize).build()) + .layer(1, OutputLayer.builder().nIn(layerSize).nOut(nOut) .activation(Activation.SOFTMAX).lossFunction( LossFunctions.LossFunction.MCXENT) .build()) @@ -1040,13 +1040,13 @@ public class TestSparkMultiLayerParameterAveraging extends BaseSparkTest { } NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() - .layer(new OutputLayer.Builder().nIn(4).nOut(3).build()) + .layer(OutputLayer.builder().nIn(4).nOut(3).build()) .build(); ComputationGraphConfiguration conf2 = NeuralNetConfiguration.builder() .graphBuilder() .addInputs("in") - .addLayer("out", new OutputLayer.Builder().nIn(4).nOut(3).build(), "in") + .addLayer("out", OutputLayer.builder().nIn(4).nOut(3).build(), "in") .setOutputs("out") .build(); diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/stats/TestTrainingStatsCollection.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/stats/TestTrainingStatsCollection.java index 5b735e5a2..c807c4aaa 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/stats/TestTrainingStatsCollection.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/impl/stats/TestTrainingStatsCollection.java @@ -68,8 +68,8 @@ public class TestTrainingStatsCollection extends BaseSparkTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() - .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()) - .layer(1, new OutputLayer.Builder().nIn(10).nOut(10).build()) + .layer(0, DenseLayer.builder().nIn(10).nOut(10).build()) + .layer(1, OutputLayer.builder().nIn(10).nOut(10).build()) .build(); int miniBatchSizePerWorker = 10; diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/ui/TestListeners.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/ui/TestListeners.java index 1104f8667..76a84eebd 100644 --- 
a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/ui/TestListeners.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-core/src/test/java/org/deeplearning4j/spark/ui/TestListeners.java @@ -61,9 +61,9 @@ public class TestListeners extends BaseSparkTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(123) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list() - .layer(0, new DenseLayer.Builder().nIn(4).nOut(100).weightInit(WeightInit.XAVIER) + .layer(0, DenseLayer.builder().nIn(4).nOut(100).weightInit(WeightInit.XAVIER) .activation(Activation.RELU).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .layer(1, org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).nIn(100).nOut(3) .activation(Activation.SOFTMAX).weightInit(WeightInit.XAVIER) .build()) diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-parameterserver/src/test/java/org/deeplearning4j/spark/parameterserver/BaseSparkTest.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-parameterserver/src/test/java/org/deeplearning4j/spark/parameterserver/BaseSparkTest.java index f767e9e28..d91c89c83 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-parameterserver/src/test/java/org/deeplearning4j/spark/parameterserver/BaseSparkTest.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-parameterserver/src/test/java/org/deeplearning4j/spark/parameterserver/BaseSparkTest.java @@ -122,9 +122,9 @@ public abstract class BaseSparkTest extends BaseDL4JTest implements Serializable protected NeuralNetConfiguration getBasicConf() { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(123) .updater(new Nesterovs(0.1, 0.9)) - .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(nIn).nOut(3) + .layer(0, org.deeplearning4j.nn.conf.layers.DenseLayer.builder().nIn(nIn).nOut(3) .activation(Activation.TANH).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( + .layer(1, org.deeplearning4j.nn.conf.layers.OutputLayer.builder().lossFunction( LossFunctions.LossFunction.MCXENT).nIn(3).nOut(nOut) .activation(Activation.SOFTMAX).build()) .build(); diff --git a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-parameterserver/src/test/java/org/deeplearning4j/spark/parameterserver/train/GradientSharingTrainingTest.java b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-parameterserver/src/test/java/org/deeplearning4j/spark/parameterserver/train/GradientSharingTrainingTest.java index 144121116..5a270d016 100644 --- a/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-parameterserver/src/test/java/org/deeplearning4j/spark/parameterserver/train/GradientSharingTrainingTest.java +++ b/cavis-dnn/cavis-dnn-spark/cavis-dnn-spark-parameterserver/src/test/java/org/deeplearning4j/spark/parameterserver/train/GradientSharingTrainingTest.java @@ -128,7 +128,7 @@ public class GradientSharingTrainingTest extends BaseSparkTest { .updater(new AMSGrad(0.1)) .graphBuilder() .addInputs("in") - .layer("out", new OutputLayer.Builder().nIn(784).nOut(10).activation(Activation.SOFTMAX) + .layer("out", OutputLayer.builder().nIn(784).nOut(10).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build(), "in") .setOutputs("out") .build(); @@ -263,15 +263,15 @@ public class GradientSharingTrainingTest extends BaseSparkTest { .weightInit(WeightInit.XAVIER) .seed(12345) .list() - .layer(new 
OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build()) + .layer(OutputLayer.builder().nIn(4).nOut(3).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); } else { conf = NeuralNetConfiguration.builder() .weightInit(WeightInit.XAVIER) .seed(12345) .list() - .layer(new DenseLayer.Builder().nIn(4).nOut(4).activation(Activation.TANH).build()) - .layer(new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build()) + .layer(DenseLayer.builder().nIn(4).nOut(4).activation(Activation.TANH).build()) + .layer(OutputLayer.builder().nIn(4).nOut(3).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); } MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -349,7 +349,7 @@ public class GradientSharingTrainingTest extends BaseSparkTest { .updater(new AMSGrad(0.001)) .graphBuilder() .addInputs("in") - .layer("out", new OutputLayer.Builder().nIn(784).nOut(10).activation(Activation.SOFTMAX) + .layer("out", OutputLayer.builder().nIn(784).nOut(10).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build(), "in") .setOutputs("out") .build(); diff --git a/cavis-native/cavis-native-lib/build.gradle b/cavis-native/cavis-native-lib/build.gradle index 8ecbeafc1..9ae949f6c 100644 --- a/cavis-native/cavis-native-lib/build.gradle +++ b/cavis-native/cavis-native-lib/build.gradle @@ -235,7 +235,10 @@ chipList.each { thisChip -> /* Get VCVARS in case we want to build CUDA * MinGW64 g++ on MSYS is used otherwise */ - if (thisChip.equals('cuda') && osdetector.os.startsWith("win") && !VISUAL_STUDIO_INSTALL_DIR.isEmpty()) { + if (thisChip.equals('cuda') && osdetector.os.startsWith("win") + && project.hasProperty("skip-native") + && !project.getProperty("skip-native").equals("true") + && !VISUAL_STUDIO_INSTALL_DIR.isEmpty()) { def proc = ["cmd.exe", "/c", "${VISUAL_STUDIO_VCVARS_CMD} > nul && set"].execute() it.environmentVariables = it.environmentVariables ?: [:] def lines = proc.text.split("\\r?\\n") @@ -329,7 +332,8 @@ chipList.each { thisChip -> thisTask.properties = getBuildPlatform( thisChip, thisTask ) - if(thisChip.equals('cuda') && osdetector.os.startsWith("win") && !VISUAL_STUDIO_INSTALL_DIR.isEmpty()) { + if(thisChip.equals('cuda') && osdetector.os.startsWith("win") && project.hasProperty("skip-native") + && !project.getProperty("skip-native").equals("true") && !VISUAL_STUDIO_INSTALL_DIR.isEmpty()) { def proc = ["cmd.exe", "/c", "${VISUAL_STUDIO_VCVARS_CMD} > nul && where.exe cl.exe"].execute() def outp = proc.text def cl = outp.replace("\\", "\\\\").trim() diff --git a/cavis-ui/cavis-ui-common/src/main/java/org/deeplearning4j/ui/weights/ConvolutionalIterationListener.java b/cavis-ui/cavis-ui-common/src/main/java/org/deeplearning4j/ui/weights/ConvolutionalIterationListener.java index 44caf37c4..9195df8a7 100644 --- a/cavis-ui/cavis-ui-common/src/main/java/org/deeplearning4j/ui/weights/ConvolutionalIterationListener.java +++ b/cavis-ui/cavis-ui-common/src/main/java/org/deeplearning4j/ui/weights/ConvolutionalIterationListener.java @@ -145,7 +145,7 @@ public class ConvolutionalIterationListener extends BaseTrainingListener { throw new RuntimeException("layers.length != activations.size(). 
Got layers.length="+layers.length+", activations.size()="+activations.size()); for( int i=0; i paramkeys = l.getLayerConfiguration().initializer().paramKeys(l.getLayerConfiguration()); for (String s : paramkeys) { double lr = conf.getUpdaterByParam(s).getLearningRate(l.getIterationCount(), l.getEpochCount()); diff --git a/cavis-ui/cavis-ui-model/src/test/java/org/deeplearning4j/ui/stats/TestStatsListener.java b/cavis-ui/cavis-ui-model/src/test/java/org/deeplearning4j/ui/stats/TestStatsListener.java index 24e5e1ed9..bc8bab8c7 100644 --- a/cavis-ui/cavis-ui-model/src/test/java/org/deeplearning4j/ui/stats/TestStatsListener.java +++ b/cavis-ui/cavis-ui-model/src/test/java/org/deeplearning4j/ui/stats/TestStatsListener.java @@ -54,7 +54,7 @@ public class TestStatsListener extends BaseDL4JTest { NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .layer(0, - new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(4).nOut(3).build()) .build(); diff --git a/cavis-ui/cavis-ui-model/src/test/java/org/deeplearning4j/ui/stats/TestTransferStatsCollection.java b/cavis-ui/cavis-ui-model/src/test/java/org/deeplearning4j/ui/stats/TestTransferStatsCollection.java index 1dc5cb1a6..e7c0800b6 100644 --- a/cavis-ui/cavis-ui-model/src/test/java/org/deeplearning4j/ui/stats/TestTransferStatsCollection.java +++ b/cavis-ui/cavis-ui-model/src/test/java/org/deeplearning4j/ui/stats/TestTransferStatsCollection.java @@ -43,8 +43,8 @@ public class TestTransferStatsCollection extends BaseDL4JTest { public void test() throws IOException { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() - .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()) - .layer(1, new OutputLayer.Builder().activation(Activation.SOFTMAX).nIn(10).nOut(10).build()).build(); + .layer(0, DenseLayer.builder().nIn(10).nOut(10).build()) + .layer(1, OutputLayer.builder().activation(Activation.SOFTMAX).nIn(10).nOut(10).build()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); diff --git a/cavis-ui/cavis-ui-vertx/src/main/java/org/deeplearning4j/ui/module/train/TrainModuleUtils.java b/cavis-ui/cavis-ui-vertx/src/main/java/org/deeplearning4j/ui/module/train/TrainModuleUtils.java index aebfaffa7..3455d97fe 100644 --- a/cavis-ui/cavis-ui-vertx/src/main/java/org/deeplearning4j/ui/module/train/TrainModuleUtils.java +++ b/cavis-ui/cavis-ui-vertx/src/main/java/org/deeplearning4j/ui/module/train/TrainModuleUtils.java @@ -23,13 +23,8 @@ package org.deeplearning4j.ui.module.train; import com.fasterxml.jackson.annotation.JsonIgnore; import lombok.AllArgsConstructor; import lombok.Data; -import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.graph.GraphVertex; -import org.deeplearning4j.nn.conf.graph.LayerVertex; import org.deeplearning4j.nn.conf.layers.*; -import org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder; -import org.deeplearning4j.nn.params.VariationalAutoencoderParamInitializer; import java.util.*; @@ -66,7 +61,7 @@ public class TrainModuleUtils { int layerIdx = 1; for (LayerConfiguration c : list) { LayerConfiguration layer = c; - String layerName = layer.getLayerName(); + String layerName = layer.getName(); if (layerName == null) layerName = "layer" + layerIdx; vertexNames.add(layerName); @@ -243,7 +238,7 @@ public class TrainModuleUtils { 
} else { //VAE or similar... LayerConfiguration layer = config.getFirstLayer(); - String layerName = layer.getLayerName(); + String layerName = layer.getName(); if (layerName == null) layerName = "layer0"; vertexNames.add(layerName); @@ -287,8 +282,8 @@ public class TrainModuleUtils { map.put("Pooling Type", layer1.getPoolingType().toString()); } else if (layer instanceof BaseOutputLayer) { BaseOutputLayer ol = (BaseOutputLayer) layer; - if(ol.getLossFn() != null) - map.put("Loss Function", ol.getLossFn().toString()); + if(ol.getLossFunction() != null) + map.put("Loss Function", ol.getLossFunction().toString()); } return map; diff --git a/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestRemoteReceiver.java b/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestRemoteReceiver.java index 09dbe9846..606432176 100644 --- a/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestRemoteReceiver.java +++ b/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestRemoteReceiver.java @@ -134,8 +134,8 @@ public class TestRemoteReceiver extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .layer(0, new DenseLayer.Builder().activation(Activation.TANH).nIn(4).nOut(4).build()) - .layer(1, new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MCXENT) + .layer(0, DenseLayer.builder().activation(Activation.TANH).nIn(4).nOut(4).build()) + .layer(1, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(4).nOut(3).build()) .build(); diff --git a/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestVertxUI.java b/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestVertxUI.java index d51f74aba..97b3b1456 100644 --- a/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestVertxUI.java +++ b/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestVertxUI.java @@ -96,18 +96,18 @@ public class TestVertxUI extends BaseDL4JTest { .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(1e-5)) .layer(0, - new VariationalAutoencoder.Builder().nIn(4).nOut(3).encoderLayerSizes(10, 11) + VariationalAutoencoder.builder().nIn(4).nOut(3).encoderLayerSizes(10, 11) .decoderLayerSizes(12, 13).weightInit(WeightInit.XAVIER) .pzxActivationFunction(Activation.IDENTITY) .reconstructionDistribution( new GaussianReconstructionDistribution()) .activation(Activation.LEAKYRELU).build()) - .layer(1, new VariationalAutoencoder.Builder().nIn(3).nOut(3).encoderLayerSizes(7) + .layer(1, VariationalAutoencoder.builder().nIn(3).nOut(3).encoderLayerSizes(7) .decoderLayerSizes(8).weightInit(WeightInit.XAVIER) .pzxActivationFunction(Activation.IDENTITY) .reconstructionDistribution(new GaussianReconstructionDistribution()) .activation(Activation.LEAKYRELU).build()) - .layer(2, new OutputLayer.Builder().nIn(3).nOut(3).build()) + .layer(2, OutputLayer.builder().nIn(3).nOut(3).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -135,8 +135,8 @@ public class TestVertxUI extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .layer(0, new DenseLayer.Builder().activation(Activation.TANH).nIn(4).nOut(4).build()) - .layer(1, new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MCXENT) + .layer(0, 
DenseLayer.builder().activation(Activation.TANH).nIn(4).nOut(4).build()) + .layer(1, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(4).nOut(3).build()) .build(); @@ -162,9 +162,9 @@ public class TestVertxUI extends BaseDL4JTest { uiServer.attach(ss); ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().graphBuilder().addInputs("in") - .addLayer("L0", new DenseLayer.Builder().activation(Activation.TANH).nIn(4).nOut(4).build(), + .addLayer("L0", DenseLayer.builder().activation(Activation.TANH).nIn(4).nOut(4).build(), "in") - .addLayer("L1", new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MCXENT) + .addLayer("L1", OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(4).nOut(3).build(), "L0") .setOutputs("L1").build(); @@ -184,9 +184,9 @@ public class TestVertxUI extends BaseDL4JTest { public void testAutoAttach() throws Exception { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().graphBuilder().addInputs("in") - .addLayer("L0", new DenseLayer.Builder().activation(Activation.TANH).nIn(4).nOut(4).build(), + .addLayer("L0", DenseLayer.builder().activation(Activation.TANH).nIn(4).nOut(4).build(), "in") - .addLayer("L1", new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MCXENT) + .addLayer("L1", OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(4).nOut(3).build(), "L0") .setOutputs("L1").build(); diff --git a/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestVertxUIManual.java b/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestVertxUIManual.java index e17681c4c..f8e300be4 100644 --- a/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestVertxUIManual.java +++ b/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestVertxUIManual.java @@ -97,11 +97,11 @@ public class TestVertxUIManual extends BaseDL4JTest { .updater(new Sgd(0.03)) .l2(1e-4) - .layer(0, new DenseLayer.Builder().nIn(numInputs).nOut(3) + .layer(0, DenseLayer.builder().nIn(numInputs).nOut(3) .build()) - .layer(1, new DenseLayer.Builder().nIn(3).nOut(3) + .layer(1, DenseLayer.builder().nIn(3).nOut(3) .build()) - .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) + .layer(2, OutputLayer.builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .activation(Activation.SOFTMAX) .nIn(3).nOut(outputNum).build()) .build(); @@ -193,8 +193,8 @@ public class TestVertxUIManual extends BaseDL4JTest { statsProvider.put(sessionId, ss); NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .layer(0, new DenseLayer.Builder().activation(Activation.TANH).nIn(4).nOut(layerSize).build()) - .layer(1, new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MCXENT) + .layer(0, DenseLayer.builder().activation(Activation.TANH).nIn(4).nOut(layerSize).build()) + .layer(1, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(layerSize).nOut(3).build()) .build(); diff --git a/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestVertxUIMultiSession.java b/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestVertxUIMultiSession.java index fb21f9561..4cea47965 100644 --- a/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestVertxUIMultiSession.java +++ 
b/cavis-ui/cavis-ui-vertx/src/test/java/org/deeplearning4j/ui/TestVertxUIMultiSession.java @@ -89,8 +89,8 @@ public class TestVertxUIMultiSession extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .updater(new Adam(1e-2)) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .layer(0, new DenseLayer.Builder().activation(Activation.TANH).nIn(4).nOut(layerSize).build()) - .layer(1, new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MCXENT) + .layer(0, DenseLayer.builder().activation(Activation.TANH).nIn(4).nOut(layerSize).build()) + .layer(1, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(layerSize).nOut(3).build()) .build(); @@ -154,8 +154,8 @@ public class TestVertxUIMultiSession extends BaseDL4JTest { statsStorageForSession.put(sessionId, ss); NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) - .layer(0, new DenseLayer.Builder().activation(Activation.TANH).nIn(4).nOut(layerSize).build()) - .layer(1, new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MCXENT) + .layer(0, DenseLayer.builder().activation(Activation.TANH).nIn(4).nOut(layerSize).build()) + .layer(1, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(layerSize).nOut(3).build()) .build(); diff --git a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/AlexNet.java b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/AlexNet.java index 9e55c5b26..e3fdce02b 100644 --- a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/AlexNet.java +++ b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/AlexNet.java @@ -87,33 +87,33 @@ public class AlexNet extends ZooModel { .l2(5 * 1e-4) .miniBatch(false) - .layer(0, new ConvolutionLayer.Builder(new int[]{11,11}, new int[]{4, 4}) + .layer(0, ConvolutionLayer.builder(new int[]{11,11}, new int[]{4, 4}) .name("cnn1") .cudnnAlgoMode(ConvolutionLayer.AlgoMode.PREFER_FASTEST) .convolutionMode(ConvolutionMode.Truncate) .nIn(inputShape[0]) .nOut(96) .build()) - .layer(1, new LocalResponseNormalization.Builder().build()) - .layer(2, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX) + .layer(1, LocalResponseNormalization.builder().build()) + .layer(2, SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX) .kernelSize(3,3) .stride(2,2) .padding(1,1) .name("maxpool1") .build()) - .layer(3, new ConvolutionLayer.Builder(new int[]{5,5}, new int[]{1,1}, new int[]{2,2}) + .layer(3, ConvolutionLayer.builder(new int[]{5,5}, new int[]{1,1}, new int[]{2,2}) .name("cnn2") .cudnnAlgoMode(ConvolutionLayer.AlgoMode.PREFER_FASTEST) .convolutionMode(ConvolutionMode.Truncate) .nOut(256) .biasInit(nonZeroBias) .build()) - .layer(4, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[]{3, 3}, new int[]{2, 2}) + .layer(4, SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX, new int[]{3, 3}, new int[]{2, 2}) .convolutionMode(ConvolutionMode.Truncate) .name("maxpool2") .build()) - .layer(5, new LocalResponseNormalization.Builder().build()) - .layer(6, new ConvolutionLayer.Builder() + .layer(5, LocalResponseNormalization.builder().build()) + .layer(6, ConvolutionLayer.builder() .kernelSize(3,3) .stride(1,1) .convolutionMode(ConvolutionMode.Same) @@ -121,37 +121,37 @@ public class AlexNet extends ZooModel { 
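Throughout these hunks the change is mechanical: the nested `new XxxLayer.Builder()` classes are replaced by the static `XxxLayer.builder()` factories now generated for the layer configurations. A minimal before/after sketch of that rewrite, assembled only from calls that already appear in this patch (the nIn/nOut values and activations are illustrative, not tied to any one file):

    // before: nested builder classes constructed with "new"
    // NeuralNetConfiguration conf = NeuralNetConfiguration.builder()
    //     .layer(0, new DenseLayer.Builder().activation(Activation.TANH).nIn(4).nOut(4).build())
    //     .layer(1, new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MCXENT)
    //         .activation(Activation.SOFTMAX).nIn(4).nOut(3).build())
    //     .build();

    // after: static builder() factories
    NeuralNetConfiguration conf = NeuralNetConfiguration.builder()
        .layer(0, DenseLayer.builder().activation(Activation.TANH).nIn(4).nOut(4).build())
        .layer(1, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT)
            .activation(Activation.SOFTMAX).nIn(4).nOut(3).build())
        .build();

Builders that previously took the loss function as a constructor argument now set it explicitly: output-style layers use `.lossFunction(LossFunctions.LossFunction.X)`, while `LossLayer.builder()` and `CnnLossLayer.builder()` in the Darknet19, SqueezeNet and UNet hunks pass an `ILossFunction` via `LossFunctions.LossFunction.X.getILossFunction()`.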
.cudnnAlgoMode(ConvolutionLayer.AlgoMode.PREFER_FASTEST) .nOut(384) .build()) - .layer(7, new ConvolutionLayer.Builder(new int[]{3,3}, new int[]{1,1}) + .layer(7, ConvolutionLayer.builder(new int[]{3,3}, new int[]{1,1}) .name("cnn4") .cudnnAlgoMode(ConvolutionLayer.AlgoMode.PREFER_FASTEST) .nOut(384) .biasInit(nonZeroBias) .build()) - .layer(8, new ConvolutionLayer.Builder(new int[]{3,3}, new int[]{1,1}) + .layer(8, ConvolutionLayer.builder(new int[]{3,3}, new int[]{1,1}) .name("cnn5") .cudnnAlgoMode(ConvolutionLayer.AlgoMode.PREFER_FASTEST) .nOut(256) .biasInit(nonZeroBias) .build()) - .layer(9, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[]{3,3}, new int[]{2,2}) + .layer(9, SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX, new int[]{3,3}, new int[]{2,2}) .name("maxpool3") .convolutionMode(ConvolutionMode.Truncate) .build()) - .layer(10, new DenseLayer.Builder() + .layer(10, DenseLayer.builder() .name("ffn1") .nIn(256*6*6) .nOut(4096) .weightInit(new NormalDistribution(0, 0.005)) .biasInit(nonZeroBias) .build()) - .layer(11, new DenseLayer.Builder() + .layer(11, DenseLayer.builder() .name("ffn2") .nOut(4096) .weightInit(new NormalDistribution(0, 0.005)) .biasInit(nonZeroBias) .dropOut(0.5) .build()) - .layer(12, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) + .layer(12, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .name("output") .nOut(numClasses) .activation(Activation.SOFTMAX) diff --git a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/Darknet19.java b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/Darknet19.java index c94161a4b..2182b5940 100644 --- a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/Darknet19.java +++ b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/Darknet19.java @@ -126,7 +126,7 @@ public class Darknet19 extends ZooModel { int layerNumber = 19; graphBuilder .addLayer("convolution2d_" + layerNumber, - new ConvolutionLayer.Builder(1,1) + ConvolutionLayer.builder(1,1) .nIn(1024) .nOut(numClasses) .weightInit(WeightInit.XAVIER) @@ -136,12 +136,12 @@ public class Darknet19 extends ZooModel { .activation(Activation.IDENTITY) .build(), "activation_" + (layerNumber - 1)) - .addLayer("globalpooling", new GlobalPoolingLayer.Builder(PoolingType.AVG) + .addLayer("globalpooling", GlobalPoolingLayer.builder(PoolingType.AVG) .build(), "convolution2d_" + layerNumber) - .addLayer("softmax", new ActivationLayer.Builder() + .addLayer("softmax", ActivationLayer.builder() .activation(Activation.SOFTMAX) .build(), "globalpooling") - .addLayer("loss", new LossLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) + .addLayer("loss", LossLayer.builder().lossFunction(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD.getILossFunction()) .build(), "softmax") .setOutputs("loss"); diff --git a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/FaceNetNN4Small2.java b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/FaceNetNN4Small2.java index 07ce6b985..d71331666 100644 --- a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/FaceNetNN4Small2.java +++ b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/FaceNetNN4Small2.java @@ -91,52 +91,52 @@ public class FaceNetNN4Small2 extends ZooModel { graph.addInputs("input1") .addLayer("stem-cnn1", - new ConvolutionLayer.Builder(new int[] {7, 7}, new int[] {2, 2}, + 
ConvolutionLayer.builder(new int[] {7, 7}, new int[] {2, 2}, new int[] {3, 3}).nIn(inputShape[0]).nOut(64) .cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE) .build(), "input1") - .addLayer("stem-batch1", new BatchNormalization.Builder(false).nIn(64).nOut(64).build(), + .addLayer("stem-batch1", BatchNormalization.builder(false).nIn(64).nOut(64).build(), "stem-cnn1") - .addLayer("stem-activation1", new ActivationLayer.Builder().activation(Activation.RELU).build(), + .addLayer("stem-activation1", ActivationLayer.builder().activation(Activation.RELU).build(), "stem-batch1") // pool -> norm .addLayer("stem-pool1", - new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] {3, 3}, + SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX, new int[] {3, 3}, new int[] {2, 2}, new int[] {1, 1}).build(), "stem-activation1") - .addLayer("stem-lrn1", new LocalResponseNormalization.Builder(1, 5, 1e-4, 0.75).build(), + .addLayer("stem-lrn1", LocalResponseNormalization.builder(1, 5, 1e-4, 0.75).build(), "stem-pool1") // Inception 2 .addLayer("inception-2-cnn1", - new ConvolutionLayer.Builder(new int[] {1, 1}).nIn(64).nOut(64) + ConvolutionLayer.builder(new int[] {1, 1}).nIn(64).nOut(64) .cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE).build(), "stem-lrn1") - .addLayer("inception-2-batch1", new BatchNormalization.Builder(false).nIn(64).nOut(64).build(), + .addLayer("inception-2-batch1", BatchNormalization.builder(false).nIn(64).nOut(64).build(), "inception-2-cnn1") .addLayer("inception-2-activation1", - new ActivationLayer.Builder().activation(Activation.RELU).build(), + ActivationLayer.builder().activation(Activation.RELU).build(), "inception-2-batch1") .addLayer("inception-2-cnn2", - new ConvolutionLayer.Builder(new int[] {3, 3}, new int[] {1, 1}, + ConvolutionLayer.builder(new int[] {3, 3}, new int[] {1, 1}, new int[] {1, 1}).nIn(64).nOut(192) .cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE) .build(), "inception-2-activation1") .addLayer("inception-2-batch2", - new BatchNormalization.Builder(false).nIn(192).nOut(192).build(), + BatchNormalization.builder(false).nIn(192).nOut(192).build(), "inception-2-cnn2") .addLayer("inception-2-activation2", - new ActivationLayer.Builder().activation(Activation.RELU).build(), + ActivationLayer.builder().activation(Activation.RELU).build(), "inception-2-batch2") // norm -> pool - .addLayer("inception-2-lrn1", new LocalResponseNormalization.Builder(1, 5, 1e-4, 0.75).build(), + .addLayer("inception-2-lrn1", LocalResponseNormalization.builder(1, 5, 1e-4, 0.75).build(), "inception-2-activation2") .addLayer("inception-2-pool1", - new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] {3, 3}, + SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX, new int[] {3, 3}, new int[] {2, 2}, new int[] {1, 1}).build(), "inception-2-lrn1"); @@ -154,39 +154,39 @@ public class FaceNetNN4Small2 extends ZooModel { // SubsamplingLayer.PoolingType.PNORM, 2, true, "inception-3b"); graph.addLayer("3c-1x1", - new ConvolutionLayer.Builder(new int[] {1, 1}, new int[] {1, 1}).nIn(320).nOut(128) + ConvolutionLayer.builder(new int[] {1, 1}, new int[] {1, 1}).nIn(320).nOut(128) .cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE).build(), "inception-3b") .addLayer("3c-1x1-norm", FaceNetHelper.batchNorm(128, 128), "3c-1x1") - .addLayer("3c-transfer1", new ActivationLayer.Builder().activation(transferFunction).build(), + .addLayer("3c-transfer1", ActivationLayer.builder().activation(transferFunction).build(), "3c-1x1-norm") 
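The ComputationGraph-based zoo models follow the same pattern: only the layer construction inside `addLayer(name, layer, inputs)` changes, while the vertex names and wiring stay untouched. A condensed sketch of the migrated stem used in these FaceNet hunks (sizes copied from the surrounding lines; `nIn(3)` stands in for `inputShape[0]` and is an assumption for illustration):

    // old: graph.addLayer("stem-cnn1", new ConvolutionLayer.Builder(...)...build(), "input1")
    graph.addLayer("stem-cnn1",
            ConvolutionLayer.builder(new int[] {7, 7}, new int[] {2, 2}, new int[] {3, 3})
                    .nIn(3).nOut(64).build(), "input1")
         .addLayer("stem-batch1", BatchNormalization.builder(false).nIn(64).nOut(64).build(), "stem-cnn1")
         .addLayer("stem-activation1",
                 ActivationLayer.builder().activation(Activation.RELU).build(), "stem-batch1");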
.addLayer("3c-3x3", - new ConvolutionLayer.Builder(new int[] {3, 3}, new int[] {2, 2}).nIn(128) + ConvolutionLayer.builder(new int[] {3, 3}, new int[] {2, 2}).nIn(128) .nOut(256).cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE) .build(), "3c-transfer1") .addLayer("3c-3x3-norm", FaceNetHelper.batchNorm(256, 256), "3c-3x3") - .addLayer("3c-transfer2", new ActivationLayer.Builder().activation(transferFunction).build(), + .addLayer("3c-transfer2", ActivationLayer.builder().activation(transferFunction).build(), "3c-3x3-norm") .addLayer("3c-2-1x1", - new ConvolutionLayer.Builder(new int[] {1, 1}, new int[] {1, 1}).nIn(320) + ConvolutionLayer.builder(new int[] {1, 1}, new int[] {1, 1}).nIn(320) .nOut(32).cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE) .build(), "inception-3b") .addLayer("3c-2-1x1-norm", FaceNetHelper.batchNorm(32, 32), "3c-2-1x1") - .addLayer("3c-2-transfer3", new ActivationLayer.Builder().activation(transferFunction).build(), + .addLayer("3c-2-transfer3", ActivationLayer.builder().activation(transferFunction).build(), "3c-2-1x1-norm") .addLayer("3c-2-5x5", - new ConvolutionLayer.Builder(new int[] {3, 3}, new int[] {2, 2}).nIn(32) + ConvolutionLayer.builder(new int[] {3, 3}, new int[] {2, 2}).nIn(32) .nOut(64).cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE) .build(), "3c-2-transfer3") .addLayer("3c-2-5x5-norm", FaceNetHelper.batchNorm(64, 64), "3c-2-5x5") - .addLayer("3c-2-transfer4", new ActivationLayer.Builder().activation(transferFunction).build(), + .addLayer("3c-2-transfer4", ActivationLayer.builder().activation(transferFunction).build(), "3c-2-5x5-norm") - .addLayer("3c-pool", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, + .addLayer("3c-pool", SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX, new int[] {3, 3}, new int[] {2, 2}, new int[] {1, 1}).build(), "inception-3b") .addVertex("inception-3c", new MergeVertex(), "3c-transfer2", "3c-2-transfer4", "3c-pool"); @@ -202,39 +202,39 @@ public class FaceNetNN4Small2 extends ZooModel { // SubsamplingLayer.PoolingType.MAX, 2, 1, true, "inception-4a"); graph.addLayer("4e-1x1", - new ConvolutionLayer.Builder(new int[] {1, 1}, new int[] {1, 1}).nIn(640).nOut(160) + ConvolutionLayer.builder(new int[] {1, 1}, new int[] {1, 1}).nIn(640).nOut(160) .cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE).build(), "inception-4a") .addLayer("4e-1x1-norm", FaceNetHelper.batchNorm(160, 160), "4e-1x1") - .addLayer("4e-transfer1", new ActivationLayer.Builder().activation(transferFunction).build(), + .addLayer("4e-transfer1", ActivationLayer.builder().activation(transferFunction).build(), "4e-1x1-norm") .addLayer("4e-3x3", - new ConvolutionLayer.Builder(new int[] {3, 3}, new int[] {2, 2}).nIn(160) + ConvolutionLayer.builder(new int[] {3, 3}, new int[] {2, 2}).nIn(160) .nOut(256).cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE) .build(), "4e-transfer1") .addLayer("4e-3x3-norm", FaceNetHelper.batchNorm(256, 256), "4e-3x3") - .addLayer("4e-transfer2", new ActivationLayer.Builder().activation(transferFunction).build(), + .addLayer("4e-transfer2", ActivationLayer.builder().activation(transferFunction).build(), "4e-3x3-norm") .addLayer("4e-2-1x1", - new ConvolutionLayer.Builder(new int[] {1, 1}, new int[] {1, 1}).nIn(640) + ConvolutionLayer.builder(new int[] {1, 1}, new int[] {1, 1}).nIn(640) .nOut(64).cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE) .build(), "inception-4a") .addLayer("4e-2-1x1-norm", FaceNetHelper.batchNorm(64, 64), "4e-2-1x1") - .addLayer("4e-2-transfer3", new 
ActivationLayer.Builder().activation(transferFunction).build(), + .addLayer("4e-2-transfer3", ActivationLayer.builder().activation(transferFunction).build(), "4e-2-1x1-norm") .addLayer("4e-2-5x5", - new ConvolutionLayer.Builder(new int[] {3, 3}, new int[] {2, 2}).nIn(64) + ConvolutionLayer.builder(new int[] {3, 3}, new int[] {2, 2}).nIn(64) .nOut(128).cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE) .build(), "4e-2-transfer3") .addLayer("4e-2-5x5-norm", FaceNetHelper.batchNorm(128, 128), "4e-2-5x5") - .addLayer("4e-2-transfer4", new ActivationLayer.Builder().activation(transferFunction).build(), + .addLayer("4e-2-transfer4", ActivationLayer.builder().activation(transferFunction).build(), "4e-2-5x5-norm") - .addLayer("4e-pool", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, + .addLayer("4e-pool", SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX, new int[] {3, 3}, new int[] {2, 2}, new int[] {1, 1}).build(), "inception-4a") .addVertex("inception-4e", new MergeVertex(), "4e-transfer2", "4e-2-transfer4", "4e-pool"); @@ -245,40 +245,40 @@ public class FaceNetNN4Small2 extends ZooModel { // SubsamplingLayer.PoolingType.PNORM, 2, true, "inception-4e"); graph.addLayer("5a-1x1", - new ConvolutionLayer.Builder(new int[] {1, 1}, new int[] {1, 1}).nIn(1024).nOut(256) + ConvolutionLayer.builder(new int[] {1, 1}, new int[] {1, 1}).nIn(1024).nOut(256) .cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE).build(), "inception-4e").addLayer("5a-1x1-norm", FaceNetHelper.batchNorm(256, 256), "5a-1x1") - .addLayer("5a-transfer1", new ActivationLayer.Builder().activation(transferFunction).build(), + .addLayer("5a-transfer1", ActivationLayer.builder().activation(transferFunction).build(), "5a-1x1-norm") .addLayer("5a-2-1x1", - new ConvolutionLayer.Builder(new int[] {1, 1}, new int[] {1, 1}).nIn(1024) + ConvolutionLayer.builder(new int[] {1, 1}, new int[] {1, 1}).nIn(1024) .nOut(96).cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE) .build(), "inception-4e") .addLayer("5a-2-1x1-norm", FaceNetHelper.batchNorm(96, 96), "5a-2-1x1") - .addLayer("5a-2-transfer2", new ActivationLayer.Builder().activation(transferFunction).build(), + .addLayer("5a-2-transfer2", ActivationLayer.builder().activation(transferFunction).build(), "5a-2-1x1-norm") .addLayer("5a-2-3x3", - new ConvolutionLayer.Builder(new int[] {3, 3}, new int[] {1, 1}).nIn(96) + ConvolutionLayer.builder(new int[] {3, 3}, new int[] {1, 1}).nIn(96) .nOut(384).cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE) .build(), "5a-2-transfer2") .addLayer("5a-2-3x3-norm", FaceNetHelper.batchNorm(384, 384), "5a-2-3x3") - .addLayer("5a-transfer3", new ActivationLayer.Builder().activation(transferFunction).build(), + .addLayer("5a-transfer3", ActivationLayer.builder().activation(transferFunction).build(), "5a-2-3x3-norm") .addLayer("5a-3-pool", - new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.PNORM, + SubsamplingLayer.builder(SubsamplingLayer.PoolingType.PNORM, new int[] {3, 3}, new int[] {1, 1}).pnorm(2).build(), "inception-4e") .addLayer("5a-3-1x1reduce", - new ConvolutionLayer.Builder(new int[] {1, 1}, new int[] {1, 1}).nIn(1024) + ConvolutionLayer.builder(new int[] {1, 1}, new int[] {1, 1}).nIn(1024) .nOut(96).cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE) .build(), "5a-3-pool") .addLayer("5a-3-1x1reduce-norm", FaceNetHelper.batchNorm(96, 96), "5a-3-1x1reduce") - .addLayer("5a-3-transfer4", new ActivationLayer.Builder().activation(Activation.RELU).build(), + .addLayer("5a-3-transfer4", 
ActivationLayer.builder().activation(Activation.RELU).build(), "5a-3-1x1reduce-norm") .addVertex("inception-5a", new MergeVertex(), "5a-transfer1", "5a-transfer3", "5a-3-transfer4"); @@ -290,53 +290,53 @@ public class FaceNetNN4Small2 extends ZooModel { // SubsamplingLayer.PoolingType.MAX, 1, 1, true, "inception-5a"); graph.addLayer("5b-1x1", - new ConvolutionLayer.Builder(new int[] {1, 1}, new int[] {1, 1}).nIn(736).nOut(256) + ConvolutionLayer.builder(new int[] {1, 1}, new int[] {1, 1}).nIn(736).nOut(256) .cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE).build(), "inception-5a").addLayer("5b-1x1-norm", FaceNetHelper.batchNorm(256, 256), "5b-1x1") - .addLayer("5b-transfer1", new ActivationLayer.Builder().activation(transferFunction).build(), + .addLayer("5b-transfer1", ActivationLayer.builder().activation(transferFunction).build(), "5b-1x1-norm") .addLayer("5b-2-1x1", - new ConvolutionLayer.Builder(new int[] {1, 1}, new int[] {1, 1}).nIn(736) + ConvolutionLayer.builder(new int[] {1, 1}, new int[] {1, 1}).nIn(736) .nOut(96).cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE) .build(), "inception-5a") .addLayer("5b-2-1x1-norm", FaceNetHelper.batchNorm(96, 96), "5b-2-1x1") - .addLayer("5b-2-transfer2", new ActivationLayer.Builder().activation(transferFunction).build(), + .addLayer("5b-2-transfer2", ActivationLayer.builder().activation(transferFunction).build(), "5b-2-1x1-norm") .addLayer("5b-2-3x3", - new ConvolutionLayer.Builder(new int[] {3, 3}, new int[] {1, 1}).nIn(96) + ConvolutionLayer.builder(new int[] {3, 3}, new int[] {1, 1}).nIn(96) .nOut(384).cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE) .build(), "5b-2-transfer2") .addLayer("5b-2-3x3-norm", FaceNetHelper.batchNorm(384, 384), "5b-2-3x3") - .addLayer("5b-2-transfer3", new ActivationLayer.Builder().activation(transferFunction).build(), + .addLayer("5b-2-transfer3", ActivationLayer.builder().activation(transferFunction).build(), "5b-2-3x3-norm") .addLayer("5b-3-pool", - new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] {3, 3}, + SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX, new int[] {3, 3}, new int[] {1, 1}, new int[] {1, 1}).build(), "inception-5a") .addLayer("5b-3-1x1reduce", - new ConvolutionLayer.Builder(new int[] {1, 1}, new int[] {1, 1}).nIn(736) + ConvolutionLayer.builder(new int[] {1, 1}, new int[] {1, 1}).nIn(736) .nOut(96).cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE) .build(), "5b-3-pool") .addLayer("5b-3-1x1reduce-norm", FaceNetHelper.batchNorm(96, 96), "5b-3-1x1reduce") - .addLayer("5b-3-transfer4", new ActivationLayer.Builder().activation(transferFunction).build(), + .addLayer("5b-3-transfer4", ActivationLayer.builder().activation(transferFunction).build(), "5b-3-1x1reduce-norm") .addVertex("inception-5b", new MergeVertex(), "5b-transfer1", "5b-2-transfer3", "5b-3-transfer4"); graph.addLayer("avgpool", - new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.AVG, new int[] {3, 3}, + SubsamplingLayer.builder(SubsamplingLayer.PoolingType.AVG, new int[] {3, 3}, new int[] {3, 3}).build(), "inception-5b") - .addLayer("bottleneck",new DenseLayer.Builder().nOut(embeddingSize) + .addLayer("bottleneck",DenseLayer.builder().nOut(embeddingSize) .activation(Activation.IDENTITY).build(),"avgpool") .addVertex("embeddings", new L2NormalizeVertex(new int[] {}, 1e-6), "bottleneck") - .addLayer("lossLayer", new CenterLossOutputLayer.Builder() + .addLayer("lossLayer",CenterLossOutputLayer.builder() .lossFunction(LossFunctions.LossFunction.SQUARED_LOSS) 
.activation(Activation.SOFTMAX).nOut(numClasses).lambda(1e-4).alpha(0.9) .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer).build(), diff --git a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/InceptionResNetV1.java b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/InceptionResNetV1.java index 2d5d69dda..5eaf223bf 100644 --- a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/InceptionResNetV1.java +++ b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/InceptionResNetV1.java @@ -77,13 +77,13 @@ public class InceptionResNetV1 extends ZooModel { graph.addInputs("input1").setInputTypes(InputType.convolutional(inputShape[2], inputShape[1], inputShape[0])) // Logits - .addLayer("bottleneck", new DenseLayer.Builder().nIn(5376).nOut(embeddingSize).build(), + .addLayer("bottleneck", DenseLayer.builder().nIn(5376).nOut(embeddingSize).build(), "avgpool") // Embeddings .addVertex("embeddings", new L2NormalizeVertex(new int[] {1}, 1e-10), "bottleneck") // Output .addLayer("outputLayer", - new CenterLossOutputLayer.Builder() + CenterLossOutputLayer.builder() .lossFunction(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .activation(Activation.SOFTMAX).alpha(0.9).lambda(1e-4) .nIn(embeddingSize).nOut(numClasses).build(), @@ -116,55 +116,55 @@ public class InceptionResNetV1 extends ZooModel { graph // stem .addLayer("stem-cnn1", - new ConvolutionLayer.Builder(new int[] {3, 3}, new int[] {2, 2}) + ConvolutionLayer.builder(new int[] {3, 3}, new int[] {2, 2}) .nIn(inputShape[0]).nOut(32) .cudnnAlgoMode(cudnnAlgoMode).build(), input) .addLayer("stem-batch1", - new BatchNormalization.Builder(false).decay(0.995).eps(0.001).nIn(32).nOut(32) + BatchNormalization.builder(false).decay(0.995).eps(0.001).nIn(32).nOut(32) .build(), "stem-cnn1") .addLayer("stem-cnn2", - new ConvolutionLayer.Builder(new int[] {3, 3}).nIn(32).nOut(32) + ConvolutionLayer.builder(new int[] {3, 3}).nIn(32).nOut(32) .cudnnAlgoMode(cudnnAlgoMode).build(), "stem-batch1") .addLayer("stem-batch2", - new BatchNormalization.Builder(false).decay(0.995).eps(0.001).nIn(32).nOut(32) + BatchNormalization.builder(false).decay(0.995).eps(0.001).nIn(32).nOut(32) .build(), "stem-cnn2") .addLayer("stem-cnn3", - new ConvolutionLayer.Builder(new int[] {3, 3}) + ConvolutionLayer.builder(new int[] {3, 3}) .convolutionMode(ConvolutionMode.Same).nIn(32).nOut(64) .cudnnAlgoMode(cudnnAlgoMode).build(), "stem-batch2") - .addLayer("stem-batch3", new BatchNormalization.Builder(false).decay(0.995).eps(0.001).nIn(64) + .addLayer("stem-batch3", BatchNormalization.builder(false).decay(0.995).eps(0.001).nIn(64) .nOut(64).build(), "stem-cnn3") - .addLayer("stem-pool4", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, + .addLayer("stem-pool4", SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX, new int[] {3, 3}, new int[] {2, 2}).build(), "stem-batch3") .addLayer("stem-cnn5", - new ConvolutionLayer.Builder(new int[] {1, 1}).nIn(64).nOut(80) + ConvolutionLayer.builder(new int[] {1, 1}).nIn(64).nOut(80) .cudnnAlgoMode(cudnnAlgoMode).build(), "stem-pool4") .addLayer("stem-batch5", - new BatchNormalization.Builder(false).decay(0.995).eps(0.001).nIn(80).nOut(80) + BatchNormalization.builder(false).decay(0.995).eps(0.001).nIn(80).nOut(80) .build(), "stem-cnn5") .addLayer("stem-cnn6", - new ConvolutionLayer.Builder(new int[] {3, 3}).nIn(80).nOut(128) + ConvolutionLayer.builder(new int[] {3, 3}).nIn(80).nOut(128) .cudnnAlgoMode(cudnnAlgoMode).build(), 
"stem-batch5") .addLayer("stem-batch6", - new BatchNormalization.Builder(false).decay(0.995).eps(0.001).nIn(128).nOut(128) + BatchNormalization.builder(false).decay(0.995).eps(0.001).nIn(128).nOut(128) .build(), "stem-cnn6") .addLayer("stem-cnn7", - new ConvolutionLayer.Builder(new int[] {3, 3}, new int[] {2, 2}).nIn(128) + ConvolutionLayer.builder(new int[] {3, 3}, new int[] {2, 2}).nIn(128) .nOut(192).cudnnAlgoMode(cudnnAlgoMode) .build(), "stem-batch6") - .addLayer("stem-batch7", new BatchNormalization.Builder(false).decay(0.995).eps(0.001).nIn(192) + .addLayer("stem-batch7", BatchNormalization.builder(false).decay(0.995).eps(0.001).nIn(192) .nOut(192).build(), "stem-cnn7"); @@ -176,45 +176,45 @@ public class InceptionResNetV1 extends ZooModel { graph // 3x3 .addLayer("reduceA-cnn1", - new ConvolutionLayer.Builder(new int[] {3, 3}, new int[] {2, 2}).nIn(192) + ConvolutionLayer.builder(new int[] {3, 3}, new int[] {2, 2}).nIn(192) .nOut(192).cudnnAlgoMode(cudnnAlgoMode) .build(), "resnetA") .addLayer("reduceA-batch1", - new BatchNormalization.Builder(false).decay(0.995).eps(0.001).nIn(192).nOut(192) + BatchNormalization.builder(false).decay(0.995).eps(0.001).nIn(192).nOut(192) .build(), "reduceA-cnn1") // 1x1 -> 3x3 -> 3x3 .addLayer("reduceA-cnn2", - new ConvolutionLayer.Builder(new int[] {1, 1}) + ConvolutionLayer.builder(new int[] {1, 1}) .convolutionMode(ConvolutionMode.Same).nIn(192).nOut(128) .cudnnAlgoMode(cudnnAlgoMode).build(), "resnetA") .addLayer("reduceA-batch2", - new BatchNormalization.Builder(false).decay(0.995).eps(0.001).nIn(128).nOut(128) + BatchNormalization.builder(false).decay(0.995).eps(0.001).nIn(128).nOut(128) .build(), "reduceA-cnn2") .addLayer("reduceA-cnn3", - new ConvolutionLayer.Builder(new int[] {3, 3}) + ConvolutionLayer.builder(new int[] {3, 3}) .convolutionMode(ConvolutionMode.Same).nIn(128).nOut(128) .cudnnAlgoMode(cudnnAlgoMode).build(), "reduceA-batch2") .addLayer("reduceA-batch3", - new BatchNormalization.Builder(false).decay(0.995).eps(0.001).nIn(128).nOut(128) + BatchNormalization.builder(false).decay(0.995).eps(0.001).nIn(128).nOut(128) .build(), "reduceA-cnn3") .addLayer("reduceA-cnn4", - new ConvolutionLayer.Builder(new int[] {3, 3}, new int[] {2, 2}).nIn(128) + ConvolutionLayer.builder(new int[] {3, 3}, new int[] {2, 2}).nIn(128) .nOut(192).cudnnAlgoMode(cudnnAlgoMode) .build(), "reduceA-batch3") .addLayer("reduceA-batch4", - new BatchNormalization.Builder(false).decay(0.995).eps(0.001).nIn(192).nOut(192) + BatchNormalization.builder(false).decay(0.995).eps(0.001).nIn(192).nOut(192) .build(), "reduceA-cnn4") // maxpool .addLayer("reduceA-pool5", - new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] {3, 3}, + SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX, new int[] {3, 3}, new int[] {2, 2}).build(), "resnetA") // --> @@ -229,73 +229,73 @@ public class InceptionResNetV1 extends ZooModel { graph // 3x3 pool .addLayer("reduceB-pool1", - new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] {3, 3}, + SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX, new int[] {3, 3}, new int[] {2, 2}).build(), "resnetB") // 1x1 -> 3x3 .addLayer("reduceB-cnn2", - new ConvolutionLayer.Builder(new int[] {1, 1}) + ConvolutionLayer.builder(new int[] {1, 1}) .convolutionMode(ConvolutionMode.Same).nIn(576).nOut(256) .cudnnAlgoMode(cudnnAlgoMode).build(), "resnetB") .addLayer("reduceB-batch1", - new BatchNormalization.Builder(false).decay(0.995).eps(0.001).nIn(256).nOut(256) + 
BatchNormalization.builder(false).decay(0.995).eps(0.001).nIn(256).nOut(256) .build(), "reduceB-cnn2") .addLayer("reduceB-cnn3", - new ConvolutionLayer.Builder(new int[] {3, 3}, new int[] {2, 2}).nIn(256) + ConvolutionLayer.builder(new int[] {3, 3}, new int[] {2, 2}).nIn(256) .nOut(256).cudnnAlgoMode(cudnnAlgoMode) .build(), "reduceB-batch1") .addLayer("reduceB-batch2", - new BatchNormalization.Builder(false).decay(0.995).eps(0.001).nIn(256).nOut(256) + BatchNormalization.builder(false).decay(0.995).eps(0.001).nIn(256).nOut(256) .build(), "reduceB-cnn3") // 1x1 -> 3x3 .addLayer("reduceB-cnn4", - new ConvolutionLayer.Builder(new int[] {1, 1}) + ConvolutionLayer.builder(new int[] {1, 1}) .convolutionMode(ConvolutionMode.Same).nIn(576).nOut(256) .cudnnAlgoMode(cudnnAlgoMode).build(), "resnetB") .addLayer("reduceB-batch3", - new BatchNormalization.Builder(false).decay(0.995).eps(0.001).nIn(256).nOut(256) + BatchNormalization.builder(false).decay(0.995).eps(0.001).nIn(256).nOut(256) .build(), "reduceB-cnn4") .addLayer("reduceB-cnn5", - new ConvolutionLayer.Builder(new int[] {3, 3}, new int[] {2, 2}).nIn(256) + ConvolutionLayer.builder(new int[] {3, 3}, new int[] {2, 2}).nIn(256) .nOut(256).cudnnAlgoMode(cudnnAlgoMode) .build(), "reduceB-batch3") .addLayer("reduceB-batch4", - new BatchNormalization.Builder(false).decay(0.995).eps(0.001).nIn(256).nOut(256) + BatchNormalization.builder(false).decay(0.995).eps(0.001).nIn(256).nOut(256) .build(), "reduceB-cnn5") // 1x1 -> 3x3 -> 3x3 .addLayer("reduceB-cnn6", - new ConvolutionLayer.Builder(new int[] {1, 1}) + ConvolutionLayer.builder(new int[] {1, 1}) .convolutionMode(ConvolutionMode.Same).nIn(576).nOut(256) .cudnnAlgoMode(cudnnAlgoMode).build(), "resnetB") .addLayer("reduceB-batch5", - new BatchNormalization.Builder(false).decay(0.995).eps(0.001).nIn(256).nOut(256) + BatchNormalization.builder(false).decay(0.995).eps(0.001).nIn(256).nOut(256) .build(), "reduceB-cnn6") .addLayer("reduceB-cnn7", - new ConvolutionLayer.Builder(new int[] {3, 3}) + ConvolutionLayer.builder(new int[] {3, 3}) .convolutionMode(ConvolutionMode.Same).nIn(256).nOut(256) .cudnnAlgoMode(cudnnAlgoMode).build(), "reduceB-batch5") .addLayer("reduceB-batch6", - new BatchNormalization.Builder(false).decay(0.995).eps(0.001).nIn(256).nOut(256) + BatchNormalization.builder(false).decay(0.995).eps(0.001).nIn(256).nOut(256) .build(), "reduceB-cnn7") .addLayer("reduceB-cnn8", - new ConvolutionLayer.Builder(new int[] {3, 3}, new int[] {2, 2}).nIn(256) + ConvolutionLayer.builder(new int[] {3, 3}, new int[] {2, 2}).nIn(256) .nOut(256).cudnnAlgoMode(cudnnAlgoMode) .build(), "reduceB-batch6") .addLayer("reduceB-batch7", - new BatchNormalization.Builder(false).decay(0.995).eps(0.001).nIn(256).nOut(256) + BatchNormalization.builder(false).decay(0.995).eps(0.001).nIn(256).nOut(256) .build(), "reduceB-cnn8") // --> @@ -308,7 +308,7 @@ public class InceptionResNetV1 extends ZooModel { // Average pooling graph.addLayer("avgpool", - new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.AVG, new int[] {1, 1}).build(), + SubsamplingLayer.builder(SubsamplingLayer.PoolingType.AVG, new int[] {1, 1}).build(), "resnetC"); return graph; diff --git a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/LeNet.java b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/LeNet.java index 6dc75af5f..abf20cf40 100644 --- a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/LeNet.java +++ 
b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/LeNet.java @@ -90,7 +90,7 @@ public class LeNet extends ZooModel { .convolutionMode(ConvolutionMode.Same) // block 1 - .layer(new ConvolutionLayer.Builder() + .layer(ConvolutionLayer.builder() .name("cnn1") .kernelSize(5, 5) .stride(1, 1) @@ -98,31 +98,31 @@ public class LeNet extends ZooModel { .nOut(20) .activation(Activation.RELU) .build()) - .layer(new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX) + .layer(SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX) .name("maxpool1") .kernelSize(2, 2) .stride(2, 2) .build()) // block 2 - .layer(new ConvolutionLayer.Builder() + .layer(ConvolutionLayer.builder() .name("cnn2") .kernelSize(5, 5) .stride(1, 1) .nOut(50) .activation(Activation.RELU).build()) - .layer(new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX) + .layer(SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX) .name("maxpool2") .kernelSize(2, 2) .stride(2, 2) .build()) // fully connected - .layer(new DenseLayer.Builder() + .layer(DenseLayer.builder() .name("ffn1") .activation(Activation.RELU) .nOut(500) .build()) // output - .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .name("output") .nOut(numClasses) .activation(Activation.SOFTMAX) // radial basis function required diff --git a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/NASNet.java b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/NASNet.java index 35f617773..1c4a65cd8 100644 --- a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/NASNet.java +++ b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/NASNet.java @@ -124,14 +124,14 @@ public class NASNet extends ZooModel { .graphBuilder(); if(!skipReduction) { - graph.addLayer("stem_conv1", new ConvolutionLayer.Builder(3, 3).stride(2, 2).nOut(stemFilters).hasBias(false) + graph.addLayer("stem_conv1", ConvolutionLayer.builder(3, 3).stride(2, 2).nOut(stemFilters).hasBias(false) .cudnnAlgoMode(cudnnAlgoMode).build(), "input"); } else { - graph.addLayer("stem_conv1", new ConvolutionLayer.Builder(3, 3).stride(1, 1).nOut(stemFilters).hasBias(false) + graph.addLayer("stem_conv1", ConvolutionLayer.builder(3, 3).stride(1, 1).nOut(stemFilters).hasBias(false) .cudnnAlgoMode(cudnnAlgoMode).build(), "input"); } - graph.addLayer("stem_bn1", new BatchNormalization.Builder().eps(1e-3).gamma(0.9997).build(), "stem_conv1"); + graph.addLayer("stem_bn1",BatchNormalization.builder().eps(1e-3).gamma(0.9997).build(), "stem_conv1"); String inputX = "stem_bn1"; String inputP = null; @@ -175,9 +175,9 @@ public class NASNet extends ZooModel { // output graph - .addLayer("act", new ActivationLayer(Activation.RELU), inputX) - .addLayer("avg_pool", new GlobalPoolingLayer.Builder(PoolingType.AVG).build(), "act") - .addLayer("output", new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .addLayer("act", ActivationLayer.builder(Activation.RELU.getActivationFunction()).build(), inputX) + .addLayer("avg_pool", GlobalPoolingLayer.builder(PoolingType.AVG).build(), "act") + .addLayer("output", OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).build(), "avg_pool") .setOutputs("output") diff --git a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/ResNet50.java b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/ResNet50.java index 
70abb5722..a0a0ffef6 100644 --- a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/ResNet50.java +++ b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/ResNet50.java @@ -96,31 +96,31 @@ public class ResNet50 extends ZooModel { String shortcutName = "short" + stage + block + "_branch"; graph.addLayer(convName + "2a", - new ConvolutionLayer.Builder(new int[] {1, 1}).nOut(filters[0]).cudnnAlgoMode(cudnnAlgoMode) + ConvolutionLayer.builder(new int[] {1, 1}).nOut(filters[0]).cudnnAlgoMode(cudnnAlgoMode) .build(), input) - .addLayer(batchName + "2a", new BatchNormalization(), convName + "2a") + .addLayer(batchName + "2a", BatchNormalization.builder().build(), convName + "2a") .addLayer(activationName + "2a", - new ActivationLayer.Builder().activation(Activation.RELU).build(), + ActivationLayer.builder().activation(Activation.RELU).build(), batchName + "2a") - .addLayer(convName + "2b", new ConvolutionLayer.Builder(kernelSize).nOut(filters[1]) + .addLayer(convName + "2b", ConvolutionLayer.builder(kernelSize).nOut(filters[1]) .cudnnAlgoMode(cudnnAlgoMode).convolutionMode(ConvolutionMode.Same).build(), activationName + "2a") - .addLayer(batchName + "2b", new BatchNormalization(), convName + "2b") + .addLayer(batchName + "2b", BatchNormalization.builder().build(), convName + "2b") .addLayer(activationName + "2b", - new ActivationLayer.Builder().activation(Activation.RELU).build(), + ActivationLayer.builder().activation(Activation.RELU).build(), batchName + "2b") .addLayer(convName + "2c", - new ConvolutionLayer.Builder(new int[] {1, 1}).nOut(filters[2]) + ConvolutionLayer.builder(new int[] {1, 1}).nOut(filters[2]) .cudnnAlgoMode(cudnnAlgoMode).build(), activationName + "2b") - .addLayer(batchName + "2c", new BatchNormalization(), convName + "2c") + .addLayer(batchName + "2c", BatchNormalization.builder().build(), convName + "2c") .addVertex(shortcutName, new ElementWiseVertex(ElementWiseVertex.Op.Add), batchName + "2c", input) - .addLayer(convName, new ActivationLayer.Builder().activation(Activation.RELU).build(), + .addLayer(convName, ActivationLayer.builder().activation(Activation.RELU).build(), shortcutName); } @@ -136,37 +136,37 @@ public class ResNet50 extends ZooModel { String activationName = "act" + stage + block + "_branch"; String shortcutName = "short" + stage + block + "_branch"; - graph.addLayer(convName + "2a", new ConvolutionLayer.Builder(new int[] {1, 1}, stride).nOut(filters[0]).build(), + graph.addLayer(convName + "2a", ConvolutionLayer.builder(new int[] {1, 1}, stride).nOut(filters[0]).build(), input) - .addLayer(batchName + "2a", new BatchNormalization(), convName + "2a") + .addLayer(batchName + "2a", BatchNormalization.builder().build(), convName + "2a") .addLayer(activationName + "2a", - new ActivationLayer.Builder().activation(Activation.RELU).build(), + ActivationLayer.builder().activation(Activation.RELU).build(), batchName + "2a") .addLayer(convName + "2b", - new ConvolutionLayer.Builder(kernelSize).nOut(filters[1]) + ConvolutionLayer.builder(kernelSize).nOut(filters[1]) .convolutionMode(ConvolutionMode.Same).build(), activationName + "2a") - .addLayer(batchName + "2b", new BatchNormalization(), convName + "2b") + .addLayer(batchName + "2b", BatchNormalization.builder().build(), convName + "2b") .addLayer(activationName + "2b", - new ActivationLayer.Builder().activation(Activation.RELU).build(), + ActivationLayer.builder().activation(Activation.RELU).build(), batchName + "2b") .addLayer(convName + "2c", - new 
ConvolutionLayer.Builder(new int[] {1, 1}).nOut(filters[2]).build(), + ConvolutionLayer.builder(new int[] {1, 1}).nOut(filters[2]).build(), activationName + "2b") - .addLayer(batchName + "2c", new BatchNormalization(), convName + "2c") + .addLayer(batchName + "2c", BatchNormalization.builder().build(), convName + "2c") // shortcut .addLayer(convName + "1", - new ConvolutionLayer.Builder(new int[] {1, 1}, stride).nOut(filters[2]).build(), + ConvolutionLayer.builder(new int[] {1, 1}, stride).nOut(filters[2]).build(), input) - .addLayer(batchName + "1", new BatchNormalization(), convName + "1") + .addLayer(batchName + "1", BatchNormalization.builder().build(), convName + "1") .addVertex(shortcutName, new ElementWiseVertex(ElementWiseVertex.Op.Add), batchName + "2c", batchName + "1") - .addLayer(convName, new ActivationLayer.Builder().activation(Activation.RELU).build(), + .addLayer(convName, ActivationLayer.builder().activation(Activation.RELU).build(), shortcutName); } @@ -190,15 +190,15 @@ public class ResNet50 extends ZooModel { graph.addInputs("input").setInputTypes(InputType.convolutional(inputShape[2], inputShape[1], inputShape[0])) // stem - .addLayer("stem-zero", new ZeroPaddingLayer.Builder(3, 3).build(), "input") + .addLayer("stem-zero", ZeroPaddingLayer.builder(3, 3).build(), "input") .addLayer("stem-cnn1", - new ConvolutionLayer.Builder(new int[] {7, 7}, new int[] {2, 2}).nOut(64) + ConvolutionLayer.builder(new int[] {7, 7}, new int[] {2, 2}).nOut(64) .build(), "stem-zero") - .addLayer("stem-batch1", new BatchNormalization(), "stem-cnn1") - .addLayer("stem-act1", new ActivationLayer.Builder().activation(Activation.RELU).build(), + .addLayer("stem-batch1", BatchNormalization.builder().build(), "stem-cnn1") + .addLayer("stem-act1", ActivationLayer.builder().activation(Activation.RELU).build(), "stem-batch1") - .addLayer("stem-maxpool1", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, + .addLayer("stem-maxpool1", SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX, new int[] {3, 3}, new int[] {2, 2}).build(), "stem-act1"); convBlock(graph, new int[] {3, 3}, new int[] {64, 64, 256}, "2", "a", new int[] {2, 2}, "stem-maxpool1"); @@ -222,11 +222,11 @@ public class ResNet50 extends ZooModel { identityBlock(graph, new int[] {3, 3}, new int[] {512, 512, 2048}, "5", "c", "res5b_branch"); graph.addLayer("avgpool", - new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] {3, 3}).build(), + SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX, new int[] {3, 3}).build(), "res5c_branch") // TODO add flatten/reshape layer here .addLayer("output", - new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) + OutputLayer.builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .nOut(numClasses).activation(Activation.SOFTMAX).build(), "avgpool") .setOutputs("output"); diff --git a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/SimpleCNN.java b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/SimpleCNN.java index f5b1c41ee..2bdb9765d 100644 --- a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/SimpleCNN.java +++ b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/SimpleCNN.java @@ -79,55 +79,55 @@ public class SimpleCNN extends ZooModel { .convolutionMode(ConvolutionMode.Same) // block 1 - .layer(0, new ConvolutionLayer.Builder(new int[] {7, 7}).name("image_array") + .layer(0, ConvolutionLayer.builder(new int[] {7, 7}).name("image_array") 
.nIn(inputShape[0]).nOut(16).build()) - .layer(1, new BatchNormalization.Builder().build()) - .layer(2, new ConvolutionLayer.Builder(new int[] {7, 7}).nIn(16).nOut(16) + .layer(1,BatchNormalization.builder().build()) + .layer(2, ConvolutionLayer.builder(new int[] {7, 7}).nIn(16).nOut(16) .build()) - .layer(3, new BatchNormalization.Builder().build()) - .layer(4, new ActivationLayer.Builder().activation(Activation.RELU).build()) - .layer(5, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.AVG, + .layer(3,BatchNormalization.builder().build()) + .layer(4, ActivationLayer.builder().activation(Activation.RELU).build()) + .layer(5, SubsamplingLayer.builder(SubsamplingLayer.PoolingType.AVG, new int[] {2, 2}).build()) - .layer(6, new DropoutLayer.Builder(0.5).build()) + .layer(6, DropoutLayer.builder(0.5).build()) // block 2 - .layer(7, new ConvolutionLayer.Builder(new int[] {5, 5}).nOut(32).build()) - .layer(8, new BatchNormalization.Builder().build()) - .layer(9, new ConvolutionLayer.Builder(new int[] {5, 5}).nOut(32).build()) - .layer(10, new BatchNormalization.Builder().build()) - .layer(11, new ActivationLayer.Builder().activation(Activation.RELU).build()) - .layer(12, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.AVG, + .layer(7, ConvolutionLayer.builder(new int[] {5, 5}).nOut(32).build()) + .layer(8,BatchNormalization.builder().build()) + .layer(9, ConvolutionLayer.builder(new int[] {5, 5}).nOut(32).build()) + .layer(10,BatchNormalization.builder().build()) + .layer(11, ActivationLayer.builder().activation(Activation.RELU).build()) + .layer(12, SubsamplingLayer.builder(SubsamplingLayer.PoolingType.AVG, new int[] {2, 2}).build()) - .layer(13, new DropoutLayer.Builder(0.5).build()) + .layer(13, DropoutLayer.builder(0.5).build()) // block 3 - .layer(14, new ConvolutionLayer.Builder(new int[] {3, 3}).nOut(64).build()) - .layer(15, new BatchNormalization.Builder().build()) - .layer(16, new ConvolutionLayer.Builder(new int[] {3, 3}).nOut(64).build()) - .layer(17, new BatchNormalization.Builder().build()) - .layer(18, new ActivationLayer.Builder().activation(Activation.RELU).build()) - .layer(19, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.AVG, + .layer(14, ConvolutionLayer.builder(new int[] {3, 3}).nOut(64).build()) + .layer(15,BatchNormalization.builder().build()) + .layer(16, ConvolutionLayer.builder(new int[] {3, 3}).nOut(64).build()) + .layer(17,BatchNormalization.builder().build()) + .layer(18, ActivationLayer.builder().activation(Activation.RELU).build()) + .layer(19, SubsamplingLayer.builder(SubsamplingLayer.PoolingType.AVG, new int[] {2, 2}).build()) - .layer(20, new DropoutLayer.Builder(0.5).build()) + .layer(20, DropoutLayer.builder(0.5).build()) // block 4 - .layer(21, new ConvolutionLayer.Builder(new int[] {3, 3}).nOut(128).build()) - .layer(22, new BatchNormalization.Builder().build()) - .layer(23, new ConvolutionLayer.Builder(new int[] {3, 3}).nOut(128).build()) - .layer(24, new BatchNormalization.Builder().build()) - .layer(25, new ActivationLayer.Builder().activation(Activation.RELU).build()) - .layer(26, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.AVG, + .layer(21, ConvolutionLayer.builder(new int[] {3, 3}).nOut(128).build()) + .layer(22,BatchNormalization.builder().build()) + .layer(23, ConvolutionLayer.builder(new int[] {3, 3}).nOut(128).build()) + .layer(24,BatchNormalization.builder().build()) + .layer(25, ActivationLayer.builder().activation(Activation.RELU).build()) + .layer(26, 
SubsamplingLayer.builder(SubsamplingLayer.PoolingType.AVG, new int[] {2, 2}).build()) - .layer(27, new DropoutLayer.Builder(0.5).build()) + .layer(27, DropoutLayer.builder(0.5).build()) // block 5 - .layer(28, new ConvolutionLayer.Builder(new int[] {3, 3}).nOut(256).build()) - .layer(29, new BatchNormalization.Builder().build()) - .layer(30, new ConvolutionLayer.Builder(new int[] {3, 3}).nOut(numClasses) + .layer(28, ConvolutionLayer.builder(new int[] {3, 3}).nOut(256).build()) + .layer(29,BatchNormalization.builder().build()) + .layer(30, ConvolutionLayer.builder(new int[] {3, 3}).nOut(numClasses) .build()) - .layer(31, new GlobalPoolingLayer.Builder(PoolingType.AVG).build()) - .layer(32, new ActivationLayer.Builder().activation(Activation.SOFTMAX).build()) + .layer(31, GlobalPoolingLayer.builder(PoolingType.AVG).build()) + .layer(32, ActivationLayer.builder().activation(Activation.SOFTMAX).build()) .inputType(InputType.convolutional(inputShape[2], inputShape[1], inputShape[0])) diff --git a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/SqueezeNet.java b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/SqueezeNet.java index e63e36cea..44ea244ad 100644 --- a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/SqueezeNet.java +++ b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/SqueezeNet.java @@ -116,19 +116,19 @@ public class SqueezeNet extends ZooModel { graph // stem - .addLayer("conv1", new ConvolutionLayer.Builder(3,3).stride(2,2).nOut(64) + .addLayer("conv1", ConvolutionLayer.builder(3,3).stride(2,2).nOut(64) .cudnnAlgoMode(cudnnAlgoMode).build(), "input") - .addLayer("conv1_act", new ActivationLayer(Activation.RELU), "conv1") - .addLayer("pool1", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(3,3).stride(2,2).build(), "conv1_act"); + .addLayer("conv1_act", ActivationLayer.builder(Activation.RELU).build(), "conv1") + .addLayer("pool1", SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX).kernelSize(3,3).stride(2,2).build(), "conv1_act"); // fire modules fireModule(graph, 2, 16, 64, "pool1"); fireModule(graph, 3, 16, 64, "fire2"); - graph.addLayer("pool3", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(3,3).stride(2,2).build(), "fire3"); + graph.addLayer("pool3", SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX).kernelSize(3,3).stride(2,2).build(), "fire3"); fireModule(graph, 4, 32, 128, "pool3"); fireModule(graph, 5, 32, 128, "fire4"); - graph.addLayer("pool5", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(3,3).stride(2,2).build(), "fire5"); + graph.addLayer("pool5", SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX).kernelSize(3,3).stride(2,2).build(), "fire5"); fireModule(graph, 6, 48, 192, "pool5"); fireModule(graph, 7, 48, 192, "fire6"); @@ -137,14 +137,14 @@ public class SqueezeNet extends ZooModel { graph // output - .addLayer("drop9", new DropoutLayer.Builder(0.5).build(), "fire9") - .addLayer("conv10", new ConvolutionLayer.Builder(1,1).nOut(numClasses) + .addLayer("drop9", DropoutLayer.builder(0.5).build(), "fire9") + .addLayer("conv10", ConvolutionLayer.builder(1,1).nOut(numClasses) .cudnnAlgoMode(cudnnAlgoMode).build(), "drop9") - .addLayer("conv10_act", new ActivationLayer(Activation.RELU), "conv10") - .addLayer("avg_pool", new GlobalPoolingLayer(PoolingType.AVG), "conv10_act") + .addLayer("conv10_act", ActivationLayer.builder(Activation.RELU).build(), "conv10") + .addLayer("avg_pool", 
GlobalPoolingLayer.builder(PoolingType.AVG).build(), "conv10_act") - .addLayer("softmax", new ActivationLayer(Activation.SOFTMAX), "avg_pool") - .addLayer("loss", new LossLayer.Builder(LossFunctions.LossFunction.MCXENT).build(), "softmax") + .addLayer("softmax", ActivationLayer.builder(Activation.SOFTMAX).build(), "avg_pool") + .addLayer("loss", LossLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT.getILossFunction()).build(), "softmax") .setOutputs("loss") @@ -157,18 +157,18 @@ public class SqueezeNet extends ZooModel { String prefix = "fire"+fireId; graphBuilder - .addLayer(prefix+"_sq1x1", new ConvolutionLayer.Builder(1, 1).nOut(squeeze) + .addLayer(prefix+"_sq1x1", ConvolutionLayer.builder(1, 1).nOut(squeeze) .cudnnAlgoMode(cudnnAlgoMode).build(), input) - .addLayer(prefix+"_relu_sq1x1", new ActivationLayer(Activation.RELU), prefix+"_sq1x1") + .addLayer(prefix+"_relu_sq1x1", ActivationLayer.builder(Activation.RELU).build(), prefix+"_sq1x1") - .addLayer(prefix+"_exp1x1", new ConvolutionLayer.Builder(1, 1).nOut(expand) + .addLayer(prefix+"_exp1x1", ConvolutionLayer.builder(1, 1).nOut(expand) .cudnnAlgoMode(cudnnAlgoMode).build(), prefix+"_relu_sq1x1") - .addLayer(prefix+"_relu_exp1x1", new ActivationLayer(Activation.RELU), prefix+"_exp1x1") + .addLayer(prefix+"_relu_exp1x1", ActivationLayer.builder(Activation.RELU).build(), prefix+"_exp1x1") - .addLayer(prefix+"_exp3x3", new ConvolutionLayer.Builder(3,3).nOut(expand) + .addLayer(prefix+"_exp3x3", ConvolutionLayer.builder(3,3).nOut(expand) .convolutionMode(ConvolutionMode.Same) .cudnnAlgoMode(cudnnAlgoMode).build(), prefix+"_relu_sq1x1") - .addLayer(prefix+"_relu_exp3x3", new ActivationLayer(Activation.RELU), prefix+"_exp3x3") + .addLayer(prefix+"_relu_exp3x3", ActivationLayer.builder(Activation.RELU).build(), prefix+"_exp3x3") .addVertex(prefix, new MergeVertex(), prefix+"_relu_exp1x1", prefix+"_relu_exp3x3"); diff --git a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/TextGenerationLSTM.java b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/TextGenerationLSTM.java index 962b8f677..78618ab9d 100644 --- a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/TextGenerationLSTM.java +++ b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/TextGenerationLSTM.java @@ -80,10 +80,10 @@ public class TextGenerationLSTM extends ZooModel { .inferenceWorkspaceMode(workspaceMode) .cudnnAlgoMode(cudnnAlgoMode) - .layer(0, new GravesLSTM.Builder().nIn(inputShape[1]).nOut(256).activation(Activation.TANH) + .layer(0, GravesLSTM.builder().nIn(inputShape[1]).nOut(256).activation(Activation.TANH) .build()) - .layer(1, new GravesLSTM.Builder().nOut(256).activation(Activation.TANH).build()) - .layer(2, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .layer(1, GravesLSTM.builder().nOut(256).activation(Activation.TANH).build()) + .layer(2, RnnOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX) //MCXENT + softmax for classification .nOut(totalUniqueCharacters).build()) .backpropType(BackpropType.TruncatedBPTT).tbpttFwdLength(50).tbpttBackLength(50) diff --git a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/TinyYOLO.java b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/TinyYOLO.java index e5281d33d..8c68538c3 100644 --- a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/TinyYOLO.java +++ 
b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/TinyYOLO.java @@ -120,7 +120,7 @@ public class TinyYOLO extends ZooModel { int layerNumber = 9; graphBuilder .addLayer("convolution2d_" + layerNumber, - new ConvolutionLayer.Builder(1,1) + ConvolutionLayer.builder(1,1) .nIn(1024) .nOut(nBoxes * (5 + numClasses)) .weightInit(WeightInit.XAVIER) @@ -131,8 +131,8 @@ public class TinyYOLO extends ZooModel { .build(), "activation_" + (layerNumber - 1)) .addLayer("outputs", - new Yolo2OutputLayer.Builder() - .boundingBoxPriors(priors) + Yolo2OutputLayer.builder() + .boundingBoxes(priors) .build(), "convolution2d_" + layerNumber) .setOutputs("outputs"); diff --git a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/UNet.java b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/UNet.java index f9400ba8e..7bfa1edf0 100644 --- a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/UNet.java +++ b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/UNet.java @@ -104,110 +104,110 @@ public class UNet extends ZooModel { graph - .addLayer("conv1-1", new ConvolutionLayer.Builder(3,3).stride(1,1).nOut(64) + .addLayer("conv1-1", ConvolutionLayer.builder(3,3).stride(1,1).nOut(64) .convolutionMode(ConvolutionMode.Same).cudnnAlgoMode(cudnnAlgoMode) .activation(Activation.RELU).build(), "input") - .addLayer("conv1-2", new ConvolutionLayer.Builder(3,3).stride(1,1).nOut(64) + .addLayer("conv1-2", ConvolutionLayer.builder(3,3).stride(1,1).nOut(64) .convolutionMode(ConvolutionMode.Same).cudnnAlgoMode(cudnnAlgoMode) .activation(Activation.RELU).build(), "conv1-1") - .addLayer("pool1", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2,2) + .addLayer("pool1", SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2,2) .build(), "conv1-2") - .addLayer("conv2-1", new ConvolutionLayer.Builder(3,3).stride(1,1).nOut(128) + .addLayer("conv2-1", ConvolutionLayer.builder(3,3).stride(1,1).nOut(128) .convolutionMode(ConvolutionMode.Same).cudnnAlgoMode(cudnnAlgoMode) .activation(Activation.RELU).build(), "pool1") - .addLayer("conv2-2", new ConvolutionLayer.Builder(3,3).stride(1,1).nOut(128) + .addLayer("conv2-2", ConvolutionLayer.builder(3,3).stride(1,1).nOut(128) .convolutionMode(ConvolutionMode.Same).cudnnAlgoMode(cudnnAlgoMode) .activation(Activation.RELU).build(), "conv2-1") - .addLayer("pool2", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2,2) + .addLayer("pool2", SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2,2) .build(), "conv2-2") - .addLayer("conv3-1", new ConvolutionLayer.Builder(3,3).stride(1,1).nOut(256) + .addLayer("conv3-1", ConvolutionLayer.builder(3,3).stride(1,1).nOut(256) .convolutionMode(ConvolutionMode.Same).cudnnAlgoMode(cudnnAlgoMode) .activation(Activation.RELU).build(), "pool2") - .addLayer("conv3-2", new ConvolutionLayer.Builder(3,3).stride(1,1).nOut(256) + .addLayer("conv3-2", ConvolutionLayer.builder(3,3).stride(1,1).nOut(256) .convolutionMode(ConvolutionMode.Same).cudnnAlgoMode(cudnnAlgoMode) .activation(Activation.RELU).build(), "conv3-1") - .addLayer("pool3", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2,2) + .addLayer("pool3", SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2,2) .build(), "conv3-2") - .addLayer("conv4-1", new ConvolutionLayer.Builder(3,3).stride(1,1).nOut(512) + .addLayer("conv4-1", ConvolutionLayer.builder(3,3).stride(1,1).nOut(512) 
.convolutionMode(ConvolutionMode.Same).cudnnAlgoMode(cudnnAlgoMode) .activation(Activation.RELU).build(), "pool3") - .addLayer("conv4-2", new ConvolutionLayer.Builder(3,3).stride(1,1).nOut(512) + .addLayer("conv4-2", ConvolutionLayer.builder(3,3).stride(1,1).nOut(512) .convolutionMode(ConvolutionMode.Same).cudnnAlgoMode(cudnnAlgoMode) .activation(Activation.RELU).build(), "conv4-1") - .addLayer("drop4", new DropoutLayer.Builder(0.5).build(), "conv4-2") - .addLayer("pool4", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2,2) + .addLayer("drop4", DropoutLayer.builder(0.5).build(), "conv4-2") + .addLayer("pool4", SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2,2) .build(), "drop4") - .addLayer("conv5-1", new ConvolutionLayer.Builder(3,3).stride(1,1).nOut(1024) + .addLayer("conv5-1", ConvolutionLayer.builder(3,3).stride(1,1).nOut(1024) .convolutionMode(ConvolutionMode.Same).cudnnAlgoMode(cudnnAlgoMode) .activation(Activation.RELU).build(), "pool4") - .addLayer("conv5-2", new ConvolutionLayer.Builder(3,3).stride(1,1).nOut(1024) + .addLayer("conv5-2", ConvolutionLayer.builder(3,3).stride(1,1).nOut(1024) .convolutionMode(ConvolutionMode.Same).cudnnAlgoMode(cudnnAlgoMode) .activation(Activation.RELU).build(), "conv5-1") - .addLayer("drop5", new DropoutLayer.Builder(0.5).build(), "conv5-2") + .addLayer("drop5", DropoutLayer.builder(0.5).build(), "conv5-2") // up6 - .addLayer("up6-1", new Upsampling2D.Builder(2).build(), "drop5") - .addLayer("up6-2", new ConvolutionLayer.Builder(2,2).stride(1,1).nOut(512) + .addLayer("up6-1", Upsampling2D.builder(2).build(), "drop5") + .addLayer("up6-2", ConvolutionLayer.builder(2,2).stride(1,1).nOut(512) .convolutionMode(ConvolutionMode.Same).cudnnAlgoMode(cudnnAlgoMode) .activation(Activation.RELU).build(), "up6-1") .addVertex("merge6", new MergeVertex(), "drop4", "up6-2") - .addLayer("conv6-1", new ConvolutionLayer.Builder(3,3).stride(1,1).nOut(512) + .addLayer("conv6-1", ConvolutionLayer.builder(3,3).stride(1,1).nOut(512) .convolutionMode(ConvolutionMode.Same).cudnnAlgoMode(cudnnAlgoMode) .activation(Activation.RELU).build(), "merge6") - .addLayer("conv6-2", new ConvolutionLayer.Builder(3,3).stride(1,1).nOut(512) + .addLayer("conv6-2", ConvolutionLayer.builder(3,3).stride(1,1).nOut(512) .convolutionMode(ConvolutionMode.Same).cudnnAlgoMode(cudnnAlgoMode) .activation(Activation.RELU).build(), "conv6-1") // up7 - .addLayer("up7-1", new Upsampling2D.Builder(2).build(), "conv6-2") - .addLayer("up7-2", new ConvolutionLayer.Builder(2,2).stride(1,1).nOut(256) + .addLayer("up7-1", Upsampling2D.builder(2).build(), "conv6-2") + .addLayer("up7-2", ConvolutionLayer.builder(2,2).stride(1,1).nOut(256) .convolutionMode(ConvolutionMode.Same).cudnnAlgoMode(cudnnAlgoMode) .activation(Activation.RELU).build(), "up7-1") .addVertex("merge7", new MergeVertex(), "conv3-2", "up7-2") - .addLayer("conv7-1", new ConvolutionLayer.Builder(3,3).stride(1,1).nOut(256) + .addLayer("conv7-1", ConvolutionLayer.builder(3,3).stride(1,1).nOut(256) .convolutionMode(ConvolutionMode.Same).cudnnAlgoMode(cudnnAlgoMode) .activation(Activation.RELU).build(), "merge7") - .addLayer("conv7-2", new ConvolutionLayer.Builder(3,3).stride(1,1).nOut(256) + .addLayer("conv7-2", ConvolutionLayer.builder(3,3).stride(1,1).nOut(256) .convolutionMode(ConvolutionMode.Same).cudnnAlgoMode(cudnnAlgoMode) .activation(Activation.RELU).build(), "conv7-1") // up8 - .addLayer("up8-1", new Upsampling2D.Builder(2).build(), "conv7-2") - .addLayer("up8-2", new 
ConvolutionLayer.Builder(2,2).stride(1,1).nOut(128) + .addLayer("up8-1", Upsampling2D.builder(2).build(), "conv7-2") + .addLayer("up8-2", ConvolutionLayer.builder(2,2).stride(1,1).nOut(128) .convolutionMode(ConvolutionMode.Same).cudnnAlgoMode(cudnnAlgoMode) .activation(Activation.RELU).build(), "up8-1") .addVertex("merge8", new MergeVertex(), "conv2-2", "up8-2") - .addLayer("conv8-1", new ConvolutionLayer.Builder(3,3).stride(1,1).nOut(128) + .addLayer("conv8-1", ConvolutionLayer.builder(3,3).stride(1,1).nOut(128) .convolutionMode(ConvolutionMode.Same).cudnnAlgoMode(cudnnAlgoMode) .activation(Activation.RELU).build(), "merge8") - .addLayer("conv8-2", new ConvolutionLayer.Builder(3,3).stride(1,1).nOut(128) + .addLayer("conv8-2", ConvolutionLayer.builder(3,3).stride(1,1).nOut(128) .convolutionMode(ConvolutionMode.Same).cudnnAlgoMode(cudnnAlgoMode) .activation(Activation.RELU).build(), "conv8-1") // up9 - .addLayer("up9-1", new Upsampling2D.Builder(2).build(), "conv8-2") - .addLayer("up9-2", new ConvolutionLayer.Builder(2,2).stride(1,1).nOut(64) + .addLayer("up9-1", Upsampling2D.builder(2).build(), "conv8-2") + .addLayer("up9-2", ConvolutionLayer.builder(2,2).stride(1,1).nOut(64) .convolutionMode(ConvolutionMode.Same).cudnnAlgoMode(cudnnAlgoMode) .activation(Activation.RELU).build(), "up9-1") .addVertex("merge9", new MergeVertex(), "conv1-2", "up9-2") - .addLayer("conv9-1", new ConvolutionLayer.Builder(3,3).stride(1,1).nOut(64) + .addLayer("conv9-1", ConvolutionLayer.builder(3,3).stride(1,1).nOut(64) .convolutionMode(ConvolutionMode.Same).cudnnAlgoMode(cudnnAlgoMode) .activation(Activation.RELU).build(), "merge9") - .addLayer("conv9-2", new ConvolutionLayer.Builder(3,3).stride(1,1).nOut(64) + .addLayer("conv9-2", ConvolutionLayer.builder(3,3).stride(1,1).nOut(64) .convolutionMode(ConvolutionMode.Same).cudnnAlgoMode(cudnnAlgoMode) .activation(Activation.RELU).build(), "conv9-1") - .addLayer("conv9-3", new ConvolutionLayer.Builder(3,3).stride(1,1).nOut(2) + .addLayer("conv9-3", ConvolutionLayer.builder(3,3).stride(1,1).nOut(2) .convolutionMode(ConvolutionMode.Same).cudnnAlgoMode(cudnnAlgoMode) .activation(Activation.RELU).build(), "conv9-2") - .addLayer("conv10", new ConvolutionLayer.Builder(1,1).stride(1,1).nOut(1) + .addLayer("conv10", ConvolutionLayer.builder(1,1).stride(1,1).nOut(1) .convolutionMode(ConvolutionMode.Same).cudnnAlgoMode(cudnnAlgoMode) .activation(Activation.IDENTITY).build(), "conv9-3") - .addLayer("output", new CnnLossLayer.Builder(LossFunctions.LossFunction.XENT) + .addLayer("output", CnnLossLayer.builder().lossFunction(LossFunctions.LossFunction.XENT.getILossFunction()) .activation(Activation.SIGMOID).build(), "conv10") .setOutputs("output"); diff --git a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/VGG16.java b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/VGG16.java index 2f6aa1cac..6804fbb01 100644 --- a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/VGG16.java +++ b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/VGG16.java @@ -99,57 +99,57 @@ public class VGG16 extends ZooModel { .graphBuilder() .addInputs("in") // block 1 - .layer(0, new ConvolutionLayer.Builder().kernelSize(3, 3).stride(1, 1) + .layer(0, ConvolutionLayer.builder().kernelSize(3, 3).stride(1, 1) .padding(1, 1).nIn(inputShape[0]).nOut(64) .cudnnAlgoMode(cudnnAlgoMode).build(), "in") - .layer(1, new ConvolutionLayer.Builder().kernelSize(3, 3).stride(1, 1) + .layer(1, ConvolutionLayer.builder().kernelSize(3, 
3).stride(1, 1) .padding(1, 1).nOut(64).cudnnAlgoMode(cudnnAlgoMode).build(), "0") - .layer(2, new SubsamplingLayer.Builder() - .poolingType(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2) + .layer(2, SubsamplingLayer.builder() + .poolingType(SubsamplingLayer.PoolingType.MAX.toPoolingType()).kernelSize(2, 2) .stride(2, 2).build(), "1") // block 2 - .layer(3, new ConvolutionLayer.Builder().kernelSize(3, 3).stride(1, 1) + .layer(3, ConvolutionLayer.builder().kernelSize(3, 3).stride(1, 1) .padding(1, 1).nOut(128).cudnnAlgoMode(cudnnAlgoMode).build(), "2") - .layer(4, new ConvolutionLayer.Builder().kernelSize(3, 3).stride(1, 1) + .layer(4, ConvolutionLayer.builder().kernelSize(3, 3).stride(1, 1) .padding(1, 1).nOut(128).cudnnAlgoMode(cudnnAlgoMode).build(), "3") - .layer(5, new SubsamplingLayer.Builder() - .poolingType(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2) + .layer(5, SubsamplingLayer.builder() + .poolingType(SubsamplingLayer.PoolingType.MAX.toPoolingType()).kernelSize(2, 2) .stride(2, 2).build(), "4") // block 3 - .layer(6, new ConvolutionLayer.Builder().kernelSize(3, 3).stride(1, 1) + .layer(6, ConvolutionLayer.builder().kernelSize(3, 3).stride(1, 1) .padding(1, 1).nOut(256).cudnnAlgoMode(cudnnAlgoMode).build(), "5") - .layer(7, new ConvolutionLayer.Builder().kernelSize(3, 3).stride(1, 1) + .layer(7, ConvolutionLayer.builder().kernelSize(3, 3).stride(1, 1) .padding(1, 1).nOut(256).cudnnAlgoMode(cudnnAlgoMode).build(), "6") - .layer(8, new ConvolutionLayer.Builder().kernelSize(3, 3).stride(1, 1) + .layer(8, ConvolutionLayer.builder().kernelSize(3, 3).stride(1, 1) .padding(1, 1).nOut(256).cudnnAlgoMode(cudnnAlgoMode).build(), "7") - .layer(9, new SubsamplingLayer.Builder() - .poolingType(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2) + .layer(9, SubsamplingLayer.builder() + .poolingType(SubsamplingLayer.PoolingType.MAX.toPoolingType()).kernelSize(2, 2) .stride(2, 2).build(), "8") // block 4 - .layer(10, new ConvolutionLayer.Builder().kernelSize(3, 3).stride(1, 1) + .layer(10, ConvolutionLayer.builder().kernelSize(3, 3).stride(1, 1) .padding(1, 1).nOut(512).cudnnAlgoMode(cudnnAlgoMode).build(), "9") - .layer(11, new ConvolutionLayer.Builder().kernelSize(3, 3).stride(1, 1) + .layer(11, ConvolutionLayer.builder().kernelSize(3, 3).stride(1, 1) .padding(1, 1).nOut(512).cudnnAlgoMode(cudnnAlgoMode).build(), "10") - .layer(12, new ConvolutionLayer.Builder().kernelSize(3, 3).stride(1, 1) + .layer(12, ConvolutionLayer.builder().kernelSize(3, 3).stride(1, 1) .padding(1, 1).nOut(512).cudnnAlgoMode(cudnnAlgoMode).build(), "11") - .layer(13, new SubsamplingLayer.Builder() - .poolingType(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2) + .layer(13, SubsamplingLayer.builder() + .poolingType(SubsamplingLayer.PoolingType.MAX.toPoolingType()).kernelSize(2, 2) .stride(2, 2).build(), "12") // block 5 - .layer(14, new ConvolutionLayer.Builder().kernelSize(3, 3).stride(1, 1) + .layer(14, ConvolutionLayer.builder().kernelSize(3, 3).stride(1, 1) .padding(1, 1).nOut(512).cudnnAlgoMode(cudnnAlgoMode).build(), "13") - .layer(15, new ConvolutionLayer.Builder().kernelSize(3, 3).stride(1, 1) + .layer(15, ConvolutionLayer.builder().kernelSize(3, 3).stride(1, 1) .padding(1, 1).nOut(512).cudnnAlgoMode(cudnnAlgoMode).build(), "14") - .layer(16, new ConvolutionLayer.Builder().kernelSize(3, 3).stride(1, 1) + .layer(16, ConvolutionLayer.builder().kernelSize(3, 3).stride(1, 1) .padding(1, 1).nOut(512).cudnnAlgoMode(cudnnAlgoMode).build(), "15") - .layer(17, new SubsamplingLayer.Builder() - 
.poolingType(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2) + .layer(17, SubsamplingLayer.builder() + .poolingType(SubsamplingLayer.PoolingType.MAX.toPoolingType()).kernelSize(2, 2) .stride(2, 2).build(), "16") - .layer(18, new DenseLayer.Builder().nOut(4096).dropOut(0.5) + .layer(18, DenseLayer.builder().nOut(4096).dropOut(0.5) .build(), "17") - .layer(19, new DenseLayer.Builder().nOut(4096).dropOut(0.5) + .layer(19, DenseLayer.builder().nOut(4096).dropOut(0.5) .build(), "18") - .layer(20, new OutputLayer.Builder( + .layer(20, OutputLayer.builder( LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).name("output") .nOut(numClasses).activation(Activation.SOFTMAX) // radial basis function required .build(), "19") diff --git a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/VGG19.java b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/VGG19.java index 5e846efda..50e65afc5 100644 --- a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/VGG19.java +++ b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/VGG19.java @@ -88,60 +88,60 @@ public class VGG19 extends ZooModel { .graphBuilder() .addInputs("in") // block 1 - .layer(0, new ConvolutionLayer.Builder().kernelSize(3, 3).stride(1, 1) + .layer(0, ConvolutionLayer.builder().kernelSize(3, 3).stride(1, 1) .padding(1, 1).nIn(inputShape[0]).nOut(64) .cudnnAlgoMode(cudnnAlgoMode).build(), "in") - .layer(1, new ConvolutionLayer.Builder().kernelSize(3, 3).stride(1, 1) + .layer(1, ConvolutionLayer.builder().kernelSize(3, 3).stride(1, 1) .padding(1, 1).nOut(64).cudnnAlgoMode(cudnnAlgoMode).build(), "0") - .layer(2, new SubsamplingLayer.Builder() - .poolingType(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2) + .layer(2, SubsamplingLayer.builder() + .poolingType(SubsamplingLayer.PoolingType.MAX.toPoolingType()).kernelSize(2, 2) .stride(2, 2).build(), "1") // block 2 - .layer(3, new ConvolutionLayer.Builder().kernelSize(3, 3).stride(1, 1) + .layer(3, ConvolutionLayer.builder().kernelSize(3, 3).stride(1, 1) .padding(1, 1).nOut(128).cudnnAlgoMode(cudnnAlgoMode).build(), "2") - .layer(4, new ConvolutionLayer.Builder().kernelSize(3, 3).stride(1, 1) + .layer(4, ConvolutionLayer.builder().kernelSize(3, 3).stride(1, 1) .padding(1, 1).nOut(128).cudnnAlgoMode(cudnnAlgoMode).build(), "3") - .layer(5, new SubsamplingLayer.Builder() - .poolingType(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2) + .layer(5, SubsamplingLayer.builder() + .poolingType(SubsamplingLayer.PoolingType.MAX.toPoolingType()).kernelSize(2, 2) .stride(2, 2).build(), "4") // block 3 - .layer(6, new ConvolutionLayer.Builder().kernelSize(3, 3).stride(1, 1) + .layer(6, ConvolutionLayer.builder().kernelSize(3, 3).stride(1, 1) .padding(1, 1).nOut(256).cudnnAlgoMode(cudnnAlgoMode).build(), "5") - .layer(7, new ConvolutionLayer.Builder().kernelSize(3, 3).stride(1, 1) + .layer(7, ConvolutionLayer.builder().kernelSize(3, 3).stride(1, 1) .padding(1, 1).nOut(256).cudnnAlgoMode(cudnnAlgoMode).build(), "6") - .layer(8, new ConvolutionLayer.Builder().kernelSize(3, 3).stride(1, 1) + .layer(8, ConvolutionLayer.builder().kernelSize(3, 3).stride(1, 1) .padding(1, 1).nOut(256).cudnnAlgoMode(cudnnAlgoMode).build(), "7") - .layer(9, new ConvolutionLayer.Builder().kernelSize(3, 3).stride(1, 1) + .layer(9, ConvolutionLayer.builder().kernelSize(3, 3).stride(1, 1) .padding(1, 1).nOut(256).cudnnAlgoMode(cudnnAlgoMode).build(), "8") - .layer(10, new SubsamplingLayer.Builder() - .poolingType(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 
2) + .layer(10, SubsamplingLayer.builder() + .poolingType(SubsamplingLayer.PoolingType.MAX.toPoolingType()).kernelSize(2, 2) .stride(2, 2).build(), "9") // block 4 - .layer(11, new ConvolutionLayer.Builder().kernelSize(3, 3).stride(1, 1) + .layer(11, ConvolutionLayer.builder().kernelSize(3, 3).stride(1, 1) .padding(1, 1).nOut(512).cudnnAlgoMode(cudnnAlgoMode).build(), "10") - .layer(12, new ConvolutionLayer.Builder().kernelSize(3, 3).stride(1, 1) + .layer(12, ConvolutionLayer.builder().kernelSize(3, 3).stride(1, 1) .padding(1, 1).nOut(512).cudnnAlgoMode(cudnnAlgoMode).build(), "11") - .layer(13, new ConvolutionLayer.Builder().kernelSize(3, 3).stride(1, 1) + .layer(13, ConvolutionLayer.builder().kernelSize(3, 3).stride(1, 1) .padding(1, 1).nOut(512).cudnnAlgoMode(cudnnAlgoMode).build(), "12") - .layer(14, new ConvolutionLayer.Builder().kernelSize(3, 3).stride(1, 1) + .layer(14, ConvolutionLayer.builder().kernelSize(3, 3).stride(1, 1) .padding(1, 1).nOut(512).cudnnAlgoMode(cudnnAlgoMode).build(), "13") - .layer(15, new SubsamplingLayer.Builder() - .poolingType(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2) + .layer(15, SubsamplingLayer.builder() + .poolingType(SubsamplingLayer.PoolingType.MAX.toPoolingType()).kernelSize(2, 2) .stride(2, 2).build(), "14") // block 5 - .layer(16, new ConvolutionLayer.Builder().kernelSize(3, 3).stride(1, 1) + .layer(16, ConvolutionLayer.builder().kernelSize(3, 3).stride(1, 1) .padding(1, 1).nOut(512).cudnnAlgoMode(cudnnAlgoMode).build(), "15") - .layer(17, new ConvolutionLayer.Builder().kernelSize(3, 3).stride(1, 1) + .layer(17, ConvolutionLayer.builder().kernelSize(3, 3).stride(1, 1) .padding(1, 1).nOut(512).cudnnAlgoMode(cudnnAlgoMode).build(), "16") - .layer(18, new ConvolutionLayer.Builder().kernelSize(3, 3).stride(1, 1) + .layer(18, ConvolutionLayer.builder().kernelSize(3, 3).stride(1, 1) .padding(1, 1).nOut(512).cudnnAlgoMode(cudnnAlgoMode).build(), "17") - .layer(19, new ConvolutionLayer.Builder().kernelSize(3, 3).stride(1, 1) + .layer(19, ConvolutionLayer.builder().kernelSize(3, 3).stride(1, 1) .padding(1, 1).nOut(512).cudnnAlgoMode(cudnnAlgoMode).build(), "18") - .layer(20, new SubsamplingLayer.Builder() - .poolingType(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2) + .layer(20, SubsamplingLayer.builder() + .poolingType(SubsamplingLayer.PoolingType.MAX.toPoolingType()).kernelSize(2, 2) .stride(2, 2).build(), "19") - .layer(21, new DenseLayer.Builder().nOut(4096).build(), "20") - .layer(22, new OutputLayer.Builder( + .layer(21, DenseLayer.builder().nOut(4096).build(), "20") + .layer(22, OutputLayer.builder( LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).name("output") .nOut(numClasses).activation(Activation.SOFTMAX) // radial basis function required .build(), "21") diff --git a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/Xception.java b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/Xception.java index bbba3ff3c..8f228ade5 100644 --- a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/Xception.java +++ b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/Xception.java @@ -106,65 +106,65 @@ public class Xception extends ZooModel { graph // block1 - .addLayer("block1_conv1", new ConvolutionLayer.Builder(3,3).stride(2,2).nOut(32).hasBias(false) + .addLayer("block1_conv1", ConvolutionLayer.builder(3,3).stride(2,2).nOut(32).hasBias(false) .cudnnAlgoMode(cudnnAlgoMode).build(), "input") - .addLayer("block1_conv1_bn", new BatchNormalization(), "block1_conv1") - 
.addLayer("block1_conv1_act", new ActivationLayer(Activation.RELU), "block1_conv1_bn") - .addLayer("block1_conv2", new ConvolutionLayer.Builder(3,3).stride(1,1).nOut(64).hasBias(false) + .addLayer("block1_conv1_bn", BatchNormalization.builder().build(), "block1_conv1") + .addLayer("block1_conv1_act", ActivationLayer.builder(Activation.RELU).build(), "block1_conv1_bn") + .addLayer("block1_conv2", ConvolutionLayer.builder(3,3).stride(1,1).nOut(64).hasBias(false) .cudnnAlgoMode(cudnnAlgoMode).build(), "block1_conv1_act") - .addLayer("block1_conv2_bn", new BatchNormalization(), "block1_conv2") - .addLayer("block1_conv2_act", new ActivationLayer(Activation.RELU), "block1_conv2_bn") + .addLayer("block1_conv2_bn", BatchNormalization.builder().build(), "block1_conv2") + .addLayer("block1_conv2_act", ActivationLayer.builder(Activation.RELU).build(), "block1_conv2_bn") // residual1 - .addLayer("residual1_conv", new ConvolutionLayer.Builder(1,1).stride(2,2).nOut(128).hasBias(false) + .addLayer("residual1_conv", ConvolutionLayer.builder(1,1).stride(2,2).nOut(128).hasBias(false) .convolutionMode(ConvolutionMode.Same).cudnnAlgoMode(cudnnAlgoMode).build(), "block1_conv2_act") - .addLayer("residual1", new BatchNormalization(), "residual1_conv") + .addLayer("residual1", BatchNormalization.builder().build(), "residual1_conv") // block2 - .addLayer("block2_sepconv1", new SeparableConvolution2D.Builder(3,3).nOut(128).hasBias(false) + .addLayer("block2_sepconv1", SeparableConvolution2D.builder(3,3).nOut(128).hasBias(false) .convolutionMode(ConvolutionMode.Same).cudnnAlgoMode(cudnnAlgoMode).build(), "block1_conv2_act") - .addLayer("block2_sepconv1_bn", new BatchNormalization(), "block2_sepconv1") - .addLayer("block2_sepconv1_act",new ActivationLayer(Activation.RELU), "block2_sepconv1_bn") - .addLayer("block2_sepconv2", new SeparableConvolution2D.Builder(3,3).nOut(128).hasBias(false) + .addLayer("block2_sepconv1_bn", BatchNormalization.builder().build(), "block2_sepconv1") + .addLayer("block2_sepconv1_act",ActivationLayer.builder(Activation.RELU).build(), "block2_sepconv1_bn") + .addLayer("block2_sepconv2", SeparableConvolution2D.builder(3,3).nOut(128).hasBias(false) .convolutionMode(ConvolutionMode.Same).cudnnAlgoMode(cudnnAlgoMode).build(), "block2_sepconv1_act") - .addLayer("block2_sepconv2_bn", new BatchNormalization(), "block2_sepconv2") - .addLayer("block2_pool", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(3,3).stride(2,2) + .addLayer("block2_sepconv2_bn", BatchNormalization.builder().build(), "block2_sepconv2") + .addLayer("block2_pool", SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX).kernelSize(3,3).stride(2,2) .convolutionMode(ConvolutionMode.Same).build(), "block2_sepconv2_bn") .addVertex("add1", new ElementWiseVertex(ElementWiseVertex.Op.Add), "block2_pool", "residual1") // residual2 - .addLayer("residual2_conv", new ConvolutionLayer.Builder(1,1).stride(2,2).nOut(256).hasBias(false) + .addLayer("residual2_conv", ConvolutionLayer.builder(1,1).stride(2,2).nOut(256).hasBias(false) .convolutionMode(ConvolutionMode.Same).cudnnAlgoMode(cudnnAlgoMode).build(), "add1") - .addLayer("residual2", new BatchNormalization(), "residual2_conv") + .addLayer("residual2", BatchNormalization.builder().build(), "residual2_conv") // block3 - .addLayer("block3_sepconv1_act", new ActivationLayer(Activation.RELU), "add1") - .addLayer("block3_sepconv1", new SeparableConvolution2D.Builder(3,3).nOut(256).hasBias(false) + .addLayer("block3_sepconv1_act", 
ActivationLayer.builder(Activation.RELU).build(), "add1") + .addLayer("block3_sepconv1", SeparableConvolution2D.builder(3,3).nOut(256).hasBias(false) .convolutionMode(ConvolutionMode.Same).cudnnAlgoMode(cudnnAlgoMode).build(), "block3_sepconv1_act") - .addLayer("block3_sepconv1_bn", new BatchNormalization(), "block3_sepconv1") - .addLayer("block3_sepconv2_act", new ActivationLayer(Activation.RELU), "block3_sepconv1_bn") - .addLayer("block3_sepconv2", new SeparableConvolution2D.Builder(3,3).nOut(256).hasBias(false) + .addLayer("block3_sepconv1_bn", BatchNormalization.builder().build(), "block3_sepconv1") + .addLayer("block3_sepconv2_act", ActivationLayer.builder(Activation.RELU).build(), "block3_sepconv1_bn") + .addLayer("block3_sepconv2", SeparableConvolution2D.builder(3,3).nOut(256).hasBias(false) .convolutionMode(ConvolutionMode.Same).cudnnAlgoMode(cudnnAlgoMode).build(), "block3_sepconv2_act") - .addLayer("block3_sepconv2_bn", new BatchNormalization(), "block3_sepconv2") - .addLayer("block3_pool", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(3,3).stride(2,2) + .addLayer("block3_sepconv2_bn", BatchNormalization.builder().build(), "block3_sepconv2") + .addLayer("block3_pool", SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX).kernelSize(3,3).stride(2,2) .convolutionMode(ConvolutionMode.Same).build(), "block3_sepconv2_bn") .addVertex("add2", new ElementWiseVertex(ElementWiseVertex.Op.Add), "block3_pool", "residual2") // residual3 - .addLayer("residual3_conv", new ConvolutionLayer.Builder(1,1).stride(2,2).nOut(728).hasBias(false) + .addLayer("residual3_conv", ConvolutionLayer.builder(1,1).stride(2,2).nOut(728).hasBias(false) .convolutionMode(ConvolutionMode.Same).cudnnAlgoMode(cudnnAlgoMode).build(), "add2") - .addLayer("residual3", new BatchNormalization(), "residual3_conv") + .addLayer("residual3", BatchNormalization.builder().build(), "residual3_conv") // block4 - .addLayer("block4_sepconv1_act", new ActivationLayer(Activation.RELU), "add2") - .addLayer("block4_sepconv1", new SeparableConvolution2D.Builder(3,3).nOut(728).hasBias(false) + .addLayer("block4_sepconv1_act", ActivationLayer.builder(Activation.RELU).build(), "add2") + .addLayer("block4_sepconv1", SeparableConvolution2D.builder(3,3).nOut(728).hasBias(false) .convolutionMode(ConvolutionMode.Same).cudnnAlgoMode(cudnnAlgoMode).build(), "block4_sepconv1_act") - .addLayer("block4_sepconv1_bn", new BatchNormalization(), "block4_sepconv1") - .addLayer("block4_sepconv2_act", new ActivationLayer(Activation.RELU), "block4_sepconv1_bn") - .addLayer("block4_sepconv2", new SeparableConvolution2D.Builder(3,3).nOut(728).hasBias(false) + .addLayer("block4_sepconv1_bn", BatchNormalization.builder().build(), "block4_sepconv1") + .addLayer("block4_sepconv2_act", ActivationLayer.builder(Activation.RELU).build(), "block4_sepconv1_bn") + .addLayer("block4_sepconv2", SeparableConvolution2D.builder(3,3).nOut(728).hasBias(false) .convolutionMode(ConvolutionMode.Same).cudnnAlgoMode(cudnnAlgoMode).build(), "block4_sepconv2_act") - .addLayer("block4_sepconv2_bn", new BatchNormalization(), "block4_sepconv2") - .addLayer("block4_pool", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(3,3).stride(2,2) + .addLayer("block4_sepconv2_bn", BatchNormalization.builder().build(), "block4_sepconv2") + .addLayer("block4_pool", SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX).kernelSize(3,3).stride(2,2) .convolutionMode(ConvolutionMode.Same).build(), "block4_sepconv2_bn") .addVertex("add3", new 
ElementWiseVertex(ElementWiseVertex.Op.Add), "block4_pool", "residual3"); @@ -176,18 +176,18 @@ public class Xception extends ZooModel { String blockName = "block"+block; graph - .addLayer(blockName+"_sepconv1_act", new ActivationLayer(Activation.RELU), previousInput) - .addLayer(blockName+"_sepconv1", new SeparableConvolution2D.Builder(3,3).nOut(728).hasBias(false) + .addLayer(blockName+"_sepconv1_act", ActivationLayer.builder(Activation.RELU).build(), previousInput) + .addLayer(blockName+"_sepconv1", SeparableConvolution2D.builder(3,3).nOut(728).hasBias(false) .convolutionMode(ConvolutionMode.Same).cudnnAlgoMode(cudnnAlgoMode).build(), blockName+"_sepconv1_act") - .addLayer(blockName+"_sepconv1_bn", new BatchNormalization(), blockName+"_sepconv1") - .addLayer(blockName+"_sepconv2_act", new ActivationLayer(Activation.RELU), blockName+"_sepconv1_bn") - .addLayer(blockName+"_sepconv2", new SeparableConvolution2D.Builder(3,3).nOut(728).hasBias(false) + .addLayer(blockName+"_sepconv1_bn", BatchNormalization.builder().build(), blockName+"_sepconv1") + .addLayer(blockName+"_sepconv2_act", ActivationLayer.builder(Activation.RELU).build(), blockName+"_sepconv1_bn") + .addLayer(blockName+"_sepconv2", SeparableConvolution2D.builder(3,3).nOut(728).hasBias(false) .convolutionMode(ConvolutionMode.Same).cudnnAlgoMode(cudnnAlgoMode).build(), blockName+"_sepconv2_act") - .addLayer(blockName+"_sepconv2_bn", new BatchNormalization(), blockName+"_sepconv2") - .addLayer(blockName+"_sepconv3_act", new ActivationLayer(Activation.RELU), blockName+"_sepconv2_bn") - .addLayer(blockName+"_sepconv3", new SeparableConvolution2D.Builder(3,3).nOut(728).hasBias(false) + .addLayer(blockName+"_sepconv2_bn", BatchNormalization.builder().build(), blockName+"_sepconv2") + .addLayer(blockName+"_sepconv3_act", ActivationLayer.builder(Activation.RELU).build(), blockName+"_sepconv2_bn") + .addLayer(blockName+"_sepconv3", SeparableConvolution2D.builder(3,3).nOut(728).hasBias(false) .convolutionMode(ConvolutionMode.Same).cudnnAlgoMode(cudnnAlgoMode).build(), blockName+"_sepconv3_act") - .addLayer(blockName+"_sepconv3_bn", new BatchNormalization(), blockName+"_sepconv3") + .addLayer(blockName+"_sepconv3_bn", BatchNormalization.builder().build(), blockName+"_sepconv3") .addVertex("add"+(residual+1), new ElementWiseVertex(ElementWiseVertex.Op.Add), blockName+"_sepconv3_bn", previousInput); residual++; @@ -195,37 +195,37 @@ public class Xception extends ZooModel { } // residual12 - graph.addLayer("residual12_conv", new ConvolutionLayer.Builder(1,1).stride(2,2).nOut(1024).hasBias(false) + graph.addLayer("residual12_conv", ConvolutionLayer.builder(1,1).stride(2,2).nOut(1024).hasBias(false) .convolutionMode(ConvolutionMode.Same).cudnnAlgoMode(cudnnAlgoMode).build(), "add" + residual) - .addLayer("residual12", new BatchNormalization(), "residual12_conv"); + .addLayer("residual12", BatchNormalization.builder().build(), "residual12_conv"); // block13 graph - .addLayer("block13_sepconv1_act", new ActivationLayer(Activation.RELU), "add11" ) - .addLayer("block13_sepconv1", new SeparableConvolution2D.Builder(3,3).nOut(728).hasBias(false) + .addLayer("block13_sepconv1_act", ActivationLayer.builder(Activation.RELU).build(), "add11" ) + .addLayer("block13_sepconv1", SeparableConvolution2D.builder(3,3).nOut(728).hasBias(false) .convolutionMode(ConvolutionMode.Same).cudnnAlgoMode(cudnnAlgoMode).build(), "block13_sepconv1_act") - .addLayer("block13_sepconv1_bn", new BatchNormalization(), "block13_sepconv1") - .addLayer("block13_sepconv2_act", new 
ActivationLayer(Activation.RELU), "block13_sepconv1_bn") - .addLayer("block13_sepconv2", new SeparableConvolution2D.Builder(3,3).nOut(1024).hasBias(false) + .addLayer("block13_sepconv1_bn", BatchNormalization.builder().build(), "block13_sepconv1") + .addLayer("block13_sepconv2_act", ActivationLayer.builder(Activation.RELU).build(), "block13_sepconv1_bn") + .addLayer("block13_sepconv2", SeparableConvolution2D.builder(3,3).nOut(1024).hasBias(false) .convolutionMode(ConvolutionMode.Same).cudnnAlgoMode(cudnnAlgoMode).build(), "block13_sepconv2_act") - .addLayer("block13_sepconv2_bn", new BatchNormalization(), "block13_sepconv2") - .addLayer("block13_pool", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(3,3).stride(2,2) + .addLayer("block13_sepconv2_bn", BatchNormalization.builder().build(), "block13_sepconv2") + .addLayer("block13_pool", SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX).kernelSize(3,3).stride(2,2) .convolutionMode(ConvolutionMode.Same).build(), "block13_sepconv2_bn") .addVertex("add12", new ElementWiseVertex(ElementWiseVertex.Op.Add), "block13_pool", "residual12"); // block14 graph - .addLayer("block14_sepconv1", new SeparableConvolution2D.Builder(3,3).nOut(1536).hasBias(false) + .addLayer("block14_sepconv1", SeparableConvolution2D.builder(3,3).nOut(1536).hasBias(false) .convolutionMode(ConvolutionMode.Same).cudnnAlgoMode(cudnnAlgoMode).build(), "add12") - .addLayer("block14_sepconv1_bn", new BatchNormalization(), "block14_sepconv1") - .addLayer("block14_sepconv1_act", new ActivationLayer(Activation.RELU), "block14_sepconv1_bn") - .addLayer("block14_sepconv2", new SeparableConvolution2D.Builder(3,3).nOut(2048).hasBias(false) + .addLayer("block14_sepconv1_bn", BatchNormalization.builder().build(), "block14_sepconv1") + .addLayer("block14_sepconv1_act", ActivationLayer.builder(Activation.RELU).build(), "block14_sepconv1_bn") + .addLayer("block14_sepconv2", SeparableConvolution2D.builder(3,3).nOut(2048).hasBias(false) .convolutionMode(ConvolutionMode.Same).cudnnAlgoMode(cudnnAlgoMode).build(), "block14_sepconv1_act") - .addLayer("block14_sepconv2_bn", new BatchNormalization(), "block14_sepconv2") - .addLayer("block14_sepconv2_act", new ActivationLayer(Activation.RELU), "block14_sepconv2_bn") + .addLayer("block14_sepconv2_bn", BatchNormalization.builder().build(), "block14_sepconv2") + .addLayer("block14_sepconv2_act", ActivationLayer.builder(Activation.RELU).build(), "block14_sepconv2_bn") - .addLayer("avg_pool", new GlobalPoolingLayer.Builder(PoolingType.AVG).build(), "block14_sepconv2_act") - .addLayer("predictions", new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) + .addLayer("avg_pool", GlobalPoolingLayer.builder(PoolingType.AVG).build(), "block14_sepconv2_act") + .addLayer("predictions", OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .nOut(numClasses) .activation(Activation.SOFTMAX).build(), "avg_pool") diff --git a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/YOLO2.java b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/YOLO2.java index 3c28a36a0..798c84158 100644 --- a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/YOLO2.java +++ b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/YOLO2.java @@ -142,7 +142,7 @@ public class YOLO2 extends ZooModel { addLayers(graphBuilder, 21, "activation_13", 1, 512, 64, 0, 0); // reorg - graphBuilder.addLayer("rearrange_21",new SpaceToDepthLayer.Builder(2).build(), "activation_21") + 
graphBuilder.addLayer("rearrange_21",SpaceToDepthLayer.builder().blockSize(2).build(), "activation_21") // route .addVertex("concatenate_21", new MergeVertex(), "rearrange_21", "activation_20"); @@ -151,7 +151,7 @@ public class YOLO2 extends ZooModel { graphBuilder .addLayer("convolution2d_23", - new ConvolutionLayer.Builder(1,1) + ConvolutionLayer.builder(1,1) .nIn(1024) .nOut(nBoxes * (5 + numClasses)) .weightInit(WeightInit.XAVIER) @@ -163,8 +163,8 @@ public class YOLO2 extends ZooModel { .build(), "activation_22") .addLayer("outputs", - new Yolo2OutputLayer.Builder() - .boundingBoxPriors(priors) + Yolo2OutputLayer.builder() + .boundingBoxes(priors) .build(), "convolution2d_23") .setOutputs("outputs"); diff --git a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/helper/DarknetHelper.java b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/helper/DarknetHelper.java index db55d2731..0581e16b7 100644 --- a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/helper/DarknetHelper.java +++ b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/helper/DarknetHelper.java @@ -67,7 +67,7 @@ public class DarknetHelper { public static ComputationGraphConfiguration.GraphBuilder addLayers(ComputationGraphConfiguration.GraphBuilder graphBuilder, int layerNumber, String input, int filterSize, int nIn, int nOut, int poolSize, int poolStride) { graphBuilder .addLayer("convolution2d_" + layerNumber, - new ConvolutionLayer.Builder(filterSize,filterSize) + ConvolutionLayer.builder(filterSize,filterSize) .nIn(nIn) .nOut(nOut) .weightInit(WeightInit.XAVIER) @@ -78,21 +78,21 @@ public class DarknetHelper { .build(), input) .addLayer("batchnormalization_" + layerNumber, - new BatchNormalization.Builder() + BatchNormalization.builder() .nIn(nOut).nOut(nOut) .weightInit(WeightInit.XAVIER) .activation(Activation.IDENTITY) .build(), "convolution2d_" + layerNumber) .addLayer("activation_" + layerNumber, - new ActivationLayer.Builder() + ActivationLayer.builder() .activation(new ActivationLReLU(0.1)) .build(), "batchnormalization_" + layerNumber); if (poolSize > 0) { graphBuilder .addLayer("maxpooling2d_" + layerNumber, - new SubsamplingLayer.Builder() + SubsamplingLayer.builder() .kernelSize(poolSize, poolSize) .stride(poolStride, poolStride) .convolutionMode(ConvolutionMode.Same) diff --git a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/helper/FaceNetHelper.java b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/helper/FaceNetHelper.java index f6b23b2e3..3b597f87c 100644 --- a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/helper/FaceNetHelper.java +++ b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/helper/FaceNetHelper.java @@ -37,7 +37,7 @@ public class FaceNetHelper { public static ConvolutionLayer conv1x1(int in, int out, double bias) { - return new ConvolutionLayer.Builder(new int[] {1, 1}, new int[] {1, 1}, new int[] {0, 0}).nIn(in).nOut(out) + return ConvolutionLayer.builder(new int[] {1, 1}, new int[] {1, 1}, new int[] {0, 0}).nIn(in).nOut(out) .biasInit(bias).cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE).build(); } @@ -50,63 +50,63 @@ public class FaceNetHelper { } public static ConvolutionLayer conv3x3(int in, int out, double bias) { - return new ConvolutionLayer.Builder(new int[] {3, 3}, new int[] {1, 1}, new int[] {1, 1}).nIn(in).nOut(out) + return ConvolutionLayer.builder(new int[] {3, 3}, new int[] {1, 1}, new int[] {1, 
1}).nIn(in).nOut(out) .biasInit(bias).build(); } public static ConvolutionLayer conv5x5(int in, int out, double bias) { - return new ConvolutionLayer.Builder(new int[] {5, 5}, new int[] {1, 1}, new int[] {2, 2}).nIn(in).nOut(out) + return ConvolutionLayer.builder(new int[] {5, 5}, new int[] {1, 1}, new int[] {2, 2}).nIn(in).nOut(out) .biasInit(bias).cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE).build(); } public static ConvolutionLayer conv7x7(int in, int out, double bias) { - return new ConvolutionLayer.Builder(new int[] {7, 7}, new int[] {2, 2}, new int[] {3, 3}).nIn(in).nOut(out) + return ConvolutionLayer.builder(new int[] {7, 7}, new int[] {2, 2}, new int[] {3, 3}).nIn(in).nOut(out) .biasInit(bias).cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE).build(); } public static SubsamplingLayer avgPool7x7(int stride) { - return new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.AVG, new int[] {7, 7}, new int[] {1, 1}) + return SubsamplingLayer.builder(SubsamplingLayer.PoolingType.AVG, new int[] {7, 7}, new int[] {1, 1}) .build(); } public static SubsamplingLayer avgPoolNxN(int size, int stride) { - return new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.AVG, new int[] {size, size}, + return SubsamplingLayer.builder(SubsamplingLayer.PoolingType.AVG, new int[] {size, size}, new int[] {stride, stride}).build(); } public static SubsamplingLayer pNormNxN(int pNorm, int size, int stride) { - return new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.PNORM, new int[] {size, size}, + return SubsamplingLayer.builder(SubsamplingLayer.PoolingType.PNORM, new int[] {size, size}, new int[] {stride, stride}).pnorm(pNorm).build(); } public static SubsamplingLayer maxPool3x3(int stride) { - return new SubsamplingLayer.Builder(new int[] {3, 3}, new int[] {stride, stride}, new int[] {1, 1}).build(); + return SubsamplingLayer.builder(new int[] {3, 3}, new int[] {stride, stride}, new int[] {1, 1}).build(); } public static SubsamplingLayer maxPoolNxN(int size, int stride) { - return new SubsamplingLayer.Builder(new int[] {size, size}, new int[] {stride, stride}, new int[] {1, 1}) + return SubsamplingLayer.builder(new int[] {size, size}, new int[] {stride, stride}, new int[] {1, 1}) .build(); } public static DenseLayer fullyConnected(int in, int out, double dropOut) { - return new DenseLayer.Builder().nIn(in).nOut(out).dropOut(dropOut).build(); + return DenseLayer.builder().nIn(in).nOut(out).dropOut(dropOut).build(); } public static ConvolutionLayer convNxN(int reduceSize, int outputSize, int kernelSize, int kernelStride, boolean padding) { int pad = padding ? 
((int) Math.floor(kernelStride / 2) * 2) : 0; - return new ConvolutionLayer.Builder(new int[] {kernelSize, kernelSize}, new int[] {kernelStride, kernelStride}, + return ConvolutionLayer.builder(new int[] {kernelSize, kernelSize}, new int[] {kernelStride, kernelStride}, new int[] {pad, pad}).nIn(reduceSize).nOut(outputSize).biasInit(0.2) .cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE).build(); } public static ConvolutionLayer convNxNreduce(int inputSize, int reduceSize, int reduceStride) { - return new ConvolutionLayer.Builder(new int[] {1, 1}, new int[] {reduceStride, reduceStride}).nIn(inputSize) + return ConvolutionLayer.builder(new int[] {1, 1}, new int[] {reduceStride, reduceStride}).nIn(inputSize) .nOut(reduceSize).biasInit(0.2).cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE).build(); } public static BatchNormalization batchNorm(int in, int out) { - return new BatchNormalization.Builder(false).nIn(in).nOut(out).build(); + return BatchNormalization.builder(false).nIn(in).nOut(out).build(); } public static ComputationGraphConfiguration.GraphBuilder appendGraph( @@ -164,7 +164,7 @@ public class FaceNetHelper { graph.addLayer(getModuleName(moduleLayerName) + "-batch1-" + i, batchNorm(reduceSize[i], reduceSize[i]), getModuleName(moduleLayerName) + "-cnn1-" + i); graph.addLayer(getModuleName(moduleLayerName) + "-transfer1-" + i, - new ActivationLayer.Builder().activation(transferFunction).build(), + ActivationLayer.builder().activation(transferFunction).build(), getModuleName(moduleLayerName) + "-batch1-" + i); graph.addLayer(getModuleName(moduleLayerName) + "-reduce1-" + i, convNxN(reduceSize[i], outputSize[i], kernelSize[i], kernelStride[i], true), @@ -172,7 +172,7 @@ public class FaceNetHelper { graph.addLayer(getModuleName(moduleLayerName) + "-batch2-" + i, batchNorm(outputSize[i], outputSize[i]), getModuleName(moduleLayerName) + "-reduce1-" + i); graph.addLayer(getModuleName(moduleLayerName) + "-transfer2-" + i, - new ActivationLayer.Builder().activation(transferFunction).build(), + ActivationLayer.builder().activation(transferFunction).build(), getModuleName(moduleLayerName) + "-batch2-" + i); } @@ -204,7 +204,7 @@ public class FaceNetHelper { graph.addLayer(getModuleName(moduleLayerName) + "-batch3", batchNorm(reduceSize[i], reduceSize[i]), getModuleName(moduleLayerName) + "-cnn2"); graph.addLayer(getModuleName(moduleLayerName) + "-transfer3", - new ActivationLayer.Builder().activation(transferFunction).build(), + ActivationLayer.builder().activation(transferFunction).build(), getModuleName(moduleLayerName) + "-batch3"); } catch (IndexOutOfBoundsException e) { } @@ -217,7 +217,7 @@ public class FaceNetHelper { graph.addLayer(getModuleName(moduleLayerName) + "-batch4", batchNorm(reduceSize[i], reduceSize[i]), getModuleName(moduleLayerName) + "-reduce2"); graph.addLayer(getModuleName(moduleLayerName) + "-transfer4", - new ActivationLayer.Builder().activation(transferFunction).build(), + ActivationLayer.builder().activation(transferFunction).build(), getModuleName(moduleLayerName) + "-batch4"); } catch (IndexOutOfBoundsException e) { } diff --git a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/helper/InceptionResNetHelper.java b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/helper/InceptionResNetHelper.java index 85009dde4..d1ae6f3c0 100644 --- a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/helper/InceptionResNetHelper.java +++ 
b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/helper/InceptionResNetHelper.java @@ -49,7 +49,7 @@ public class InceptionResNetHelper { ComputationGraphConfiguration.GraphBuilder graph, String blockName, int scale, double activationScale, String input) { // // first add the RELU activation layer - // graph.addLayer(nameLayer(blockName,"activation1",0), new ActivationLayer.Builder().activation(Activation.TANH).build(), input); + // graph.addLayer(nameLayer(blockName,"activation1",0), ActivationLayer.builder().activation(Activation.TANH).build(), input); // loop and add each subsequent resnet blocks String previousBlock = input; @@ -57,65 +57,65 @@ public class InceptionResNetHelper { graph // 1x1 .addLayer(nameLayer(blockName, "cnn1", i), - new ConvolutionLayer.Builder(new int[] {1, 1}) + ConvolutionLayer.builder(new int[] {1, 1}) .convolutionMode(ConvolutionMode.Same).nIn(192).nOut(32) .cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE) .build(), previousBlock) .addLayer(nameLayer(blockName, "batch1", i), - new BatchNormalization.Builder(false).decay(0.995).eps(0.001).nIn(32) + BatchNormalization.builder(false).decay(0.995).eps(0.001).nIn(32) .nOut(32).build(), nameLayer(blockName, "cnn1", i)) // 1x1 -> 3x3 .addLayer(nameLayer(blockName, "cnn2", i), - new ConvolutionLayer.Builder(new int[] {1, 1}) + ConvolutionLayer.builder(new int[] {1, 1}) .convolutionMode(ConvolutionMode.Same).nIn(192).nOut(32) .cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE) .build(), previousBlock) .addLayer(nameLayer(blockName, "batch2", i), - new BatchNormalization.Builder(false).decay(0.995).eps(0.001).nIn(32) + BatchNormalization.builder(false).decay(0.995).eps(0.001).nIn(32) .nOut(32).build(), nameLayer(blockName, "cnn2", i)) .addLayer(nameLayer(blockName, "cnn3", i), - new ConvolutionLayer.Builder(new int[] {3, 3}) + ConvolutionLayer.builder(new int[] {3, 3}) .convolutionMode(ConvolutionMode.Same).nIn(32).nOut(32) .cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE) .build(), nameLayer(blockName, "batch2", i)) .addLayer(nameLayer(blockName, "batch3", i), - new BatchNormalization.Builder(false).decay(0.995).eps(0.001).nIn(32) + BatchNormalization.builder(false).decay(0.995).eps(0.001).nIn(32) .nOut(32).build(), nameLayer(blockName, "cnn3", i)) // 1x1 -> 3x3 -> 3x3 .addLayer(nameLayer(blockName, "cnn4", i), - new ConvolutionLayer.Builder(new int[] {1, 1}) + ConvolutionLayer.builder(new int[] {1, 1}) .convolutionMode(ConvolutionMode.Same).nIn(192).nOut(32) .cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE) .build(), previousBlock) .addLayer(nameLayer(blockName, "batch4", i), - new BatchNormalization.Builder(false).decay(0.995).eps(0.001).nIn(32) + BatchNormalization.builder(false).decay(0.995).eps(0.001).nIn(32) .nOut(32).build(), nameLayer(blockName, "cnn4", i)) .addLayer(nameLayer(blockName, "cnn5", i), - new ConvolutionLayer.Builder(new int[] {3, 3}) + ConvolutionLayer.builder(new int[] {3, 3}) .convolutionMode(ConvolutionMode.Same).nIn(32).nOut(32) .cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE) .build(), nameLayer(blockName, "batch4", i)) .addLayer(nameLayer(blockName, "batch5", i), - new BatchNormalization.Builder(false).decay(0.995).eps(0.001).nIn(32) + BatchNormalization.builder(false).decay(0.995).eps(0.001).nIn(32) .nOut(32).build(), nameLayer(blockName, "cnn5", i)) .addLayer(nameLayer(blockName, "cnn6", i), - new ConvolutionLayer.Builder(new int[] {3, 3}) + ConvolutionLayer.builder(new int[] {3, 3}) .convolutionMode(ConvolutionMode.Same).nIn(32).nOut(32) 
.cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE) .build(), nameLayer(blockName, "batch5", i)) .addLayer(nameLayer(blockName, "batch6", i), - new BatchNormalization.Builder(false).decay(0.995).eps(0.001).nIn(32) + BatchNormalization.builder(false).decay(0.995).eps(0.001).nIn(32) .nOut(32).build(), nameLayer(blockName, "cnn6", i)) // --> 1x1 --> scaling --> @@ -123,20 +123,20 @@ public class InceptionResNetHelper { nameLayer(blockName, "batch1", i), nameLayer(blockName, "batch3", i), nameLayer(blockName, "batch6", i)) .addLayer(nameLayer(blockName, "cnn7", i), - new ConvolutionLayer.Builder(new int[] {3, 3}) + ConvolutionLayer.builder(new int[] {3, 3}) .convolutionMode(ConvolutionMode.Same).nIn(96).nOut(192) .cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE) .build(), nameLayer(blockName, "merge1", i)) .addLayer(nameLayer(blockName, "batch7", i), - new BatchNormalization.Builder(false).decay(0.995).eps(0.001).nIn(192) + BatchNormalization.builder(false).decay(0.995).eps(0.001).nIn(192) .nOut(192).build(), nameLayer(blockName, "cnn7", i)) .addVertex(nameLayer(blockName, "scaling", i), new ScaleVertex(activationScale), nameLayer(blockName, "batch7", i)) // --> .addLayer(nameLayer(blockName, "shortcut-identity", i), - new ActivationLayer.Builder().activation(Activation.IDENTITY).build(), + ActivationLayer.builder().activation(Activation.IDENTITY).build(), previousBlock) .addVertex(nameLayer(blockName, "shortcut", i), new ElementWiseVertex(ElementWiseVertex.Op.Add), @@ -145,11 +145,11 @@ public class InceptionResNetHelper { // leave the last vertex as the block name for convenience if (i == scale) - graph.addLayer(blockName, new ActivationLayer.Builder().activation(Activation.TANH).build(), + graph.addLayer(blockName, ActivationLayer.builder().activation(Activation.TANH).build(), nameLayer(blockName, "shortcut", i)); else graph.addLayer(nameLayer(blockName, "activation", i), - new ActivationLayer.Builder().activation(Activation.TANH).build(), + ActivationLayer.builder().activation(Activation.TANH).build(), nameLayer(blockName, "shortcut", i)); previousBlock = nameLayer(blockName, "activation", i); @@ -171,7 +171,7 @@ public class InceptionResNetHelper { double activationScale, String input) { // first add the RELU activation layer graph.addLayer(nameLayer(blockName, "activation1", 0), - new ActivationLayer.Builder().activation(Activation.TANH).build(), input); + ActivationLayer.builder().activation(Activation.TANH).build(), input); // loop and add each subsequent resnet blocks String previousBlock = nameLayer(blockName, "activation1", 0); @@ -179,64 +179,64 @@ public class InceptionResNetHelper { graph // 1x1 .addLayer(nameLayer(blockName, "cnn1", i), - new ConvolutionLayer.Builder(new int[] {1, 1}) + ConvolutionLayer.builder(new int[] {1, 1}) .convolutionMode(ConvolutionMode.Same).nIn(576).nOut(128) .cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE) .build(), previousBlock) .addLayer(nameLayer(blockName, "batch1", i), - new BatchNormalization.Builder(false).decay(0.995).eps(0.001).nIn(128) + BatchNormalization.builder(false).decay(0.995).eps(0.001).nIn(128) .nOut(128).build(), nameLayer(blockName, "cnn1", i)) // 1x1 -> 3x3 -> 3x3 .addLayer(nameLayer(blockName, "cnn2", i), - new ConvolutionLayer.Builder(new int[] {1, 1}) + ConvolutionLayer.builder(new int[] {1, 1}) .convolutionMode(ConvolutionMode.Same).nIn(576).nOut(128) .cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE) .build(), previousBlock) .addLayer(nameLayer(blockName, "batch2", i), - new 
BatchNormalization.Builder(false).decay(0.995).eps(0.001).nIn(128) + BatchNormalization.builder(false).decay(0.995).eps(0.001).nIn(128) .nOut(128).build(), nameLayer(blockName, "cnn2", i)) .addLayer(nameLayer(blockName, "cnn3", i), - new ConvolutionLayer.Builder(new int[] {1, 3}) + ConvolutionLayer.builder(new int[] {1, 3}) .convolutionMode(ConvolutionMode.Same).nIn(128).nOut(128) .cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE) .build(), nameLayer(blockName, "batch2", i)) .addLayer(nameLayer(blockName, "batch3", i), - new BatchNormalization.Builder(false).decay(0.995).eps(0.001).nIn(128) + BatchNormalization.builder(false).decay(0.995).eps(0.001).nIn(128) .nOut(128).build(), nameLayer(blockName, "cnn3", i)) .addLayer(nameLayer(blockName, "cnn4", i), - new ConvolutionLayer.Builder(new int[] {3, 1}) + ConvolutionLayer.builder(new int[] {3, 1}) .convolutionMode(ConvolutionMode.Same).nIn(128).nOut(128) .cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE) .build(), nameLayer(blockName, "batch3", i)) .addLayer(nameLayer(blockName, "batch4", i), - new BatchNormalization.Builder(false).decay(0.995).eps(0.001).nIn(128) + BatchNormalization.builder(false).decay(0.995).eps(0.001).nIn(128) .nOut(128).build(), nameLayer(blockName, "cnn4", i)) // --> 1x1 --> scaling --> .addVertex(nameLayer(blockName, "merge1", i), new MergeVertex(), nameLayer(blockName, "batch1", i), nameLayer(blockName, "batch4", i)) .addLayer(nameLayer(blockName, "cnn5", i), - new ConvolutionLayer.Builder(new int[] {1, 1}) + ConvolutionLayer.builder(new int[] {1, 1}) .convolutionMode(ConvolutionMode.Same).nIn(256).nOut(576) .cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE) .build(), nameLayer(blockName, "merge1", i)) .addLayer(nameLayer(blockName, "batch5", i), - new BatchNormalization.Builder(false).decay(0.995).eps(0.001).nIn(576) + BatchNormalization.builder(false).decay(0.995).eps(0.001).nIn(576) .nOut(576).build(), nameLayer(blockName, "cnn5", i)) .addVertex(nameLayer(blockName, "scaling", i), new ScaleVertex(activationScale), nameLayer(blockName, "batch5", i)) // --> .addLayer(nameLayer(blockName, "shortcut-identity", i), - new ActivationLayer.Builder().activation(Activation.IDENTITY).build(), + ActivationLayer.builder().activation(Activation.IDENTITY).build(), previousBlock) .addVertex(nameLayer(blockName, "shortcut", i), new ElementWiseVertex(ElementWiseVertex.Op.Add), @@ -245,11 +245,11 @@ public class InceptionResNetHelper { // leave the last vertex as the block name for convenience if (i == scale) - graph.addLayer(blockName, new ActivationLayer.Builder().activation(Activation.TANH).build(), + graph.addLayer(blockName, ActivationLayer.builder().activation(Activation.TANH).build(), nameLayer(blockName, "shortcut", i)); else graph.addLayer(nameLayer(blockName, "activation", i), - new ActivationLayer.Builder().activation(Activation.TANH).build(), + ActivationLayer.builder().activation(Activation.TANH).build(), nameLayer(blockName, "shortcut", i)); previousBlock = nameLayer(blockName, "activation", i); @@ -275,64 +275,64 @@ public class InceptionResNetHelper { graph // 1x1 .addLayer(nameLayer(blockName, "cnn1", i), - new ConvolutionLayer.Builder(new int[] {1, 1}) + ConvolutionLayer.builder(new int[] {1, 1}) .convolutionMode(ConvolutionMode.Same).nIn(1344).nOut(192) .cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE) .build(), previousBlock) .addLayer(nameLayer(blockName, "batch1", i), - new BatchNormalization.Builder(false).decay(0.995).eps(0.001).nIn(192) + 
BatchNormalization.builder(false).decay(0.995).eps(0.001).nIn(192) .nOut(192).build(), nameLayer(blockName, "cnn1", i)) // 1x1 -> 1x3 -> 3x1 .addLayer(nameLayer(blockName, "cnn2", i), - new ConvolutionLayer.Builder(new int[] {1, 1}) + ConvolutionLayer.builder(new int[] {1, 1}) .convolutionMode(ConvolutionMode.Same).nIn(1344).nOut(192) .cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE) .build(), previousBlock) .addLayer(nameLayer(blockName, "batch2", i), - new BatchNormalization.Builder(false).decay(0.995).eps(0.001).nIn(192) + BatchNormalization.builder(false).decay(0.995).eps(0.001).nIn(192) .nOut(192).build(), nameLayer(blockName, "cnn2", i)) .addLayer(nameLayer(blockName, "cnn3", i), - new ConvolutionLayer.Builder(new int[] {1, 3}) + ConvolutionLayer.builder(new int[] {1, 3}) .convolutionMode(ConvolutionMode.Same).nIn(192).nOut(192) .cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE) .build(), nameLayer(blockName, "batch2", i)) .addLayer(nameLayer(blockName, "batch3", i), - new BatchNormalization.Builder(false).decay(0.995).eps(0.001).nIn(192) + BatchNormalization.builder(false).decay(0.995).eps(0.001).nIn(192) .nOut(192).build(), nameLayer(blockName, "cnn3", i)) .addLayer(nameLayer(blockName, "cnn4", i), - new ConvolutionLayer.Builder(new int[] {3, 1}) + ConvolutionLayer.builder(new int[] {3, 1}) .convolutionMode(ConvolutionMode.Same).nIn(192).nOut(192) .cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE) .build(), nameLayer(blockName, "batch3", i)) .addLayer(nameLayer(blockName, "batch4", i), - new BatchNormalization.Builder(false).decay(0.995).eps(0.001) + BatchNormalization.builder(false).decay(0.995).eps(0.001) .activation(Activation.TANH).nIn(192).nOut(192).build(), nameLayer(blockName, "cnn4", i)) // --> 1x1 --> scale --> .addVertex(nameLayer(blockName, "merge1", i), new MergeVertex(), nameLayer(blockName, "batch1", i), nameLayer(blockName, "batch4", i)) .addLayer(nameLayer(blockName, "cnn5", i), - new ConvolutionLayer.Builder(new int[] {1, 1}) + ConvolutionLayer.builder(new int[] {1, 1}) .convolutionMode(ConvolutionMode.Same).nIn(384).nOut(1344) .cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE) .build(), nameLayer(blockName, "merge1", i)) .addLayer(nameLayer(blockName, "batch5", i), - new BatchNormalization.Builder(false).decay(0.995).eps(0.001) + BatchNormalization.builder(false).decay(0.995).eps(0.001) .activation(Activation.TANH).nIn(1344).nOut(1344).build(), nameLayer(blockName, "cnn5", i)) .addVertex(nameLayer(blockName, "scaling", i), new ScaleVertex(activationScale), nameLayer(blockName, "batch5", i)) // --> .addLayer(nameLayer(blockName, "shortcut-identity", i), - new ActivationLayer.Builder().activation(Activation.IDENTITY).build(), + ActivationLayer.builder().activation(Activation.IDENTITY).build(), previousBlock) .addVertex(nameLayer(blockName, "shortcut", i), new ElementWiseVertex(ElementWiseVertex.Op.Add), @@ -341,11 +341,11 @@ public class InceptionResNetHelper { // leave the last vertex as the block name for convenience if (i == scale) - graph.addLayer(blockName, new ActivationLayer.Builder().activation(Activation.TANH).build(), + graph.addLayer(blockName, ActivationLayer.builder().activation(Activation.TANH).build(), nameLayer(blockName, "shortcut", i)); else graph.addLayer(nameLayer(blockName, "activation", i), - new ActivationLayer.Builder().activation(Activation.TANH).build(), + ActivationLayer.builder().activation(Activation.TANH).build(), nameLayer(blockName, "shortcut", i)); previousBlock = nameLayer(blockName, "activation", i); diff --git 
a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/helper/NASNetHelper.java b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/helper/NASNetHelper.java index eb84edd51..93b88168a 100644 --- a/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/helper/NASNetHelper.java +++ b/cavis-zoo/cavis-zoo-models/src/main/java/org/deeplearning4j/zoo/model/helper/NASNetHelper.java @@ -41,14 +41,14 @@ public class NASNetHelper { String prefix = "sepConvBlock"+blockId; graphBuilder - .addLayer(prefix+"_act", new ActivationLayer(Activation.RELU), input) - .addLayer(prefix+"_sepconv1", new SeparableConvolution2D.Builder(kernelSize, kernelSize).stride(stride, stride).nOut(filters).hasBias(false) + .addLayer(prefix+"_act", ActivationLayer.builder(Activation.RELU).build(), input) + .addLayer(prefix+"_sepconv1", SeparableConvolution2D.builder(kernelSize, kernelSize).stride(stride, stride).nOut(filters).hasBias(false) .convolutionMode(ConvolutionMode.Same).build(), prefix+"_act") - .addLayer(prefix+"_conv1_bn", new BatchNormalization.Builder().eps(1e-3).gamma(0.9997).build(), prefix+"_sepconv1") - .addLayer(prefix+"_act2", new ActivationLayer(Activation.RELU), prefix+"_conv1_bn") - .addLayer(prefix+"_sepconv2", new SeparableConvolution2D.Builder(kernelSize, kernelSize).stride(stride, stride).nOut(filters).hasBias(false) + .addLayer(prefix+"_conv1_bn",BatchNormalization.builder().eps(1e-3).gamma(0.9997).build(), prefix+"_sepconv1") + .addLayer(prefix+"_act2", ActivationLayer.builder(Activation.RELU).build(), prefix+"_conv1_bn") + .addLayer(prefix+"_sepconv2", SeparableConvolution2D.builder(kernelSize, kernelSize).stride(stride, stride).nOut(filters).hasBias(false) .convolutionMode(ConvolutionMode.Same).build(), prefix+"_act2") - .addLayer(prefix+"_conv2_bn", new BatchNormalization.Builder().eps(1e-3).gamma(0.9997).build(), prefix+"_sepconv2"); + .addLayer(prefix+"_conv2_bn",BatchNormalization.builder().eps(1e-3).gamma(0.9997).build(), prefix+"_sepconv2"); return prefix+"_conv2_bn"; } @@ -70,22 +70,22 @@ public class NASNetHelper { if(shapeToMatch[1] != inputShape[1]) { graphBuilder - .addLayer(prefix+"_relu1", new ActivationLayer(Activation.RELU), input) + .addLayer(prefix+"_relu1", ActivationLayer.builder(Activation.RELU).build(), input) // tower 1 - .addLayer(prefix+"_avgpool1", new SubsamplingLayer.Builder(PoolingType.AVG).kernelSize(1,1).stride(2,2) + .addLayer(prefix+"_avgpool1", SubsamplingLayer.builder(PoolingType.AVG).kernelSize(1,1).stride(2,2) .convolutionMode(ConvolutionMode.Truncate).build(), prefix+"_relu1") - .addLayer(prefix+"_conv1", new ConvolutionLayer.Builder(1,1).stride(1,1).nOut((int) Math.floor(filters / 2)).hasBias(false) + .addLayer(prefix+"_conv1", ConvolutionLayer.builder(1,1).stride(1,1).nOut((int) Math.floor(filters / 2)).hasBias(false) .convolutionMode(ConvolutionMode.Same).build(), prefix+"_avg_pool_1") // tower 2 - .addLayer(prefix+"_zeropad1", new ZeroPaddingLayer(0,1), prefix+"_relu1") - .addLayer(prefix+"_crop1", new Cropping2D(1,0), prefix+"_zeropad_1") - .addLayer(prefix+"_avgpool2", new SubsamplingLayer.Builder(PoolingType.AVG).kernelSize(1,1).stride(2,2) + .addLayer(prefix+"_zeropad1", ZeroPaddingLayer.builder(0,1).build(), prefix+"_relu1") + .addLayer(prefix+"_crop1", Cropping2D.builder(1,0).build(), prefix+"_zeropad_1") + .addLayer(prefix+"_avgpool2", SubsamplingLayer.builder(PoolingType.AVG).kernelSize(1,1).stride(2,2) .convolutionMode(ConvolutionMode.Truncate).build(), prefix+"_crop1") - 
.addLayer(prefix+"_conv2", new ConvolutionLayer.Builder(1,1).stride(1,1).nOut((int) Math.floor(filters / 2)).hasBias(false) + .addLayer(prefix+"_conv2", ConvolutionLayer.builder(1,1).stride(1,1).nOut((int) Math.floor(filters / 2)).hasBias(false) .convolutionMode(ConvolutionMode.Same).build(), prefix+"_avgpool2") .addVertex(prefix+"_concat1", new MergeVertex(), prefix+"_conv1", prefix+"_conv2") - .addLayer(prefix+"_bn1", new BatchNormalization.Builder().eps(1e-3).gamma(0.9997) + .addLayer(prefix+"_bn1",BatchNormalization.builder().eps(1e-3).gamma(0.9997) .build(), prefix+"_concat1"); outputName = prefix+"_bn1"; @@ -93,10 +93,10 @@ public class NASNetHelper { if(inputShape[3] != filters) { graphBuilder - .addLayer(prefix+"_projection_relu", new ActivationLayer(Activation.RELU), outputName) - .addLayer(prefix+"_projection_conv", new ConvolutionLayer.Builder(1,1).stride(1,1).nOut(filters).hasBias(false) + .addLayer(prefix+"_projection_relu", ActivationLayer.builder(Activation.RELU).build(), outputName) + .addLayer(prefix+"_projection_conv", ConvolutionLayer.builder(1,1).stride(1,1).nOut(filters).hasBias(false) .convolutionMode(ConvolutionMode.Same).build(), prefix+"_projection_relu") - .addLayer(prefix+"_projection_bn", new BatchNormalization.Builder().eps(1e-3).gamma(0.9997) + .addLayer(prefix+"_projection_bn",BatchNormalization.builder().eps(1e-3).gamma(0.9997) .build(), prefix+"_projection_conv"); outputName = prefix+"_projection_bn"; } @@ -111,10 +111,10 @@ public class NASNetHelper { // top block graphBuilder - .addLayer(prefix+"_relu1", new ActivationLayer(Activation.RELU), topAdjust) - .addLayer(prefix+"_conv1", new ConvolutionLayer.Builder(1,1).stride(1,1).nOut(filters).hasBias(false) + .addLayer(prefix+"_relu1", ActivationLayer.builder(Activation.RELU).build(), topAdjust) + .addLayer(prefix+"_conv1", ConvolutionLayer.builder(1,1).stride(1,1).nOut(filters).hasBias(false) .convolutionMode(ConvolutionMode.Same).build(), prefix+"_relu1") - .addLayer(prefix+"_bn1", new BatchNormalization.Builder().eps(1e-3).gamma(0.9997) + .addLayer(prefix+"_bn1",BatchNormalization.builder().eps(1e-3).gamma(0.9997) .build(), prefix+"_conv1"); // block 1 @@ -129,15 +129,15 @@ public class NASNetHelper { // block 3 graphBuilder - .addLayer(prefix+"_left3", new SubsamplingLayer.Builder(PoolingType.AVG).kernelSize(3,3).stride(1,1) + .addLayer(prefix+"_left3", SubsamplingLayer.builder(PoolingType.AVG).kernelSize(3,3).stride(1,1) .convolutionMode(ConvolutionMode.Same).build(), prefix+"_bn1") .addVertex(prefix+"_add3", new ElementWiseVertex(ElementWiseVertex.Op.Add), prefix+"_left3", topAdjust); // block 4 graphBuilder - .addLayer(prefix+"_left4", new SubsamplingLayer.Builder(PoolingType.AVG).kernelSize(3,3).stride(1,1) + .addLayer(prefix+"_left4", SubsamplingLayer.builder(PoolingType.AVG).kernelSize(3,3).stride(1,1) .convolutionMode(ConvolutionMode.Same).build(), topAdjust) - .addLayer(prefix+"_right4", new SubsamplingLayer.Builder(PoolingType.AVG).kernelSize(3,3).stride(1,1) + .addLayer(prefix+"_right4", SubsamplingLayer.builder(PoolingType.AVG).kernelSize(3,3).stride(1,1) .convolutionMode(ConvolutionMode.Same).build(), topAdjust) .addVertex(prefix+"_add4", new ElementWiseVertex(ElementWiseVertex.Op.Add), prefix+"_left4", prefix+"_right4"); @@ -160,10 +160,10 @@ public class NASNetHelper { // top block graphBuilder - .addLayer(prefix+"_relu1", new ActivationLayer(Activation.RELU), topAdjust) - .addLayer(prefix+"_conv1", new ConvolutionLayer.Builder(1,1).stride(1,1).nOut(filters).hasBias(false) + 
.addLayer(prefix+"_relu1", ActivationLayer.builder(Activation.RELU).build(), topAdjust) + .addLayer(prefix+"_conv1", ConvolutionLayer.builder(1,1).stride(1,1).nOut(filters).hasBias(false) .convolutionMode(ConvolutionMode.Same).build(), prefix+"_relu1") - .addLayer(prefix+"_bn1", new BatchNormalization.Builder().eps(1e-3).gamma(0.9997) + .addLayer(prefix+"_bn1",BatchNormalization.builder().eps(1e-3).gamma(0.9997) .build(), prefix+"_conv1"); // block 1 @@ -172,27 +172,27 @@ public class NASNetHelper { graphBuilder.addVertex(prefix+"_add1", new ElementWiseVertex(ElementWiseVertex.Op.Add), left1, right1); // block 2 - graphBuilder.addLayer(prefix+"_left2", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(3,3).stride(2,2) + graphBuilder.addLayer(prefix+"_left2", SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX).kernelSize(3,3).stride(2,2) .convolutionMode(ConvolutionMode.Same).build(), prefix+"_bn1"); String right2 = sepConvBlock(graphBuilder, filters, 3, 1, prefix+"_right2", topAdjust); graphBuilder.addVertex(prefix+"_add2", new ElementWiseVertex(ElementWiseVertex.Op.Add), prefix+"_left2", right2); // block 3 - graphBuilder.addLayer(prefix+"_left3", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.AVG).kernelSize(3,3).stride(2,2) + graphBuilder.addLayer(prefix+"_left3", SubsamplingLayer.builder(SubsamplingLayer.PoolingType.AVG).kernelSize(3,3).stride(2,2) .convolutionMode(ConvolutionMode.Same).build(), prefix+"_bn1"); String right3 = sepConvBlock(graphBuilder, filters, 5, 2, prefix+"_right3", topAdjust); graphBuilder.addVertex(prefix+"_add3", new ElementWiseVertex(ElementWiseVertex.Op.Add), prefix+"_left3", right3); // block 4 graphBuilder - .addLayer(prefix+"_left4", new SubsamplingLayer.Builder(PoolingType.AVG).kernelSize(3,3).stride(1,1) + .addLayer(prefix+"_left4", SubsamplingLayer.builder(PoolingType.AVG).kernelSize(3,3).stride(1,1) .convolutionMode(ConvolutionMode.Same).build(), prefix+"_add1") .addVertex(prefix+"_add4", new ElementWiseVertex(ElementWiseVertex.Op.Add), prefix+"_add2", prefix+"_left4"); // block 5 String left5 = sepConvBlock(graphBuilder, filters, 3, 2, prefix+"_left5", prefix+"_add1"); graphBuilder - .addLayer(prefix+"_right5", new SubsamplingLayer.Builder(PoolingType.MAX).kernelSize(3,3).stride(2,2) + .addLayer(prefix+"_right5", SubsamplingLayer.builder(PoolingType.MAX).kernelSize(3,3).stride(2,2) .convolutionMode(ConvolutionMode.Same).build(), prefix+"_bn1") .addVertex(prefix+"_add5", new ElementWiseVertex(ElementWiseVertex.Op.Add), left5, prefix+"_right5"); diff --git a/cavis-zoo/cavis-zoo-models/src/test/java/org/deeplearning4j/zoo/MiscTests.java b/cavis-zoo/cavis-zoo-models/src/test/java/org/deeplearning4j/zoo/MiscTests.java index 04a023596..1fb10a855 100644 --- a/cavis-zoo/cavis-zoo-models/src/test/java/org/deeplearning4j/zoo/MiscTests.java +++ b/cavis-zoo/cavis-zoo-models/src/test/java/org/deeplearning4j/zoo/MiscTests.java @@ -52,7 +52,7 @@ public class MiscTests extends BaseDL4JTest { .setFeatureExtractor("fc2") .removeVertexKeepConnections("predictions") .addLayer("predictions", - new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) + OutputLayer.builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .nIn(4096).nOut(2) .weightInit(WeightInit.XAVIER) .activation(Activation.SOFTMAX).build(), "fc2") diff --git a/cavis-zoo/cavis-zoo-models/src/test/java/org/deeplearning4j/zoo/TestInstantiation.java b/cavis-zoo/cavis-zoo-models/src/test/java/org/deeplearning4j/zoo/TestInstantiation.java index 
27eb6e23d..2cdf0f9b6 100644 --- a/cavis-zoo/cavis-zoo-models/src/test/java/org/deeplearning4j/zoo/TestInstantiation.java +++ b/cavis-zoo/cavis-zoo-models/src/test/java/org/deeplearning4j/zoo/TestInstantiation.java @@ -20,6 +20,11 @@ package org.deeplearning4j.zoo; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assumptions.assumeTrue; + +import java.io.IOException; import lombok.extern.slf4j.Slf4j; import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.BaseDL4JTest; @@ -35,7 +40,6 @@ import org.deeplearning4j.nn.transferlearning.TransferLearningHelper; import org.deeplearning4j.zoo.model.*; import org.deeplearning4j.zoo.model.helper.DarknetHelper; import org.junit.jupiter.api.AfterEach; - import org.junit.jupiter.api.Test; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.api.buffer.DataType; @@ -44,262 +48,322 @@ import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.lossfunctions.LossFunctions; -import java.io.IOException; - -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assumptions.assumeTrue; - @Slf4j -////@Ignore("Times out too often") +//// @Ignore("Times out too often") public class TestInstantiation extends BaseDL4JTest { - protected static void ignoreIfCuda(){ - String backend = Nd4j.getExecutioner().getEnvironmentInformation().getProperty("backend"); - if("CUDA".equalsIgnoreCase(backend)) { - log.warn("IGNORING TEST ON CUDA DUE TO CI CRASHES - SEE ISSUE #7657"); - assumeTrue(false); - } + protected static void ignoreIfCuda() { + String backend = Nd4j.getExecutioner().getEnvironmentInformation().getProperty("backend"); + if ("CUDA".equalsIgnoreCase(backend)) { + log.warn("IGNORING TEST ON CUDA DUE TO CI CRASHES - SEE ISSUE #7657"); + assumeTrue(false); + } + } + + public static void runTest(ZooModel model, String modelName, int numClasses) throws Exception { + ignoreIfCuda(); + int gridWidth = -1; + int gridHeight = -1; + if (modelName.equals("TinyYOLO") || modelName.equals("YOLO2")) { + int[] inputShapes = model.metaData().getInputShape()[0]; + gridWidth = DarknetHelper.getGridWidth(inputShapes); + gridHeight = DarknetHelper.getGridHeight(inputShapes); + numClasses += 4; } - @AfterEach - public void after() throws Exception { - Nd4j.getWorkspaceManager().destroyAllWorkspacesForCurrentThread(); - System.gc(); - Thread.sleep(1000); - System.gc(); + // set up data iterator + int[] inputShape = model.metaData().getInputShape()[0]; + DataSetIterator iter = + new BenchmarkDataSetIterator( + new int[] {8, inputShape[0], inputShape[1], inputShape[2]}, + numClasses, + 1, + gridWidth, + gridHeight); + + IModel initializedModel = model.init(); + AsyncDataSetIterator async = new AsyncDataSetIterator(iter); + if (initializedModel instanceof MultiLayerNetwork) { + ((MultiLayerNetwork) initializedModel).fit(async); + } else { + ((ComputationGraph) initializedModel).fit(async); } + async.shutdown(); - @Override - public DataType getDataType(){ - return DataType.FLOAT; + // clean up for current model + model = null; + initializedModel = null; + async = null; + iter = null; + Nd4j.getWorkspaceManager().destroyAllWorkspacesForCurrentThread(); + System.gc(); + Thread.sleep(1000); + System.gc(); + } + + @AfterEach + public void after() throws Exception { + 
Nd4j.getWorkspaceManager().destroyAllWorkspacesForCurrentThread();
+    System.gc();
+    Thread.sleep(1000);
+    System.gc();
+  }
+
+  @Override
+  public DataType getDataType() {
+    return DataType.FLOAT;
+  }
+
+  @Test
+  public void testCnnTrainingDarknet() throws Exception {
+    runTest(Darknet19.builder().numClasses(10).build(), "Darknet19", 10);
+  }
+
+  @Test
+  public void testCnnTrainingTinyYOLO() throws Exception {
+    runTest(TinyYOLO.builder().numClasses(10).build(), "TinyYOLO", 10);
+  }
+
+  @Test // @Ignore("AB 2019/05/28 - Crashing on CI linux-x86-64 CPU only - Issue #7657")
+  public void testCnnTrainingYOLO2() throws Exception {
+    runTest(YOLO2.builder().numClasses(10).build(), "YOLO2", 10);
+  }
+
+  @Test
+  public void testInitPretrained() throws IOException {
+    ignoreIfCuda();
+    ZooModel model =
+        ResNet50.builder()
+            .numClasses(0)
+            .build(); // num labels doesn't matter since we're getting pretrained imagenet
+    assertTrue(model.pretrainedAvailable(PretrainedType.IMAGENET));
+
+    ComputationGraph initializedModel = (ComputationGraph) model.initPretrained();
+    INDArray f = Nd4j.rand(1, 3, 224, 224);
+    INDArray[] result = initializedModel.output(f);
+    assertArrayEquals(result[0].shape(), new long[] {1, 1000});
+
+    // Test fitting. Note we need to use transfer learning, as ResNet50 has a dense layer, not an
+    // OutputLayer
+    initializedModel =
+        new TransferLearning.GraphBuilder(initializedModel)
+            .removeVertexAndConnections("fc1000")
+            .addLayer(
+                "fc1000",
+                OutputLayer.builder()
+                    .lossFunction(LossFunctions.LossFunction.MCXENT)
+                    .nIn(2048)
+                    .nOut(1000)
+                    .activation(Activation.SOFTMAX)
+                    .build(),
+                "flatten_1")
+            .setOutputs("fc1000")
+            .build();
+    initializedModel.fit(
+        new org.nd4j.linalg.dataset.DataSet(f, TestUtils.randomOneHot(1, 1000, 12345)));
+  }
+
+  @Test
+  public void testInitPretrainedVGG16() throws Exception {
+    testInitPretrained(
+        VGG16.builder().numClasses(0).build(), new long[] {1, 3, 224, 224}, new long[] {1, 1000});
+  }
+
+  @Test
+  public void testInitPretrainedVGG19() throws Exception {
+    testInitPretrained(
+        VGG19.builder().numClasses(0).build(), new long[] {1, 3, 224, 224}, new long[] {1, 1000});
+  }
+
+  @Test // @Ignore("AB 2019/05/28 - JVM crash on linux CUDA CI machines - Issue 7657")
+  public void testInitPretrainedDarknet19() throws Exception {
+    testInitPretrained(
+        Darknet19.builder().numClasses(0).build(),
+        new long[] {1, 3, 224, 224},
+        new long[] {1, 1000});
+  }
+
+  @Test // @Ignore("AB 2019/05/28 - JVM crash on linux CUDA CI machines - Issue 7657")
+  public void testInitPretrainedDarknet19S2() throws Exception {
+    testInitPretrained(
+        Darknet19.builder().numClasses(0).inputShape(new int[] {3, 448, 448}).build(),
+        new long[] {1, 3, 448, 448},
+        new long[] {1, 1000});
+  }
+
+  @Test
+  public void testInitPretrainedTinyYOLO() throws Exception {
+    testInitPretrained(
+        TinyYOLO.builder().numClasses(0).build(),
+        new long[] {1, 3, 416, 416},
+        new long[] {1, 125, 13, 13});
+  }
+
+  @Test
+  public void testInitPretrainedYOLO2() throws Exception {
+    testInitPretrained(
+        YOLO2.builder().numClasses(0).build(),
+        new long[] {1, 3, 608, 608},
+        new long[] {1, 425, 19, 19});
+  }
+
+  @Test
+  public void testInitPretrainedXception() throws Exception {
+    testInitPretrained(
+        Xception.builder().numClasses(0).build(),
+        new long[] {1, 3, 299, 299},
+        new long[] {1, 1000});
+  }
+
+  @Test
+  public void testInitPretrainedSqueezenet() throws Exception {
+    testInitPretrained(
+        SqueezeNet.builder().numClasses(0).build(),
+        new long[] {1, 3, 227, 227},
+        new long[]
{1, 1000}); + } + + public void testInitPretrained(ZooModel model, long[] inShape, long[] outShape) throws Exception { + ignoreIfCuda(); + assertTrue(model.pretrainedAvailable(PretrainedType.IMAGENET)); + + ComputationGraph initializedModel = (ComputationGraph) model.initPretrained(); + INDArray[] result = initializedModel.output(Nd4j.rand(inShape)); + assertArrayEquals(result[0].shape(), outShape); + + // clean up for current model + Nd4j.getWorkspaceManager().destroyAllWorkspacesForCurrentThread(); + initializedModel.getModelParams().close(); + for (INDArray arr : result) { + arr.close(); } + System.gc(); + } - @Test - public void testCnnTrainingDarknet() throws Exception { - runTest(Darknet19.builder().numClasses(10).build(), "Darknet19", 10); - } + @Test + public void testInitRandomModelResNet50() throws IOException { + testInitRandomModel( + ResNet50.builder().numClasses(1000).build(), + new long[] {1, 3, 224, 224}, + new long[] {1, 1000}); + } - @Test - public void testCnnTrainingTinyYOLO() throws Exception { - runTest(TinyYOLO.builder().numClasses(10).build(), "TinyYOLO", 10); - } + @Test + public void testInitRandomModelVGG16() throws IOException { + testInitRandomModel( + VGG16.builder().numClasses(1000).build(), + new long[] {1, 3, 224, 224}, + new long[] {1, 1000}); + } - @Test //@Ignore("AB 2019/05/28 - Crashing on CI linux-x86-64 CPU only - Issue #7657") - public void testCnnTrainingYOLO2() throws Exception { - runTest(YOLO2.builder().numClasses(10).build(), "YOLO2", 10); - } + @Test + public void testInitRandomModelVGG19() throws IOException { + testInitRandomModel( + VGG19.builder().numClasses(1000).build(), + new long[] {1, 3, 224, 224}, + new long[] {1, 1000}); + } - public static void runTest(ZooModel model, String modelName, int numClasses) throws Exception { - ignoreIfCuda(); - int gridWidth = -1; - int gridHeight = -1; - if (modelName.equals("TinyYOLO") || modelName.equals("YOLO2")) { - int[] inputShapes = model.metaData().getInputShape()[0]; - gridWidth = DarknetHelper.getGridWidth(inputShapes); - gridHeight = DarknetHelper.getGridHeight(inputShapes); - numClasses += 4; - } + @Test + public void testInitRandomModelDarknet19() throws IOException { + testInitRandomModel( + Darknet19.builder().numClasses(1000).build(), + new long[] {1, 3, 224, 224}, + new long[] {1, 1000}); + } - // set up data iterator - int[] inputShape = model.metaData().getInputShape()[0]; - DataSetIterator iter = new BenchmarkDataSetIterator( - new int[]{8, inputShape[0], inputShape[1], inputShape[2]}, numClasses, 1, - gridWidth, gridHeight); + @Test + public void testInitRandomModelDarknet19_2() throws IOException { + testInitRandomModel( + Darknet19.builder().inputShape(new int[] {3, 448, 448}).numClasses(1000).build(), + new long[] {1, 3, 448, 448}, + new long[] {1, 1000}); + } - IModel initializedModel = model.init(); - AsyncDataSetIterator async = new AsyncDataSetIterator(iter); - if (initializedModel instanceof MultiLayerNetwork) { - ((MultiLayerNetwork) initializedModel).fit(async); - } else { - ((ComputationGraph) initializedModel).fit(async); - } - async.shutdown(); + @Test + public void testInitRandomModelXception() throws IOException { + testInitRandomModel( + Xception.builder().numClasses(1000).build(), + new long[] {1, 3, 299, 299}, + new long[] {1, 1000}); + } - // clean up for current model - model = null; - initializedModel = null; - async = null; - iter = null; - Nd4j.getWorkspaceManager().destroyAllWorkspacesForCurrentThread(); - System.gc(); - Thread.sleep(1000); - System.gc(); - 
} + @Test // @Ignore("AB - 2019/05/28 - JVM crash on CI - intermittent? Issue 7657") + public void testInitRandomModelSqueezenet() throws IOException { + testInitRandomModel( + SqueezeNet.builder().numClasses(1000).build(), + new long[] {1, 3, 227, 227}, + new long[] {1, 1000}); + } + @Test + public void testInitRandomModelFaceNetNN4Small2() throws IOException { + testInitRandomModel( + FaceNetNN4Small2.builder().embeddingSize(100).numClasses(10).build(), + new long[] {1, 3, 64, 64}, + new long[] {1, 10}); + } - @Test - public void testInitPretrained() throws IOException { - ignoreIfCuda(); - ZooModel model = ResNet50.builder().numClasses(0).build(); //num labels doesn't matter since we're getting pretrained imagenet - assertTrue(model.pretrainedAvailable(PretrainedType.IMAGENET)); + @Test // @Ignore("AB 2019/05/29 - Crashing on CI linux-x86-64 CPU only - Issue #7657") + public void testInitRandomModelUNet() throws IOException { + testInitRandomModel( + UNet.builder().build(), new long[] {1, 3, 512, 512}, new long[] {1, 1, 512, 512}); + } - ComputationGraph initializedModel = (ComputationGraph) model.initPretrained(); - INDArray f = Nd4j.rand(1, 3, 224, 224); - INDArray[] result = initializedModel.output(f); - assertArrayEquals(result[0].shape(), new long[]{1, 1000}); + public void testInitRandomModel(ZooModel model, long[] inShape, long[] outShape) { + ignoreIfCuda(); + // Test initialization of NON-PRETRAINED models - //Test fitting. Not ewe need to use transfer learning, as ResNet50 has a dense layer, not an OutputLayer - initializedModel = new TransferLearning.GraphBuilder(initializedModel) - .removeVertexAndConnections("fc1000") - .addLayer("fc1000", new OutputLayer.Builder() - .lossFunction(LossFunctions.LossFunction.MCXENT) - .nIn(2048).nOut(1000).activation(Activation.SOFTMAX).build(), "flatten_1") - .setOutputs("fc1000") - .build(); - initializedModel.fit(new org.nd4j.linalg.dataset.DataSet(f, TestUtils.randomOneHot(1, 1000, 12345))); + log.info("Testing {}", model.getClass().getSimpleName()); + ComputationGraph initializedModel = model.init(); + INDArray f = Nd4j.rand(DataType.FLOAT, inShape); + INDArray[] result = initializedModel.output(f); + assertArrayEquals(result[0].shape(), outShape); + INDArray l = + outShape.length == 2 + ? 
TestUtils.randomOneHot(1, (int) outShape[1], 12345) + : Nd4j.rand(DataType.FLOAT, outShape); + initializedModel.fit(new org.nd4j.linalg.dataset.DataSet(f, l)); - } + // clean up for current model + Nd4j.getWorkspaceManager().destroyAllWorkspacesForCurrentThread(); + f.close(); + l.close(); + initializedModel.getModelParams().close(); + initializedModel.getFlattenedGradients().close(); + System.gc(); + } - @Test - public void testInitPretrainedVGG16() throws Exception { - testInitPretrained(VGG16.builder().numClasses(0).build(), new long[]{1,3,224,224}, new long[]{1,1000}); - } + @Test + public void testYolo4635() throws Exception { + ignoreIfCuda(); + // https://github.com/deeplearning4j/deeplearning4j/issues/4635 - @Test - public void testInitPretrainedVGG19() throws Exception { - testInitPretrained(VGG19.builder().numClasses(0).build(), new long[]{1,3,224,224}, new long[]{1,1000}); - } + int nClasses = 10; + TinyYOLO model = TinyYOLO.builder().numClasses(nClasses).build(); + ComputationGraph computationGraph = (ComputationGraph) model.initPretrained(); + TransferLearningHelper transferLearningHelper = + new TransferLearningHelper(computationGraph, "conv2d_9"); + } - @Test //@Ignore("AB 2019/05/28 - JVM crash on linux CUDA CI machines - Issue 7657") - public void testInitPretrainedDarknet19() throws Exception { - testInitPretrained(Darknet19.builder().numClasses(0).build(), new long[]{1,3,224,224}, new long[]{1,1000}); - } + @Test + public void testTransferLearning() throws Exception { + ignoreIfCuda(); + // https://github.com/deeplearning4j/deeplearning4j/issues/7193 - @Test //@Ignore("AB 2019/05/28 - JVM crash on linux CUDA CI machines - Issue 7657") - public void testInitPretrainedDarknet19S2() throws Exception { - testInitPretrained(Darknet19.builder().numClasses(0).inputShape(new int[]{3,448,448}).build(), new long[]{1,3,448,448}, new long[]{1,1000}); - } + ComputationGraph cg = (ComputationGraph) ResNet50.builder().build().initPretrained(); - @Test - public void testInitPretrainedTinyYOLO() throws Exception { - testInitPretrained(TinyYOLO.builder().numClasses(0).build(), new long[]{1,3,416,416}, new long[]{1,125,13,13}); - } - - @Test - public void testInitPretrainedYOLO2() throws Exception { - testInitPretrained(YOLO2.builder().numClasses(0).build(), new long[]{1,3,608,608}, new long[]{1, 425, 19, 19}); - } - - @Test - public void testInitPretrainedXception() throws Exception { - testInitPretrained(Xception.builder().numClasses(0).build(), new long[]{1,3,299,299}, new long[]{1, 1000}); - } - - @Test - public void testInitPretrainedSqueezenet() throws Exception { - testInitPretrained(SqueezeNet.builder().numClasses(0).build(), new long[]{1,3,227,227}, new long[]{1, 1000}); - } - - public void testInitPretrained(ZooModel model, long[] inShape, long[] outShape) throws Exception { - ignoreIfCuda(); - assertTrue(model.pretrainedAvailable(PretrainedType.IMAGENET)); - - ComputationGraph initializedModel = (ComputationGraph) model.initPretrained(); - INDArray[] result = initializedModel.output(Nd4j.rand(inShape)); - assertArrayEquals(result[0].shape(),outShape); - - // clean up for current model - Nd4j.getWorkspaceManager().destroyAllWorkspacesForCurrentThread(); - initializedModel.getModelParams().close(); - for(INDArray arr : result){ - arr.close(); - } - System.gc(); - } - - - @Test - public void testInitRandomModelResNet50() throws IOException { - testInitRandomModel(ResNet50.builder().numClasses(1000).build(), new long[]{1,3,224,224}, new long[]{1,1000}); - } - - @Test - public void 
testInitRandomModelVGG16() throws IOException { - testInitRandomModel(VGG16.builder().numClasses(1000).build(), new long[]{1,3,224,224}, new long[]{1,1000}); - } - - @Test - public void testInitRandomModelVGG19() throws IOException { - testInitRandomModel(VGG19.builder().numClasses(1000).build(), new long[]{1,3,224,224}, new long[]{1,1000}); - } - - @Test - public void testInitRandomModelDarknet19() throws IOException { - testInitRandomModel(Darknet19.builder().numClasses(1000).build(), new long[]{1,3,224,224}, new long[]{1,1000}); - } - - @Test - public void testInitRandomModelDarknet19_2() throws IOException { - testInitRandomModel(Darknet19.builder().inputShape(new int[]{3,448,448}).numClasses(1000).build(), new long[]{1,3,448,448}, new long[]{1,1000}); - } - - @Test - public void testInitRandomModelXception() throws IOException { - testInitRandomModel(Xception.builder().numClasses(1000).build(), new long[]{1,3,299,299}, new long[]{1,1000}); - } - - @Test //@Ignore("AB - 2019/05/28 - JVM crash on CI - intermittent? Issue 7657") - public void testInitRandomModelSqueezenet() throws IOException { - testInitRandomModel(SqueezeNet.builder().numClasses(1000).build(), new long[]{1,3,227,227}, new long[]{1,1000}); - } - - @Test - public void testInitRandomModelFaceNetNN4Small2() throws IOException { - testInitRandomModel(FaceNetNN4Small2.builder().embeddingSize(100).numClasses(10).build(), new long[]{1,3,64,64}, new long[]{1,10}); - } - - @Test //@Ignore("AB 2019/05/29 - Crashing on CI linux-x86-64 CPU only - Issue #7657") - public void testInitRandomModelUNet() throws IOException { - testInitRandomModel(UNet.builder().build(), new long[]{1,3,512,512}, new long[]{1,1,512,512}); - } - - - public void testInitRandomModel(ZooModel model, long[] inShape, long[] outShape){ - ignoreIfCuda(); - //Test initialization of NON-PRETRAINED models - - log.info("Testing {}", model.getClass().getSimpleName()); - ComputationGraph initializedModel = model.init(); - INDArray f = Nd4j.rand(DataType.FLOAT, inShape); - INDArray[] result = initializedModel.output(f); - assertArrayEquals(result[0].shape(), outShape); - INDArray l = outShape.length == 2 ? 
TestUtils.randomOneHot(1, (int)outShape[1], 12345) : Nd4j.rand(DataType.FLOAT, outShape); - initializedModel.fit(new org.nd4j.linalg.dataset.DataSet(f, l)); - - // clean up for current model - Nd4j.getWorkspaceManager().destroyAllWorkspacesForCurrentThread(); - f.close(); - l.close(); - initializedModel.getModelParams().close(); - initializedModel.getFlattenedGradients().close(); - System.gc(); - } - - - @Test - public void testYolo4635() throws Exception { - ignoreIfCuda(); - //https://github.com/deeplearning4j/deeplearning4j/issues/4635 - - int nClasses = 10; - TinyYOLO model = TinyYOLO.builder().numClasses(nClasses).build(); - ComputationGraph computationGraph = (ComputationGraph) model.initPretrained(); - TransferLearningHelper transferLearningHelper = new TransferLearningHelper(computationGraph, "conv2d_9"); - } - - @Test - public void testTransferLearning() throws Exception { - ignoreIfCuda(); - //https://github.com/deeplearning4j/deeplearning4j/issues/7193 - - ComputationGraph cg = (ComputationGraph) ResNet50.builder().build().initPretrained(); - - cg = new TransferLearning.GraphBuilder(cg) - .addLayer("out", new LossLayer.Builder().lossFunction(LossFunctions.LossFunction.MCXENT).activation(Activation.IDENTITY).build(), "fc1000") - .setInputTypes(InputType.convolutional(224, 224, 3)) - .setOutputs("out") - .build(); - - } + cg = + new TransferLearning.GraphBuilder(cg) + .addLayer( + "out", + LossLayer.builder() + .lossFunction(LossFunctions.LossFunction.MCXENT.getILossFunction()) + .activation(Activation.IDENTITY) + .build(), + "fc1000") + .setInputTypes(InputType.convolutional(224, 224, 3)) + .setOutputs("out") + .build(); + } } diff --git a/vsconfig.gradle b/vsconfig.gradle index a247ceb10..615b07bc5 100644 --- a/vsconfig.gradle +++ b/vsconfig.gradle @@ -28,7 +28,9 @@ ****************************************************************************/ if (!hasProperty("VISUAL_STUDIO_INSTALL_DIR") && osdetector.os.equals("windows")) { - configureVisualStudio() + if (project.hasProperty("skip-native") && !project.getProperty("skip-native").equals("true")) { + configureVisualStudio() + } } def configureVisualStudio() {
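For orientation, the following is a minimal, illustrative sketch (not part of the patch) of the static builder() entry points the updated code relies on. The Activation.RELU argument, the eps/gamma values, and the SeparableConvolution2D options mirror calls that appear verbatim in the diff; the class name, variable names, and the nOut value are assumptions made purely for this example.

import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.layers.ActivationLayer;
import org.deeplearning4j.nn.conf.layers.BatchNormalization;
import org.deeplearning4j.nn.conf.layers.SeparableConvolution2D;
import org.nd4j.linalg.activations.Activation;

public class BuilderStyleSketch {
  public static void main(String[] args) {
    // ActivationLayer: the activation function is passed to builder(...) instead of a constructor.
    ActivationLayer relu = ActivationLayer.builder(Activation.RELU).build();

    // BatchNormalization: options are set fluently on the builder, then build() produces the layer configuration.
    BatchNormalization bn = BatchNormalization.builder().eps(1e-3).gamma(0.9997).build();

    // SeparableConvolution2D: the kernel size goes to builder(...); the remaining options stay fluent.
    SeparableConvolution2D sep =
        SeparableConvolution2D.builder(3, 3)
            .stride(1, 1)
            .nOut(32) // illustrative channel count, not taken from the patch
            .hasBias(false)
            .convolutionMode(ConvolutionMode.Same)
            .build();

    System.out.println(relu + "\n" + bn + "\n" + sep);
  }
}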