From 24466a8fd4c2f74f04cb03fa5d4a234c8d88b5c3 Mon Sep 17 00:00:00 2001 From: brian Date: Mon, 8 May 2023 09:34:44 +0200 Subject: [PATCH] Fixing Tests --- .gitignore | 13 + .../ai/nd4j/tests/ExploreParamsTest.java | 167 +++ .../src/test/java/net/brutex/gan/App.java | 49 +- .../java/net/brutex/gan/MnistSimpleGAN.java | 8 +- .../testcases/dl4j/CNN1DTestCases.java | 2 +- .../org/nd4j/linalg/api/ndarray/INDArray.java | 14 +- .../java/org/nd4j/linalg/dataset/DataSet.java | 1 + .../java/org/nd4j/linalg/factory/Nd4j.java | 2 +- .../iterator/DataSetIteratorTest.java | 6 +- .../earlystopping/TestEarlyStopping.java | 9 +- .../exceptions/TestInvalidConfigurations.java | 2 +- .../gradientcheck/BNGradientCheckTest.java | 4 +- .../gradientcheck/CNN1DGradientCheckTest.java | 25 +- .../CNN1DNewGradientCheckTest.java | 811 ++++++++++++ .../gradientcheck/CNN3DGradientCheckTest.java | 4 +- .../gradientcheck/CNNGradientCheckTest.java | 4 +- .../LossFunctionGradientCheck.java | 3 +- .../deeplearning4j/nn/dtypes/DTypeTests.java | 2 +- .../convolution/ConvDataFormatTests.java | 4 +- .../convolution/ConvolutionLayerTest.java | 6 +- .../convolution/TestConvolutionModes.java | 14 +- .../normalization/BatchNormalizationTest.java | 2 +- .../nn/weights/WeightInitIdentityTest.java | 2 +- .../TestDistributionDeserializer.java | 5 +- .../convolution/CudnnConvolutionHelper.java | 10 +- .../nn/modelimport/keras/KerasModel.java | 6 +- .../KerasAtrousConvolution1D.java | 18 +- .../KerasAtrousConvolution2D.java | 3 +- .../convolutional/KerasConvolution1D.java | 14 +- .../convolutional/KerasConvolution2D.java | 3 +- .../keras/configurations/JsonTest.java | 4 +- .../keras/e2e/KerasModelEndToEndTest.java | 9 +- .../KerasAtrousConvolution1DTest.java | 4 +- .../convolution/KerasConvolution1DTest.java | 4 +- .../dnn/api/INeuralNetworkConfiguration.java | 2 - .../main/java/net/brutex/ai/dnn/api/NN.java | 1 - .../dnn/networks/ArtificialNeuralNetwork.java | 2 - .../EarlyStoppingConfiguration.java | 9 +- .../EarlyStoppingModelSaver.java | 11 +- .../earlystopping/EarlyStoppingResult.java | 5 +- .../saver/InMemoryModelSaver.java | 5 +- .../saver/LocalFileGraphSaver.java | 7 +- .../saver/LocalFileModelSaver.java | 7 +- .../scorecalc/AutoencoderScoreCalculator.java | 2 +- .../scorecalc/DataSetLossCalculator.java | 4 +- .../scorecalc/DataSetLossCalculatorCG.java | 4 +- .../scorecalc/ScoreCalculator.java | 3 +- .../VAEReconErrorScoreCalculator.java | 2 +- .../base/BaseIEvaluationScoreCalculator.java | 2 +- .../scorecalc/base/BaseScoreCalculator.java | 2 +- .../BestScoreEpochTerminationCondition.java | 2 +- .../EpochTerminationCondition.java | 2 - .../IterationTerminationCondition.java | 1 - .../MaxEpochsTerminationCondition.java | 4 +- ...MaxScoreIterationTerminationCondition.java | 2 +- .../MaxTimeIterationTerminationCondition.java | 3 +- ...eImprovementEpochTerminationCondition.java | 2 +- .../trainer/BaseEarlyStoppingTrainer.java | 13 +- .../trainer/EarlyStoppingGraphTrainer.java | 1 - .../deeplearning4j/eval/BaseEvaluation.java | 15 +- .../deeplearning4j/eval/ConfusionMatrix.java | 7 - .../org/deeplearning4j/eval/Evaluation.java | 9 +- .../eval/EvaluationCalibration.java | 2 +- .../eval/RegressionEvaluation.java | 3 +- .../deeplearning4j/eval/curves/Histogram.java | 2 +- .../eval/curves/PrecisionRecallCurve.java | 6 +- .../eval/curves/ReliabilityDiagram.java | 2 +- .../deeplearning4j/eval/curves/RocCurve.java | 3 +- .../deeplearning4j/eval/meta/Prediction.java | 1 - .../nn/adapters/YoloModelAdapter.java | 3 +- 
.../nn/api/AbstractParamInitializer.java | 1 - .../org/deeplearning4j/nn/api/Classifier.java | 4 +- .../api/ITraininableLayerConfiguration.java | 3 +- .../java/org/deeplearning4j/nn/api/Layer.java | 6 +- .../nn/api/ParamInitializer.java | 5 +- .../org/deeplearning4j/nn/api/Updater.java | 7 +- .../nn/api/layers/IOutputLayer.java | 2 +- .../nn/api/layers/LayerConstraint.java | 3 +- .../nn/api/layers/RecurrentLayer.java | 7 +- .../conf/ComputationGraphConfiguration.java | 22 +- .../deeplearning4j/nn/conf/DataFormat.java | 4 +- .../nn/conf/InputPreProcessor.java | 9 +- .../NeuralNetBaseBuilderConfiguration.java | 182 +-- .../nn/conf/NeuralNetConfiguration.java | 635 +++------ .../org/deeplearning4j/nn/conf/RNNFormat.java | 10 +- .../nn/conf/constraint/BaseConstraint.java | 8 +- .../nn/conf/constraint/MaxNormConstraint.java | 5 +- .../conf/constraint/MinMaxNormConstraint.java | 7 +- .../conf/constraint/UnitNormConstraint.java | 5 +- .../distribution/ConstantDistribution.java | 4 +- .../nn/conf/distribution/Distribution.java | 5 +- .../distribution/LogNormalDistribution.java | 4 +- .../conf/distribution/NormalDistribution.java | 18 +- .../distribution/OrthogonalDistribution.java | 4 +- .../TruncatedNormalDistribution.java | 4 +- .../distribution/UniformDistribution.java | 4 +- .../serde/LegacyDistributionDeserializer.java | 4 +- .../serde/LegacyDistributionHelper.java | 2 +- .../nn/conf/dropout/AlphaDropout.java | 4 +- .../nn/conf/dropout/Dropout.java | 4 +- .../nn/conf/dropout/GaussianDropout.java | 4 +- .../nn/conf/dropout/GaussianNoise.java | 2 +- .../nn/conf/dropout/IDropout.java | 5 +- .../nn/conf/dropout/SpatialDropout.java | 4 +- .../nn/conf/graph/AttentionVertex.java | 5 +- .../nn/conf/graph/ElementWiseVertex.java | 2 +- .../nn/conf/graph/FrozenVertex.java | 2 +- .../nn/conf/graph/GraphVertex.java | 5 +- .../nn/conf/graph/L2NormalizeVertex.java | 2 +- .../nn/conf/graph/LayerVertex.java | 3 +- .../nn/conf/graph/MergeVertex.java | 1 - .../nn/conf/graph/ReshapeVertex.java | 5 +- .../nn/conf/graph/ScaleVertex.java | 2 +- .../nn/conf/graph/ShiftVertex.java | 2 +- .../nn/conf/graph/SubsetVertex.java | 5 +- .../nn/conf/graph/UnstackVertex.java | 2 +- .../rnn/DuplicateToTimeSeriesVertex.java | 2 +- .../nn/conf/graph/rnn/LastTimeStepVertex.java | 2 +- .../nn/conf/inputs/InputType.java | 19 +- .../conf/layers/AbstractConvolutionLayer.java | 142 ++ .../nn/conf/layers/AbstractLSTM.java | 2 - .../nn/conf/layers/ActivationLayer.java | 4 +- .../nn/conf/layers/AutoEncoder.java | 8 +- .../conf/layers/BaseLayerConfiguration.java | 82 +- .../nn/conf/layers/BaseOutputLayer.java | 111 +- .../nn/conf/layers/BaseRecurrentLayer.java | 1 - .../nn/conf/layers/BaseUpsamplingLayer.java | 1 + .../nn/conf/layers/BatchNormalization.java | 5 + .../nn/conf/layers/CapsuleLayer.java | 4 + .../nn/conf/layers/CapsuleStrengthLayer.java | 3 +- .../nn/conf/layers/CenterLossOutputLayer.java | 2 + .../nn/conf/layers/Cnn3DLossLayer.java | 3 +- .../nn/conf/layers/CnnLossLayer.java | 2 + .../nn/conf/layers/Convolution1D.java | 227 +++- ...tion1DLayer.java => Convolution1DNew.java} | 164 ++- .../nn/conf/layers/Convolution2D.java | 3 +- .../nn/conf/layers/Convolution3D.java | 7 +- .../nn/conf/layers/ConvolutionLayer.java | 104 +- .../nn/conf/layers/Deconvolution2D.java | 196 +-- .../nn/conf/layers/Deconvolution3D.java | 3 +- .../nn/conf/layers/DenseLayer.java | 13 +- .../conf/layers/DepthwiseConvolution2D.java | 9 +- .../nn/conf/layers/DropoutLayer.java | 4 +- .../nn/conf/layers/EmbeddingLayer.java | 3 +- 
.../conf/layers/EmbeddingSequenceLayer.java | 8 +- .../nn/conf/layers/FeedForwardLayer.java | 10 +- .../nn/conf/layers/GlobalPoolingLayer.java | 8 +- .../conf/layers/GravesBidirectionalLSTM.java | 3 +- .../nn/conf/layers/GravesLSTM.java | 3 +- .../nn/conf/layers/InputTypeUtil.java | 4 +- .../deeplearning4j/nn/conf/layers/LSTM.java | 3 +- .../nn/conf/layers/LayerConfiguration.java | 24 +- .../nn/conf/layers/LayerValidation.java | 7 +- .../layers/LearnedSelfAttentionLayer.java | 3 +- .../layers/LocalResponseNormalization.java | 3 +- .../nn/conf/layers/LocallyConnected1D.java | 9 +- .../nn/conf/layers/LocallyConnected2D.java | 15 +- .../nn/conf/layers/LossLayer.java | 3 +- .../nn/conf/layers/NoParamLayer.java | 20 +- .../nn/conf/layers/OutputLayer.java | 5 +- .../nn/conf/layers/PReLULayer.java | 3 +- .../nn/conf/layers/Pooling1D.java | 4 +- .../nn/conf/layers/Pooling2D.java | 4 +- .../nn/conf/layers/PrimaryCapsules.java | 3 +- .../conf/layers/RecurrentAttentionLayer.java | 8 +- .../nn/conf/layers/RnnLossLayer.java | 3 +- .../nn/conf/layers/RnnOutputLayer.java | 2 + .../nn/conf/layers/SelfAttentionLayer.java | 3 +- .../conf/layers/SeparableConvolution2D.java | 3 +- .../nn/conf/layers/SpaceToBatchLayer.java | 3 +- .../nn/conf/layers/SpaceToDepthLayer.java | 8 +- .../nn/conf/layers/Subsampling1DLayer.java | 108 +- .../nn/conf/layers/Subsampling3DLayer.java | 7 +- .../nn/conf/layers/SubsamplingLayer.java | 48 +- .../nn/conf/layers/Upsampling1D.java | 8 +- .../nn/conf/layers/Upsampling2D.java | 10 +- .../nn/conf/layers/Upsampling3D.java | 9 +- .../nn/conf/layers/ZeroPadding1DLayer.java | 3 +- .../nn/conf/layers/ZeroPadding3DLayer.java | 10 +- .../nn/conf/layers/ZeroPaddingLayer.java | 8 +- .../conf/layers/convolutional/Cropping1D.java | 2 + .../conf/layers/convolutional/Cropping2D.java | 6 +- .../conf/layers/convolutional/Cropping3D.java | 8 +- .../misc/ElementWiseMultiplicationLayer.java | 10 +- .../nn/conf/layers/misc/FrozenLayer.java | 3 + .../layers/misc/FrozenLayerWithBackprop.java | 12 +- .../nn/conf/layers/misc/RepeatVector.java | 8 +- .../objdetect/BoundingBoxesDeserializer.java | 12 +- .../layers/objdetect/Yolo2OutputLayer.java | 4 +- .../conf/layers/recurrent/Bidirectional.java | 15 +- .../conf/layers/recurrent/LastTimeStep.java | 4 +- .../nn/conf/layers/recurrent/SimpleRnn.java | 9 +- .../layers/recurrent/TimeDistributed.java | 3 +- .../samediff/AbstractSameDiffLayer.java | 36 +- .../conf/layers/samediff/SDLayerParams.java | 12 +- .../conf/layers/samediff/SDVertexParams.java | 5 +- .../layers/samediff/SameDiffLambdaLayer.java | 2 +- .../layers/samediff/SameDiffLambdaVertex.java | 4 +- .../conf/layers/samediff/SameDiffLayer.java | 11 +- .../layers/samediff/SameDiffLayerUtils.java | 5 +- .../layers/samediff/SameDiffOutputLayer.java | 4 +- .../conf/layers/samediff/SameDiffVertex.java | 11 +- .../nn/conf/layers/util/MaskLayer.java | 10 +- .../nn/conf/layers/util/MaskZeroLayer.java | 9 +- .../CompositeReconstructionDistribution.java | 9 +- .../variational/LossFunctionWrapper.java | 18 +- .../ReconstructionDistribution.java | 4 +- .../variational/VariationalAutoencoder.java | 3 +- .../nn/conf/memory/LayerMemoryReport.java | 6 +- .../nn/conf/memory/MemoryReport.java | 22 +- .../nn/conf/memory/NetworkMemoryReport.java | 12 +- .../nn/conf/misc/DummyConfig.java | 3 +- .../nn/conf/ocnn/OCNNOutputLayer.java | 2 +- .../preprocessor/BaseInputPreProcessor.java | 2 +- .../Cnn3DToFeedForwardPreProcessor.java | 13 +- .../CnnToFeedForwardPreProcessor.java | 13 +- .../preprocessor/CnnToRnnPreProcessor.java 
| 15 +- .../ComposableInputPreProcessor.java | 8 +- .../FeedForwardToCnn3DPreProcessor.java | 13 +- .../FeedForwardToCnnPreProcessor.java | 15 +- .../FeedForwardToRnnPreProcessor.java | 10 +- .../preprocessor/RnnToCnnPreProcessor.java | 13 +- .../RnnToFeedForwardPreProcessor.java | 11 +- .../conf/serde/BaseNetConfigDeserializer.java | 28 +- .../nn/conf/serde/CavisMapper.java | 60 + ...utationGraphConfigurationDeserializer.java | 25 +- .../nn/conf/serde/JsonMappers.java | 8 +- .../NeuralNetConfigurationDeserializer.java | 25 +- .../serde/format/DataFormatDeserializer.java | 8 +- .../serde/format/DataFormatSerializer.java | 5 +- .../legacy/LegacyIntArrayDeserializer.java | 2 - .../conf/serde/legacy/LegacyJsonFormat.java | 8 +- .../nn/conf/stepfunctions/StepFunction.java | 1 - .../nn/conf/weightnoise/DropConnect.java | 6 +- .../nn/conf/weightnoise/IWeightNoise.java | 7 +- .../nn/conf/weightnoise/WeightNoise.java | 6 +- .../nn/gradient/DefaultGradient.java | 3 +- .../deeplearning4j/nn/gradient/Gradient.java | 4 +- .../nn/graph/ComputationGraph.java | 33 +- .../nn/graph/util/ComputationGraphUtil.java | 5 +- .../nn/graph/util/GraphIndices.java | 3 +- .../nn/graph/vertex/BaseGraphVertex.java | 7 +- .../nn/graph/vertex/BaseWrapperVertex.java | 7 +- .../nn/graph/vertex/GraphVertex.java | 9 +- .../nn/graph/vertex/VertexIndices.java | 3 +- .../graph/vertex/impl/ElementWiseVertex.java | 9 +- .../nn/graph/vertex/impl/FrozenVertex.java | 1 - .../nn/graph/vertex/impl/InputVertex.java | 4 +- .../graph/vertex/impl/L2NormalizeVertex.java | 6 +- .../nn/graph/vertex/impl/L2Vertex.java | 6 +- .../nn/graph/vertex/impl/LayerVertex.java | 11 +- .../nn/graph/vertex/impl/MergeVertex.java | 9 +- .../graph/vertex/impl/PoolHelperVertex.java | 6 +- .../graph/vertex/impl/PreprocessorVertex.java | 4 +- .../nn/graph/vertex/impl/ReshapeVertex.java | 2 +- .../nn/graph/vertex/impl/ScaleVertex.java | 6 +- .../nn/graph/vertex/impl/ShiftVertex.java | 6 +- .../nn/graph/vertex/impl/StackVertex.java | 6 +- .../nn/graph/vertex/impl/SubsetVertex.java | 9 +- .../nn/graph/vertex/impl/UnstackVertex.java | 9 +- .../impl/rnn/DuplicateToTimeSeriesVertex.java | 6 +- .../vertex/impl/rnn/LastTimeStepVertex.java | 6 +- .../impl/rnn/ReverseTimeSeriesVertex.java | 6 +- .../nn/layers/AbstractLayer.java | 37 +- .../nn/layers/ActivationLayer.java | 7 +- .../deeplearning4j/nn/layers/BaseLayer.java | 1 + .../nn/layers/BaseOutputLayer.java | 12 +- .../nn/layers/BasePretrainNetwork.java | 11 +- .../nn/layers/DropoutLayer.java | 6 +- .../deeplearning4j/nn/layers/FrozenLayer.java | 5 +- .../nn/layers/FrozenLayerWithBackprop.java | 2 +- .../deeplearning4j/nn/layers/HelperUtils.java | 6 +- .../deeplearning4j/nn/layers/LossLayer.java | 14 +- .../deeplearning4j/nn/layers/OutputLayer.java | 3 - .../nn/layers/RepeatVector.java | 5 +- .../nn/layers/convolution/Cnn3DLossLayer.java | 39 +- .../nn/layers/convolution/CnnLossLayer.java | 39 +- .../convolution/Convolution1DLayer.java | 325 ++--- .../convolution/Convolution1DNewLayer.java | 226 ++++ .../convolution/Convolution3DLayer.java | 5 +- .../layers/convolution/ConvolutionHelper.java | 4 +- .../layers/convolution/ConvolutionLayer.java | 1167 +++++++++-------- .../convolution/ConvolutionNewLayer.java | 753 +++++++++++ .../layers/convolution/Cropping1DLayer.java | 12 +- .../layers/convolution/Cropping2DLayer.java | 12 +- .../layers/convolution/Cropping3DLayer.java | 9 +- .../convolution/Deconvolution2DLayer.java | 21 +- .../convolution/Deconvolution3DLayer.java | 15 +- .../DepthwiseConvolution2DLayer.java | 21 
+- .../SeparableConvolution2DLayer.java | 25 +- .../nn/layers/convolution/SpaceToBatch.java | 10 +- .../nn/layers/convolution/SpaceToDepth.java | 10 +- .../convolution/ZeroPadding1DLayer.java | 7 +- .../convolution/ZeroPadding3DLayer.java | 3 +- .../layers/convolution/ZeroPaddingLayer.java | 6 +- .../subsampling/Subsampling1DLayer.java | 11 +- .../subsampling/Subsampling3DLayer.java | 6 +- .../subsampling/SubsamplingHelper.java | 4 +- .../subsampling/SubsamplingLayer.java | 17 +- .../convolution/upsampling/Upsampling1D.java | 8 +- .../convolution/upsampling/Upsampling2D.java | 6 +- .../convolution/upsampling/Upsampling3D.java | 6 +- .../nn/layers/feedforward/PReLU.java | 2 +- .../feedforward/autoencoder/AutoEncoder.java | 6 +- .../layers/feedforward/dense/DenseLayer.java | 4 +- .../ElementWiseMultiplicationLayer.java | 13 +- .../feedforward/embedding/EmbeddingLayer.java | 14 +- .../embedding/EmbeddingSequenceLayer.java | 9 +- .../nn/layers/mkldnn/BaseMKLDNNHelper.java | 5 +- .../layers/mkldnn/MKLDNNBatchNormHelper.java | 13 +- .../nn/layers/mkldnn/MKLDNNConvHelper.java | 19 +- .../nn/layers/mkldnn/MKLDNNLSTMHelper.java | 11 +- ...KLDNNLocalResponseNormalizationHelper.java | 7 +- .../mkldnn/MKLDNNSubsamplingHelper.java | 19 +- .../normalization/BatchNormalization.java | 5 +- .../BatchNormalizationHelper.java | 4 +- .../LocalResponseNormalization.java | 12 +- .../LocalResponseNormalizationHelper.java | 4 +- .../nn/layers/objdetect/Yolo2OutputLayer.java | 19 +- .../nn/layers/objdetect/YoloUtils.java | 19 +- .../nn/layers/ocnn/OCNNOutputLayer.java | 13 +- .../nn/layers/ocnn/OCNNParamInitializer.java | 9 +- .../nn/layers/pooling/GlobalPoolingLayer.java | 9 +- .../layers/recurrent/BaseRecurrentLayer.java | 7 +- .../layers/recurrent/BidirectionalLayer.java | 9 +- .../recurrent/GravesBidirectionalLSTM.java | 7 +- .../nn/layers/recurrent/GravesLSTM.java | 4 +- .../nn/layers/recurrent/LSTMHelper.java | 7 +- .../nn/layers/recurrent/LSTMHelpers.java | 13 +- .../layers/recurrent/LastTimeStepLayer.java | 15 +- .../nn/layers/recurrent/MaskZeroLayer.java | 12 +- .../nn/layers/recurrent/RnnLossLayer.java | 11 +- .../nn/layers/recurrent/RnnOutputLayer.java | 418 +++--- .../nn/layers/recurrent/SimpleRnn.java | 8 +- .../recurrent/TimeDistributedLayer.java | 2 +- .../layers/samediff/SameDiffGraphVertex.java | 7 +- .../nn/layers/samediff/SameDiffLayer.java | 11 +- .../layers/samediff/SameDiffOutputLayer.java | 5 +- .../training/CenterLossOutputLayer.java | 7 +- .../nn/layers/util/IdentityLayer.java | 1 - .../nn/layers/util/MaskLayer.java | 10 +- .../nn/layers/wrapper/BaseWrapperLayer.java | 8 +- .../nn/multilayer/MultiLayerNetwork.java | 15 +- .../BatchNormalizationParamInitializer.java | 3 +- .../nn/params/CenterLossParamInitializer.java | 11 +- .../params/Convolution3DParamInitializer.java | 7 +- .../ConvolutionNewParamInitializer.java | 183 +++ .../params/ConvolutionParamInitializer.java | 9 +- .../Deconvolution3DParamInitializer.java | 7 +- .../params/DeconvolutionParamInitializer.java | 5 +- .../nn/params/DefaultParamInitializer.java | 3 +- .../DepthwiseConvolutionParamInitializer.java | 3 +- .../params/ElementWiseParamInitializer.java | 8 +- .../nn/params/EmptyParamInitializer.java | 9 +- .../params/FrozenLayerParamInitializer.java | 1 - ...ozenLayerWithBackpropParamInitializer.java | 12 +- ...avesBidirectionalLSTMParamInitializer.java | 3 +- .../nn/params/GravesLSTMParamInitializer.java | 3 +- .../nn/params/PReLUParamInitializer.java | 9 +- .../nn/params/PretrainParamInitializer.java | 4 +- 
.../nn/params/SameDiffParamInitializer.java | 15 +- .../SeparableConvolutionParamInitializer.java | 3 +- .../nn/params/SimpleRnnParamInitializer.java | 7 +- ...ariationalAutoencoderParamInitializer.java | 9 +- .../params/WrapperLayerParamInitializer.java | 5 +- .../FineTuneConfiguration.java | 9 +- .../nn/transferlearning/TransferLearning.java | 7 +- .../TransferLearningHelper.java | 3 +- .../nn/updater/BaseMultiLayerUpdater.java | 8 +- .../nn/updater/LayerUpdater.java | 5 +- .../nn/updater/MultiLayerUpdater.java | 3 +- .../nn/updater/UpdaterBlock.java | 5 +- .../nn/updater/UpdaterCreator.java | 2 +- .../graph/ComputationGraphUpdater.java | 5 +- .../nn/weights/IWeightInit.java | 5 +- .../nn/weights/WeightInitDistribution.java | 11 +- .../nn/weights/WeightInitIdentity.java | 6 +- .../nn/weights/WeightInitUtil.java | 6 +- .../WeightInitVarScalingNormalFanAvg.java | 2 - .../WeightInitVarScalingNormalFanOut.java | 2 - .../WeightInitVarScalingUniformFanAvg.java | 1 - .../WeightInitVarScalingUniformFanIn.java | 1 - .../WeightInitVarScalingUniformFanOut.java | 1 - .../embeddings/EmbeddingInitializer.java | 3 +- .../embeddings/WeightInitEmbedding.java | 4 +- .../nn/workspace/LayerWorkspaceMgr.java | 4 +- .../org/deeplearning4j/optimize/Solver.java | 9 +- .../optimize/api/BaseTrainingListener.java | 5 +- .../optimize/api/ConvexOptimizer.java | 7 +- .../optimize/api/IterationListener.java | 3 +- .../optimize/api/LineOptimizer.java | 7 +- .../optimize/api/StepFunction.java | 3 +- .../optimize/api/TrainingListener.java | 5 +- .../optimize/listeners/Checkpoint.java | 5 +- .../listeners/CheckpointListener.java | 9 +- .../CollectScoresIterationListener.java | 5 +- .../listeners/CollectScoresListener.java | 4 +- .../ComposableIterationListener.java | 7 +- .../listeners/EvaluativeListener.java | 3 +- .../listeners/FailureTestingListener.java | 7 +- .../listeners/PerformanceListener.java | 15 +- .../listeners/ScoreIterationListener.java | 3 +- .../listeners/ScoreToChartListener.java | 2 +- .../listeners/SleepyTrainingListener.java | 9 +- .../listeners/TimeIterationListener.java | 7 +- .../callbacks/ModelSavingCallback.java | 7 +- .../optimize/solvers/BackTrackLineSearch.java | 7 +- .../optimize/solvers/BaseOptimizer.java | 13 +- .../optimize/solvers/ConjugateGradient.java | 4 +- .../optimize/solvers/LBFGS.java | 9 +- .../optimize/solvers/LineGradientDescent.java | 3 +- .../solvers/StochasticGradientDescent.java | 5 +- .../BasicGradientsAccumulator.java | 17 +- .../EncodedGradientsAccumulator.java | 17 +- .../solvers/accumulation/EncodingHandler.java | 21 +- .../accumulation/FancyBlockingQueue.java | 7 +- .../accumulation/GradientsAccumulator.java | 4 +- .../solvers/accumulation/IndexedTail.java | 23 +- .../solvers/accumulation/MessageHandler.java | 3 +- .../accumulation/SmartFancyBlockingQueue.java | 7 +- .../encoding/ResidualPostProcessor.java | 3 +- .../encoding/ThresholdAlgorithm.java | 3 +- .../util/Convolution1DUtils.java | 563 ++++---- ...tionUtils.java => Convolution2DUtils.java} | 19 +- .../util/Convolution3DUtils.java | 9 +- .../util/CrashReportingUtil.java | 4 +- .../util/DL4JModelValidator.java | 19 +- .../util/MaskedReductionUtil.java | 4 +- .../deeplearning4j/util/ModelSerializer.java | 45 +- .../org/deeplearning4j/util/NetworkUtils.java | 3 +- .../deeplearning4j/util/OutputLayerUtil.java | 5 +- .../deeplearning4j/util/TimeSeriesUtils.java | 9 +- .../java/net/brutex/ai/dnn/api/dnnTest.java | 2 - .../NeuralNetConfigurationSerdeTest.java | 169 +++ .../nn/layers/HelperUtilsTest.java | 4 +- 
.../org/deeplearning4j/spark/TestKryo.java | 2 +- .../ui/module/train/TrainModule.java | 3 +- 437 files changed, 6263 insertions(+), 3645 deletions(-) create mode 100644 brutex-extended-tests/src/test/java/net/brutex/ai/nd4j/tests/ExploreParamsTest.java create mode 100644 cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN1DNewGradientCheckTest.java create mode 100644 cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AbstractConvolutionLayer.java rename cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/{Convolution1DLayer.java => Convolution1DNew.java} (56%) create mode 100644 cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/CavisMapper.java create mode 100644 cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Convolution1DNewLayer.java create mode 100644 cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionNewLayer.java create mode 100644 cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/ConvolutionNewParamInitializer.java rename cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/{ConvolutionUtils.java => Convolution2DUtils.java} (98%) create mode 100644 cavis-dnn/cavis-dnn-nn/src/test/java/net/brutex/ai/dnn/serde/NeuralNetConfigurationSerdeTest.java diff --git a/.gitignore b/.gitignore index 6c39e54b9..e646fc79f 100644 --- a/.gitignore +++ b/.gitignore @@ -36,6 +36,8 @@ pom.xml.versionsBackup pom.xml.next release.properties *dependency-reduced-pom.xml +**/build/* +.gradle/* # Specific for Nd4j *.md5 @@ -83,3 +85,14 @@ bruai4j-native-common/cmake* /bruai4j-native/bruai4j-native-common/blasbuild/ /bruai4j-native/bruai4j-native-common/build/ /cavis-native/cavis-native-lib/blasbuild/ +/cavis-dnn/cavis-dnn-core/build/reports/tests/cudaTest/classes/org.deeplearning4j.gradientcheck.AttentionLayerTest.html +/cavis-dnn/cavis-dnn-core/build/reports/tests/cudaTest/css/base-style.css +/cavis-dnn/cavis-dnn-core/build/reports/tests/cudaTest/css/style.css +/cavis-dnn/cavis-dnn-core/build/reports/tests/cudaTest/js/report.js +/cavis-dnn/cavis-dnn-core/build/reports/tests/cudaTest/packages/org.deeplearning4j.gradientcheck.html +/cavis-dnn/cavis-dnn-core/build/reports/tests/cudaTest/index.html +/cavis-dnn/cavis-dnn-core/build/resources/main/iris.dat +/cavis-dnn/cavis-dnn-core/build/resources/test/junit-platform.properties +/cavis-dnn/cavis-dnn-core/build/resources/test/logback-test.xml +/cavis-dnn/cavis-dnn-core/build/test-results/cudaTest/TEST-org.deeplearning4j.gradientcheck.AttentionLayerTest.xml +/cavis-dnn/cavis-dnn-core/build/tmp/jar/MANIFEST.MF diff --git a/brutex-extended-tests/src/test/java/net/brutex/ai/nd4j/tests/ExploreParamsTest.java b/brutex-extended-tests/src/test/java/net/brutex/ai/nd4j/tests/ExploreParamsTest.java new file mode 100644 index 000000000..1b0d7b840 --- /dev/null +++ b/brutex-extended-tests/src/test/java/net/brutex/ai/nd4j/tests/ExploreParamsTest.java @@ -0,0 +1,167 @@ +/* + * + * ****************************************************************************** + * * + * * This program and the accompanying materials are made available under the + * * terms of the Apache License, Version 2.0 which is available at + * * https://www.apache.org/licenses/LICENSE-2.0. + * * + * * See the NOTICE file distributed with this work for additional + * * information regarding copyright ownership. 
+ * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * * License for the specific language governing permissions and limitations + * * under the License. + * * + * * SPDX-License-Identifier: Apache-2.0 + * ***************************************************************************** + * + */ + +package net.brutex.ai.nd4j.tests; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import lombok.extern.slf4j.Slf4j; +import org.deeplearning4j.datasets.iterator.INDArrayDataSetIterator; +import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator; +import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.DenseLayer; +import org.deeplearning4j.nn.conf.layers.OutputLayer; +import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; +import org.junit.jupiter.api.Test; +import org.nd4j.common.primitives.Pair; +import org.nd4j.evaluation.classification.Evaluation; +import org.nd4j.linalg.activations.Activation; +import org.nd4j.linalg.api.buffer.DataType; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.lossfunctions.LossFunctions; + +@Slf4j +public class ExploreParamsTest { + + @Test + public void testParam() { + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() + .seed(12345) + .dataType(DataType.DOUBLE) + .layer( + DenseLayer.builder().nIn(4).nOut(30).name("1. Dense").activation(Activation.TANH)) + .layer(DenseLayer.builder().nIn(30).nOut(10).name("2. Dense")) + // .layer(FrozenLayer.builder(DenseLayer.builder().nOut(6).build()).build()) + + .layer( + OutputLayer.builder() + .nOut(3) + .lossFunction(LossFunctions.LossFunction.MSE) + .activation(Activation.SOFTMAX)) + .build(); + MultiLayerNetwork nn = new MultiLayerNetwork(conf); + nn.init(); + log.info(nn.summary()); + // INDArray input = Nd4j.rand(10,4); + INDArray labels = Nd4j.zeros(9, 3); + + INDArray input = + Nd4j.create( + new double[][] { + {5.15, 3.5, 1.4, 0.21}, // setosa + {4.9, 3.2, 1.4, 0.2}, // setosa + {4.7, 3.2, 1.23, 0.2}, // setosa + {7, 3.25, 4.7, 1.41}, // versicolor + {6.4, 3.2, 4.54, 1.5}, // versicolor + {6.9, 3.1, 4.92, 1.5}, // versicolor + {7.7, 3, 6.1, 2.3}, // virginica + {6.3, 3.4, 5.6, 2.45}, // virginica + {6.4, 3.12, 5.5, 1.8} // virginica + }); + + labels.putScalar(0, 1); + labels.putScalar(3, 1); + labels.putScalar(6, 1); + labels.putScalar(10, 1); + labels.putScalar(13, 1); + labels.putScalar(16, 1); + labels.putScalar(20, 1); + labels.putScalar(23, 1); + labels.putScalar(26, 1); + + IrisDataSetIterator iter = new IrisDataSetIterator(); + //Iterable> it = List.of(new Pair(input, labels)); + List l = new ArrayList<>(); + for (int i=0; i< input.rows(); i++) { + l.add(new Pair(input.getRow(i), labels.getRow(i))); + } + Iterable> it = l; + INDArrayDataSetIterator diter = new INDArrayDataSetIterator(it, 1); + + for (int i = 0; i < 100; i++) { + // nn.fit(input, labels); + // nn.fit( input, labels); + nn.fit(diter); + // nn.feedForward(input); + if(i%20==0) log.info("Score: {}", nn.getScore()); + } + + Evaluation eval = nn.evaluate(iter, List.of("setosa", "vericolor", "virginica")); + + log.info("\n{}", eval.stats()); + } + + @Test + public void testParam2() throws IOException { + NeuralNetConfiguration conf = + 
NeuralNetConfiguration.builder() + .seed(12345) + .layer( + DenseLayer.builder().nIn(784).nOut(20).name("1. Dense")) + .layer(DenseLayer.builder().nIn(20).nOut(10).name("2. Dense")) + .layer( + OutputLayer.builder() + .nOut(10) + .lossFunction(LossFunctions.LossFunction.MSE) + .activation(Activation.SOFTMAX)) + .build(); + MultiLayerNetwork nn = new MultiLayerNetwork(conf); + nn.init(); + log.info(nn.summary()); + + NeuralNetConfiguration conf2 = + NeuralNetConfiguration.builder() + .seed(12345) + .layer( + DenseLayer.builder().nIn(784).nOut(20).name("1. Dense").dropOut(0.7)) + .layer(DenseLayer.builder().nIn(20).nOut(10).name("2. Dense")) + .layer( + OutputLayer.builder() + .nOut(10) + .lossFunction(LossFunctions.LossFunction.MSE) + .activation(Activation.SOFTMAX)) + .build(); + MultiLayerNetwork nn2 = new MultiLayerNetwork(conf2); + nn2.init(); + log.info(nn2.summary()); + + + + MnistDataSetIterator iter = new MnistDataSetIterator(10, 500); + MnistDataSetIterator iter2 = new MnistDataSetIterator(10, 50); + + + for (int i = 0; i < 200; i++) { + nn.fit(iter); + nn2.fit(iter); + if(i%20==0) log.info("Score: {} vs. {}", nn.getScore(), nn2.getScore()); + } + + Evaluation eval = nn.evaluate(iter2); + Evaluation eval2 = nn2.evaluate(iter2); + + log.info("\n{} \n{}", eval.stats(), eval2.stats()); + } +} diff --git a/brutex-extended-tests/src/test/java/net/brutex/gan/App.java b/brutex-extended-tests/src/test/java/net/brutex/gan/App.java index 5d06092ab..4243c46e2 100644 --- a/brutex-extended-tests/src/test/java/net/brutex/gan/App.java +++ b/brutex-extended-tests/src/test/java/net/brutex/gan/App.java @@ -45,6 +45,7 @@ import org.datavec.image.transform.PipelineImageTransform; import org.datavec.image.transform.ResizeImageTransform; import org.datavec.image.transform.ShowImageTransform; import org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator; +import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; import org.deeplearning4j.nn.conf.GradientNormalization; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.distribution.Distribution; @@ -65,6 +66,7 @@ import org.deeplearning4j.optimize.listeners.ScoreToChartListener; import org.junit.jupiter.api.Test; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.activations.impl.ActivationLReLU; +import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; @@ -80,11 +82,11 @@ public class App { private static final int X_DIM = 20 ; private static final int Y_DIM = 20; - private static final int CHANNELS = 1; - private static final int batchSize = 10; + private static final int CHANNELS = 3; + private static final int batchSize = 50; private static final int INPUT = 128; - private static final int OUTPUT_PER_PANEL = 4; + private static final int OUTPUT_PER_PANEL = 16; private static final int ARRAY_SIZE_PER_SAMPLE = X_DIM*Y_DIM*CHANNELS; private static final IUpdater UPDATER = Adam.builder().learningRate(LEARNING_RATE).beta1(0.5).build(); @@ -146,7 +148,7 @@ public class App { ActivationLayer.builder(new ActivationLReLU(0.2)).build(), DropoutLayer.builder(1 - 0.5).build(), - OutputLayer.builder().name("dis-output").lossFunction(LossFunction.XENT).nIn(X_DIM*Y_DIM).nOut(1).activation(Activation.SIGMOID).build() + OutputLayer.builder().name("dis-output").lossFunction(LossFunction.MCXENT).nIn(X_DIM*Y_DIM).nOut(1).activation(Activation.SIGMOID).build() 
}; } @@ -196,6 +198,7 @@ public class App { .activation( Activation.IDENTITY ) .layersFromArray( layers ) .inputType( InputType.convolutional(X_DIM, Y_DIM, CHANNELS)) + .dataType(DataType.FLOAT) .build(); ((NeuralNetConfiguration) conf).init(); return conf; @@ -212,9 +215,9 @@ public class App { log.info("\u001B[32m Some \u001B[1m green \u001B[22m text \u001B[0m \u001B[7m Inverted\u001B[0m "); Nd4j.getMemoryManager().setAutoGcWindow(500); -// MnistDataSetIterator trainData = new MnistDataSetIterator(128, true, 45); - // FileSplit fileSplit = new FileSplit(new File("c:/users/brian/downloads/flowers"), NativeImageLoader.getALLOWED_FORMATS()); - FileSplit fileSplit = new FileSplit(new File("c:/users/brian/downloads/humans"), NativeImageLoader.getALLOWED_FORMATS()); + //MnistDataSetIterator trainData = new MnistDataSetIterator(128, true, 45); + //FileSplit fileSplit = new FileSplit(new File("c:/users/brian/downloads/flowers"), NativeImageLoader.getALLOWED_FORMATS()); + FileSplit fileSplit = new FileSplit(new File("c:/users/brian/downloads/humans"), NativeImageLoader.getALLOWED_FORMATS()); ImageTransform transform = new ColorConversionTransform(new Random(42), 7 ); @@ -223,7 +226,7 @@ public class App { ImageTransform transform3 = new ResizeImageTransform(X_DIM, Y_DIM); ImageTransform tr = new PipelineImageTransform.Builder() - .addImageTransform(transform) //convert to GREY SCALE + //.addImageTransform(transform) //convert to GREY SCALE .addImageTransform(transform3) //.addImageTransform(transform2) .build(); @@ -270,10 +273,10 @@ public class App { break; } - if(i%20 == 0) { - // frame2 = visualize(new INDArray[]{real}, batchSize, - // frame2 == null ? new JFrame() : frame2, true); //real has batchsize number of images - } + //if(i%20 == 0) { + frame2 = visualize(new INDArray[]{real}, batchSize, + frame2 == null ? new JFrame() : frame2, true); //real has batchsize number of images + //} real.divi(255f); // int batchSize = (int) real.shape()[0]; @@ -290,7 +293,7 @@ public class App { DataSet data = DataSet.merge(Arrays.asList(realSet, fakeSet)); dis.fit(data); - dis.fit(data); + //dis.fit(data); // Update the discriminator in the GAN network updateGan(gen, dis, gan); @@ -298,7 +301,7 @@ public class App { //gan.fit(new DataSet(Nd4j.rand(batchSize, INPUT), Nd4j.zeros(batchSize, 1))); gan.fit(new DataSet(Nd4j.rand(batchSize, CHANNELS, X_DIM, Y_DIM), Nd4j.zeros(batchSize, 1))); - + //Visualize and reporting if (j % 10 == 1) { System.out.println("Iteration " + j + " Visualizing..."); INDArray[] samples = batchSize > OUTPUT_PER_PANEL ? new INDArray[OUTPUT_PER_PANEL] : new INDArray[batchSize]; @@ -320,11 +323,16 @@ public class App { frame = visualize(samples, 1, frame == null ? new JFrame() : frame, false); //each samples only has 1 image, thus batchElements=1 } } - trainData.reset(); + if (trainData.resetSupported()) { + trainData.reset(); + } else { + log.error("Trainingdata {} does not support reset.", trainData.toString()); + } + // Copy the GANs generator to gen. + updateGen(gen, gan); } - // Copy the GANs generator to gen. 
- updateGen(gen, gan); + gen.save(new File("mnist-mlp-generator.dlj")); } @@ -383,7 +391,12 @@ public class App { } private static JLabel getImage(INDArray tensor, int batchElement, boolean isOrig) { - final BufferedImage bi = new BufferedImage(X_DIM, Y_DIM, BufferedImage.TYPE_BYTE_GRAY); + final BufferedImage bi; + if(CHANNELS>1) { + bi = new BufferedImage(X_DIM, Y_DIM, BufferedImage.TYPE_INT_RGB); //need to change here based on channels + } else { + bi = new BufferedImage(X_DIM, Y_DIM, BufferedImage.TYPE_BYTE_GRAY); //need to change here based on channels + } final int imageSize = X_DIM * Y_DIM; final int offset = batchElement * imageSize; int pxl = offset * CHANNELS; //where to start in the INDArray diff --git a/brutex-extended-tests/src/test/java/net/brutex/gan/MnistSimpleGAN.java b/brutex-extended-tests/src/test/java/net/brutex/gan/MnistSimpleGAN.java index d467f46a4..2cfddb795 100644 --- a/brutex-extended-tests/src/test/java/net/brutex/gan/MnistSimpleGAN.java +++ b/brutex-extended-tests/src/test/java/net/brutex/gan/MnistSimpleGAN.java @@ -24,12 +24,14 @@ package net.brutex.gan; import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; import org.deeplearning4j.nn.conf.GradientNormalization; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.ActivationLayer; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.DropoutLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.weights.WeightInit; +import org.junit.jupiter.api.Test; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.activations.impl.ActivationLReLU; import org.nd4j.linalg.api.ndarray.INDArray; @@ -98,7 +100,10 @@ public class MnistSimpleGAN { return new MultiLayerNetwork(discConf); } - + @Test + public void runTest() throws Exception { + main(null); + } public static void main(String[] args) throws Exception { GAN gan = new GAN.Builder() .generator(MnistSimpleGAN::getGenerator) @@ -108,6 +113,7 @@ public class MnistSimpleGAN { .updater(UPDATER) .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer) .gradientNormalizationThreshold(100) + .build(); Nd4j.getMemoryManager().setAutoGcWindow(15 * 1000); diff --git a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/CNN1DTestCases.java b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/CNN1DTestCases.java index bd4b122f1..4a4d77ccb 100644 --- a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/CNN1DTestCases.java +++ b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/CNN1DTestCases.java @@ -88,7 +88,7 @@ public class CNN1DTestCases { .convolutionMode(ConvolutionMode.Same)) .graphBuilder() .addInputs("in") - .layer("0", Convolution1DLayer.builder().nOut(32).activation(Activation.TANH).kernelSize(3).stride(1).build(), "in") + .layer("0", Convolution1D.builder().nOut(32).activation(Activation.TANH).kernelSize(3).stride(1).build(), "in") .layer("1", Subsampling1DLayer.builder().kernelSize(2).stride(1).poolingType(SubsamplingLayer.PoolingType.MAX.toPoolingType()).build(), "0") .layer("2", Cropping1D.builder(1).build(), "1") .layer("3", ZeroPadding1DLayer.builder(1).build(), "2") diff --git a/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/linalg/api/ndarray/INDArray.java 
b/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/linalg/api/ndarray/INDArray.java index f4d4b200e..680d7e945 100644 --- a/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/linalg/api/ndarray/INDArray.java +++ b/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/linalg/api/ndarray/INDArray.java @@ -2385,11 +2385,15 @@ public interface INDArray extends Serializable, AutoCloseable { */ long[] stride(); - /** - * Return the ordering (fortran or c 'f' and 'c' respectively) of this ndarray - * @return the ordering of this ndarray - */ - char ordering(); + /** + * Return the ordering (fortran or c 'f' and 'c' respectively) of this ndarray
+ *
+ * 'c' is the C-contiguous layout, i.e. row-major order (the elements of each row are stored contiguously).
+ * 'f' is the Fortran-contiguous layout, i.e. column-major order (the elements of each column are stored contiguously).
+ * See <a href="https://en.wikipedia.org/wiki/Row-_and_column-major_order">Row- and column-major order</a> for background.
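+ * A minimal sketch of the difference (an illustrative example only, assuming the default 'c' order and the
+ * standard {@code Nd4j.linspace}, {@code reshape} and {@code dup(char)} helpers):
+ * <pre>{@code
+ * INDArray a = Nd4j.linspace(1, 6, 6).reshape(2, 3); // 2x3 matrix; with 'c' order the backing buffer is 1,2,3,4,5,6
+ * INDArray f = a.dup('f');                           // same 2x3 values, but the buffer is column-major: 1,4,2,5,3,6
+ * char co = a.ordering();                            // 'c'
+ * char fo = f.ordering();                            // 'f'
+ * }</pre>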
+ * + * @return the ordering of this ndarray + */ + char ordering(); /** * Returns the size along a specified dimension diff --git a/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/linalg/dataset/DataSet.java b/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/linalg/dataset/DataSet.java index 222990cc5..d0c691cad 100644 --- a/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/linalg/dataset/DataSet.java +++ b/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/linalg/dataset/DataSet.java @@ -334,6 +334,7 @@ public class DataSet implements org.nd4j.linalg.dataset.api.DataSet { public void save(File to) { + if (to.getParentFile() != null) to.getParentFile().mkdirs(); // ensure the target directory exists before the stream is opened try (FileOutputStream fos = new FileOutputStream(to, false); BufferedOutputStream bos = new BufferedOutputStream(fos)) { save(bos); } catch (IOException e) { throw new RuntimeException(e); diff --git a/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/linalg/factory/Nd4j.java b/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/linalg/factory/Nd4j.java index dc4fa4cc4..5895f70e2 100644 --- a/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/linalg/factory/Nd4j.java +++ b/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/linalg/factory/Nd4j.java @@ -5121,7 +5121,7 @@ public class Nd4j { Nd4j.backend = backend; updateNd4jContext(); props = Nd4jContext.getInstance().getConf(); - logger.info("Properties for Nd4jContext " + props); + log.debug("Properties for Nd4jContext {}", props); PropertyParser pp = new PropertyParser(props); String otherDtype = pp.toString(ND4JSystemProperties.DTYPE); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/datasets/iterator/DataSetIteratorTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/datasets/iterator/DataSetIteratorTest.java index cf3aff480..e33eebd6e 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/datasets/iterator/DataSetIteratorTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/datasets/iterator/DataSetIteratorTest.java @@ -166,10 +166,10 @@ public class DataSetIteratorTest extends BaseDL4JTest { int seed = 123; int listenerFreq = 1; - LFWDataSetIterator lfw = new LFWDataSetIterator(batchSize, numSamples, + final LFWDataSetIterator lfw = new LFWDataSetIterator(batchSize, numSamples, new int[] {numRows, numColumns, numChannels}, outputNum, false, true, 1.0, new Random(seed)); - NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().seed(seed) + final var builder = NeuralNetConfiguration.builder().seed(seed) .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .layer(0, ConvolutionLayer.builder(5, 5).nIn(numChannels).nOut(6) @@ -181,7 +181,7 @@ public class DataSetIteratorTest extends BaseDL4JTest { .build()) .inputType(InputType.convolutionalFlat(numRows, numColumns, numChannels)); - MultiLayerNetwork model = new MultiLayerNetwork(builder.build()); + final MultiLayerNetwork model = new MultiLayerNetwork(builder.build()); model.init(); model.addTrainingListeners(new ScoreIterationListener(listenerFreq)); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStopping.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStopping.java index 45d179da1..7493457fa 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStopping.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStopping.java @@ -45,6 +45,7 @@
import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.*; import org.deeplearning4j.nn.conf.layers.variational.BernoulliReconstructionDistribution; import org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder; +import org.deeplearning4j.nn.conf.serde.CavisMapper; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.weights.WeightInit; import org.deeplearning4j.optimize.api.BaseTrainingListener; @@ -924,8 +925,8 @@ public class TestEarlyStopping extends BaseDL4JTest { }; for(EpochTerminationCondition e : etc ){ - String s = NeuralNetConfiguration.mapper().writeValueAsString(e); - EpochTerminationCondition c = NeuralNetConfiguration.mapper().readValue(s, EpochTerminationCondition.class); + String s = CavisMapper.getMapper(CavisMapper.Type.JSON).writeValueAsString(e); + EpochTerminationCondition c = CavisMapper.getMapper(CavisMapper.Type.JSON).readValue(s, EpochTerminationCondition.class); assertEquals(e, c); } @@ -936,8 +937,8 @@ public class TestEarlyStopping extends BaseDL4JTest { }; for(IterationTerminationCondition i : itc ){ - String s = NeuralNetConfiguration.mapper().writeValueAsString(i); - IterationTerminationCondition c = NeuralNetConfiguration.mapper().readValue(s, IterationTerminationCondition.class); + String s = CavisMapper.getMapper(CavisMapper.Type.JSON).writeValueAsString(i); + IterationTerminationCondition c = CavisMapper.getMapper(CavisMapper.Type.JSON).readValue(s, IterationTerminationCondition.class); assertEquals(i, c); } } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/exceptions/TestInvalidConfigurations.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/exceptions/TestInvalidConfigurations.java index 0fc03569c..54f51ba2a 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/exceptions/TestInvalidConfigurations.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/exceptions/TestInvalidConfigurations.java @@ -309,7 +309,7 @@ public class TestInvalidConfigurations extends BaseDL4JTest { try { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().convolutionMode(ConvolutionMode.Strict) - .list() + .layer(0, ConvolutionLayer.builder().kernelSize(2, 3).stride(2, 2).padding(0, 0).nOut(5) .build()) .layer(1, OutputLayer.builder().nOut(10).build()) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/BNGradientCheckTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/BNGradientCheckTest.java index 54c7e7763..4c9c0c3e1 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/BNGradientCheckTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/BNGradientCheckTest.java @@ -77,7 +77,7 @@ public class BNGradientCheckTest extends BaseDL4JTest { NeuralNetConfiguration.builder().updater(new NoOp()) .dataType(DataType.DOUBLE) .seed(12345L) - .dist(new NormalDistribution(0, 1)).list() + .weightInit(new NormalDistribution(0, 1)) .layer(0, DenseLayer.builder().nIn(4).nOut(3) .activation(Activation.IDENTITY).build()) .layer(1,BatchNormalization.builder().useLogStd(useLogStd).nOut(3).build()) @@ -122,7 +122,7 @@ public class BNGradientCheckTest extends BaseDL4JTest { .dataType(DataType.DOUBLE) .updater(new NoOp()).seed(12345L) .dist(new NormalDistribution(0, 2)).list() - .layer(0, ConvolutionLayer.builder().kernelSize(2, 2).stride(1, 1).nIn(depth).nOut(2) + .layer(0, 
Convolution2D.builder().kernelSize(2, 2).stride(1, 1).nIn(depth).nOut(2) .activation(Activation.IDENTITY).build()) .layer(1,BatchNormalization.builder().useLogStd(useLogStd).build()) .layer(2, ActivationLayer.builder().activation(Activation.TANH).build()) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN1DGradientCheckTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN1DGradientCheckTest.java index b957fde47..b843f576c 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN1DGradientCheckTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN1DGradientCheckTest.java @@ -91,9 +91,8 @@ public class CNN1DGradientCheckTest extends BaseDL4JTest { .updater(new NoOp()) .dist(new NormalDistribution(0, 1)) .convolutionMode(ConvolutionMode.Same) - .list() .layer( - Convolution1DLayer.builder() + Convolution1D.builder() .activation(afn) .kernelSize(kernel) .stride(stride) @@ -202,7 +201,7 @@ public class CNN1DGradientCheckTest extends BaseDL4JTest { .dist(new NormalDistribution(0, 1)) .convolutionMode(ConvolutionMode.Same) .layer( - Convolution1DLayer.builder() + Convolution1D.builder() .activation(afn) .kernelSize(kernel) .stride(stride) @@ -211,7 +210,7 @@ public class CNN1DGradientCheckTest extends BaseDL4JTest { .build()) .layer(Cropping1D.builder(cropping).build()) .layer( - Convolution1DLayer.builder() + Convolution1D.builder() .activation(afn) .kernelSize(kernel) .stride(stride) @@ -317,7 +316,7 @@ public class CNN1DGradientCheckTest extends BaseDL4JTest { .dist(new NormalDistribution(0, 1)) .convolutionMode(ConvolutionMode.Same) .layer( - Convolution1DLayer.builder() + Convolution1D.builder() .activation(afn) .kernelSize(kernel) .stride(stride) @@ -326,7 +325,7 @@ public class CNN1DGradientCheckTest extends BaseDL4JTest { .build()) .layer(ZeroPadding1DLayer.builder(zeroPadding).build()) .layer( - Convolution1DLayer.builder() + Convolution1D.builder() .activation(afn) .kernelSize(kernel) .stride(stride) @@ -435,10 +434,9 @@ public class CNN1DGradientCheckTest extends BaseDL4JTest { .updater(new NoOp()) .dist(new NormalDistribution(0, 1)) .convolutionMode(ConvolutionMode.Same) - .list() .layer( 0, - Convolution1DLayer.builder() + Convolution1D.builder() .activation(afn) .kernelSize(kernel) .stride(stride) @@ -447,7 +445,7 @@ public class CNN1DGradientCheckTest extends BaseDL4JTest { .build()) .layer( 1, - Convolution1DLayer.builder() + Convolution1D.builder() .activation(afn) .kernelSize(kernel) .stride(stride) @@ -461,6 +459,7 @@ public class CNN1DGradientCheckTest extends BaseDL4JTest { .stride(stride) .padding(padding) .pnorm(pnorm) + .name("SubsamplingLayer") .build()) .layer( 3, @@ -548,7 +547,7 @@ public class CNN1DGradientCheckTest extends BaseDL4JTest { .seed(12345) .list() .layer( - Convolution1DLayer.builder() + Convolution1D.builder() .kernelSize(2) .rnnDataFormat(RNNFormat.NCW) .stride(stride) @@ -562,7 +561,7 @@ public class CNN1DGradientCheckTest extends BaseDL4JTest { .pnorm(pnorm) .build()) .layer( - Convolution1DLayer.builder() + Convolution1D.builder() .kernelSize(2) .rnnDataFormat(RNNFormat.NCW) .stride(stride) @@ -655,7 +654,7 @@ public class CNN1DGradientCheckTest extends BaseDL4JTest { .seed(12345) .list() .layer( - Convolution1DLayer.builder() + Convolution1D.builder() .kernelSize(k) .dilation(d) .hasBias(hasBias) @@ -664,7 +663,7 @@ public class CNN1DGradientCheckTest extends BaseDL4JTest { .nOut(convNOut1) .build()) .layer( - 
Convolution1DLayer.builder() + Convolution1D.builder() .kernelSize(k) .dilation(d) .convolutionMode(ConvolutionMode.Causal) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN1DNewGradientCheckTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN1DNewGradientCheckTest.java new file mode 100644 index 000000000..453f12190 --- /dev/null +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN1DNewGradientCheckTest.java @@ -0,0 +1,811 @@ +/* + * ****************************************************************************** + * * + * * + * * This program and the accompanying materials are made available under the + * * terms of the Apache License, Version 2.0 which is available at + * * https://www.apache.org/licenses/LICENSE-2.0. + * * + * * See the NOTICE file distributed with this work for additional + * * information regarding copyright ownership. + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * * License for the specific language governing permissions and limitations + * * under the License. + * * + * * SPDX-License-Identifier: Apache-2.0 + * ***************************************************************************** + */ + +package org.deeplearning4j.gradientcheck; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import lombok.extern.slf4j.Slf4j; +import org.deeplearning4j.BaseDL4JTest; +import org.deeplearning4j.TestUtils; +import org.deeplearning4j.datasets.iterator.INDArrayDataSetIterator; +import org.deeplearning4j.nn.conf.ConvolutionMode; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.RNNFormat; +import org.deeplearning4j.nn.conf.distribution.NormalDistribution; +import org.deeplearning4j.nn.conf.inputs.InputType; +import org.deeplearning4j.nn.conf.layers.*; +import org.deeplearning4j.nn.conf.layers.convolutional.Cropping1D; +import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; +import org.deeplearning4j.util.Convolution1DUtils; +import org.junit.jupiter.api.Test; +import org.nd4j.common.primitives.Pair; +import org.nd4j.evaluation.classification.Evaluation; +import org.nd4j.linalg.activations.Activation; +import org.nd4j.linalg.api.buffer.DataType; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.dataset.DataSet; +import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; +import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.indexing.NDArrayIndex; +import org.nd4j.linalg.learning.config.NoOp; +import org.nd4j.linalg.lossfunctions.LossFunctions; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +@Slf4j +public class CNN1DNewGradientCheckTest extends BaseDL4JTest { + private static final boolean PRINT_RESULTS = true; + private static final boolean RETURN_ON_FIRST_FAILURE = false; + private static final double DEFAULT_EPS = 1e-6; + private static final double DEFAULT_MAX_REL_ERROR = 1e-3; + private static final double DEFAULT_MIN_ABS_ERROR = 1e-8; + + static { + Nd4j.setDataType(DataType.DOUBLE); + } + + @Test + public void testCnn1D() { + int minibatchSize = 4; + int[] dataChannels = {4, 10}; //the input + int[] kernels = {2,4,5,8}; + int stride = 2; + int padding = 3; + int seriesLength = 300; + + for (int 
kernel : kernels) { + for (int dChannels : dataChannels) { + int numLabels = ((seriesLength + (2 * padding) - kernel) / stride) + 1; + final NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() + .dataType(DataType.DOUBLE) + .updater(new NoOp()) + .dist(new NormalDistribution(0, 1)) + .convolutionMode(ConvolutionMode.Same) + .layer( + Convolution1DNew.builder() + .activation(Activation.RELU) + .kernelSize(kernel) + .stride(stride) + .padding(padding) + .nIn(dChannels) // channels + .nOut(3) + .rnnDataFormat(RNNFormat.NCW) + .build()) + .layer( + RnnOutputLayer.builder() + .lossFunction(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX) + .nOut(4) + .build()) + .inputType(InputType.recurrent(dChannels, seriesLength)) + .build(); + + INDArray input = Nd4j.rand(minibatchSize, dChannels, seriesLength); + INDArray labels = Nd4j.zeros(minibatchSize, 4, numLabels); + for (int i = 0; i < minibatchSize; i++) { + for (int j = 0; j < numLabels; j++) { + labels.putScalar(new int[] {i, i % 4, j}, 1.0); + } + } + final MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + String msg = + "Minibatch=" + + minibatchSize + + ", activationFn=" + + Activation.RELU + + ", kernel = " + + kernel; + + System.out.println(msg); + for (int j = 0; j < net.getnLayers(); j++) + System.out.println("ILayer " + j + " # params: " + net.getLayer(j).numParams()); +/** + List> iter = new java.util.ArrayList<>(Collections.emptyList()); + iter.add(new Pair<>(input, labels)); + for(int x=0;x<100; x++) net.fit(input, labels); + Evaluation eval = net.evaluate(new INDArrayDataSetIterator(iter,2), Arrays.asList(new String[]{"One", "Two", "Three", "Four"})); + // net.fit(input, labels); + eval.eval(labels, net.output(input)); + + **/ + boolean gradOK = + GradientCheckUtil.checkGradients( + net, + DEFAULT_EPS, + DEFAULT_MAX_REL_ERROR, + DEFAULT_MIN_ABS_ERROR, + PRINT_RESULTS, + RETURN_ON_FIRST_FAILURE, + input, + labels); + + assertTrue(gradOK, msg); + TestUtils.testModelSerialization(net); + } + } + + + } + + + @Test + public void testCnn1DWithLocallyConnected1D() { + Nd4j.getRandom().setSeed(1337); + + int[] minibatchSizes = {2, 3}; + int length = 25; + int convNIn = 18; + int convNOut1 = 3; + int convNOut2 = 4; + int finalNOut = 4; + + int[] kernels = {1,2,4}; + int stride = 1; + int padding = 0; + + Activation[] activations = {Activation.SIGMOID}; + + for (Activation afn : activations) { + for (int minibatchSize : minibatchSizes) { + for (int kernel : kernels) { + INDArray input = Nd4j.rand(minibatchSize, convNIn, length); + INDArray labels = Nd4j.zeros(minibatchSize, finalNOut, length); + for (int i = 0; i < minibatchSize; i++) { + for (int j = 0; j < length; j++) { + labels.putScalar(new int[] {i, i % finalNOut, j}, 1.0); + } + } + + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() + .dataType(DataType.DOUBLE) + .updater(new NoOp()) + .dist(new NormalDistribution(0, 1)) + .convolutionMode(ConvolutionMode.Same) + .layer( + Convolution1DNew.builder() + .activation(afn) + .kernelSize(kernel) + .stride(stride) + .padding(padding) + .nIn(convNIn) + .nOut(convNOut1) + .rnnDataFormat(RNNFormat.NCW) + .build()) + .layer( + LocallyConnected1D.builder() + .activation(afn) + .kernelSize(kernel) + .stride(stride) + .padding(padding) + .nIn(convNOut1) + .nOut(convNOut2) + .hasBias(false) + .build()) + .layer( + RnnOutputLayer.builder() + .lossFunction(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX) + .nOut(finalNOut) + .build()) + 
.inputType(InputType.recurrent(convNIn, length)) + .build(); + + String json = conf.toJson(); + NeuralNetConfiguration c2 = NeuralNetConfiguration.fromJson(json); + assertEquals(conf, c2); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + String msg = + "Minibatch=" + minibatchSize + ", activationFn=" + afn + ", kernel = " + kernel; + + if (PRINT_RESULTS) { + System.out.println(msg); + // for (int j = 0; j < net.getnLayers(); j++) + // System.out.println("ILayer " + j + " # params: " + + // net.getLayer(j).numParams()); + } + + boolean gradOK = + GradientCheckUtil.checkGradients( + net, + DEFAULT_EPS, + DEFAULT_MAX_REL_ERROR, + DEFAULT_MIN_ABS_ERROR, + PRINT_RESULTS, + RETURN_ON_FIRST_FAILURE, + input, + labels); + + assertTrue(gradOK, msg); + + TestUtils.testModelSerialization(net); + } + } + } + } + + @Test + public void testCnn1DWithCropping1D() { + Nd4j.getRandom().setSeed(1337); + + int[] minibatchSizes = {1, 3}; + int length = 7; + int convNIn = 2; + int convNOut1 = 3; + int convNOut2 = 4; + int finalNOut = 4; + + int[] kernels = {1, 2, 4}; + int stride = 1; + + int padding = 0; + int cropping = 1; + int croppedLength = length - 2 * cropping; + + Activation[] activations = {Activation.SIGMOID}; + SubsamplingLayer.PoolingType[] poolingTypes = + new SubsamplingLayer.PoolingType[] { + SubsamplingLayer.PoolingType.MAX, + SubsamplingLayer.PoolingType.AVG, + SubsamplingLayer.PoolingType.PNORM + }; + + for (Activation afn : activations) { + for (SubsamplingLayer.PoolingType poolingType : poolingTypes) { + for (int minibatchSize : minibatchSizes) { + for (int kernel : kernels) { + INDArray input = Nd4j.rand(minibatchSize, convNIn, length); + INDArray labels = Nd4j.zeros(minibatchSize, finalNOut, croppedLength); + for (int i = 0; i < minibatchSize; i++) { + for (int j = 0; j < croppedLength; j++) { + labels.putScalar(new int[] {i, i % finalNOut, j}, 1.0); + } + } + + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() + .dataType(DataType.DOUBLE) + .updater(new NoOp()) + .dist(new NormalDistribution(0, 1)) + .convolutionMode(ConvolutionMode.Same) + .layer( + Convolution1DNew.builder() + .activation(afn) + .kernelSize(kernel) + .stride(stride) + .padding(padding) + .nOut(convNOut1) + .build()) + .layer(Cropping1D.builder(cropping).build()) + .layer( + Convolution1DNew.builder() + .activation(afn) + .kernelSize(kernel) + .stride(stride) + .padding(padding) + .nOut(convNOut2) + .build()) + .layer( + RnnOutputLayer.builder() + .lossFunction(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX) + .nOut(finalNOut) + .build()) + .inputType(InputType.recurrent(convNIn, length, RNNFormat.NCW)) + .build(); + + String json = conf.toJson(); + NeuralNetConfiguration c2 = NeuralNetConfiguration.fromJson(json); + assertEquals(conf, c2); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + String msg = + "PoolingType=" + + poolingType + + ", minibatch=" + + minibatchSize + + ", activationFn=" + + afn + + ", kernel = " + + kernel; + + if (PRINT_RESULTS) { + System.out.println(msg); + // for (int j = 0; j < net.getnLayers(); j++) + // System.out.println("ILayer " + j + " # params: " + + // net.getLayer(j).numParams()); + } + + boolean gradOK = + GradientCheckUtil.checkGradients( + net, + DEFAULT_EPS, + DEFAULT_MAX_REL_ERROR, + DEFAULT_MIN_ABS_ERROR, + PRINT_RESULTS, + RETURN_ON_FIRST_FAILURE, + input, + labels); + + assertTrue(gradOK, msg); + + TestUtils.testModelSerialization(net); + } + } + } + } + } + + @Test + public void 
testCnn1DWithZeroPadding1D() { + Nd4j.getRandom().setSeed(1337); + + int[] minibatchSizes = {1, 3}; + int length = 7; + int convNIn = 2; + int convNOut1 = 3; + int convNOut2 = 4; + int finalNOut = 4; + + int[] kernels = {1, 2, 4}; + int stride = 1; + int pnorm = 2; + + int padding = 0; + int zeroPadding = 2; + int paddedLength = length + 2 * zeroPadding; + + Activation[] activations = {Activation.SIGMOID}; + SubsamplingLayer.PoolingType[] poolingTypes = + new SubsamplingLayer.PoolingType[] { + SubsamplingLayer.PoolingType.MAX, + SubsamplingLayer.PoolingType.AVG, + SubsamplingLayer.PoolingType.PNORM + }; + + for (Activation afn : activations) { + for (SubsamplingLayer.PoolingType poolingType : poolingTypes) { + for (int minibatchSize : minibatchSizes) { + for (int kernel : kernels) { + INDArray input = Nd4j.rand(minibatchSize, convNIn, length); + INDArray labels = Nd4j.zeros(minibatchSize, finalNOut, paddedLength); + for (int i = 0; i < minibatchSize; i++) { + for (int j = 0; j < paddedLength; j++) { + labels.putScalar(new int[] {i, i % finalNOut, j}, 1.0); + } + } + + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() + .dataType(DataType.DOUBLE) + .updater(new NoOp()) + .dist(new NormalDistribution(0, 1)) + .convolutionMode(ConvolutionMode.Same) + .layer( + Convolution1DNew.builder() + .activation(afn) + .kernelSize(kernel) + .stride(stride) + .padding(padding) + .nOut(convNOut1) + .build()) + .layer(ZeroPadding1DLayer.builder(zeroPadding).build()) + .layer( + Convolution1DNew.builder() + .activation(afn) + .kernelSize(kernel) + .stride(stride) + .padding(padding) + .nOut(convNOut2) + .build()) + .layer(ZeroPadding1DLayer.builder(0).build()) + .layer( + Subsampling1DLayer.builder(poolingType) + .kernelSize(kernel) + .stride(stride) + .padding(padding) + .pnorm(pnorm) + .build()) + .layer( + RnnOutputLayer.builder() + .lossFunction(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX) + .nOut(finalNOut) + .build()) + .inputType(InputType.recurrent(convNIn, length, RNNFormat.NCW)) + .build(); + + String json = conf.toJson(); + NeuralNetConfiguration c2 = NeuralNetConfiguration.fromJson(json); + assertEquals(conf, c2); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + String msg = + "PoolingType=" + + poolingType + + ", minibatch=" + + minibatchSize + + ", activationFn=" + + afn + + ", kernel = " + + kernel; + + if (PRINT_RESULTS) { + System.out.println(msg); + // for (int j = 0; j < net.getnLayers(); j++) + // System.out.println("ILayer " + j + " # params: " + + // net.getLayer(j).numParams()); + } + + boolean gradOK = + GradientCheckUtil.checkGradients( + net, + DEFAULT_EPS, + DEFAULT_MAX_REL_ERROR, + DEFAULT_MIN_ABS_ERROR, + PRINT_RESULTS, + RETURN_ON_FIRST_FAILURE, + input, + labels); + + assertTrue(gradOK, msg); + TestUtils.testModelSerialization(net); + } + } + } + } + } + + @Test + public void testCnn1DWithSubsampling1D() { + Nd4j.getRandom().setSeed(12345); + + int[] minibatchSizes = {1, 3}; + int length = 7; + int convNIn = 2; + int convNOut1 = 3; + int convNOut2 = 4; + int finalNOut = 4; + + int[] kernels = {1, 2, 4}; + int stride = 1; + int padding = 0; + int pnorm = 2; + + Activation[] activations = {Activation.SIGMOID, Activation.TANH}; + SubsamplingLayer.PoolingType[] poolingTypes = + new SubsamplingLayer.PoolingType[] { + SubsamplingLayer.PoolingType.MAX, + SubsamplingLayer.PoolingType.AVG, + SubsamplingLayer.PoolingType.PNORM + }; + + for (Activation afn : activations) { + for (SubsamplingLayer.PoolingType poolingType : 
poolingTypes) { + for (int minibatchSize : minibatchSizes) { + for (int kernel : kernels) { + INDArray input = Nd4j.rand(minibatchSize, convNIn, length); + INDArray labels = Nd4j.zeros(minibatchSize, finalNOut, length); + for (int i = 0; i < minibatchSize; i++) { + for (int j = 0; j < length; j++) { + labels.putScalar(new int[] {i, i % finalNOut, j}, 1.0); + } + } + + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() + .dataType(DataType.DOUBLE) + .updater(new NoOp()) + .dist(new NormalDistribution(0, 1)) + .convolutionMode(ConvolutionMode.Same) + .layer( + 0, + Convolution1DNew.builder() + .activation(afn) + .kernelSize(kernel) + .stride(stride) + .padding(padding) + .nOut(convNOut1) + .build()) + .layer( + 1, + Convolution1DNew.builder() + .activation(afn) + .kernelSize(kernel) + .stride(stride) + .padding(padding) + .nOut(convNOut2) + .build()) + .layer( + 2, + Subsampling1DLayer.builder(poolingType) + .kernelSize(kernel) + .stride(stride) + .padding(padding) + .pnorm(pnorm) + .name("SubsamplingLayer") + .build()) + .layer( + 3, + RnnOutputLayer.builder() + .lossFunction(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX) + .nOut(finalNOut) + .build()) + .inputType(InputType.recurrent(convNIn, length, RNNFormat.NCW)) + .build(); + + String json = conf.toJson(); + NeuralNetConfiguration c2 = NeuralNetConfiguration.fromJson(json); + assertEquals(conf, c2); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + String msg = + "PoolingType=" + + poolingType + + ", minibatch=" + + minibatchSize + + ", activationFn=" + + afn + + ", kernel = " + + kernel; + + if (PRINT_RESULTS) { + System.out.println(msg); + // for (int j = 0; j < net.getnLayers(); j++) + // System.out.println("ILayer " + j + " # params: " + + // net.getLayer(j).numParams()); + } + + boolean gradOK = + GradientCheckUtil.checkGradients( + net, + DEFAULT_EPS, + DEFAULT_MAX_REL_ERROR, + DEFAULT_MIN_ABS_ERROR, + PRINT_RESULTS, + RETURN_ON_FIRST_FAILURE, + input, + labels); + + assertTrue(gradOK, msg); + TestUtils.testModelSerialization(net); + } + } + } + } + } + + @Test + public void testCnn1dWithMasking() { + int length = 12; + int convNIn = 2; + int convNOut1 = 3; + int convNOut2 = 4; + int finalNOut = 3; + + int pnorm = 2; + + SubsamplingLayer.PoolingType[] poolingTypes = + new SubsamplingLayer.PoolingType[] { + SubsamplingLayer.PoolingType.MAX, SubsamplingLayer.PoolingType.AVG + }; + + for (SubsamplingLayer.PoolingType poolingType : poolingTypes) { + for (ConvolutionMode cm : + new ConvolutionMode[] {ConvolutionMode.Same, ConvolutionMode.Truncate}) { + for (int stride : new int[] {1, 2}) { + String s = cm + ", stride=" + stride + ", pooling=" + poolingType; + log.info("Starting test: " + s); + Nd4j.getRandom().setSeed(12345); + + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() + .dataType(DataType.DOUBLE) + .updater(new NoOp()) + .activation(Activation.TANH) + .dist(new NormalDistribution(0, 1)) + .convolutionMode(cm) + .seed(12345) + .layer( + Convolution1DNew.builder() + .kernelSize(2) + .rnnDataFormat(RNNFormat.NCW) + .stride(stride) + .nIn(convNIn) + .nOut(convNOut1) + .build()) + .layer( + Subsampling1DLayer.builder(poolingType) + .kernelSize(2) + .stride(stride) + .pnorm(pnorm) + .build()) + .layer( + Convolution1DNew.builder() + .kernelSize(2) + .rnnDataFormat(RNNFormat.NCW) + .stride(stride) + .nIn(convNOut1) + .nOut(convNOut2) + .build()) + .layer(GlobalPoolingLayer.builder().poolingType(PoolingType.AVG).build()) + .layer( + OutputLayer.builder() + 
.lossFunction(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX) + .nOut(finalNOut) + .build()) + .inputType(InputType.recurrent(convNIn, length)) + .build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + INDArray f = Nd4j.rand(2, convNIn, length); + INDArray fm = Nd4j.create(2, length); + fm.get(NDArrayIndex.point(0), NDArrayIndex.all()).assign(1); + fm.get(NDArrayIndex.point(1), NDArrayIndex.interval(0, 6)).assign(1); + + INDArray label = TestUtils.randomOneHot(2, finalNOut); + + boolean gradOK = + GradientCheckUtil.checkGradients( + new GradientCheckUtil.MLNConfig().net(net).input(f).labels(label).inputMask(fm)); + + assertTrue(gradOK, s); + TestUtils.testModelSerialization(net); + + // TODO also check that masked step values don't impact forward pass, score or gradients + + DataSet ds = new DataSet(f, label, fm, null); + double scoreBefore = net.score(ds); + net.setInput(f); + net.setLabels(label); + net.setLayerMaskArrays(fm, null); + net.computeGradientAndScore(); + INDArray gradBefore = net.getFlattenedGradients().dup(); + f.putScalar(1, 0, 10, 10.0); + f.putScalar(1, 1, 11, 20.0); + double scoreAfter = net.score(ds); + net.setInput(f); + net.setLabels(label); + net.setLayerMaskArrays(fm, null); + net.computeGradientAndScore(); + INDArray gradAfter = net.getFlattenedGradients().dup(); + + assertEquals(scoreBefore, scoreAfter, 1e-6); + assertEquals(gradBefore, gradAfter); + } + } + } + } + + @Test + public void testCnn1Causal() throws Exception { + int convNIn = 2; + int convNOut1 = 3; + int convNOut2 = 4; + int finalNOut = 3; + + int[] lengths = {11, 12, 13, 9, 10, 11}; + int[] kernels = {2, 3, 2, 4, 2, 3}; + int[] dilations = {1, 1, 2, 1, 2, 1}; + int[] strides = {1, 2, 1, 2, 1, 1}; + boolean[] masks = {false, true, false, true, false, true}; + boolean[] hasB = {true, false, true, false, true, true}; + for (int i = 0; i < lengths.length; i++) { + int length = lengths[i]; + int k = kernels[i]; + int d = dilations[i]; + int st = strides[i]; + boolean mask = masks[i]; + boolean hasBias = hasB[i]; + // TODO has bias + String s = "k=" + k + ", s=" + st + " d=" + d + ", seqLen=" + length; + log.info("Starting test: " + s); + Nd4j.getRandom().setSeed(12345); + + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() + .dataType(DataType.DOUBLE) + .updater(new NoOp()) + .activation(Activation.TANH) + .weightInit(new NormalDistribution(0, 1)) + .seed(12345) + .layer( + Convolution1DNew.builder() + .kernelSize(k) + .dilation(d) + .hasBias(hasBias) + .convolutionMode(ConvolutionMode.Causal) + .stride(st) + .nOut(convNOut1) + .build()) + .layer( + Convolution1DNew.builder() + .kernelSize(k) + .dilation(d) + .convolutionMode(ConvolutionMode.Causal) + .stride(st) + .nOut(convNOut2) + .build()) + .layer( + RnnOutputLayer.builder() + .lossFunction(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX) + .nOut(finalNOut) + .build()) + .inputType(InputType.recurrent(convNIn, length, RNNFormat.NCW)) + .build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + INDArray f = Nd4j.rand(DataType.DOUBLE, 2, convNIn, length); + INDArray fm = null; + if (mask) { + fm = Nd4j.create(2, length); + fm.get(NDArrayIndex.point(0), NDArrayIndex.all()).assign(1); + fm.get(NDArrayIndex.point(1), NDArrayIndex.interval(0, length - 2)).assign(1); + } + + long outSize1 = Convolution1DUtils.getOutputSize(length, k, st, 0, ConvolutionMode.Causal, d); + long outSize2 = + Convolution1DUtils.getOutputSize(outSize1, k, st, 0, 
ConvolutionMode.Causal, d); + + INDArray label = TestUtils.randomOneHotTimeSeries(2, finalNOut, (int) outSize2); + + String msg = + "Minibatch=" + + 1 + + ", activationFn=" + + Activation.RELU + + ", kernel = " + + k; + + System.out.println(msg); + for (int j = 0; j < net.getnLayers(); j++) + System.out.println("ILayer " + j + " # params: " + net.getLayer(j).numParams()); + + boolean gradOK = + GradientCheckUtil.checkGradients( + new GradientCheckUtil.MLNConfig().net(net).input(f).labels(label).inputMask(fm)); + + assertTrue(gradOK, s); + TestUtils.testModelSerialization(net); + } + } +} diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN3DGradientCheckTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN3DGradientCheckTest.java index 154716fb3..27f8a6069 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN3DGradientCheckTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN3DGradientCheckTest.java @@ -112,9 +112,8 @@ public class CNN3DGradientCheckTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .dataType(DataType.DOUBLE) - .updater(new NoOp()).weightInit(WeightInit.LECUN_NORMAL) + .updater(new NoOp()) .dist(new NormalDistribution(0, 1)) - .list() .layer(0, Convolution3D.builder().activation(afn).kernelSize(kernel) .stride(stride).nIn(convNIn).nOut(convNOut1).hasBias(false) .convolutionMode(mode).dataFormat(df) @@ -400,7 +399,6 @@ public class CNN3DGradientCheckTest extends BaseDL4JTest { .updater(new NoOp()).weightInit(WeightInit.LECUN_NORMAL) .dist(new NormalDistribution(0, 1)) .seed(12345) - .list() .layer(0, Convolution3D.builder().activation(afn).kernelSize(1, 1, 1) .nIn(convNIn).nOut(convNOut).hasBias(false) .convolutionMode(mode).dataFormat(df) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNNGradientCheckTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNNGradientCheckTest.java index a3ef0c082..f071baca5 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNNGradientCheckTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNNGradientCheckTest.java @@ -108,8 +108,8 @@ public class CNNGradientCheckTest extends BaseDL4JTest { .updater(new NoOp()) .weightInit(WeightInit.XAVIER) .seed(12345L) - .list() - .layer(0, ConvolutionLayer.builder(1, 1).nOut(6).activation(afn).build()) + + .layer(0, Convolution2D.builder().kernelSize(1).stride(1).nOut(6).activation(afn).build()) .layer(1, OutputLayer.builder(lf).activation(outputActivation).nOut(3).build()) .inputType(InputType.convolutionalFlat(1, 4, 1)); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/LossFunctionGradientCheck.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/LossFunctionGradientCheck.java index 345c0251f..bbcf8c702 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/LossFunctionGradientCheck.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/LossFunctionGradientCheck.java @@ -32,6 +32,7 @@ import org.deeplearning4j.nn.conf.distribution.UniformDistribution; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.LossLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; +import org.deeplearning4j.nn.conf.serde.CavisMapper; import 
org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.junit.jupiter.api.Test; import org.nd4j.linalg.activations.Activation; @@ -336,7 +337,7 @@ public class LossFunctionGradientCheck extends BaseDL4JTest { // to ensure that we carry the parameters through // the serializer. try{ - ObjectMapper m = NeuralNetConfiguration.mapper(); + ObjectMapper m = CavisMapper.getMapper(CavisMapper.Type.JSON); String s = m.writeValueAsString(lossFunctions[i]); ILossFunction lf2 = m.readValue(s, lossFunctions[i].getClass()); lossFunctions[i] = lf2; diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/dtypes/DTypeTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/dtypes/DTypeTests.java index 1aabd53e2..8c1de3250 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/dtypes/DTypeTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/dtypes/DTypeTests.java @@ -180,7 +180,7 @@ public class DTypeTests extends BaseDL4JTest { Pooling2D.class, //Alias for SubsamplingLayer Convolution2D.class, //Alias for ConvolutionLayer Pooling1D.class, //Alias for Subsampling1D - Convolution1D.class, //Alias for Convolution1DLayer + Convolution1D.class, //Alias for Convolution1D TensorFlowCnnToFeedForwardPreProcessor.class //Deprecated )); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvDataFormatTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvDataFormatTests.java index 44d7380fd..c6b8b0446 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvDataFormatTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvDataFormatTests.java @@ -37,7 +37,7 @@ import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.workspace.ArrayType; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import org.deeplearning4j.util.ConvolutionUtils; +import org.deeplearning4j.util.Convolution2DUtils; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Timeout; import org.nd4j.linalg.activations.Activation; @@ -1026,7 +1026,7 @@ public class ConvDataFormatTests extends BaseDL4JTest { } catch (DL4JInvalidInputException e) { // e.printStackTrace(); String msg = e.getMessage(); - assertTrue(msg.contains(ConvolutionUtils.NCHW_NHWC_ERROR_MSG) || msg.contains("input array channels does not match CNN layer configuration"), msg); + assertTrue(msg.contains(Convolution2DUtils.NCHW_NHWC_ERROR_MSG) || msg.contains("input array channels does not match CNN layer configuration"), msg); } } } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerTest.java index 87814e038..91b25c1bd 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerTest.java @@ -36,7 +36,7 @@ import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.RNNFormat; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.*; -import org.deeplearning4j.nn.conf.layers.Convolution1DLayer; +import org.deeplearning4j.nn.conf.layers.Convolution1D; 
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.weights.WeightInit; @@ -921,7 +921,7 @@ public class ConvolutionLayerTest extends BaseDL4JTest { NeuralNetConfiguration.builder() .convolutionMode(ConvolutionMode.Same) .layer( - Convolution1DLayer.builder() + Convolution1D.builder() .nOut(3) .kernelSize(2) .activation(Activation.TANH) @@ -975,7 +975,7 @@ public class ConvolutionLayerTest extends BaseDL4JTest { @Test public void testConv1dCausalAllowed() { - Convolution1DLayer.builder().convolutionMode(ConvolutionMode.Causal).kernelSize(2).build(); + Convolution1D.builder().convolutionMode(ConvolutionMode.Causal).kernelSize(2).build(); Subsampling1DLayer.builder().convolutionMode(ConvolutionMode.Causal).kernelSize(2).build(); } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/TestConvolutionModes.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/TestConvolutionModes.java index 89ce76cdb..5fa2f72ae 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/TestConvolutionModes.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/TestConvolutionModes.java @@ -33,7 +33,7 @@ import org.deeplearning4j.nn.conf.layers.*; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.weights.WeightInit; -import org.deeplearning4j.util.ConvolutionUtils; +import org.deeplearning4j.util.Convolution2DUtils; import org.junit.jupiter.api.Test; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.api.ndarray.INDArray; @@ -346,7 +346,7 @@ public class TestConvolutionModes extends BaseDL4JTest { assertEquals(2, it.getHeight()); assertEquals(2, it.getWidth()); assertEquals(dOut, it.getChannels()); - int[] outSize = ConvolutionUtils.getOutputSize(inData, kernel, stride, padding, ConvolutionMode.Strict); + int[] outSize = Convolution2DUtils.getOutputSize(inData, kernel, stride, padding, ConvolutionMode.Strict); assertEquals(2, outSize[0]); assertEquals(2, outSize[1]); @@ -357,7 +357,7 @@ public class TestConvolutionModes extends BaseDL4JTest { assertEquals(2, it.getHeight()); assertEquals(2, it.getWidth()); assertEquals(dOut, it.getChannels()); - outSize = ConvolutionUtils.getOutputSize(inData, kernel, stride, padding, ConvolutionMode.Truncate); + outSize = Convolution2DUtils.getOutputSize(inData, kernel, stride, padding, ConvolutionMode.Truncate); assertEquals(2, outSize[0]); assertEquals(2, outSize[1]); @@ -367,7 +367,7 @@ public class TestConvolutionModes extends BaseDL4JTest { assertEquals(3, it.getHeight()); assertEquals(3, it.getWidth()); assertEquals(dOut, it.getChannels()); - outSize = ConvolutionUtils.getOutputSize(inData, kernel, stride, null, ConvolutionMode.Same); + outSize = Convolution2DUtils.getOutputSize(inData, kernel, stride, null, ConvolutionMode.Same); assertEquals(3, outSize[0]); assertEquals(3, outSize[1]); @@ -397,7 +397,7 @@ public class TestConvolutionModes extends BaseDL4JTest { System.out.println(e.getMessage()); } try { - outSize = ConvolutionUtils.getOutputSize(inData, kernel, stride, padding, ConvolutionMode.Strict); + outSize = Convolution2DUtils.getOutputSize(inData, kernel, stride, padding, ConvolutionMode.Strict); fail("Exception expected"); } catch (DL4JException e) { System.out.println(e.getMessage()); @@ -409,7 +409,7 @@ public class 
TestConvolutionModes extends BaseDL4JTest { assertEquals(1, it.getHeight()); assertEquals(1, it.getWidth()); assertEquals(dOut, it.getChannels()); - outSize = ConvolutionUtils.getOutputSize(inData, kernel, stride, padding, ConvolutionMode.Truncate); + outSize = Convolution2DUtils.getOutputSize(inData, kernel, stride, padding, ConvolutionMode.Truncate); assertEquals(1, outSize[0]); assertEquals(1, outSize[1]); @@ -419,7 +419,7 @@ public class TestConvolutionModes extends BaseDL4JTest { assertEquals(2, it.getHeight()); assertEquals(2, it.getWidth()); assertEquals(dOut, it.getChannels()); - outSize = ConvolutionUtils.getOutputSize(inData, kernel, stride, null, ConvolutionMode.Same); + outSize = Convolution2DUtils.getOutputSize(inData, kernel, stride, null, ConvolutionMode.Same); assertEquals(2, outSize[0]); assertEquals(2, outSize[1]); } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/normalization/BatchNormalizationTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/normalization/BatchNormalizationTest.java index bf01f07f8..d943be062 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/normalization/BatchNormalizationTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/normalization/BatchNormalizationTest.java @@ -732,7 +732,7 @@ public class BatchNormalizationTest extends BaseDL4JTest { .weightInit(WeightInit.XAVIER) .convolutionMode(ConvolutionMode.Same) .layer(rnn ? LSTM.builder().nOut(3).build() : - Convolution1DLayer.builder().kernelSize(3).stride(1).nOut(3).build()) + Convolution1D.builder().kernelSize(3).stride(1).nOut(3).build()) .layer(BatchNormalization.builder().build()) .layer(RnnOutputLayer.builder().nOut(3).activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE).build()) .inputType(InputType.recurrent(3)) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/weights/WeightInitIdentityTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/weights/WeightInitIdentityTest.java index 5dc7bd0a3..3f3162d59 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/weights/WeightInitIdentityTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/weights/WeightInitIdentityTest.java @@ -52,7 +52,7 @@ public class WeightInitIdentityTest extends BaseDL4JTest { .graphBuilder() .addInputs(inputName) .setOutputs(output) - .layer(conv, Convolution1DLayer.builder(7) + .layer(conv, Convolution1D.builder(7) .convolutionMode(ConvolutionMode.Same) .nOut(input.size(1)) .weightInit(new WeightInitIdentity()) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/TestDistributionDeserializer.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/TestDistributionDeserializer.java index 8ec311167..1d61bc3e6 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/TestDistributionDeserializer.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/regressiontest/TestDistributionDeserializer.java @@ -23,6 +23,7 @@ package org.deeplearning4j.regressiontest; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.distribution.*; +import org.deeplearning4j.nn.conf.serde.CavisMapper; import org.junit.jupiter.api.Test; import com.fasterxml.jackson.databind.ObjectMapper; @@ -38,7 +39,7 @@ public class TestDistributionDeserializer 
extends BaseDL4JTest { new Distribution[] {new NormalDistribution(3, 0.5), new UniformDistribution(-2, 1), new GaussianDistribution(2, 1.0), new BinomialDistribution(10, 0.3)}; - ObjectMapper om = NeuralNetConfiguration.mapper(); + ObjectMapper om = CavisMapper.getMapper(CavisMapper.Type.JSON); for (Distribution d : distributions) { String json = om.writeValueAsString(d); @@ -50,7 +51,7 @@ public class TestDistributionDeserializer extends BaseDL4JTest { @Test public void testDistributionDeserializerLegacyFormat() throws Exception { - ObjectMapper om = NeuralNetConfiguration.mapper(); + ObjectMapper om = CavisMapper.getMapper(CavisMapper.Type.JSON); String normalJson = "{\n" + " \"normal\" : {\n" + " \"mean\" : 0.1,\n" + " \"std\" : 1.2\n" + " }\n" + " }"; diff --git a/cavis-dnn/cavis-dnn-cudnn/src/main/java/org/deeplearning4j/cuda/convolution/CudnnConvolutionHelper.java b/cavis-dnn/cavis-dnn-cudnn/src/main/java/org/deeplearning4j/cuda/convolution/CudnnConvolutionHelper.java index 7aa9a62fe..00cc4ed93 100644 --- a/cavis-dnn/cavis-dnn-cudnn/src/main/java/org/deeplearning4j/cuda/convolution/CudnnConvolutionHelper.java +++ b/cavis-dnn/cavis-dnn-cudnn/src/main/java/org/deeplearning4j/cuda/convolution/CudnnConvolutionHelper.java @@ -38,7 +38,7 @@ import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.cuda.BaseCudnnHelper; import org.deeplearning4j.nn.layers.convolution.ConvolutionHelper; import org.deeplearning4j.nn.params.ConvolutionParamInitializer; -import org.deeplearning4j.util.ConvolutionUtils; +import org.deeplearning4j.util.Convolution2DUtils; import org.nd4j.jita.allocator.Allocator; import org.nd4j.jita.allocator.impl.AtomicAllocator; import org.nd4j.jita.conf.CudaEnvironment; @@ -681,9 +681,9 @@ public class CudnnConvolutionHelper extends BaseCudnnHelper implements Convoluti int[] outSize; if (convolutionMode == ConvolutionMode.Same) { - outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, null, convolutionMode, dilation, format); //Also performs validation - padding = ConvolutionUtils.getSameModeTopLeftPadding(outSize, new int[] {(int) inH, (int) inW}, kernel, strides, dilation); - int[] padBottomRight = ConvolutionUtils.getSameModeBottomRightPadding(outSize, new int[] {(int) inH, (int) inW}, kernel, strides, dilation); + outSize = Convolution2DUtils.getOutputSize(input, kernel, strides, null, convolutionMode, dilation, format); //Also performs validation + padding = Convolution2DUtils.getSameModeTopLeftPadding(outSize, new int[] {(int) inH, (int) inW}, kernel, strides, dilation); + int[] padBottomRight = Convolution2DUtils.getSameModeBottomRightPadding(outSize, new int[] {(int) inH, (int) inW}, kernel, strides, dilation); if(!Arrays.equals(padding, padBottomRight)){ /* CuDNN - even as of 7.1 (CUDA 9.1) still doesn't have support for proper SAME mode padding (i.e., asymmetric @@ -731,7 +731,7 @@ public class CudnnConvolutionHelper extends BaseCudnnHelper implements Convoluti // CuDNN handle } } else { - outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, padding, convolutionMode, dilation, format); //Also performs validation + outSize = Convolution2DUtils.getOutputSize(input, kernel, strides, padding, convolutionMode, dilation, format); //Also performs validation } return new CudnnForwardArgs(manualPadBottom, manualPadRight, input, origInput, padding, outSize); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasModel.java 
b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasModel.java index 6f1bf14d2..07130d4bc 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasModel.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasModel.java @@ -42,7 +42,7 @@ import org.deeplearning4j.nn.modelimport.keras.utils.KerasLayerUtils; import org.deeplearning4j.nn.modelimport.keras.utils.KerasModelBuilder; import org.deeplearning4j.nn.modelimport.keras.utils.KerasModelUtils; import org.deeplearning4j.nn.modelimport.keras.utils.KerasOptimizerUtils; -import org.deeplearning4j.util.ConvolutionUtils; +import org.deeplearning4j.util.Convolution2DUtils; import org.nd4j.common.primitives.Counter; import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.learning.config.IUpdater; @@ -442,8 +442,8 @@ public class KerasModel { KerasInput kerasInput = (KerasInput) layer; LayerConfiguration layer1 = layersOrdered.get(kerasLayerIdx + 1).layer; //no dim order, try to pull it from the next layer if there is one - if(ConvolutionUtils.layerHasConvolutionLayout(layer1)) { - CNN2DFormat formatForLayer = ConvolutionUtils.getFormatForLayer(layer1); + if(Convolution2DUtils.layerHasConvolutionLayout(layer1)) { + CNN2DFormat formatForLayer = Convolution2DUtils.getFormatForLayer(layer1); if(formatForLayer == CNN2DFormat.NCHW) { dimOrder = KerasLayer.DimOrder.THEANO; } else if(formatForLayer == CNN2DFormat.NHWC) { diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasAtrousConvolution1D.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasAtrousConvolution1D.java index 21c580494..16eab2a00 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasAtrousConvolution1D.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasAtrousConvolution1D.java @@ -23,7 +23,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.convolutional; import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.RNNFormat; import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.layers.Convolution1DLayer; +import org.deeplearning4j.nn.conf.layers.Convolution1D; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; @@ -84,29 +84,29 @@ public class KerasAtrousConvolution1D extends KerasConvolution { IWeightInit init = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_INIT(), enforceTrainingConfig, conf, kerasMajorVersion); - ConvolutionLayer.ConvolutionLayerBuilder builder = Convolution1DLayer.builder().name(this.name) + var builder = Convolution1D.builder().name(this.name) .nOut(getNOutFromConfig(layerConfig, conf)).dropOut(this.dropout) .activation(getIActivationFromConfig(layerConfig, conf)) .weightInit(init) .dilation(getDilationRate(layerConfig, 1, conf, true)[0]) .l1(this.weightL1Regularization).l2(this.weightL2Regularization) .convolutionMode(getConvolutionModeFromConfig(layerConfig, conf)) - .kernelSize(getKernelSizeFromConfig(layerConfig, 1, conf, kerasMajorVersion)[0]) + 
.kernelSize(getKernelSizeFromConfig(layerConfig, 1, conf, kerasMajorVersion)) .hasBias(hasBias) .rnnDataFormat(dimOrder == DimOrder.TENSORFLOW ? RNNFormat.NWC : RNNFormat.NCW) - .stride(getStrideFromConfig(layerConfig, 1, conf)[0]); + .stride(getStrideFromConfig(layerConfig, 1, conf)); int[] padding = getPaddingFromBorderModeConfig(layerConfig, 1, conf, kerasMajorVersion); if (hasBias) builder.biasInit(0.0); if (padding != null) - builder.padding(padding[0]); + builder.padding(padding); if (biasConstraint != null) builder.constrainBias(biasConstraint); if (weightConstraint != null) builder.constrainWeights(weightConstraint); this.layer = builder.build(); - Convolution1DLayer convolution1DLayer = (Convolution1DLayer) layer; - convolution1DLayer.setDefaultValueOverriden(true); + Convolution1D convolution1D = (Convolution1D) layer; + convolution1D.setDefaultValueOverriden(true); } /** @@ -114,8 +114,8 @@ public class KerasAtrousConvolution1D extends KerasConvolution { * * @return ConvolutionLayer */ - public Convolution1DLayer getAtrousConvolution1D() { - return (Convolution1DLayer) this.layer; + public Convolution1D getAtrousConvolution1D() { + return (Convolution1D) this.layer; } /** diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasAtrousConvolution2D.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasAtrousConvolution2D.java index 419d74490..6f6fa25b3 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasAtrousConvolution2D.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasAtrousConvolution2D.java @@ -24,6 +24,7 @@ import lombok.val; import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.inputs.InputType; +import org.deeplearning4j.nn.conf.layers.Convolution2D; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; @@ -85,7 +86,7 @@ public class KerasAtrousConvolution2D extends KerasConvolution { IWeightInit init = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_INIT(), enforceTrainingConfig, conf, kerasMajorVersion); - val builder = ConvolutionLayer.builder().name(this.name) + val builder = Convolution2D.builder().name(this.name) .nOut(getNOutFromConfig(layerConfig, conf)).dropOut(this.dropout) .activation(getIActivationFromConfig(layerConfig, conf)) .weightInit(init) diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution1D.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution1D.java index d99db5999..e85df98c0 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution1D.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution1D.java @@ -28,7 +28,7 @@ import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.RNNFormat; import 
org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.layers.Convolution1DLayer; +import org.deeplearning4j.nn.conf.layers.Convolution1D; import org.deeplearning4j.nn.conf.layers.InputTypeUtil; import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; @@ -93,7 +93,7 @@ public class KerasConvolution1D extends KerasConvolution { IWeightInit init = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_INIT(), enforceTrainingConfig, conf, kerasMajorVersion); - Convolution1DLayer.Convolution1DLayerBuilder builder = Convolution1DLayer.builder().name(this.name) + var builder = Convolution1D.builder().name(this.name) .nOut(getNOutFromConfig(layerConfig, conf)).dropOut(this.dropout) .activation(getIActivationFromConfig(layerConfig, conf)) .weightInit(init) @@ -125,9 +125,9 @@ public class KerasConvolution1D extends KerasConvolution { this.layer = builder.build(); //set this in order to infer the dimensional format - Convolution1DLayer convolution1DLayer = (Convolution1DLayer) this.layer; - convolution1DLayer.setDataFormat(dimOrder == DimOrder.TENSORFLOW ? CNN2DFormat.NHWC : CNN2DFormat.NCHW); - convolution1DLayer.setDefaultValueOverriden(true); + Convolution1D convolution1D = (Convolution1D) this.layer; + convolution1D.setDataFormat(dimOrder == DimOrder.TENSORFLOW ? CNN2DFormat.NHWC : CNN2DFormat.NCHW); + convolution1D.setDefaultValueOverriden(true); } /** @@ -135,8 +135,8 @@ public class KerasConvolution1D extends KerasConvolution { * * @return ConvolutionLayer */ - public Convolution1DLayer getConvolution1DLayer() { - return (Convolution1DLayer) this.layer; + public Convolution1D getConvolution1DLayer() { + return (Convolution1D) this.layer; } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution2D.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution2D.java index 7eb2c62a0..ff1e73819 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution2D.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution2D.java @@ -28,6 +28,7 @@ import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.inputs.InputType; +import org.deeplearning4j.nn.conf.layers.Convolution2D; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; @@ -95,7 +96,7 @@ public class KerasConvolution2D extends KerasConvolution { LayerConstraint weightConstraint = KerasConstraintUtils.getConstraintsFromConfig( layerConfig, conf.getLAYER_FIELD_W_CONSTRAINT(), conf, kerasMajorVersion); - final var builder = ConvolutionLayer.builder().name(this.name) + final var builder = Convolution2D.builder().name(this.name) .nOut(getNOutFromConfig(layerConfig, conf)).dropOut(this.dropout) .activation(getIActivationFromConfig(layerConfig, conf)) .weightInit(init) diff --git 
a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/JsonTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/JsonTest.java index 4558eccc7..93a691c98 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/JsonTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/JsonTest.java @@ -41,8 +41,8 @@ public class JsonTest extends BaseDL4JTest { }; for(InputPreProcessor p : pp ){ - String s = NeuralNetConfiguration.mapper().writeValueAsString(p); - InputPreProcessor p2 = NeuralNetConfiguration.mapper().readValue(s, InputPreProcessor.class); + String s = CavisMapper.getMapper(CavisMapper.Type.JSON).writeValueAsString(p); + InputPreProcessor p2 = CavisMapper.getMapper(CavisMapper.Type.JSON).readValue(s, InputPreProcessor.class); assertEquals(p, p2); } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasModelEndToEndTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasModelEndToEndTest.java index be1ecf339..0fbc460e7 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasModelEndToEndTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasModelEndToEndTest.java @@ -29,11 +29,8 @@ import org.deeplearning4j.gradientcheck.GradientCheckUtil; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.layers.IOutputLayer; import org.deeplearning4j.nn.conf.ConvolutionMode; -import org.deeplearning4j.nn.conf.layers.Convolution1DLayer; -import org.deeplearning4j.nn.conf.layers.FeedForwardLayer; -import org.deeplearning4j.nn.conf.layers.LayerConfiguration; -import org.deeplearning4j.nn.conf.layers.LossLayer; -import org.deeplearning4j.nn.conf.layers.RnnOutputLayer; +import org.deeplearning4j.nn.conf.layers.*; +import org.deeplearning4j.nn.conf.layers.Convolution1D; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.nn.modelimport.keras.Hdf5Archive; @@ -656,7 +653,7 @@ public class KerasModelEndToEndTest extends BaseDL4JTest { MultiLayerNetwork net = importEndModelTest(modelPath, inputsOutputPath, true, true, true, true, false, null, null); Layer l = net.getLayer(0); - Convolution1DLayer c1d = (Convolution1DLayer) l.getTrainingConfig(); + Convolution1D c1d = (Convolution1D) l.getTrainingConfig(); assertEquals(ConvolutionMode.Causal, c1d.getConvolutionMode()); } } diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasAtrousConvolution1DTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasAtrousConvolution1DTest.java index a44cf7472..968982f69 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasAtrousConvolution1DTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasAtrousConvolution1DTest.java @@ -22,7 +22,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.convolution; import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.dropout.Dropout; -import 
org.deeplearning4j.nn.conf.layers.Convolution1DLayer; +import org.deeplearning4j.nn.conf.layers.Convolution1D; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.nn.modelimport.keras.KerasTestUtils; import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration; @@ -97,7 +97,7 @@ public class KerasAtrousConvolution1DTest extends BaseDL4JTest { config.put(conf.getLAYER_FIELD_BORDER_MODE(), BORDER_MODE_VALID); layerConfig.put(conf.getLAYER_FIELD_CONFIG(), config); - Convolution1DLayer layer = new KerasAtrousConvolution1D(layerConfig).getAtrousConvolution1D(); + Convolution1D layer = new KerasAtrousConvolution1D(layerConfig).getAtrousConvolution1D(); assertEquals(ACTIVATION_DL4J, layer.getActivationFn().toString()); assertEquals(LAYER_NAME, layer.getName()); assertEquals(INIT_DL4J, layer.getWeightInit()); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution1DTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution1DTest.java index 56c1abf34..b918182c0 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution1DTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution1DTest.java @@ -22,7 +22,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.convolution; import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.dropout.Dropout; -import org.deeplearning4j.nn.conf.layers.Convolution1DLayer; +import org.deeplearning4j.nn.conf.layers.Convolution1D; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.nn.modelimport.keras.KerasTestUtils; import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration; @@ -119,7 +119,7 @@ public class KerasConvolution1DTest extends BaseDL4JTest { config.put(conf.getLAYER_FIELD_BORDER_MODE(), BORDER_MODE_VALID); layerConfig.put(conf.getLAYER_FIELD_CONFIG(), config); - Convolution1DLayer layer = new KerasConvolution1D(layerConfig).getConvolution1DLayer(); + Convolution1D layer = new KerasConvolution1D(layerConfig).getConvolution1DLayer(); assertEquals(ACTIVATION_DL4J, layer.getActivationFn().toString()); assertEquals(LAYER_NAME, layer.getName()); assertEquals(INIT_DL4J, layer.getWeightInit()); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/INeuralNetworkConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/INeuralNetworkConfiguration.java index 3c679267c..c15e09963 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/INeuralNetworkConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/INeuralNetworkConfiguration.java @@ -22,8 +22,6 @@ package net.brutex.ai.dnn.api; import java.io.Serializable; -import java.util.List; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; public interface INeuralNetworkConfiguration extends Serializable, Cloneable { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/NN.java b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/NN.java index 69041b3a5..06e73dcf9 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/NN.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/NN.java @@ -23,7 +23,6 @@ package net.brutex.ai.dnn.api; import 
org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration.NeuralNetConfigurationBuilder; -import org.deeplearning4j.nn.conf.layers.DenseLayer; /** * A fluent API to configure and create artificial neural networks diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/networks/ArtificialNeuralNetwork.java b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/networks/ArtificialNeuralNetwork.java index 46d2fa5b7..f70e57b15 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/networks/ArtificialNeuralNetwork.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/networks/ArtificialNeuralNetwork.java @@ -23,7 +23,6 @@ package net.brutex.ai.dnn.networks; import java.io.Serializable; import java.util.Arrays; -import java.util.HashMap; import java.util.Map; import lombok.Getter; import lombok.NonNull; @@ -33,7 +32,6 @@ import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.gradient.Gradient; import org.nd4j.linalg.api.ndarray.INDArray; - /** * Artificial Neural Network An artificial neural network (1) takes some input data, and (2) * transforms this input data by calculating a weighted sum over the inputs and (3) applies a diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/EarlyStoppingConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/EarlyStoppingConfiguration.java index d95c5aab6..6d23d10a1 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/EarlyStoppingConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/EarlyStoppingConfiguration.java @@ -20,6 +20,10 @@ package org.deeplearning4j.earlystopping; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; import lombok.Data; import lombok.NoArgsConstructor; import net.brutex.ai.dnn.api.IModel; @@ -30,11 +34,6 @@ import org.deeplearning4j.earlystopping.termination.IterationTerminationConditio import org.deeplearning4j.exception.DL4JInvalidConfigException; import org.nd4j.common.function.Supplier; -import java.io.Serializable; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; - @Data @NoArgsConstructor public class EarlyStoppingConfiguration implements Serializable { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/EarlyStoppingModelSaver.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/EarlyStoppingModelSaver.java index 9037e0792..2cc1b644c 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/EarlyStoppingModelSaver.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/EarlyStoppingModelSaver.java @@ -20,16 +20,15 @@ package org.deeplearning4j.earlystopping; +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonSubTypes; +import com.fasterxml.jackson.annotation.JsonTypeInfo; +import java.io.IOException; +import java.io.Serializable; import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.earlystopping.saver.InMemoryModelSaver; import org.deeplearning4j.earlystopping.saver.LocalFileGraphSaver; import org.deeplearning4j.earlystopping.saver.LocalFileModelSaver; -import com.fasterxml.jackson.annotation.JsonInclude; -import com.fasterxml.jackson.annotation.JsonSubTypes; -import com.fasterxml.jackson.annotation.JsonTypeInfo; - 
-import java.io.IOException; -import java.io.Serializable; @JsonInclude(JsonInclude.Include.NON_NULL) @JsonSubTypes(value = {@JsonSubTypes.Type(value = InMemoryModelSaver.class, name = "InMemoryModelSaver"), diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/EarlyStoppingResult.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/EarlyStoppingResult.java index 817f4c7db..bee3e4a85 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/EarlyStoppingResult.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/EarlyStoppingResult.java @@ -20,11 +20,10 @@ package org.deeplearning4j.earlystopping; -import lombok.Data; -import net.brutex.ai.dnn.api.IModel; - import java.io.Serializable; import java.util.Map; +import lombok.Data; +import net.brutex.ai.dnn.api.IModel; @Data public class EarlyStoppingResult implements Serializable { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/saver/InMemoryModelSaver.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/saver/InMemoryModelSaver.java index b24b47651..a0b695eca 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/saver/InMemoryModelSaver.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/saver/InMemoryModelSaver.java @@ -20,10 +20,9 @@ package org.deeplearning4j.earlystopping.saver; -import org.deeplearning4j.earlystopping.EarlyStoppingModelSaver; -import net.brutex.ai.dnn.api.IModel; - import java.io.IOException; +import net.brutex.ai.dnn.api.IModel; +import org.deeplearning4j.earlystopping.EarlyStoppingModelSaver; public class InMemoryModelSaver implements EarlyStoppingModelSaver { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/saver/LocalFileGraphSaver.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/saver/LocalFileGraphSaver.java index 4b08e401f..8466d1479 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/saver/LocalFileGraphSaver.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/saver/LocalFileGraphSaver.java @@ -20,15 +20,14 @@ package org.deeplearning4j.earlystopping.saver; +import java.io.File; +import java.io.IOException; +import java.nio.charset.Charset; import org.apache.commons.io.FilenameUtils; import org.deeplearning4j.earlystopping.EarlyStoppingModelSaver; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.util.ModelSerializer; -import java.io.File; -import java.io.IOException; -import java.nio.charset.Charset; - public class LocalFileGraphSaver implements EarlyStoppingModelSaver { private static final String BEST_GRAPH_BIN = "bestGraph.bin"; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/saver/LocalFileModelSaver.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/saver/LocalFileModelSaver.java index be933913f..881ec3092 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/saver/LocalFileModelSaver.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/saver/LocalFileModelSaver.java @@ -20,15 +20,14 @@ package org.deeplearning4j.earlystopping.saver; +import java.io.File; +import java.io.IOException; +import java.nio.charset.Charset; import org.apache.commons.io.FilenameUtils; import 
org.deeplearning4j.earlystopping.EarlyStoppingModelSaver; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.util.ModelSerializer; -import java.io.File; -import java.io.IOException; -import java.nio.charset.Charset; - public class LocalFileModelSaver implements EarlyStoppingModelSaver { private static final String BEST_MODEL_BIN = "bestModel.bin"; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/AutoencoderScoreCalculator.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/AutoencoderScoreCalculator.java index 69f1785e4..1b26dfb6c 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/AutoencoderScoreCalculator.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/AutoencoderScoreCalculator.java @@ -26,11 +26,11 @@ import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.layers.feedforward.autoencoder.AutoEncoder; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.nd4j.evaluation.regression.RegressionEvaluation; import org.nd4j.evaluation.regression.RegressionEvaluation.Metric; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; public class AutoencoderScoreCalculator extends BaseScoreCalculator { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/DataSetLossCalculator.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/DataSetLossCalculator.java index 2f6199449..3394911e6 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/DataSetLossCalculator.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/DataSetLossCalculator.java @@ -20,8 +20,9 @@ package org.deeplearning4j.earlystopping.scorecalc; -import org.deeplearning4j.earlystopping.scorecalc.base.BaseScoreCalculator; +import com.fasterxml.jackson.annotation.JsonProperty; import net.brutex.ai.dnn.api.IModel; +import org.deeplearning4j.earlystopping.scorecalc.base.BaseScoreCalculator; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.nd4j.linalg.api.ndarray.INDArray; @@ -29,7 +30,6 @@ import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.dataset.MultiDataSet; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator; -import com.fasterxml.jackson.annotation.JsonProperty; public class DataSetLossCalculator extends BaseScoreCalculator { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/DataSetLossCalculatorCG.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/DataSetLossCalculatorCG.java index 1d6b9fb7a..086cae47c 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/DataSetLossCalculatorCG.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/DataSetLossCalculatorCG.java @@ -20,6 +20,8 @@ package org.deeplearning4j.earlystopping.scorecalc; +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonProperty; import 
lombok.NoArgsConstructor; import lombok.val; import org.deeplearning4j.nn.graph.ComputationGraph; @@ -27,8 +29,6 @@ import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.dataset.api.MultiDataSet; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator; -import com.fasterxml.jackson.annotation.JsonIgnore; -import com.fasterxml.jackson.annotation.JsonProperty; @NoArgsConstructor @Deprecated diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/ScoreCalculator.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/ScoreCalculator.java index a9568d2d9..e8b7ba1a3 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/ScoreCalculator.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/ScoreCalculator.java @@ -20,12 +20,11 @@ package org.deeplearning4j.earlystopping.scorecalc; -import net.brutex.ai.dnn.api.IModel; import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonSubTypes; import com.fasterxml.jackson.annotation.JsonTypeInfo; - import java.io.Serializable; +import net.brutex.ai.dnn.api.IModel; @JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, include = JsonTypeInfo.As.PROPERTY, property = "@class") @JsonInclude(JsonInclude.Include.NON_NULL) diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/VAEReconErrorScoreCalculator.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/VAEReconErrorScoreCalculator.java index 4b2f1eb9f..4ff0f7921 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/VAEReconErrorScoreCalculator.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/VAEReconErrorScoreCalculator.java @@ -26,11 +26,11 @@ import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.layers.variational.VariationalAutoencoder; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.nd4j.evaluation.regression.RegressionEvaluation; import org.nd4j.evaluation.regression.RegressionEvaluation.Metric; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; public class VAEReconErrorScoreCalculator extends BaseScoreCalculator { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/base/BaseIEvaluationScoreCalculator.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/base/BaseIEvaluationScoreCalculator.java index 7a064c151..ee65f7ed0 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/base/BaseIEvaluationScoreCalculator.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/base/BaseIEvaluationScoreCalculator.java @@ -20,9 +20,9 @@ package org.deeplearning4j.earlystopping.scorecalc.base; +import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.datasets.iterator.MultiDataSetWrapperIterator; import org.deeplearning4j.earlystopping.scorecalc.ScoreCalculator; -import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; 
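The early-stopping classes touched around this point (model savers, score calculators, termination conditions and trainers) compose roughly as follows. This is a minimal, hedged sketch assuming the stock DL4J-style early-stopping builder API; the iterators, network and save path are hypothetical placeholders, not code taken from this repository:

    // Sketch only: wiring the early-stopping pieces touched in this patch.
    // trainIter, testIter (DataSetIterator) and network (MultiLayerNetwork) are assumed to exist.
    EarlyStoppingConfiguration<MultiLayerNetwork> esConf =
        new EarlyStoppingConfiguration.Builder<MultiLayerNetwork>()
            .epochTerminationConditions(
                new MaxEpochsTerminationCondition(50),
                new ScoreImprovementEpochTerminationCondition(5))
            .iterationTerminationConditions(
                new MaxTimeIterationTerminationCondition(20, TimeUnit.MINUTES))
            .scoreCalculator(new DataSetLossCalculator(testIter, true))   // average loss over testIter
            .modelSaver(new LocalFileModelSaver("/tmp/earlystopping"))    // hypothetical directory
            .build();

    EarlyStoppingTrainer trainer = new EarlyStoppingTrainer(esConf, network, trainIter);
    EarlyStoppingResult<MultiLayerNetwork> result = trainer.fit();
    MultiLayerNetwork best = result.getBestModel();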
import org.nd4j.evaluation.IEvaluation; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/base/BaseScoreCalculator.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/base/BaseScoreCalculator.java index ce01ebfcd..03dc6d6ee 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/base/BaseScoreCalculator.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/scorecalc/base/BaseScoreCalculator.java @@ -21,8 +21,8 @@ package org.deeplearning4j.earlystopping.scorecalc.base; import lombok.NonNull; -import org.deeplearning4j.earlystopping.scorecalc.ScoreCalculator; import net.brutex.ai.dnn.api.IModel; +import org.deeplearning4j.earlystopping.scorecalc.ScoreCalculator; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.dataset.api.MultiDataSet; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/termination/BestScoreEpochTerminationCondition.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/termination/BestScoreEpochTerminationCondition.java index 3aeea5d96..94489c23f 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/termination/BestScoreEpochTerminationCondition.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/termination/BestScoreEpochTerminationCondition.java @@ -20,8 +20,8 @@ package org.deeplearning4j.earlystopping.termination; -import lombok.Data; import com.fasterxml.jackson.annotation.JsonProperty; +import lombok.Data; @Data public class BestScoreEpochTerminationCondition implements EpochTerminationCondition { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/termination/EpochTerminationCondition.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/termination/EpochTerminationCondition.java index 2aed68348..407960a29 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/termination/EpochTerminationCondition.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/termination/EpochTerminationCondition.java @@ -22,9 +22,7 @@ package org.deeplearning4j.earlystopping.termination; import com.fasterxml.jackson.annotation.JsonInclude; -import com.fasterxml.jackson.annotation.JsonSubTypes; import com.fasterxml.jackson.annotation.JsonTypeInfo; - import java.io.Serializable; @JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, include = JsonTypeInfo.As.PROPERTY, property = "@class") diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/termination/IterationTerminationCondition.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/termination/IterationTerminationCondition.java index b45a63f8c..4642c2e45 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/termination/IterationTerminationCondition.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/termination/IterationTerminationCondition.java @@ -22,7 +22,6 @@ package org.deeplearning4j.earlystopping.termination; import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonTypeInfo; - import java.io.Serializable; @JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, include = JsonTypeInfo.As.PROPERTY, property = "@class") diff --git 
a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/termination/MaxEpochsTerminationCondition.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/termination/MaxEpochsTerminationCondition.java index aa0da9d68..5009d9726 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/termination/MaxEpochsTerminationCondition.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/termination/MaxEpochsTerminationCondition.java @@ -20,10 +20,10 @@ package org.deeplearning4j.earlystopping.termination; -import lombok.Data; -import lombok.NoArgsConstructor; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; +import lombok.Data; +import lombok.NoArgsConstructor; @NoArgsConstructor @Data diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/termination/MaxScoreIterationTerminationCondition.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/termination/MaxScoreIterationTerminationCondition.java index 32929a157..e33d3ae6a 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/termination/MaxScoreIterationTerminationCondition.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/termination/MaxScoreIterationTerminationCondition.java @@ -20,8 +20,8 @@ package org.deeplearning4j.earlystopping.termination; -import lombok.Data; import com.fasterxml.jackson.annotation.JsonProperty; +import lombok.Data; @Data public class MaxScoreIterationTerminationCondition implements IterationTerminationCondition { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/termination/MaxTimeIterationTerminationCondition.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/termination/MaxTimeIterationTerminationCondition.java index 0f48f2d50..d17bcccc0 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/termination/MaxTimeIterationTerminationCondition.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/termination/MaxTimeIterationTerminationCondition.java @@ -20,10 +20,9 @@ package org.deeplearning4j.earlystopping.termination; -import lombok.Data; import com.fasterxml.jackson.annotation.JsonProperty; - import java.util.concurrent.TimeUnit; +import lombok.Data; /**Terminate training based on max time. 
*/ diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/termination/ScoreImprovementEpochTerminationCondition.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/termination/ScoreImprovementEpochTerminationCondition.java index fe84514fc..88ce9e93c 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/termination/ScoreImprovementEpochTerminationCondition.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/termination/ScoreImprovementEpochTerminationCondition.java @@ -20,9 +20,9 @@ package org.deeplearning4j.earlystopping.termination; +import com.fasterxml.jackson.annotation.JsonProperty; import lombok.Data; import lombok.extern.slf4j.Slf4j; -import com.fasterxml.jackson.annotation.JsonProperty; @Slf4j @Data diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/trainer/BaseEarlyStoppingTrainer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/trainer/BaseEarlyStoppingTrainer.java index 770512e4d..6db50290a 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/trainer/BaseEarlyStoppingTrainer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/trainer/BaseEarlyStoppingTrainer.java @@ -20,6 +20,12 @@ package org.deeplearning4j.earlystopping.trainer; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.Collection; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.Map; import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.earlystopping.EarlyStoppingConfiguration; import org.deeplearning4j.earlystopping.EarlyStoppingResult; @@ -40,13 +46,6 @@ import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.util.Collection; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.Map; - public abstract class BaseEarlyStoppingTrainer implements IEarlyStoppingTrainer { private static final Logger log = LoggerFactory.getLogger(BaseEarlyStoppingTrainer.class); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/trainer/EarlyStoppingGraphTrainer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/trainer/EarlyStoppingGraphTrainer.java index e0011f535..e3a666b14 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/trainer/EarlyStoppingGraphTrainer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/earlystopping/trainer/EarlyStoppingGraphTrainer.java @@ -20,7 +20,6 @@ package org.deeplearning4j.earlystopping.trainer; -import org.deeplearning4j.datasets.iterator.MultiDataSetWrapperIterator; import org.deeplearning4j.datasets.iterator.impl.SingletonDataSetIterator; import org.deeplearning4j.datasets.iterator.impl.SingletonMultiDataSetIterator; import org.deeplearning4j.earlystopping.EarlyStoppingConfiguration; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/eval/BaseEvaluation.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/eval/BaseEvaluation.java index 1c784f9b1..4b5f8fee3 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/eval/BaseEvaluation.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/eval/BaseEvaluation.java @@ -20,6 +20,13 @@ package org.deeplearning4j.eval; +import 
com.fasterxml.jackson.annotation.JsonAutoDetect; +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.MapperFeature; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.SerializationFeature; +import com.fasterxml.jackson.databind.module.SimpleModule; +import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; import lombok.EqualsAndHashCode; import lombok.Getter; import org.nd4j.common.primitives.AtomicBoolean; @@ -28,14 +35,6 @@ import org.nd4j.common.primitives.serde.JsonDeserializerAtomicBoolean; import org.nd4j.common.primitives.serde.JsonDeserializerAtomicDouble; import org.nd4j.common.primitives.serde.JsonSerializerAtomicBoolean; import org.nd4j.common.primitives.serde.JsonSerializerAtomicDouble; -import com.fasterxml.jackson.annotation.JsonAutoDetect; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.DeserializationFeature; -import com.fasterxml.jackson.databind.MapperFeature; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.SerializationFeature; -import com.fasterxml.jackson.databind.module.SimpleModule; -import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; @Deprecated @EqualsAndHashCode(callSuper = false) diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/eval/ConfusionMatrix.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/eval/ConfusionMatrix.java index b45172e35..976d86075 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/eval/ConfusionMatrix.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/eval/ConfusionMatrix.java @@ -20,15 +20,8 @@ package org.deeplearning4j.eval; -import com.google.common.collect.HashMultiset; -import com.google.common.collect.Multiset; -import lombok.Getter; -import java.io.Serializable; -import java.util.ArrayList; import java.util.List; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; @Deprecated public class ConfusionMatrix> extends org.nd4j.evaluation.classification.ConfusionMatrix { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/eval/Evaluation.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/eval/Evaluation.java index a7b4dd483..7a438a4b5 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/eval/Evaluation.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/eval/Evaluation.java @@ -20,14 +20,11 @@ package org.deeplearning4j.eval; -import lombok.EqualsAndHashCode; -import lombok.NonNull; -import org.nd4j.evaluation.EvaluationAveraging; -import org.nd4j.evaluation.IEvaluation; -import org.nd4j.linalg.api.ndarray.INDArray; - import java.util.List; import java.util.Map; +import lombok.EqualsAndHashCode; +import lombok.NonNull; +import org.nd4j.linalg.api.ndarray.INDArray; @EqualsAndHashCode(callSuper = true) @Deprecated diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/eval/EvaluationCalibration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/eval/EvaluationCalibration.java index 9b699a401..81cdbc1e1 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/eval/EvaluationCalibration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/eval/EvaluationCalibration.java @@ -20,9 +20,9 @@ package org.deeplearning4j.eval; +import com.fasterxml.jackson.annotation.JsonProperty; import lombok.EqualsAndHashCode; import lombok.Getter; -import 
com.fasterxml.jackson.annotation.JsonProperty; @Deprecated @Getter diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/eval/RegressionEvaluation.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/eval/RegressionEvaluation.java index 284037026..2462ddd7f 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/eval/RegressionEvaluation.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/eval/RegressionEvaluation.java @@ -20,11 +20,10 @@ package org.deeplearning4j.eval; +import java.util.List; import lombok.Data; import lombok.EqualsAndHashCode; -import java.util.List; - @Deprecated @Data @EqualsAndHashCode(callSuper = true) diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/eval/curves/Histogram.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/eval/curves/Histogram.java index 74f60a603..94d8d7386 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/eval/curves/Histogram.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/eval/curves/Histogram.java @@ -20,10 +20,10 @@ package org.deeplearning4j.eval.curves; +import com.fasterxml.jackson.annotation.JsonProperty; import lombok.Data; import lombok.EqualsAndHashCode; import org.nd4j.evaluation.curves.BaseHistogram; -import com.fasterxml.jackson.annotation.JsonProperty; @Deprecated @Data diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/eval/curves/PrecisionRecallCurve.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/eval/curves/PrecisionRecallCurve.java index 0f3e4a4cf..68b158849 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/eval/curves/PrecisionRecallCurve.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/eval/curves/PrecisionRecallCurve.java @@ -20,13 +20,9 @@ package org.deeplearning4j.eval.curves; -import com.google.common.base.Preconditions; -import lombok.AllArgsConstructor; +import com.fasterxml.jackson.annotation.JsonProperty; import lombok.Data; import lombok.EqualsAndHashCode; -import com.fasterxml.jackson.annotation.JsonProperty; - -import java.util.Arrays; @Deprecated @Data diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/eval/curves/ReliabilityDiagram.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/eval/curves/ReliabilityDiagram.java index 66ffe2e11..b58831153 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/eval/curves/ReliabilityDiagram.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/eval/curves/ReliabilityDiagram.java @@ -20,8 +20,8 @@ package org.deeplearning4j.eval.curves; -import lombok.NonNull; import com.fasterxml.jackson.annotation.JsonProperty; +import lombok.NonNull; @Deprecated public class ReliabilityDiagram extends org.nd4j.evaluation.curves.ReliabilityDiagram { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/eval/curves/RocCurve.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/eval/curves/RocCurve.java index 824115afa..6bcf47d9f 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/eval/curves/RocCurve.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/eval/curves/RocCurve.java @@ -20,10 +20,9 @@ package org.deeplearning4j.eval.curves; -import com.google.common.base.Preconditions; +import com.fasterxml.jackson.annotation.JsonProperty; import lombok.Data; import lombok.EqualsAndHashCode; -import com.fasterxml.jackson.annotation.JsonProperty; @Deprecated @Data diff --git 
a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/eval/meta/Prediction.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/eval/meta/Prediction.java index 125056bce..9b94f1665 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/eval/meta/Prediction.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/eval/meta/Prediction.java @@ -20,7 +20,6 @@ package org.deeplearning4j.eval.meta; -import lombok.AllArgsConstructor; import lombok.Data; @Data diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/adapters/YoloModelAdapter.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/adapters/YoloModelAdapter.java index ea435af20..451c27108 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/adapters/YoloModelAdapter.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/adapters/YoloModelAdapter.java @@ -20,6 +20,7 @@ package org.deeplearning4j.nn.adapters; +import java.util.List; import lombok.AllArgsConstructor; import lombok.Builder; import lombok.NoArgsConstructor; @@ -32,8 +33,6 @@ import org.deeplearning4j.nn.layers.objdetect.Yolo2OutputLayer; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.exception.ND4JIllegalStateException; -import java.util.List; - @Builder @AllArgsConstructor @NoArgsConstructor diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/AbstractParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/AbstractParamInitializer.java index d93c96448..26ff63aae 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/AbstractParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/AbstractParamInitializer.java @@ -21,7 +21,6 @@ package org.deeplearning4j.nn.api; -import lombok.Getter; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/Classifier.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/Classifier.java index 631f1bed4..2bc87003b 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/Classifier.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/Classifier.java @@ -20,14 +20,12 @@ package org.deeplearning4j.nn.api; +import java.util.List; import net.brutex.ai.dnn.api.IModel; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.api.DataSet; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; -import java.util.List; - - public interface Classifier extends IModel { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/ITraininableLayerConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/ITraininableLayerConfiguration.java index b44ee12d0..6921537cc 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/ITraininableLayerConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/ITraininableLayerConfiguration.java @@ -20,13 +20,12 @@ package org.deeplearning4j.nn.api; +import java.util.List; import org.deeplearning4j.nn.conf.GradientNormalization; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.learning.config.IUpdater; import org.nd4j.linalg.learning.regularization.Regularization; -import java.util.List; - public interface ITraininableLayerConfiguration { /** diff --git 
a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/Layer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/Layer.java index 7ff694e99..ae6ae8fdb 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/Layer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/Layer.java @@ -21,7 +21,7 @@ package org.deeplearning4j.nn.api; -import java.util.Map; +import java.io.Serializable; import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.nn.conf.CacheMode; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -29,10 +29,8 @@ import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.LayerHelper; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.common.primitives.Pair; - -import java.io.Serializable; +import org.nd4j.linalg.api.ndarray.INDArray; /** * A layer is the highest-level building block in deep learning. A layer is a container that usually diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/ParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/ParamInitializer.java index 2505e05f8..c39b30cc9 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/ParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/ParamInitializer.java @@ -20,13 +20,12 @@ package org.deeplearning4j.nn.api; +import java.util.List; +import java.util.Map; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.List; -import java.util.Map; - /** * Param initializer for a layer * diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/Updater.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/Updater.java index ae301a40c..e1c75e431 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/Updater.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/Updater.java @@ -20,11 +20,10 @@ package org.deeplearning4j.nn.api; -import org.deeplearning4j.nn.gradient.Gradient; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; - import java.io.Serializable; +import org.deeplearning4j.nn.gradient.Gradient; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.linalg.api.ndarray.INDArray; /** * Update the model diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/layers/IOutputLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/layers/IOutputLayer.java index 9ae58d28d..1e92aed4d 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/layers/IOutputLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/layers/IOutputLayer.java @@ -22,8 +22,8 @@ package org.deeplearning4j.nn.api.layers; import org.deeplearning4j.nn.api.Classifier; import org.deeplearning4j.nn.api.Layer; -import org.nd4j.linalg.api.ndarray.INDArray; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.linalg.api.ndarray.INDArray; public interface IOutputLayer extends Layer, Classifier { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/layers/LayerConstraint.java 
b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/layers/LayerConstraint.java index cfa82b050..a50fbb7c6 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/layers/LayerConstraint.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/layers/LayerConstraint.java @@ -20,11 +20,10 @@ package org.deeplearning4j.nn.api.layers; -import org.deeplearning4j.nn.api.Layer; import com.fasterxml.jackson.annotation.JsonTypeInfo; - import java.io.Serializable; import java.util.Set; +import org.deeplearning4j.nn.api.Layer; @JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, include = JsonTypeInfo.As.PROPERTY, property = "@class") public interface LayerConstraint extends Cloneable, Serializable { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/layers/RecurrentLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/layers/RecurrentLayer.java index 61c50b161..083096e5e 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/layers/RecurrentLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/api/layers/RecurrentLayer.java @@ -20,13 +20,12 @@ package org.deeplearning4j.nn.api.layers; +import java.util.Map; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.gradient.Gradient; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.common.primitives.Pair; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; - -import java.util.Map; +import org.nd4j.common.primitives.Pair; +import org.nd4j.linalg.api.ndarray.INDArray; public interface RecurrentLayer extends Layer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/ComputationGraphConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/ComputationGraphConfiguration.java index 784d768ff..66f9fd859 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/ComputationGraphConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/ComputationGraphConfiguration.java @@ -20,6 +20,12 @@ package org.deeplearning4j.nn.conf; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.exc.InvalidTypeIdException; +import java.io.IOException; +import java.io.Serializable; +import java.util.*; import lombok.*; import org.deeplearning4j.nn.conf.distribution.Distribution; import org.deeplearning4j.nn.conf.graph.GraphVertex; @@ -34,6 +40,7 @@ import org.deeplearning4j.nn.conf.layers.recurrent.LastTimeStep; import org.deeplearning4j.nn.conf.layers.samediff.SameDiffVertex; import org.deeplearning4j.nn.conf.memory.MemoryReport; import org.deeplearning4j.nn.conf.memory.NetworkMemoryReport; +import org.deeplearning4j.nn.conf.serde.CavisMapper; import org.deeplearning4j.nn.conf.serde.JsonMappers; import org.deeplearning4j.nn.weights.IWeightInit; import org.deeplearning4j.nn.weights.WeightInit; @@ -42,16 +49,9 @@ import org.nd4j.common.base.Preconditions; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.api.buffer.DataType; -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.exc.InvalidTypeIdException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; -import java.io.Serializable; -import java.util.*; - @Data @EqualsAndHashCode(exclude = 
{"trainingWorkspaceMode", "inferenceWorkspaceMode", "cacheMode", "topologicalOrder", "topologicalOrderStr"}) @AllArgsConstructor(access = AccessLevel.PRIVATE) @@ -110,7 +110,7 @@ public class ComputationGraphConfiguration implements Serializable, Cloneable { * @return YAML representation of configuration */ public String toYaml() { - ObjectMapper mapper = NeuralNetConfiguration.mapperYaml(); + ObjectMapper mapper = CavisMapper.getMapper(CavisMapper.Type.YAML); synchronized (mapper) { try { return mapper.writeValueAsString(this); @@ -127,7 +127,7 @@ public class ComputationGraphConfiguration implements Serializable, Cloneable { * @return {@link ComputationGraphConfiguration} */ public static ComputationGraphConfiguration fromYaml(String json) { - ObjectMapper mapper = NeuralNetConfiguration.mapperYaml(); + ObjectMapper mapper = CavisMapper.getMapper(CavisMapper.Type.YAML); try { return mapper.readValue(json, ComputationGraphConfiguration.class); } catch (IOException e) { @@ -140,7 +140,7 @@ public class ComputationGraphConfiguration implements Serializable, Cloneable { */ public String toJson() { //As per NeuralNetConfiguration.toJson() - ObjectMapper mapper = NeuralNetConfiguration.mapper(); + ObjectMapper mapper =CavisMapper.getMapper(CavisMapper.Type.JSON); synchronized (mapper) { //JSON mappers are supposed to be thread safe: however, in practice they seem to miss fields occasionally //when writeValueAsString is used by multiple threads. This results in invalid JSON. See issue #3243 @@ -160,7 +160,7 @@ public class ComputationGraphConfiguration implements Serializable, Cloneable { */ public static ComputationGraphConfiguration fromJson(String json) { //As per NeuralNetConfiguration.fromJson() - ObjectMapper mapper = NeuralNetConfiguration.mapper(); + ObjectMapper mapper = CavisMapper.getMapper(CavisMapper.Type.JSON); ComputationGraphConfiguration conf; try { conf = mapper.readValue(json, ComputationGraphConfiguration.class); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/DataFormat.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/DataFormat.java index e8bd06860..fca65fb11 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/DataFormat.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/DataFormat.java @@ -19,10 +19,10 @@ */ package org.deeplearning4j.nn.conf; -import org.deeplearning4j.nn.conf.serde.format.DataFormatDeserializer; -import org.deeplearning4j.nn.conf.serde.format.DataFormatSerializer; import com.fasterxml.jackson.databind.annotation.JsonDeserialize; import com.fasterxml.jackson.databind.annotation.JsonSerialize; +import org.deeplearning4j.nn.conf.serde.format.DataFormatDeserializer; +import org.deeplearning4j.nn.conf.serde.format.DataFormatSerializer; @JsonSerialize(using = DataFormatSerializer.class) @JsonDeserialize(using = DataFormatDeserializer.class) diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/InputPreProcessor.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/InputPreProcessor.java index 9667f4909..d14787a9a 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/InputPreProcessor.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/InputPreProcessor.java @@ -21,14 +21,13 @@ package org.deeplearning4j.nn.conf; +import com.fasterxml.jackson.annotation.JsonTypeInfo; +import java.io.Serializable; import org.deeplearning4j.nn.api.MaskState; import 
org.deeplearning4j.nn.conf.inputs.InputType; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.common.primitives.Pair; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import com.fasterxml.jackson.annotation.JsonTypeInfo; - -import java.io.Serializable; +import org.nd4j.common.primitives.Pair; +import org.nd4j.linalg.api.ndarray.INDArray; @JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, include = JsonTypeInfo.As.PROPERTY, property = "@class") public interface InputPreProcessor extends Serializable, Cloneable { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetBaseBuilderConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetBaseBuilderConfiguration.java index 578f94fd7..b69eb174f 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetBaseBuilderConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetBaseBuilderConfiguration.java @@ -21,10 +21,9 @@ package org.deeplearning4j.nn.conf; import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonTypeInfo; -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.node.ArrayNode; +import java.util.*; import lombok.*; import lombok.experimental.SuperBuilder; import lombok.extern.slf4j.Slf4j; @@ -35,10 +34,8 @@ import org.deeplearning4j.nn.conf.distribution.Distribution; import org.deeplearning4j.nn.conf.dropout.Dropout; import org.deeplearning4j.nn.conf.dropout.IDropout; import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.layers.BaseLayerConfiguration; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; -import org.deeplearning4j.nn.conf.serde.JsonMappers; import org.deeplearning4j.nn.conf.stepfunctions.StepFunction; import org.deeplearning4j.nn.conf.weightnoise.IWeightNoise; import org.deeplearning4j.nn.weights.IWeightInit; @@ -47,7 +44,6 @@ import org.deeplearning4j.nn.weights.WeightInitDistribution; import org.deeplearning4j.nn.weights.WeightInitXavier; import org.deeplearning4j.util.NetworkUtils; import org.nd4j.common.base.Preconditions; -import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.learning.config.IUpdater; @@ -57,9 +53,6 @@ import org.nd4j.linalg.learning.regularization.L2Regularization; import org.nd4j.linalg.learning.regularization.Regularization; import org.nd4j.linalg.learning.regularization.WeightDecay; -import java.io.IOException; -import java.util.*; - /** * Deeplearning4j is a domain-specific language to configure deep neural networks, which are made of * multiple layers. Everything starts with a NeuralNetConfiguration, which organizes those layers @@ -331,7 +324,6 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor */ @Getter @Setter @lombok.Builder.Default private IUpdater biasUpdater = null; - /** * Weight initialization scheme to use, for initial weight values Note: values set by this method * will be applied to all applicable layers in the network, unless a different value is explicitly @@ -339,6 +331,7 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor * and can be overridden on a per-layer basis. 
*/ @Getter @Setter @lombok.Builder.Default private IWeightInit weightInit = new WeightInitXavier(); + /** * Sets the convolution mode for convolutional layers, which impacts padding and output sizes. See * {@link ConvolutionMode} for details. Defaults to ConvolutionMode.TRUNCATE
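The hunk that follows reworks the NeuralNetBaseBuilderConfiguration builder: the legacy JSON/weight-init handling and the mapper/toJson/toYaml helpers move out of this class, and the chained setters now return self() instead of casting (B) this, so chained calls keep the concrete SuperBuilder type. As a hedged illustration only (method names as they appear in this patch; the layer sizes, values and the CavisMapper-backed round trip are assumptions, not code copied from the repository), a typical chain over these builder methods looks like:

    // Sketch only: chaining the builder methods changed in the hunk below.
    NeuralNetConfiguration conf = NeuralNetConfiguration.builder()
        .updater(new Sgd(0.05))
        .weightInit(new WeightInitXavier())                 // IWeightInit overload, kept visible to Jackson via @JsonProperty
        .l2(1e-4)                                           // adds an L2Regularization entry
        .dropOut(0.5)                                       // wrapped in a Dropout instance
        .constrainWeights(new MaxNormConstraint(2.0, 1))    // constraint applied to all layers
        .cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE) // lower-memory cuDNN mode, see Javadoc further down
        .layer(DenseLayer.builder().nIn(784).nOut(250).build())
        .build();

    // Serialization is now routed through CavisMapper (see the toJson()/fromJson() changes further down):
    String json = conf.toJson();
    NeuralNetConfiguration restored = NeuralNetConfiguration.fromJson(json);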
@@ -416,113 +409,6 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor @Getter @Setter @lombok.Builder.Default private double biasInit = 0.0; @Getter @Setter @lombok.Builder.Default private double gainInit = 1.0; - /** - * Handle {@link WeightInit} and {@link Distribution} from legacy configs in Json format. Copied - * from handling of {@link Activation} above. - * - * @return True if all is well and layer iteration shall continue. False else-wise. - */ - private static boolean handleLegacyWeightInitFromJson( - String json, LayerConfiguration l, ObjectMapper mapper, JsonNode confs, int layerCount) { - if ((l instanceof BaseLayerConfiguration) - && ((BaseLayerConfiguration) l).getWeightInit() == null) { - try { - JsonNode jsonNode = mapper.readTree(json); - if (confs == null) { - confs = jsonNode.get("confs"); - } - if (confs instanceof ArrayNode) { - ArrayNode layerConfs = (ArrayNode) confs; - JsonNode outputLayerNNCNode = layerConfs.get(layerCount); - if (outputLayerNNCNode == null) { - return false; // Should never happen... - } - JsonNode layerWrapperNode = outputLayerNNCNode.get("layer"); - - if (layerWrapperNode == null || layerWrapperNode.size() != 1) { - return true; - } - - JsonNode layerNode = layerWrapperNode.elements().next(); - JsonNode weightInit = - layerNode.get("weightInit"); // Should only have 1 element: "dense", "output", etc - JsonNode distribution = layerNode.get("dist"); - - Distribution dist = null; - if (distribution != null) { - dist = mapper.treeToValue(distribution, Distribution.class); - } - - if (weightInit != null) { - final IWeightInit wi = - WeightInit.valueOf(weightInit.asText()).getWeightInitFunction(dist); - ((BaseLayerConfiguration) l).setWeightInit(wi); - } - } - - } catch (IOException e) { - log.warn( - "ILayer with null WeightInit detected: " + l.getName() + ", could not parse JSON", - e); - } - } - return true; - } - - /** - * Object mapper for serialization of configurations - * - * @return - */ - public static ObjectMapper mapperYaml() { - return JsonMappers.getMapperYaml(); - } - - /** - * Object mapper for serialization of configurations - * - * @return - */ - public static ObjectMapper mapper() { - return JsonMappers.getMapper(); - } - - public static NeuralNetBaseBuilderConfiguration fromYaml(String input) { - throw new RuntimeException("Needs fixing - not supported."); // TODO - } - - /** - * @return JSON representation of NN configuration - */ - public String toYaml() { - ObjectMapper mapper = NeuralNetBaseBuilderConfiguration.mapperYaml(); - synchronized (mapper) { - try { - return mapper.writeValueAsString(this); - } catch (com.fasterxml.jackson.core.JsonProcessingException e) { - throw new RuntimeException(e); - } - } - } - - /** - * @return JSON representation of NN configuration - */ - public String toJson() { - ObjectMapper mapper = NeuralNetBaseBuilderConfiguration.mapper(); - synchronized (mapper) { - // JSON mappers are supposed to be thread safe: however, in practice they seem to miss fields - // occasionally - // when writeValueAsString is used by multiple threads. This results in invalid JSON. 
See - // issue #3243 - try { - return mapper.writeValueAsString(this); - } catch (com.fasterxml.jackson.core.JsonProcessingException e) { - throw new RuntimeException(e); - } - } - } - @Override public NeuralNetBaseBuilderConfiguration clone() { NeuralNetBaseBuilderConfiguration clone; @@ -561,14 +447,7 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor List innerConfigurations$value = new ArrayList<>(); // initialize with an empty list - public B activation(Activation activation) { - this.activation = activation; - return self(); - } - public B activation(IActivation activation) { - this.activation = activation; - return self(); - } + /** * Set constraints to be applied to all layers. Default: no constraints.
* Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm @@ -583,7 +462,7 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor public B constrainWeights(LayerConstraint... constraints) { constrainWeights$value = Arrays.asList(constraints); constrainWeights$set = true; - return (B) this; + return self(); } /** @@ -618,7 +497,7 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor public B constrainAllParameters(LayerConstraint... constraints) { allParamConstraints$value = Arrays.asList(constraints); allParamConstraints$set = true; - return (B) this; + return self(); } /** @@ -635,7 +514,7 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor public B constrainBias(LayerConstraint... constraints) { biasConstraints$value = Arrays.asList(constraints); biasConstraints$set = true; - return (B) this; + return self(); } /** @@ -645,10 +524,11 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor * @param processor what to use to preProcess the data. * @return builder pattern */ - public B inputPreProcessor(Integer layer, InputPreProcessor processor) { + public B inputPreProcessor(@NonNull Integer layer, @NonNull InputPreProcessor processor) { + if(inputPreProcessors$value==null) inputPreProcessors$value=new LinkedHashMap<>(); inputPreProcessors$value.put(layer, processor); inputPreProcessors$set = true; - return (B) this; + return self(); } /** @@ -658,7 +538,7 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor * @param layer the layer * @return builder */ - public B layer(Integer index, @NonNull LayerConfiguration layer) { + public B layer(@NonNull Integer index, @NonNull LayerConfiguration layer) { innerConfigurations$value.add(index, layer); innerConfigurations$set = true; return self(); @@ -680,10 +560,11 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor * @param layer the layer * @return builder */ + @JsonIgnore public B layer(@NonNull LayerConfiguration layer) { innerConfigurations$value.add(layer); innerConfigurations$set = true; - return (B) this; + return self(); } public B layer(@NonNull LayerConfiguration.LayerConfigurationBuilder layer) { return this.layer(layer.build()); @@ -699,7 +580,7 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor public B layersFromArray(@NonNull LayerConfiguration[] arrLayers) { innerConfigurations$value.addAll(List.of(arrLayers)); innerConfigurations$set = true; - return (B) this; + return self(); } /** Specify additional layer configurations */ @@ -707,7 +588,7 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor public B layersFromList(@NonNull List listLayers) { innerConfigurations$value.addAll(listLayers); innerConfigurations$set = true; - return (B) this; + return self(); } /** @@ -723,7 +604,7 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor regularization$value.add(new L1Regularization(l1)); } regularization$set = true; - return (B) this; + return self(); } /** @@ -751,7 +632,7 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor regularization$value.add(new L2Regularization(l2)); } regularization$set = true; - return (B) this; + return self(); } /** @@ -766,7 +647,7 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor regularizationBias$value.add(new 
L1Regularization(l1Bias)); } regularizationBias$set = true; - return (B) this; + return self(); } /** @@ -791,7 +672,7 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor "L2 bias regularization removed: incompatible with added WeightDecay regularization"); regularizationBias$value.add(new L2Regularization(l2Bias)); } - return (B) this; + return self(); } /** @@ -833,7 +714,7 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor regularization$value.add(new WeightDecay(coefficient, applyLR)); } regularization$set = true; - return (B) this; + return self(); } /** @@ -870,7 +751,7 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor regularizationBias$value.add(new WeightDecay(coefficient, applyLR)); } regularization$set = true; - return (B) this; + return self(); } @@ -881,7 +762,7 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor */ @Deprecated public B list() { - return (B) this; + return self(); } /** @@ -893,23 +774,24 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor * * @param distribution Distribution to use for weight initialization */ - @JsonIgnore + @JsonIgnore @Deprecated public B weightInit(Distribution distribution) { this.weightInit$value = new WeightInitDistribution(distribution); this.weightInit$set = true; - return (B) this; + return self(); } @JsonIgnore public B weightInit(WeightInit weightInit) { this.weightInit$value = weightInit.getWeightInitFunction(); this.weightInit$set = true; - return (B) this; + return self(); } + @JsonProperty("weightInit") //this is needed for Jackson < 2.4, otherwise JsonIgnore on the other setters will ignore this also public B weightInit(IWeightInit iWeightInit) { this.weightInit$value = iWeightInit; this.weightInit$set = true; - return (B) this; + return self(); } /** @@ -918,12 +800,13 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor * @param distribution * @return */ + @JsonIgnore public B dist(@NonNull Distribution distribution) { - return (B) weightInit(distribution); + return weightInit(distribution); } public B dropOut(@NonNull IDropout dropout) { - return (B) idropOut(dropout); + return idropOut(dropout); } /** @@ -933,7 +816,7 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor * @return builder */ public B dropOut(double dropout) { - return (B) idropOut(new Dropout(dropout)); + return idropOut(new Dropout(dropout)); } /** @@ -946,7 +829,8 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor public B confs(@NonNull List confs) { innerConfigurations$value.addAll(confs); innerConfigurations$set = true; - return (B) this; + return self(); } + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java index e6042e3d6..fcdb56125 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java @@ -22,39 +22,26 @@ package org.deeplearning4j.nn.conf; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; -import 
com.fasterxml.jackson.databind.exc.InvalidTypeIdException; -import com.fasterxml.jackson.databind.node.ArrayNode; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.*; +import java.util.*; +import java.util.stream.Collectors; import lombok.*; import lombok.experimental.SuperBuilder; import lombok.extern.jackson.Jacksonized; import lombok.extern.slf4j.Slf4j; import net.brutex.ai.dnn.api.IModel; -import org.deeplearning4j.nn.conf.distribution.Distribution; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.*; import org.deeplearning4j.nn.conf.layers.recurrent.LastTimeStep; import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.conf.memory.MemoryReport; import org.deeplearning4j.nn.conf.memory.NetworkMemoryReport; -import org.deeplearning4j.nn.conf.serde.JsonMappers; -import org.deeplearning4j.nn.weights.IWeightInit; -import org.deeplearning4j.nn.weights.WeightInit; +import org.deeplearning4j.nn.conf.serde.CavisMapper; import org.deeplearning4j.util.OutputLayerUtil; -import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.learning.config.IUpdater; import org.nd4j.linalg.learning.config.Sgd; -import org.nd4j.linalg.lossfunctions.LossFunctions; -import org.nd4j.linalg.lossfunctions.impl.LossBinaryXENT; -import org.nd4j.linalg.lossfunctions.impl.LossMCXENT; -import org.nd4j.linalg.lossfunctions.impl.LossMSE; -import org.nd4j.linalg.lossfunctions.impl.LossNegativeLogLikelihood; - -import java.io.IOException; -import java.util.*; -import java.util.stream.Collectors; /** * Deeplearning4j is a domain-specific language to configure deep neural networks, which are made of @@ -62,71 +49,50 @@ import java.util.stream.Collectors; * and their hyperparameters. Hyperparameters are variables that determine how a neural network * learns. They include how many times to update the weights of the model, how to initialize those * weights, which activation function to attach to the nodes, which optimization algorithm to use, - * and how fast the model should learn. This is what one configuration would look like: - *

- * - * NeuralNetConfiguration conf = NeuralNetConfiguration.builder()
- * .weightInit(WeightInit.XAVIER) .activation(Activation.RELU)
- * .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
- * .updater(new Sgd(0.05)) //... other hyperparameters
- * .backprop(true)
- * .build();

- * - * With Deeplearning4j, you add a layer - * by calling layer on the NeuralNetConfiguration.NeuralNetConfigurationBuilder(), specifying its place in the order of + * and how fast the model should learn. This is what one configuration would look like:
+ *
+ * NeuralNetConfiguration conf = NeuralNetConfiguration.builder()
+ * .weightInit(WeightInit.XAVIER) .activation(Activation.RELU)
+ * .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
+ * .updater(new Sgd(0.05)) //... other hyperparameters
+ * .backprop(true)
+ * .build();
+ *
+ * With Deeplearning4j, you add a layer by calling layer on the + * NeuralNetConfiguration.NeuralNetConfigurationBuilder(), specifying its place in the order of * layers (the zero-indexed layer below is the input layer), the number of input and output nodes, - * nIn and nOut, as well as the type: DenseLayer.

- * - * .layer(0, DenseLayer.builder().nIn(784).nOut(250)
- * .build())

- * - * Once you've configured your net, you train the - * model with model.fit. + * nIn and nOut, as well as the type: DenseLayer.
+ *
+ * .layer(0, DenseLayer.builder().nIn(784).nOut(250)
+ * .build())
+ *
+ * Once you've configured your net, you train the model with model.fit. */ - - @Data @Slf4j -@Jacksonized -@JsonIgnoreProperties(value={"net"}, ignoreUnknown = true) -@EqualsAndHashCode(exclude = {"net"}, callSuper = true) -//@JsonIdentityInfo(generator= ObjectIdGenerators.IntSequenceGenerator.class, property="@id") +@JsonIgnoreProperties(value = {"net"}) +@EqualsAndHashCode(callSuper = true) +// @JsonIdentityInfo(generator= ObjectIdGenerators.IntSequenceGenerator.class, property="@id") -//The inner builder, that we can then extend ... -@SuperBuilder //TODO fix access +// The inner builder, that we can then extend ... +@Jacksonized +@SuperBuilder // TODO fix access public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { - - private IModel net; private static final int DEFAULT_TBPTT_LENGTH = 20; - private boolean initCalled = false; - - @Getter - @Setter - @NonNull - @lombok.Builder.Default - @Deprecated + @Getter @Setter @NonNull @lombok.Builder.Default @Deprecated protected WorkspaceMode trainingWorkspaceMode = WorkspaceMode.ENABLED; - @Getter - @Setter - @NonNull - @lombok.Builder.Default - @Deprecated + + @Getter @Setter @NonNull @lombok.Builder.Default @Deprecated protected WorkspaceMode inferenceWorkspaceMode = WorkspaceMode.ENABLED; - - @Getter - @Setter - @lombok.Builder.Default - protected int iterationCount = 0; - //Counter for the number of epochs completed so far. Used for per-epoch schedules - @Getter - @Setter - @lombok.Builder.Default - protected int epochCount = 0; - @lombok.Builder.Default - protected double dampingFactor = 100; + @Getter @Setter @lombok.Builder.Default protected int iterationCount = 0; + // Counter for the number of epochs completed so far. Used for per-epoch schedules + @Getter @Setter @lombok.Builder.Default protected int epochCount = 0; + @lombok.Builder.Default protected double dampingFactor = 100; + @EqualsAndHashCode.Exclude private IModel net; + private boolean initCalled = false; // gradient keys used for ensuring order when getting and setting the gradient @lombok.Builder.Default private LinkedHashSet netWideVariables = new LinkedHashSet<>(); @@ -141,22 +107,19 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { */ @Getter @Setter @Builder.Default private IUpdater updater = new Sgd(); - /** - * Sets the cuDNN algo mode for convolutional layers, which impacts performance and memory usage of cuDNN. - * See {@link ConvolutionLayer.AlgoMode} for details. Defaults to "PREFER_FASTEST", but "NO_WORKSPACE" uses less memory. - *
- * Note: values set by this method will be applied to all applicable layers in the network, unless a different - * value is explicitly set on a given layer. In other words: values set via this method are used as the default - * value, and can be overridden on a per-layer basis. + * Sets the cuDNN algo mode for convolutional layers, which impacts performance and memory usage + * of cuDNN. See {@link ConvolutionLayer.AlgoMode} for details. Defaults to "PREFER_FASTEST", but + * "NO_WORKSPACE" uses less memory.
+ * Note: values set by this method will be applied to all applicable layers in the network, unless + * a different value is explicitly set on a given layer. In other words: values set via this + * method are used as the default value, and can be overridden on a per-layer basis. + * * @param cudnnAlgoMode cuDNN algo mode to use */ - @Getter - @Setter - @lombok.Builder.Default + @Getter @Setter @lombok.Builder.Default private ConvolutionLayer.AlgoMode cudnnAlgoMode = ConvolutionLayer.AlgoMode.PREFER_FASTEST; - /** * Create a neural net configuration from json * @@ -164,260 +127,23 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { * @return {@link NeuralNetConfiguration} */ public static NeuralNetConfiguration fromJson(String json) { - NeuralNetConfiguration conf; - ObjectMapper mapper = NeuralNetConfiguration.mapper(); + ObjectMapper mapper = CavisMapper.getMapper(CavisMapper.Type.JSON); try { - conf = mapper.readValue(json, NeuralNetConfiguration.class); - } catch (InvalidTypeIdException e) { - if (e.getMessage().contains("@class")) { - try { - //JSON may be legacy (1.0.0-alpha or earlier), attempt to load it using old format - return JsonMappers.getLegacyMapper().readValue(json, NeuralNetConfiguration.class); - } catch (InvalidTypeIdException e2) { - //Check for legacy custom layers: "Could not resolve type id 'CustomLayer' as a subtype of [simple type, class org.deeplearning4j.nn.conf.layers.ILayer]: known type ids = [Bidirectional, CenterLossOutputLayer, CnnLossLayer, ..." - //1.0.0-beta5: dropping support for custom layers defined in pre-1.0.0-beta format. Built-in layers from these formats still work - String msg = e2.getMessage(); - if (msg != null && msg.contains("Could not resolve type id")) { - throw new RuntimeException( - "Error deserializing NeuralNetConfiguration - configuration may have a custom " + - "layer, vertex or preprocessor, in pre version 1.0.0-beta JSON format.\nModels in legacy format with custom" - + - " layers should be loaded in 1.0.0-beta to 1.0.0-beta4 and saved again, before loading in the current version of DL4J", - e); - } - throw new RuntimeException(e2); - } catch (IOException e2) { - throw new RuntimeException(e2); - } - } - throw new RuntimeException(e); - } catch (IOException e) { - //Check if this exception came from legacy deserializer... - String msg = e.getMessage(); - if (msg != null && msg.contains("legacy")) { - throw new RuntimeException( - "Error deserializing NeuralNetConfiguration - configuration may have a custom " + - "layer, vertex or preprocessor, in pre version 1.0.0-alpha JSON format. These layers can be " - + - "deserialized by first registering them with NeuralNetConfiguration.registerLegacyCustomClassesForJSON(Class...)", - e); - } + return mapper.readValue(json, NeuralNetConfiguration.class); + } catch (JsonProcessingException e) { throw new RuntimeException(e); } - - //To maintain backward compatibility after loss function refactoring (configs generated with v0.5.0 or earlier) - // Previously: enumeration used for loss functions. 
Now: use classes - // IN the past, could have only been an OutputLayer or RnnOutputLayer using these enums - int layerCount = 0; - JsonNode confs = null; - for (LayerConfiguration nnc : conf.getFlattenedLayerConfigurations()) { - LayerConfiguration l = nnc; - if (l instanceof BaseOutputLayer && ((BaseOutputLayer) l).getLossFunction() == null) { - //lossFn field null -> may be an old config format, with lossFunction field being for the enum - //if so, try walking the JSON graph to extract out the appropriate enum value - - BaseOutputLayer ol = (BaseOutputLayer) l; - try { - JsonNode jsonNode = mapper.readTree(json); - if (confs == null) { - confs = jsonNode.get("confs"); - } - if (confs instanceof ArrayNode) { - ArrayNode layerConfs = (ArrayNode) confs; - JsonNode outputLayerNNCNode = layerConfs.get(layerCount); - if (outputLayerNNCNode == null) { - throw new RuntimeException( - "should never happen"); //return conf; //Should never happen... - } - JsonNode outputLayerNode = outputLayerNNCNode.get("layer"); - - JsonNode lossFunctionNode = null; - if (outputLayerNode.has("output")) { - lossFunctionNode = outputLayerNode.get("output").get("lossFunction"); - } else if (outputLayerNode.has("rnnoutput")) { - lossFunctionNode = outputLayerNode.get("rnnoutput").get("lossFunction"); - } - - if (lossFunctionNode != null) { - String lossFunctionEnumStr = lossFunctionNode.asText(); - LossFunctions.LossFunction lossFunction = null; - try { - lossFunction = LossFunctions.LossFunction.valueOf(lossFunctionEnumStr); - } catch (Exception e) { - log.warn( - "OutputLayer with null LossFunction or pre-0.6.0 loss function configuration detected: could not parse JSON", - e); - } - - if (lossFunction != null) { - switch (lossFunction) { - case MSE: - ol.setLossFunction(new LossMSE()); - break; - case XENT: - ol.setLossFunction(new LossBinaryXENT()); - break; - case NEGATIVELOGLIKELIHOOD: - ol.setLossFunction(new LossNegativeLogLikelihood()); - break; - case MCXENT: - ol.setLossFunction(new LossMCXENT()); - break; - - //Remaining: TODO - case SQUARED_LOSS: - case RECONSTRUCTION_CROSSENTROPY: - default: - log.warn( - "OutputLayer with null LossFunction or pre-0.6.0 loss function configuration detected: could not set loss function for {}", - lossFunction); - break; - } - } - } - - } else { - log.warn( - "OutputLayer with null LossFunction or pre-0.6.0 loss function configuration detected: could not parse JSON: layer 'confs' field is not an ArrayNode (is: {})", - (confs != null ? confs.getClass() : null)); - } - } catch (IOException e) { - log.warn( - "OutputLayer with null LossFunction or pre-0.6.0 loss function configuration detected: could not parse JSON", - e); - break; - } - } - - //Also, pre 0.7.2: activation functions were Strings ("activationFunction" field), not classes ("activationFn") - //Try to load the old format if necessary, and create the appropriate IActivation instance - if ((l instanceof BaseLayerConfiguration) && ((BaseLayerConfiguration) l).getActivationFn() == null) { - try { - JsonNode jsonNode = mapper.readTree(json); - if (confs == null) { - confs = jsonNode.get("confs"); - } - if (confs instanceof ArrayNode) { - ArrayNode layerConfs = (ArrayNode) confs; - JsonNode outputLayerNNCNode = layerConfs.get(layerCount); - if (outputLayerNNCNode == null) { - throw new RuntimeException( - "Should never happen"); //return conf; //Should never happen... 
- } - JsonNode layerWrapperNode = outputLayerNNCNode.get("layer"); - - if (layerWrapperNode == null || layerWrapperNode.size() != 1) { - continue; - } - - JsonNode layerNode = layerWrapperNode.elements().next(); - JsonNode activationFunction = layerNode.get( - "activationFunction"); //Should only have 1 element: "dense", "output", etc - - if (activationFunction != null) { - Activation ia = Activation.fromString(activationFunction.asText()); - ((BaseLayerConfiguration) l).setActivation(ia.getActivationFunction()); - } - } - - } catch (IOException e) { - log.warn( - "ILayer with null ActivationFn field or pre-0.7.2 activation function detected: could not parse JSON", - e); - } - } - - if (!handleLegacyWeightInitFromJson(json, l, mapper, confs, layerCount)) { - return conf; - } - - layerCount++; - } - return conf; - } - - /** - * Handle {@link WeightInit} and {@link Distribution} from legacy configs in Json format. Copied - * from handling of {@link Activation} above. - * - * @return True if all is well and layer iteration shall continue. False else-wise. - */ - private static boolean handleLegacyWeightInitFromJson(String json, LayerConfiguration l, - ObjectMapper mapper, - JsonNode confs, int layerCount) { - if ((l instanceof BaseLayerConfiguration) && ((BaseLayerConfiguration) l).getWeightInit() == null) { - try { - JsonNode jsonNode = mapper.readTree(json); - if (confs == null) { - confs = jsonNode.get("confs"); - } - if (confs instanceof ArrayNode) { - ArrayNode layerConfs = (ArrayNode) confs; - JsonNode outputLayerNNCNode = layerConfs.get(layerCount); - if (outputLayerNNCNode == null) { - return false; //Should never happen... - } - JsonNode layerWrapperNode = outputLayerNNCNode.get("layer"); - - if (layerWrapperNode == null || layerWrapperNode.size() != 1) { - return true; - } - - JsonNode layerNode = layerWrapperNode.elements().next(); - JsonNode weightInit = layerNode.get( - "weightInit"); //Should only have 1 element: "dense", "output", etc - JsonNode distribution = layerNode.get("dist"); - - Distribution dist = null; - if (distribution != null) { - dist = mapper.treeToValue(distribution, Distribution.class); - } - - if (weightInit != null) { - final IWeightInit wi = WeightInit.valueOf(weightInit.asText()) - .getWeightInitFunction(dist); - ((BaseLayerConfiguration) l).setWeightInit(wi); - } - } - - } catch (IOException e) { - log.warn( - "ILayer with null WeightInit detected: " + l.getName() + ", could not parse JSON", - e); - } - } - return true; - - } - - /** - * Object mapper for serialization of configurations - * - * @return - */ - public static ObjectMapper mapperYaml() { - return JsonMappers.getMapperYaml(); - } - - /** - * Object mapper for serialization of configurations - * - * @return - */ - public static ObjectMapper mapper() { - return JsonMappers.getMapper(); } public static NeuralNetConfiguration fromYaml(String input) { - throw new RuntimeException("Needs fixing - not supported."); //TODO + throw new RuntimeException("Needs fixing - not supported."); // TODO } - /** * @return JSON representation of NN configuration */ public String toYaml() { - ObjectMapper mapper = NeuralNetConfiguration.mapperYaml(); + ObjectMapper mapper = CavisMapper.getMapper(CavisMapper.Type.YAML); synchronized (mapper) { try { return mapper.writeValueAsString(this); @@ -431,10 +157,12 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { * @return JSON representation of NN configuration */ public String toJson() { - ObjectMapper mapper = 
NeuralNetConfiguration.mapper(); + ObjectMapper mapper = CavisMapper.getMapper(CavisMapper.Type.JSON); synchronized (mapper) { - //JSON mappers are supposed to be thread safe: however, in practice they seem to miss fields occasionally - //when writeValueAsString is used by multiple threads. This results in invalid JSON. See issue #3243 + // JSON mappers are supposed to be thread safe: however, in practice they seem to miss fields + // occasionally + // when writeValueAsString is used by multiple threads. This results in invalid JSON. See + // issue #3243 try { return mapper.writeValueAsString(this); } catch (com.fasterxml.jackson.core.JsonProcessingException e) { @@ -453,7 +181,9 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { public NeuralNetConfiguration clone() { NeuralNetConfiguration clone; clone = (NeuralNetConfiguration) super.clone(); - if(getStepFunction() != null) { clone.setStepFunction(getStepFunction().clone()); } + if (getStepFunction() != null) { + clone.setStepFunction(getStepFunction().clone()); + } clone.netWideVariables = new LinkedHashSet<>(netWideVariables); clone.setInnerConfigurations(new ArrayList<>(innerConfigurations)); @@ -473,98 +203,109 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { clone.setDataType(this.getDataType()); return clone; - } - /** - * - */ + /** */ @Override public void init() { - if(initCalled) return; - initCalled=true; + if (initCalled) return; + initCalled = true; - /** - * Run init() for each layer - */ - for( NeuralNetConfiguration nconf : getNetConfigurations() ) { + /** Run init() for each layer */ + for (NeuralNetConfiguration nconf : getNetConfigurations()) { nconf.init(); } - //getNetConfigurations().stream().forEach( conf -> { - // conf.init(); //do not call on self - //}); //call init on all embedded net configurations + // getNetConfigurations().stream().forEach( conf -> { + // conf.init(); //do not call on self + // }); //call init on all embedded net configurations - //TODO do not put inside self to avoid serialization issues + // TODO do not put inside self to avoid serialization issues // innerConfigurations.add(0, this); //put this configuration at first place + + getLayerConfigurations().stream() + .forEach( + lconf -> + lconf.setNetConfiguration( + this)); // set this as net config for all layers (defined in here, not stacked + + /** - * Inherit network wide configuration setting to those layer configurations - * that do not have an individual setting (nor a default) + * Inherit network wide configuration setting to those layer configurations that do not have an + * individual setting (nor a default) */ - for(LayerConfiguration lconf : this.getFlattenedLayerConfigurations()) { + for (LayerConfiguration lconf : this.getFlattenedLayerConfigurations()) { lconf.runInheritance(); } - getLayerConfigurations().stream().forEach( lconf -> lconf.setNetConfiguration(this)); //set this as net config for all layers (defined in here, not stacked - - //Validate BackpropType setting + // Validate BackpropType setting if ((tbpttBackLength != DEFAULT_TBPTT_LENGTH || tbpttFwdLength != DEFAULT_TBPTT_LENGTH) && backpropType != BackpropType.TruncatedBPTT) { - log.warn("Truncated backpropagation through time lengths have been configured with values " - + tbpttFwdLength - + " and " + tbpttBackLength + " but backprop type is set to " + backpropType - + ". 
TBPTT configuration" + - " settings will only take effect if backprop type is set to BackpropType.TruncatedBPTT"); + log.warn( + "Truncated backpropagation through time lengths have been configured with values " + + tbpttFwdLength + + " and " + + tbpttBackLength + + " but backprop type is set to " + + backpropType + + ". TBPTT configuration" + + " settings will only take effect if backprop type is set to BackpropType.TruncatedBPTT"); } if (backpropType == BackpropType.TruncatedBPTT && isValidateTbpttConfig()) { - //Check for invalid combination - tbptt plus LastTimeStepLayer or + // Check for invalid combination - tbptt plus LastTimeStepLayer or for (int i = 0; i < getFlattenedLayerConfigurations().size(); i++) { LayerConfiguration l = getFlattenedLayerConfigurations().get(i); if (l instanceof LastTimeStep || l instanceof GlobalPoolingLayer) { throw new IllegalStateException( "Invalid network configuration detected: Truncated backpropagation through time (TBPTT)" - + - " cannot be used with layer " + i + " of type " + l.getClass().getName() - + ": TBPTT is incompatible with this layer type (which is designed " + - "to process entire sequences at once, and does support the type of sequence segments that TPBTT uses).\n" - + - "This check can be disabled using validateTbpttConfig(false) but this is not recommended."); + + " cannot be used with layer " + + i + + " of type " + + l.getClass().getName() + + ": TBPTT is incompatible with this layer type (which is designed " + + "to process entire sequences at once, and does support the type of sequence segments that TPBTT uses).\n" + + "This check can be disabled using validateTbpttConfig(false) but this is not recommended."); } } } if (getInputType() == null && inputPreProcessors.get(0) == null) { - //User hasn't set the InputType. Sometimes we can infer it... - // For example, Dense/RNN layers, where preprocessor isn't set -> user is *probably* going to feed in + // User hasn't set the InputType. Sometimes we can infer it... + // For example, Dense/RNN layers, where preprocessor isn't set -> user is *probably* going to + // feed in // standard feedforward or RNN data - //This isn't the most elegant implementation, but should avoid breaking backward compatibility here - //Can't infer InputType for CNN layers, however (don't know image dimensions/depth) + // This isn't the most elegant implementation, but should avoid breaking backward + // compatibility here + // Can't infer InputType for CNN layers, however (don't know image dimensions/depth) LayerConfiguration firstLayer = getFlattenedLayerConfigurations().get(0); if (firstLayer instanceof BaseRecurrentLayer) { BaseRecurrentLayer brl = (BaseRecurrentLayer) firstLayer; val nIn = brl.getNIn(); if (nIn > 0) { - setInputType( InputType.recurrent(nIn, brl.getDataFormat())); + setInputType(InputType.recurrent(nIn, brl.getDataFormat())); } - } else if (firstLayer instanceof DenseLayer || firstLayer instanceof EmbeddingLayer + } else if (firstLayer instanceof DenseLayer + || firstLayer instanceof EmbeddingLayer || firstLayer instanceof OutputLayer) { - //Can't just use "instanceof FeedForwardLayer" here. ConvolutionLayer is also a FeedForwardLayer + // Can't just use "instanceof FeedForwardLayer" here. 
ConvolutionLayer is also a + // FeedForwardLayer FeedForwardLayer ffl = (FeedForwardLayer) firstLayer; val nIn = ffl.getNIn(); if (nIn > 0) { - setInputType( InputType.feedForward(nIn)); + setInputType(InputType.feedForward(nIn)); } } } - //Add preprocessors and set nIns, if InputType has been set + // Add preprocessors and set nIns, if InputType has been set // Builder.inputType field can be set in 1 of 4 ways: // 1. User calls setInputType directly // 2. Via ConvolutionLayerSetup -> internally calls setInputType(InputType.convolutional(...)) - // 3. Via the above code: i.e., assume input is as expected by the RNN or dense layer -> sets the inputType field - if(inputPreProcessors == null) { + // 3. Via the above code: i.e., assume input is as expected by the RNN or dense layer -> sets + // the inputType field + if (inputPreProcessors == null) { inputPreProcessors = new HashMap<>(); } if (getInputType() != null) { @@ -572,7 +313,7 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { for (int i = 0; i < getFlattenedLayerConfigurations().size(); i++) { LayerConfiguration l = getFlattenedLayerConfigurations().get(i); if (inputPreProcessors.get(i) == null) { - //Don't override preprocessor setting, but set preprocessor if required... + // Don't override preprocessor setting, but set preprocessor if required... @NonNull InputPreProcessor inputPreProcessor = l.getPreProcessorForInputType(currentInputType); if (inputPreProcessor != null) { @@ -586,41 +327,47 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { } if (i > 0) { LayerConfiguration layer = getFlattenedLayerConfigurations().get(i - 1); - //convolution 1d is an edge case where it has rnn input type but the filters - //should be the output - if (layer instanceof Convolution1DLayer) { + // convolution 1d is an edge case where it has rnn input type but the filters + // should be the output + if (layer instanceof Convolution1D || layer instanceof Convolution1DNew) { if (l instanceof DenseLayer && getInputType() instanceof InputType.InputTypeRecurrent) { FeedForwardLayer feedForwardLayer = (FeedForwardLayer) l; if (getInputType() instanceof InputType.InputTypeRecurrent) { - InputType.InputTypeRecurrent recurrent = (InputType.InputTypeRecurrent) getInputType(); + InputType.InputTypeRecurrent recurrent = + (InputType.InputTypeRecurrent) getInputType(); feedForwardLayer.setNIn(recurrent.getTimeSeriesLength()); } } else { - l.setNIn(currentInputType, - isOverrideNinUponBuild()); //Don't override the nIn setting, if it's manually set by the user + l.setNIn( + currentInputType, + isOverrideNinUponBuild()); // Don't override the nIn setting, if it's manually set + // by the user } } else { - l.setNIn(currentInputType, - isOverrideNinUponBuild()); //Don't override the nIn setting, if it's manually set by the user + l.setNIn( + currentInputType, + isOverrideNinUponBuild()); // Don't override the nIn setting, if it's manually set + // by the user } } else { - l.setNIn(currentInputType, - isOverrideNinUponBuild()); //Don't override the nIn setting, if it's manually set by the user + l.setNIn( + currentInputType, + isOverrideNinUponBuild()); // Don't override the nIn setting, if it's manually set by + // the user } currentInputType = l.getOutputType(i, currentInputType); } - } Nd4j.getRandom().setSeed(getSeed()); - //Validate output layer configuration + // Validate output layer configuration if (isValidateOutputLayerConfig()) { - //Validate output layer configurations... 
+ // Validate output layer configurations... for (LayerConfiguration n : getFlattenedLayerConfigurations()) { - OutputLayerUtil.validateOutputLayer(n.getName(), n); //No-op for non output/loss layers + OutputLayerUtil.validateOutputLayer(n.getName(), n); // No-op for non output/loss layers } } } @@ -646,26 +393,28 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { layerName = String.valueOf(i); } - //Pass input type through preprocessor, if necessary + // Pass input type through preprocessor, if necessary InputPreProcessor preproc = getInputPreProcess(i); - //TODO memory requirements for preprocessor + // TODO memory requirements for preprocessor if (preproc != null) { inputType = preproc.getOutputType(inputType); } - LayerMemoryReport report = getFlattenedLayerConfigurations().get(i).getMemoryReport(inputType); + LayerMemoryReport report = + getFlattenedLayerConfigurations().get(i).getMemoryReport(inputType); memoryReportMap.put(layerName, report); inputType = getFlattenedLayerConfigurations().get(i).getOutputType(i, inputType); } - return new NetworkMemoryReport(memoryReportMap, NeuralNetConfiguration.class, - "MultiLayerNetwork", inputType); + return new NetworkMemoryReport( + memoryReportMap, NeuralNetConfiguration.class, "MultiLayerNetwork", inputType); } /** * For the given input shape/type for the network, return a list of activation sizes for each - * layer in the network.
i.e., list.get(i) is the output activation sizes for layer i + * layer in the network.
+ * i.e., list.get(i) is the output activation sizes for layer i * * @param inputType Input type for the network * @return A lits of activation types for the network, indexed by layer number @@ -699,38 +448,47 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { public void addNetWideVariable(String variable) { if (!netWideVariables.contains(variable)) { netWideVariables.add(variable); - log.trace("Adding neural network wide variable '{}' to the list of variables. New length is {}.", variable, netWideVariables.size()); + log.trace( + "Adding neural network wide variable '{}' to the list of variables. New length is {}.", + variable, + netWideVariables.size()); } - log.trace("Skipped adding neural network wide variable '{}' to the list of variables. It was already present. Length remains {}.", variable, netWideVariables.size()); + log.trace( + "Skipped adding neural network wide variable '{}' to the list of variables. It was already present. Length remains {}.", + variable, + netWideVariables.size()); } public void clearNetWideVariable() { netWideVariables.clear(); - log.trace("Adding neural network wide variables have been cleared. New length is {}.", netWideVariables.size()); + log.trace( + "Adding neural network wide variables have been cleared. New length is {}.", + netWideVariables.size()); } - - /** - * From the list of layers and neural net configurations, only return the Layer Configurations that - * are defined in this neural network (it does not include embedded neural network configuration - * layers) + * From the list of layers and neural net configurations, only return the Layer Configurations + * that are defined in this neural network (it does not include embedded neural network + * configuration layers) + * * @return list with layer configurations */ @JsonIgnore public List getLayerConfigurations() { return innerConfigurations.stream() .filter(obj -> (obj instanceof LayerConfiguration)) - .map( obj -> (LayerConfiguration)obj ) - .collect( Collectors.toList()); + .map(obj -> (LayerConfiguration) obj) + .collect(Collectors.toList()); } /** - * From the list of layers and neural net configurations, only return the neural net configurations + * From the list of layers and neural net configurations, only return the neural net + * configurations + * * @return list with neural net configurations */ - //@Synchronized("innerConfigurationsLock") + // @Synchronized("innerConfigurationsLock") @JsonIgnore public List getNetConfigurations() { List list; @@ -751,35 +509,42 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { * @return list of layer configurations */ public List getFlattenedLayerConfigurations(NeuralNetConfiguration conf) { - List ret = new ArrayList<>(); //create the final return list - //When properly initialized, _this_ configuration is set first in the list, however we - //can find cases where this is not true, thus the first configuration is another net or layer configuration - //and should not be skipped. In essence, skip first configuration if that is "this". - //TODO: skipping not needed anymore as we removed _this_ from innerConfigurations + List ret = new ArrayList<>(); // create the final return list + // When properly initialized, _this_ configuration is set first in the list, however we + // can find cases where this is not true, thus the first configuration is another net or layer + // configuration + // and should not be skipped. In essence, skip first configuration if that is "this". 
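The recursive walk described in the comments above (keep the layer configurations, descend into embedded network configurations) can be pictured with a small self-contained sketch; the types below are stand-ins for the DL4J classes, not the actual API.

    import java.util.ArrayList;
    import java.util.List;

    // Stand-alone illustration of the flattening pattern used by getFlattenedLayerConfigurations:
    // keep the entries that are layers, recurse into entries that are nested configurations.
    public class FlattenSketch {
      @SuppressWarnings("unchecked")
      static List<String> flatten(List<Object> inner) {
        List<String> out = new ArrayList<>();
        for (Object o : inner) {
          if (o instanceof String) {            // stand-in for a LayerConfiguration
            out.add((String) o);
          } else if (o instanceof List) {       // stand-in for an embedded NeuralNetConfiguration
            out.addAll(flatten((List<Object>) o));
          }
        }
        return out;
      }

      public static void main(String[] args) {
        List<Object> nested = List.of("dense-0", List.of("conv-0", "conv-1"), "output");
        System.out.println(flatten(nested));    // [dense-0, conv-0, conv-1, output]
      }
    }
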
+ // TODO: skipping not needed anymore as we removed _this_ from innerConfigurations int iSkip = 0; - if(conf.getInnerConfigurations().size()>0 && conf.getInnerConfigurations().get(0).equals(this)) { iSkip=1;} - conf.getInnerConfigurations().stream().skip(iSkip) - .forEach(obj -> { - //if Layer Config, include in list and inherit parameters from this conf - //else if neural net configuration, call self recursively to resolve layer configurations + if (conf.getInnerConfigurations().size() > 0 + && conf.getInnerConfigurations().get(0).equals(this)) { + iSkip = 1; + } + conf.getInnerConfigurations().stream() + .skip(iSkip) + .forEach( + obj -> { + // if Layer Config, include in list and inherit parameters from this conf + // else if neural net configuration, call self recursively to resolve layer + // configurations if (obj instanceof LayerConfiguration) { ((LayerConfiguration) obj).setNetConfiguration(conf); ret.add((LayerConfiguration) obj); - } else if (obj instanceof NeuralNetConfiguration) - ret.addAll(getFlattenedLayerConfigurations( - (NeuralNetConfiguration) obj)); + } else if (obj instanceof NeuralNetConfiguration) + ret.addAll(getFlattenedLayerConfigurations((NeuralNetConfiguration) obj)); else { log.error( - "The list of layers and neural network configurations does contain an object of {}. Element will be ignored.", - obj.getClass().getSimpleName()); + "The list of layers and neural network configurations does contain an object of {}. Element will be ignored.", + obj.getClass().getSimpleName()); } }); return ret; } /** - * Sames as {@link #getFlattenedLayerConfigurations(NeuralNetConfiguration)}, but uses this configurations - * list of configurations + * Sames as {@link #getFlattenedLayerConfigurations(NeuralNetConfiguration)}, but uses this + * configurations list of configurations + * * @return list of layer configurations */ @JsonIgnore @@ -789,6 +554,7 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { /** * Add a new layer to the first position + * * @param layer configuration */ public void setLayer(@NonNull LayerConfiguration layer) { @@ -801,26 +567,28 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { } /** - * Deprecated, do not use. Workaround for old tests - * and getFlattenedLayerConfigurations().get(0); + * Deprecated, do not use. 
Workaround for old tests and getFlattenedLayerConfigurations().get(0); + * * @return */ - @Deprecated @JsonIgnore + @Deprecated + @JsonIgnore public LayerConfiguration getFirstLayer() { log.warn("This getFirstLayer method is an ugly workaround and will be removed."); return getFlattenedLayerConfigurations().get(0); } + /* + protected boolean canEqual(final Object other) { + return other instanceof NeuralNetConfiguration; + } + */ - protected boolean canEqual(final Object other) { - return other instanceof NeuralNetConfiguration; - } - - - public static abstract class NeuralNetConfigurationBuilder> extends - NeuralNetBaseBuilderConfigurationBuilder { + public abstract static class NeuralNetConfigurationBuilder< + C extends NeuralNetConfiguration, + B extends NeuralNetConfiguration.NeuralNetConfigurationBuilder> + extends NeuralNetBaseBuilderConfigurationBuilder { public ComputationGraphConfiguration.GraphBuilder graphBuilder() { return new ComputationGraphConfiguration.GraphBuilder(this); @@ -829,10 +597,9 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { public NeuralNetConfigurationBuilder clone() { try { return (NeuralNetConfigurationBuilder) super.clone(); - } catch(CloneNotSupportedException ex) { + } catch (CloneNotSupportedException ex) { throw new RuntimeException(ex); } } - } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/RNNFormat.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/RNNFormat.java index 84a2d1c3a..918bcf696 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/RNNFormat.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/RNNFormat.java @@ -21,7 +21,13 @@ package org.deeplearning4j.nn.conf; +/** + * N is the batch size
+ * C is the number of feature maps (that is, the number of channels)<br>
+ * H is the image height (not used for 1D conv, as this is an RNN format)<br>
+ * W is the image width
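To make the two layouts concrete, a minimal shape-only sketch, assuming a batch of 8 sequences with 3 features over 20 time steps:

    import org.nd4j.linalg.api.ndarray.INDArray;
    import org.nd4j.linalg.factory.Nd4j;

    // Shape-only sketch of the two RNN data layouts for batch=8, channels/features=3, width=20.
    public class RnnFormatShapes {
      public static void main(String[] args) {
        INDArray ncw = Nd4j.zeros(8, 3, 20);   // RNNFormat.NCW -> [batch, channels, width]
        INDArray nwc = Nd4j.zeros(8, 20, 3);   // RNNFormat.NWC -> [batch, width, channels]
        System.out.println(java.util.Arrays.toString(ncw.shape()));  // [8, 3, 20]
        System.out.println(java.util.Arrays.toString(nwc.shape()));  // [8, 20, 3]
      }
    }
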
+ * **/ public enum RNNFormat implements DataFormat { - NCW, - NWC + /** n=batch size; c=channels/ features; w=width **/ NCW, + /** n=batch size; w=width; c=channels/ features **/ NWC } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/BaseConstraint.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/BaseConstraint.java index f9a3e81f0..50d9658bf 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/BaseConstraint.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/BaseConstraint.java @@ -20,6 +20,9 @@ package org.deeplearning4j.nn.conf.constraint; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; import lombok.*; import org.apache.commons.lang3.ArrayUtils; import org.deeplearning4j.nn.api.Layer; @@ -27,11 +30,6 @@ import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; - - @AllArgsConstructor @EqualsAndHashCode @Data diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MaxNormConstraint.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MaxNormConstraint.java index a38e6dfcf..fc5104d0c 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MaxNormConstraint.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MaxNormConstraint.java @@ -20,6 +20,8 @@ package org.deeplearning4j.nn.conf.constraint; +import java.util.Collections; +import java.util.Set; import lombok.Data; import lombok.EqualsAndHashCode; import org.nd4j.linalg.api.ndarray.INDArray; @@ -27,9 +29,6 @@ import org.nd4j.linalg.factory.Broadcast; import org.nd4j.linalg.indexing.BooleanIndexing; import org.nd4j.linalg.indexing.conditions.Conditions; -import java.util.Collections; -import java.util.Set; - @Data @EqualsAndHashCode(callSuper = true) public class MaxNormConstraint extends BaseConstraint { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MinMaxNormConstraint.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MinMaxNormConstraint.java index ca43d4ca0..77de17be5 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MinMaxNormConstraint.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/MinMaxNormConstraint.java @@ -20,6 +20,8 @@ package org.deeplearning4j.nn.conf.constraint; +import java.util.Collections; +import java.util.Set; import lombok.Data; import lombok.EqualsAndHashCode; import org.nd4j.linalg.api.ndarray.INDArray; @@ -27,11 +29,6 @@ import org.nd4j.linalg.api.ops.CustomOp; import org.nd4j.linalg.api.ops.DynamicCustomOp; import org.nd4j.linalg.factory.Broadcast; import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.linalg.indexing.BooleanIndexing; -import org.nd4j.linalg.indexing.conditions.Conditions; - -import java.util.Collections; -import java.util.Set; @Data @EqualsAndHashCode(callSuper = true) diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/UnitNormConstraint.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/UnitNormConstraint.java index 3e80f341b..f0d41ceb8 100644 --- 
a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/UnitNormConstraint.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/constraint/UnitNormConstraint.java @@ -20,14 +20,13 @@ package org.deeplearning4j.nn.conf.constraint; +import java.util.Collections; +import java.util.Set; import lombok.Data; import lombok.EqualsAndHashCode; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Broadcast; -import java.util.Collections; -import java.util.Set; - @Data @EqualsAndHashCode(callSuper = true) public class UnitNormConstraint extends BaseConstraint { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/ConstantDistribution.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/ConstantDistribution.java index 9d50bcc6b..00d7b6157 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/ConstantDistribution.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/ConstantDistribution.java @@ -20,10 +20,10 @@ package org.deeplearning4j.nn.conf.distribution; -import lombok.Data; -import lombok.EqualsAndHashCode; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; +import lombok.Data; +import lombok.EqualsAndHashCode; @Data @EqualsAndHashCode(callSuper = false) diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/Distribution.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/Distribution.java index f9cfb41cd..768136f51 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/Distribution.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/Distribution.java @@ -20,13 +20,10 @@ package org.deeplearning4j.nn.conf.distribution; -import org.deeplearning4j.nn.conf.distribution.serde.LegacyDistributionHelper; import com.fasterxml.jackson.annotation.JsonTypeInfo; - import java.io.Serializable; -@JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, include = JsonTypeInfo.As.PROPERTY, property = "type", - defaultImpl = LegacyDistributionHelper.class) +@JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, property = "@class") public abstract class Distribution implements Serializable, Cloneable { private static final long serialVersionUID = 5401741214954998498L; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/LogNormalDistribution.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/LogNormalDistribution.java index 5a2d12035..b2f2b9c1e 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/LogNormalDistribution.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/LogNormalDistribution.java @@ -20,10 +20,10 @@ package org.deeplearning4j.nn.conf.distribution; -import lombok.Data; -import lombok.EqualsAndHashCode; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; +import lombok.Data; +import lombok.EqualsAndHashCode; /** * A log-normal distribution, with two parameters: mean and standard deviation. 
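The switch on Distribution from the legacy "type" property with LegacyDistributionHelper to a plain "@class" type id means the concrete subtype is written into the JSON itself. Roughly, serialization and polymorphic deserialization through the abstract Distribution type behave as in the sketch below (using a plain Jackson ObjectMapper rather than the project's CavisMapper; the exact field order in the JSON may differ).

    import com.fasterxml.jackson.databind.ObjectMapper;
    import org.deeplearning4j.nn.conf.distribution.Distribution;
    import org.deeplearning4j.nn.conf.distribution.NormalDistribution;

    // Sketch of the @JsonTypeInfo(use = Id.CLASS, property = "@class") behaviour on Distribution.
    public class DistributionJsonSketch {
      public static void main(String[] args) throws Exception {
        ObjectMapper mapper = new ObjectMapper();
        String json = mapper.writeValueAsString(new NormalDistribution(0.0, 1.0));
        // json now carries the concrete type, roughly:
        // {"@class":"org.deeplearning4j.nn.conf.distribution.NormalDistribution","mean":0.0,"std":1.0}
        Distribution back = mapper.readValue(json, Distribution.class);
        System.out.println(back.getClass().getSimpleName());  // NormalDistribution
      }
    }
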
diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/NormalDistribution.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/NormalDistribution.java index 1c867a6ff..74cd7b5d2 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/NormalDistribution.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/NormalDistribution.java @@ -20,10 +20,10 @@ package org.deeplearning4j.nn.conf.distribution; -import lombok.Data; -import lombok.EqualsAndHashCode; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; +import lombok.Data; +import lombok.EqualsAndHashCode; /** * A normal (Gaussian) distribution, with two parameters: mean and standard deviation @@ -48,21 +48,7 @@ public class NormalDistribution extends Distribution { this.std = std; } - public double getMean() { - return mean; - } - public void setMean(double mean) { - this.mean = mean; - } - - public double getStd() { - return std; - } - - public void setStd(double std) { - this.std = std; - } @Override public int hashCode() { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/OrthogonalDistribution.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/OrthogonalDistribution.java index f34fbc93f..59ee62c0c 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/OrthogonalDistribution.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/OrthogonalDistribution.java @@ -20,10 +20,10 @@ package org.deeplearning4j.nn.conf.distribution; -import lombok.Data; -import lombok.EqualsAndHashCode; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; +import lombok.Data; +import lombok.EqualsAndHashCode; /** * Orthogonal distribution, with gain parameter.
diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/TruncatedNormalDistribution.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/TruncatedNormalDistribution.java index 027471534..a769b973c 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/TruncatedNormalDistribution.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/TruncatedNormalDistribution.java @@ -20,10 +20,10 @@ package org.deeplearning4j.nn.conf.distribution; -import lombok.Data; -import lombok.EqualsAndHashCode; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; +import lombok.Data; +import lombok.EqualsAndHashCode; @EqualsAndHashCode(callSuper = false) @Data diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/UniformDistribution.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/UniformDistribution.java index 0c3e29de5..5f3a6daf3 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/UniformDistribution.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/UniformDistribution.java @@ -20,12 +20,12 @@ package org.deeplearning4j.nn.conf.distribution; +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; import lombok.Data; import lombok.EqualsAndHashCode; import org.apache.commons.math3.exception.NumberIsTooLargeException; import org.apache.commons.math3.exception.util.LocalizedFormats; -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; /** * A uniform distribution, with two parameters: lower and upper - i.e., U(lower,upper) diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/serde/LegacyDistributionDeserializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/serde/LegacyDistributionDeserializer.java index ecf9fee12..1b932d2f2 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/serde/LegacyDistributionDeserializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/serde/LegacyDistributionDeserializer.java @@ -20,15 +20,13 @@ package org.deeplearning4j.nn.conf.distribution.serde; -import org.deeplearning4j.nn.conf.distribution.*; import com.fasterxml.jackson.core.JsonParseException; import com.fasterxml.jackson.core.JsonParser; -import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.DeserializationContext; import com.fasterxml.jackson.databind.JsonDeserializer; import com.fasterxml.jackson.databind.JsonNode; - import java.io.IOException; +import org.deeplearning4j.nn.conf.distribution.*; public class LegacyDistributionDeserializer extends JsonDeserializer { @Override diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/serde/LegacyDistributionHelper.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/serde/LegacyDistributionHelper.java index 8f1168ef6..92598a6af 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/serde/LegacyDistributionHelper.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/serde/LegacyDistributionHelper.java @@ -20,8 +20,8 @@ package 
org.deeplearning4j.nn.conf.distribution.serde; -import org.deeplearning4j.nn.conf.distribution.Distribution; import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import org.deeplearning4j.nn.conf.distribution.Distribution; @JsonDeserialize(using = LegacyDistributionDeserializer.class) public class LegacyDistributionHelper extends Distribution { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/AlphaDropout.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/AlphaDropout.java index e5234e13e..3e4e2aee2 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/AlphaDropout.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/AlphaDropout.java @@ -20,6 +20,8 @@ package org.deeplearning4j.nn.conf.dropout; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonProperty; import lombok.Data; import lombok.EqualsAndHashCode; import lombok.NonNull; @@ -32,8 +34,6 @@ import org.nd4j.linalg.api.ops.impl.transforms.pairwise.arithmetic.MulOp; import org.nd4j.linalg.api.ops.random.impl.BernoulliDistribution; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.schedule.ISchedule; -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import com.fasterxml.jackson.annotation.JsonProperty; @Data @EqualsAndHashCode(exclude = {"lastPValue","alphaPrime","a","b", "mask"}) diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/Dropout.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/Dropout.java index 0078801bc..a72d48ba7 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/Dropout.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/Dropout.java @@ -20,6 +20,8 @@ package org.deeplearning4j.nn.conf.dropout; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonProperty; import lombok.Data; import lombok.EqualsAndHashCode; import lombok.Getter; @@ -36,8 +38,6 @@ import org.nd4j.linalg.api.ops.random.impl.DropOutInverted; import org.nd4j.linalg.exception.ND4JOpProfilerException; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.schedule.ISchedule; -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import com.fasterxml.jackson.annotation.JsonProperty; @Data @JsonIgnoreProperties({"mask", "helper", "helperCountFail", "initializedHelper"}) diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/GaussianDropout.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/GaussianDropout.java index 6157ef078..554de0a5a 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/GaussianDropout.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/GaussianDropout.java @@ -20,6 +20,8 @@ package org.deeplearning4j.nn.conf.dropout; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonProperty; import lombok.Data; import lombok.EqualsAndHashCode; import org.deeplearning4j.nn.workspace.ArrayType; @@ -30,8 +32,6 @@ import org.nd4j.linalg.api.ops.impl.transforms.pairwise.arithmetic.MulOp; import org.nd4j.linalg.api.ops.random.impl.GaussianDistribution; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.schedule.ISchedule; -import 
com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import com.fasterxml.jackson.annotation.JsonProperty; @Data @JsonIgnoreProperties({"noise"}) diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/GaussianNoise.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/GaussianNoise.java index f33a99150..9bb079ebd 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/GaussianNoise.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/GaussianNoise.java @@ -20,6 +20,7 @@ package org.deeplearning4j.nn.conf.dropout; +import com.fasterxml.jackson.annotation.JsonProperty; import lombok.Data; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.nd4j.linalg.api.ndarray.INDArray; @@ -27,7 +28,6 @@ import org.nd4j.linalg.api.ops.impl.transforms.pairwise.arithmetic.AddOp; import org.nd4j.linalg.api.ops.random.impl.GaussianDistribution; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.schedule.ISchedule; -import com.fasterxml.jackson.annotation.JsonProperty; @Data public class GaussianNoise implements IDropout { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/IDropout.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/IDropout.java index a5aa67288..33d58c053 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/IDropout.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/IDropout.java @@ -20,11 +20,10 @@ package org.deeplearning4j.nn.conf.dropout; +import com.fasterxml.jackson.annotation.JsonTypeInfo; +import java.io.Serializable; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.nd4j.linalg.api.ndarray.INDArray; -import com.fasterxml.jackson.annotation.JsonTypeInfo; - -import java.io.Serializable; @JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, include = JsonTypeInfo.As.PROPERTY, property = "@class") public interface IDropout extends Serializable, Cloneable { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/SpatialDropout.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/SpatialDropout.java index d0bb26529..67ff15ace 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/SpatialDropout.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/SpatialDropout.java @@ -20,6 +20,8 @@ package org.deeplearning4j.nn.conf.dropout; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonProperty; import lombok.Data; import lombok.EqualsAndHashCode; import lombok.val; @@ -31,8 +33,6 @@ import org.nd4j.linalg.api.ops.random.impl.DropOutInverted; import org.nd4j.linalg.factory.Broadcast; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.schedule.ISchedule; -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import com.fasterxml.jackson.annotation.JsonProperty; @Data @JsonIgnoreProperties({"mask"}) diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/AttentionVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/AttentionVertex.java index e50e1aae5..068f88474 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/AttentionVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/AttentionVertex.java @@ -20,6 +20,7 @@ package 
org.deeplearning4j.nn.conf.graph; import com.google.common.base.Preconditions; +import java.util.Map; import lombok.*; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -30,12 +31,10 @@ import org.deeplearning4j.nn.weights.WeightInit; import org.deeplearning4j.nn.weights.WeightInitUtil; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.memory.MemoryWorkspace; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.common.primitives.Pair; - -import java.util.Map; @NoArgsConstructor @Data diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/ElementWiseVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/ElementWiseVertex.java index 5ea6e64b2..f64b394a1 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/ElementWiseVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/ElementWiseVertex.java @@ -20,6 +20,7 @@ package org.deeplearning4j.nn.conf.graph; +import com.fasterxml.jackson.annotation.JsonProperty; import lombok.Data; import lombok.val; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -30,7 +31,6 @@ import org.deeplearning4j.nn.conf.memory.MemoryReport; import org.deeplearning4j.nn.graph.ComputationGraph; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import com.fasterxml.jackson.annotation.JsonProperty; @Data public class ElementWiseVertex extends GraphVertex { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/FrozenVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/FrozenVertex.java index 73557c2c1..ff63f7c94 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/FrozenVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/FrozenVertex.java @@ -20,6 +20,7 @@ package org.deeplearning4j.nn.conf.graph; +import com.fasterxml.jackson.annotation.JsonProperty; import lombok.Data; import lombok.EqualsAndHashCode; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -28,7 +29,6 @@ import org.deeplearning4j.nn.conf.memory.MemoryReport; import org.deeplearning4j.nn.graph.ComputationGraph; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import com.fasterxml.jackson.annotation.JsonProperty; @Data @EqualsAndHashCode(callSuper = false) diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/GraphVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/GraphVertex.java index 00f0f7f52..ff139f2ba 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/GraphVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/GraphVertex.java @@ -20,15 +20,14 @@ package org.deeplearning4j.nn.conf.graph; +import com.fasterxml.jackson.annotation.JsonTypeInfo; +import java.io.Serializable; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.inputs.InvalidInputTypeException; import org.deeplearning4j.nn.conf.memory.MemoryReport; import org.deeplearning4j.nn.graph.ComputationGraph; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import com.fasterxml.jackson.annotation.JsonTypeInfo; - -import 
java.io.Serializable; @JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, include = JsonTypeInfo.As.PROPERTY, property = "@class") public abstract class GraphVertex implements Cloneable, Serializable { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/L2NormalizeVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/L2NormalizeVertex.java index 502021120..e3e6d7860 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/L2NormalizeVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/L2NormalizeVertex.java @@ -20,6 +20,7 @@ package org.deeplearning4j.nn.conf.graph; +import com.fasterxml.jackson.annotation.JsonProperty; import lombok.Data; import lombok.EqualsAndHashCode; import lombok.val; @@ -30,7 +31,6 @@ import org.deeplearning4j.nn.conf.memory.MemoryReport; import org.deeplearning4j.nn.graph.ComputationGraph; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import com.fasterxml.jackson.annotation.JsonProperty; @Data @EqualsAndHashCode(callSuper = false) diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/LayerVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/LayerVertex.java index 67f6ee365..1e98ebe2a 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/LayerVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/LayerVertex.java @@ -20,6 +20,7 @@ package org.deeplearning4j.nn.conf.graph; +import java.util.Arrays; import lombok.Data; import lombok.Getter; import lombok.NoArgsConstructor; @@ -34,8 +35,6 @@ import org.deeplearning4j.nn.graph.ComputationGraph; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.Arrays; - @NoArgsConstructor @Data public class LayerVertex extends GraphVertex { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/MergeVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/MergeVertex.java index 97b4e9606..a302079b6 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/MergeVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/MergeVertex.java @@ -22,7 +22,6 @@ package org.deeplearning4j.nn.conf.graph; import lombok.Data; -import lombok.Setter; import lombok.val; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.RNNFormat; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/ReshapeVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/ReshapeVertex.java index 7ae4a374b..f9e34488a 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/ReshapeVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/ReshapeVertex.java @@ -20,6 +20,8 @@ package org.deeplearning4j.nn.conf.graph; +import com.fasterxml.jackson.annotation.JsonProperty; +import java.util.Arrays; import lombok.Data; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.inputs.InvalidInputTypeException; @@ -29,9 +31,6 @@ import org.deeplearning4j.nn.graph.ComputationGraph; import org.nd4j.common.base.Preconditions; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import com.fasterxml.jackson.annotation.JsonProperty; - -import 
java.util.Arrays; @Data public class ReshapeVertex extends GraphVertex { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/ScaleVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/ScaleVertex.java index a51fd312b..c34e0af34 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/ScaleVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/ScaleVertex.java @@ -20,6 +20,7 @@ package org.deeplearning4j.nn.conf.graph; +import com.fasterxml.jackson.annotation.JsonProperty; import lombok.Data; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.inputs.InvalidInputTypeException; @@ -28,7 +29,6 @@ import org.deeplearning4j.nn.conf.memory.MemoryReport; import org.deeplearning4j.nn.graph.ComputationGraph; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import com.fasterxml.jackson.annotation.JsonProperty; @Data public class ScaleVertex extends GraphVertex { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/ShiftVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/ShiftVertex.java index c1e878c60..b01bb6101 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/ShiftVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/ShiftVertex.java @@ -20,6 +20,7 @@ package org.deeplearning4j.nn.conf.graph; +import com.fasterxml.jackson.annotation.JsonProperty; import lombok.Data; import lombok.EqualsAndHashCode; import lombok.NoArgsConstructor; @@ -31,7 +32,6 @@ import org.deeplearning4j.nn.conf.memory.MemoryReport; import org.deeplearning4j.nn.graph.ComputationGraph; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import com.fasterxml.jackson.annotation.JsonProperty; @Data @NoArgsConstructor diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/SubsetVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/SubsetVertex.java index 52f0e059d..cba3922af 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/SubsetVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/SubsetVertex.java @@ -20,6 +20,8 @@ package org.deeplearning4j.nn.conf.graph; +import com.fasterxml.jackson.annotation.JsonProperty; +import java.util.Arrays; import lombok.Data; import lombok.val; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -29,9 +31,6 @@ import org.deeplearning4j.nn.conf.memory.MemoryReport; import org.deeplearning4j.nn.graph.ComputationGraph; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import com.fasterxml.jackson.annotation.JsonProperty; - -import java.util.Arrays; @Data public class SubsetVertex extends GraphVertex { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/UnstackVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/UnstackVertex.java index 091194c1a..73f0f7dde 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/UnstackVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/UnstackVertex.java @@ -21,6 +21,7 @@ package org.deeplearning4j.nn.conf.graph; +import com.fasterxml.jackson.annotation.JsonProperty; import lombok.Getter; import lombok.val; import 
org.deeplearning4j.nn.conf.inputs.InputType; @@ -30,7 +31,6 @@ import org.deeplearning4j.nn.conf.memory.MemoryReport; import org.deeplearning4j.nn.graph.ComputationGraph; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import com.fasterxml.jackson.annotation.JsonProperty; @Getter public class UnstackVertex extends GraphVertex { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/rnn/DuplicateToTimeSeriesVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/rnn/DuplicateToTimeSeriesVertex.java index 892562a4a..324d674f9 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/rnn/DuplicateToTimeSeriesVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/rnn/DuplicateToTimeSeriesVertex.java @@ -20,6 +20,7 @@ package org.deeplearning4j.nn.conf.graph.rnn; +import com.fasterxml.jackson.annotation.JsonProperty; import lombok.Data; import lombok.EqualsAndHashCode; import org.deeplearning4j.nn.conf.graph.GraphVertex; @@ -30,7 +31,6 @@ import org.deeplearning4j.nn.conf.memory.MemoryReport; import org.deeplearning4j.nn.graph.ComputationGraph; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import com.fasterxml.jackson.annotation.JsonProperty; @Data @EqualsAndHashCode(callSuper = false) diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/rnn/LastTimeStepVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/rnn/LastTimeStepVertex.java index dceb70a12..7f2bdcb56 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/rnn/LastTimeStepVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/rnn/LastTimeStepVertex.java @@ -20,6 +20,7 @@ package org.deeplearning4j.nn.conf.graph.rnn; +import com.fasterxml.jackson.annotation.JsonProperty; import lombok.Data; import org.deeplearning4j.nn.conf.graph.GraphVertex; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -29,7 +30,6 @@ import org.deeplearning4j.nn.conf.memory.MemoryReport; import org.deeplearning4j.nn.graph.ComputationGraph; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import com.fasterxml.jackson.annotation.JsonProperty; @Data public class LastTimeStepVertex extends GraphVertex { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/inputs/InputType.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/inputs/InputType.java index db98572c0..078dd231c 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/inputs/InputType.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/inputs/InputType.java @@ -20,25 +20,23 @@ package org.deeplearning4j.nn.conf.inputs; +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonTypeInfo; +import java.io.Serializable; +import java.util.Arrays; import lombok.Data; import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.NoArgsConstructor; import lombok.extern.slf4j.Slf4j; +import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.DataFormat; import org.deeplearning4j.nn.conf.RNNFormat; -import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.layers.Convolution3D; 
-import org.nd4j.common.base.Preconditions; import org.nd4j.common.util.OneTimeLogger; import org.nd4j.linalg.api.ndarray.INDArray; -import com.fasterxml.jackson.annotation.JsonIgnore; -import com.fasterxml.jackson.annotation.JsonInclude; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.fasterxml.jackson.annotation.JsonTypeInfo; - -import java.io.Serializable; -import java.util.Arrays; @JsonInclude(JsonInclude.Include.NON_NULL) @JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, include = JsonTypeInfo.As.PROPERTY, property = "@class") @@ -90,6 +88,7 @@ public abstract class InputType implements Serializable { * * @return int[] */ + @JsonIgnore public long[] getShape() { return getShape(false); } @@ -431,7 +430,7 @@ public abstract class InputType implements Serializable { return height * width * depth * channels; } - @Override + @Override @JsonIgnore public long[] getShape(boolean includeBatchDim) { if(dataFormat == Convolution3D.DataFormat.NDHWC){ if(includeBatchDim) return new long[]{-1, depth, height, width, channels}; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AbstractConvolutionLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AbstractConvolutionLayer.java new file mode 100644 index 000000000..dc90bb8f3 --- /dev/null +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AbstractConvolutionLayer.java @@ -0,0 +1,142 @@ +/* + * + * ****************************************************************************** + * * + * * This program and the accompanying materials are made available under the + * * terms of the Apache License, Version 2.0 which is available at + * * https://www.apache.org/licenses/LICENSE-2.0. + * * + * * See the NOTICE file distributed with this work for additional + * * information regarding copyright ownership. + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * * License for the specific language governing permissions and limitations + * * under the License. + * * + * * SPDX-License-Identifier: Apache-2.0 + * ***************************************************************************** + * + */ + +package org.deeplearning4j.nn.conf.layers; + +import java.util.Arrays; + +import com.fasterxml.jackson.annotation.JsonIgnore; +import lombok.*; +import lombok.experimental.Accessors; +import lombok.experimental.SuperBuilder; +import lombok.extern.slf4j.Slf4j; +import org.deeplearning4j.nn.conf.CNN2DFormat; +import org.deeplearning4j.nn.conf.ConvolutionMode; +import org.deeplearning4j.util.ValidationUtils; + +/** + * ConvolutionLayer nIn in the input layer is the number of channels nOut is the number of filters + * to be used in the net or in other words the channels The builder specifies the filter/kernel + * size, the stride and padding The pooling layer takes the kernel size + * + *

Supports multiple dimensions: In 1D CNN, kernel moves in 1 direction. Input and output data of + * 1D CNN is 2 dimensional. Mostly used on Time-Series data. + * + *

In 2D CNN, kernel moves in 2 directions. Input and output data of 2D CNN is 3 dimensional. + * Mostly used on Image data. + * + *

In 3D CNN, kernel moves in 3 directions. Input and output data of 3D CNN is 4 dimensional. + * Mostly used on 3D Image data (MRI, CT Scans, Video). + */ +@ToString(callSuper = true) +@NoArgsConstructor +@EqualsAndHashCode(callSuper = true) +@Slf4j +@SuperBuilder +public abstract class AbstractConvolutionLayer extends FeedForwardLayer { + /** The kernel of this convolution, with a size entry for each dimension */ + @Getter private int[] kernelSize; + /** The stride */ + @Getter private int[] stride; + /** The padding */ + @Getter private int[] padding; + /** The dilation */ + @Getter private int[] dilation; + /** If true (default): include bias parameters in the model. False: no bias. */ + @Builder.Default + @Getter + @Accessors(fluent = true) + @Setter + private boolean hasBias = true; + /** + * Set the convolution mode for the Convolution layer. See {@link ConvolutionMode} for more + * details. Default is {@link ConvolutionMode}.Truncate. + */ + @Builder.Default @Getter @Setter + private ConvolutionMode convolutionMode = ConvolutionMode.Truncate; + /** + * When using CuDNN and an error is encountered, should a fallback to the non-CuDNN implementation + * be allowed? If set to false, an exception in CuDNN will be propagated back to the user. If + * true, the built-in (non-CuDNN) implementation for ConvolutionLayer will be used + */ + @Getter @Setter @Builder.Default private boolean cudnnAllowFallback = true; + + /** Defaults to "PREFER_FASTEST", but "NO_WORKSPACE" uses less memory. */ + @Getter @Setter @Builder.Default private ConvolutionLayer.AlgoMode cudnnAlgoMode = ConvolutionLayer.AlgoMode.PREFER_FASTEST; + + @Getter @Setter private ConvolutionLayer.FwdAlgo cudnnFwdAlgo; + @Getter @Setter private ConvolutionLayer.BwdFilterAlgo cudnnBwdFilterAlgo; + @Getter @Setter private ConvolutionLayer.BwdDataAlgo cudnnBwdDataAlgo; + + /** + * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last). + * See {@link CNN2DFormat} for more details.
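The kernelSize, stride, padding and dilation arrays declared above hold one entry per spatial dimension of the convolution. A minimal usage sketch, not part of the patch, assuming the ConvolutionLayer.builder() overloads and the nIn/nOut builder methods inherited from FeedForwardLayer that appear further down in this diff:

    // Hypothetical 2D configuration: two entries per array, the kernel moves in two directions.
    ConvolutionLayer conv2d = ConvolutionLayer.builder()
        .nIn(3)              // input channels
        .nOut(16)            // filters
        .kernelSize(3, 3)
        .stride(1, 1)
        .padding(0, 0)
        .build();
    // A 1D layer (Convolution1D, later in this diff) would use single-entry arrays,
    // a 3D layer three-entry arrays.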
+ * Default: NCHW + * + * @param format Format for activations (in and out) + */ + @Builder.Default @Getter @Setter + private CNN2DFormat convFormat = + CNN2DFormat.NCHW; // default value for legacy serialization reasons + + + /** + * Number of parameters this layer has as a result of its configuration. + * + * @return number of parameters + */ + @Override + public long numParams() { + var kern = 1; + for (int i : getKernelSize()) { + kern = kern * i; + } + return nIn * nOut * kern + (hasBias() ? nOut : 0); + } + + public abstract static class AbstractConvolutionLayerBuilder< + C extends AbstractConvolutionLayer, B extends AbstractConvolutionLayerBuilder> + extends FeedForwardLayerBuilder { + + public B kernelSize(int @NonNull ... kernelSize) { + if (this.kernelSize != null) { + log.warn("You are setting the kernel more than once, last call will override prior calls."); + } + this.kernelSize = kernelSize; + return self(); + } + + public B stride(int @NonNull ... stride) { + this.stride = stride; + return self(); + } + + public B padding(int @NonNull ... padding) { + this.padding = padding; + return self(); + } + + public B dilation(int @NonNull ... dilation) { + this.dilation = dilation; + return self(); + } + } +} diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AbstractLSTM.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AbstractLSTM.java index 13a520590..5f0cfb0a2 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AbstractLSTM.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AbstractLSTM.java @@ -22,12 +22,10 @@ package org.deeplearning4j.nn.conf.layers; import lombok.*; import lombok.experimental.SuperBuilder; -import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.activations.impl.ActivationSigmoid; import org.nd4j.linalg.activations.impl.ActivationTanH; - @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) @SuperBuilder diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ActivationLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ActivationLayer.java index 17c1631a8..0b4a24f17 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ActivationLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ActivationLayer.java @@ -24,7 +24,7 @@ import java.util.Collection; import java.util.Map; import lombok.*; import lombok.experimental.SuperBuilder; -import net.brutex.ai.dnn.api.LayerType; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.InputPreProcessor; @@ -40,9 +40,9 @@ import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.learning.config.IUpdater; - @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@Jacksonized @SuperBuilder(builderMethodName = "innerBuilder") public class ActivationLayer extends NoParamLayer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AutoEncoder.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AutoEncoder.java index 0ed7a14fa..803c3eb4f 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AutoEncoder.java +++
b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AutoEncoder.java @@ -20,8 +20,11 @@ package org.deeplearning4j.nn.conf.layers; +import java.util.Collection; +import java.util.Map; import lombok.*; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -33,12 +36,9 @@ import org.deeplearning4j.optimize.api.TrainingListener; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.Collection; -import java.util.Map; - - @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@Jacksonized +@SuperBuilder public class AutoEncoder extends BasePretrainNetwork { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseLayerConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseLayerConfiguration.java index 9bdc63e4f..502e5a5e4 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseLayerConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseLayerConfiguration.java @@ -20,6 +20,8 @@ package org.deeplearning4j.nn.conf.layers; +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonProperty; import java.io.Serializable; import java.util.ArrayList; import java.util.Arrays; @@ -50,7 +52,16 @@ import org.nd4j.linalg.learning.regularization.WeightDecay; @SuperBuilder public abstract class BaseLayerConfiguration extends LayerConfiguration implements ITraininableLayerConfiguration, Serializable, Cloneable { - + /** + * Number of parameters this layer has as a result of its configuration. This default implementation + * calls {@link #initializer()}.numParams( this ). + * + * @return number of parameters + */ + @Override + public long numParams() { + return initializer().numParams(this); + } /** * Set constraints to be applied to all layers. Default: no constraints.
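The numParams() override added to AbstractConvolutionLayer multiplies nIn, nOut and the kernel volume and then adds one bias per output channel, while the BaseLayerConfiguration default shown above defers to the parameter initializer. A worked example with made-up sizes:

    // Illustrative only: nIn = 3 channels, nOut = 16 filters, kernelSize = {3, 3}, hasBias = true.
    long nIn = 3, nOut = 16;
    int[] kernelSize = {3, 3};
    long kern = 1;
    for (int k : kernelSize) {
      kern *= k;                                  // kernel volume: 3 * 3 = 9
    }
    long numParams = nIn * nOut * kern + nOut;    // 3 * 16 * 9 + 16 = 448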
@@ -63,7 +74,8 @@ public abstract class BaseLayerConfiguration extends LayerConfiguration * * @param constraints Constraints to apply to all bias parameters of all layers */ - //@lombok.Builder.Default @Getter protected final List biasConstraints = new ArrayList<>(); + // @lombok.Builder.Default @Getter protected final List biasConstraints = new + // ArrayList<>(); /** * Set constraints to be applied to all layers. Default: no constraints.
* Constraints can be used to enforce certain conditions (non-negativity of parameters, max-norm @@ -78,16 +90,14 @@ public abstract class BaseLayerConfiguration extends LayerConfiguration @lombok.Builder.Default @Getter protected final List constrainWeights = new ArrayList<>(); /** Weight initialization scheme to use, for initial weight values */ - @Getter @Setter - protected IWeightInit weightInit; + @Getter @Setter protected IWeightInit weightInit; /** Bias initialization value, for layers with biases. Defaults to 0 */ - @Getter @Setter @Builder.Default - protected double biasInit = 0.0; + @Getter @Setter @Builder.Default protected double biasInit = 0.0; /** Gain initialization value, for layers with ILayer Normalization. Defaults to 1 */ - @Getter @Setter @Builder.Default - protected double gainInit = 0.0; + @Getter @Setter @Builder.Default protected double gainInit = 0.0; /** Regularization for the parameters (excluding biases). */ - @Builder.Default @Getter @Setter protected List regularization = new ArrayList<>(); + @Builder.Default @Getter @Setter + protected List regularization = new ArrayList<>(); /** Regularization for the bias parameters only */ @Builder.Default @Getter @Setter protected List regularizationBias = new ArrayList<>(); @@ -95,14 +105,12 @@ public abstract class BaseLayerConfiguration extends LayerConfiguration * Gradient updater. For example, {@link org.nd4j.linalg.learning.config.Adam} or {@link * org.nd4j.linalg.learning.config.Nesterovs} */ - @Getter - protected IUpdater updater; + @Getter protected IUpdater updater; /** * Gradient updater configuration, for the biases only. If not set, biases will use the updater as * set by {@link #setUpdater(IUpdater)} */ - @Getter @Setter - protected IUpdater biasUpdater; + @Getter @Setter protected IUpdater biasUpdater; /** * Gradient normalization strategy. Used to specify gradient renormalization, gradient clipping * etc. Defaults to {@link GradientNormalization#None}. @@ -123,8 +131,8 @@ public abstract class BaseLayerConfiguration extends LayerConfiguration @Builder.Default @Getter @Setter protected double gradientNormalizationThreshold = 1.0; // Threshold for l2 and element-wise gradient clipping -@Getter @Setter - private DataType dataType; + + @Getter @Setter private DataType dataType; /** * Reset the learning related configs of the layer to default. 
When instantiated with a global @@ -134,7 +142,7 @@ public abstract class BaseLayerConfiguration extends LayerConfiguration */ public void resetLayerDefaultConfig() { // clear the learning related params for all layers in the origConf and set to defaults - this.setUpdater( (IUpdater) null); + this.setUpdater((IUpdater) null); this.setWeightInit(null); this.setBiasInit(Double.NaN); this.setGainInit(Double.NaN); @@ -145,12 +153,15 @@ public abstract class BaseLayerConfiguration extends LayerConfiguration this.biasUpdater = null; } - public void setUpdater(Updater updater) { - setUpdater(updater.getIUpdaterWithDefaultConfig()); - } - public void setUpdater(IUpdater iUpdater) { - this.updater=iUpdater; - } + @JsonIgnore + public void setUpdater(Updater updater) { + setUpdater(updater.getIUpdaterWithDefaultConfig()); + } + + @JsonProperty("updater") + public void setUpdater(IUpdater iUpdater) { + this.updater = iUpdater; + } @Override public BaseLayerConfiguration clone() { @@ -209,23 +220,24 @@ public abstract class BaseLayerConfiguration extends LayerConfiguration if (this.updater == null) this.updater = conf.getUpdater(); if (this.regularizationBias == null) this.regularizationBias = conf.getRegularizationBias(); if (this.regularization == null) this.regularization = conf.getRegularization(); - if( this.weightInit == null) this.weightInit = conf.getWeightInit(); + if (this.weightInit == null) this.weightInit = conf.getWeightInit(); if (this.gradientNormalization == null) this.gradientNormalization = conf.getGradientNormalization(); // if(this.weightInit == null) this.weightInit = conf.getWeightInit(); } - public static abstract class BaseLayerConfigurationBuilder< + public abstract static class BaseLayerConfigurationBuilder< C extends BaseLayerConfiguration, B extends BaseLayerConfigurationBuilder> extends LayerConfigurationBuilder { -public B updater(Updater upd) { - this.updater = upd.getIUpdaterWithDefaultConfig(); - return self(); -} - + @JsonIgnore + public B updater(Updater upd) { + this.updater = upd.getIUpdaterWithDefaultConfig(); + return self(); + } + @JsonProperty("updater") public B updater(IUpdater upd) { this.updater = upd; return self(); @@ -237,6 +249,7 @@ public B updater(Updater upd) { * * @param dist Distribution to use for weight initialization */ + @JsonIgnore @Deprecated public B dist(Distribution dist) { this.weightInit = new WeightInitDistribution(dist); @@ -248,6 +261,7 @@ public B updater(Updater upd) { * * @see WeightInit */ + @JsonIgnore public B weightInit(WeightInit weightInit) { if (weightInit == WeightInit.DISTRIBUTION) { throw new UnsupportedOperationException( @@ -257,15 +271,13 @@ public B updater(Updater upd) { return self(); } + @JsonProperty("weightInit") public B weightInit(IWeightInit weightInit) { - if (weightInit.enumValue() == WeightInit.DISTRIBUTION) { - throw new UnsupportedOperationException( - "Not supported!, Use weightInit(Distribution distribution) instead!"); - } - this.weightInit = weightInit.enumValue().getWeightInitFunction(); + this.weightInit = weightInit; return self(); } + @JsonIgnore public B weightInit(Distribution dist) { this.weightInit = new WeightInitDistribution(dist); return self(); @@ -400,8 +412,6 @@ public B updater(Updater upd) { return weightDecayBias(coefficient, true); } - - /** * Weight decay for the biases only - see {@link #weightDecay(double)} for more details
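The paired setUpdater(Updater)/setUpdater(IUpdater) methods and the matching builder overloads above are split with @JsonIgnore and @JsonProperty("updater") so that only the IUpdater form is bound during (de)serialization, while the enum form is converted on the fly. A hedged sketch of both call styles; DenseLayer is used only as a convenient concrete subclass and Adam is org.nd4j.linalg.learning.config.Adam:

    // Enum form: converted through Updater.getIUpdaterWithDefaultConfig(), ignored by Jackson.
    DenseLayer viaEnum = DenseLayer.builder()
        .nIn(10).nOut(5)
        .updater(Updater.ADAM)
        .build();

    // IUpdater form: stored as-is and serialized under the "updater" property.
    DenseLayer viaInstance = DenseLayer.builder()
        .nIn(10).nOut(5)
        .updater(new Adam(1e-3))
        .build();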
* diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseOutputLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseOutputLayer.java index f8441388c..59bcad533 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseOutputLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseOutputLayer.java @@ -20,6 +20,8 @@ package org.deeplearning4j.nn.conf.layers; +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonProperty; import lombok.*; import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -29,72 +31,71 @@ import org.nd4j.linalg.lossfunctions.ILossFunction; import org.nd4j.linalg.lossfunctions.LossFunctions; import org.nd4j.linalg.lossfunctions.impl.LossMCXENT; - @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) @NoArgsConstructor @SuperBuilder(builderMethodName = "innerBuilder") public abstract class BaseOutputLayer extends FeedForwardLayer { - /** - * Loss function for the output layer - */ - @lombok.Builder.Default @Getter @Setter - protected ILossFunction lossFunction = new LossMCXENT(); - /** - * If true (default): include bias parameters in the model. False: no bias. - * - */ - @lombok.Builder.Default @Getter @Setter - protected boolean hasBias = true; + /** Loss function for the output layer */ + @lombok.Builder.Default @Getter @Setter protected ILossFunction lossFunction = new LossMCXENT(); + /** If true (default): include bias parameters in the model. False: no bias. */ + @lombok.Builder.Default @Getter @Setter protected boolean hasBias = true; + @Override + public LayerMemoryReport getMemoryReport(InputType inputType) { + // Basically a dense layer... + InputType outputType = getOutputType(-1, inputType); + val numParams = initializer().numParams(this); + val updaterStateSize = (int) getUpdater().stateSize(numParams); - @Override - public LayerMemoryReport getMemoryReport(InputType inputType) { - //Basically a dense layer... - InputType outputType = getOutputType(-1, inputType); - - val numParams = initializer().numParams(this); - val updaterStateSize = (int) getUpdater().stateSize(numParams); - - int trainSizeFixed = 0; - int trainSizeVariable = 0; - if (getDropOut() != null) { - if (false) { - //TODO drop connect - //Dup the weights... note that this does NOT depend on the minibatch size... - trainSizeVariable += 0; //TODO - } else { - //Assume we dup the input - trainSizeVariable += inputType.arrayElementsPerExample(); - } - } - - //Also, during backprop: we do a preOut call -> gives us activations size equal to the output size - // which is modified in-place by activation function backprop - // then we have 'epsilonNext' which is equivalent to input size - trainSizeVariable += outputType.arrayElementsPerExample(); - - return new LayerMemoryReport.Builder(name, OutputLayer.class, inputType, outputType) - .standardMemory(numParams, updaterStateSize) - .workingMemory(0, 0, trainSizeFixed, trainSizeVariable) //No additional memory (beyond activations) for inference - .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching - .build(); + int trainSizeFixed = 0; + int trainSizeVariable = 0; + if (getDropOut() != null) { + if (false) { + // TODO drop connect + // Dup the weights... note that this does NOT depend on the minibatch size... 
+ trainSizeVariable += 0; // TODO + } else { + // Assume we dup the input + trainSizeVariable += inputType.arrayElementsPerExample(); + } } - public static abstract class BaseOutputLayerBuilder> extends - FeedForwardLayerBuilder { - public B lossFunction(LossFunctions.LossFunction lossFn) { - this.lossFunction$value = lossFn.getILossFunction(); - this.lossFunction$set = true; - return self(); - } + // Also, during backprop: we do a preOut call -> gives us activations size equal to the output + // size + // which is modified in-place by activation function backprop + // then we have 'epsilonNext' which is equivalent to input size + trainSizeVariable += outputType.arrayElementsPerExample(); - public B lossFunction(ILossFunction lossFn) { - this.lossFunction$value = lossFn; - this.lossFunction$set = true; - return self(); - } + return new LayerMemoryReport.Builder(name, OutputLayer.class, inputType, outputType) + .standardMemory(numParams, updaterStateSize) + .workingMemory( + 0, + 0, + trainSizeFixed, + trainSizeVariable) // No additional memory (beyond activations) for inference + .cacheMemory( + MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) // No caching + .build(); + } + + public abstract static class BaseOutputLayerBuilder< + C extends BaseOutputLayer, B extends BaseOutputLayerBuilder> + extends FeedForwardLayerBuilder { + @JsonIgnore + public B lossFunction(LossFunctions.LossFunction lossFn) { + this.lossFunction$value = lossFn.getILossFunction(); + this.lossFunction$set = true; + return self(); } + + @JsonProperty("lossFunction") + public B lossFunction(ILossFunction lossFn) { + this.lossFunction$value = lossFn; + this.lossFunction$set = true; + return self(); + } + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseRecurrentLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseRecurrentLayer.java index 10f24c4ae..db89f9a06 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseRecurrentLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseRecurrentLayer.java @@ -30,7 +30,6 @@ import org.deeplearning4j.nn.conf.RNNFormat; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.weights.IWeightInit; - @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) @SuperBuilder diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseUpsamplingLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseUpsamplingLayer.java index 8d031b6c6..1374801c9 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseUpsamplingLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseUpsamplingLayer.java @@ -32,6 +32,7 @@ import org.deeplearning4j.nn.conf.inputs.InputType; */ @ToString(callSuper = true) +@NoArgsConstructor @EqualsAndHashCode(callSuper = true) @SuperBuilder public abstract class BaseUpsamplingLayer extends NoParamLayer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BatchNormalization.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BatchNormalization.java index 7544b5717..35f5e4583 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BatchNormalization.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BatchNormalization.java @@ -23,8 +23,11 
@@ package org.deeplearning4j.nn.conf.layers; import java.util.Collection; import java.util.List; import java.util.Map; + +import com.fasterxml.jackson.annotation.JsonIgnore; import lombok.*; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import net.brutex.ai.dnn.api.LayerType; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; @@ -48,6 +51,7 @@ import org.nd4j.linalg.learning.regularization.Regularization; @Data @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@Jacksonized @SuperBuilder(builderMethodName = "innerBuilder") public class BatchNormalization extends FeedForwardLayer { @@ -80,6 +84,7 @@ public class BatchNormalization extends FeedForwardLayer { * * @param minibatch Minibatch parameter */ + @JsonIgnore @lombok.Builder.Default protected boolean isMinibatch = true; /** diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CapsuleLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CapsuleLayer.java index 32b0d8efe..de7b1bf86 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CapsuleLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CapsuleLayer.java @@ -23,6 +23,7 @@ package org.deeplearning4j.nn.conf.layers; import java.util.Map; import lombok.*; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.inputs.InputType.InputTypeRecurrent; import org.deeplearning4j.nn.conf.inputs.InputType.Type; @@ -39,9 +40,12 @@ import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; @EqualsAndHashCode(callSuper = true) +@Jacksonized @SuperBuilder(builderMethodName = "innerBuilder") +@NoArgsConstructor public class CapsuleLayer extends SameDiffLayer { + private static final String WEIGHT_PARAM = "weight"; private static final String BIAS_PARAM = "bias"; /** diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CapsuleStrengthLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CapsuleStrengthLayer.java index 99980c490..3c5878518 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CapsuleStrengthLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CapsuleStrengthLayer.java @@ -22,8 +22,8 @@ package org.deeplearning4j.nn.conf.layers; import lombok.Data; import lombok.EqualsAndHashCode; -import lombok.NoArgsConstructor; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.inputs.InputType.InputTypeRecurrent; import org.deeplearning4j.nn.conf.inputs.InputType.Type; @@ -33,6 +33,7 @@ import org.nd4j.autodiff.samediff.SameDiff; @Data @EqualsAndHashCode(callSuper = true) +@Jacksonized @SuperBuilder public class CapsuleStrengthLayer extends SameDiffLambdaLayer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CenterLossOutputLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CenterLossOutputLayer.java index f7573c56b..cb7a53b3b 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CenterLossOutputLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CenterLossOutputLayer.java 
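Several layer configurations in this patch (BatchNormalization, CapsuleLayer, CapsuleStrengthLayer, CenterLossOutputLayer, Convolution2D and others) gain Lombok's @Jacksonized next to @SuperBuilder, which registers the generated builder for Jackson deserialization. A rough round-trip sketch, assuming a plain ObjectMapper is sufficient for a concrete layer class such as Convolution2D (the project may route this through its own configured mappers):

    // Rough sketch of what @Jacksonized enables; illustrative only.
    static Convolution2D roundTrip() throws Exception {
      ObjectMapper mapper = new ObjectMapper();
      Convolution2D layer = Convolution2D.builder().nIn(3).nOut(8).kernelSize(3, 3).build();
      String json = mapper.writeValueAsString(layer);      // serialized from the bean properties
      return mapper.readValue(json, Convolution2D.class);  // rebuilt through the @Jacksonized builder
    }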
@@ -24,6 +24,7 @@ import java.util.Collection; import java.util.Map; import lombok.*; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -40,6 +41,7 @@ import org.nd4j.linalg.learning.config.NoOp; @Data @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@Jacksonized @SuperBuilder public class CenterLossOutputLayer extends BaseOutputLayer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Cnn3DLossLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Cnn3DLossLayer.java index 15c31aaf2..b55595e4b 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Cnn3DLossLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Cnn3DLossLayer.java @@ -24,6 +24,7 @@ import java.util.Collection; import java.util.Map; import lombok.*; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.InputPreProcessor; @@ -37,9 +38,9 @@ import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.lossfunctions.ILossFunction; - @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@Jacksonized @SuperBuilder public class Cnn3DLossLayer extends FeedForwardLayer { @Getter @Setter diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CnnLossLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CnnLossLayer.java index 8c6643d6d..84cde598c 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CnnLossLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CnnLossLayer.java @@ -24,6 +24,7 @@ import java.util.Collection; import java.util.Map; import lombok.*; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.CNN2DFormat; @@ -41,6 +42,7 @@ import org.nd4j.linalg.lossfunctions.ILossFunction; @Data @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@Jacksonized @SuperBuilder public class CnnLossLayer extends FeedForwardLayer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1D.java index 1074b3092..21ab00585 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1D.java @@ -20,15 +20,230 @@ package org.deeplearning4j.nn.conf.layers; -import lombok.Data; -import lombok.EqualsAndHashCode; -import lombok.NoArgsConstructor; -import lombok.ToString; +import java.util.Collection; +import java.util.Map; +import lombok.*; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; +import org.deeplearning4j.nn.conf.CNN2DFormat; +import org.deeplearning4j.nn.conf.InputPreProcessor; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.RNNFormat; +import 
org.deeplearning4j.nn.conf.inputs.InputType; +import org.deeplearning4j.optimize.api.TrainingListener; +import org.deeplearning4j.util.Convolution1DUtils; +import org.deeplearning4j.util.ValidationUtils; +import org.nd4j.linalg.api.buffer.DataType; +import org.nd4j.linalg.api.ndarray.INDArray; + +//TODO: We will eventually want to NOT subclass off of ConvolutionLayer. +//Currently, we just subclass off the ConvolutionLayer and hard code the "width" dimension to 1 +/** + * This approach treats a multivariate time series with L time steps and + * P variables as an L x 1 x P image (L rows high, 1 column wide, P + * channels deep). The kernel should be H + * Default: NCHW + * + * @param format Format for activations (in and out) + */ + @Builder.Default + protected CNN2DFormat dataFormat = + CNN2DFormat.NCHW; // default value for legacy serialization reasons + + @Builder.Default private RNNFormat rnnDataFormat = RNNFormat.NCW; + /** + * Size of the convolution + * + * @param kernelSize the length of the kernel + */ + @Builder.Default private int[] kernelSize = new int[] {1, 1}; + /** + * Stride for the convolution. Must be > 0 + * + * @param stride Stride + */ + @Builder.Default private int[] stride = new int[] {1, 1}; + /** + * Padding value for the convolution. Not used with {@link + * org.deeplearning4j.nn.conf.ConvolutionMode#Same} + * + * @param padding Padding value + */ + @Builder.Default private int[] padding = new int[] {0, 0}; + + @Builder.Default private int[] dilation = new int[] {1}; + + public static Convolution1DBuilder builder() { + return innerBuilder(); + } + + public static Convolution1DBuilder builder(int kernelSize) { + return innerBuilder() + .kernelSize(new int[] {kernelSize}) + .stride(new int[] {1, 1}) + .padding(new int[] {0, 0}) + .dilation(new int[] {0}); + } + + public static Convolution1DBuilder builder(int kernelSize, int stride) { + return innerBuilder() + .kernelSize(new int[] {kernelSize}) + .stride(new int[] {stride}) + .padding(new int[] {0, 0}) + .dilation(new int[] {0}); + } + + public static Convolution1DBuilder builder(int kernelSize, int stride, int padding) { + return innerBuilder() + .kernelSize(new int[] {kernelSize}) + .stride(new int[] {stride}) + .padding(new int[] {padding}) + .dilation(new int[] {0}); + } + + @Override + public org.deeplearning4j.nn.api.Layer instantiate( + NeuralNetConfiguration conf, + Collection trainingListeners, + int layerIndex, + INDArray layerParamsView, + boolean initializeParams, + DataType networkDataType) { + setNetConfiguration(conf); + LayerValidation.assertNInNOutSet("Convolution1D", getName(), layerIndex, getNIn(), getNOut()); + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + org.deeplearning4j.nn.layers.convolution.Convolution1DLayer ret = + new org.deeplearning4j.nn.layers.convolution.Convolution1DLayer(lconf, networkDataType); + ret.addTrainingListeners(trainingListeners); + ret.setIndex(layerIndex); + ret.setParamsViewArray(layerParamsView); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); + ret.setParamTable(paramTable); + ret.setLayerConfiguration(lconf); + return ret; + } + + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + if (inputType == null || inputType.getType() != InputType.Type.RNN) { + throw new IllegalStateException( + "Invalid input for 1D CNN layer (layer index = " + + layerIndex + + ", layer name = \"" + + getName() + + "\"): expect RNN input type with size > 0. 
Got: " + + inputType); + } + InputType.InputTypeRecurrent it = (InputType.InputTypeRecurrent) inputType; + long inputTsLength = it.getTimeSeriesLength(); + long outLength; + if (inputTsLength < 0) { + // Probably: user did InputType.recurrent(x) without specifying sequence length + outLength = -1; + } else { + outLength = + Convolution1DUtils.getOutputSize( + inputTsLength, + kernelSize[0], + stride[0], + padding[0], + getConvolutionMode(), + dilation[0]); + } + + return InputType.recurrent(nOut, outLength, rnnDataFormat); + } + + @Override + public void setNIn(InputType inputType, boolean override) { + if (inputType == null || inputType.getType() != InputType.Type.RNN) { + throw new IllegalStateException( + "Invalid input for 1D CNN layer (layer name = \"" + + getName() + + "\"): expect RNN input type with size > 0. Got: " + + inputType); + } + + InputType.InputTypeRecurrent r = (InputType.InputTypeRecurrent) inputType; + if (nIn <= 0 || override) { + this.nIn = r.getSize(); + } + if (this.rnnDataFormat == null || override) this.rnnDataFormat = r.getFormat(); + + if (this.dataFormat == null || override) + this.dataFormat = rnnDataFormat == RNNFormat.NCW ? CNN2DFormat.NCHW : CNN2DFormat.NHWC; + } + + @Override + public InputPreProcessor getPreProcessorForInputType(InputType inputType) { + if (inputType == null) { + throw new IllegalStateException( + "Invalid input for Convolution1D layer (layer name=\"" + + getName() + + "\"): input is null"); + } + + return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType, rnnDataFormat, getName()); + } + + protected boolean allowCausal() { + return true; + } + + private static final class Convolution1DBuilderImpl + extends Convolution1DBuilder { + public Convolution1D build() { + Convolution1D l = new Convolution1D(this); + Convolution1DUtils.validateConvolutionModePadding(l.getConvolutionMode(), l.getPadding()[0]); + Convolution1DUtils.validateCnn1DKernelStridePadding( + l.getKernelSize()[0], l.getStride()[0], l.getPadding()[0]); + l.initializeConstraints(); + return l; + } + } + + public abstract static class Convolution1DBuilder< + C extends Convolution1D, B extends Convolution1DBuilder> + extends ConvolutionLayerBuilder { + + @Override + public B kernelSize(int @NonNull ... kernelSize) { + //Todo, we always provide arrays, but only first element is really used + super.kernelSize(ValidationUtils.validate1NonNegative(new int[]{kernelSize[0]},"kernelSize")); + return self(); + } + + public B padding(int @NonNull ... padding) { + //Todo, we always provide arrays, but only first element is really used + super.padding(ValidationUtils.validate1NonNegative(new int[]{padding[0]}, "padding")); + + return self(); + } + public B dilation(int @NonNull ... dilation) { + //Todo, we always provide arrays, but only first element is really used + super.dilation(ValidationUtils.validate1NonNegative(new int[]{dilation[0]}, "dilation")); + return self(); + } + + public B stride(int @NonNull ... 
stride) { + //Todo, we always provide arrays, but only first element is really used + super.stride(ValidationUtils.validate1NonNegative(new int[]{stride[0]}, "stride")); + return self(); + } + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1DNew.java similarity index 56% rename from cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1DLayer.java rename to cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1DNew.java index 9db0c9d61..93f8f2065 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1DNew.java @@ -24,31 +24,41 @@ import java.util.Collection; import java.util.Map; import lombok.*; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; +import lombok.extern.slf4j.Slf4j; +import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.RNNFormat; import org.deeplearning4j.nn.conf.inputs.InputType; +import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; +import org.deeplearning4j.nn.params.ConvolutionNewParamInitializer; import org.deeplearning4j.optimize.api.TrainingListener; import org.deeplearning4j.util.Convolution1DUtils; -import org.deeplearning4j.util.ConvolutionUtils; import org.deeplearning4j.util.ValidationUtils; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -/* -//TODO: We will eventually want to NOT subclass off of ConvolutionLayer. -//Currently, we just subclass off the ConvolutionLayer and hard code the "width" dimension to 1 - * This approach treats a multivariate time series with L timesteps and - * P variables as an L x 1 x P image (L rows high, 1 column wide, P - * channels deep). The kernel should be HIn 1D CNN, kernel moves in 1 direction. The kernel has 2-dimensions. Input and output data of + * 1D CNN is 2-dimensional. Mostly used on Time-Series data. */ @Data +@Slf4j @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder(builderMethodName = "innerBuilder") -public class Convolution1DLayer extends ConvolutionLayer { +@Jacksonized +@SuperBuilder +public class Convolution1DNew extends AbstractConvolutionLayer { + /** * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last). * See {@link CNN2DFormat} for more details.
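For the time-series framing described in the Convolution1D Javadoc above (L time steps and P variables treated as an L x 1 x P image), a hedged configuration sketch with made-up sizes; Convolution1D.builder(int kernelSize) and InputType.recurrent(...) are the entry points that appear in this diff:

    // Hypothetical: P = 12 input variables, a kernel spanning 5 time steps, 32 filters.
    Convolution1D conv1d = Convolution1D.builder(5)
        .nIn(12)
        .nOut(32)
        .build();
    // With an input declared as InputType.recurrent(12, L), setNIn(...) can infer nIn and
    // getOutputType(...) derives the output sequence length from L.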
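Both Convolution1D and Convolution1DNew delegate their output-length computation to Convolution1DUtils.getOutputSize(...). As a point of reference, the conventional 1D relation for ConvolutionMode.Truncate is sketched below; the utility class remains the source of truth, and ConvolutionMode.Same typically gives ceil(length / stride) instead:

    // Conventional output-length arithmetic for a 1D convolution (Truncate / floor behaviour).
    static long outLength(long length, int kernel, int stride, int padding, int dilation) {
      long effectiveKernel = (long) dilation * (kernel - 1) + 1;    // dilated kernel extent
      return (length + 2L * padding - effectiveKernel) / stride + 1;
    }
    // e.g. length = 100, kernel = 5, stride = 2, padding = 0, dilation = 1  ->  (100 - 5) / 2 + 1 = 48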
@@ -58,44 +68,13 @@ public class Convolution1DLayer extends ConvolutionLayer { */ @Builder.Default protected CNN2DFormat dataFormat = - CNN2DFormat.NCHW; // default value for legacy serialization reasons + CNN2DFormat.NCHW; // default value for legacy serialization reasons + @Builder.Default private RNNFormat rnnDataFormat = RNNFormat.NCW; - /** - * Size of the convolution - * - * @param kernelSize the length of the kernel - */ - @Builder.Default private int[] kernelSize = new int[] {1, 1}; - /** - * Stride for the convolution. Must be > 0 - * - * @param stride Stride - */ - @Builder.Default private int[] stride = new int[] {1, 1}; - /** - * Padding value for the convolution. Not used with {@link - * org.deeplearning4j.nn.conf.ConvolutionMode#Same} - * - * @param padding Padding value - */ - @Builder.Default private int[] padding = new int[] {0, 0}; - private int[] dilation; - - public static Convolution1DLayerBuilder builder() { - return innerBuilder(); - } - - public static Convolution1DLayerBuilder builder(int kernelSize) { - return innerBuilder().kernelSize(kernelSize); - } - - public static Convolution1DLayerBuilder builder(int kernelSize, int stride, int padding) { - return innerBuilder().kernelSize(kernelSize).stride(stride).padding(padding); - } - - public static Convolution1DLayerBuilder builder(int kernelSize, int stride) { - return innerBuilder().kernelSize(kernelSize).stride(stride); + @Override + public ParamInitializer initializer() { + return ConvolutionNewParamInitializer.getInstance(); } @Override @@ -107,17 +86,27 @@ public class Convolution1DLayer extends ConvolutionLayer { boolean initializeParams, DataType networkDataType) { setNetConfiguration(conf); - LayerValidation.assertNInNOutSet( - "Convolution1DLayer", getName(), layerIndex, getNIn(), getNOut()); + LayerValidation.assertNInNOutSet("Convolution1D", getName(), layerIndex, getNIn(), getNOut()); LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); - org.deeplearning4j.nn.layers.convolution.Convolution1DLayer ret = - new org.deeplearning4j.nn.layers.convolution.Convolution1DLayer(lconf, networkDataType); + /* + Object ret; + try { + ret = lconf.getCanConfigure() + .getConstructor(LayerConfiguration.class, DataType.class) + .newInstance(new Object[] { lconf, networkDataType }); + } catch (Exception e) { + throw new RuntimeException(e); + + */ + org.deeplearning4j.nn.layers.convolution.Convolution1DNewLayer ret = + new org.deeplearning4j.nn.layers.convolution.Convolution1DNewLayer(lconf, networkDataType); + ret.addTrainingListeners(trainingListeners); ret.setIndex(layerIndex); ret.setParamsViewArray(layerParamsView); Map paramTable = initializer().init(this, layerParamsView, initializeParams); ret.setParamTable(paramTable); - ret.setLayerConfiguration(lconf); + ret.setLayerConfiguration(this); return ret; } @@ -141,7 +130,12 @@ public class Convolution1DLayer extends ConvolutionLayer { } else { outLength = Convolution1DUtils.getOutputSize( - inputTsLength, kernelSize[0], stride[0], padding[0], getConvolutionMode(), dilation[0]); + inputTsLength, + getKernelSize()[0], + getStride()[0], + getPadding()[0], + getConvolutionMode(), + getDilation()[0]); } return InputType.recurrent(nOut, outLength, rnnDataFormat); @@ -179,45 +173,77 @@ public class Convolution1DLayer extends ConvolutionLayer { return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType, rnnDataFormat, getName()); } + /** + * This is a report of the estimated memory consumption for the given layer + * + * @param 
inputType Input type to the layer. Memory consumption is often a function of the input + * type + * @return Memory report for the layer + */ + @Override + public LayerMemoryReport getMemoryReport(InputType inputType) { + return null; + } + protected boolean allowCausal() { return true; } - private static final class Convolution1DLayerBuilderImpl extends Convolution1DLayerBuilder { - public Convolution1DLayer build() { - Convolution1DLayer l = new Convolution1DLayer(this); - ConvolutionUtils.validateConvolutionModePadding(l.getConvolutionMode(), l.getPadding()); - ConvolutionUtils.validateCnnKernelStridePadding( - l.getKernelSize(), l.getStride(), l.getPadding()); + private static final class Convolution1DNewBuilderImpl + extends Convolution1DNewBuilder { + public Convolution1DNew build() { + Convolution1DNew l = new Convolution1DNew(this); + if (l.getDilation() == null) { + dilation(1, 1); + } + if (l.getPadding() == null) { + padding(0); + } + l = new Convolution1DNew(this); + + Convolution1DUtils.validateConvolutionModePadding(l.getConvolutionMode(), l.getPadding()[0]); + Convolution1DUtils.validateCnn1DKernelStridePadding( + l.getKernelSize()[0], l.getStride()[0], l.getPadding()[0]); l.initializeConstraints(); return l; } } - public static abstract class Convolution1DLayerBuilder< - C extends ConvolutionLayer, B extends Convolution1DLayerBuilder> - extends ConvolutionLayerBuilder { + public abstract static class Convolution1DNewBuilder< + C extends Convolution1DNew, B extends Convolution1DNewBuilder> + extends AbstractConvolutionLayerBuilder { + private int dimensions(Class arrayType) { + return arrayType.isArray() ? 1 + dimensions(arrayType.getComponentType()) : 0; + } - public B kernelSize(int @NonNull ... kernelSize) { - this.kernelSize$value[0] = ValidationUtils.validate1NonNegative(kernelSize, "kernelSize")[0]; - this.kernelSize$set = true; + @Override + public B kernelSize(int @NonNull ... kernel) { + // Todo, we always provide arrays, but only first element is really used + if (dimensions(kernel.getClass()) > 1) + log.warn( + "Kernel size has '{}' dimensions, only using first dimensions for 1D convolution layer.", + dimensions(kernel.getClass())); + super.kernelSize( + ValidationUtils.validate1NonNegative(new int[] {kernel[0]}, "kernelSize")[0], 1); return self(); } public B padding(int @NonNull ... padding) { - this.padding$value[0] = ValidationUtils.validate1NonNegative(padding, "padding")[0]; - this.padding$set = true; + // Todo, we always provide arrays, but only first element is really used + super.padding(ValidationUtils.validate1NonNegative(new int[] {padding[0]}, "padding")); + return self(); } public B dilation(int @NonNull ... dilation) { - this.dilation[0] = ValidationUtils.validate1NonNegative(dilation, "dilation")[0]; + // Todo, we always provide arrays, but only first element is really used + super.dilation(ValidationUtils.validate1NonNegative(new int[] {dilation[0]}, "dilation")); return self(); } public B stride(int @NonNull ... 
stride) { - this.stride$value[0] = ValidationUtils.validate1NonNegative(stride, "stride")[0]; - this.stride$set = true; + // Todo, we always provide arrays, but only first element is really used + super.stride(ValidationUtils.validate1NonNegative(new int[] {stride[0]}, "stride")[0], 1); return self(); } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution2D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution2D.java index dfe5f5c83..8af4ad2eb 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution2D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution2D.java @@ -22,14 +22,15 @@ package org.deeplearning4j.nn.conf.layers; import lombok.Data; import lombok.EqualsAndHashCode; -import lombok.NoArgsConstructor; import lombok.ToString; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; @Data @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@Jacksonized @SuperBuilder public class Convolution2D extends ConvolutionLayer { } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution3D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution3D.java index debe05588..dc8a3b89b 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution3D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution3D.java @@ -24,6 +24,7 @@ import java.util.Collection; import java.util.Map; import lombok.*; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.*; @@ -32,14 +33,16 @@ import org.deeplearning4j.nn.layers.convolution.Convolution3DLayer; import org.deeplearning4j.nn.params.Convolution3DParamInitializer; import org.deeplearning4j.optimize.api.TrainingListener; import org.deeplearning4j.util.Convolution3DUtils; -import org.deeplearning4j.util.ConvolutionUtils; +import org.deeplearning4j.util.Convolution2DUtils; import org.deeplearning4j.util.ValidationUtils; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; @Data +@NoArgsConstructor @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@Jacksonized @SuperBuilder(builderMethodName = "innerBuilder") public class Convolution3D extends ConvolutionLayer { @@ -238,7 +241,7 @@ public class Convolution3D extends ConvolutionLayer { private static final class Convolution3DBuilderImpl extends Convolution3DBuilder { public Convolution3D build() { Convolution3D l = new Convolution3D(this); - ConvolutionUtils.validateConvolutionModePadding(l.getConvolutionMode(), l.getPadding()); + Convolution2DUtils.validateConvolutionModePadding(l.getConvolutionMode(), l.getPadding()); Convolution3DUtils.validateCnn3DKernelStridePadding(l.getKernelSize(), l.getStride(), l.getPadding()); return l; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java index 351aea24c..0e76f5776 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java @@ -36,7 +36,7 @@ 
import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.conf.memory.MemoryReport; import org.deeplearning4j.nn.params.ConvolutionParamInitializer; import org.deeplearning4j.optimize.api.TrainingListener; -import org.deeplearning4j.util.ConvolutionUtils; +import org.deeplearning4j.util.Convolution2DUtils; import org.deeplearning4j.util.ValidationUtils; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; @@ -45,11 +45,39 @@ import org.nd4j.linalg.api.ndarray.INDArray; * ConvolutionLayer nIn in the input layer is the number of channels nOut is the number of filters * to be used in the net or in other words the channels The builder specifies the filter/kernel * size, the stride and padding The pooling layer takes the kernel size + * + *

Supports multiple dimensions: + * In 1D CNN, kernel moves in 1 direction. Input and output data of + * 1D CNN is 2 dimensional. Mostly used on Time-Series data. + * + * In 2D CNN, kernel moves in 2 + * directions. Input and output data of 2D CNN is 3 dimensional. Mostly used on Image data. + * + * In 3D CNN, kernel moves in 3 directions. Input and output data of 3D CNN is 4 dimensional. Mostly used + * on 3D Image data (MRI, CT Scans, Video). */ @ToString(callSuper = true) +@NoArgsConstructor @EqualsAndHashCode(callSuper = true) @SuperBuilder(builderMethodName = "innerBuilder") public class ConvolutionLayer extends FeedForwardLayer { + + public static ConvolutionLayerBuilder builder() { + return innerBuilder(); + } + + public static ConvolutionLayerBuilder builder(int... kernelSize) { + return innerBuilder().kernelSize(kernelSize); + } + + public static ConvolutionLayerBuilder builder(int[] kernelSize, int[] stride) { + return innerBuilder().kernelSize(kernelSize).stride(stride); + } + + public static ConvolutionLayerBuilder builder( + int[] kernelSize, int[] stride, int[] padding) { + return innerBuilder().kernelSize(kernelSize).stride(stride).padding(padding); + } /** * Size of the convolution rows/columns * @@ -111,23 +139,6 @@ public class ConvolutionLayer extends FeedForwardLayer { @Builder.Default @JsonIgnore @EqualsAndHashCode.Exclude @Getter @Setter private boolean defaultValueOverriden = false; - public static ConvolutionLayerBuilder builder() { - return innerBuilder(); - } - - public static ConvolutionLayerBuilder builder(int... kernelSize) { - return innerBuilder().kernelSize(kernelSize); - } - - public static ConvolutionLayerBuilder builder(int[] kernelSize, int[] stride) { - return innerBuilder().kernelSize(kernelSize).stride(stride); - } - - public static ConvolutionLayerBuilder builder( - int[] kernelSize, int[] stride, int[] padding) { - return innerBuilder().kernelSize(kernelSize).stride(stride).padding(padding); - } - public boolean hasBias() { return hasBias; } @@ -361,17 +372,18 @@ public class ConvolutionLayer extends FeedForwardLayer { * @param kernelSize kernel size */ public B kernelSize(int... kernelSize) { - this.kernelSize$value = ValidationUtils.validate2NonNegative(kernelSize, false, "kernelSize"); + this.kernelSize$value = ValidationUtils.validate3NonNegative(kernelSize, "kernelSize"); this.kernelSize$set = true; return self(); } + /** * Set stride size for 3D convolutions in (depth, height, width) order * * @param stride kernel size */ public B stride(int... stride) { - this.stride$value = ValidationUtils.validate2NonNegative(stride, false, "stride"); + this.stride$value = ValidationUtils.validate3NonNegative(stride, "stride"); this.stride$set = true; return self(); } @@ -382,7 +394,7 @@ public class ConvolutionLayer extends FeedForwardLayer { * @param padding kernel size */ public B padding(int... padding) { - this.padding$value = ValidationUtils.validate2NonNegative(padding, false, "padding"); + this.padding$value = ValidationUtils.validate3NonNegative(padding, "padding"); this.padding$set = true; return self(); } @@ -392,12 +404,15 @@ public class ConvolutionLayer extends FeedForwardLayer { * @param dilation kernel size */ public B dilation(int... 
dilation) { - this.dilation$value = ValidationUtils.validate2NonNegative(dilation, false, "dilation"); + this.dilation$value = ValidationUtils.validate3NonNegative(dilation, "dilation"); this.dilation$set = true; return self(); } + public B dilation(int dilation) { + return dilation(new int[]{dilation}); + } /** * When using CuDNN or MKLDNN and an error is encountered, should fallback to the non-helper @@ -413,47 +428,52 @@ public class ConvolutionLayer extends FeedForwardLayer { return self(); } } - private static final class ConvolutionLayerBuilderImpl extends ConvolutionLayerBuilder { + + /* + private static final class ConvolutionLayerBuilderImpl + extends ConvolutionLayerBuilder { public ConvolutionLayer build() { ConvolutionLayer l = new ConvolutionLayer(this); - ConvolutionUtils.validateConvolutionModePadding(l.getConvolutionMode(), l.getPadding()); - ConvolutionUtils.validateCnnKernelStridePadding( - l.getKernelSize(), l.getStride(), l.getPadding()); + Convolution2DUtils.validateConvolutionModePadding(l.getConvolutionMode(), l.getPadding()); + Convolution2DUtils.validateCnnKernelStridePadding( + l.getKernelSize(), l.getStride(), l.getPadding()); if (l.getKernelSize().length != l.getConvolutionDim()) { throw new IllegalArgumentException( - "Kernel argument should be a " - + l.getConvolutionDim() - + "d array, got " - + Arrays.toString(l.getKernelSize())); + "Kernel argument should be a " + + l.getConvolutionDim() + + "d array, got " + + Arrays.toString(l.getKernelSize())); } if (l.getStride().length != l.getConvolutionDim()) { throw new IllegalArgumentException( - "Strides argument should be a " - + l.getConvolutionDim() - + "d array, got " - + Arrays.toString(l.getStride())); + "Strides argument should be a " + + l.getConvolutionDim() + + "d array, got " + + Arrays.toString(l.getStride())); } if (l.getPadding().length != l.getConvolutionDim()) { throw new IllegalArgumentException( - "Padding argument should be a " - + l.getConvolutionDim() - + "d array, got " - + Arrays.toString(l.getPadding())); + "Padding argument should be a " + + l.getConvolutionDim() + + "d array, got " + + Arrays.toString(l.getPadding())); } if (l.getDilation().length != l.getConvolutionDim()) { throw new IllegalArgumentException( - "Dilation argument should be a " - + l.getConvolutionDim() - + "d array, got " - + Arrays.toString(l.getDilation())); + "Dilation argument should be a " + + l.getConvolutionDim() + + "d array, got " + + Arrays.toString(l.getDilation())); } l.setType(LayerType.CONV); l.initializeConstraints(); return l; } } +*/ + } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution2D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution2D.java index 2847168cb..62fab4f7f 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution2D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution2D.java @@ -20,12 +20,14 @@ package org.deeplearning4j.nn.conf.layers; +import java.util.Collection; +import java.util.Map; import lombok.*; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.CNN2DFormat; -import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; import 
org.deeplearning4j.nn.layers.convolution.Deconvolution2DLayer; @@ -35,114 +37,132 @@ import org.deeplearning4j.util.ValidationUtils; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.Collection; -import java.util.Map; - /** - * Deconvolution2D layer nIn in the input layer is the number of channels nOut is the number of filters to be used - * in the net or in other words the channels The builder specifies the filter/kernel size, the stride and padding - * The pooling layer takes the kernel size + * Deconvolution2D layer nIn in the input layer is the number of channels nOut is the number of + * filters to be used in the net or in other words the channels The builder specifies the + * filter/kernel size, the stride and padding The pooling layer takes the kernel size */ @Data @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder(builderMethodName = "innerBuilder") +@Jacksonized +@SuperBuilder public class Deconvolution2D extends ConvolutionLayer { + @Builder.Default private CNN2DFormat format = CNN2DFormat.NCHW; -@Builder.Default -private CNN2DFormat format = CNN2DFormat.NCHW; - protected boolean allowCausal() { - //Causal convolution - allowed for 1D only - return false; + protected boolean allowCausal() { + // Causal convolution - allowed for 1D only + return false; + } + + public boolean hasBias() { + return isHasBias(); + } + + @Override + public Deconvolution2D clone() { + Deconvolution2D clone = (Deconvolution2D) super.clone(); + if (clone.getKernelSize() != null) { + clone.setKernelSize(clone.getKernelSize().clone()); + } + if (clone.getStride() != null) { + clone.setStride(clone.getStride().clone()); + } + if (clone.getPadding() != null) { + clone.setPadding(clone.getPadding().clone()); + } + return clone; + } + + @Override + public Layer instantiate( + NeuralNetConfiguration conf, + Collection trainingListeners, + int layerIndex, + INDArray layerParamsView, + boolean initializeParams, + DataType networkDataType) { + setNetConfiguration(conf); + LayerValidation.assertNInNOutSet("Deconvolution2D", getName(), layerIndex, getNIn(), getNOut()); + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + runInheritance(); + + org.deeplearning4j.nn.layers.convolution.Deconvolution2DLayer ret = + new org.deeplearning4j.nn.layers.convolution.Deconvolution2DLayer(lconf, networkDataType); + + ret.addTrainingListeners(trainingListeners); + ret.setIndex(layerIndex); + ret.setParamsViewArray(layerParamsView); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); + ret.setParamTable(paramTable); + ret.setLayerConfiguration(lconf); + return ret; + } + + @Override + public ParamInitializer initializer() { + return DeconvolutionParamInitializer.getInstance(); + } + + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + if (inputType == null || inputType.getType() != InputType.Type.CNN) { + throw new IllegalStateException( + "Invalid input for Convolution layer (layer name=\"" + + getName() + + "\"): Expected CNN input, got " + + inputType); } - private static final class Deconvolution2DBuilderImpl extends Deconvolution2DBuilder { - public Deconvolution2D build() { - Deconvolution2D l = new Deconvolution2D(this); - l.initializeConstraints(); - return l; - } + return InputTypeUtil.getOutputTypeDeconvLayer( + inputType, + getKernelSize(), + getStride(), + getPadding(), + getDilation(), + getConvolutionMode(), + nOut, + layerIndex, + 
getName(), + Deconvolution2DLayer.class); + } + + private static final class Deconvolution2DBuilderImpl + extends Deconvolution2DBuilder { + public Deconvolution2D build() { + Deconvolution2D l = new Deconvolution2D(this); + l.initializeConstraints(); + return l; } - public static abstract class Deconvolution2DBuilder> extends ConvolutionLayerBuilder { + } + public abstract static class Deconvolution2DBuilder< + C extends Deconvolution2D, B extends Deconvolution2DBuilder> + extends ConvolutionLayerBuilder { - - @Override - public B kernelSize(int... kernelSize) { - super.kernelSize(ValidationUtils.validate2NonNegative(kernelSize, false, "kernelSize")); - return self(); - } - @Override - public B stride(int... stride) { - super.stride(ValidationUtils.validate2NonNegative(stride, false, "stride")); - return self(); - } - @Override - public B padding(int... padding) { - super.padding(ValidationUtils.validate2NonNegative(padding, false, "padding")); - return self(); - } - @Override - public B dilation(int... dilation) { - super.dilation(ValidationUtils.validate2NonNegative(dilation, false, "dilation")); - return self(); - } - } - public boolean hasBias() { - return isHasBias(); + @Override + public B kernelSize(int... kernelSize) { + super.kernelSize(ValidationUtils.validate2NonNegative(kernelSize, false, "kernelSize")); + return self(); } @Override - public Deconvolution2D clone() { - Deconvolution2D clone = (Deconvolution2D) super.clone(); - if (clone.getKernelSize() != null) { - clone.setKernelSize( clone.getKernelSize().clone()); - } - if (clone.getStride() != null) { - clone.setStride( clone.getStride().clone()); - } - if (clone.getPadding() != null) { - clone.setPadding( clone.getPadding().clone()); - } - return clone; + public B stride(int... stride) { + super.stride(ValidationUtils.validate2NonNegative(stride, false, "stride")); + return self(); } @Override - public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, - int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { - setNetConfiguration(conf); - LayerValidation.assertNInNOutSet("Deconvolution2D", getName(), layerIndex, getNIn(), getNOut()); - LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); - runInheritance(); - - org.deeplearning4j.nn.layers.convolution.Deconvolution2DLayer ret = - new org.deeplearning4j.nn.layers.convolution.Deconvolution2DLayer(lconf, networkDataType); - - ret.addTrainingListeners(trainingListeners); - ret.setIndex(layerIndex); - ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(this, layerParamsView, initializeParams); - ret.setParamTable(paramTable); - ret.setLayerConfiguration(lconf); - return ret; + public B padding(int... padding) { + super.padding(ValidationUtils.validate2NonNegative(padding, false, "padding")); + return self(); } @Override - public ParamInitializer initializer() { - return DeconvolutionParamInitializer.getInstance(); + public B dilation(int... 
dilation) { + super.dilation(ValidationUtils.validate2NonNegative(dilation, false, "dilation")); + return self(); } - - @Override - public InputType getOutputType(int layerIndex, InputType inputType) { - if (inputType == null || inputType.getType() != InputType.Type.CNN) { - throw new IllegalStateException("Invalid input for Convolution layer (layer name=\"" + getName() - + "\"): Expected CNN input, got " + inputType); - } - - return InputTypeUtil.getOutputTypeDeconvLayer(inputType, getKernelSize(), getStride(), getPadding(), getDilation(), getConvolutionMode(), - nOut, layerIndex, getName(), Deconvolution2DLayer.class); - } - - + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution3D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution3D.java index f2b383ad9..81191bea2 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution3D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution3D.java @@ -24,6 +24,7 @@ import java.util.Collection; import java.util.Map; import lombok.*; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.ConvolutionMode; @@ -44,7 +45,7 @@ import org.nd4j.linalg.api.ndarray.INDArray; @Data @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder(builderMethodName = "innerBuilder") +@Jacksonized @SuperBuilder(builderMethodName = "innerBuilder") public class Deconvolution3D extends ConvolutionLayer { /** * Set the convolution mode for the Convolution layer. See {@link ConvolutionMode} for more diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DenseLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DenseLayer.java index ce34310ed..752435358 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DenseLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DenseLayer.java @@ -24,6 +24,7 @@ import java.util.Collection; import java.util.Map; import lombok.*; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -38,8 +39,10 @@ import org.nd4j.linalg.api.ndarray.INDArray; /** Dense Layer Uses WeightInitXavier as default */ @Data +@NoArgsConstructor @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@Jacksonized @SuperBuilder public class DenseLayer extends FeedForwardLayer { @@ -60,6 +63,7 @@ public class DenseLayer extends FeedForwardLayer { LayerValidation.assertNInNOutSet( "DenseLayerConfiguration", getName(), layerIndex, getNIn(), getNOut()); LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + lconf.setNetConfiguration(conf); runInheritance(); org.deeplearning4j.nn.layers.feedforward.dense.DenseLayer ret = @@ -120,15 +124,14 @@ public class DenseLayer extends FeedForwardLayer { public abstract static class DenseLayerBuilder< C extends DenseLayer, B extends DenseLayerBuilder> - extends FeedForwardLayerBuilder { + extends FeedForwardLayerBuilder {} - } - private static final class DenseLayerBuilderImpl extends DenseLayerBuilder { + private static final class DenseLayerBuilderImpl + 
extends DenseLayerBuilder { public DenseLayer build() { DenseLayer l = new DenseLayer(this); l.initializeConstraints(); return l; } } - - } +} diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DepthwiseConvolution2D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DepthwiseConvolution2D.java index cb2cfd73b..4f6dc9535 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DepthwiseConvolution2D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DepthwiseConvolution2D.java @@ -23,6 +23,7 @@ package org.deeplearning4j.nn.conf.layers; import java.util.*; import lombok.*; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.CNN2DFormat; @@ -31,7 +32,7 @@ import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.layers.convolution.DepthwiseConvolution2DLayer; import org.deeplearning4j.nn.params.DepthwiseConvolutionParamInitializer; import org.deeplearning4j.optimize.api.TrainingListener; -import org.deeplearning4j.util.ConvolutionUtils; +import org.deeplearning4j.util.Convolution2DUtils; import org.deeplearning4j.util.ValidationUtils; import org.nd4j.common.base.Preconditions; import org.nd4j.linalg.api.buffer.DataType; @@ -40,7 +41,7 @@ import org.nd4j.linalg.api.ndarray.INDArray; @Data @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder +@Jacksonized @SuperBuilder public class DepthwiseConvolution2D extends ConvolutionLayer { /** * Set channels multiplier for depth-wise convolution @@ -153,8 +154,8 @@ public class DepthwiseConvolution2D extends ConvolutionLayer { "Depth multiplier must be > 0, got %s", l.getDepthMultiplier()); - ConvolutionUtils.validateConvolutionModePadding(l.getConvolutionMode(), l.getPadding()); - ConvolutionUtils.validateCnnKernelStridePadding( + Convolution2DUtils.validateConvolutionModePadding(l.getConvolutionMode(), l.getPadding()); + Convolution2DUtils.validateCnnKernelStridePadding( l.getKernelSize(), l.getStride(), l.getPadding()); l.initializeConstraints(); return l; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DropoutLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DropoutLayer.java index 8c8e5d63d..b52838438 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DropoutLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DropoutLayer.java @@ -25,7 +25,7 @@ import java.util.List; import java.util.Map; import lombok.*; import lombok.experimental.SuperBuilder; -import net.brutex.ai.dnn.api.LayerType; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -48,7 +48,7 @@ import org.nd4j.linalg.learning.regularization.Regularization; @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder(builderMethodName = "innerBuilder") +@Jacksonized @SuperBuilder(builderMethodName = "innerBuilder") public class DropoutLayer extends FeedForwardLayer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingLayer.java 
index bb5286e92..dae3637af 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingLayer.java @@ -25,6 +25,7 @@ import java.util.Map; import lombok.*; import lombok.experimental.Accessors; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -44,7 +45,7 @@ import org.nd4j.linalg.api.ndarray.INDArray; @Data @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder(builderMethodName = "innerBuilder") +@Jacksonized @SuperBuilder(builderMethodName = "innerBuilder") public class EmbeddingLayer extends FeedForwardLayer { /** * If true: include bias parameters in the layer. False (default): no bias. diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingSequenceLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingSequenceLayer.java index a720a81a9..63d3e4a3b 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingSequenceLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingSequenceLayer.java @@ -20,8 +20,11 @@ package org.deeplearning4j.nn.conf.layers; +import java.util.Collection; +import java.util.Map; import lombok.*; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.InputPreProcessor; @@ -40,13 +43,10 @@ import org.nd4j.linalg.activations.impl.ActivationIdentity; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.Collection; -import java.util.Map; - @Data @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder(builderMethodName = "innerBuilder") +@Jacksonized @SuperBuilder(builderMethodName = "innerBuilder") public class EmbeddingSequenceLayer extends FeedForwardLayer { /** * Set input sequence length for this embedding layer. diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/FeedForwardLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/FeedForwardLayer.java index a4fba86c9..46fe079c3 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/FeedForwardLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/FeedForwardLayer.java @@ -20,6 +20,8 @@ package org.deeplearning4j.nn.conf.layers; +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonProperty; import lombok.*; import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.conf.DataFormat; @@ -44,19 +46,19 @@ public abstract class FeedForwardLayer extends BaseLayerConfiguration { */ @Getter protected long nIn; - + @JsonIgnore public void setNIn(int in) { this.nIn = in; } + @JsonProperty("nIn") public void setNIn(long in) { this.nIn = in; } /** - * Number of inputs for the layer (usually the size of the last layer).
Note that for Convolutional layers, - * this is the input channels, otherwise is the previous layer size. + * Number of outputs for the layer. * */ - @Getter @Setter + @Getter @Setter @JsonProperty("nOut") protected long nOut; protected DataFormat timeDistributedFormat; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GlobalPoolingLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GlobalPoolingLayer.java index 88b6afc3f..0f4e7ee8e 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GlobalPoolingLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GlobalPoolingLayer.java @@ -20,8 +20,11 @@ package org.deeplearning4j.nn.conf.layers; +import java.util.Collection; +import java.util.Map; import lombok.*; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.InputPreProcessor; @@ -36,12 +39,9 @@ import org.deeplearning4j.util.ValidationUtils; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.Collection; -import java.util.Map; - @Data @EqualsAndHashCode(callSuper = true) -@SuperBuilder(builderMethodName = "innerBuilder") +@Jacksonized @SuperBuilder(builderMethodName = "innerBuilder") public class GlobalPoolingLayer extends NoParamLayer { public static abstract class GlobalPoolingLayerBuilder> extends diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesBidirectionalLSTM.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesBidirectionalLSTM.java index 091410bb7..188d8106d 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesBidirectionalLSTM.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesBidirectionalLSTM.java @@ -23,6 +23,7 @@ package org.deeplearning4j.nn.conf.layers; import java.util.*; import lombok.*; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.api.layers.LayerConstraint; @@ -41,7 +42,7 @@ import org.nd4j.linalg.api.ndarray.INDArray; @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) @Deprecated -@SuperBuilder +@Jacksonized @SuperBuilder public class GravesBidirectionalLSTM extends BaseRecurrentLayer { /** diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesLSTM.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesLSTM.java index a92151445..3c2daee00 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesLSTM.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesLSTM.java @@ -26,6 +26,7 @@ import java.util.Collections; import java.util.Map; import lombok.*; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.api.layers.LayerConstraint; @@ -43,7 +44,7 @@ import org.nd4j.linalg.api.ndarray.INDArray; @Deprecated @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder +@Jacksonized @SuperBuilder public class GravesLSTM extends 
AbstractLSTM { private double forgetGateBiasInit; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/InputTypeUtil.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/InputTypeUtil.java index d6015e022..aa23d76ad 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/InputTypeUtil.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/InputTypeUtil.java @@ -20,6 +20,7 @@ package org.deeplearning4j.nn.conf.layers; +import java.util.Arrays; import lombok.extern.slf4j.Slf4j; import lombok.val; import org.deeplearning4j.exception.DL4JInvalidConfigException; @@ -31,11 +32,8 @@ import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.preprocessor.CnnToRnnPreProcessor; import org.deeplearning4j.nn.conf.preprocessor.FeedForwardToCnnPreProcessor; import org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor; -import org.nd4j.common.base.Preconditions; import org.nd4j.common.primitives.Counter; -import java.util.Arrays; - @Slf4j public class InputTypeUtil { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LSTM.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LSTM.java index 80b64fbf8..eab35b5f9 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LSTM.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LSTM.java @@ -26,6 +26,7 @@ import java.util.Collections; import java.util.Map; import lombok.*; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.api.layers.LayerConstraint; @@ -41,7 +42,7 @@ import org.nd4j.linalg.api.ndarray.INDArray; @Data @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder +@Jacksonized @SuperBuilder public class LSTM extends AbstractLSTM { private double forgetGateBiasInit; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerConfiguration.java index eb24b60ce..5c04fa32c 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerConfiguration.java @@ -21,6 +21,7 @@ package org.deeplearning4j.nn.conf.layers; import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonTypeInfo; import java.io.Serializable; import java.lang.reflect.Field; @@ -30,6 +31,7 @@ import lombok.experimental.SuperBuilder; import lombok.extern.slf4j.Slf4j; import net.brutex.ai.dnn.api.ILayerConfiguration; import net.brutex.ai.dnn.api.LayerType; +import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.InputPreProcessor; @@ -50,11 +52,12 @@ import org.nd4j.linalg.learning.regularization.Regularization; /** A neural network layer. 
*/ @JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, include = JsonTypeInfo.As.PROPERTY, property = "@class") +@JsonInclude(JsonInclude.Include.NON_NULL) @EqualsAndHashCode @NoArgsConstructor // @JsonIdentityInfo(generator= ObjectIdGenerators.IntSequenceGenerator.class, property="@id") @Slf4j -@SuperBuilder +@SuperBuilder(toBuilder = true) public abstract class LayerConfiguration implements ILayerConfiguration, Serializable, Cloneable { // ITrainableLayerConfiguration @@ -64,10 +67,20 @@ public abstract class LayerConfiguration @Getter @Setter protected List biasConstraints; @Getter @Setter protected List constraints; @Getter @Setter protected IWeightNoise weightNoise; - @Builder.Default private @Getter @Setter LinkedHashSet variables = new LinkedHashSet<>(); + @Builder.Default private @Getter @Setter @NonNull LinkedHashSet variables = new LinkedHashSet<>(); @Getter @Setter private IDropout dropOut; /** The type of the layer, basically defines the base class and its properties */ @Builder.Default @Getter @Setter @NonNull private LayerType type = LayerType.UNKNOWN; + + /** + * Number of parameters this layer has as a result of its configuration + * @return number of parameters + */ + public long numParams() { + return initializer().numParams(this); + } + + + /** * A reference to the neural net configuration. This field is excluded from json serialization as * well as from equals check to avoid circular referenced. @@ -92,6 +105,7 @@ public abstract class LayerConfiguration * * @return activation function */ + @JsonIgnore public IActivation getActivationFn() { if (activation == null) throw new RuntimeException( @@ -103,10 +117,12 @@ public abstract class LayerConfiguration variables.add(s); } - public String toJson() { +public String toJson() { throw new RuntimeException("toJson is not implemented for LayerConfiguration"); } + + + /** Initialize the weight constraints. 
Should be called last, in the outer-most constructor */ protected void initializeConstraints() { // Note: this has to be done AFTER all constructors have finished - otherwise the required @@ -326,7 +342,7 @@ public abstract class LayerConfiguration log.warn("Calling getUpdater() in {} will always return no-Op Updater.", LayerConfiguration.class.getSimpleName()); return Updater.NONE.getIUpdaterWithDefaultConfig(); } - @Deprecated + @Deprecated @JsonIgnore public void setUpdater(Updater updater) { setUpdater(updater.getIUpdaterWithDefaultConfig()); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerValidation.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerValidation.java index 1fd8db106..8b63d01cc 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerValidation.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerValidation.java @@ -20,6 +20,9 @@ package org.deeplearning4j.nn.conf.layers; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.exception.DL4JInvalidConfigException; import org.deeplearning4j.nn.api.layers.LayerConstraint; @@ -30,10 +33,6 @@ import org.nd4j.linalg.learning.regularization.L1Regularization; import org.nd4j.linalg.learning.regularization.L2Regularization; import org.nd4j.linalg.learning.regularization.Regularization; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; - @Slf4j public class LayerValidation { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LearnedSelfAttentionLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LearnedSelfAttentionLayer.java index aa9351902..10d3df8ac 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LearnedSelfAttentionLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LearnedSelfAttentionLayer.java @@ -23,6 +23,7 @@ package org.deeplearning4j.nn.conf.layers; import java.util.Map; import lombok.*; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.RNNFormat; @@ -41,7 +42,7 @@ import org.nd4j.linalg.factory.Nd4j; @Data @EqualsAndHashCode(callSuper = true) -@SuperBuilder +@Jacksonized @SuperBuilder public class LearnedSelfAttentionLayer extends SameDiffLayer { private static final String WEIGHT_KEY_QUERY_PROJECTION = "Wq"; private static final String WEIGHT_KEY_KEY_PROJECTION = "Wk"; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocalResponseNormalization.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocalResponseNormalization.java index 1dbf9a9c5..d5a8f0da8 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocalResponseNormalization.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocalResponseNormalization.java @@ -25,6 +25,7 @@ import java.util.List; import java.util.Map; import lombok.*; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.InputPreProcessor; @@ -42,7 +43,7 @@ import 
org.nd4j.linalg.learning.regularization.Regularization; @Data @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder(builderMethodName = "innerBuilder") +@Jacksonized @SuperBuilder(builderMethodName = "innerBuilder") public class LocalResponseNormalization extends LayerConfiguration { /** Number of adjacent kernel maps to use when doing LRN. default: 5 */ diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected1D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected1D.java index 9d3352dac..8367bc965 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected1D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected1D.java @@ -22,16 +22,17 @@ package org.deeplearning4j.nn.conf.layers; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import java.util.*; + +import com.fasterxml.jackson.annotation.JsonProperty; import lombok.*; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.InputPreProcessor; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.RNNFormat; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.samediff.SDLayerParams; import org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayer; -import org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayerUtils; import org.deeplearning4j.nn.params.ConvolutionParamInitializer; import org.deeplearning4j.nn.weights.WeightInitUtil; import org.deeplearning4j.util.Convolution1DUtils; @@ -48,7 +49,7 @@ import org.nd4j.linalg.factory.Nd4j; @Data @EqualsAndHashCode(callSuper = true) @JsonIgnoreProperties({"paramShapes"}) -@SuperBuilder +@Jacksonized @SuperBuilder public class LocallyConnected1D extends SameDiffLayer { private static final List WEIGHT_KEYS = @@ -60,10 +61,12 @@ public class LocallyConnected1D extends SameDiffLayer { /** * @param nIn Number of inputs to the layer (input size) */ + @JsonProperty("nIn") private long nIn; /** * @param nOut Number of outputs (output size) */ + @JsonProperty("nOut") private long nOut; /** * @param activation Activation function for the layer diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected2D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected2D.java index 83dde1e49..68d609fd4 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected2D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected2D.java @@ -24,17 +24,16 @@ import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import java.util.*; import lombok.*; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.InputPreProcessor; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.samediff.SDLayerParams; import org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayer; -import org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayerUtils; import org.deeplearning4j.nn.params.ConvolutionParamInitializer; import 
org.deeplearning4j.nn.weights.WeightInitUtil; -import org.deeplearning4j.util.ConvolutionUtils; +import org.deeplearning4j.util.Convolution2DUtils; import org.deeplearning4j.util.ValidationUtils; import org.nd4j.autodiff.samediff.SDIndex; import org.nd4j.autodiff.samediff.SDVariable; @@ -48,7 +47,7 @@ import org.nd4j.linalg.factory.Nd4j; @Data @EqualsAndHashCode(callSuper = true) @JsonIgnoreProperties({"paramShapes"}) -@SuperBuilder +@Jacksonized @SuperBuilder public class LocallyConnected2D extends SameDiffLayer { private static final List WEIGHT_KEYS = @@ -120,7 +119,7 @@ public class LocallyConnected2D extends SameDiffLayer { if (convolutionMode == ConvolutionMode.Same) { this.outputSize = - ConvolutionUtils.getOutputSize( + Convolution2DUtils.getOutputSize( dummyInputForShapeInference, kernel, stride, @@ -129,14 +128,14 @@ public class LocallyConnected2D extends SameDiffLayer { dilation, dataFormat); this.padding = - ConvolutionUtils.getSameModeTopLeftPadding( + Convolution2DUtils.getSameModeTopLeftPadding( outputSize, inputSize, kernel, stride, dilation); this.paddingBr = - ConvolutionUtils.getSameModeBottomRightPadding( + Convolution2DUtils.getSameModeBottomRightPadding( outputSize, inputSize, kernel, stride, dilation); } else { this.outputSize = - ConvolutionUtils.getOutputSize( + Convolution2DUtils.getOutputSize( dummyInputForShapeInference, kernel, stride, diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LossLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LossLayer.java index 2384f506c..8d41747ae 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LossLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LossLayer.java @@ -24,9 +24,9 @@ import java.util.Collection; import java.util.Map; import lombok.Data; import lombok.EqualsAndHashCode; -import lombok.NoArgsConstructor; import lombok.ToString; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -42,6 +42,7 @@ import org.nd4j.linalg.lossfunctions.ILossFunction; @Data @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@Jacksonized @SuperBuilder public class LossLayer extends FeedForwardLayer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/NoParamLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/NoParamLayer.java index ab77c3012..f05fe3c36 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/NoParamLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/NoParamLayer.java @@ -21,8 +21,8 @@ package org.deeplearning4j.nn.conf.layers; import java.util.List; +import lombok.NoArgsConstructor; import lombok.experimental.SuperBuilder; -import net.brutex.ai.dnn.api.LayerType; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -30,9 +30,20 @@ import org.deeplearning4j.nn.params.EmptyParamInitializer; import org.nd4j.linalg.learning.config.IUpdater; import org.nd4j.linalg.learning.regularization.Regularization; +@NoArgsConstructor @SuperBuilder public abstract class NoParamLayer extends LayerConfiguration { + /** + * Number of parameters this layer. 
This will always return 0 + * + * @return 0 + */ + @Override + public long numParams() { + return 0; + } + @Override public ParamInitializer initializer() { return EmptyParamInitializer.getInstance(); @@ -57,6 +68,7 @@ public abstract class NoParamLayer extends LayerConfiguration { /** * Will always return no-Op updater. + * * @return */ @Override @@ -64,7 +76,7 @@ public abstract class NoParamLayer extends LayerConfiguration { return Updater.NONE.getIUpdaterWithDefaultConfig(); } - public static abstract class NoParamLayerBuilder> - extends LayerConfigurationBuilder - {} + public abstract static class NoParamLayerBuilder< + C extends NoParamLayer, B extends NoParamLayerBuilder> + extends LayerConfigurationBuilder {} } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/OutputLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/OutputLayer.java index 40ec2808d..479e36827 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/OutputLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/OutputLayer.java @@ -24,20 +24,21 @@ import java.util.Collection; import java.util.Map; import lombok.*; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.params.DefaultParamInitializer; import org.deeplearning4j.optimize.api.TrainingListener; -import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.lossfunctions.LossFunctions; @Data +@NoArgsConstructor @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder(builderMethodName = "innerBuilder") +@Jacksonized @SuperBuilder(builderMethodName = "innerBuilder") public class OutputLayer extends BaseOutputLayer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PReLULayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PReLULayer.java index 4244c252c..7c6431c21 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PReLULayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PReLULayer.java @@ -24,6 +24,7 @@ import java.util.Collection; import java.util.Map; import lombok.*; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.InputPreProcessor; @@ -40,7 +41,7 @@ import org.nd4j.linalg.api.ndarray.INDArray; @Data @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder(builderMethodName = "innerBuilder") +@Jacksonized @SuperBuilder(builderMethodName = "innerBuilder") public class PReLULayer extends BaseLayerConfiguration { /** * Explicitly set input shape of incoming activations so that parameters can be initialized diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Pooling1D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Pooling1D.java index e9a8f1b08..2bc8360bb 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Pooling1D.java +++ 
b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Pooling1D.java @@ -20,11 +20,10 @@ package org.deeplearning4j.nn.conf.layers; -import lombok.Data; import lombok.EqualsAndHashCode; -import lombok.NoArgsConstructor; import lombok.ToString; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; /** * 1D Pooling (subsampling) layer. Equivalent to {@link Subsampling1DLayer} @@ -35,6 +34,7 @@ import lombok.experimental.SuperBuilder; @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@Jacksonized @SuperBuilder public class Pooling1D extends Subsampling1DLayer { } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Pooling2D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Pooling2D.java index 1bf16dc5d..8d0f7ca69 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Pooling2D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Pooling2D.java @@ -20,11 +20,10 @@ package org.deeplearning4j.nn.conf.layers; -import lombok.Data; import lombok.EqualsAndHashCode; -import lombok.NoArgsConstructor; import lombok.ToString; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; /** * 2D Pooling (subsampling) layer. Equivalent to {@link SubsamplingLayer} @@ -35,6 +34,7 @@ import lombok.experimental.SuperBuilder; @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@Jacksonized @SuperBuilder public class Pooling2D extends SubsamplingLayer { } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PrimaryCapsules.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PrimaryCapsules.java index 081ecbe4d..610dca712 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PrimaryCapsules.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PrimaryCapsules.java @@ -23,6 +23,7 @@ package org.deeplearning4j.nn.conf.layers; import java.util.Map; import lombok.*; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.inputs.InputType.InputTypeConvolutional; @@ -41,7 +42,7 @@ import org.nd4j.linalg.factory.Nd4j; @Data @EqualsAndHashCode(callSuper = true) -@SuperBuilder(builderMethodName = "innerBuilder") +@Jacksonized @SuperBuilder(builderMethodName = "innerBuilder") public class PrimaryCapsules extends SameDiffLayer { private static final String WEIGHT_PARAM = "weight"; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RecurrentAttentionLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RecurrentAttentionLayer.java index dedebd23c..4879d537f 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RecurrentAttentionLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RecurrentAttentionLayer.java @@ -20,16 +20,16 @@ package org.deeplearning4j.nn.conf.layers; +import java.util.Map; import lombok.*; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.api.ITraininableLayerConfiguration; import org.deeplearning4j.nn.conf.InputPreProcessor; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import 
org.deeplearning4j.nn.conf.RNNFormat; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.samediff.SDLayerParams; import org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayer; -import org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayerUtils; import org.deeplearning4j.nn.params.SimpleRnnParamInitializer; import org.deeplearning4j.nn.weights.WeightInitUtil; import org.nd4j.autodiff.samediff.SDVariable; @@ -41,11 +41,9 @@ import org.nd4j.linalg.api.memory.MemoryWorkspace; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; -import java.util.Map; - @Data @EqualsAndHashCode(callSuper = true) -@SuperBuilder +@Jacksonized @SuperBuilder public class RecurrentAttentionLayer extends SameDiffLayer implements ITraininableLayerConfiguration { private DataType dataType; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnLossLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnLossLayer.java index 860a43a9d..c24f09ff0 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnLossLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnLossLayer.java @@ -24,6 +24,7 @@ import java.util.Collection; import java.util.Map; import lombok.*; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.InputPreProcessor; @@ -42,7 +43,7 @@ import org.nd4j.linalg.lossfunctions.LossFunctions; @Data @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder +@Jacksonized @SuperBuilder public class RnnLossLayer extends FeedForwardLayer { /** * @param lossFunction Loss function for the loss layer diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnOutputLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnOutputLayer.java index 3e1198cb0..95e4ee7f2 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnOutputLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnOutputLayer.java @@ -26,6 +26,7 @@ import lombok.Data; import lombok.EqualsAndHashCode; import lombok.ToString; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.InputPreProcessor; @@ -41,6 +42,7 @@ import org.nd4j.linalg.lossfunctions.LossFunctions; @Data @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@Jacksonized @SuperBuilder(builderMethodName = "innerBuilder") public class RnnOutputLayer extends BaseOutputLayer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SelfAttentionLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SelfAttentionLayer.java index af971cde6..9fc2323b6 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SelfAttentionLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SelfAttentionLayer.java @@ -23,6 +23,7 @@ package org.deeplearning4j.nn.conf.layers; import java.util.Map; import lombok.*; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import 
org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.RNNFormat; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -38,7 +39,7 @@ import org.nd4j.linalg.factory.Nd4j; @Data @EqualsAndHashCode(callSuper = true) -@SuperBuilder +@Jacksonized @SuperBuilder public class SelfAttentionLayer extends SameDiffLayer { private static final String WEIGHT_KEY_QUERY_PROJECTION = "Wq"; private static final String WEIGHT_KEY_KEY_PROJECTION = "Wk"; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SeparableConvolution2D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SeparableConvolution2D.java index 00e7c0277..602dff6fd 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SeparableConvolution2D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SeparableConvolution2D.java @@ -23,6 +23,7 @@ package org.deeplearning4j.nn.conf.layers; import java.util.*; import lombok.*; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.api.layers.LayerConstraint; @@ -44,7 +45,7 @@ import org.nd4j.linalg.api.ndarray.INDArray; @Data @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder(builderMethodName = "innerBuilder") +@Jacksonized @SuperBuilder(builderMethodName = "innerBuilder") public class SeparableConvolution2D extends ConvolutionLayer { /** * Set constraints to be applied to the point-wise convolution weight parameters of this layer. diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToBatchLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToBatchLayer.java index 85d1b11e4..ff6edf0c6 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToBatchLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToBatchLayer.java @@ -24,6 +24,7 @@ import java.util.Collection; import java.util.Map; import lombok.*; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.InputPreProcessor; @@ -41,7 +42,7 @@ import org.nd4j.linalg.api.ndarray.INDArray; @Data @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder(builderMethodName = "innerBuilder") +@Jacksonized @SuperBuilder(builderMethodName = "innerBuilder") public class SpaceToBatchLayer extends NoParamLayer { /** diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToDepthLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToDepthLayer.java index 1c3c625a1..57ede962e 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToDepthLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToDepthLayer.java @@ -20,8 +20,11 @@ package org.deeplearning4j.nn.conf.layers; +import java.util.Collection; +import java.util.Map; import lombok.*; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.InputPreProcessor; @@ -34,13 +37,10 @@ 
import org.deeplearning4j.optimize.api.TrainingListener; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.Collection; -import java.util.Map; - @Data @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder +@Jacksonized @SuperBuilder public class SpaceToDepthLayer extends NoParamLayer { /** diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling1DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling1DLayer.java index e2e6ab230..582c8b1b4 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling1DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling1DLayer.java @@ -22,11 +22,11 @@ package org.deeplearning4j.nn.conf.layers; import java.util.Collection; import java.util.Map; -import lombok.Data; import lombok.EqualsAndHashCode; -import lombok.NoArgsConstructor; +import lombok.NonNull; import lombok.ToString; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -34,8 +34,9 @@ import org.deeplearning4j.nn.conf.RNNFormat; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.optimize.api.TrainingListener; import org.deeplearning4j.util.Convolution1DUtils; -import org.deeplearning4j.util.ConvolutionUtils; +import org.deeplearning4j.util.Convolution2DUtils; import org.deeplearning4j.util.ValidationUtils; +import org.jetbrains.annotations.NotNull; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; @@ -50,9 +51,92 @@ import org.nd4j.linalg.api.ndarray.INDArray; @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder +@Jacksonized +@SuperBuilder(builderMethodName = "innerBuilder") public class Subsampling1DLayer extends SubsamplingLayer { + public static Subsampling1DLayerBuilder builder() { + return innerBuilder(); + } + + public static Subsampling1DLayerBuilder builder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType) { + return innerBuilder() + .poolingType(poolingType); + } + + public static Subsampling1DLayerBuilder builder(PoolingType poolingType) { + return innerBuilder() + .poolingType(poolingType.toPoolingType()); + } + + public static Subsampling1DLayerBuilder builder(int... 
kernelSize) { + return innerBuilder() + .kernelSize(kernelSize); + } + + public static Subsampling1DLayerBuilder builder(int[] kernelSize, int[] stride) { + return innerBuilder() + .kernelSize(kernelSize) + .stride(stride); + } + + public static Subsampling1DLayerBuilder builder(int[] kernelSize, int[] stride, int[] padding) { + return innerBuilder() + .kernelSize(kernelSize) + .stride(stride) + .padding(padding); + } + + public static Subsampling1DLayerBuilder builder(PoolingType poolingType, int[] kernelSize, int[] stride, int[] padding) { + return innerBuilder() + .poolingType(poolingType.toPoolingType()) + .kernelSize(kernelSize) + .stride(stride) + .padding(padding) + ; + } + + public static Subsampling1DLayerBuilder builder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType, int[] kernelSize, int[] stride, int[] padding) { + return innerBuilder() + .poolingType(poolingType) + .kernelSize(kernelSize) + .stride(stride) + .padding(padding) + ; + } + + public static Subsampling1DLayerBuilder builder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType, int[] kernelSize) { + return innerBuilder() + .poolingType(poolingType) + .kernelSize(kernelSize) + ; + } + + public static Subsampling1DLayerBuilder builder(PoolingType poolingType, int[] kernelSize) { + return innerBuilder() + .poolingType(poolingType.toPoolingType()) + .kernelSize(kernelSize) + ; + } + + + public static Subsampling1DLayerBuilder builder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType, int[] kernelSize, int[] stride) { + return innerBuilder() + .poolingType(poolingType) + .kernelSize(kernelSize) + .stride(stride) + ; + } + + public static Subsampling1DLayerBuilder builder(PoolingType poolingType, int[] kernelSize, int[] stride) { + return innerBuilder() + .poolingType(poolingType.toPoolingType()) + .kernelSize(kernelSize) + .stride(stride) + ; + } + + @Override public org.deeplearning4j.nn.api.Layer instantiate( NeuralNetConfiguration conf, @@ -160,8 +244,8 @@ public class Subsampling1DLayer extends SubsamplingLayer { throw new IllegalStateException( "Incorrect Subsampling config: p-norm must be set when using PoolingType.PNORM"); } - ConvolutionUtils.validateConvolutionModePadding(l.getConvolutionMode(), l.getPadding()); - ConvolutionUtils.validateCnnKernelStridePadding(l.getKernelSize(), l.getStride(), l.getPadding()); + Convolution2DUtils.validateConvolutionModePadding(l.getConvolutionMode(), l.getPadding()); + Convolution2DUtils.validateCnnKernelStridePadding(l.getKernelSize(), l.getStride(), l.getPadding()); return l; } @@ -176,20 +260,20 @@ public class Subsampling1DLayer extends SubsamplingLayer { * @return */ @Override - public B kernelSize(int... kernelSize) { - super.kernelSize( ValidationUtils.validate1NonNegative(kernelSize, "kernelSize")[0]); + public B kernelSize(int @NonNull ... kernelSize) { + super.kernelSize(ValidationUtils.validate1NonNegative(new int[]{kernelSize[0]}, "kernelSize")[0]); //fix width = 1 return self(); } @Override - public B stride(int... stride) { - super.stride( ValidationUtils.validate1NonNegative(stride, "stride")[0]); + public B stride(@NotNull int... stride) { + super.stride( ValidationUtils.validate1NonNegative(new int[]{stride[0]}, "stride")[0]); return self(); } @Override - public B padding(int... padding) { - super.padding( ValidationUtils.validate1NonNegative(padding, "padding")[0]); + public B padding(@NotNull int... 
padding) { + super.padding( ValidationUtils.validate1NonNegative(new int[]{padding[0]}, "padding")); return self(); } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling3DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling3DLayer.java index 150419817..f5ba65dff 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling3DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling3DLayer.java @@ -25,6 +25,7 @@ import java.util.List; import java.util.Map; import lombok.*; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.InputPreProcessor; @@ -35,7 +36,7 @@ import org.deeplearning4j.nn.conf.memory.MemoryReport; import org.deeplearning4j.nn.params.EmptyParamInitializer; import org.deeplearning4j.optimize.api.TrainingListener; import org.deeplearning4j.util.Convolution3DUtils; -import org.deeplearning4j.util.ConvolutionUtils; +import org.deeplearning4j.util.Convolution2DUtils; import org.deeplearning4j.util.ValidationUtils; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; @@ -45,7 +46,7 @@ import org.nd4j.linalg.learning.regularization.Regularization; @Data @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder(builderMethodName = "innerBuilder") +@Jacksonized @SuperBuilder(builderMethodName = "innerBuilder") public class Subsampling3DLayer extends NoParamLayer { @Builder.Default protected ConvolutionMode convolutionMode = ConvolutionMode.Truncate; @@ -318,7 +319,7 @@ public class Subsampling3DLayer extends NoParamLayer { throw new IllegalArgumentException("Invalid stride, must be length 3"); } - ConvolutionUtils.validateConvolutionModePadding(l.getConvolutionMode(), l.getPadding()); + Convolution2DUtils.validateConvolutionModePadding(l.getConvolutionMode(), l.getPadding()); Convolution3DUtils.validateCnn3DKernelStridePadding(l.getKernelSize(), l.getStride(), l.getPadding()); return l; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java index 93987e7ec..9dfb38036 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java @@ -24,19 +24,16 @@ import com.fasterxml.jackson.annotation.JsonIgnore; import java.util.Collection; import java.util.Map; import lombok.*; -import lombok.experimental.Accessors; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.api.ParamInitializer; -import org.deeplearning4j.nn.conf.CNN2DFormat; -import org.deeplearning4j.nn.conf.ConvolutionMode; -import org.deeplearning4j.nn.conf.InputPreProcessor; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.*; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.conf.memory.MemoryReport; import org.deeplearning4j.nn.params.EmptyParamInitializer; import org.deeplearning4j.optimize.api.TrainingListener; -import org.deeplearning4j.util.ConvolutionUtils; +import 
org.deeplearning4j.util.Convolution2DUtils; import org.deeplearning4j.util.ValidationUtils; import org.nd4j.common.base.Preconditions; import org.nd4j.linalg.api.buffer.DataType; @@ -45,7 +42,7 @@ import org.nd4j.linalg.api.ndarray.INDArray; @Data @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder(builderMethodName = "innerBuilder") +@Jacksonized @SuperBuilder(builderMethodName = "innerBuilder") public class SubsamplingLayer extends NoParamLayer { public static final CNN2DFormat DEFAULT_FORMAT = CNN2DFormat.NCHW; @@ -84,7 +81,8 @@ public class SubsamplingLayer extends NoParamLayer { * @param padding padding in the height and width dimensions */ @Builder.Default protected int[] padding = new int[] {0, 0}; - protected int pnorm; + + protected int pnorm; @Builder.Default protected double eps = 1e-8; /** * When using CuDNN or MKLDNN and an error is encountered, should fallback to the non-helper @@ -104,6 +102,7 @@ public class SubsamplingLayer extends NoParamLayer { */ protected @Builder.Default CNN2DFormat dataFormat = CNN2DFormat.NCHW; // default value for legacy reasons + protected @Builder.Default RNNFormat rnnFormat = RNNFormat.NCW; /** * When doing average pooling, should the padding values be included in the divisor or not?
* Not applicable for max and p-norm pooling.
@@ -127,6 +126,7 @@ public class SubsamplingLayer extends NoParamLayer { * average pooling */ @Builder.Default protected boolean avgPoolIncludePadInDivisor = true; + /** * Kernel dilation. Default: {1, 1}, which is standard convolutions. Used for implementing dilated * convolutions, which are also known as atrous convolutions.
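Aside (not part of the patch): the two SubsamplingLayer options documented in the hunks above, avgPoolIncludePadInDivisor and kernel dilation, are easiest to see with concrete numbers. The sketch below only illustrates the standard arithmetic; the class and method names are made up for this example and no DL4J types are used.

// DilationAndPoolingSketch.java -- hypothetical helper, not from the patch.
public class DilationAndPoolingSketch {

  // Effective kernel extent once dilation is applied:
  // k_eff = k + (k - 1) * (d - 1). Dilation 1 leaves the kernel unchanged.
  static int effectiveKernel(int kernel, int dilation) {
    return kernel + (kernel - 1) * (dilation - 1);
  }

  // Truncate-style output size: floor((in + 2*pad - k_eff) / stride) + 1
  static int outSize(int in, int kernel, int stride, int pad, int dilation) {
    return (in + 2 * pad - effectiveKernel(kernel, dilation)) / stride + 1;
  }

  // Average pooling over a window that overlaps padding: includePadInDivisor
  // divides by the full window size, otherwise only the valid cells count.
  static double avgPool(double[] window, int validCells, boolean includePadInDivisor) {
    double sum = 0;
    for (double v : window) sum += v; // padded cells contribute 0 to the sum
    return sum / (includePadInDivisor ? window.length : validCells);
  }

  public static void main(String[] args) {
    System.out.println(effectiveKernel(3, 1));      // 3  -> plain convolution
    System.out.println(effectiveKernel(3, 2));      // 5  -> atrous/dilated receptive field
    System.out.println(outSize(28, 3, 1, 0, 2));    // 24
    double[] edgeWindow = {4.0, 2.0, 0.0, 0.0};     // two real cells, two zero pads
    System.out.println(avgPool(edgeWindow, 2, true));   // 1.5 (divide by 4)
    System.out.println(avgPool(edgeWindow, 2, false));  // 3.0 (divide by 2)
  }
}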
@@ -301,7 +301,7 @@ public class SubsamplingLayer extends NoParamLayer { public void setNIn(InputType inputType, boolean override) { // No op: subsampling layer doesn't have nIn value if (!defaultValueOverridden || override) { - this.dataFormat = ((InputType.InputTypeConvolutional) inputType).getFormat(); + this.rnnFormat = ((InputType.InputTypeRecurrent) inputType).getFormat(); defaultValueOverridden = true; } } @@ -355,14 +355,6 @@ public class SubsamplingLayer extends NoParamLayer { .build(); } - public int getPnorm() { - return pnorm; - } - - public double getEps() { - return eps; - } - public enum PoolingType { MAX, AVG, @@ -394,33 +386,33 @@ public class SubsamplingLayer extends NoParamLayer { return self(); } - public B eps(int eps) { + public B eps(double eps) { ValidationUtils.validateNonNegative(eps, "eps"); this.eps$value = eps; this.eps$set = true; return self(); } - public B kernelSize(int... kernelSize) { - this.kernelSize$value = ValidationUtils.validate2NonNegative(kernelSize, false, "kernelSize"); + public B kernelSize(int @NonNull... kernelSize) { + this.kernelSize$value = ValidationUtils.validate2NonNegative(kernelSize, true, "kernelSize"); this.kernelSize$set = true; return self(); } - public B stride(int... stride) { - this.stride$value = ValidationUtils.validate2NonNegative(stride, false, "stride"); + public B stride(int @NonNull ... stride) { + this.stride$value = ValidationUtils.validate2NonNegative(stride, true, "stride"); this.stride$set = true; return self(); } - public B padding(int... padding) { - this.padding$value = ValidationUtils.validate2NonNegative(padding, false, "padding"); + public B padding(int @NonNull ... padding) { + this.padding$value = ValidationUtils.validate2NonNegative(padding, true, "padding"); this.padding$set = true; return self(); } - public B dilation(int... dilation) { - this.dilation$value = ValidationUtils.validate2NonNegative(dilation, false, "dilation"); + public B dilation(int @NonNull ... 
dilation) { + this.dilation$value = ValidationUtils.validate2NonNegative(dilation, true, "dilation"); this.dilation$set = true; return self(); } @@ -455,8 +447,8 @@ public class SubsamplingLayer extends NoParamLayer { throw new IllegalStateException( "Incorrect Subsampling config: p-norm must be set when using PoolingType.PNORM"); } - ConvolutionUtils.validateConvolutionModePadding(l.getConvolutionMode(), l.getPadding()); - ConvolutionUtils.validateCnnKernelStridePadding( + Convolution2DUtils.validateConvolutionModePadding(l.getConvolutionMode(), l.getPadding()); + Convolution2DUtils.validateCnnKernelStridePadding( l.getKernelSize(), l.getStride(), l.getPadding()); return l; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling1D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling1D.java index cf83a6164..dbd068d92 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling1D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling1D.java @@ -21,11 +21,13 @@ package org.deeplearning4j.nn.conf.layers; import java.util.Arrays; +import java.util.Collection; +import java.util.Map; import lombok.Data; import lombok.EqualsAndHashCode; -import lombok.NoArgsConstructor; import lombok.ToString; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -37,12 +39,10 @@ import org.nd4j.common.base.Preconditions; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.Collection; -import java.util.Map; - @Data @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@Jacksonized @SuperBuilder(builderMethodName = "innerBuilder") public class Upsampling1D extends BaseUpsamplingLayer { /** diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling2D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling2D.java index febaa0fb1..f2082affe 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling2D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling2D.java @@ -20,8 +20,12 @@ package org.deeplearning4j.nn.conf.layers; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import java.util.Collection; +import java.util.Map; import lombok.*; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -33,15 +37,11 @@ import org.deeplearning4j.optimize.api.TrainingListener; import org.deeplearning4j.util.ValidationUtils; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import com.fasterxml.jackson.databind.annotation.JsonDeserialize; - -import java.util.Collection; -import java.util.Map; @Data @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder(builderMethodName = "innerBuilder") +@Jacksonized @SuperBuilder(builderMethodName = "innerBuilder") public class Upsampling2D extends BaseUpsamplingLayer { @JsonDeserialize(using = LegacyIntArrayDeserializer.class) diff --git 
a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling3D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling3D.java index 4aba279ae..8c828be3e 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling3D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Upsampling3D.java @@ -20,8 +20,11 @@ package org.deeplearning4j.nn.conf.layers; +import java.util.Collection; +import java.util.Map; import lombok.*; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -33,13 +36,11 @@ import org.nd4j.common.base.Preconditions; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.Collection; -import java.util.Map; - @Data +@NoArgsConstructor @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder(builderMethodName = "innerBuilder") +@Jacksonized @SuperBuilder(builderMethodName = "innerBuilder") public class Upsampling3D extends BaseUpsamplingLayer { /** diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPadding1DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPadding1DLayer.java index a641a16f7..f9d9f70ff 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPadding1DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPadding1DLayer.java @@ -24,6 +24,7 @@ import java.util.Collection; import java.util.Map; import lombok.*; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -39,7 +40,7 @@ import org.nd4j.linalg.api.ndarray.INDArray; @Data @EqualsAndHashCode(callSuper = true) -@SuperBuilder(builderMethodName = "innerBuilder") +@Jacksonized @SuperBuilder(builderMethodName = "innerBuilder") public class ZeroPadding1DLayer extends NoParamLayer { /** Padding value for left and right. 
Must be length 2 array */ @Builder.Default private int[] padding = new int[] {0, 0}; // Padding: left, right diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPadding3DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPadding3DLayer.java index 02701dbb8..534401a96 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPadding3DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPadding3DLayer.java @@ -20,8 +20,11 @@ package org.deeplearning4j.nn.conf.layers; +import java.util.Collection; +import java.util.Map; import lombok.*; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -34,13 +37,10 @@ import org.deeplearning4j.util.ValidationUtils; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.Arrays; -import java.util.Collection; -import java.util.Map; - @Data +@NoArgsConstructor @EqualsAndHashCode(callSuper = true) -@SuperBuilder(builderMethodName = "innerBuilder") +@Jacksonized @SuperBuilder(builderMethodName = "innerBuilder") public class ZeroPadding3DLayer extends NoParamLayer { /** * [padLeftD, padRightD, padLeftH, padRightH, padLeftW, padRightW] diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPaddingLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPaddingLayer.java index f6338d3bd..a6a1ffa1d 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPaddingLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ZeroPaddingLayer.java @@ -25,6 +25,7 @@ import java.util.Collection; import java.util.Map; import lombok.*; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -32,15 +33,16 @@ import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.conf.memory.MemoryReport; import org.deeplearning4j.optimize.api.TrainingListener; -import org.deeplearning4j.util.ConvolutionUtils; +import org.deeplearning4j.util.Convolution2DUtils; import org.deeplearning4j.util.ValidationUtils; import org.nd4j.common.base.Preconditions; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; @Data +@NoArgsConstructor @EqualsAndHashCode(callSuper = true) -@SuperBuilder(builderMethodName = "innerBuilder") +@Jacksonized @SuperBuilder(builderMethodName = "innerBuilder") public class ZeroPaddingLayer extends NoParamLayer { /** * @param padding Padding value for top, bottom, left, and right. 
Must be length 4 array @@ -105,7 +107,7 @@ public class ZeroPaddingLayer extends NoParamLayer { @Override public InputType getOutputType(int layerIndex, InputType inputType) { - int[] hwd = ConvolutionUtils.getHWDFromInputType(inputType); + int[] hwd = Convolution2DUtils.getHWDFromInputType(inputType); int outH = hwd[0] + padding[0] + padding[1]; int outW = hwd[1] + padding[2] + padding[3]; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping1D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping1D.java index 1e43e3031..6445d3a16 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping1D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping1D.java @@ -24,6 +24,7 @@ import java.util.Collection; import java.util.Map; import lombok.*; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -41,6 +42,7 @@ import org.nd4j.linalg.api.ndarray.INDArray; /** Amount of cropping to apply to both the top and the bottom of the input activations */ @Data @EqualsAndHashCode(callSuper = true) +@Jacksonized @SuperBuilder(builderMethodName = "innerBuilder") public class Cropping1D extends NoParamLayer { /** diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping2D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping2D.java index cf3f6bcb6..17c8017de 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping2D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping2D.java @@ -24,6 +24,7 @@ import java.util.Collection; import java.util.Map; import lombok.*; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -34,7 +35,7 @@ import org.deeplearning4j.nn.conf.layers.NoParamLayer; import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.layers.convolution.Cropping2DLayer; import org.deeplearning4j.optimize.api.TrainingListener; -import org.deeplearning4j.util.ConvolutionUtils; +import org.deeplearning4j.util.Convolution2DUtils; import org.deeplearning4j.util.ValidationUtils; import org.nd4j.common.base.Preconditions; import org.nd4j.linalg.api.buffer.DataType; @@ -42,6 +43,7 @@ import org.nd4j.linalg.api.ndarray.INDArray; @Data @EqualsAndHashCode(callSuper = true) +@Jacksonized @SuperBuilder(builderMethodName = "innerBuilder") public class Cropping2D extends NoParamLayer { /** Cropping amount for top/bottom/left/right (in that order). A length 4 array. 
*/ @@ -111,7 +113,7 @@ public class Cropping2D extends NoParamLayer { @Override public InputType getOutputType(int layerIndex, InputType inputType) { - int[] hwd = ConvolutionUtils.getHWDFromInputType(inputType); + int[] hwd = Convolution2DUtils.getHWDFromInputType(inputType); int outH = hwd[0] - cropping[0] - cropping[1]; int outW = hwd[1] - cropping[2] - cropping[3]; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping3D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping3D.java index 9c4e70196..586efded7 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping3D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/convolutional/Cropping3D.java @@ -20,8 +20,11 @@ package org.deeplearning4j.nn.conf.layers.convolutional; +import java.util.Collection; +import java.util.Map; import lombok.*; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -36,11 +39,10 @@ import org.nd4j.common.base.Preconditions; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.Collection; -import java.util.Map; - @Data +@NoArgsConstructor @EqualsAndHashCode(callSuper = true) +@Jacksonized @SuperBuilder(builderMethodName = "innerBuilder") public class Cropping3D extends NoParamLayer { @Builder.Default diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/ElementWiseMultiplicationLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/ElementWiseMultiplicationLayer.java index 6865119e4..5525adcf2 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/ElementWiseMultiplicationLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/ElementWiseMultiplicationLayer.java @@ -20,13 +20,15 @@ package org.deeplearning4j.nn.conf.layers.misc; +import java.util.Collection; +import java.util.Map; import lombok.*; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.layers.FeedForwardLayer; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.conf.memory.MemoryReport; @@ -35,14 +37,10 @@ import org.deeplearning4j.optimize.api.TrainingListener; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.Collection; -import java.util.Map; - - @Data @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder +@Jacksonized @SuperBuilder public class ElementWiseMultiplicationLayer extends org.deeplearning4j.nn.conf.layers.FeedForwardLayer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/FrozenLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/FrozenLayer.java index 83f66b966..e8133dadb 100644 --- 
a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/FrozenLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/FrozenLayer.java @@ -27,6 +27,7 @@ import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.Setter; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.InputPreProcessor; @@ -42,6 +43,7 @@ import org.nd4j.linalg.learning.config.IUpdater; import org.nd4j.linalg.learning.regularization.Regularization; @EqualsAndHashCode(callSuper = false) +@Jacksonized @SuperBuilder(builderMethodName = "innerBuilder") public class FrozenLayer extends LayerConfiguration { @@ -72,6 +74,7 @@ public class FrozenLayer extends LayerConfiguration { boolean initializeParams, DataType networkDataType) { + innerConfiguration.setNetConfiguration(conf); // Need to be able to instantiate a layer, from a config - for JSON -> net type situations org.deeplearning4j.nn.api.Layer underlying = innerConfiguration.instantiate( diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/FrozenLayerWithBackprop.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/FrozenLayerWithBackprop.java index b7f42a5cf..0db8c4e3c 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/FrozenLayerWithBackprop.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/FrozenLayerWithBackprop.java @@ -20,9 +20,12 @@ package org.deeplearning4j.nn.conf.layers.misc; -import lombok.Data; +import java.util.Collection; +import java.util.List; +import java.util.Set; import lombok.EqualsAndHashCode; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -34,14 +37,9 @@ import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.learning.config.IUpdater; import org.nd4j.linalg.learning.regularization.Regularization; -import com.fasterxml.jackson.annotation.JsonProperty; - -import java.util.Collection; -import java.util.List; -import java.util.Set; - @EqualsAndHashCode(callSuper = false) +@Jacksonized @SuperBuilder(builderMethodName = "innerBuilder") public class FrozenLayerWithBackprop extends BaseWrapperLayerConfiguration { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/RepeatVector.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/RepeatVector.java index 62736066f..ae21368b0 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/RepeatVector.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/RepeatVector.java @@ -20,8 +20,11 @@ package org.deeplearning4j.nn.conf.layers.misc; +import java.util.Collection; +import java.util.Map; import lombok.*; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.RNNFormat; @@ -35,13 +38,10 @@ import org.deeplearning4j.optimize.api.TrainingListener; import 
org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.Collection; -import java.util.Map; - @Data @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@SuperBuilder +@Jacksonized @SuperBuilder public class RepeatVector extends FeedForwardLayer { /** * Set repetition factor for RepeatVector layer diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/BoundingBoxesDeserializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/BoundingBoxesDeserializer.java index 8dac21edc..ab221e1e7 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/BoundingBoxesDeserializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/BoundingBoxesDeserializer.java @@ -19,17 +19,15 @@ */ package org.deeplearning4j.nn.conf.layers.objdetect; +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.databind.DeserializationContext; +import com.fasterxml.jackson.databind.JsonDeserializer; +import com.fasterxml.jackson.databind.JsonNode; +import java.io.IOException; import org.nd4j.linalg.api.buffer.DataBuffer; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.serde.jackson.shaded.NDArrayTextDeSerializer; -import com.fasterxml.jackson.core.JsonParser; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.DeserializationContext; -import com.fasterxml.jackson.databind.JsonDeserializer; -import com.fasterxml.jackson.databind.JsonNode; - -import java.io.IOException; public class BoundingBoxesDeserializer extends JsonDeserializer { @Override diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/Yolo2OutputLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/Yolo2OutputLayer.java index 7b4020dea..c9311edfe 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/Yolo2OutputLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/Yolo2OutputLayer.java @@ -28,6 +28,7 @@ import java.util.List; import java.util.Map; import lombok.*; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.CNN2DFormat; @@ -46,9 +47,8 @@ import org.nd4j.linalg.lossfunctions.ILossFunction; import org.nd4j.linalg.lossfunctions.impl.LossL2; import org.nd4j.serde.jackson.shaded.NDArrayTextSerializer; - @EqualsAndHashCode(callSuper = false) -@SuperBuilder +@Jacksonized @SuperBuilder public class Yolo2OutputLayer extends LayerConfiguration { /** diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/Bidirectional.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/Bidirectional.java index 26e9e12ec..d12e858da 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/Bidirectional.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/Bidirectional.java @@ -20,8 +20,15 @@ package org.deeplearning4j.nn.conf.layers.recurrent; +import static org.nd4j.linalg.indexing.NDArrayIndex.interval; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import java.util.Collection; 
+import java.util.List; +import java.util.Map; import lombok.*; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -40,17 +47,11 @@ import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.learning.config.IUpdater; import org.nd4j.linalg.learning.regularization.Regularization; -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; - -import java.util.Collection; -import java.util.List; -import java.util.Map; - -import static org.nd4j.linalg.indexing.NDArrayIndex.interval; @Data @EqualsAndHashCode(callSuper = true, exclude = {"initializer"}) @JsonIgnoreProperties({"initializer"}) +@Jacksonized @SuperBuilder(builderMethodName = "innerBuilder") public class Bidirectional extends LayerConfiguration { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/LastTimeStep.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/LastTimeStep.java index 6abc8b3c2..de024e391 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/LastTimeStep.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/LastTimeStep.java @@ -20,7 +20,9 @@ package org.deeplearning4j.nn.conf.layers.recurrent; +import java.util.Collection; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -31,7 +33,7 @@ import org.deeplearning4j.optimize.api.TrainingListener; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.Collection; +@Jacksonized @SuperBuilder(builderMethodName = "innerBuilder") public class LastTimeStep extends BaseWrapperLayerConfiguration { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/SimpleRnn.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/SimpleRnn.java index 832d48034..b190daaf2 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/SimpleRnn.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/SimpleRnn.java @@ -20,9 +20,12 @@ package org.deeplearning4j.nn.conf.layers.recurrent; +import java.util.Collection; +import java.util.Map; import lombok.*; import lombok.experimental.Accessors; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -36,12 +39,8 @@ import org.deeplearning4j.optimize.api.TrainingListener; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.Collection; -import java.util.Map; - - @EqualsAndHashCode(callSuper = false) -@SuperBuilder +@Jacksonized @SuperBuilder public class SimpleRnn extends BaseRecurrentLayer { /** * If true (default = false): enable layer normalization on this layer diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/TimeDistributed.java 
b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/TimeDistributed.java index d78b4acf3..0bb2f611a 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/TimeDistributed.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/TimeDistributed.java @@ -23,6 +23,7 @@ package org.deeplearning4j.nn.conf.layers.recurrent; import java.util.Collection; import lombok.*; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.RNNFormat; @@ -35,7 +36,7 @@ import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; @EqualsAndHashCode(callSuper = true) -@SuperBuilder +@Jacksonized @SuperBuilder public class TimeDistributed extends BaseWrapperLayerConfiguration { @Getter @Setter private RNNFormat rnnDataFormat = RNNFormat.NCW; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java index 12239673e..6419df8c0 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java @@ -20,6 +20,7 @@ package org.deeplearning4j.nn.conf.layers.samediff; +import com.fasterxml.jackson.annotation.JsonIgnore; import java.util.Collection; import java.util.List; import java.util.Map; @@ -52,7 +53,8 @@ import org.nd4j.linalg.learning.regularization.WeightDecay; @EqualsAndHashCode(callSuper = true, doNotUseGetters = true) @NoArgsConstructor @SuperBuilder -public abstract class AbstractSameDiffLayer extends LayerConfiguration { +public abstract class AbstractSameDiffLayer extends LayerConfiguration + implements org.deeplearning4j.nn.api.ITraininableLayerConfiguration { /** * The regularization for the parameters (excluding biases) - for example {@link WeightDecay} @@ -63,16 +65,14 @@ public abstract class AbstractSameDiffLayer extends LayerConfiguration { * @param regularization Regularization to apply for the network parameters/weights (excluding * biases) */ - @Getter - protected List regularization; + @Getter protected List regularization; /** * The regularization for the biases only - for example {@link WeightDecay} -- SETTER -- Set the * regularization for the biases only - for example {@link WeightDecay} * * @param regularizationBias Regularization to apply for the network biases only */ - @Getter - protected List regularizationBias; + @Getter protected List regularizationBias; /** * Gradient updater. 
For example, {@link org.nd4j.linalg.learning.config.Adam} or {@link * org.nd4j.linalg.learning.config.Nesterovs} @@ -87,21 +87,23 @@ public abstract class AbstractSameDiffLayer extends LayerConfiguration { * @param biasUpdater Updater to use for bias parameters */ protected @Getter @Setter IUpdater biasUpdater; -@Getter @Setter - protected GradientNormalization gradientNormalization; -@Getter @Setter - protected double gradientNormalizationThreshold = Double.NaN; -@Getter @Setter - private SDLayerParams layerParams; + + @Getter @Setter protected GradientNormalization gradientNormalization; + @Getter @Setter protected double gradientNormalizationThreshold = Double.NaN; + @Getter @Setter private SDLayerParams layerParams; + + @Getter @Setter private DataType dataType; @Override public void runInheritance(@NotNull NeuralNetConfiguration conf) { super.runInheritance(conf); - if (this.biasUpdater == null ) this.biasUpdater = conf.getBiasUpdater(); + if (this.biasUpdater == null) this.biasUpdater = conf.getBiasUpdater(); if (this.updater == null) this.updater = conf.getUpdater(); - if (this.regularizationBias == null || regularizationBias.isEmpty()) this.regularizationBias = conf.getRegularizationBias(); - if (this.regularization == null || regularization.isEmpty()) this.regularization = conf.getRegularization(); - // if( this.weightInit == null) this.weightInit = conf.getWeightInit(); + if (this.regularizationBias == null || regularizationBias.isEmpty()) + this.regularizationBias = conf.getRegularizationBias(); + if (this.regularization == null || regularization.isEmpty()) + this.regularization = conf.getRegularization(); + // if( this.weightInit == null) this.weightInit = conf.getWeightInit(); if (this.gradientNormalization == null) this.gradientNormalization = conf.getGradientNormalization(); // if(this.weightInit == null) this.weightInit = conf.getWeightInit(); @@ -109,6 +111,7 @@ public abstract class AbstractSameDiffLayer extends LayerConfiguration { this.gradientNormalizationThreshold = conf.getGradientNormalizationThreshold(); } } + @Override public List getRegularizationByParam(String paramName) { if (layerParams.isWeightParam(paramName)) { @@ -119,6 +122,7 @@ public abstract class AbstractSameDiffLayer extends LayerConfiguration { return null; } + @JsonIgnore public SDLayerParams getLayerParams() { if (layerParams == null) { layerParams = new SDLayerParams(); @@ -138,7 +142,6 @@ public abstract class AbstractSameDiffLayer extends LayerConfiguration { return null; } - /** * Define the parameters for the network. Use {@link SDLayerParams#addWeightParam(String, * long...)} and {@link SDLayerParams#addBiasParam(String, long...)} @@ -207,7 +210,6 @@ public abstract class AbstractSameDiffLayer extends LayerConfiguration { fanIn, fanOut, array.shape(), weightInit, null, paramReshapeOrder(null), array); } - /** * This method generates an "all ones" mask array for use in the SameDiff model when none is * provided. 
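Aside (not part of the patch): the reformatted runInheritance() above keeps its existing behaviour, i.e. any updater, bias updater, regularization or gradient-normalization value left unset on the layer falls back to the value on the enclosing network configuration. A minimal, self-contained sketch of that fallback pattern, using made-up types rather than the real NeuralNetConfiguration/LayerConfiguration classes:

// InheritanceSketch.java -- hypothetical simplification, not DL4J API.
import java.util.ArrayList;
import java.util.List;

public class InheritanceSketch {

  static class NetConf {
    String updater = "Adam(1e-3)";
    List<String> regularization = List.of("WeightDecay(1e-4)");
  }

  static class LayerConf {
    String updater;               // null -> inherit from the network
    List<String> regularization;  // null or empty -> inherit from the network

    void runInheritance(NetConf conf) {
      if (updater == null) updater = conf.updater;
      if (regularization == null || regularization.isEmpty())
        regularization = new ArrayList<>(conf.regularization);
    }
  }

  public static void main(String[] args) {
    LayerConf layer = new LayerConf();
    layer.updater = "Sgd(1e-2)";          // explicitly set -> kept
    layer.runInheritance(new NetConf());
    System.out.println(layer.updater);        // Sgd(1e-2)
    System.out.println(layer.regularization); // [WeightDecay(1e-4)]
  }
}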
diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SDLayerParams.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SDLayerParams.java index 99b3f85f8..2000d7463 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SDLayerParams.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SDLayerParams.java @@ -20,17 +20,15 @@ package org.deeplearning4j.nn.conf.layers.samediff; +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonProperty; +import java.io.Serializable; +import java.util.*; import lombok.Data; import lombok.NoArgsConstructor; import lombok.NonNull; import org.nd4j.common.base.Preconditions; -import com.fasterxml.jackson.annotation.JsonIgnore; -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.fasterxml.jackson.annotation.JsonTypeInfo; - -import java.io.Serializable; -import java.util.*; @JsonIgnoreProperties({"paramsList", "weightParamsList", "biasParamsList"}) @NoArgsConstructor diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SDVertexParams.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SDVertexParams.java index 1a3c51c3a..2cff3a650 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SDVertexParams.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SDVertexParams.java @@ -20,11 +20,10 @@ package org.deeplearning4j.nn.conf.layers.samediff; -import lombok.Data; -import org.nd4j.common.base.Preconditions; - import java.util.Arrays; import java.util.List; +import lombok.Data; +import org.nd4j.common.base.Preconditions; @Data public class SDVertexParams extends SDLayerParams { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffLambdaLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffLambdaLayer.java index 83f41e663..105b44917 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffLambdaLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffLambdaLayer.java @@ -20,6 +20,7 @@ package org.deeplearning4j.nn.conf.layers.samediff; +import java.util.Map; import lombok.NoArgsConstructor; import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -27,7 +28,6 @@ import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.Map; @NoArgsConstructor @SuperBuilder public abstract class SameDiffLambdaLayer extends SameDiffLayer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffLambdaVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffLambdaVertex.java index 7ec4fb2d5..32269d2cd 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffLambdaVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffLambdaVertex.java @@ -20,15 +20,13 @@ package org.deeplearning4j.nn.conf.layers.samediff; +import 
java.util.*; import org.deeplearning4j.nn.conf.graph.GraphVertex; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.common.base.Preconditions; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.*; - - public abstract class SameDiffLambdaVertex extends SameDiffVertex { protected transient VertexInputs inputs; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffLayer.java index cd53db1fd..95fa33a6b 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffLayer.java @@ -20,6 +20,9 @@ package org.deeplearning4j.nn.conf.layers.samediff; +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; import lombok.Builder; import lombok.Data; import lombok.EqualsAndHashCode; @@ -38,17 +41,15 @@ import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.Collection; -import java.util.HashMap; -import java.util.Map; - - @EqualsAndHashCode(callSuper = true) @Data @NoArgsConstructor @SuperBuilder public abstract class SameDiffLayer extends AbstractSameDiffLayer { + public static abstract class SameDiffLayerBuilder> extends AbstractSameDiffLayerBuilder { + + } /** * WeightInit, default is XAVIER. */ diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffLayerUtils.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffLayerUtils.java index 295d48b87..28bfb3583 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffLayerUtils.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffLayerUtils.java @@ -20,11 +20,10 @@ package org.deeplearning4j.nn.conf.layers.samediff; -import org.nd4j.linalg.activations.Activation; -import org.nd4j.linalg.activations.IActivation; - import java.util.HashMap; import java.util.Map; +import org.nd4j.linalg.activations.Activation; +import org.nd4j.linalg.activations.IActivation; public class SameDiffLayerUtils { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffOutputLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffOutputLayer.java index 7e8e0834a..f26b8efc9 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffOutputLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffOutputLayer.java @@ -20,6 +20,8 @@ package org.deeplearning4j.nn.conf.layers.samediff; +import java.util.Collection; +import java.util.Map; import lombok.NoArgsConstructor; import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -30,8 +32,6 @@ import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.Collection; -import java.util.Map; @NoArgsConstructor @SuperBuilder public abstract class SameDiffOutputLayer extends AbstractSameDiffLayer { diff --git 
a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffVertex.java index 9d6144c90..4d44e2a55 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffVertex.java @@ -20,10 +20,12 @@ package org.deeplearning4j.nn.conf.layers.samediff; +import java.util.List; +import java.util.Map; import lombok.Data; import lombok.EqualsAndHashCode; -import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.api.ITraininableLayerConfiguration; +import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.GradientNormalization; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.graph.GraphVertex; @@ -34,15 +36,12 @@ import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.layers.samediff.SameDiffGraphVertex; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; +import org.nd4j.common.primitives.Pair; +import org.nd4j.common.util.ArrayUtil; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.learning.config.IUpdater; import org.nd4j.linalg.learning.regularization.Regularization; -import org.nd4j.common.primitives.Pair; -import org.nd4j.common.util.ArrayUtil; - -import java.util.List; -import java.util.Map; @Data @EqualsAndHashCode(callSuper = false) diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/util/MaskLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/util/MaskLayer.java index 50b71e837..87210ae8f 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/util/MaskLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/util/MaskLayer.java @@ -20,8 +20,11 @@ package org.deeplearning4j.nn.conf.layers.util; -import lombok.NoArgsConstructor; +import java.util.Collection; +import java.util.List; +import java.util.Map; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -35,10 +38,7 @@ import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.learning.regularization.Regularization; -import java.util.Collection; -import java.util.List; -import java.util.Map; - +@Jacksonized @SuperBuilder public class MaskLayer extends NoParamLayer { @Override diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/util/MaskZeroLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/util/MaskZeroLayer.java index 3161bfb02..bdeb22bf4 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/util/MaskZeroLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/util/MaskZeroLayer.java @@ -20,24 +20,21 @@ package org.deeplearning4j.nn.conf.layers.util; +import java.util.Collection; import lombok.*; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.conf.InputPreProcessor; import 
org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.wrapper.BaseWrapperLayerConfiguration; import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.optimize.api.TrainingListener; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import com.fasterxml.jackson.annotation.JsonProperty; - -import java.util.Collection; - @EqualsAndHashCode(callSuper = false) -@SuperBuilder +@Jacksonized @SuperBuilder public class MaskZeroLayer extends BaseWrapperLayerConfiguration { @Builder.Default @Getter @Setter private double maskingValue = 0.0; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/CompositeReconstructionDistribution.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/CompositeReconstructionDistribution.java index ca96fb46e..1b9c04d0e 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/CompositeReconstructionDistribution.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/CompositeReconstructionDistribution.java @@ -20,6 +20,10 @@ package org.deeplearning4j.nn.conf.layers.variational; +import com.fasterxml.jackson.annotation.JsonProperty; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; import lombok.Data; import org.nd4j.linalg.activations.impl.ActivationIdentity; import org.nd4j.linalg.api.ndarray.INDArray; @@ -27,11 +31,6 @@ import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.INDArrayIndex; import org.nd4j.linalg.indexing.NDArrayIndex; import org.nd4j.linalg.lossfunctions.ILossFunction; -import com.fasterxml.jackson.annotation.JsonProperty; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; @Data public class CompositeReconstructionDistribution implements ReconstructionDistribution { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/LossFunctionWrapper.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/LossFunctionWrapper.java index 3622018b6..fc348068d 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/LossFunctionWrapper.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/LossFunctionWrapper.java @@ -20,22 +20,22 @@ package org.deeplearning4j.nn.conf.layers.variational; +import com.fasterxml.jackson.annotation.JsonProperty; import lombok.Data; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.lossfunctions.ILossFunction; -import com.fasterxml.jackson.annotation.JsonProperty; @Data public class LossFunctionWrapper implements ReconstructionDistribution { - private final IActivation activationFn; + private final IActivation activation; private final ILossFunction lossFunction; - public LossFunctionWrapper(@JsonProperty("activationFn") IActivation activationFn, + public LossFunctionWrapper(@JsonProperty("activation") IActivation activation, @JsonProperty("lossFunction") ILossFunction lossFunction) { - this.activationFn = activationFn; + this.activation = activation; this.lossFunction = lossFunction; } @@ -59,17 +59,17 @@ public class LossFunctionWrapper 
implements ReconstructionDistribution { //NOTE: The returned value here is NOT negative log probability, but it (the loss function value) // is equivalent, in terms of being something we want to minimize... - return lossFunction.computeScore(x, preOutDistributionParams, activationFn, null, average); + return lossFunction.computeScore(x, preOutDistributionParams, activation, null, average); } @Override public INDArray exampleNegLogProbability(INDArray x, INDArray preOutDistributionParams) { - return lossFunction.computeScoreArray(x, preOutDistributionParams, activationFn, null); + return lossFunction.computeScoreArray(x, preOutDistributionParams, activation, null); } @Override public INDArray gradient(INDArray x, INDArray preOutDistributionParams) { - return lossFunction.computeGradient(x, preOutDistributionParams, activationFn, null); + return lossFunction.computeGradient(x, preOutDistributionParams, activation, null); } @Override @@ -82,11 +82,11 @@ public class LossFunctionWrapper implements ReconstructionDistribution { public INDArray generateAtMean(INDArray preOutDistributionParams) { //Loss functions: not probabilistic -> not random INDArray out = preOutDistributionParams.dup(); - return activationFn.getActivation(out, true); + return activation.getActivation(out, true); } @Override public String toString() { - return "LossFunctionWrapper(afn=" + activationFn + "," + lossFunction + ")"; + return "LossFunctionWrapper(afn=" + activation + "," + lossFunction + ")"; } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/ReconstructionDistribution.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/ReconstructionDistribution.java index 94b9a77d8..e8197b9c5 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/ReconstructionDistribution.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/ReconstructionDistribution.java @@ -20,11 +20,9 @@ package org.deeplearning4j.nn.conf.layers.variational; -import org.nd4j.linalg.api.ndarray.INDArray; -import com.fasterxml.jackson.annotation.JsonSubTypes; import com.fasterxml.jackson.annotation.JsonTypeInfo; - import java.io.Serializable; +import org.nd4j.linalg.api.ndarray.INDArray; @JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, include = JsonTypeInfo.As.PROPERTY, property = "@class") public interface ReconstructionDistribution extends Serializable { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/VariationalAutoencoder.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/VariationalAutoencoder.java index bf2ae8255..611006aca 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/VariationalAutoencoder.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/VariationalAutoencoder.java @@ -24,6 +24,7 @@ import java.util.Collection; import java.util.Map; import lombok.*; import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -46,7 +47,7 @@ import org.nd4j.linalg.lossfunctions.LossFunctions; @Data @EqualsAndHashCode(callSuper = true) -@SuperBuilder +@Jacksonized @SuperBuilder public class VariationalAutoencoder extends BasePretrainNetwork { /** diff --git 
a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/memory/LayerMemoryReport.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/memory/LayerMemoryReport.java index 771df513c..fb0ee6538 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/memory/LayerMemoryReport.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/memory/LayerMemoryReport.java @@ -20,15 +20,13 @@ package org.deeplearning4j.nn.conf.memory; +import java.util.HashMap; +import java.util.Map; import lombok.*; import org.deeplearning4j.nn.conf.CacheMode; import org.deeplearning4j.nn.conf.inputs.InputType; -import org.nd4j.linalg.api.buffer.DataBuffer; import org.nd4j.linalg.api.buffer.DataType; -import java.util.HashMap; -import java.util.Map; - @Data @AllArgsConstructor @NoArgsConstructor diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/memory/MemoryReport.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/memory/MemoryReport.java index a0a6103d8..3afafd1e5 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/memory/MemoryReport.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/memory/MemoryReport.java @@ -20,20 +20,18 @@ package org.deeplearning4j.nn.conf.memory; -import lombok.EqualsAndHashCode; -import lombok.NonNull; -import org.deeplearning4j.nn.conf.CacheMode; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.nd4j.linalg.api.buffer.DataBuffer; -import org.nd4j.linalg.api.buffer.DataType; -import org.nd4j.linalg.api.buffer.util.DataTypeUtil; import com.fasterxml.jackson.annotation.JsonTypeInfo; import com.fasterxml.jackson.core.JsonProcessingException; - import java.io.IOException; import java.util.Collections; import java.util.HashMap; import java.util.Map; +import lombok.EqualsAndHashCode; +import lombok.NonNull; +import org.deeplearning4j.nn.conf.CacheMode; +import org.deeplearning4j.nn.conf.serde.CavisMapper; +import org.nd4j.linalg.api.buffer.DataType; +import org.nd4j.linalg.api.buffer.util.DataTypeUtil; @JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, include = JsonTypeInfo.As.PROPERTY) @EqualsAndHashCode @@ -151,7 +149,7 @@ public abstract class MemoryReport { public String toJson() { try { - return NeuralNetConfiguration.mapper().writeValueAsString(this); + return CavisMapper.getMapper(CavisMapper.Type.JSON).writeValueAsString(this); } catch (JsonProcessingException e) { throw new RuntimeException(e); } @@ -159,7 +157,7 @@ public abstract class MemoryReport { public String toYaml() { try { - return NeuralNetConfiguration.mapperYaml().writeValueAsString(this); + return CavisMapper.getMapper(CavisMapper.Type.YAML).writeValueAsString(this); } catch (JsonProcessingException e) { throw new RuntimeException(e); } @@ -167,7 +165,7 @@ public abstract class MemoryReport { public static MemoryReport fromJson(String json) { try { - return NeuralNetConfiguration.mapper().readValue(json, MemoryReport.class); + return CavisMapper.getMapper(CavisMapper.Type.JSON).readValue(json, MemoryReport.class); } catch (IOException e) { throw new RuntimeException(e); } @@ -175,7 +173,7 @@ public abstract class MemoryReport { public static MemoryReport fromYaml(String yaml) { try { - return NeuralNetConfiguration.mapperYaml().readValue(yaml, MemoryReport.class); + return CavisMapper.getMapper(CavisMapper.Type.YAML).readValue(yaml, MemoryReport.class); } catch (IOException e) { throw new RuntimeException(e); } diff --git
a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/memory/NetworkMemoryReport.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/memory/NetworkMemoryReport.java index d3f7b1955..fa6ce0094 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/memory/NetworkMemoryReport.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/memory/NetworkMemoryReport.java @@ -20,19 +20,17 @@ package org.deeplearning4j.nn.conf.memory; +import com.fasterxml.jackson.annotation.JsonProperty; +import java.text.DecimalFormat; +import java.util.Arrays; +import java.util.LinkedHashMap; +import java.util.Map; import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.NonNull; import org.deeplearning4j.nn.conf.CacheMode; import org.deeplearning4j.nn.conf.inputs.InputType; -import org.nd4j.linalg.api.buffer.DataBuffer; import org.nd4j.linalg.api.buffer.DataType; -import com.fasterxml.jackson.annotation.JsonProperty; - -import java.text.DecimalFormat; -import java.util.Arrays; -import java.util.LinkedHashMap; -import java.util.Map; @Getter @EqualsAndHashCode(callSuper = true) diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/misc/DummyConfig.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/misc/DummyConfig.java index 9e9f123bc..5eaaad062 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/misc/DummyConfig.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/misc/DummyConfig.java @@ -20,6 +20,7 @@ package org.deeplearning4j.nn.conf.misc; +import java.util.List; import lombok.AllArgsConstructor; import org.deeplearning4j.nn.api.ITraininableLayerConfiguration; import org.deeplearning4j.nn.conf.GradientNormalization; @@ -28,8 +29,6 @@ import org.nd4j.linalg.learning.config.IUpdater; import org.nd4j.linalg.learning.config.NoOp; import org.nd4j.linalg.learning.regularization.Regularization; -import java.util.List; - @AllArgsConstructor public class DummyConfig implements ITraininableLayerConfiguration { private final String name; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/ocnn/OCNNOutputLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/ocnn/OCNNOutputLayer.java index fb7757e87..54b7279ca 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/ocnn/OCNNOutputLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/ocnn/OCNNOutputLayer.java @@ -48,7 +48,7 @@ public class OCNNOutputLayer extends BaseOutputLayer { * The hidden layer size for the one class neural network. Note this would be nOut on a dense * layer. NOut in this neural net is always set to 1 though. 
*/ - @Builder.Default @Getter private int hiddenLayerSize; // embedded hidden layer size aka "K" + @Getter private int hiddenLayerSize; // embedded hidden layer size aka "K" /** For nu definition see the paper */ @Builder.Default @Getter private double nu = 0.04; /** diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/BaseInputPreProcessor.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/BaseInputPreProcessor.java index f621c6d21..649dbdde3 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/BaseInputPreProcessor.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/BaseInputPreProcessor.java @@ -22,8 +22,8 @@ package org.deeplearning4j.nn.conf.preprocessor; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.InputPreProcessor; -import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.common.primitives.Pair; +import org.nd4j.linalg.api.ndarray.INDArray; /** * @author Adam Gibson diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/Cnn3DToFeedForwardPreProcessor.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/Cnn3DToFeedForwardPreProcessor.java index ea57d875a..010ebe578 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/Cnn3DToFeedForwardPreProcessor.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/Cnn3DToFeedForwardPreProcessor.java @@ -20,6 +20,11 @@ package org.deeplearning4j.nn.conf.preprocessor; +import static org.nd4j.linalg.api.shape.Shape.hasDefaultStridesForShape; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import java.util.Arrays; import lombok.Data; import lombok.val; import org.deeplearning4j.nn.api.MaskState; @@ -28,14 +33,8 @@ import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.Convolution3D; import org.deeplearning4j.nn.workspace.ArrayType; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.common.primitives.Pair; -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; - -import java.util.Arrays; - -import static org.nd4j.linalg.api.shape.Shape.hasDefaultStridesForShape; +import org.nd4j.linalg.api.ndarray.INDArray; @Data public class Cnn3DToFeedForwardPreProcessor implements InputPreProcessor { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/CnnToFeedForwardPreProcessor.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/CnnToFeedForwardPreProcessor.java index 8f7be97d5..cafd4d059 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/CnnToFeedForwardPreProcessor.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/CnnToFeedForwardPreProcessor.java @@ -20,21 +20,20 @@ package org.deeplearning4j.nn.conf.preprocessor; +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import java.util.Arrays; import lombok.Data; import lombok.val; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.inputs.InputType; +import 
org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.shape.Shape; -import org.nd4j.common.primitives.Pair; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import org.deeplearning4j.nn.workspace.ArrayType; -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; - -import java.util.Arrays; @Data public class CnnToFeedForwardPreProcessor implements InputPreProcessor { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/CnnToRnnPreProcessor.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/CnnToRnnPreProcessor.java index 42a38e786..cf58b81a9 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/CnnToRnnPreProcessor.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/CnnToRnnPreProcessor.java @@ -20,22 +20,21 @@ package org.deeplearning4j.nn.conf.preprocessor; +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import java.util.Arrays; import lombok.*; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.RNNFormat; import org.deeplearning4j.nn.conf.inputs.InputType; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.deeplearning4j.util.TimeSeriesUtils; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.api.shape.Shape; import org.nd4j.common.primitives.Pair; import org.nd4j.common.util.ArrayUtil; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import org.deeplearning4j.nn.workspace.ArrayType; -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; - -import java.util.Arrays; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.api.shape.Shape; @Data @EqualsAndHashCode(exclude = {"product"}) diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/ComposableInputPreProcessor.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/ComposableInputPreProcessor.java index be8b50316..cc908c467 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/ComposableInputPreProcessor.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/ComposableInputPreProcessor.java @@ -20,17 +20,17 @@ package org.deeplearning4j.nn.conf.preprocessor; +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; import lombok.Data; import lombok.EqualsAndHashCode; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.workspace.ArrayType; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.common.primitives.Pair; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; +import org.nd4j.common.primitives.Pair; +import org.nd4j.linalg.api.ndarray.INDArray; @Data @EqualsAndHashCode(callSuper = false) diff --git 
a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/FeedForwardToCnn3DPreProcessor.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/FeedForwardToCnn3DPreProcessor.java index 9d667cc07..0b4187e30 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/FeedForwardToCnn3DPreProcessor.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/FeedForwardToCnn3DPreProcessor.java @@ -20,21 +20,20 @@ package org.deeplearning4j.nn.conf.preprocessor; +import static org.nd4j.linalg.api.shape.Shape.hasDefaultStridesForShape; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import java.util.Arrays; import lombok.*; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.workspace.ArrayType; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.common.primitives.Pair; import org.nd4j.common.util.ArrayUtil; -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; - -import java.util.Arrays; - -import static org.nd4j.linalg.api.shape.Shape.hasDefaultStridesForShape; +import org.nd4j.linalg.api.ndarray.INDArray; @Data @EqualsAndHashCode(exclude = {"shape"}) diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/FeedForwardToCnnPreProcessor.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/FeedForwardToCnnPreProcessor.java index 111e253c5..810ae513a 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/FeedForwardToCnnPreProcessor.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/FeedForwardToCnnPreProcessor.java @@ -20,20 +20,19 @@ package org.deeplearning4j.nn.conf.preprocessor; +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import java.util.Arrays; import lombok.*; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.inputs.InputType; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.api.shape.Shape; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.nd4j.common.primitives.Pair; import org.nd4j.common.util.ArrayUtil; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import org.deeplearning4j.nn.workspace.ArrayType; -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; - -import java.util.Arrays; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.api.shape.Shape; @Data @EqualsAndHashCode(exclude = {"shape"}) diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/FeedForwardToRnnPreProcessor.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/FeedForwardToRnnPreProcessor.java index cdc0bfab8..c165f114f 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/FeedForwardToRnnPreProcessor.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/FeedForwardToRnnPreProcessor.java @@ -20,6 +20,8 @@ package 
org.deeplearning4j.nn.conf.preprocessor; +import com.fasterxml.jackson.annotation.JsonProperty; +import java.util.Arrays; import lombok.Data; import lombok.NoArgsConstructor; import lombok.val; @@ -27,14 +29,12 @@ import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.RNNFormat; import org.deeplearning4j.nn.conf.inputs.InputType; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.deeplearning4j.util.TimeSeriesUtils; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.shape.Shape; -import org.nd4j.common.primitives.Pair; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import org.deeplearning4j.nn.workspace.ArrayType; -import com.fasterxml.jackson.annotation.JsonProperty; -import java.util.Arrays; @Data @NoArgsConstructor diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/RnnToCnnPreProcessor.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/RnnToCnnPreProcessor.java index af53377e3..ec1097e39 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/RnnToCnnPreProcessor.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/RnnToCnnPreProcessor.java @@ -20,21 +20,20 @@ package org.deeplearning4j.nn.conf.preprocessor; +import com.fasterxml.jackson.annotation.JsonProperty; +import java.util.Arrays; import lombok.*; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.RNNFormat; import org.deeplearning4j.nn.conf.inputs.InputType; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.deeplearning4j.util.TimeSeriesUtils; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.api.shape.Shape; import org.nd4j.common.primitives.Pair; import org.nd4j.common.util.ArrayUtil; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import org.deeplearning4j.nn.workspace.ArrayType; -import com.fasterxml.jackson.annotation.JsonProperty; - -import java.util.Arrays; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.api.shape.Shape; @Data @EqualsAndHashCode(exclude = {"product"}) diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/RnnToFeedForwardPreProcessor.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/RnnToFeedForwardPreProcessor.java index 0afc0d82f..c575741ac 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/RnnToFeedForwardPreProcessor.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/preprocessor/RnnToFeedForwardPreProcessor.java @@ -20,6 +20,8 @@ package org.deeplearning4j.nn.conf.preprocessor; +import com.fasterxml.jackson.annotation.JsonProperty; +import java.util.Arrays; import lombok.Data; import lombok.NoArgsConstructor; import lombok.extern.slf4j.Slf4j; @@ -28,15 +30,12 @@ import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.RNNFormat; import org.deeplearning4j.nn.conf.inputs.InputType; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.deeplearning4j.util.TimeSeriesUtils; +import 
org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.shape.Shape; -import org.nd4j.common.primitives.Pair; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import org.deeplearning4j.nn.workspace.ArrayType; -import com.fasterxml.jackson.annotation.JsonProperty; - -import java.util.Arrays; @Data @Slf4j diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/BaseNetConfigDeserializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/BaseNetConfigDeserializer.java index 01e1fc00a..96ce4097d 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/BaseNetConfigDeserializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/BaseNetConfigDeserializer.java @@ -20,8 +20,19 @@ package org.deeplearning4j.nn.conf.serde; +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.databind.DeserializationContext; +import com.fasterxml.jackson.databind.JsonDeserializer; +import com.fasterxml.jackson.databind.JsonMappingException; +import com.fasterxml.jackson.databind.deser.ResolvableDeserializer; +import com.fasterxml.jackson.databind.deser.std.StdDeserializer; +import com.fasterxml.jackson.databind.node.ObjectNode; +import java.io.IOException; +import java.lang.reflect.InvocationTargetException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; import lombok.extern.slf4j.Slf4j; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.Updater; import org.deeplearning4j.nn.conf.distribution.Distribution; import org.deeplearning4j.nn.conf.layers.BaseLayerConfiguration; @@ -36,19 +47,6 @@ import org.nd4j.linalg.learning.regularization.Regularization; import org.nd4j.linalg.learning.regularization.WeightDecay; import org.nd4j.linalg.lossfunctions.ILossFunction; import org.nd4j.linalg.lossfunctions.impl.*; -import com.fasterxml.jackson.core.JsonParser; -import com.fasterxml.jackson.databind.DeserializationContext; -import com.fasterxml.jackson.databind.JsonDeserializer; -import com.fasterxml.jackson.databind.JsonMappingException; -import com.fasterxml.jackson.databind.deser.ResolvableDeserializer; -import com.fasterxml.jackson.databind.deser.std.StdDeserializer; -import com.fasterxml.jackson.databind.node.ObjectNode; - -import java.io.IOException; -import java.lang.reflect.InvocationTargetException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Map; @Slf4j public abstract class BaseNetConfigDeserializer extends StdDeserializer implements ResolvableDeserializer { @@ -251,7 +249,7 @@ public abstract class BaseNetConfigDeserializer extends StdDeserializer im Distribution d = null; if(w == WeightInit.DISTRIBUTION && on.has("dist")){ String dist = on.get("dist").toString(); - d = NeuralNetConfiguration.mapper().readValue(dist, Distribution.class); + d = CavisMapper.getMapper(CavisMapper.Type.JSON).readValue(dist, Distribution.class); } IWeightInit iwi = w.getWeightInitFunction(d); baseLayerConfiguration.setWeightInit(iwi); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/CavisMapper.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/CavisMapper.java new file mode 100644 index 000000000..1181fbda4 --- /dev/null +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/CavisMapper.java @@ -0,0 +1,60 @@ +/* + * + * 
****************************************************************************** + * * + * * This program and the accompanying materials are made available under the + * * terms of the Apache License, Version 2.0 which is available at + * * https://www.apache.org/licenses/LICENSE-2.0. + * * + * * See the NOTICE file distributed with this work for additional + * * information regarding copyright ownership. + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * * License for the specific language governing permissions and limitations + * * under the License. + * * + * * SPDX-License-Identifier: Apache-2.0 + * ***************************************************************************** + * + */ + +package org.deeplearning4j.nn.conf.serde; + +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.MapperFeature; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.SerializationFeature; +import com.fasterxml.jackson.databind.json.JsonMapper; +import com.fasterxml.jackson.dataformat.yaml.YAMLMapper; +import lombok.NonNull; + +public class CavisMapper { + + public static ObjectMapper getMapper(@NonNull Type type) { + ObjectMapper mapper; + switch (type) { + case JSON: + mapper = JsonMapper.builder() + .enable(SerializationFeature.INDENT_OUTPUT) + .enable(MapperFeature.SORT_PROPERTIES_ALPHABETICALLY) + .enable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES) + .enable(DeserializationFeature.FAIL_ON_INVALID_SUBTYPE) + .enable(DeserializationFeature.FAIL_ON_IGNORED_PROPERTIES) + .build(); + break; + case YAML: + mapper = YAMLMapper.builder().build(); + break; + default: + throw new RuntimeException("Mapper type not recognised."); + } + + return mapper; + } + + public static enum Type { + JSON, + YAML + } +} diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/ComputationGraphConfigurationDeserializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/ComputationGraphConfigurationDeserializer.java index a3ea58079..0299619d9 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/ComputationGraphConfigurationDeserializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/ComputationGraphConfigurationDeserializer.java @@ -20,9 +20,18 @@ package org.deeplearning4j.nn.conf.serde; +import com.fasterxml.jackson.core.JsonLocation; +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.databind.DeserializationContext; +import com.fasterxml.jackson.databind.JsonDeserializer; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ObjectNode; +import java.io.IOException; +import java.io.StringReader; +import java.util.*; import org.apache.commons.io.IOUtils; import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.dropout.Dropout; import org.deeplearning4j.nn.conf.graph.GraphVertex; import org.deeplearning4j.nn.conf.graph.LayerVertex; @@ -32,18 +41,6 @@ import org.deeplearning4j.nn.conf.layers.BatchNormalization; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import 
org.deeplearning4j.nn.conf.weightnoise.DropConnect; import org.deeplearning4j.nn.params.BatchNormalizationParamInitializer; -import com.fasterxml.jackson.core.JsonLocation; -import com.fasterxml.jackson.core.JsonParser; -import com.fasterxml.jackson.databind.DeserializationContext; -import com.fasterxml.jackson.databind.JsonDeserializer; -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.node.ObjectNode; - -import java.io.IOException; -import java.io.StringReader; -import java.util.*; - public class ComputationGraphConfigurationDeserializer extends BaseNetConfigDeserializer { @@ -96,7 +93,7 @@ public class ComputationGraphConfigurationDeserializer } jsonSubString = s.substring((int) charOffsetStart - 1, charOffsetEnd.intValue()); - ObjectMapper om = NeuralNetConfiguration.mapper(); + ObjectMapper om = CavisMapper.getMapper(CavisMapper.Type.JSON); JsonNode rootNode = om.readTree(jsonSubString); ObjectNode verticesNode = (ObjectNode) rootNode.get("vertices"); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/JsonMappers.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/JsonMappers.java index 4f7ac3b78..e050734c9 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/JsonMappers.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/JsonMappers.java @@ -20,14 +20,14 @@ package org.deeplearning4j.nn.conf.serde; -import lombok.extern.slf4j.Slf4j; -import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.serde.legacy.LegacyJsonFormat; import com.fasterxml.jackson.databind.*; import com.fasterxml.jackson.databind.deser.BeanDeserializerModifier; import com.fasterxml.jackson.databind.module.SimpleModule; import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; +import lombok.extern.slf4j.Slf4j; +import org.deeplearning4j.nn.conf.ComputationGraphConfiguration; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.serde.legacy.LegacyJsonFormat; @Slf4j public class JsonMappers { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/NeuralNetConfigurationDeserializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/NeuralNetConfigurationDeserializer.java index 08d7f4c6c..57f70f723 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/NeuralNetConfigurationDeserializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/NeuralNetConfigurationDeserializer.java @@ -20,6 +20,17 @@ package org.deeplearning4j.nn.conf.serde; +import com.fasterxml.jackson.core.JsonLocation; +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.databind.DeserializationContext; +import com.fasterxml.jackson.databind.JsonDeserializer; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ArrayNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import java.io.IOException; +import java.io.StringReader; +import java.util.Set; import org.apache.commons.io.IOUtils; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.dropout.Dropout; @@ -29,18 +40,6 @@ import org.deeplearning4j.nn.conf.layers.BatchNormalization; import 
org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.weightnoise.DropConnect; import org.deeplearning4j.nn.params.BatchNormalizationParamInitializer; -import com.fasterxml.jackson.core.JsonLocation; -import com.fasterxml.jackson.core.JsonParser; -import com.fasterxml.jackson.databind.DeserializationContext; -import com.fasterxml.jackson.databind.JsonDeserializer; -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.node.ArrayNode; -import com.fasterxml.jackson.databind.node.ObjectNode; - -import java.io.IOException; -import java.io.StringReader; -import java.util.Set; public class NeuralNetConfigurationDeserializer extends BaseNetConfigDeserializer { @@ -78,7 +77,7 @@ public class NeuralNetConfigurationDeserializer extends BaseNetConfigDeserialize } String jsonSubString = s.substring((int) charOffsetStart - 1, (int) charOffsetEnd); - ObjectMapper om = NeuralNetConfiguration.mapper(); + ObjectMapper om = CavisMapper.getMapper(CavisMapper.Type.JSON); JsonNode rootNode = om.readTree(jsonSubString); ArrayNode confsNode = (ArrayNode)rootNode.get("confs"); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/format/DataFormatDeserializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/format/DataFormatDeserializer.java index e9397126a..0e98fd7ca 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/format/DataFormatDeserializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/format/DataFormatDeserializer.java @@ -19,16 +19,14 @@ */ package org.deeplearning4j.nn.conf.serde.format; -import org.deeplearning4j.nn.conf.CNN2DFormat; -import org.deeplearning4j.nn.conf.DataFormat; -import org.deeplearning4j.nn.conf.RNNFormat; import com.fasterxml.jackson.core.JsonParser; -import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.DeserializationContext; import com.fasterxml.jackson.databind.JsonDeserializer; import com.fasterxml.jackson.databind.JsonNode; - import java.io.IOException; +import org.deeplearning4j.nn.conf.CNN2DFormat; +import org.deeplearning4j.nn.conf.DataFormat; +import org.deeplearning4j.nn.conf.RNNFormat; public class DataFormatDeserializer extends JsonDeserializer { @Override diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/format/DataFormatSerializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/format/DataFormatSerializer.java index 835f15120..213dbd43a 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/format/DataFormatSerializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/format/DataFormatSerializer.java @@ -19,14 +19,11 @@ */ package org.deeplearning4j.nn.conf.serde.format; -import org.deeplearning4j.nn.conf.CNN2DFormat; -import org.deeplearning4j.nn.conf.DataFormat; -import org.deeplearning4j.nn.conf.RNNFormat; import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.databind.JsonSerializer; import com.fasterxml.jackson.databind.SerializerProvider; - import java.io.IOException; +import org.deeplearning4j.nn.conf.DataFormat; public class DataFormatSerializer extends JsonSerializer { @Override diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/legacy/LegacyIntArrayDeserializer.java 
b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/legacy/LegacyIntArrayDeserializer.java index 804655669..31573a26a 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/legacy/LegacyIntArrayDeserializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/legacy/LegacyIntArrayDeserializer.java @@ -21,12 +21,10 @@ package org.deeplearning4j.nn.conf.serde.legacy; import com.fasterxml.jackson.core.JsonParser; -import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.DeserializationContext; import com.fasterxml.jackson.databind.JsonDeserializer; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ArrayNode; - import java.io.IOException; public class LegacyIntArrayDeserializer extends JsonDeserializer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/legacy/LegacyJsonFormat.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/legacy/LegacyJsonFormat.java index ceb645be7..6cae0d069 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/legacy/LegacyJsonFormat.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/serde/legacy/LegacyJsonFormat.java @@ -20,6 +20,9 @@ package org.deeplearning4j.nn.conf.serde.legacy; +import com.fasterxml.jackson.annotation.JsonSubTypes; +import com.fasterxml.jackson.annotation.JsonTypeInfo; +import com.fasterxml.jackson.databind.ObjectMapper; import lombok.AccessLevel; import lombok.NoArgsConstructor; import org.deeplearning4j.nn.conf.InputPreProcessor; @@ -43,9 +46,6 @@ import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.activations.impl.*; import org.nd4j.linalg.lossfunctions.ILossFunction; import org.nd4j.linalg.lossfunctions.impl.*; -import com.fasterxml.jackson.annotation.JsonSubTypes; -import com.fasterxml.jackson.annotation.JsonTypeInfo; -import com.fasterxml.jackson.databind.ObjectMapper; public class LegacyJsonFormat { @@ -100,7 +100,7 @@ public class LegacyJsonFormat { @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.WRAPPER_OBJECT) @JsonSubTypes(value = {@JsonSubTypes.Type(value = AutoEncoder.class, name = "autoEncoder"), @JsonSubTypes.Type(value = ConvolutionLayer.class, name = "convolution"), - @JsonSubTypes.Type(value = Convolution1DLayer.class, name = "convolution1d"), + @JsonSubTypes.Type(value = Convolution1D.class, name = "convolution1d"), @JsonSubTypes.Type(value = GravesLSTM.class, name = "gravesLSTM"), @JsonSubTypes.Type(value = LSTM.class, name = "LSTM"), @JsonSubTypes.Type(value = GravesBidirectionalLSTM.class, name = "gravesBidirectionalLSTM"), diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/stepfunctions/StepFunction.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/stepfunctions/StepFunction.java index 3fbbefc3a..ca42bd92b 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/stepfunctions/StepFunction.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/stepfunctions/StepFunction.java @@ -24,7 +24,6 @@ import com.fasterxml.jackson.annotation.JsonSubTypes; import com.fasterxml.jackson.annotation.JsonTypeInfo; import com.fasterxml.jackson.annotation.JsonTypeInfo.As; import com.fasterxml.jackson.annotation.JsonTypeInfo.Id; - import java.io.Serializable; @JsonTypeInfo(use = Id.NAME, include = As.WRAPPER_OBJECT) diff --git 
a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/weightnoise/DropConnect.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/weightnoise/DropConnect.java index 926d2017d..410ebe9d3 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/weightnoise/DropConnect.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/weightnoise/DropConnect.java @@ -20,16 +20,16 @@ package org.deeplearning4j.nn.conf.weightnoise; +import com.fasterxml.jackson.annotation.JsonProperty; import lombok.Data; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.random.impl.DropOut; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.schedule.ISchedule; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import org.deeplearning4j.nn.workspace.ArrayType; -import com.fasterxml.jackson.annotation.JsonProperty; @Data public class DropConnect implements IWeightNoise { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/weightnoise/IWeightNoise.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/weightnoise/IWeightNoise.java index c6c77d3d2..a1f7ccac3 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/weightnoise/IWeightNoise.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/weightnoise/IWeightNoise.java @@ -20,12 +20,11 @@ package org.deeplearning4j.nn.conf.weightnoise; -import org.deeplearning4j.nn.api.Layer; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import com.fasterxml.jackson.annotation.JsonTypeInfo; - import java.io.Serializable; +import org.deeplearning4j.nn.api.Layer; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.linalg.api.ndarray.INDArray; @JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, include = JsonTypeInfo.As.PROPERTY, property = "@class") public interface IWeightNoise extends Serializable, Cloneable{ diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/weightnoise/WeightNoise.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/weightnoise/WeightNoise.java index cccb3d476..f3c1c9580 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/weightnoise/WeightNoise.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/weightnoise/WeightNoise.java @@ -20,18 +20,18 @@ package org.deeplearning4j.nn.conf.weightnoise; +import com.fasterxml.jackson.annotation.JsonProperty; import lombok.Data; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.distribution.Distribution; import org.deeplearning4j.nn.conf.distribution.Distributions; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.impl.transforms.pairwise.arithmetic.AddOp; import org.nd4j.linalg.api.ops.impl.transforms.pairwise.arithmetic.MulOp; import org.nd4j.linalg.factory.Nd4j; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import org.deeplearning4j.nn.workspace.ArrayType; -import com.fasterxml.jackson.annotation.JsonProperty; @Data public class WeightNoise implements 
IWeightNoise { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/gradient/DefaultGradient.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/gradient/DefaultGradient.java index 3f147df0a..70b6e3bff 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/gradient/DefaultGradient.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/gradient/DefaultGradient.java @@ -20,12 +20,11 @@ package org.deeplearning4j.nn.gradient; +import java.util.*; import lombok.Setter; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; -import java.util.*; - public class DefaultGradient implements Gradient { public static final char DEFAULT_FLATTENING_ORDER = 'f'; private final Map gradients = new LinkedHashMap<>(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/gradient/Gradient.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/gradient/Gradient.java index f09ae540d..dd6ecb5d6 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/gradient/Gradient.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/gradient/Gradient.java @@ -20,12 +20,10 @@ package org.deeplearning4j.nn.gradient; -import org.nd4j.linalg.api.ndarray.INDArray; - import java.io.Serializable; -import java.util.List; import java.util.Map; import java.util.Set; +import org.nd4j.linalg.api.ndarray.INDArray; public interface Gradient extends Serializable { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java index e041ca68f..405da9597 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java @@ -20,6 +20,9 @@ package org.deeplearning4j.nn.graph; +import java.io.*; +import java.util.*; +import java.util.concurrent.atomic.AtomicLong; import lombok.Getter; import lombok.NonNull; import lombok.Setter; @@ -29,12 +32,8 @@ import net.brutex.ai.dnn.networks.ArtificialNeuralNetwork; import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.StringUtils; import org.bytedeco.javacpp.Pointer; -import org.deeplearning4j.exception.DL4JInvalidConfigException; -import org.deeplearning4j.nn.conf.layers.LayerConfiguration; -import org.deeplearning4j.util.*; -import org.nd4j.adapters.OutputAdapter; -import org.nd4j.linalg.dataset.AsyncMultiDataSetIterator; import org.deeplearning4j.exception.DL4JException; +import org.deeplearning4j.exception.DL4JInvalidConfigException; import org.deeplearning4j.nn.api.*; import org.deeplearning4j.nn.api.Updater; import org.deeplearning4j.nn.api.layers.IOutputLayer; @@ -42,6 +41,7 @@ import org.deeplearning4j.nn.api.layers.RecurrentLayer; import org.deeplearning4j.nn.conf.*; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.FeedForwardLayer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.recurrent.Bidirectional; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; @@ -63,7 +63,12 @@ import org.deeplearning4j.optimize.Solver; import org.deeplearning4j.optimize.api.ConvexOptimizer; import org.deeplearning4j.optimize.api.TrainingListener; import org.deeplearning4j.optimize.solvers.accumulation.GradientsAccumulator; +import 
org.deeplearning4j.util.*; +import org.nd4j.adapters.OutputAdapter; import org.nd4j.common.base.Preconditions; +import org.nd4j.common.primitives.Pair; +import org.nd4j.common.primitives.Triple; +import org.nd4j.common.util.OneTimeLogger; import org.nd4j.evaluation.IEvaluation; import org.nd4j.evaluation.classification.Evaluation; import org.nd4j.evaluation.classification.ROC; @@ -71,12 +76,14 @@ import org.nd4j.evaluation.classification.ROCMultiClass; import org.nd4j.evaluation.regression.RegressionEvaluation; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.memory.MemoryWorkspace; +import org.nd4j.linalg.api.memory.abstracts.DummyWorkspace; import org.nd4j.linalg.api.memory.conf.WorkspaceConfiguration; import org.nd4j.linalg.api.memory.enums.AllocationPolicy; import org.nd4j.linalg.api.memory.enums.LearningPolicy; import org.nd4j.linalg.api.memory.enums.ResetPolicy; import org.nd4j.linalg.api.memory.enums.SpillPolicy; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.dataset.AsyncMultiDataSetIterator; import org.nd4j.linalg.dataset.adapter.MultiDataSetIteratorAdapter; import org.nd4j.linalg.dataset.api.DataSet; import org.nd4j.linalg.dataset.api.DataSetUtil; @@ -92,17 +99,9 @@ import org.nd4j.linalg.heartbeat.reports.Task; import org.nd4j.linalg.heartbeat.utils.EnvironmentUtils; import org.nd4j.linalg.heartbeat.utils.TaskUtils; import org.nd4j.linalg.indexing.NDArrayIndex; -import org.nd4j.linalg.api.memory.abstracts.DummyWorkspace; -import org.nd4j.common.primitives.Pair; -import org.nd4j.common.primitives.Triple; import org.nd4j.linalg.schedule.ISchedule; import org.nd4j.linalg.workspace.ND4JWorkspaceException; import org.nd4j.linalg.workspace.WorkspaceUtils; -import org.nd4j.common.util.OneTimeLogger; - -import java.io.*; -import java.util.*; -import java.util.concurrent.atomic.AtomicLong; @Slf4j public class ComputationGraph extends ArtificialNeuralNetwork implements Serializable { @@ -2383,8 +2382,8 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali //Standard feed-forward case if(i > 0 && current.hasLayer() && prev.hasLayer() && - ConvolutionUtils.layerHasConvolutionLayout(prev.getLayer().getLayerConfiguration()) - && ConvolutionUtils.layerHasConvolutionLayout(current.getLayer().getLayerConfiguration())) { + Convolution2DUtils.layerHasConvolutionLayout(prev.getLayer().getLayerConfiguration()) + && Convolution2DUtils.layerHasConvolutionLayout(current.getLayer().getLayerConfiguration())) { /** * Not QUITE the proper fix, but getting close. @@ -2392,8 +2391,8 @@ public class ComputationGraph extends ArtificialNeuralNetwork implements Seriali * Need to play with output sizes a bit to make sure we put the right parameters in there to get * correct behavior. 
*/ - CNN2DFormat preLayerFormat = ConvolutionUtils.getFormatForLayer(prev.getLayer().getLayerConfiguration()); - CNN2DFormat currLayerFormat = ConvolutionUtils.getFormatForLayer(current.getLayer().getLayerConfiguration()); + CNN2DFormat preLayerFormat = Convolution2DUtils.getFormatForLayer(prev.getLayer().getLayerConfiguration()); + CNN2DFormat currLayerFormat = Convolution2DUtils.getFormatForLayer(current.getLayer().getLayerConfiguration()); if(preLayerFormat != currLayerFormat) { int inputIdx = -1; for(int inputVertex = 0; inputVertex < current.getInputVertices().length; inputVertex++) { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/util/ComputationGraphUtil.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/util/ComputationGraphUtil.java index dd0fb43b0..b56ae9b3c 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/util/ComputationGraphUtil.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/util/ComputationGraphUtil.java @@ -20,6 +20,8 @@ package org.deeplearning4j.nn.graph.util; +import java.io.Serializable; +import java.util.List; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.adapter.MultiDataSetIteratorAdapter; import org.nd4j.linalg.dataset.api.DataSet; @@ -27,9 +29,6 @@ import org.nd4j.linalg.dataset.api.MultiDataSet; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator; -import java.io.Serializable; -import java.util.List; - public class ComputationGraphUtil { private ComputationGraphUtil() {} diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/util/GraphIndices.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/util/GraphIndices.java index 13a5edc25..e9e2a7dbf 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/util/GraphIndices.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/util/GraphIndices.java @@ -20,12 +20,11 @@ package org.deeplearning4j.nn.graph.util; +import java.util.Map; import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; -import java.util.Map; - @Data @AllArgsConstructor @Builder diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/BaseGraphVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/BaseGraphVertex.java index 269e67ac0..8d937f399 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/BaseGraphVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/BaseGraphVertex.java @@ -20,18 +20,17 @@ package org.deeplearning4j.nn.graph.vertex; +import java.util.Collections; +import java.util.Map; import lombok.Data; import lombok.Getter; import lombok.Setter; import org.deeplearning4j.nn.api.ITraininableLayerConfiguration; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.graph.vertex.impl.LayerVertex; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; - -import java.util.Collections; -import java.util.Map; @Data public abstract class BaseGraphVertex implements GraphVertex { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/BaseWrapperVertex.java 
b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/BaseWrapperVertex.java index d73315645..4357543d6 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/BaseWrapperVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/BaseWrapperVertex.java @@ -20,15 +20,14 @@ package org.deeplearning4j.nn.graph.vertex; +import java.util.Map; +import org.deeplearning4j.nn.api.ITraininableLayerConfiguration; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; -import org.deeplearning4j.nn.api.ITraininableLayerConfiguration; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.common.primitives.Pair; - -import java.util.Map; +import org.nd4j.linalg.api.ndarray.INDArray; public abstract class BaseWrapperVertex implements GraphVertex { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/GraphVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/GraphVertex.java index 51bd7ee62..6d9039a56 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/GraphVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/GraphVertex.java @@ -20,16 +20,15 @@ package org.deeplearning4j.nn.graph.vertex; +import java.io.Serializable; +import java.util.Map; import org.deeplearning4j.nn.api.ITrainableLayer; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.gradient.Gradient; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.common.primitives.Pair; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; - -import java.io.Serializable; -import java.util.Map; +import org.nd4j.common.primitives.Pair; +import org.nd4j.linalg.api.ndarray.INDArray; public interface GraphVertex extends ITrainableLayer, Serializable { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/VertexIndices.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/VertexIndices.java index db91a1ffd..521e5f151 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/VertexIndices.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/VertexIndices.java @@ -20,11 +20,10 @@ package org.deeplearning4j.nn.graph.vertex; +import java.io.Serializable; import lombok.AllArgsConstructor; import lombok.EqualsAndHashCode; -import java.io.Serializable; - @AllArgsConstructor @EqualsAndHashCode public class VertexIndices implements Serializable { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ElementWiseVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ElementWiseVertex.java index f678fb782..e2a0e64fd 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ElementWiseVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ElementWiseVertex.java @@ -20,12 +20,16 @@ package org.deeplearning4j.nn.graph.vertex.impl; +import java.util.Arrays; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.graph.vertex.BaseGraphVertex; import 
org.deeplearning4j.nn.graph.vertex.VertexIndices; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.memory.MemoryWorkspace; import org.nd4j.linalg.api.ndarray.INDArray; @@ -39,11 +43,6 @@ import org.nd4j.linalg.api.shape.Shape; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.conditions.Conditions; import org.nd4j.linalg.ops.transforms.Transforms; -import org.nd4j.common.primitives.Pair; -import org.deeplearning4j.nn.workspace.ArrayType; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; - -import java.util.Arrays; public class ElementWiseVertex extends BaseGraphVertex { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/FrozenVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/FrozenVertex.java index a3f45121a..6ac2c12b8 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/FrozenVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/FrozenVertex.java @@ -21,7 +21,6 @@ package org.deeplearning4j.nn.graph.vertex.impl; import java.util.Map; - import lombok.EqualsAndHashCode; import org.deeplearning4j.nn.api.ITraininableLayerConfiguration; import org.deeplearning4j.nn.conf.misc.DummyConfig; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/InputVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/InputVertex.java index b2e9e0dac..5f3421f70 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/InputVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/InputVertex.java @@ -26,10 +26,10 @@ import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.graph.vertex.BaseGraphVertex; import org.deeplearning4j.nn.graph.vertex.VertexIndices; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.common.primitives.Pair; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; public class InputVertex extends BaseGraphVertex { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/L2NormalizeVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/L2NormalizeVertex.java index 0931bdb98..64fe5e775 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/L2NormalizeVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/L2NormalizeVertex.java @@ -26,6 +26,9 @@ import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.graph.vertex.BaseGraphVertex; import org.deeplearning4j.nn.graph.vertex.VertexIndices; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.memory.MemoryWorkspace; import org.nd4j.linalg.api.ndarray.INDArray; @@ -33,9 +36,6 @@ import org.nd4j.linalg.api.ops.impl.broadcast.BroadcastDivOp; import 
org.nd4j.linalg.api.ops.impl.broadcast.BroadcastMulOp; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.ops.transforms.Transforms; -import org.nd4j.common.primitives.Pair; -import org.deeplearning4j.nn.workspace.ArrayType; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; public class L2NormalizeVertex extends BaseGraphVertex { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/L2Vertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/L2Vertex.java index d839b9872..30fa34e50 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/L2Vertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/L2Vertex.java @@ -26,6 +26,9 @@ import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.graph.vertex.BaseGraphVertex; import org.deeplearning4j.nn.graph.vertex.VertexIndices; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.memory.MemoryWorkspace; import org.nd4j.linalg.api.ndarray.INDArray; @@ -33,9 +36,6 @@ import org.nd4j.linalg.api.ops.impl.broadcast.BroadcastMulOp; import org.nd4j.linalg.api.ops.impl.reduce3.EuclideanDistance; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.ops.transforms.Transforms; -import org.nd4j.common.primitives.Pair; -import org.deeplearning4j.nn.workspace.ArrayType; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; public class L2Vertex extends BaseGraphVertex { private final double eps; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/LayerVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/LayerVertex.java index d053e27b1..7659a5a94 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/LayerVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/LayerVertex.java @@ -20,11 +20,13 @@ package org.deeplearning4j.nn.graph.vertex.impl; +import java.util.Arrays; +import java.util.Map; import lombok.Data; import lombok.EqualsAndHashCode; +import org.deeplearning4j.nn.api.ITraininableLayerConfiguration; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; -import org.deeplearning4j.nn.api.ITraininableLayerConfiguration; import org.deeplearning4j.nn.api.layers.IOutputLayer; import org.deeplearning4j.nn.api.layers.RecurrentLayer; import org.deeplearning4j.nn.conf.InputPreProcessor; @@ -35,13 +37,10 @@ import org.deeplearning4j.nn.graph.vertex.VertexIndices; import org.deeplearning4j.nn.layers.BaseOutputLayer; import org.deeplearning4j.nn.layers.FrozenLayer; import org.deeplearning4j.nn.layers.FrozenLayerWithBackprop; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.common.primitives.Pair; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; - -import java.util.Arrays; -import java.util.Map; @Data @EqualsAndHashCode(callSuper = true) diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/MergeVertex.java 
b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/MergeVertex.java index 1187bbabb..f8d2f20df 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/MergeVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/MergeVertex.java @@ -20,6 +20,7 @@ package org.deeplearning4j.nn.graph.vertex.impl; +import java.util.Arrays; import lombok.val; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; @@ -27,6 +28,9 @@ import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.graph.vertex.BaseGraphVertex; import org.deeplearning4j.nn.graph.vertex.VertexIndices; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.memory.MemoryWorkspace; import org.nd4j.linalg.api.ndarray.INDArray; @@ -34,11 +38,6 @@ import org.nd4j.linalg.api.ops.impl.transforms.pairwise.bool.Or; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.INDArrayIndex; import org.nd4j.linalg.indexing.NDArrayIndex; -import org.nd4j.common.primitives.Pair; -import org.deeplearning4j.nn.workspace.ArrayType; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; - -import java.util.Arrays; public class MergeVertex extends BaseGraphVertex { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/PoolHelperVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/PoolHelperVertex.java index 9885b6983..4b61c893e 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/PoolHelperVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/PoolHelperVertex.java @@ -26,14 +26,14 @@ import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.graph.vertex.BaseGraphVertex; import org.deeplearning4j.nn.graph.vertex.VertexIndices; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.impl.transforms.pairwise.bool.Or; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.NDArrayIndex; -import org.nd4j.common.primitives.Pair; -import org.deeplearning4j.nn.workspace.ArrayType; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; public class PoolHelperVertex extends BaseGraphVertex { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/PreprocessorVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/PreprocessorVertex.java index 4586dd3d8..f190befb1 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/PreprocessorVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/PreprocessorVertex.java @@ -28,10 +28,10 @@ import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.graph.vertex.BaseGraphVertex; import org.deeplearning4j.nn.graph.vertex.VertexIndices; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import 
org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.common.primitives.Pair; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; public class PreprocessorVertex extends BaseGraphVertex { @Getter diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ReshapeVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ReshapeVertex.java index 5ccc81132..ea8cd15c3 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ReshapeVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ReshapeVertex.java @@ -28,9 +28,9 @@ import org.deeplearning4j.nn.graph.vertex.BaseGraphVertex; import org.deeplearning4j.nn.graph.vertex.VertexIndices; import org.deeplearning4j.nn.workspace.ArrayType; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.common.primitives.Pair; public class ReshapeVertex extends BaseGraphVertex { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ScaleVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ScaleVertex.java index 16863434a..c42ab8a8c 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ScaleVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ScaleVertex.java @@ -26,12 +26,12 @@ import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.graph.vertex.BaseGraphVertex; import org.deeplearning4j.nn.graph.vertex.VertexIndices; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.memory.MemoryWorkspace; import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.common.primitives.Pair; -import org.deeplearning4j.nn.workspace.ArrayType; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; public class ScaleVertex extends BaseGraphVertex { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ShiftVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ShiftVertex.java index d289c4e75..2320c4968 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ShiftVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/ShiftVertex.java @@ -26,12 +26,12 @@ import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.graph.vertex.BaseGraphVertex; import org.deeplearning4j.nn.graph.vertex.VertexIndices; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.memory.MemoryWorkspace; import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.common.primitives.Pair; -import org.deeplearning4j.nn.workspace.ArrayType; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; public class ShiftVertex extends BaseGraphVertex { diff --git 
a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/StackVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/StackVertex.java index 6889a0f39..dfa27c89d 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/StackVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/StackVertex.java @@ -27,15 +27,15 @@ import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.graph.vertex.BaseGraphVertex; import org.deeplearning4j.nn.graph.vertex.VertexIndices; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.memory.MemoryWorkspace; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.INDArrayIndex; import org.nd4j.linalg.indexing.NDArrayIndex; -import org.nd4j.common.primitives.Pair; -import org.deeplearning4j.nn.workspace.ArrayType; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; public class StackVertex extends BaseGraphVertex { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/SubsetVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/SubsetVertex.java index d3271849c..4e67d3bad 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/SubsetVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/SubsetVertex.java @@ -20,21 +20,20 @@ package org.deeplearning4j.nn.graph.vertex.impl; +import java.util.Arrays; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.graph.vertex.BaseGraphVertex; import org.deeplearning4j.nn.graph.vertex.VertexIndices; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.indexing.INDArrayIndex; import org.nd4j.linalg.indexing.NDArrayIndex; -import org.nd4j.common.primitives.Pair; -import org.deeplearning4j.nn.workspace.ArrayType; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; - -import java.util.Arrays; public class SubsetVertex extends BaseGraphVertex { private final int from; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/UnstackVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/UnstackVertex.java index c31cd1ae1..501d29d68 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/UnstackVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/UnstackVertex.java @@ -20,21 +20,20 @@ package org.deeplearning4j.nn.graph.vertex.impl; +import java.util.Arrays; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.graph.vertex.BaseGraphVertex; import org.deeplearning4j.nn.graph.vertex.VertexIndices; 
+import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.indexing.INDArrayIndex; import org.nd4j.linalg.indexing.NDArrayIndex; -import org.nd4j.common.primitives.Pair; -import org.deeplearning4j.nn.workspace.ArrayType; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; - -import java.util.Arrays; public class UnstackVertex extends BaseGraphVertex { private final long from; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/rnn/DuplicateToTimeSeriesVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/rnn/DuplicateToTimeSeriesVertex.java index 27eb238d3..07a138eff 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/rnn/DuplicateToTimeSeriesVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/rnn/DuplicateToTimeSeriesVertex.java @@ -27,13 +27,13 @@ import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.graph.vertex.BaseGraphVertex; import org.deeplearning4j.nn.graph.vertex.VertexIndices; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.indexing.INDArrayIndex; import org.nd4j.linalg.indexing.NDArrayIndex; -import org.nd4j.common.primitives.Pair; -import org.deeplearning4j.nn.workspace.ArrayType; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; public class DuplicateToTimeSeriesVertex extends BaseGraphVertex { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/rnn/LastTimeStepVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/rnn/LastTimeStepVertex.java index 4402dc4c5..f236fd4ab 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/rnn/LastTimeStepVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/rnn/LastTimeStepVertex.java @@ -27,14 +27,14 @@ import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.graph.vertex.BaseGraphVertex; import org.deeplearning4j.nn.graph.vertex.VertexIndices; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.INDArrayIndex; import org.nd4j.linalg.indexing.NDArrayIndex; -import org.nd4j.common.primitives.Pair; -import org.deeplearning4j.nn.workspace.ArrayType; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; public class LastTimeStepVertex extends BaseGraphVertex { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/rnn/ReverseTimeSeriesVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/rnn/ReverseTimeSeriesVertex.java index 86b5dcab3..22681599e 100644 --- 
a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/rnn/ReverseTimeSeriesVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/graph/vertex/impl/rnn/ReverseTimeSeriesVertex.java @@ -26,13 +26,13 @@ import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.graph.vertex.BaseGraphVertex; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.indexing.INDArrayIndex; import org.nd4j.linalg.indexing.NDArrayIndex; -import org.nd4j.common.primitives.Pair; -import org.deeplearning4j.nn.workspace.ArrayType; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; public class ReverseTimeSeriesVertex extends BaseGraphVertex { private final String inputName; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java index 3096b27c3..006b87250 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java @@ -20,13 +20,13 @@ package org.deeplearning4j.nn.layers; +import com.fasterxml.jackson.annotation.JsonIgnore; import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.Map; - -import com.fasterxml.jackson.annotation.JsonIgnore; import lombok.*; +import lombok.extern.slf4j.Slf4j; import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.nn.api.ITraininableLayerConfiguration; import org.deeplearning4j.nn.api.Layer; @@ -51,8 +51,9 @@ import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator; /** A layer with input and output, no parameters or gradients */ @NoArgsConstructor(force = true) -//@JsonIdentityInfo(generator = ObjectIdGenerators.IntSequenceGenerator.class, property = "@id") -//@JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, property = "__class") +@Slf4j +// @JsonIdentityInfo(generator = ObjectIdGenerators.IntSequenceGenerator.class, property = "@id") +// @JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, property = "__class") public abstract class AbstractLayer implements Layer { private final @Getter List variables = new ArrayList<>(); @@ -79,10 +80,8 @@ public abstract class AbstractLayer impl protected DataType dataType; protected @Getter @Setter int iterationCount; protected @Getter @Setter int epochCount; - @JsonIgnore - private @Getter @Setter IModel net; - @JsonIgnore - @Getter @Setter @NonNull private NeuralNetConfiguration netConfiguration; + @JsonIgnore private @Getter @Setter IModel net; + @JsonIgnore @Getter @Setter @NonNull private NeuralNetConfiguration netConfiguration; public AbstractLayer(@NonNull LayerConfiguration layerConf, @NonNull DataType dataType) { //noinspection unchecked @@ -94,19 +93,18 @@ public abstract class AbstractLayer impl } this.dataType = dataType; if (layerConfiguration.getNetConfiguration() == null) { - throw new RuntimeException("You cannot create a layer from a layer configuration, that is not part of any neural network configuration."); - } + throw new RuntimeException( + "You cannot create a layer from a layer configuration, that is not part of any neural network configuration."); + } this.net = 
layerConfiguration.getNetConfiguration().getNet(); } public void addTrainingListeners(TrainingListener... listeners) { - if(listeners != null) - trainingListeners.addAll(List.of(listeners)); + if (listeners != null) trainingListeners.addAll(List.of(listeners)); } public void addTrainingListeners(Collection listeners) { - if(listeners != null) - trainingListeners.addAll(listeners); + if (listeners != null) trainingListeners.addAll(listeners); } @Override @@ -470,7 +468,7 @@ public abstract class AbstractLayer impl @Override public int getInputMiniBatchSize() { - if(input==null) return 0; + if (input == null) return 0; return (int) input.size(0); } @@ -564,8 +562,9 @@ public abstract class AbstractLayer impl */ @Override public void setParamTable(Map paramTable) { - - throw new RuntimeException("Not implemented"); + log.warn( + "Using setParamTable on this layer {} has no effect.", getLayerConfiguration().getName()); + // throw new RuntimeException("Not implemented"); } /** @@ -577,7 +576,7 @@ public abstract class AbstractLayer impl */ @Override public Map getParamTable(boolean isBackprop) { -// throw new RuntimeException("Not implemented"); + // throw new RuntimeException("Not implemented"); return null; } @@ -589,7 +588,7 @@ public abstract class AbstractLayer impl */ @Override public INDArray getParams() { - //throw new RuntimeException("Not implemented"); + // throw new RuntimeException("Not implemented"); return null; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/ActivationLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/ActivationLayer.java index 48df25694..ab3bad71d 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/ActivationLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/ActivationLayer.java @@ -25,12 +25,11 @@ import java.util.Map; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.common.primitives.Pair; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import org.deeplearning4j.nn.workspace.ArrayType; - public class ActivationLayer extends AbstractLayer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java index 8c092b4b1..926e8e887 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java @@ -662,6 +662,7 @@ public abstract class BaseLayer */ public boolean hasBias() { // Overridden by layers supporting no bias mode: dense, output, convolutional, embedding + //return true; return true; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseOutputLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseOutputLayer.java index 5fc9bfde7..01d23178a 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseOutputLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseOutputLayer.java @@ -20,6 +20,10 @@ package 
org.deeplearning4j.nn.layers; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.api.layers.IOutputLayer; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; @@ -30,6 +34,7 @@ import org.deeplearning4j.nn.workspace.ArrayType; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.deeplearning4j.optimize.Solver; import org.nd4j.common.base.Preconditions; +import org.nd4j.common.primitives.Pair; import org.nd4j.evaluation.classification.Evaluation; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; @@ -37,13 +42,6 @@ import org.nd4j.linalg.dataset.api.DataSet; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.lossfunctions.ILossFunction; -import org.nd4j.common.primitives.Pair; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - public abstract class BaseOutputLayer extends BaseLayer implements Serializable, IOutputLayer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BasePretrainNetwork.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BasePretrainNetwork.java index e718b4df8..55cc3b21d 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BasePretrainNetwork.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BasePretrainNetwork.java @@ -21,24 +21,21 @@ package org.deeplearning4j.nn.layers; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Set; import lombok.val; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.params.PretrainParamInitializer; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.learning.regularization.Regularization; import org.nd4j.linalg.lossfunctions.ILossFunction; -import org.nd4j.common.primitives.Pair; - -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; - public abstract class BasePretrainNetwork extends BaseLayer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/DropoutLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/DropoutLayer.java index 6f079e1c7..05ba9eb23 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/DropoutLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/DropoutLayer.java @@ -23,11 +23,11 @@ package org.deeplearning4j.nn.layers; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; -import org.nd4j.linalg.api.buffer.DataType; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.common.primitives.Pair; import org.deeplearning4j.nn.workspace.ArrayType; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; +import org.nd4j.linalg.api.buffer.DataType; +import org.nd4j.linalg.api.ndarray.INDArray; public class DropoutLayer extends BaseLayer { 
diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/FrozenLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/FrozenLayer.java index fe280cc73..f0e21309e 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/FrozenLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/FrozenLayer.java @@ -24,14 +24,15 @@ import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.nn.api.ITraininableLayerConfiguration; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.conf.CacheMode; +import org.deeplearning4j.nn.conf.layers.BaseLayerConfiguration; import org.deeplearning4j.nn.conf.misc.DummyConfig; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.wrapper.BaseWrapperLayer; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.common.primitives.Pair; import org.nd4j.common.util.OneTimeLogger; +import org.nd4j.linalg.api.ndarray.INDArray; @Slf4j public class FrozenLayer extends BaseWrapperLayer { @@ -88,6 +89,8 @@ public class FrozenLayer extends BaseWrapperLayer { return underlying.activate(input, false, workspaceMgr); } + + @Override public void fit() { if (!logFit) { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/FrozenLayerWithBackprop.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/FrozenLayerWithBackprop.java index 15a863430..43bd1fc2d 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/FrozenLayerWithBackprop.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/FrozenLayerWithBackprop.java @@ -26,9 +26,9 @@ import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.wrapper.BaseWrapperLayer; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.common.primitives.Pair; import org.nd4j.common.util.OneTimeLogger; +import org.nd4j.linalg.api.ndarray.INDArray; @Slf4j public class FrozenLayerWithBackprop extends BaseWrapperLayer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/HelperUtils.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/HelperUtils.java index dfff491e4..eb39d30a2 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/HelperUtils.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/HelperUtils.java @@ -19,13 +19,13 @@ */ package org.deeplearning4j.nn.layers; +import static org.deeplearning4j.common.config.DL4JSystemProperties.DISABLE_HELPER_PROPERTY; +import static org.deeplearning4j.common.config.DL4JSystemProperties.HELPER_DISABLE_DEFAULT_VALUE; + import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.common.config.DL4JClassLoading; import org.nd4j.linalg.factory.Nd4j; -import static org.deeplearning4j.common.config.DL4JSystemProperties.DISABLE_HELPER_PROPERTY; -import static org.deeplearning4j.common.config.DL4JSystemProperties.HELPER_DISABLE_DEFAULT_VALUE; - /** * Simple meta helper util class for instantiating * platform specific layer helpers that handle interaction with diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/LossLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/LossLayer.java index 
7a8c6a5fd..d72fc401f 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/LossLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/LossLayer.java @@ -21,27 +21,25 @@ package org.deeplearning4j.nn.layers; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; import org.deeplearning4j.eval.Evaluation; import org.deeplearning4j.nn.api.layers.IOutputLayer; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.deeplearning4j.optimize.Solver; import org.nd4j.common.base.Preconditions; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.api.DataSet; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.lossfunctions.ILossFunction; -import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.util.FeatureUtil; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import org.deeplearning4j.nn.workspace.ArrayType; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.List; - public class LossLayer extends BaseLayer implements Serializable, IOutputLayer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/OutputLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/OutputLayer.java index 1e1ffde9a..2ca3bf5f9 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/OutputLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/OutputLayer.java @@ -20,14 +20,11 @@ package org.deeplearning4j.nn.layers; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.workspace.ArrayType; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.factory.Nd4j; - public class OutputLayer extends BaseOutputLayer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/RepeatVector.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/RepeatVector.java index 7a8f48b92..21f481fac 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/RepeatVector.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/RepeatVector.java @@ -21,6 +21,7 @@ package org.deeplearning4j.nn.layers; +import java.util.Arrays; import org.deeplearning4j.exception.DL4JInvalidInputException; import org.deeplearning4j.nn.conf.CacheMode; import org.deeplearning4j.nn.conf.RNNFormat; @@ -29,12 +30,10 @@ import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.workspace.ArrayType; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.memory.MemoryWorkspace; import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.common.primitives.Pair; - -import java.util.Arrays; public class RepeatVector extends AbstractLayer { diff --git 
a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Cnn3DLossLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Cnn3DLossLayer.java index cb369b0ed..5fe8ad333 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Cnn3DLossLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Cnn3DLossLayer.java @@ -20,6 +20,8 @@ package org.deeplearning4j.nn.layers.convolution; +import java.util.Arrays; +import java.util.List; import lombok.Getter; import lombok.Setter; import lombok.val; @@ -31,18 +33,15 @@ import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.BaseLayer; -import org.deeplearning4j.util.ConvolutionUtils; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.deeplearning4j.util.Convolution2DUtils; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.api.DataSet; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.lossfunctions.ILossFunction; -import org.nd4j.common.primitives.Pair; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import org.deeplearning4j.nn.workspace.ArrayType; - -import java.util.Arrays; -import java.util.List; public class Cnn3DLossLayer extends BaseLayer implements IOutputLayer { @Setter @@ -75,9 +74,9 @@ public class Cnn3DLossLayer extends BaseLayer implements IOutputLayer { @Setter @@ -67,16 +66,16 @@ public class CnnLossLayer extends BaseLayer backpropGradient( + INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { + assertInputSet(true); + if (epsilon.rank() != 3) + throw new DL4JInvalidInputException( + "Got rank " + + epsilon.rank() + + " array as epsilon for Convolution1D backprop with shape " + + Arrays.toString(epsilon.shape()) + + ". Expected rank 3 array with shape [minibatchSize, features, length]. 
" + + layerId()); + Pair fwd = preOutput(false, true, workspaceMgr); + IActivation afn = getTypedLayerConfiguration().getActivationFn(); + INDArray delta = + afn.backprop(fwd.getFirst(), epsilon).getFirst(); // TODO handle activation function params + + org.deeplearning4j.nn.conf.layers.ConvolutionLayer c = getTypedLayerConfiguration(); + Conv1DConfig conf = + Conv1DConfig.builder() + .k(c.getKernelSize()[0]) + .s(c.getStride()[0]) + .d(c.getDilation()[0]) + .p(c.getPadding()[0]) + .dataFormat(Conv1DConfig.NCW) + .paddingMode(Convolution2DUtils.paddingModeForConvolutionMode(convolutionMode)) + .build(); + + INDArray w = + Convolution1DUtils.reshapeWeightArrayOrGradientForFormat( + getParam(ConvolutionParamInitializer.WEIGHT_KEY), RNNFormat.NCW); + + INDArray[] inputArrs; + INDArray[] outputArrs; + INDArray wg = + Convolution1DUtils.reshapeWeightArrayOrGradientForFormat( + gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY), getRnnDataFormat()); + INDArray epsOut = + workspaceMgr.createUninitialized( + ArrayType.ACTIVATION_GRAD, input.dataType(), input.shape()); + INDArray input = this.input.castTo(dataType); + if (getTypedLayerConfiguration().getRnnDataFormat() == RNNFormat.NWC) { + input = input.permute(0, 2, 1); // NHWC to NCHW } - - @Override - public Pair backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { - assertInputSet(true); - if (epsilon.rank() != 3) - throw new DL4JInvalidInputException("Got rank " + epsilon.rank() - + " array as epsilon for Convolution1DLayer backprop with shape " - + Arrays.toString(epsilon.shape()) - + ". Expected rank 3 array with shape [minibatchSize, features, length]. " + layerId()); - Pair fwd = preOutput(false,true,workspaceMgr); - IActivation afn = getTypedLayerConfiguration().getActivationFn(); - INDArray delta = afn.backprop(fwd.getFirst(), epsilon).getFirst(); //TODO handle activation function params - - org.deeplearning4j.nn.conf.layers.Convolution1DLayer c = getTypedLayerConfiguration(); - Conv1DConfig conf = Conv1DConfig.builder() - .k(c.getKernelSize()[0]) - .s(c.getStride()[0]) - .d(c.getDilation()[0]) - .p(c.getPadding()[0]) - .dataFormat(Conv1DConfig.NCW) - .paddingMode(ConvolutionUtils.paddingModeForConvolutionMode(convolutionMode)) - .build(); - - INDArray w = Convolution1DUtils.reshapeWeightArrayOrGradientForFormat( - getParam(ConvolutionParamInitializer.WEIGHT_KEY), - RNNFormat.NCW); - - INDArray[] inputArrs; - INDArray[] outputArrs; - INDArray wg = Convolution1DUtils.reshapeWeightArrayOrGradientForFormat( - gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY), - getRnnDataFormat()); - INDArray epsOut = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, input.dataType(), input.shape()); - INDArray input = this.input.castTo(dataType); - if(getTypedLayerConfiguration().getRnnDataFormat() == RNNFormat.NWC) { - input = input.permute(0,2,1); //NHWC to NCHW - } - - if(getTypedLayerConfiguration().hasBias()) { - INDArray b = getParam(ConvolutionParamInitializer.BIAS_KEY); - b = b.reshape(b.length()); - inputArrs = new INDArray[]{input, w, b, delta}; - INDArray bg = gradientViews.get(ConvolutionParamInitializer.BIAS_KEY); - bg = bg.reshape(bg.length()); - outputArrs = new INDArray[]{epsOut, wg, bg}; - } else { - inputArrs = new INDArray[]{input, w, delta}; - outputArrs = new INDArray[]{epsOut, wg}; - } - - Conv1DDerivative op = new Conv1DDerivative(inputArrs, outputArrs, conf); - Nd4j.exec(op); - - Gradient retGradient = new DefaultGradient(); - if(getTypedLayerConfiguration().hasBias()) { - 
retGradient.setGradientFor(ConvolutionParamInitializer.BIAS_KEY, gradientViews.get(ConvolutionParamInitializer.BIAS_KEY)); - } - retGradient.setGradientFor(ConvolutionParamInitializer.WEIGHT_KEY, gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY), 'c'); - if (getRnnDataFormat() == RNNFormat.NWC) { - epsOut = epsOut.permute(0, 2, 1); - } - return new Pair<>(retGradient, epsOut); + if (getTypedLayerConfiguration().hasBias()) { + INDArray b = getParam(ConvolutionParamInitializer.BIAS_KEY); + b = b.reshape(b.length()); + inputArrs = new INDArray[] {input, w, b, delta}; + INDArray bg = gradientViews.get(ConvolutionParamInitializer.BIAS_KEY); + bg = bg.reshape(bg.length()); + outputArrs = new INDArray[] {epsOut, wg, bg}; + } else { + inputArrs = new INDArray[] {input, w, delta}; + outputArrs = new INDArray[] {epsOut, wg}; } - @Override - protected Pair preOutput4d(boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) { - Pair preOutput = super.preOutput(true, forBackprop, workspaceMgr); - INDArray p3d = preOutput.getFirst(); - INDArray p = preOutput.getFirst().reshape(p3d.size(0), p3d.size(1), p3d.size(2), 1); - preOutput.setFirst(p); - return preOutput; + Conv1DDerivative op = new Conv1DDerivative(inputArrs, outputArrs, conf); + Nd4j.exec(op); + + Gradient retGradient = new DefaultGradient(); + if (getTypedLayerConfiguration().hasBias()) { + retGradient.setGradientFor( + ConvolutionParamInitializer.BIAS_KEY, + gradientViews.get(ConvolutionParamInitializer.BIAS_KEY)); + } + retGradient.setGradientFor( + ConvolutionParamInitializer.WEIGHT_KEY, + gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY), + 'c'); + if (getRnnDataFormat() == RNNFormat.NWC) { + epsOut = epsOut.permute(0, 2, 1); + } + return new Pair<>(retGradient, epsOut); + } + + @Override + protected Pair preOutput4d( + boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) { + Pair preOutput = super.preOutput(true, forBackprop, workspaceMgr); + INDArray p3d = preOutput.getFirst(); + INDArray p = preOutput.getFirst().reshape(p3d.size(0), p3d.size(1), p3d.size(2), 1); + preOutput.setFirst(p); + return preOutput; + } + + @Override + protected Pair preOutput( + boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) { + assertInputSet(false); + + INDArray input = this.input.castTo(dataType); + if (getTypedLayerConfiguration().getRnnDataFormat() == RNNFormat.NWC) { + input = input.permute(0, 2, 1); // NHWC to NCHW } - @Override - protected Pair preOutput(boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) { - assertInputSet(false); + org.deeplearning4j.nn.conf.layers.ConvolutionLayer c = getTypedLayerConfiguration(); + Conv1DConfig conf = + Conv1DConfig.builder() + .k(c.getKernelSize()[0]) + .s(c.getStride()[0]) + .d(c.getDilation()[0]) + .p(c.getPadding()[0]) + .dataFormat(Conv1DConfig.NCW) + .paddingMode(Convolution2DUtils.paddingModeForConvolutionMode(convolutionMode)) + .build(); - INDArray input = this.input.castTo(dataType); - if(getTypedLayerConfiguration().getRnnDataFormat() == RNNFormat.NWC) { - input = input.permute(0,2,1); //NHWC to NCHW - } + INDArray w = + Convolution1DUtils.reshapeWeightArrayOrGradientForFormat( + getParam(ConvolutionParamInitializer.WEIGHT_KEY), RNNFormat.NCW); - org.deeplearning4j.nn.conf.layers.Convolution1DLayer c = getTypedLayerConfiguration(); - Conv1DConfig conf = Conv1DConfig.builder() - .k(c.getKernelSize()[0]) - .s(c.getStride()[0]) - .d(c.getDilation()[0]) - .p(c.getPadding()[0]) - .dataFormat(Conv1DConfig.NCW) - 
.paddingMode(ConvolutionUtils.paddingModeForConvolutionMode(convolutionMode)) - .build(); - - - INDArray w = Convolution1DUtils.reshapeWeightArrayOrGradientForFormat( - getParam(ConvolutionParamInitializer.WEIGHT_KEY) - ,RNNFormat.NCW); - - - INDArray[] inputs; - if(getTypedLayerConfiguration().hasBias()) { - INDArray b = getParam(ConvolutionParamInitializer.BIAS_KEY); - b = b.reshape(b.length()); - inputs = new INDArray[]{input, w, b}; - } else { - inputs = new INDArray[]{input, w}; - } - - Conv1D op = new Conv1D(inputs, null, conf); - List outShape = op.calculateOutputShape(); - op.setOutputArgument(0, Nd4j.create(outShape.get(0), false)); - Nd4j.exec(op); - INDArray output = op.getOutputArgument(0); - - if(getRnnDataFormat() == RNNFormat.NWC) { - output = output.permute(0,2,1); - } - - return new Pair<>(output, null); + INDArray[] inputs; + if (getTypedLayerConfiguration().hasBias()) { + INDArray b = getParam(ConvolutionParamInitializer.BIAS_KEY); + b = b.reshape(b.length()); + inputs = new INDArray[] {input, w, b}; + } else { + inputs = new INDArray[] {input, w}; } + Conv1D op = new Conv1D(inputs, null, conf); + List outShape = op.calculateOutputShape(); + op.setOutputArgument(0, Nd4j.create(outShape.get(0), false)); + Nd4j.exec(op); + INDArray output = op.getOutputArgument(0); - @Override - public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) { - INDArray act4d = super.activate(training, workspaceMgr); - INDArray act3d = act4d.rank() > 3 ? - act4d.reshape(act4d.size(0), act4d.size(1), act4d.size(2)) : act4d; - - if(maskArray != null) { - INDArray maskOut = feedForwardMaskArray(maskArray, MaskState.Active, (int)act3d.size(0)).getFirst(); - Preconditions.checkState(act3d.size(0) == maskOut.size(0) && act3d.size(2) == maskOut.size(1), - "Activations dimensions (0,2) and mask dimensions (0,1) don't match: Activations %s, Mask %s", - act3d.shape(), maskOut.shape()); - Broadcast.mul(act3d, maskOut, act3d, 0, 2); - } - - return workspaceMgr.leverageTo(ArrayType.ACTIVATIONS, act3d); //Should be zero copy most of the time + if (getRnnDataFormat() == RNNFormat.NWC) { + output = output.permute(0, 2, 1); } - @Override - public Pair feedForwardMaskArray(INDArray maskArray, MaskState currentMaskState, - int minibatchSize) { - INDArray reduced = ConvolutionUtils.cnn1dMaskReduction(maskArray, getTypedLayerConfiguration().getKernelSize()[0], - getTypedLayerConfiguration().getStride()[0], getTypedLayerConfiguration().getPadding()[0], getTypedLayerConfiguration().getDilation()[0], - getTypedLayerConfiguration().getConvolutionMode()); - return new Pair<>(reduced, currentMaskState); + return new Pair<>(output, null); + } + + @Override + public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) { + INDArray act4d = super.activate(training, workspaceMgr); + INDArray act3d = + act4d.rank() > 3 ? 
act4d.reshape(act4d.size(0), act4d.size(1), act4d.size(2)) : act4d; + + if (maskArray != null) { + INDArray maskOut = + feedForwardMaskArray(maskArray, MaskState.Active, (int) act3d.size(0)).getFirst(); + Preconditions.checkState( + act3d.size(0) == maskOut.size(0) && act3d.size(2) == maskOut.size(1), + "Activations dimensions (0,2) and mask dimensions (0,1) don't match: Activations %s, Mask %s", + act3d.shape(), + maskOut.shape()); + Broadcast.mul(act3d, maskOut, act3d, 0, 2); } - @Override - public org.deeplearning4j.nn.conf.layers.Convolution1DLayer getTypedLayerConfiguration() { - return (org.deeplearning4j.nn.conf.layers.Convolution1DLayer)layerConfiguration; - } + return workspaceMgr.leverageTo( + ArrayType.ACTIVATIONS, act3d); // Should be zero copy most of the time + } - private RNNFormat getRnnDataFormat(){ - return getTypedLayerConfiguration().getRnnDataFormat(); - } + @Override + public Pair feedForwardMaskArray( + INDArray maskArray, MaskState currentMaskState, int minibatchSize) { + INDArray reduced = + Convolution2DUtils.cnn1dMaskReduction( + maskArray, + getTypedLayerConfiguration().getKernelSize()[0], + getTypedLayerConfiguration().getStride()[0], + getTypedLayerConfiguration().getPadding()[0], + getTypedLayerConfiguration().getDilation()[0], + getTypedLayerConfiguration().getConvolutionMode()); + return new Pair<>(reduced, currentMaskState); + } + + private RNNFormat getRnnDataFormat() { + return getTypedLayerConfiguration().getRnnDataFormat(); + } + +/** +* + * @return +*/ + @Override + public Convolution1D getTypedLayerConfiguration() { + return (Convolution1D) super.getTypedLayerConfiguration(); + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Convolution1DNewLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Convolution1DNewLayer.java new file mode 100644 index 000000000..91e4fbe3c --- /dev/null +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Convolution1DNewLayer.java @@ -0,0 +1,226 @@ +/* + * ****************************************************************************** + * * + * * + * * This program and the accompanying materials are made available under the + * * terms of the Apache License, Version 2.0 which is available at + * * https://www.apache.org/licenses/LICENSE-2.0. + * * + * * See the NOTICE file distributed with this work for additional + * * information regarding copyright ownership. + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * * License for the specific language governing permissions and limitations + * * under the License. 
+ * * + * * SPDX-License-Identifier: Apache-2.0 + * ***************************************************************************** + */ + +package org.deeplearning4j.nn.layers.convolution; + +import java.util.Arrays; +import java.util.List; +import org.deeplearning4j.exception.DL4JInvalidInputException; +import org.deeplearning4j.nn.api.MaskState; +import org.deeplearning4j.nn.conf.RNNFormat; +import org.deeplearning4j.nn.conf.layers.Convolution1DNew; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; +import org.deeplearning4j.nn.gradient.DefaultGradient; +import org.deeplearning4j.nn.gradient.Gradient; +import org.deeplearning4j.nn.params.ConvolutionParamInitializer; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.deeplearning4j.util.Convolution1DUtils; +import org.deeplearning4j.util.Convolution2DUtils; +import org.nd4j.common.base.Preconditions; +import org.nd4j.common.primitives.Pair; +import org.nd4j.linalg.activations.IActivation; +import org.nd4j.linalg.api.buffer.DataType; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.api.ops.impl.layers.convolution.Conv1D; +import org.nd4j.linalg.api.ops.impl.layers.convolution.Conv1DDerivative; +import org.nd4j.linalg.api.ops.impl.layers.convolution.config.Conv1DConfig; +import org.nd4j.linalg.api.shape.LongShapeDescriptor; +import org.nd4j.linalg.factory.Broadcast; +import org.nd4j.linalg.factory.Nd4j; + +public class Convolution1DNewLayer + extends ConvolutionNewLayer { + public Convolution1DNewLayer(LayerConfiguration conf, DataType dataType) { + super(conf, dataType); + } + + @Override + public Pair backpropGradient( + INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { + assertInputSet(true); + if (epsilon.rank() != 3) + throw new DL4JInvalidInputException( + "Got rank " + + epsilon.rank() + + " array as epsilon for Convolution1D backprop with shape " + + Arrays.toString(epsilon.shape()) + + ". Expected rank 3 array with shape [minibatchSize, features, length]. 
" + + layerId()); + Pair fwd = preOutput(false, true, workspaceMgr); + IActivation afn = getTypedLayerConfiguration().getActivationFn(); + INDArray delta = + afn.backprop(fwd.getFirst(), epsilon).getFirst(); // TODO handle activation function params + + Convolution1DNew c = getTypedLayerConfiguration(); + Conv1DConfig conf = + Conv1DConfig.builder() + .k(c.getKernelSize()[0]) + .s(c.getStride()[0]) + .d(c.getDilation()[0]) + .p(c.getPadding()[0]) + .dataFormat(Conv1DConfig.NCW) + .paddingMode(Convolution2DUtils.paddingModeForConvolutionMode(convolutionMode)) + .build(); + + INDArray w = + Convolution1DUtils.reshapeWeightArrayOrGradientForFormat( + getParam(ConvolutionParamInitializer.WEIGHT_KEY), RNNFormat.NCW); + + INDArray[] inputArrs; + INDArray[] outputArrs; + INDArray wg = + Convolution1DUtils.reshapeWeightArrayOrGradientForFormat( + gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY), getRnnDataFormat()); + INDArray epsOut = + workspaceMgr.createUninitialized( + ArrayType.ACTIVATION_GRAD, input.dataType(), input.shape()); + INDArray input = this.input.castTo(dataType); + if (getTypedLayerConfiguration().getRnnDataFormat() == RNNFormat.NWC) { + input = input.permute(0, 2, 1); // NHWC to NCHW + } + + if (getTypedLayerConfiguration().hasBias()) { + INDArray b = getParam(ConvolutionParamInitializer.BIAS_KEY); + b = b.reshape(b.length()); + inputArrs = new INDArray[] {input, w, b, delta}; + INDArray bg = gradientViews.get(ConvolutionParamInitializer.BIAS_KEY); + bg = bg.reshape(bg.length()); + outputArrs = new INDArray[] {epsOut, wg, bg}; + } else { + inputArrs = new INDArray[] {input, w, delta}; + outputArrs = new INDArray[] {epsOut, wg}; + } + + Conv1DDerivative op = new Conv1DDerivative(inputArrs, outputArrs, conf); + Nd4j.exec(op); + + Gradient retGradient = new DefaultGradient(); + if (getTypedLayerConfiguration().hasBias()) { + retGradient.setGradientFor( + ConvolutionParamInitializer.BIAS_KEY, + gradientViews.get(ConvolutionParamInitializer.BIAS_KEY)); + } + retGradient.setGradientFor( + ConvolutionParamInitializer.WEIGHT_KEY, + gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY), + 'c'); + if (getRnnDataFormat() == RNNFormat.NWC) { + epsOut = epsOut.permute(0, 2, 1); + } + return new Pair<>(retGradient, epsOut); + } + + @Override + protected Pair preOutput4d( + boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) { + Pair preOutput = super.preOutput(true, forBackprop, workspaceMgr); + INDArray p3d = preOutput.getFirst(); + INDArray p = preOutput.getFirst().reshape(p3d.size(0), p3d.size(1), p3d.size(2), 1); + preOutput.setFirst(p); + return preOutput; + } + + @Override + protected Pair preOutput( + boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) { + assertInputSet(false); + + INDArray input = this.input.castTo(dataType); + if (getTypedLayerConfiguration().getRnnDataFormat() == RNNFormat.NWC) { + input = input.permute(0, 2, 1); // NHWC to NCHW + } + + Convolution1DNew c = getTypedLayerConfiguration(); + Conv1DConfig conf = + Conv1DConfig.builder() + .k(c.getKernelSize()[0]) + .s(c.getStride()[0]) + .d(c.getDilation()[0]) + .p(c.getPadding()[0]) + .dataFormat(Conv1DConfig.NCW) + .paddingMode(Convolution2DUtils.paddingModeForConvolutionMode(convolutionMode)) + .build(); + + INDArray w = + Convolution1DUtils.reshapeWeightArrayOrGradientForFormat( + getParam(ConvolutionParamInitializer.WEIGHT_KEY), RNNFormat.NCW); + + INDArray[] inputs; + if (getTypedLayerConfiguration().hasBias()) { + INDArray b = 
getParam(ConvolutionParamInitializer.BIAS_KEY); + b = b.reshape(b.length()); + inputs = new INDArray[] {input, w, b}; + } else { + inputs = new INDArray[] {input, w}; + } + + Conv1D op = new Conv1D(inputs, null, conf); + List outShape = op.calculateOutputShape(); + op.setOutputArgument(0, Nd4j.create(outShape.get(0), false)); + Nd4j.exec(op); + INDArray output = op.getOutputArgument(0); + + if (getRnnDataFormat() == RNNFormat.NWC) { + output = output.permute(0, 2, 1); + } + + return new Pair<>(output, null); + } + + @Override + public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) { + INDArray act4d = super.activate(training, workspaceMgr); + INDArray act3d = + act4d.rank() > 3 ? act4d.reshape(act4d.size(0), act4d.size(1), act4d.size(2)) : act4d; + + if (maskArray != null) { + INDArray maskOut = + feedForwardMaskArray(maskArray, MaskState.Active, (int) act3d.size(0)).getFirst(); + Preconditions.checkState( + act3d.size(0) == maskOut.size(0) && act3d.size(2) == maskOut.size(1), + "Activations dimensions (0,2) and mask dimensions (0,1) don't match: Activations %s, Mask %s", + act3d.shape(), + maskOut.shape()); + Broadcast.mul(act3d, maskOut, act3d, 0, 2); + } + + return workspaceMgr.leverageTo( + ArrayType.ACTIVATIONS, act3d); // Should be zero copy most of the time + } + + @Override + public Pair feedForwardMaskArray( + INDArray maskArray, MaskState currentMaskState, int minibatchSize) { + INDArray reduced = + Convolution2DUtils.cnn1dMaskReduction( + maskArray, + getTypedLayerConfiguration().getKernelSize()[0], + getTypedLayerConfiguration().getStride()[0], + getTypedLayerConfiguration().getPadding()[0], + getTypedLayerConfiguration().getDilation()[0], + getTypedLayerConfiguration().getConvolutionMode()); + return new Pair<>(reduced, currentMaskState); + } + + private RNNFormat getRnnDataFormat() { + return getTypedLayerConfiguration().getRnnDataFormat(); + } +} diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Convolution3DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Convolution3DLayer.java index 930f33ddb..536384c76 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Convolution3DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Convolution3DLayer.java @@ -20,6 +20,7 @@ package org.deeplearning4j.nn.layers.convolution; +import java.util.Arrays; import org.deeplearning4j.exception.DL4JInvalidInputException; import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.layers.Convolution3D; @@ -30,15 +31,13 @@ import org.deeplearning4j.nn.params.Convolution3DParamInitializer; import org.deeplearning4j.nn.workspace.ArrayType; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.deeplearning4j.util.Convolution3DUtils; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.CustomOp; import org.nd4j.linalg.api.ops.DynamicCustomOp; import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.common.primitives.Pair; - -import java.util.Arrays; public class Convolution3DLayer extends ConvolutionLayer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionHelper.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionHelper.java index 
c517e92b8..601be3000 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionHelper.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionHelper.java @@ -28,10 +28,10 @@ import org.deeplearning4j.nn.conf.layers.ConvolutionLayer.BwdFilterAlgo; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer.FwdAlgo; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.LayerHelper; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.common.primitives.Pair; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; public interface ConvolutionHelper extends LayerHelper { boolean checkSupported(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayer.java index be4aea4fe..bcaa68930 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayer.java @@ -20,7 +20,7 @@ package org.deeplearning4j.nn.layers.convolution; - +import java.util.Arrays; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.common.config.DL4JClassLoading; import org.deeplearning4j.exception.DL4JInvalidInputException; @@ -28,6 +28,7 @@ import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.CacheMode; import org.deeplearning4j.nn.conf.ConvolutionMode; +import org.deeplearning4j.nn.conf.layers.Convolution1DNew; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; @@ -35,7 +36,10 @@ import org.deeplearning4j.nn.layers.BaseLayer; import org.deeplearning4j.nn.layers.LayerHelper; import org.deeplearning4j.nn.layers.mkldnn.MKLDNNConvHelper; import org.deeplearning4j.nn.params.ConvolutionParamInitializer; -import org.deeplearning4j.util.ConvolutionUtils; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.deeplearning4j.util.Convolution2DUtils; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.memory.MemoryWorkspace; @@ -45,559 +49,700 @@ import org.nd4j.linalg.convolution.Convolution; import org.nd4j.linalg.exception.ND4JArraySizeException; import org.nd4j.linalg.exception.ND4JOpProfilerException; import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.common.primitives.Pair; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import org.deeplearning4j.nn.workspace.ArrayType; - -import java.util.Arrays; - @Slf4j -public class ConvolutionLayer extends BaseLayer { +public class ConvolutionLayer + extends BaseLayer { - protected INDArray i2d; - protected ConvolutionHelper helper = null; - protected int helperCountFail = 0; - protected ConvolutionMode convolutionMode; - protected transient INDArray dummyBias; //Used only when: hasBias == false AND helpers are used - protected transient INDArray dummyBiasGrad; //As above + protected INDArray i2d; + protected ConvolutionHelper helper = null; + 
protected int helperCountFail = 0; + protected ConvolutionMode convolutionMode; + protected transient INDArray dummyBias; // Used only when: hasBias == false AND helpers are used + protected transient INDArray dummyBiasGrad; // As above - public ConvolutionLayer(LayerConfiguration conf, DataType dataType) { - super(conf, dataType); - initializeHelper(); - convolutionMode = ((org.deeplearning4j.nn.conf.layers.ConvolutionLayer) conf).getConvolutionMode(); + public ConvolutionLayer(LayerConfiguration conf, DataType dataType) { + super(conf, dataType); + initializeHelper(); + if (conf instanceof Convolution1DNew) { + convolutionMode = + ((Convolution1DNew) conf).getConvolutionMode(); + } else + if (conf instanceof org.deeplearning4j.nn.conf.layers.ConvolutionLayer) { + convolutionMode = + ((org.deeplearning4j.nn.conf.layers.ConvolutionLayer) conf).getConvolutionMode(); } - void initializeHelper() { - String backend = Nd4j.getExecutioner().getEnvironmentInformation().getProperty("backend"); - if("CUDA".equalsIgnoreCase(backend)) { - helper = DL4JClassLoading.createNewInstance( - "org.deeplearning4j.cuda.convolution.CudnnConvolutionHelper", - ConvolutionHelper.class, - dataType); - log.debug("CudnnConvolutionHelper successfully initialized"); - if (!helper.checkSupported()) { - helper = null; - } - } else if("CPU".equalsIgnoreCase(backend)){ - helper = new MKLDNNConvHelper(dataType); - log.trace("Created MKLDNNConvHelper, layer {}", getTypedLayerConfiguration().getName()); - } + } - if (helper != null && !helper.checkSupported()) { - log.debug("Removed helper {} as not supported", helper.getClass()); - helper = null; - } + void initializeHelper() { + String backend = Nd4j.getExecutioner().getEnvironmentInformation().getProperty("backend"); + if ("CUDA".equalsIgnoreCase(backend)) { + helper = + DL4JClassLoading.createNewInstance( + "org.deeplearning4j.cuda.convolution.CudnnConvolutionHelper", + ConvolutionHelper.class, + dataType); + log.debug("CudnnConvolutionHelper successfully initialized"); + if (!helper.checkSupported()) { + helper = null; + } + } else if ("CPU".equalsIgnoreCase(backend)) { + helper = new MKLDNNConvHelper(dataType); + log.trace("Created MKLDNNConvHelper, layer {}", getTypedLayerConfiguration().getName()); } - @Override - public Type type() { - return Type.CONVOLUTIONAL; + if (helper != null && !helper.checkSupported()) { + log.debug("Removed helper {} as not supported", helper.getClass()); + helper = null; + } + } + + @Override + public Type type() { + return Type.CONVOLUTIONAL; + } + + @Override + public Pair backpropGradient( + INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { + assertInputSet(true); + INDArray weights = + getParamWithNoise(ConvolutionParamInitializer.WEIGHT_KEY, true, workspaceMgr); + INDArray bias = getParamWithNoise(ConvolutionParamInitializer.BIAS_KEY, true, workspaceMgr); + + INDArray input = this.input.castTo(dataType); // No op if correct type + if (epsilon.dataType() != dataType) epsilon = epsilon.castTo(dataType); + + INDArray origInput = input; + INDArray origEps = epsilon; + if (getTypedLayerConfiguration().getConvFormat() != CNN2DFormat.NCHW) { + input = input.permute(0, 3, 1, 2); // NHWC to NCHW + epsilon = epsilon.permute(0, 3, 1, 2); // NHWC to NCHW } - @Override - public Pair backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { - assertInputSet(true); - INDArray weights = getParamWithNoise(ConvolutionParamInitializer.WEIGHT_KEY, true, workspaceMgr); - INDArray bias = getParamWithNoise(ConvolutionParamInitializer.BIAS_KEY, 
true, workspaceMgr); + long miniBatch = input.size(0); + int inH = (int) input.size(2); + int inW = (int) input.size(3); - INDArray input = this.input.castTo(dataType); //No op if correct type - if(epsilon.dataType() != dataType) - epsilon = epsilon.castTo(dataType); + long outDepth = weights.size(0); + long inDepth = weights.size(1); + int kH = (int) weights.size(2); + int kW = (int) weights.size(3); - INDArray origInput = input; - INDArray origEps = epsilon; - if(getTypedLayerConfiguration().getConvFormat() != CNN2DFormat.NCHW) { - input = input.permute(0,3,1,2); //NHWC to NCHW - epsilon = epsilon.permute(0,3,1,2); //NHWC to NCHW + int[] dilation = getTypedLayerConfiguration().getDilation(); + int[] kernel = getTypedLayerConfiguration().getKernelSize(); + int[] strides = getTypedLayerConfiguration().getStride(); + int[] pad; + int[] outSize; + if (convolutionMode == ConvolutionMode.Same) { + outSize = + Convolution2DUtils.getOutputSize( + input, + kernel, + strides, + null, + convolutionMode, + dilation, + CNN2DFormat.NCHW); // Also performs validation + pad = + Convolution2DUtils.getSameModeTopLeftPadding( + outSize, new int[] {inH, inW}, kernel, strides, dilation); + } else { + pad = getTypedLayerConfiguration().getPadding(); + outSize = + Convolution2DUtils.getOutputSize( + input, + kernel, + strides, + pad, + convolutionMode, + dilation, + CNN2DFormat.NCHW); // Also performs validation + } + + int outH = outSize[0]; + int outW = outSize[1]; + + INDArray biasGradView = gradientViews.get(ConvolutionParamInitializer.BIAS_KEY); + INDArray weightGradView = + gradientViews.get( + ConvolutionParamInitializer.WEIGHT_KEY); // 4d, c order. Shape: [outDepth,inDepth,kH,kW] + INDArray weightGradView2df = + Shape.newShapeNoCopy(weightGradView, new long[] {outDepth, inDepth * kH * kW}, false) + .transpose(); + + INDArray delta; + IActivation afn = getTypedLayerConfiguration().getActivationFn(); + + Pair p = preOutput4d(true, true, workspaceMgr); + INDArray z = p.getFirst(); + CNN2DFormat f = getTypedLayerConfiguration().getConvFormat(); + if (f != CNN2DFormat.NCHW) { + z = z.permute(0, 3, 1, 2); // NHWC to NCHW + } + delta = afn.backprop(z, epsilon).getFirst(); // TODO handle activation function params + + if (helper != null + && (helperCountFail == 0 || !getTypedLayerConfiguration().isCudnnAllowFallback())) { + INDArray helperDelta = delta; + if (getTypedLayerConfiguration().getConvFormat() == CNN2DFormat.NHWC) + helperDelta = delta.permute(0, 2, 3, 1); // NCHW to NHWC + + if (!hasBias() && !(helper instanceof MKLDNNConvHelper)) { + // MKL-DNN supports no bias, CuDNN doesn't + if (dummyBiasGrad == null) { + try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { + dummyBiasGrad = Nd4j.create(1, getTypedLayerConfiguration().getNOut()); + } + } + biasGradView = dummyBiasGrad; + } + + Pair ret = null; + try { + ret = + helper.backpropGradient( + origInput, + weights, + bias, + helperDelta, + kernel, + strides, + pad, + biasGradView, + weightGradView, + afn, + getTypedLayerConfiguration().getCudnnAlgoMode(), + getTypedLayerConfiguration().getCudnnBwdFilterAlgo(), + getTypedLayerConfiguration().getCudnnBwdDataAlgo(), + convolutionMode, + dilation, + getTypedLayerConfiguration().getConvFormat(), + workspaceMgr); + } catch (ND4JOpProfilerException e) { + throw e; // NaN panic etc for debugging + } catch (Exception e) { + if (e.getMessage().contains("Failed to allocate")) { + // This is a memory exception - don't fallback to built-in implementation + throw e; } - - long miniBatch = 
input.size(0); - int inH = (int) input.size(2); - int inW = (int) input.size(3); - - long outDepth = weights.size(0); - long inDepth = weights.size(1); - int kH = (int) weights.size(2); - int kW = (int) weights.size(3); - - int[] dilation = getTypedLayerConfiguration().getDilation(); - int[] kernel = getTypedLayerConfiguration().getKernelSize(); - int[] strides = getTypedLayerConfiguration().getStride(); - int[] pad; - int[] outSize; - if (convolutionMode == ConvolutionMode.Same) { - outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, null, convolutionMode, dilation, CNN2DFormat.NCHW); //Also performs validation - pad = ConvolutionUtils.getSameModeTopLeftPadding(outSize, new int[] {inH, inW}, kernel, strides, dilation); + if (getTypedLayerConfiguration().isCudnnAllowFallback()) { + helperCountFail++; + if (helper instanceof MKLDNNConvHelper) { + log.warn("MKL-DNN execution failed - falling back on built-in implementation", e); + } else { + log.warn("CuDNN execution failed - falling back on built-in implementation", e); + } } else { - pad = getTypedLayerConfiguration().getPadding(); - outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, pad, convolutionMode, dilation, CNN2DFormat.NCHW); //Also performs validation + throw new RuntimeException( + "Error during ConvolutionLayer MKL/CuDNN helper backprop - isCudnnAllowFallback() is set to false", + e); } + } - int outH = outSize[0]; - int outW = outSize[1]; - - - INDArray biasGradView = gradientViews.get(ConvolutionParamInitializer.BIAS_KEY); - INDArray weightGradView = gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY); //4d, c order. Shape: [outDepth,inDepth,kH,kW] - INDArray weightGradView2df = Shape - .newShapeNoCopy(weightGradView, new long[]{outDepth, inDepth * kH * kW}, false).transpose(); - - - - INDArray delta; - IActivation afn = getTypedLayerConfiguration().getActivationFn(); - - Pair p = preOutput4d(true, true, workspaceMgr); - INDArray z = p.getFirst(); - CNN2DFormat f = getTypedLayerConfiguration().getConvFormat(); - if(f != CNN2DFormat.NCHW){ - z = z.permute(0,3,1,2); //NHWC to NCHW - } - delta = afn.backprop(z, epsilon).getFirst(); //TODO handle activation function params - - if (helper != null && (helperCountFail == 0 || !getTypedLayerConfiguration().isCudnnAllowFallback())) { - INDArray helperDelta = delta; - if(getTypedLayerConfiguration().getConvFormat() == CNN2DFormat.NHWC) - helperDelta = delta.permute(0,2,3,1); //NCHW to NHWC - - if(!hasBias() && !(helper instanceof MKLDNNConvHelper)){ - //MKL-DNN supports no bias, CuDNN doesn't - if(dummyBiasGrad == null){ - try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { - dummyBiasGrad = Nd4j.create(1, getTypedLayerConfiguration().getNOut()); - } - } - biasGradView = dummyBiasGrad; - } - - Pair ret = null; - try { - ret = helper.backpropGradient(origInput, weights, bias, helperDelta, kernel, strides, - pad, biasGradView, weightGradView, afn, - getTypedLayerConfiguration().getCudnnAlgoMode(), getTypedLayerConfiguration().getCudnnBwdFilterAlgo(), getTypedLayerConfiguration().getCudnnBwdDataAlgo(), - convolutionMode, dilation, getTypedLayerConfiguration().getConvFormat(), workspaceMgr); - } catch (ND4JOpProfilerException e){ - throw e; //NaN panic etc for debugging - } catch (Exception e){ - if(e.getMessage().contains("Failed to allocate")){ - //This is a memory exception - don't fallback to built-in implementation - throw e; - } - - if(getTypedLayerConfiguration().isCudnnAllowFallback()){ - helperCountFail++; - if(helper 
instanceof MKLDNNConvHelper){ - log.warn("MKL-DNN execution failed - falling back on built-in implementation",e); - } else { - log.warn("CuDNN execution failed - falling back on built-in implementation",e); - } - } else { - throw new RuntimeException("Error during ConvolutionLayer MKL/CuDNN helper backprop - isCudnnAllowFallback() is set to false", e); - } - } - - if (ret != null) { - //Backprop dropout, if present - INDArray gradPostDropout = ret.getRight(); - gradPostDropout = backpropDropOutIfPresent(gradPostDropout); - ret.setSecond(gradPostDropout); - return ret; - } - } - - delta = delta.permute(1, 0, 2, 3); //To shape: [outDepth,miniBatch,outH,outW] - - //Note: due to the permute in preOut, and the fact that we essentially do a preOut.muli(epsilon), this reshape - // should be zero-copy; only possible exception being sometimes with the "identity" activation case - INDArray delta2d = delta.reshape('c', outDepth, miniBatch * outH * outW); //Shape.newShapeNoCopy(delta,new int[]{outDepth,miniBatch*outH*outW},false); - - //Do im2col, but with order [miniB,outH,outW,depthIn,kH,kW]; but need to input [miniBatch,channels,kH,kW,outH,outW] given the current im2col implementation - //To get this: create an array of the order we want, permute it to the order required by im2col implementation, and then do im2col on that - //to get old order from required order: permute(0,3,4,5,1,2) - INDArray im2col2d = p.getSecond(); //Re-use im2col2d array from forward pass if available; recalculate if not - if (im2col2d == null) { - INDArray col = Nd4j.createUninitialized(dataType, new long[] {miniBatch, outH, outW, inDepth, kH, kW}, 'c'); - INDArray col2 = col.permute(0, 3, 4, 5, 1, 2); - Convolution.im2col(input, kH, kW, strides[0], strides[1], pad[0], pad[1], dilation[0], dilation[1], - convolutionMode == ConvolutionMode.Same, col2); - //Shape im2col to 2d. Due to the permuting above, this should be a zero-copy reshape - im2col2d = col.reshape('c', miniBatch * outH * outW, inDepth * kH * kW); - } - - //Calculate weight gradients, using cc->c mmul. - //weightGradView2df is f order, but this is because it's transposed from c order - //Here, we are using the fact that AB = (B^T A^T)^T; output here (post transpose) is in c order, not usual f order - Nd4j.gemm(im2col2d, delta2d, weightGradView2df, true, true, 1.0, 0.0); - - //Flatten 4d weights to 2d... this again is a zero-copy op (unless weights are not originally in c order for some reason) - INDArray wPermuted = weights.permute(3, 2, 1, 0); //Start with c order weights, switch order to f order - INDArray w2d = wPermuted.reshape('f', inDepth * kH * kW, outDepth); - - //Calculate epsilons for layer below, in 2d format (note: this is in 'image patch' format before col2im reduction) - //Note: cc -> f mmul here, then reshape to 6d in f order - INDArray epsNext2d = w2d.mmul(delta2d); //TODO can we reuse im2col array instead of allocating new result array? - INDArray eps6d = Shape.newShapeNoCopy(epsNext2d, new long[] {kW, kH, inDepth, outW, outH, miniBatch}, true); - - //Calculate epsilonNext by doing im2col reduction. 
- //Current col2im implementation expects input with order: [miniBatch,channels,kH,kW,outH,outW] - //currently have [kH,kW,inDepth,outW,outH,miniBatch] -> permute first - eps6d = eps6d.permute(5, 2, 1, 0, 4, 3); - INDArray epsNextOrig = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, eps6d.dataType(), new long[] {inDepth, miniBatch, inH, inW}, 'c'); - - //Note: we are execute col2im in a way that the output array should be used in a stride 1 muli in the layer below... (same strides as zs/activations) - INDArray epsNext = epsNextOrig.permute(1, 0, 2, 3); - Convolution.col2im(eps6d, epsNext, strides[0], strides[1], pad[0], pad[1], inH, inW, dilation[0], dilation[1]); - - Gradient retGradient = new DefaultGradient(); - if(getTypedLayerConfiguration().hasBias()){ - delta2d.sum(biasGradView, 1); //biasGradView is initialized/zeroed first in sum op - retGradient.setGradientFor(ConvolutionParamInitializer.BIAS_KEY, biasGradView); - } - retGradient.setGradientFor(ConvolutionParamInitializer.WEIGHT_KEY, weightGradView, 'c'); - - weightNoiseParams.clear(); - - epsNext = backpropDropOutIfPresent(epsNext); - - if(getTypedLayerConfiguration().getConvFormat()!= CNN2DFormat.NCHW){ - epsNext = epsNext.permute(0,2,3,1); //NCHW to NHWC - } - - return new Pair<>(retGradient, epsNext); + if (ret != null) { + // Backprop dropout, if present + INDArray gradPostDropout = ret.getRight(); + gradPostDropout = backpropDropOutIfPresent(gradPostDropout); + ret.setSecond(gradPostDropout); + return ret; + } } - /** - * preOutput4d: Used so that ConvolutionLayer subclasses (such as Convolution1DLayer) can maintain their standard - * non-4d preOutput method, while overriding this to return 4d activations (for use in backprop) without modifying - * the public API - */ - protected Pair preOutput4d(boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) { - return preOutput(training, forBackprop, workspaceMgr); + delta = delta.permute(1, 0, 2, 3); // To shape: [outDepth,miniBatch,outH,outW] + + // Note: due to the permute in preOut, and the fact that we essentially do a + // preOut.muli(epsilon), this reshape + // should be zero-copy; only possible exception being sometimes with the "identity" activation + // case + INDArray delta2d = + delta.reshape( + 'c', + outDepth, + miniBatch * outH + * outW); // Shape.newShapeNoCopy(delta,new + // int[]{outDepth,miniBatch*outH*outW},false); + + // Do im2col, but with order [miniB,outH,outW,depthIn,kH,kW]; but need to input + // [miniBatch,channels,kH,kW,outH,outW] given the current im2col implementation + // To get this: create an array of the order we want, permute it to the order required by im2col + // implementation, and then do im2col on that + // to get old order from required order: permute(0,3,4,5,1,2) + INDArray im2col2d = + p.getSecond(); // Re-use im2col2d array from forward pass if available; recalculate if not + if (im2col2d == null) { + INDArray col = + Nd4j.createUninitialized( + dataType, new long[] {miniBatch, outH, outW, inDepth, kH, kW}, 'c'); + INDArray col2 = col.permute(0, 3, 4, 5, 1, 2); + Convolution.im2col( + input, + kH, + kW, + strides[0], + strides[1], + pad[0], + pad[1], + dilation[0], + dilation[1], + convolutionMode == ConvolutionMode.Same, + col2); + // Shape im2col to 2d. 
Due to the permuting above, this should be a zero-copy reshape + im2col2d = col.reshape('c', miniBatch * outH * outW, inDepth * kH * kW); } - protected void validateInputRank() { - //Input validation: expect rank 4 matrix - if (input.rank() != 4) { - String layerName = layerConfiguration.getName(); - if (layerName == null) - layerName = "(not named)"; - throw new DL4JInvalidInputException("Got rank " + input.rank() - + " array as input to ConvolutionLayer (layer name = " + layerName + ", layer index = " - + index + ") with shape " + Arrays.toString(input.shape()) + ". " - + "Expected rank 4 array with shape [minibatchSize, layerInputDepth, inputHeight, inputWidth]." - + (input.rank() == 2 - ? " (Wrong input type (see InputType.convolutionalFlat()) or wrong data type?)" - : "") - + " " + layerId()); - } + // Calculate weight gradients, using cc->c mmul. + // weightGradView2df is f order, but this is because it's transposed from c order + // Here, we are using the fact that AB = (B^T A^T)^T; output here (post transpose) is in c + // order, not usual f order + Nd4j.gemm(im2col2d, delta2d, weightGradView2df, true, true, 1.0, 0.0); + + // Flatten 4d weights to 2d... this again is a zero-copy op (unless weights are not originally + // in c order for some reason) + INDArray wPermuted = + weights.permute(3, 2, 1, 0); // Start with c order weights, switch order to f order + INDArray w2d = wPermuted.reshape('f', inDepth * kH * kW, outDepth); + + // Calculate epsilons for layer below, in 2d format (note: this is in 'image patch' format + // before col2im reduction) + // Note: cc -> f mmul here, then reshape to 6d in f order + INDArray epsNext2d = + w2d.mmul(delta2d); // TODO can we reuse im2col array instead of allocating new result array? + INDArray eps6d = + Shape.newShapeNoCopy(epsNext2d, new long[] {kW, kH, inDepth, outW, outH, miniBatch}, true); + + // Calculate epsilonNext by doing im2col reduction. + // Current col2im implementation expects input with order: [miniBatch,channels,kH,kW,outH,outW] + // currently have [kH,kW,inDepth,outW,outH,miniBatch] -> permute first + eps6d = eps6d.permute(5, 2, 1, 0, 4, 3); + INDArray epsNextOrig = + workspaceMgr.createUninitialized( + ArrayType.ACTIVATION_GRAD, + eps6d.dataType(), + new long[] {inDepth, miniBatch, inH, inW}, + 'c'); + + // Note: we are execute col2im in a way that the output array should be used in a stride 1 muli + // in the layer below... (same strides as zs/activations) + INDArray epsNext = epsNextOrig.permute(1, 0, 2, 3); + Convolution.col2im( + eps6d, epsNext, strides[0], strides[1], pad[0], pad[1], inH, inW, dilation[0], dilation[1]); + + Gradient retGradient = new DefaultGradient(); + if (getTypedLayerConfiguration().hasBias()) { + delta2d.sum(biasGradView, 1); // biasGradView is initialized/zeroed first in sum op + retGradient.setGradientFor(ConvolutionParamInitializer.BIAS_KEY, biasGradView); + } + retGradient.setGradientFor(ConvolutionParamInitializer.WEIGHT_KEY, weightGradView, 'c'); + + weightNoiseParams.clear(); + + epsNext = backpropDropOutIfPresent(epsNext); + + if (getTypedLayerConfiguration().getConvFormat() != CNN2DFormat.NCHW) { + epsNext = epsNext.permute(0, 2, 3, 1); // NCHW to NHWC } - protected void validateInputDepth(long inDepth) { - CNN2DFormat format = getTypedLayerConfiguration().getConvFormat(); - int dim = format == CNN2DFormat.NHWC ? 
3 : 1; - if (input.size(dim) != inDepth) { - String layerName = layerConfiguration.getName(); - if (layerName == null) - layerName = "(not named)"; + return new Pair<>(retGradient, epsNext); + } - String s = "Cannot do forward pass in Convolution layer (layer name = " + layerName - + ", layer index = " + index + "): input array channels does not match CNN layer configuration" - + " (data format = " + format + ", data input channels = " + input.size(dim) + ", " + getTypedLayerConfiguration().getConvFormat().dimensionNames() - + "=" + Arrays.toString(input.shape()) + "; expected" + " input channels = " + inDepth + ") " - + layerId(); + /** + * preOutput4d: Used so that ConvolutionLayer subclasses (such as Convolution1D) can maintain + * their standard non-4d preOutput method, while overriding this to return 4d activations (for use + * in backprop) without modifying the public API + */ + protected Pair preOutput4d( + boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) { + return preOutput(training, forBackprop, workspaceMgr); + } - int dimIfWrongFormat = format == CNN2DFormat.NHWC ? 1 : 3; - if(input.size(dimIfWrongFormat) == inDepth){ - //User might have passed NCHW data to a NHWC net, or vice versa? - s += "\n" + ConvolutionUtils.NCHW_NHWC_ERROR_MSG; - } + protected void validateInputRank() { + // Input validation: expect rank 4 matrix + if (input.rank() != 4) { + String layerName = layerConfiguration.getName(); + if (layerName == null) layerName = "(not named)"; + throw new DL4JInvalidInputException( + "Got rank " + + input.rank() + + " array as input to ConvolutionLayer (layer name = " + + layerName + + ", layer index = " + + index + + ") with shape " + + Arrays.toString(input.shape()) + + ". " + + "Expected rank 4 array with shape [minibatchSize, layerInputDepth, inputHeight, inputWidth]." + + (input.rank() == 2 + ? " (Wrong input type (see InputType.convolutionalFlat()) or wrong data type?)" + : "") + + " " + + layerId()); + } + } + protected void validateInputDepth(long inDepth) { + CNN2DFormat format = getTypedLayerConfiguration().getConvFormat(); + int dim = format == CNN2DFormat.NHWC ? 3 : 1; + if (input.size(dim) != inDepth) { + String layerName = layerConfiguration.getName(); + if (layerName == null) layerName = "(not named)"; - throw new DL4JInvalidInputException(s); - } + String s = + "Cannot do forward pass in Convolution layer (layer name = " + + layerName + + ", layer index = " + + index + + "): input array channels does not match CNN layer configuration" + + " (data format = " + + format + + ", data input channels = " + + input.size(dim) + + ", " + + getTypedLayerConfiguration().getConvFormat().dimensionNames() + + "=" + + Arrays.toString(input.shape()) + + "; expected" + + " input channels = " + + inDepth + + ") " + + layerId(); + + int dimIfWrongFormat = format == CNN2DFormat.NHWC ? 1 : 3; + if (input.size(dimIfWrongFormat) == inDepth) { + // User might have passed NCHW data to a NHWC net, or vice versa? + s += "\n" + Convolution2DUtils.NCHW_NHWC_ERROR_MSG; + } + + throw new DL4JInvalidInputException(s); + } + } + + /** + * PreOutput method that also returns the im2col2d array (if being called for backprop), as this + * can be re-used instead of being calculated again. + * + * @param training Train or test time (impacts dropout) + * @param forBackprop If true: return the im2col2d array for re-use during backprop. False: return + * null for second pair entry. Note that it may still be null in the case of CuDNN and the + * like. 
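+   *     When non-null, the returned im2col2d buffer has shape [miniBatch*outH*outW, inDepth*kH*kW].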
+ * @return Pair of arrays: preOutput (activations) and optionally the im2col2d array + */ + protected Pair preOutput( + boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) { + assertInputSet(false); + INDArray bias = getParamWithNoise(ConvolutionParamInitializer.BIAS_KEY, training, workspaceMgr); + INDArray weights = + getParamWithNoise(ConvolutionParamInitializer.WEIGHT_KEY, training, workspaceMgr); + + validateInputRank(); + + INDArray input = this.input.castTo(dataType); + INDArray inputOrig = input; + if (getTypedLayerConfiguration().getConvFormat() == CNN2DFormat.NHWC) { + input = input.permute(0, 3, 1, 2).dup(); // NHWC to NCHW } - /** - * PreOutput method that also returns the im2col2d array (if being called for backprop), as this can be re-used - * instead of being calculated again. - * - * @param training Train or test time (impacts dropout) - * @param forBackprop If true: return the im2col2d array for re-use during backprop. False: return null for second - * pair entry. Note that it may still be null in the case of CuDNN and the like. - * @return Pair of arrays: preOutput (activations) and optionally the im2col2d array - */ - protected Pair preOutput(boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) { - assertInputSet(false); - INDArray bias = getParamWithNoise(ConvolutionParamInitializer.BIAS_KEY, training, workspaceMgr); - INDArray weights = getParamWithNoise(ConvolutionParamInitializer.WEIGHT_KEY, training, workspaceMgr); + long miniBatch = input.size(0); + long outDepth = weights.size(0); + long inDepth = weights.size(1); + validateInputDepth(inDepth); - validateInputRank(); + long kH = weights.size(2); + long kW = weights.size(3); - INDArray input = this.input.castTo(dataType); - INDArray inputOrig = input; - if(getTypedLayerConfiguration().getConvFormat() == CNN2DFormat.NHWC) { - input = input.permute(0,3,1,2).dup(); //NHWC to NCHW + int[] dilation = getTypedLayerConfiguration().getDilation(); + int[] kernel = getTypedLayerConfiguration().getKernelSize(); + int[] strides = getTypedLayerConfiguration().getStride(); + + int[] pad; + int[] outSize; + if (convolutionMode == ConvolutionMode.Same) { + outSize = + Convolution2DUtils.getOutputSize( + input, + kernel, + strides, + null, + convolutionMode, + dilation, + CNN2DFormat.NCHW); // Note: hardcoded to NCHW due to permute earlier in this method + + if (input.size(2) > Integer.MAX_VALUE || input.size(3) > Integer.MAX_VALUE) + throw new ND4JArraySizeException(); + int[] inWidthHeight; + // if(layerConf().getCnn2dDataFormat() == CNN2DFormat.NCHW) + // TODO: Switch hardcoded state later. 
For now, convolution is implemented as + // switch to NCHW then permute back for NWHC + inWidthHeight = new int[] {(int) input.size(2), (int) input.size(3)}; + + /* else if(layerConf().getCnn2dDataFormat() == CNN2DFormat.NHWC) { + inWidthHeight = new int[] {(int) input.size(1), (int) input.size(2)}; + } + else + throw new IllegalStateException("No data format configured!");*/ + pad = + Convolution2DUtils.getSameModeTopLeftPadding( + outSize, inWidthHeight, kernel, strides, dilation); + } else { + pad = getTypedLayerConfiguration().getPadding(); + outSize = + Convolution2DUtils.getOutputSize( + input, + kernel, + strides, + pad, + convolutionMode, + dilation, + CNN2DFormat.NCHW); // Note: hardcoded to NCHW due to permute earlier in this method + } + + int outH = outSize[0]; + int outW = outSize[1]; + + if (helper != null + && (helperCountFail == 0 || !getTypedLayerConfiguration().isCudnnAllowFallback())) { + if (preOutput != null && forBackprop) { + return new Pair<>(preOutput, null); + } + + // For no-bias convolutional layers: use an empty (all 0s) value for biases + if (!hasBias()) { + if (dummyBias == null) { + try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { + dummyBias = Nd4j.create(1, getTypedLayerConfiguration().getNOut()); + } + } + bias = dummyBias; + } + + INDArray ret = null; + try { + ret = + helper.preOutput( + inputOrig, + weights, + bias, + kernel, + strides, + pad, + getTypedLayerConfiguration().getCudnnAlgoMode(), + getTypedLayerConfiguration().getCudnnFwdAlgo(), + convolutionMode, + dilation, + getTypedLayerConfiguration().getConvFormat(), + workspaceMgr); + } catch (ND4JOpProfilerException e) { + throw e; // NaN panic etc for debugging + } catch (Exception e) { + if (e.getMessage() != null && e.getMessage().contains("Failed to allocate")) { + // This is a memory exception - don't fallback to built-in implementation + throw e; } - long miniBatch = input.size(0); - long outDepth = weights.size(0); - long inDepth = weights.size(1); - validateInputDepth(inDepth); - - long kH = weights.size(2); - long kW = weights.size(3); - - - int[] dilation = getTypedLayerConfiguration().getDilation(); - int[] kernel = getTypedLayerConfiguration().getKernelSize(); - int[] strides = getTypedLayerConfiguration().getStride(); - - - - int[] pad; - int[] outSize; - if (convolutionMode == ConvolutionMode.Same) { - outSize = ConvolutionUtils.getOutputSize( - input, - kernel, - strides, - null, - convolutionMode, - dilation, - CNN2DFormat.NCHW); //Note: hardcoded to NCHW due to permute earlier in this method - - if (input.size(2) > Integer.MAX_VALUE || input.size(3) > Integer.MAX_VALUE) - throw new ND4JArraySizeException(); - int[] inWidthHeight; - // if(layerConf().getCnn2dDataFormat() == CNN2DFormat.NCHW) - //TODO: Switch hardcoded state later. 
For now, convolution is implemented as - //switch to NCHW then permute back for NWHC - inWidthHeight = new int[] {(int) input.size(2), (int) input.size(3)}; - - /* else if(layerConf().getCnn2dDataFormat() == CNN2DFormat.NHWC) { - inWidthHeight = new int[] {(int) input.size(1), (int) input.size(2)}; - } - else - throw new IllegalStateException("No data format configured!");*/ - pad = ConvolutionUtils.getSameModeTopLeftPadding( - outSize, - inWidthHeight, - kernel, - strides, - dilation); + if (getTypedLayerConfiguration().isCudnnAllowFallback()) { + helperCountFail++; + if (helper instanceof MKLDNNConvHelper) { + log.warn("MKL-DNN execution failed - falling back on built-in implementation", e); + } else { + log.warn("CuDNN execution failed - falling back on built-in implementation", e); + } } else { - pad = getTypedLayerConfiguration().getPadding(); - outSize = ConvolutionUtils.getOutputSize( - input, - kernel, - strides, - pad, - convolutionMode, - dilation, - CNN2DFormat.NCHW); //Note: hardcoded to NCHW due to permute earlier in this method + throw new RuntimeException( + "Error during ConvolutionLayer MKL/CuDNN helper forward pass - isCudnnAllowFallback() is set to false", + e); } - - int outH = outSize[0]; - int outW = outSize[1]; - - - if (helper != null && (helperCountFail == 0 || !getTypedLayerConfiguration().isCudnnAllowFallback())) { - if (preOutput != null && forBackprop) { - return new Pair<>(preOutput, null); - } - - //For no-bias convolutional layers: use an empty (all 0s) value for biases - if(!hasBias()){ - if(dummyBias == null){ - try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { - dummyBias = Nd4j.create(1, getTypedLayerConfiguration().getNOut()); - } - } - bias = dummyBias; - } - - INDArray ret = null; - try { - ret = helper.preOutput(inputOrig, weights, bias, kernel, strides, pad, getTypedLayerConfiguration().getCudnnAlgoMode(), - getTypedLayerConfiguration().getCudnnFwdAlgo(), convolutionMode, dilation, getTypedLayerConfiguration().getConvFormat(), workspaceMgr); - } catch (ND4JOpProfilerException e){ - throw e; //NaN panic etc for debugging - } catch (Exception e){ - if(e.getMessage() != null && e.getMessage().contains("Failed to allocate")){ - //This is a memory exception - don't fallback to built-in implementation - throw e; - } - - if(getTypedLayerConfiguration().isCudnnAllowFallback()) { - helperCountFail++; - if(helper instanceof MKLDNNConvHelper) { - log.warn("MKL-DNN execution failed - falling back on built-in implementation",e); - } else { - log.warn("CuDNN execution failed - falling back on built-in implementation",e); - } - } else { - throw new RuntimeException("Error during ConvolutionLayer MKL/CuDNN helper forward pass - isCudnnAllowFallback() is set to false", e); - } - } - if (ret != null) { - return new Pair<>(ret, null); - } - } - - if (preOutput != null && i2d != null && forBackprop) { - return new Pair<>(preOutput, i2d); - } - - //im2col in the required order: want [outW,outH,miniBatch,depthIn,kH,kW], but need to input [miniBatch,channels,kH,kW,outH,outW] given the current im2col implementation - //To get this: create an array of the order we want, permute it to the order required by im2col implementation, and then do im2col on that - //to get old order from required order: permute(0,3,4,5,1,2) - //Post reshaping: rows are such that minibatch varies slowest, outW fastest as we step through the rows post-reshape - INDArray col = Nd4j.createUninitialized(weights.dataType(), new long[] {miniBatch, outH, outW, inDepth, kH, kW}, 
'c'); - int[] permute = new int[]{0, 3, 4, 5, 1, 2}; - INDArray col2 = col.permute(permute); - INDArray im2ColIn = input.castTo(col2.dataType()); //No op if already (for example) float - if (kH > Integer.MAX_VALUE || kW > Integer.MAX_VALUE) - throw new ND4JArraySizeException(); - Convolution.im2col( - im2ColIn, - (int)kH, - (int)kW, - strides[0], strides[1], - pad[0], pad[1], - dilation[0], dilation[1], - convolutionMode == ConvolutionMode.Same, - col2); - - - INDArray im2col2d = Shape.newShapeNoCopy(col, new long[] {miniBatch * outH * outW, inDepth * kH * kW}, false); - - //Current order of weights: [depthOut,depthIn,kH,kW], c order - //Permute to give [kW,kH,depthIn,depthOut], f order - //Reshape to give [kW*kH*depthIn, depthOut]. This should always be zero-copy reshape, unless weights aren't in c order for some reason - INDArray permutedW = weights.permute(3, 2, 1, 0); - INDArray reshapedW = permutedW.reshape('f', kW * kH * inDepth, outDepth); - - //Do the MMUL; c and f orders in, f order out. output shape: [miniBatch*outH*outW,depthOut] - INDArray z = workspaceMgr.createUninitialized(ArrayType.ACTIVATIONS, weights.dataType(), new long[]{im2col2d.size(0), reshapedW.size(1)}, 'f'); - im2col2d.mmuli(reshapedW, z); - - //Add biases, before reshaping. Note that biases are [1,depthOut] and currently z is [miniBatch*outH*outW,depthOut] -> addiRowVector - if(getTypedLayerConfiguration().hasBias()){ - z.addiRowVector(bias); - } - - //Now, reshape to [outW,outH,miniBatch,outDepth], and permute to have correct output order: [miniBatch,outDepth,outH,outW]; - z = Shape.newShapeNoCopy(z, new long[] {outW, outH, miniBatch, outDepth}, true); - z = z.permute(2, 3, 1, 0); - - if (training && cacheMode != CacheMode.NONE && workspaceMgr.hasConfiguration(ArrayType.FF_CACHE) && workspaceMgr.isWorkspaceOpen(ArrayType.FF_CACHE)) { - try (MemoryWorkspace wsB = workspaceMgr.notifyScopeBorrowed(ArrayType.FF_CACHE)) { - i2d = im2col2d.unsafeDuplication(); - } - } - - if(getTypedLayerConfiguration().getConvFormat() == CNN2DFormat.NHWC) { - z = z.permute(0,2,3,1); //NCHW to NHWC - z = workspaceMgr.dup(ArrayType.ACTIVATIONS, z); - } - - return new Pair<>(z, forBackprop ? 
im2col2d : null); + } + if (ret != null) { + return new Pair<>(ret, null); + } } - @Override - public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) { - if (input == null) { - throw new IllegalArgumentException("Cannot perform forward pass with null input " + layerId()); + if (preOutput != null && i2d != null && forBackprop) { + return new Pair<>(preOutput, i2d); + } + + // im2col in the required order: want [outW,outH,miniBatch,depthIn,kH,kW], but need to input + // [miniBatch,channels,kH,kW,outH,outW] given the current im2col implementation + // To get this: create an array of the order we want, permute it to the order required by im2col + // implementation, and then do im2col on that + // to get old order from required order: permute(0,3,4,5,1,2) + // Post reshaping: rows are such that minibatch varies slowest, outW fastest as we step through + // the rows post-reshape + INDArray col = + Nd4j.createUninitialized( + weights.dataType(), new long[] {miniBatch, outH, outW, inDepth, kH, kW}, 'c'); + int[] permute = new int[] {0, 3, 4, 5, 1, 2}; + INDArray col2 = col.permute(permute); + INDArray im2ColIn = input.castTo(col2.dataType()); // No op if already (for example) float + if (kH > Integer.MAX_VALUE || kW > Integer.MAX_VALUE) throw new ND4JArraySizeException(); + Convolution.im2col( + im2ColIn, + (int) kH, + (int) kW, + strides[0], + strides[1], + pad[0], + pad[1], + dilation[0], + dilation[1], + convolutionMode == ConvolutionMode.Same, + col2); + + INDArray im2col2d = + Shape.newShapeNoCopy(col, new long[] {miniBatch * outH * outW, inDepth * kH * kW}, false); + + // Current order of weights: [depthOut,depthIn,kH,kW], c order + // Permute to give [kW,kH,depthIn,depthOut], f order + // Reshape to give [kW*kH*depthIn, depthOut]. This should always be zero-copy reshape, unless + // weights aren't in c order for some reason + INDArray permutedW = weights.permute(3, 2, 1, 0); + INDArray reshapedW = permutedW.reshape('f', kW * kH * inDepth, outDepth); + + // Do the MMUL; c and f orders in, f order out. output shape: [miniBatch*outH*outW,depthOut] + INDArray z = + workspaceMgr.createUninitialized( + ArrayType.ACTIVATIONS, + weights.dataType(), + new long[] {im2col2d.size(0), reshapedW.size(1)}, + 'f'); + im2col2d.mmuli(reshapedW, z); + + // Add biases, before reshaping. Note that biases are [1,depthOut] and currently z is + // [miniBatch*outH*outW,depthOut] -> addiRowVector + if (getTypedLayerConfiguration().hasBias()) { + z.addiRowVector(bias); + } + + // Now, reshape to [outW,outH,miniBatch,outDepth], and permute to have correct output order: + // [miniBatch,outDepth,outH,outW]; + z = Shape.newShapeNoCopy(z, new long[] {outW, outH, miniBatch, outDepth}, true); + z = z.permute(2, 3, 1, 0); + + if (training + && cacheMode != CacheMode.NONE + && workspaceMgr.hasConfiguration(ArrayType.FF_CACHE) + && workspaceMgr.isWorkspaceOpen(ArrayType.FF_CACHE)) { + try (MemoryWorkspace wsB = workspaceMgr.notifyScopeBorrowed(ArrayType.FF_CACHE)) { + i2d = im2col2d.unsafeDuplication(); + } + } + + if (getTypedLayerConfiguration().getConvFormat() == CNN2DFormat.NHWC) { + z = z.permute(0, 2, 3, 1); // NCHW to NHWC + z = workspaceMgr.dup(ArrayType.ACTIVATIONS, z); + } + + return new Pair<>(z, forBackprop ? 
im2col2d : null); + } + + @Override + public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) { + if (input == null) { + throw new IllegalArgumentException( + "Cannot perform forward pass with null input " + layerId()); + } + + if (cacheMode == null) cacheMode = CacheMode.NONE; + + applyDropOutIfNecessary(training, workspaceMgr); + + INDArray z = preOutput(training, false, workspaceMgr).getFirst(); + + // we do cache only if cache workspace exists. Skip otherwise + if (training + && cacheMode != CacheMode.NONE + && workspaceMgr.hasConfiguration(ArrayType.FF_CACHE) + && workspaceMgr.isWorkspaceOpen(ArrayType.FF_CACHE)) { + try (MemoryWorkspace wsB = workspaceMgr.notifyScopeBorrowed(ArrayType.FF_CACHE)) { + preOutput = z.unsafeDuplication(); + } + } + + // String afn = conf.getLayer().getActivationFunction(); + IActivation afn = getTypedLayerConfiguration().getActivationFn(); + + if (helper != null + && Shape.strideDescendingCAscendingF(z) + && (helperCountFail == 0 || !getTypedLayerConfiguration().isCudnnAllowFallback())) { + INDArray ret = null; + try { + ret = helper.activate(z, getTypedLayerConfiguration().getActivationFn(), training); + } catch (ND4JOpProfilerException e) { + throw e; // NaN panic etc for debugging + } catch (Exception e) { + if (e.getMessage() != null && e.getMessage().contains("Failed to allocate")) { + // This is a memory exception - don't fallback to built-in implementation + throw e; } - if (cacheMode == null) - cacheMode = CacheMode.NONE; - - applyDropOutIfNecessary(training, workspaceMgr); - - INDArray z = preOutput(training, false, workspaceMgr).getFirst(); - - // we do cache only if cache workspace exists. Skip otherwise - if (training && cacheMode != CacheMode.NONE && workspaceMgr.hasConfiguration(ArrayType.FF_CACHE) && workspaceMgr.isWorkspaceOpen(ArrayType.FF_CACHE)) { - try (MemoryWorkspace wsB = workspaceMgr.notifyScopeBorrowed(ArrayType.FF_CACHE)) { - preOutput = z.unsafeDuplication(); - } + if (getTypedLayerConfiguration().isCudnnAllowFallback()) { + helperCountFail++; + if (helper instanceof MKLDNNConvHelper) { + log.warn("MKL-DNN execution failed - falling back on built-in implementation", e); + } else { + log.warn("CuDNN execution failed - falling back on built-in implementation", e); + } + } else { + throw new RuntimeException( + "Error during ConvolutionLayer MKL/CuDNN helper forward pass - isCudnnAllowFallback() is set to false", + e); } + } - //String afn = conf.getLayer().getActivationFunction(); - IActivation afn = getTypedLayerConfiguration().getActivationFn(); - - if (helper != null && Shape.strideDescendingCAscendingF(z) && (helperCountFail == 0 || !getTypedLayerConfiguration().isCudnnAllowFallback())) { - INDArray ret = null; - try { - ret = helper.activate(z, getTypedLayerConfiguration().getActivationFn(), training); - } catch (ND4JOpProfilerException e){ - throw e; //NaN panic etc for debugging - } catch (Exception e) { - if (e.getMessage() != null && e.getMessage().contains("Failed to allocate")) { - //This is a memory exception - don't fallback to built-in implementation - throw e; - } - - if (getTypedLayerConfiguration().isCudnnAllowFallback()) { - helperCountFail++; - if (helper instanceof MKLDNNConvHelper) { - log.warn("MKL-DNN execution failed - falling back on built-in implementation", e); - } else { - log.warn("CuDNN execution failed - falling back on built-in implementation", e); - } - } else { - throw new RuntimeException("Error during ConvolutionLayer MKL/CuDNN helper forward pass - isCudnnAllowFallback() is 
set to false", e); - } - } - - if (ret != null) { - return ret; - } - } - - INDArray activation = afn.getActivation(z, training); - return activation; + if (ret != null) { + return ret; + } } - @Override - public boolean hasBias() { - return getTypedLayerConfiguration().hasBias(); - } - - @Override - public boolean isPretrainLayer() { - return false; - } - - @Override - public LayerHelper getHelper() { - return helper; - } - - @Override - public void fit(INDArray input, LayerWorkspaceMgr workspaceMgr) { - throw new UnsupportedOperationException("Not supported"); - } - - @Override - public void setParamsTable(INDArray paramsTable) { - //Override, as base layer does f order parameter flattening by default - setParams(paramsTable, 'c'); - } - - @Override - public Pair feedForwardMaskArray(INDArray maskArray, MaskState currentMaskState, int minibatchSize) { - if (maskArray == null) { - //For same mode (with stride 1): output activations size is always same size as input activations size -> mask array is same size - return new Pair<>(maskArray, currentMaskState); - } - - INDArray outMask = ConvolutionUtils.cnn2dMaskReduction(maskArray, getTypedLayerConfiguration().getKernelSize(), getTypedLayerConfiguration().getStride(), - getTypedLayerConfiguration().getPadding(), getTypedLayerConfiguration().getDilation(), getTypedLayerConfiguration().getConvolutionMode()); - return new Pair<>(outMask, currentMaskState); + INDArray activation = afn.getActivation(z, training); + return activation; + } + + @Override + public boolean hasBias() { + return getTypedLayerConfiguration().hasBias(); + } + + @Override + public boolean isPretrainLayer() { + return false; + } + + @Override + public LayerHelper getHelper() { + return helper; + } + + @Override + public void fit(INDArray input, LayerWorkspaceMgr workspaceMgr) { + throw new UnsupportedOperationException("Not supported"); + } + + @Override + public void setParamsTable(INDArray paramsTable) { + // Override, as base layer does f order parameter flattening by default + setParams(paramsTable, 'c'); + } + + @Override + public Pair feedForwardMaskArray( + INDArray maskArray, MaskState currentMaskState, int minibatchSize) { + if (maskArray == null) { + // For same mode (with stride 1): output activations size is always same size as input + // activations size -> mask array is same size + return new Pair<>(maskArray, currentMaskState); } + INDArray outMask = + Convolution2DUtils.cnn2dMaskReduction( + maskArray, + getTypedLayerConfiguration().getKernelSize(), + getTypedLayerConfiguration().getStride(), + getTypedLayerConfiguration().getPadding(), + getTypedLayerConfiguration().getDilation(), + getTypedLayerConfiguration().getConvolutionMode()); + return new Pair<>(outMask, currentMaskState); + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionNewLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionNewLayer.java new file mode 100644 index 000000000..18606f39d --- /dev/null +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionNewLayer.java @@ -0,0 +1,753 @@ +/* + * ****************************************************************************** + * * + * * + * * This program and the accompanying materials are made available under the + * * terms of the Apache License, Version 2.0 which is available at + * * https://www.apache.org/licenses/LICENSE-2.0. 
+ * * + * * See the NOTICE file distributed with this work for additional + * * information regarding copyright ownership. + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * * License for the specific language governing permissions and limitations + * * under the License. + * * + * * SPDX-License-Identifier: Apache-2.0 + * ***************************************************************************** + */ + +package org.deeplearning4j.nn.layers.convolution; + +import java.util.Arrays; + +import lombok.extern.slf4j.Slf4j; +import org.deeplearning4j.common.config.DL4JClassLoading; +import org.deeplearning4j.exception.DL4JInvalidInputException; +import org.deeplearning4j.nn.api.MaskState; +import org.deeplearning4j.nn.conf.CNN2DFormat; +import org.deeplearning4j.nn.conf.CacheMode; +import org.deeplearning4j.nn.conf.ConvolutionMode; +import org.deeplearning4j.nn.conf.layers.Convolution1DNew; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; +import org.deeplearning4j.nn.gradient.DefaultGradient; +import org.deeplearning4j.nn.gradient.Gradient; +import org.deeplearning4j.nn.layers.BaseLayer; +import org.deeplearning4j.nn.layers.LayerHelper; +import org.deeplearning4j.nn.layers.mkldnn.MKLDNNConvHelper; +import org.deeplearning4j.nn.params.ConvolutionParamInitializer; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.deeplearning4j.util.Convolution2DUtils; +import org.nd4j.common.primitives.Pair; +import org.nd4j.linalg.activations.IActivation; +import org.nd4j.linalg.api.buffer.DataType; +import org.nd4j.linalg.api.memory.MemoryWorkspace; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.api.shape.Shape; +import org.nd4j.linalg.convolution.Convolution; +import org.nd4j.linalg.exception.ND4JArraySizeException; +import org.nd4j.linalg.exception.ND4JOpProfilerException; +import org.nd4j.linalg.factory.Nd4j; + +@Slf4j +public class ConvolutionNewLayer< + LayerConf_T extends org.deeplearning4j.nn.conf.layers.Convolution1DNew> + extends BaseLayer { + + protected INDArray i2d; + protected ConvolutionHelper helper = null; + protected int helperCountFail = 0; + protected ConvolutionMode convolutionMode; + protected transient INDArray dummyBias; // Used only when: hasBias == false AND helpers are used + protected transient INDArray dummyBiasGrad; // As above + + + public ConvolutionNewLayer(LayerConfiguration conf, DataType dataType) { + super(conf, dataType); + initializeHelper(); + if (conf instanceof Convolution1DNew) { + convolutionMode = ((Convolution1DNew) conf).getConvolutionMode(); + } else if (conf instanceof org.deeplearning4j.nn.conf.layers.ConvolutionLayer) { + convolutionMode = + ((org.deeplearning4j.nn.conf.layers.ConvolutionLayer) conf).getConvolutionMode(); + } + } + + void initializeHelper() { + String backend = Nd4j.getExecutioner().getEnvironmentInformation().getProperty("backend"); + if ("CUDA".equalsIgnoreCase(backend)) { + helper = + DL4JClassLoading.createNewInstance( + "org.deeplearning4j.cuda.convolution.CudnnConvolutionHelper", + ConvolutionHelper.class, + dataType); + log.debug("CudnnConvolutionHelper successfully initialized"); + if (!helper.checkSupported()) { + helper = null; + } + } else if ("CPU".equalsIgnoreCase(backend)) { + helper = new MKLDNNConvHelper(dataType); + log.trace("Created 
MKLDNNConvHelper, layer {}", getTypedLayerConfiguration().getName()); + } + + if (helper != null && !helper.checkSupported()) { + log.debug("Removed helper {} as not supported", helper.getClass()); + helper = null; + } + } + + @Override + public Type type() { + return Type.CONVOLUTIONAL; + } + +/** +* + * @return +*/ + @Override + public Convolution1DNew getTypedLayerConfiguration() { + return super.getTypedLayerConfiguration(); + } + + @Override + public Pair backpropGradient( + INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { + assertInputSet(true); + INDArray weights = + getParamWithNoise(ConvolutionParamInitializer.WEIGHT_KEY, true, workspaceMgr); + INDArray bias = getParamWithNoise(ConvolutionParamInitializer.BIAS_KEY, true, workspaceMgr); + + INDArray input = this.input.castTo(dataType); // No op if correct type + if (epsilon.dataType() != dataType) epsilon = epsilon.castTo(dataType); + + INDArray origInput = input; + INDArray origEps = epsilon; + if (getTypedLayerConfiguration().getConvFormat() != CNN2DFormat.NCHW) { + input = input.permute(0, 3, 1, 2); // NHWC to NCHW + epsilon = epsilon.permute(0, 3, 1, 2); // NHWC to NCHW + } + + long miniBatch = input.size(0); + int inH = (int) input.size(2); + int inW = (int) input.size(3); + + long outDepth = weights.size(0); + long inDepth = weights.size(1); + int kH = (int) weights.size(2); + int kW = (int) weights.size(3); + + int[] dilation = getTypedLayerConfiguration().getDilation(); + int[] kernel = getTypedLayerConfiguration().getKernelSize(); + int[] strides = getTypedLayerConfiguration().getStride(); + int[] pad; + int[] outSize; + if (convolutionMode == ConvolutionMode.Same) { + outSize = + Convolution2DUtils.getOutputSize( + input, + kernel, + strides, + null, + convolutionMode, + dilation, + CNN2DFormat.NCHW); // Also performs validation + pad = + Convolution2DUtils.getSameModeTopLeftPadding( + outSize, new int[] {inH, inW}, kernel, strides, dilation); + } else { + pad = getTypedLayerConfiguration().getPadding(); + outSize = + Convolution2DUtils.getOutputSize( + input, + kernel, + strides, + pad, + convolutionMode, + dilation, + CNN2DFormat.NCHW); // Also performs validation + } + + int outH = outSize[0]; + int outW = outSize[1]; + + INDArray biasGradView = gradientViews.get(ConvolutionParamInitializer.BIAS_KEY); + INDArray weightGradView = + gradientViews.get( + ConvolutionParamInitializer.WEIGHT_KEY); // 4d, c order. 
Shape: [outDepth,inDepth,kH,kW] + INDArray weightGradView2df = + Shape.newShapeNoCopy(weightGradView, new long[] {outDepth, inDepth * kH * kW}, false) + .transpose(); + + INDArray delta; + IActivation afn = getTypedLayerConfiguration().getActivationFn(); + + Pair p = preOutput4d(true, true, workspaceMgr); + INDArray z = p.getFirst(); + CNN2DFormat f = getTypedLayerConfiguration().getConvFormat(); + if (f != CNN2DFormat.NCHW) { + z = z.permute(0, 3, 1, 2); // NHWC to NCHW + } + delta = afn.backprop(z, epsilon).getFirst(); // TODO handle activation function params + + if (helper != null + && (helperCountFail == 0 || !getTypedLayerConfiguration().isCudnnAllowFallback())) { + INDArray helperDelta = delta; + if (getTypedLayerConfiguration().getConvFormat() == CNN2DFormat.NHWC) + helperDelta = delta.permute(0, 2, 3, 1); // NCHW to NHWC + + if (!hasBias() && !(helper instanceof MKLDNNConvHelper)) { + // MKL-DNN supports no bias, CuDNN doesn't + if (dummyBiasGrad == null) { + try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { + dummyBiasGrad = Nd4j.create(1, getTypedLayerConfiguration().getNOut()); + } + } + biasGradView = dummyBiasGrad; + } + + Pair ret = null; + try { + ret = + helper.backpropGradient( + origInput, + weights, + bias, + helperDelta, + kernel, + strides, + pad, + biasGradView, + weightGradView, + afn, + getTypedLayerConfiguration().getCudnnAlgoMode(), + getTypedLayerConfiguration().getCudnnBwdFilterAlgo(), + getTypedLayerConfiguration().getCudnnBwdDataAlgo(), + convolutionMode, + dilation, + getTypedLayerConfiguration().getConvFormat(), + workspaceMgr); + } catch (ND4JOpProfilerException e) { + throw e; // NaN panic etc for debugging + } catch (Exception e) { + if (e.getMessage().contains("Failed to allocate")) { + // This is a memory exception - don't fallback to built-in implementation + throw e; + } + + if (getTypedLayerConfiguration().isCudnnAllowFallback()) { + helperCountFail++; + if (helper instanceof MKLDNNConvHelper) { + log.warn("MKL-DNN execution failed - falling back on built-in implementation", e); + } else { + log.warn("CuDNN execution failed - falling back on built-in implementation", e); + } + } else { + throw new RuntimeException( + "Error during ConvolutionLayer MKL/CuDNN helper backprop - isCudnnAllowFallback() is set to false", + e); + } + } + + if (ret != null) { + // Backprop dropout, if present + INDArray gradPostDropout = ret.getRight(); + gradPostDropout = backpropDropOutIfPresent(gradPostDropout); + ret.setSecond(gradPostDropout); + return ret; + } + } + + delta = delta.permute(1, 0, 2, 3); // To shape: [outDepth,miniBatch,outH,outW] + + // Note: due to the permute in preOut, and the fact that we essentially do a + // preOut.muli(epsilon), this reshape + // should be zero-copy; only possible exception being sometimes with the "identity" activation + // case + INDArray delta2d = + delta.reshape('c', outDepth, miniBatch * outH * outW); // Shape.newShapeNoCopy(delta,new + // int[]{outDepth,miniBatch*outH*outW},false); + + // Do im2col, but with order [miniB,outH,outW,depthIn,kH,kW]; but need to input + // [miniBatch,channels,kH,kW,outH,outW] given the current im2col implementation + // To get this: create an array of the order we want, permute it to the order required by im2col + // implementation, and then do im2col on that + // to get old order from required order: permute(0,3,4,5,1,2) + INDArray im2col2d = + p.getSecond(); // Re-use im2col2d array from forward pass if available; recalculate if not + if (im2col2d == null) { + 
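+      // p.getSecond() may be null here (e.g. when a cuDNN/MKL-DNN helper produced the pre-output,
+      // since the helper path never builds the im2col buffer), so it is rebuilt the same way as in
+      // preOutput(): col is [miniBatch, outH, outW, inDepth, kH, kW] in c order and col2 is the
+      // permuted view in the [miniBatch, channels, kH, kW, outH, outW] layout expected by im2col.
+      // Illustrative example (assumed values only): miniBatch=16, inDepth=3, kH=kW=3, outH=outW=26
+      // gives col shape [16, 26, 26, 3, 3, 3] and im2col2d shape [16*26*26, 3*3*3] = [10816, 27].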
INDArray col = + Nd4j.createUninitialized( + dataType, new long[] {miniBatch, outH, outW, inDepth, kH, kW}, 'c'); + INDArray col2 = col.permute(0, 3, 4, 5, 1, 2); + Convolution.im2col( + input, + kH, + kW, + strides[0], + strides[1], + pad[0], + pad[1], + dilation[0], + dilation[1], + convolutionMode == ConvolutionMode.Same, + col2); + // Shape im2col to 2d. Due to the permuting above, this should be a zero-copy reshape + im2col2d = col.reshape('c', miniBatch * outH * outW, inDepth * kH * kW); + } + + // Calculate weight gradients, using cc->c mmul. + // weightGradView2df is f order, but this is because it's transposed from c order + // Here, we are using the fact that AB = (B^T A^T)^T; output here (post transpose) is in c + // order, not usual f order + Nd4j.gemm(im2col2d, delta2d, weightGradView2df, true, true, 1.0, 0.0); + + // Flatten 4d weights to 2d... this again is a zero-copy op (unless weights are not originally + // in c order for some reason) + INDArray wPermuted = + weights.permute(3, 2, 1, 0); // Start with c order weights, switch order to f order + INDArray w2d = wPermuted.reshape('f', inDepth * kH * kW, outDepth); + + // Calculate epsilons for layer below, in 2d format (note: this is in 'image patch' format + // before col2im reduction) + // Note: cc -> f mmul here, then reshape to 6d in f order + INDArray epsNext2d = + w2d.mmul(delta2d); // TODO can we reuse im2col array instead of allocating new result array? + INDArray eps6d = + Shape.newShapeNoCopy(epsNext2d, new long[] {kW, kH, inDepth, outW, outH, miniBatch}, true); + + // Calculate epsilonNext by doing im2col reduction. + // Current col2im implementation expects input with order: [miniBatch,channels,kH,kW,outH,outW] + // currently have [kH,kW,inDepth,outW,outH,miniBatch] -> permute first + eps6d = eps6d.permute(5, 2, 1, 0, 4, 3); + INDArray epsNextOrig = + workspaceMgr.createUninitialized( + ArrayType.ACTIVATION_GRAD, + eps6d.dataType(), + new long[] {inDepth, miniBatch, inH, inW}, + 'c'); + + // Note: we are execute col2im in a way that the output array should be used in a stride 1 muli + // in the layer below... 
(same strides as zs/activations) + INDArray epsNext = epsNextOrig.permute(1, 0, 2, 3); + Convolution.col2im( + eps6d, epsNext, strides[0], strides[1], pad[0], pad[1], inH, inW, dilation[0], dilation[1]); + + Gradient retGradient = new DefaultGradient(); + if (getTypedLayerConfiguration().hasBias()) { + delta2d.sum(biasGradView, 1); // biasGradView is initialized/zeroed first in sum op + retGradient.setGradientFor(ConvolutionParamInitializer.BIAS_KEY, biasGradView); + } + retGradient.setGradientFor(ConvolutionParamInitializer.WEIGHT_KEY, weightGradView, 'c'); + + weightNoiseParams.clear(); + + epsNext = backpropDropOutIfPresent(epsNext); + + if (getTypedLayerConfiguration().getConvFormat() != CNN2DFormat.NCHW) { + epsNext = epsNext.permute(0, 2, 3, 1); // NCHW to NHWC + } + + return new Pair<>(retGradient, epsNext); + } + + /** + * preOutput4d: Used so that ConvolutionLayer subclasses (such as Convolution1D) can maintain + * their standard non-4d preOutput method, while overriding this to return 4d activations (for use + * in backprop) without modifying the public API + */ + protected Pair preOutput4d( + boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) { + return preOutput(training, forBackprop, workspaceMgr); + } + + protected void validateInputRank() { + // Input validation: expect rank 4 matrix + if (input.rank() != 4) { + String layerName = layerConfiguration.getName(); + if (layerName == null) layerName = "(not named)"; + throw new DL4JInvalidInputException( + "Got rank " + + input.rank() + + " array as input to ConvolutionLayer (layer name = " + + layerName + + ", layer index = " + + index + + ") with shape " + + Arrays.toString(input.shape()) + + ". " + + "Expected rank 4 array with shape [minibatchSize, layerInputDepth, inputHeight, inputWidth]." + + (input.rank() == 2 + ? " (Wrong input type (see InputType.convolutionalFlat()) or wrong data type?)" + : "") + + " " + + layerId()); + } + } + + protected void validateInputDepth(long inDepth) { + CNN2DFormat format = getTypedLayerConfiguration().getConvFormat(); + int dim = format == CNN2DFormat.NHWC ? 3 : 1; + if (input.size(dim) != inDepth) { + String layerName = layerConfiguration.getName(); + if (layerName == null) layerName = "(not named)"; + + String s = + "Cannot do forward pass in Convolution layer (layer name = " + + layerName + + ", layer index = " + + index + + "): input array channels does not match CNN layer configuration" + + " (data format = " + + format + + ", data input channels = " + + input.size(dim) + + ", " + + getTypedLayerConfiguration().getConvFormat().dimensionNames() + + "=" + + Arrays.toString(input.shape()) + + "; expected" + + " input channels = " + + inDepth + + ") " + + layerId(); + + int dimIfWrongFormat = format == CNN2DFormat.NHWC ? 1 : 3; + if (input.size(dimIfWrongFormat) == inDepth) { + // User might have passed NCHW data to a NHWC net, or vice versa? + s += "\n" + Convolution2DUtils.NCHW_NHWC_ERROR_MSG; + } + + throw new DL4JInvalidInputException(s); + } + } + + /** + * PreOutput method that also returns the im2col2d array (if being called for backprop), as this + * can be re-used instead of being calculated again. + * + * @param training Train or test time (impacts dropout) + * @param forBackprop If true: return the im2col2d array for re-use during backprop. False: return + * null for second pair entry. Note that it may still be null in the case of CuDNN and the + * like. 
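+   *     A non-null im2col2d buffer has shape [miniBatch*outH*outW, inDepth*kH*kW], matching the 2d
+   *     view later used for the weight-gradient GEMM in backpropGradient().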
+ * @return Pair of arrays: preOutput (activations) and optionally the im2col2d array + */ + protected Pair preOutput( + boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) { + assertInputSet(false); + INDArray bias = getParamWithNoise(ConvolutionParamInitializer.BIAS_KEY, training, workspaceMgr); + INDArray weights = + getParamWithNoise(ConvolutionParamInitializer.WEIGHT_KEY, training, workspaceMgr); + + validateInputRank(); + + INDArray input = this.input.castTo(dataType); + INDArray inputOrig = input; + if (getTypedLayerConfiguration().getConvFormat() == CNN2DFormat.NHWC) { + input = input.permute(0, 3, 1, 2).dup(); // NHWC to NCHW + } + + long miniBatch = input.size(0); + long outDepth = weights.size(0); + long inDepth = weights.size(1); + validateInputDepth(inDepth); + + long kH = weights.size(2); + long kW = weights.size(3); + + int[] dilation = getTypedLayerConfiguration().getDilation(); + int[] kernel = getTypedLayerConfiguration().getKernelSize(); + int[] strides = getTypedLayerConfiguration().getStride(); + + int[] pad; + int[] outSize; + if (convolutionMode == ConvolutionMode.Same) { + outSize = + Convolution2DUtils.getOutputSize( + input, + kernel, + strides, + null, + convolutionMode, + dilation, + CNN2DFormat.NCHW); // Note: hardcoded to NCHW due to permute earlier in this method + + if (input.size(2) > Integer.MAX_VALUE || input.size(3) > Integer.MAX_VALUE) + throw new ND4JArraySizeException(); + int[] inWidthHeight; + // if(layerConf().getCnn2dDataFormat() == CNN2DFormat.NCHW) + // TODO: Switch hardcoded state later. For now, convolution is implemented as + // switch to NCHW then permute back for NHWC + inWidthHeight = new int[] {(int) input.size(2), (int) input.size(3)}; + + /* else if(layerConf().getCnn2dDataFormat() == CNN2DFormat.NHWC) { + inWidthHeight = new int[] {(int) input.size(1), (int) input.size(2)}; + } + else + throw new IllegalStateException("No data format configured!");*/ + pad = + Convolution2DUtils.getSameModeTopLeftPadding( + outSize, inWidthHeight, kernel, strides, dilation); + } else { + pad = getTypedLayerConfiguration().getPadding(); + outSize = + Convolution2DUtils.getOutputSize( + input, + kernel, + strides, + pad, + convolutionMode, + dilation, + CNN2DFormat.NCHW); // Note: hardcoded to NCHW due to permute earlier in this method + } + + int outH = outSize[0]; + int outW = outSize[1]; + + if (helper != null + && (helperCountFail == 0 || !getTypedLayerConfiguration().isCudnnAllowFallback())) { + if (preOutput != null && forBackprop) { + return new Pair<>(preOutput, null); + } + + // For no-bias convolutional layers: use an empty (all 0s) value for biases + if (!hasBias()) { + if (dummyBias == null) { + try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { + dummyBias = Nd4j.create(1, getTypedLayerConfiguration().getNOut()); + } + } + bias = dummyBias; + } + + INDArray ret = null; + try { + ret = + helper.preOutput( + inputOrig, + weights, + bias, + kernel, + strides, + pad, + getTypedLayerConfiguration().getCudnnAlgoMode(), + getTypedLayerConfiguration().getCudnnFwdAlgo(), + convolutionMode, + dilation, + getTypedLayerConfiguration().getConvFormat(), + workspaceMgr); + } catch (ND4JOpProfilerException e) { + throw e; // NaN panic etc for debugging + } catch (Exception e) { + if (e.getMessage() != null && e.getMessage().contains("Failed to allocate")) { + // This is a memory exception - don't fallback to built-in implementation + throw e; + } + + if (getTypedLayerConfiguration().isCudnnAllowFallback()) { +
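For the ConvolutionMode.Same branch earlier in this method, the output size is computed first and the top/left padding is then derived from it. A small worked example of that arithmetic, in plain Java with hypothetical sizes, assuming the usual 'same'-padding convention of out = ceil(in / stride) with any odd padding pixel going to the bottom/right (the exact split is handled by Convolution2DUtils.getSameModeTopLeftPadding in the code above):

public class SamePaddingArithmeticSketch {
  public static void main(String[] args) {
    int in = 28, k = 3, s = 2, d = 1;                       // illustrative input size, kernel, stride, dilation
    int effK = (k - 1) * d + 1;                             // effective (dilated) kernel size = 3
    int out = (in + s - 1) / s;                             // ceil(28 / 2) = 14
    int padTotal = Math.max(0, (out - 1) * s + effK - in);  // (14 - 1) * 2 + 3 - 28 = 1
    int padTopLeft = padTotal / 2;                          // 0; the remaining pixel pads bottom/right
    System.out.println("outSize=" + out + ", topLeftPad=" + padTopLeft);
  }
}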
helperCountFail++; + if (helper instanceof MKLDNNConvHelper) { + log.warn("MKL-DNN execution failed - falling back on built-in implementation", e); + } else { + log.warn("CuDNN execution failed - falling back on built-in implementation", e); + } + } else { + throw new RuntimeException( + "Error during ConvolutionLayer MKL/CuDNN helper forward pass - isCudnnAllowFallback() is set to false", + e); + } + } + if (ret != null) { + return new Pair<>(ret, null); + } + } + + if (preOutput != null && i2d != null && forBackprop) { + return new Pair<>(preOutput, i2d); + } + + // im2col in the required order: want [outW,outH,miniBatch,depthIn,kH,kW], but need to input + // [miniBatch,channels,kH,kW,outH,outW] given the current im2col implementation + // To get this: create an array of the order we want, permute it to the order required by im2col + // implementation, and then do im2col on that + // to get old order from required order: permute(0,3,4,5,1,2) + // Post reshaping: rows are such that minibatch varies slowest, outW fastest as we step through + // the rows post-reshape + INDArray col = + Nd4j.createUninitialized( + weights.dataType(), new long[] {miniBatch, outH, outW, inDepth, kH, kW}, 'c'); + int[] permute = new int[] {0, 3, 4, 5, 1, 2}; + INDArray col2 = col.permute(permute); + INDArray im2ColIn = input.castTo(col2.dataType()); // No op if already (for example) float + if (kH > Integer.MAX_VALUE || kW > Integer.MAX_VALUE) throw new ND4JArraySizeException(); + Convolution.im2col( + im2ColIn, + (int) kH, + (int) kW, + strides[0], + strides[1], + pad[0], + pad[1], + dilation[0], + dilation[1], + convolutionMode == ConvolutionMode.Same, + col2); + + INDArray im2col2d = + Shape.newShapeNoCopy(col, new long[] {miniBatch * outH * outW, inDepth * kH * kW}, false); + + // Current order of weights: [depthOut,depthIn,kH,kW], c order + // Permute to give [kW,kH,depthIn,depthOut], f order + // Reshape to give [kW*kH*depthIn, depthOut]. This should always be zero-copy reshape, unless + // weights aren't in c order for some reason + INDArray permutedW = weights.permute(3, 2, 1, 0); + INDArray reshapedW = permutedW.reshape('f', kW * kH * inDepth, outDepth); + + // Do the MMUL; c and f orders in, f order out. output shape: [miniBatch*outH*outW,depthOut] + INDArray z = + workspaceMgr.createUninitialized( + ArrayType.ACTIVATIONS, + weights.dataType(), + new long[] {im2col2d.size(0), reshapedW.size(1)}, + 'f'); + im2col2d.mmuli(reshapedW, z); + + // Add biases, before reshaping. Note that biases are [1,depthOut] and currently z is + // [miniBatch*outH*outW,depthOut] -> addiRowVector + if (getTypedLayerConfiguration().hasBias()) { + z.addiRowVector(bias); + } + + // Now, reshape to [outW,outH,miniBatch,outDepth], and permute to have correct output order: + // [miniBatch,outDepth,outH,outW]; + z = Shape.newShapeNoCopy(z, new long[] {outW, outH, miniBatch, outDepth}, true); + z = z.permute(2, 3, 1, 0); + + if (training + && cacheMode != CacheMode.NONE + && workspaceMgr.hasConfiguration(ArrayType.FF_CACHE) + && workspaceMgr.isWorkspaceOpen(ArrayType.FF_CACHE)) { + try (MemoryWorkspace wsB = workspaceMgr.notifyScopeBorrowed(ArrayType.FF_CACHE)) { + i2d = im2col2d.unsafeDuplication(); + } + } + + if (getTypedLayerConfiguration().getConvFormat() == CNN2DFormat.NHWC) { + z = z.permute(0, 2, 3, 1); // NCHW to NHWC + z = workspaceMgr.dup(ArrayType.ACTIVATIONS, z); + } + + return new Pair<>(z, forBackprop ? 
im2col2d : null); + } + + @Override + public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) { + if (input == null) { + throw new IllegalArgumentException( + "Cannot perform forward pass with null input " + layerId()); + } + + if (cacheMode == null) cacheMode = CacheMode.NONE; + + applyDropOutIfNecessary(training, workspaceMgr); + + INDArray z = preOutput(training, false, workspaceMgr).getFirst(); + + // we do cache only if cache workspace exists. Skip otherwise + if (training + && cacheMode != CacheMode.NONE + && workspaceMgr.hasConfiguration(ArrayType.FF_CACHE) + && workspaceMgr.isWorkspaceOpen(ArrayType.FF_CACHE)) { + try (MemoryWorkspace wsB = workspaceMgr.notifyScopeBorrowed(ArrayType.FF_CACHE)) { + preOutput = z.unsafeDuplication(); + } + } + + // String afn = conf.getLayer().getActivationFunction(); + IActivation afn = getTypedLayerConfiguration().getActivationFn(); + + if (helper != null + && Shape.strideDescendingCAscendingF(z) + && (helperCountFail == 0 || !getTypedLayerConfiguration().isCudnnAllowFallback())) { + INDArray ret = null; + try { + ret = helper.activate(z, getTypedLayerConfiguration().getActivationFn(), training); + } catch (ND4JOpProfilerException e) { + throw e; // NaN panic etc for debugging + } catch (Exception e) { + if (e.getMessage() != null && e.getMessage().contains("Failed to allocate")) { + // This is a memory exception - don't fallback to built-in implementation + throw e; + } + + if (getTypedLayerConfiguration().isCudnnAllowFallback()) { + helperCountFail++; + if (helper instanceof MKLDNNConvHelper) { + log.warn("MKL-DNN execution failed - falling back on built-in implementation", e); + } else { + log.warn("CuDNN execution failed - falling back on built-in implementation", e); + } + } else { + throw new RuntimeException( + "Error during ConvolutionLayer MKL/CuDNN helper forward pass - isCudnnAllowFallback() is set to false", + e); + } + } + + if (ret != null) { + return ret; + } + } + + INDArray activation = afn.getActivation(z, training); + return activation; + } + + @Override + public boolean hasBias() { + return getTypedLayerConfiguration().hasBias(); + } + + @Override + public boolean isPretrainLayer() { + return false; + } + + @Override + public LayerHelper getHelper() { + return helper; + } + + @Override + public void fit(INDArray input, LayerWorkspaceMgr workspaceMgr) { + throw new UnsupportedOperationException("Not supported"); + } + + @Override + public void setParamsTable(INDArray paramsTable) { + // Override, as base layer does f order parameter flattening by default + setParams(paramsTable, 'c'); + } + + @Override + public Pair feedForwardMaskArray( + INDArray maskArray, MaskState currentMaskState, int minibatchSize) { + if (maskArray == null) { + // For same mode (with stride 1): output activations size is always same size as input + // activations size -> mask array is same size + return new Pair<>(maskArray, currentMaskState); + } + + INDArray outMask = + Convolution2DUtils.cnn2dMaskReduction( + maskArray, + getTypedLayerConfiguration().getKernelSize(), + getTypedLayerConfiguration().getStride(), + getTypedLayerConfiguration().getPadding(), + getTypedLayerConfiguration().getDilation(), + getTypedLayerConfiguration().getConvolutionMode()); + return new Pair<>(outMask, currentMaskState); + } +} diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Cropping1DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Cropping1DLayer.java index 
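The built-in preOutput path above lowers the 2d convolution to one large matrix multiply over im2col patches. A back-of-the-envelope shape check, in plain Java with hypothetical sizes (stride 1, no padding), of the 2d shapes that the reshape and permute steps above produce:

import java.util.Arrays;

public class Im2colShapeSketch {
  public static void main(String[] args) {
    long miniBatch = 16, inDepth = 3, inH = 28, inW = 28;  // illustrative sizes only
    long outDepth = 8, kH = 3, kW = 3, stride = 1;         // no padding ("valid"-style output)
    long outH = (inH - kH) / stride + 1;                   // 26
    long outW = (inW - kW) / stride + 1;                   // 26
    long[] im2col2d = {miniBatch * outH * outW, inDepth * kH * kW}; // [10816, 27]
    long[] w2d = {kW * kH * inDepth, outDepth};                     // [27, 8], reshaped from [outDepth, inDepth, kH, kW]
    long[] z2d = {im2col2d[0], w2d[1]};                             // [10816, 8], later reshaped/permuted to [16, 8, 26, 26]
    System.out.println(Arrays.toString(im2col2d) + " x " + Arrays.toString(w2d) + " -> " + Arrays.toString(z2d));
  }
}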
94f752a6e..7064c9af1 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Cropping1DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Cropping1DLayer.java @@ -20,26 +20,22 @@ package org.deeplearning4j.nn.layers.convolution; -import java.util.Map; +import static org.nd4j.linalg.indexing.NDArrayIndex.all; +import static org.nd4j.linalg.indexing.NDArrayIndex.interval; + import lombok.val; import org.deeplearning4j.nn.api.Layer; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; - import org.deeplearning4j.nn.conf.layers.convolutional.Cropping1D; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.AbstractLayer; -import org.deeplearning4j.nn.layers.BaseLayer; import org.deeplearning4j.nn.workspace.ArrayType; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.memory.MemoryWorkspace; import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.common.primitives.Pair; - -import static org.nd4j.linalg.indexing.NDArrayIndex.all; -import static org.nd4j.linalg.indexing.NDArrayIndex.interval; public class Cropping1DLayer extends AbstractLayer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Cropping2DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Cropping2DLayer.java index 83f17f216..df7727d87 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Cropping2DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Cropping2DLayer.java @@ -20,6 +20,9 @@ package org.deeplearning4j.nn.layers.convolution; +import static org.nd4j.linalg.indexing.NDArrayIndex.all; +import static org.nd4j.linalg.indexing.NDArrayIndex.interval; + import lombok.val; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.conf.CNN2DFormat; @@ -27,14 +30,11 @@ import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.AbstractLayer; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.common.primitives.Pair; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import org.deeplearning4j.nn.workspace.ArrayType; - -import static org.nd4j.linalg.indexing.NDArrayIndex.all; -import static org.nd4j.linalg.indexing.NDArrayIndex.interval; public class Cropping2DLayer extends AbstractLayer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Cropping3DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Cropping3DLayer.java index 5c37b665d..e6d77f314 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Cropping3DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Cropping3DLayer.java @@ -20,21 +20,20 @@ package org.deeplearning4j.nn.layers.convolution; +import static org.nd4j.linalg.indexing.NDArrayIndex.all; 
+import static org.nd4j.linalg.indexing.NDArrayIndex.interval; + import lombok.val; import org.deeplearning4j.nn.api.Layer; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.AbstractLayer; import org.deeplearning4j.nn.workspace.ArrayType; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.common.primitives.Pair; - -import static org.nd4j.linalg.indexing.NDArrayIndex.all; -import static org.nd4j.linalg.indexing.NDArrayIndex.interval; public class Cropping3DLayer extends AbstractLayer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Deconvolution2DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Deconvolution2DLayer.java index b2c2f09c0..0761a1189 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Deconvolution2DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Deconvolution2DLayer.java @@ -20,6 +20,7 @@ package org.deeplearning4j.nn.layers.convolution; +import java.util.Arrays; import lombok.val; import org.deeplearning4j.exception.DL4JInvalidInputException; import org.deeplearning4j.nn.conf.CNN2DFormat; @@ -29,7 +30,10 @@ import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.params.DeconvolutionParamInitializer; -import org.deeplearning4j.util.ConvolutionUtils; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.deeplearning4j.util.Convolution2DUtils; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; @@ -37,11 +41,6 @@ import org.nd4j.linalg.api.ops.CustomOp; import org.nd4j.linalg.api.ops.DynamicCustomOp; import org.nd4j.linalg.api.shape.Shape; import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.common.primitives.Pair; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import org.deeplearning4j.nn.workspace.ArrayType; - -import java.util.Arrays; public class Deconvolution2DLayer extends ConvolutionLayer { @@ -87,7 +86,7 @@ public class Deconvolution2DLayer extends ConvolutionLayer { int[] pad; if (convolutionMode == ConvolutionMode.Same) { int[] outSize = new int[]{(int)epsilon.size(hDim), (int)epsilon.size(wDim)}; - pad = ConvolutionUtils.getSameModeTopLeftPadding(outSize, new int[] {(int)inH, (int)inW}, kernel, strides, dilation); + pad = Convolution2DUtils.getSameModeTopLeftPadding(outSize, new int[] {(int)inH, (int)inW}, kernel, strides, dilation); } else { pad = getTypedLayerConfiguration().getPadding(); } @@ -190,7 +189,7 @@ public class Deconvolution2DLayer extends ConvolutionLayer { int dimIfWrongFormat = format == CNN2DFormat.NHWC ? 1 : 3; if(input.size(dimIfWrongFormat) == inDepth){ //User might have passed NCHW data to a NHWC net, or vice versa? 
- s += "\n" + ConvolutionUtils.NCHW_NHWC_ERROR_MSG; + s += "\n" + Convolution2DUtils.NCHW_NHWC_ERROR_MSG; } throw new DL4JInvalidInputException(s); @@ -205,12 +204,12 @@ public class Deconvolution2DLayer extends ConvolutionLayer { int[] pad; int[] outSize; if (convolutionMode == ConvolutionMode.Same) { - outSize = ConvolutionUtils.getDeconvolutionOutputSize(input, kernel, strides, null, convolutionMode, dilation, format); //Also performs validation - pad = ConvolutionUtils.getSameModeTopLeftPadding(outSize, new int[] {(int) input.size(hDim), (int) input.size(wDim)}, kernel, + outSize = Convolution2DUtils.getDeconvolutionOutputSize(input, kernel, strides, null, convolutionMode, dilation, format); //Also performs validation + pad = Convolution2DUtils.getSameModeTopLeftPadding(outSize, new int[] {(int) input.size(hDim), (int) input.size(wDim)}, kernel, strides, dilation ); } else { pad = getTypedLayerConfiguration().getPadding(); - outSize = ConvolutionUtils.getDeconvolutionOutputSize(input, kernel, strides, pad, convolutionMode, dilation, format); //Also performs validation + outSize = Convolution2DUtils.getDeconvolutionOutputSize(input, kernel, strides, pad, convolutionMode, dilation, format); //Also performs validation } long outH = outSize[0]; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Deconvolution3DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Deconvolution3DLayer.java index bc78592be..b51971494 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Deconvolution3DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Deconvolution3DLayer.java @@ -20,6 +20,7 @@ package org.deeplearning4j.nn.layers.convolution; +import java.util.Arrays; import lombok.val; import org.deeplearning4j.exception.DL4JInvalidInputException; import org.deeplearning4j.nn.conf.CacheMode; @@ -33,17 +34,15 @@ import org.deeplearning4j.nn.layers.BaseLayer; import org.deeplearning4j.nn.params.DeconvolutionParamInitializer; import org.deeplearning4j.nn.workspace.ArrayType; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import org.deeplearning4j.util.ConvolutionUtils; +import org.deeplearning4j.util.Convolution2DUtils; +import org.nd4j.common.primitives.Pair; +import org.nd4j.common.util.ArrayUtil; import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.CustomOp; import org.nd4j.linalg.api.ops.DynamicCustomOp; import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.common.primitives.Pair; -import org.nd4j.common.util.ArrayUtil; - -import java.util.Arrays; public class Deconvolution3DLayer extends BaseLayer { @@ -154,11 +153,11 @@ public class Deconvolution3DLayer extends BaseLayer { long[] outSize; int[] inSize = df == Convolution3D.DataFormat.NCDHW ? 
new int[]{(int)input.size(2), (int)input.size(3), (int)input.size(4)} : new int[]{(int)input.size(1), (int)input.size(2), (int)input.size(3)}; if (cm == ConvolutionMode.Same) { - outSize = ConvolutionUtils.getDeconvolution3DOutputSize(input, kernel, strides, null, dilation, cm, getTypedLayerConfiguration().getDataFormat()); //Also performs validation - pad = ConvolutionUtils.getSameModeTopLeftPadding(ArrayUtil.toInts(outSize), inSize, kernel, strides, dilation ); + outSize = Convolution2DUtils.getDeconvolution3DOutputSize(input, kernel, strides, null, dilation, cm, getTypedLayerConfiguration().getDataFormat()); //Also performs validation + pad = Convolution2DUtils.getSameModeTopLeftPadding(ArrayUtil.toInts(outSize), inSize, kernel, strides, dilation ); } else { pad = getTypedLayerConfiguration().getPadding(); - outSize = ConvolutionUtils.getDeconvolution3DOutputSize(input, kernel, strides, pad, dilation, cm, getTypedLayerConfiguration().getDataFormat()); //Also performs validation + outSize = Convolution2DUtils.getDeconvolution3DOutputSize(input, kernel, strides, pad, dilation, cm, getTypedLayerConfiguration().getDataFormat()); //Also performs validation } long outH = outSize[0]; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/DepthwiseConvolution2DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/DepthwiseConvolution2DLayer.java index 8b7d2daa2..98e76609b 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/DepthwiseConvolution2DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/DepthwiseConvolution2DLayer.java @@ -20,6 +20,7 @@ package org.deeplearning4j.nn.layers.convolution; +import java.util.Arrays; import lombok.val; import org.deeplearning4j.exception.DL4JInvalidInputException; import org.deeplearning4j.nn.conf.CNN2DFormat; @@ -31,7 +32,8 @@ import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.params.DepthwiseConvolutionParamInitializer; import org.deeplearning4j.nn.workspace.ArrayType; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import org.deeplearning4j.util.ConvolutionUtils; +import org.deeplearning4j.util.Convolution2DUtils; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; @@ -39,9 +41,6 @@ import org.nd4j.linalg.api.ops.CustomOp; import org.nd4j.linalg.api.ops.DynamicCustomOp; import org.nd4j.linalg.exception.ND4JArraySizeException; import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.common.primitives.Pair; - -import java.util.Arrays; public class DepthwiseConvolution2DLayer extends ConvolutionLayer { @@ -86,12 +85,12 @@ public class DepthwiseConvolution2DLayer extends ConvolutionLayer { int[] strides = getTypedLayerConfiguration().getStride(); int[] pad; if (convolutionMode == ConvolutionMode.Same) { - int[] outSize = ConvolutionUtils.getOutputSize( + int[] outSize = Convolution2DUtils.getOutputSize( input, kernel, strides, null, convolutionMode, dilation, format); - pad = ConvolutionUtils.getSameModeTopLeftPadding(outSize, new int[]{inH, inW}, kernel, strides, dilation); + pad = Convolution2DUtils.getSameModeTopLeftPadding(outSize, new int[]{inH, inW}, kernel, strides, dilation); } else { pad = getTypedLayerConfiguration().getPadding(); - ConvolutionUtils.getOutputSize(input, kernel, strides, pad, convolutionMode, dilation, format); + 
Convolution2DUtils.getOutputSize(input, kernel, strides, pad, convolutionMode, dilation, format); } INDArray biasGradView = gradientViews.get(DepthwiseConvolutionParamInitializer.BIAS_KEY); @@ -188,7 +187,7 @@ public class DepthwiseConvolution2DLayer extends ConvolutionLayer { int dimIfWrongFormat = format == CNN2DFormat.NHWC ? 1 : 3; if(input.size(dimIfWrongFormat) == inDepth){ //User might have passed NCHW data to a NHWC net, or vice versa? - s += "\n" + ConvolutionUtils.NCHW_NHWC_ERROR_MSG; + s += "\n" + Convolution2DUtils.NCHW_NHWC_ERROR_MSG; } throw new DL4JInvalidInputException(s); @@ -203,16 +202,16 @@ public class DepthwiseConvolution2DLayer extends ConvolutionLayer { int[] pad; int[] outSize; if (convolutionMode == ConvolutionMode.Same) { - outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, null, convolutionMode, dilation, format); + outSize = Convolution2DUtils.getOutputSize(input, kernel, strides, null, convolutionMode, dilation, format); if (input.size(2) > Integer.MAX_VALUE || input.size(3) > Integer.MAX_VALUE) { throw new ND4JArraySizeException(); } - pad = ConvolutionUtils.getSameModeTopLeftPadding( + pad = Convolution2DUtils.getSameModeTopLeftPadding( outSize, new int[]{(int) input.size(nchw ? 2 : 1), (int) input.size(nchw ? 3 : 2)}, kernel, strides, dilation); } else { pad = getTypedLayerConfiguration().getPadding(); - outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, pad, convolutionMode, dilation, format); + outSize = Convolution2DUtils.getOutputSize(input, kernel, strides, pad, convolutionMode, dilation, format); } long outH = outSize[0]; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SeparableConvolution2DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SeparableConvolution2DLayer.java index 60533ee2a..8a2da7f69 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SeparableConvolution2DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SeparableConvolution2DLayer.java @@ -20,6 +20,7 @@ package org.deeplearning4j.nn.layers.convolution; +import java.util.Arrays; import lombok.val; import org.deeplearning4j.exception.DL4JInvalidInputException; import org.deeplearning4j.nn.conf.CNN2DFormat; @@ -30,7 +31,10 @@ import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.params.ConvolutionParamInitializer; import org.deeplearning4j.nn.params.SeparableConvolutionParamInitializer; -import org.deeplearning4j.util.ConvolutionUtils; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.deeplearning4j.util.Convolution2DUtils; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; @@ -38,11 +42,6 @@ import org.nd4j.linalg.api.ops.CustomOp; import org.nd4j.linalg.api.ops.DynamicCustomOp; import org.nd4j.linalg.exception.ND4JArraySizeException; import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.common.primitives.Pair; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import org.deeplearning4j.nn.workspace.ArrayType; - -import java.util.Arrays; public class SeparableConvolution2DLayer extends ConvolutionLayer { @@ -90,11 +89,11 @@ public class SeparableConvolution2DLayer extends ConvolutionLayer { int[] strides = 
getTypedLayerConfiguration().getStride(); int[] pad; if (convolutionMode == ConvolutionMode.Same) { - int[] outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, null, convolutionMode, dilation, format); //Also performs validation - pad = ConvolutionUtils.getSameModeTopLeftPadding(outSize, new int[] {inH, inW}, kernel, strides, dilation); + int[] outSize = Convolution2DUtils.getOutputSize(input, kernel, strides, null, convolutionMode, dilation, format); //Also performs validation + pad = Convolution2DUtils.getSameModeTopLeftPadding(outSize, new int[] {inH, inW}, kernel, strides, dilation); } else { pad = getTypedLayerConfiguration().getPadding(); - ConvolutionUtils.getOutputSize(input, kernel, strides, pad, convolutionMode, dilation, format); //Also performs validation + Convolution2DUtils.getOutputSize(input, kernel, strides, pad, convolutionMode, dilation, format); //Also performs validation } INDArray biasGradView = gradientViews.get(SeparableConvolutionParamInitializer.BIAS_KEY); @@ -206,7 +205,7 @@ public class SeparableConvolution2DLayer extends ConvolutionLayer { int dimIfWrongFormat = 1; if(input.size(dimIfWrongFormat) == inDepth){ //User might have passed NCHW data to a NHWC net, or vice versa? - s += "\n" + ConvolutionUtils.NCHW_NHWC_ERROR_MSG; + s += "\n" + Convolution2DUtils.NCHW_NHWC_ERROR_MSG; } throw new DL4JInvalidInputException(s); @@ -221,7 +220,7 @@ public class SeparableConvolution2DLayer extends ConvolutionLayer { int[] pad; int[] outSize; if (convolutionMode == ConvolutionMode.Same) { - outSize = ConvolutionUtils.getOutputSize( + outSize = Convolution2DUtils.getOutputSize( input, kernel, strides, @@ -233,7 +232,7 @@ public class SeparableConvolution2DLayer extends ConvolutionLayer { if (input.size(2) > Integer.MAX_VALUE || input.size(3) > Integer.MAX_VALUE) { throw new ND4JArraySizeException(); } - pad = ConvolutionUtils.getSameModeTopLeftPadding( + pad = Convolution2DUtils.getSameModeTopLeftPadding( outSize, new int[] {(int) input.size(hIdx), (int) input.size(wIdx)}, kernel, @@ -241,7 +240,7 @@ public class SeparableConvolution2DLayer extends ConvolutionLayer { dilation); } else { pad = getTypedLayerConfiguration().getPadding(); - outSize = ConvolutionUtils.getOutputSize( + outSize = Convolution2DUtils.getOutputSize( input, kernel, strides, diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SpaceToBatch.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SpaceToBatch.java index 371511075..cc24bf77b 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SpaceToBatch.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SpaceToBatch.java @@ -20,6 +20,7 @@ package org.deeplearning4j.nn.layers.convolution; +import java.util.Arrays; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.exception.DL4JInvalidInputException; import org.deeplearning4j.nn.conf.CNN2DFormat; @@ -27,17 +28,14 @@ import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.AbstractLayer; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.CustomOp; import 
org.nd4j.linalg.api.ops.DynamicCustomOp; import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.common.primitives.Pair; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import org.deeplearning4j.nn.workspace.ArrayType; - -import java.util.Arrays; - @Slf4j public class SpaceToBatch extends AbstractLayer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SpaceToDepth.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SpaceToDepth.java index 1bc6e3c70..0f15d9ba6 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SpaceToDepth.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SpaceToDepth.java @@ -20,6 +20,7 @@ package org.deeplearning4j.nn.layers.convolution; +import java.util.Arrays; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.exception.DL4JInvalidInputException; import org.deeplearning4j.nn.conf.CNN2DFormat; @@ -27,18 +28,15 @@ import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.AbstractLayer; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.CustomOp; import org.nd4j.linalg.api.ops.DynamicCustomOp; import org.nd4j.linalg.api.shape.Shape; import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.common.primitives.Pair; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import org.deeplearning4j.nn.workspace.ArrayType; - -import java.util.Arrays; - @Slf4j public class SpaceToDepth extends AbstractLayer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ZeroPadding1DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ZeroPadding1DLayer.java index f6bfcb3cf..5b0028a52 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ZeroPadding1DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ZeroPadding1DLayer.java @@ -22,18 +22,17 @@ package org.deeplearning4j.nn.layers.convolution; import lombok.val; import org.deeplearning4j.nn.api.Layer; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.AbstractLayer; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.indexing.INDArrayIndex; import org.nd4j.linalg.indexing.NDArrayIndex; -import org.nd4j.common.primitives.Pair; -import org.deeplearning4j.nn.workspace.ArrayType; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; public class ZeroPadding1DLayer extends AbstractLayer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ZeroPadding3DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ZeroPadding3DLayer.java index 4b9674ed9..71d413d2a 100644 --- 
a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ZeroPadding3DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ZeroPadding3DLayer.java @@ -22,18 +22,17 @@ package org.deeplearning4j.nn.layers.convolution; import lombok.val; import org.deeplearning4j.nn.api.Layer; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.AbstractLayer; import org.deeplearning4j.nn.workspace.ArrayType; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.indexing.INDArrayIndex; import org.nd4j.linalg.indexing.NDArrayIndex; -import org.nd4j.common.primitives.Pair; public class ZeroPadding3DLayer extends AbstractLayer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ZeroPaddingLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ZeroPaddingLayer.java index 03822b77c..cd6c09158 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ZeroPaddingLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ZeroPaddingLayer.java @@ -27,13 +27,13 @@ import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.AbstractLayer; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.indexing.INDArrayIndex; import org.nd4j.linalg.indexing.NDArrayIndex; -import org.nd4j.common.primitives.Pair; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import org.deeplearning4j.nn.workspace.ArrayType; public class ZeroPaddingLayer extends AbstractLayer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/Subsampling1DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/Subsampling1DLayer.java index ce96217d3..a83ccbeb3 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/Subsampling1DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/Subsampling1DLayer.java @@ -20,19 +20,18 @@ package org.deeplearning4j.nn.layers.convolution.subsampling; +import java.util.Arrays; import org.deeplearning4j.exception.DL4JInvalidInputException; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.Gradient; -import org.deeplearning4j.util.ConvolutionUtils; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.deeplearning4j.util.Convolution2DUtils; import org.nd4j.common.base.Preconditions; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Broadcast; -import org.nd4j.common.primitives.Pair; -import 
org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; - -import java.util.Arrays; public class Subsampling1DLayer extends SubsamplingLayer { public Subsampling1DLayer(LayerConfiguration conf, DataType dataType) { @@ -102,7 +101,7 @@ public class Subsampling1DLayer extends SubsamplingLayer { @Override public Pair feedForwardMaskArray(INDArray maskArray, MaskState currentMaskState, int minibatchSize) { - INDArray reduced = ConvolutionUtils.cnn1dMaskReduction(maskArray, getTypedLayerConfiguration().getKernelSize()[0], + INDArray reduced = Convolution2DUtils.cnn1dMaskReduction(maskArray, getTypedLayerConfiguration().getKernelSize()[0], getTypedLayerConfiguration().getStride()[0], getTypedLayerConfiguration().getPadding()[0], getTypedLayerConfiguration().getDilation()[0], getTypedLayerConfiguration().getConvolutionMode()); return new Pair<>(reduced, currentMaskState); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/Subsampling3DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/Subsampling3DLayer.java index 1c8cda6a5..e269812c2 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/Subsampling3DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/Subsampling3DLayer.java @@ -20,6 +20,7 @@ package org.deeplearning4j.nn.layers.convolution.subsampling; +import java.util.Arrays; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.exception.DL4JInvalidInputException; import org.deeplearning4j.nn.conf.ConvolutionMode; @@ -32,15 +33,12 @@ import org.deeplearning4j.nn.layers.AbstractLayer; import org.deeplearning4j.nn.workspace.ArrayType; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.deeplearning4j.util.Convolution3DUtils; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.CustomOp; import org.nd4j.linalg.api.ops.DynamicCustomOp; import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.common.primitives.Pair; - -import java.util.Arrays; - @Slf4j public class Subsampling3DLayer extends AbstractLayer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/SubsamplingHelper.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/SubsamplingHelper.java index b8c115a30..ab5281958 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/SubsamplingHelper.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/SubsamplingHelper.java @@ -25,9 +25,9 @@ import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.layers.PoolingType; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.LayerHelper; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.common.primitives.Pair; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; +import org.nd4j.linalg.api.ndarray.INDArray; /** * Helper for the subsampling layer. 
diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/SubsamplingLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/SubsamplingLayer.java index 474d14216..96ff07a12 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/SubsamplingLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/subsampling/SubsamplingLayer.java @@ -20,6 +20,8 @@ package org.deeplearning4j.nn.layers.convolution.subsampling; +import java.util.Arrays; +import java.util.Map; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.common.config.DL4JClassLoading; import org.deeplearning4j.exception.DL4JInvalidInputException; @@ -34,7 +36,7 @@ import org.deeplearning4j.nn.layers.LayerHelper; import org.deeplearning4j.nn.layers.mkldnn.MKLDNNSubsamplingHelper; import org.deeplearning4j.nn.workspace.ArrayType; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import org.deeplearning4j.util.ConvolutionUtils; +import org.deeplearning4j.util.Convolution2DUtils; import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; @@ -42,9 +44,6 @@ import org.nd4j.linalg.api.ops.DynamicCustomOp; import org.nd4j.linalg.exception.ND4JOpProfilerException; import org.nd4j.linalg.factory.Nd4j; -import java.util.Arrays; -import java.util.Map; - @Slf4j public class SubsamplingLayer extends AbstractLayer { @@ -119,7 +118,7 @@ public class SubsamplingLayer extends AbstractLayer(maskArray, currentMaskState); } - INDArray outMask = ConvolutionUtils.cnn2dMaskReduction(maskArray, getTypedLayerConfiguration().getKernelSize(), getTypedLayerConfiguration().getStride(), + INDArray outMask = Convolution2DUtils.cnn2dMaskReduction(maskArray, getTypedLayerConfiguration().getKernelSize(), getTypedLayerConfiguration().getStride(), getTypedLayerConfiguration().getPadding(), getTypedLayerConfiguration().getDilation(), getTypedLayerConfiguration().getConvolutionMode()); return super.feedForwardMaskArray(outMask, currentMaskState, minibatchSize); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling1D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling1D.java index ae5417fc8..bae0c77d3 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling1D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling1D.java @@ -20,6 +20,7 @@ package org.deeplearning4j.nn.layers.convolution.upsampling; +import java.util.Arrays; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.exception.DL4JInvalidInputException; import org.deeplearning4j.nn.conf.CNN2DFormat; @@ -28,16 +29,13 @@ import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.CustomOp; import org.nd4j.linalg.api.ops.DynamicCustomOp; import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.common.primitives.Pair; -import 
org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; - -import java.util.Arrays; - @Slf4j public class Upsampling1D extends Upsampling2D { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling2D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling2D.java index 12ef14e80..ac077c63d 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling2D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling2D.java @@ -20,6 +20,7 @@ package org.deeplearning4j.nn.layers.convolution.upsampling; +import java.util.Arrays; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.exception.DL4JInvalidInputException; import org.deeplearning4j.nn.conf.CNN2DFormat; @@ -30,16 +31,13 @@ import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.AbstractLayer; import org.deeplearning4j.nn.workspace.ArrayType; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.memory.MemoryWorkspace; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.CustomOp; import org.nd4j.linalg.api.ops.DynamicCustomOp; import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.common.primitives.Pair; - -import java.util.Arrays; - @Slf4j public class Upsampling2D extends AbstractLayer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling3D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling3D.java index 577bb0976..c0709e404 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling3D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/upsampling/Upsampling3D.java @@ -20,6 +20,7 @@ package org.deeplearning4j.nn.layers.convolution.upsampling; +import java.util.Arrays; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.exception.DL4JInvalidInputException; import org.deeplearning4j.nn.conf.CacheMode; @@ -29,16 +30,13 @@ import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.AbstractLayer; import org.deeplearning4j.nn.workspace.ArrayType; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.memory.MemoryWorkspace; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.CustomOp; import org.nd4j.linalg.api.ops.DynamicCustomOp; import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.common.primitives.Pair; - -import java.util.Arrays; - @Slf4j public class Upsampling3D extends AbstractLayer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/PReLU.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/PReLU.java index f89112664..8061ee32f 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/PReLU.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/PReLU.java @@ -27,11 +27,11 @@ import org.deeplearning4j.nn.layers.BaseLayer; import org.deeplearning4j.nn.params.PReLUParamInitializer; import org.deeplearning4j.nn.workspace.ArrayType; import 
org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.activations.impl.ActivationPReLU; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.common.primitives.Pair; public class PReLU extends BaseLayer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/autoencoder/AutoEncoder.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/autoencoder/AutoEncoder.java index 5a65889f8..032317af7 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/autoencoder/AutoEncoder.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/autoencoder/AutoEncoder.java @@ -23,11 +23,11 @@ package org.deeplearning4j.nn.layers.feedforward.autoencoder; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.layers.BasePretrainNetwork; import org.deeplearning4j.nn.params.PretrainParamInitializer; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.common.primitives.Pair; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import org.deeplearning4j.nn.workspace.ArrayType; public class AutoEncoder extends BasePretrainNetwork { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/dense/DenseLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/dense/DenseLayer.java index a87e04a3a..11f48357e 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/dense/DenseLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/dense/DenseLayer.java @@ -22,9 +22,9 @@ package org.deeplearning4j.nn.layers.feedforward.dense; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.layers.BaseLayer; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; /** * @author Adam Gibson @@ -47,7 +47,7 @@ public class DenseLayer extends BaseLayer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/embedding/EmbeddingLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/embedding/EmbeddingLayer.java index 66961b202..660e3730a 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/embedding/EmbeddingLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/embedding/EmbeddingLayer.java @@ -22,19 +22,19 @@ package org.deeplearning4j.nn.layers.feedforward.embedding; import lombok.extern.slf4j.Slf4j; import lombok.val; -import org.deeplearning4j.nn.conf.layers.LayerConfiguration; -import org.nd4j.linalg.api.buffer.DataType; -import org.nd4j.linalg.exception.ND4JArraySizeException; -import org.nd4j.common.primitives.Pair; import org.deeplearning4j.exception.DL4JInvalidInputException; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import 
org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.BaseLayer; import org.deeplearning4j.nn.params.DefaultParamInitializer; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.factory.Nd4j; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; +import org.nd4j.linalg.api.buffer.DataType; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.exception.ND4JArraySizeException; +import org.nd4j.linalg.factory.Nd4j; @Slf4j public class EmbeddingLayer extends BaseLayer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/embedding/EmbeddingSequenceLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/embedding/EmbeddingSequenceLayer.java index 1cd584bf3..64e40cfd0 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/embedding/EmbeddingSequenceLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/embedding/EmbeddingSequenceLayer.java @@ -20,6 +20,9 @@ package org.deeplearning4j.nn.layers.feedforward.embedding; +import static org.nd4j.linalg.api.shape.Shape.hasDefaultStridesForShape; + +import java.util.Arrays; import lombok.extern.slf4j.Slf4j; import lombok.val; import org.deeplearning4j.exception.DL4JInvalidInputException; @@ -31,15 +34,11 @@ import org.deeplearning4j.nn.layers.BaseLayer; import org.deeplearning4j.nn.params.DefaultParamInitializer; import org.deeplearning4j.nn.workspace.ArrayType; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Broadcast; import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.common.primitives.Pair; - -import java.util.Arrays; - -import static org.nd4j.linalg.api.shape.Shape.hasDefaultStridesForShape; @Slf4j public class EmbeddingSequenceLayer extends BaseLayer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/mkldnn/BaseMKLDNNHelper.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/mkldnn/BaseMKLDNNHelper.java index a349eef20..269053ba2 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/mkldnn/BaseMKLDNNHelper.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/mkldnn/BaseMKLDNNHelper.java @@ -20,11 +20,10 @@ package org.deeplearning4j.nn.layers.mkldnn; -import org.deeplearning4j.common.config.DL4JClassLoading; -import org.nd4j.linalg.factory.Nd4j; - import java.lang.reflect.Method; import java.util.concurrent.atomic.AtomicBoolean; +import org.deeplearning4j.common.config.DL4JClassLoading; +import org.nd4j.linalg.factory.Nd4j; public class BaseMKLDNNHelper { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/mkldnn/MKLDNNBatchNormHelper.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/mkldnn/MKLDNNBatchNormHelper.java index 388125e82..04be6baa3 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/mkldnn/MKLDNNBatchNormHelper.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/mkldnn/MKLDNNBatchNormHelper.java @@ -20,6 +20,10 @@ package org.deeplearning4j.nn.layers.mkldnn; +import java.util.ArrayList; +import 
java.util.Collections; +import java.util.List; +import java.util.Map; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; @@ -27,6 +31,8 @@ import org.deeplearning4j.nn.layers.normalization.BatchNormalizationHelper; import org.deeplearning4j.nn.params.BatchNormalizationParamInitializer; import org.deeplearning4j.nn.workspace.ArrayType; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; +import org.nd4j.common.util.ArrayUtil; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.memory.MemoryWorkspace; import org.nd4j.linalg.api.ndarray.INDArray; @@ -36,13 +42,6 @@ import org.nd4j.linalg.api.ops.impl.layers.convolution.BatchNorm; import org.nd4j.linalg.api.ops.impl.summarystats.Variance; import org.nd4j.linalg.api.shape.Shape; import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.common.primitives.Pair; -import org.nd4j.common.util.ArrayUtil; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Map; public class MKLDNNBatchNormHelper implements BatchNormalizationHelper { private static final int[] RANK2_DIMS = {0}; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/mkldnn/MKLDNNConvHelper.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/mkldnn/MKLDNNConvHelper.java index 2a3734f9e..ad7120eed 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/mkldnn/MKLDNNConvHelper.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/mkldnn/MKLDNNConvHelper.java @@ -20,6 +20,8 @@ package org.deeplearning4j.nn.layers.mkldnn; +import java.util.Collections; +import java.util.Map; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; @@ -29,7 +31,9 @@ import org.deeplearning4j.nn.layers.convolution.ConvolutionHelper; import org.deeplearning4j.nn.params.ConvolutionParamInitializer; import org.deeplearning4j.nn.workspace.ArrayType; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import org.deeplearning4j.util.ConvolutionUtils; +import org.deeplearning4j.util.Convolution2DUtils; +import org.nd4j.common.primitives.Pair; +import org.nd4j.common.util.ArrayUtil; import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; @@ -37,11 +41,6 @@ import org.nd4j.linalg.api.ops.OpContext; import org.nd4j.linalg.api.ops.impl.layers.convolution.Conv2D; import org.nd4j.linalg.api.ops.impl.layers.convolution.Conv2DDerivative; import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.common.primitives.Pair; -import org.nd4j.common.util.ArrayUtil; - -import java.util.Collections; -import java.util.Map; public class MKLDNNConvHelper implements ConvolutionHelper { @@ -73,7 +72,7 @@ public class MKLDNNConvHelper implements ConvolutionHelper { } if (convolutionMode == ConvolutionMode.Same) { - pad = ConvolutionUtils.getSameModeTopLeftPadding(new int[]{(int)delta.size(hDim), (int)delta.size(wDim)}, new int[] {(int) input.size(hDim), (int) input.size(wDim)}, + pad = Convolution2DUtils.getSameModeTopLeftPadding(new int[]{(int)delta.size(hDim), (int)delta.size(wDim)}, new int[] {(int) input.size(hDim), (int) input.size(wDim)}, kernel, strides, dilation); } @@ -132,10 +131,10 @@ public class MKLDNNConvHelper implements ConvolutionHelper { int 
inW = (int)input.size(wDim); int[] outSize; if (convolutionMode == ConvolutionMode.Same) { - outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, null, convolutionMode, dilation, format); //Also performs validation - pad = ConvolutionUtils.getSameModeTopLeftPadding(outSize, new int[] {inH, inW}, kernel, strides, dilation); + outSize = Convolution2DUtils.getOutputSize(input, kernel, strides, null, convolutionMode, dilation, format); //Also performs validation + pad = Convolution2DUtils.getSameModeTopLeftPadding(outSize, new int[] {inH, inW}, kernel, strides, dilation); } else { - outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, pad, convolutionMode, dilation, format); //Also performs validation + outSize = Convolution2DUtils.getOutputSize(input, kernel, strides, pad, convolutionMode, dilation, format); //Also performs validation } if(context == null ){ diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/mkldnn/MKLDNNLSTMHelper.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/mkldnn/MKLDNNLSTMHelper.java index c5b159fb9..5cb87e3d2 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/mkldnn/MKLDNNLSTMHelper.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/mkldnn/MKLDNNLSTMHelper.java @@ -20,6 +20,10 @@ package org.deeplearning4j.nn.layers.mkldnn; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.LSTM; @@ -28,6 +32,7 @@ import org.deeplearning4j.nn.layers.recurrent.FwdPassReturn; import org.deeplearning4j.nn.layers.recurrent.LSTMHelper; import org.deeplearning4j.nn.workspace.ArrayType; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.activations.impl.*; import org.nd4j.linalg.api.buffer.DataType; @@ -36,12 +41,6 @@ import org.nd4j.linalg.api.ops.DynamicCustomOp; import org.nd4j.linalg.api.shape.LongShapeDescriptor; import org.nd4j.linalg.indexing.BooleanIndexing; import org.nd4j.linalg.indexing.conditions.Conditions; -import org.nd4j.common.primitives.Pair; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Map; public class MKLDNNLSTMHelper implements LSTMHelper { public MKLDNNLSTMHelper(DataType dataType) {} diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/mkldnn/MKLDNNLocalResponseNormalizationHelper.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/mkldnn/MKLDNNLocalResponseNormalizationHelper.java index c38a81b58..ecd0cc098 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/mkldnn/MKLDNNLocalResponseNormalizationHelper.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/mkldnn/MKLDNNLocalResponseNormalizationHelper.java @@ -20,20 +20,19 @@ package org.deeplearning4j.nn.layers.mkldnn; +import java.util.Collections; +import java.util.Map; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.normalization.LocalResponseNormalizationHelper; import org.deeplearning4j.nn.workspace.ArrayType; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; import 
org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.OpContext; import org.nd4j.linalg.api.ops.impl.layers.convolution.LocalResponseNormalization; import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.common.primitives.Pair; - -import java.util.Collections; -import java.util.Map; public class MKLDNNLocalResponseNormalizationHelper extends BaseMKLDNNHelper implements LocalResponseNormalizationHelper { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/mkldnn/MKLDNNSubsamplingHelper.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/mkldnn/MKLDNNSubsamplingHelper.java index 91923a967..bb825120a 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/mkldnn/MKLDNNSubsamplingHelper.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/mkldnn/MKLDNNSubsamplingHelper.java @@ -20,6 +20,8 @@ package org.deeplearning4j.nn.layers.mkldnn; +import java.util.Collections; +import java.util.Map; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.layers.PoolingType; @@ -28,7 +30,9 @@ import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.convolution.subsampling.SubsamplingHelper; import org.deeplearning4j.nn.workspace.ArrayType; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import org.deeplearning4j.util.ConvolutionUtils; +import org.deeplearning4j.util.Convolution2DUtils; +import org.nd4j.common.primitives.Pair; +import org.nd4j.common.util.ArrayUtil; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.DynamicCustomOp; @@ -39,11 +43,6 @@ import org.nd4j.linalg.api.ops.impl.layers.convolution.Pooling2D; import org.nd4j.linalg.api.ops.impl.layers.convolution.Pooling2DDerivative; import org.nd4j.linalg.api.ops.impl.layers.convolution.config.Pooling2DConfig; import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.common.primitives.Pair; -import org.nd4j.common.util.ArrayUtil; - -import java.util.Collections; -import java.util.Map; public class MKLDNNSubsamplingHelper implements SubsamplingHelper { @@ -75,7 +74,7 @@ public class MKLDNNSubsamplingHelper implements SubsamplingHelper { } if (convolutionMode == ConvolutionMode.Same) { - pad = ConvolutionUtils.getSameModeTopLeftPadding(new int[]{(int)epsilon.size(hIdx), (int)epsilon.size(wIdx)}, new int[] {(int)input.size(hIdx), (int)input.size(wIdx)}, kernel, strides, dilation); + pad = Convolution2DUtils.getSameModeTopLeftPadding(new int[]{(int)epsilon.size(hIdx), (int)epsilon.size(wIdx)}, new int[] {(int)input.size(hIdx), (int)input.size(wIdx)}, kernel, strides, dilation); } Pooling2DConfig conf = Pooling2DConfig.builder() @@ -115,10 +114,10 @@ public class MKLDNNSubsamplingHelper implements SubsamplingHelper { int[] outSize; if (convolutionMode == ConvolutionMode.Same) { - outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, null, convolutionMode, dilation, format); //Also performs validation - pad = ConvolutionUtils.getSameModeTopLeftPadding(outSize, new int[] {(int)input.size(hIdx), (int)input.size(wIdx)}, kernel, strides, dilation); + outSize = Convolution2DUtils.getOutputSize(input, kernel, strides, null, convolutionMode, dilation, format); //Also performs validation + pad = Convolution2DUtils.getSameModeTopLeftPadding(outSize, new int[] {(int)input.size(hIdx), (int)input.size(wIdx)}, kernel, strides, 
dilation); } else { - outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, pad, convolutionMode, dilation, format); //Also performs validation + outSize = Convolution2DUtils.getOutputSize(input, kernel, strides, pad, convolutionMode, dilation, format); //Also performs validation } long[] outShape = format == CNN2DFormat.NCHW ? new long[]{input.size(0), input.size(1), outSize[0], outSize[1]} : diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/BatchNormalization.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/BatchNormalization.java index d661fccdf..cbe1cde99 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/BatchNormalization.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/BatchNormalization.java @@ -20,6 +20,7 @@ package org.deeplearning4j.nn.layers.normalization; +import java.util.*; import lombok.extern.slf4j.Slf4j; import lombok.val; import org.deeplearning4j.common.config.DL4JClassLoading; @@ -34,6 +35,7 @@ import org.deeplearning4j.nn.params.BatchNormalizationParamInitializer; import org.deeplearning4j.nn.workspace.ArrayType; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.deeplearning4j.optimize.api.TrainingListener; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.impl.broadcast.BroadcastAddOp; @@ -46,9 +48,6 @@ import org.nd4j.linalg.api.shape.Shape; import org.nd4j.linalg.exception.ND4JOpProfilerException; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.ops.transforms.Transforms; -import org.nd4j.common.primitives.Pair; - -import java.util.*; @Slf4j public class BatchNormalization extends BaseLayer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/BatchNormalizationHelper.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/BatchNormalizationHelper.java index d6bac6a64..8ec565513 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/BatchNormalizationHelper.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/BatchNormalizationHelper.java @@ -23,10 +23,10 @@ package org.deeplearning4j.nn.layers.normalization; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.LayerHelper; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.common.primitives.Pair; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; public interface BatchNormalizationHelper extends LayerHelper { boolean checkSupported(double eps, boolean fixedGammaBeta); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/LocalResponseNormalization.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/LocalResponseNormalization.java index ea4922e6a..0dd4de67f 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/LocalResponseNormalization.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/LocalResponseNormalization.java @@ -20,6 +20,8 @@ package 
org.deeplearning4j.nn.layers.normalization; +import static org.nd4j.linalg.indexing.NDArrayIndex.interval; + import lombok.extern.slf4j.Slf4j; import lombok.val; import org.deeplearning4j.common.config.DL4JClassLoading; @@ -30,6 +32,10 @@ import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.AbstractLayer; import org.deeplearning4j.nn.layers.LayerHelper; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; +import org.nd4j.common.primitives.Triple; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.impl.transforms.pairwise.arithmetic.MulOp; @@ -38,12 +44,6 @@ import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.INDArrayIndex; import org.nd4j.linalg.indexing.NDArrayIndex; import org.nd4j.linalg.ops.transforms.Transforms; -import org.nd4j.common.primitives.Pair; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import org.deeplearning4j.nn.workspace.ArrayType; -import org.nd4j.common.primitives.Triple; - -import static org.nd4j.linalg.indexing.NDArrayIndex.interval; @Slf4j public class LocalResponseNormalization diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/LocalResponseNormalizationHelper.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/LocalResponseNormalizationHelper.java index f2c60f160..bc833f4aa 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/LocalResponseNormalizationHelper.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/LocalResponseNormalizationHelper.java @@ -22,9 +22,9 @@ package org.deeplearning4j.nn.layers.normalization; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.LayerHelper; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.common.primitives.Pair; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; +import org.nd4j.linalg.api.ndarray.INDArray; public interface LocalResponseNormalizationHelper extends LayerHelper { boolean checkSupported(double k, double n, double alpha, double beta); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/objdetect/Yolo2OutputLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/objdetect/Yolo2OutputLayer.java index 49a61f496..e18977c7b 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/objdetect/Yolo2OutputLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/objdetect/Yolo2OutputLayer.java @@ -20,6 +20,11 @@ package org.deeplearning4j.nn.layers.objdetect; +import static org.nd4j.linalg.indexing.NDArrayIndex.*; + +import java.io.Serializable; +import java.util.Arrays; +import java.util.List; import lombok.*; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.layers.IOutputLayer; @@ -28,7 +33,11 @@ import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.AbstractLayer; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.nd4j.common.base.Preconditions; +import 
org.nd4j.common.primitives.Pair; +import org.nd4j.common.util.ArrayUtil; import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.activations.impl.ActivationIdentity; import org.nd4j.linalg.activations.impl.ActivationSigmoid; @@ -47,16 +56,6 @@ import org.nd4j.linalg.indexing.conditions.Conditions; import org.nd4j.linalg.lossfunctions.ILossFunction; import org.nd4j.linalg.lossfunctions.impl.LossL2; import org.nd4j.linalg.ops.transforms.Transforms; -import org.nd4j.common.primitives.Pair; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import org.deeplearning4j.nn.workspace.ArrayType; -import org.nd4j.common.util.ArrayUtil; - -import java.io.Serializable; -import java.util.Arrays; -import java.util.List; - -import static org.nd4j.linalg.indexing.NDArrayIndex.*; public class Yolo2OutputLayer extends AbstractLayer implements Serializable, IOutputLayer { private static final Gradient EMPTY_GRADIENT = new DefaultGradient(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/objdetect/YoloUtils.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/objdetect/YoloUtils.java index c99b25d03..46d055712 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/objdetect/YoloUtils.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/objdetect/YoloUtils.java @@ -20,21 +20,20 @@ package org.deeplearning4j.nn.layers.objdetect; -import lombok.NonNull; -import org.nd4j.linalg.api.memory.MemoryWorkspace; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.factory.Broadcast; -import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.linalg.ops.transforms.Transforms; -import org.deeplearning4j.nn.workspace.ArrayType; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import static org.nd4j.linalg.indexing.NDArrayIndex.*; import java.util.ArrayList; import java.util.Arrays; import java.util.Iterator; import java.util.List; - -import static org.nd4j.linalg.indexing.NDArrayIndex.*; +import lombok.NonNull; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.linalg.api.memory.MemoryWorkspace; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.factory.Broadcast; +import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.ops.transforms.Transforms; public class YoloUtils { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/ocnn/OCNNOutputLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/ocnn/OCNNOutputLayer.java index 218b18884..79ee3680c 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/ocnn/OCNNOutputLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/ocnn/OCNNOutputLayer.java @@ -21,10 +21,12 @@ package org.deeplearning4j.nn.layers.ocnn; -import lombok.Builder; +import static org.deeplearning4j.nn.layers.ocnn.OCNNParamInitializer.R_KEY; +import static org.deeplearning4j.nn.layers.ocnn.OCNNParamInitializer.V_KEY; +import static org.deeplearning4j.nn.layers.ocnn.OCNNParamInitializer.W_KEY; + import lombok.Getter; import lombok.Setter; -import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; @@ -32,6 +34,7 @@ import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.BaseOutputLayer; import 
org.deeplearning4j.nn.workspace.ArrayType; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.activations.impl.ActivationReLU; import org.nd4j.linalg.api.buffer.DataType; @@ -42,12 +45,6 @@ import org.nd4j.linalg.indexing.INDArrayIndex; import org.nd4j.linalg.indexing.NDArrayIndex; import org.nd4j.linalg.lossfunctions.ILossFunction; import org.nd4j.linalg.ops.transforms.Transforms; -import org.nd4j.common.primitives.Pair; - -import static org.deeplearning4j.nn.layers.ocnn.OCNNParamInitializer.R_KEY; -import static org.deeplearning4j.nn.layers.ocnn.OCNNParamInitializer.V_KEY; -import static org.deeplearning4j.nn.layers.ocnn.OCNNParamInitializer.W_KEY; - public class OCNNOutputLayer extends BaseOutputLayer { @Setter diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/ocnn/OCNNParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/ocnn/OCNNParamInitializer.java index 40b734bee..bfcb3e25f 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/ocnn/OCNNParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/ocnn/OCNNParamInitializer.java @@ -20,6 +20,10 @@ package org.deeplearning4j.nn.layers.ocnn; +import static org.nd4j.linalg.indexing.NDArrayIndex.interval; +import static org.nd4j.linalg.indexing.NDArrayIndex.point; + +import java.util.*; import lombok.val; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.params.DefaultParamInitializer; @@ -29,11 +33,6 @@ import org.nd4j.common.base.Preconditions; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.indexing.NDArrayIndex; -import java.util.*; - -import static org.nd4j.linalg.indexing.NDArrayIndex.interval; -import static org.nd4j.linalg.indexing.NDArrayIndex.point; - public class OCNNParamInitializer extends DefaultParamInitializer { private final static OCNNParamInitializer INSTANCE = new OCNNParamInitializer(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/pooling/GlobalPoolingLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/pooling/GlobalPoolingLayer.java index 94e3b6de8..41cdba9c2 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/pooling/GlobalPoolingLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/pooling/GlobalPoolingLayer.java @@ -20,6 +20,7 @@ package org.deeplearning4j.nn.layers.pooling; +import java.util.Arrays; import java.util.Map; import lombok.val; import org.apache.commons.lang3.ArrayUtils; @@ -30,7 +31,10 @@ import org.deeplearning4j.nn.conf.layers.PoolingType; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.AbstractLayer; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.deeplearning4j.util.MaskedReductionUtil; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.impl.broadcast.BroadcastCopyOp; @@ -38,11 +42,6 @@ import org.nd4j.linalg.api.ops.impl.broadcast.BroadcastMulOp; import org.nd4j.linalg.api.ops.impl.transforms.any.IsMax; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.ops.transforms.Transforms; -import 
org.nd4j.common.primitives.Pair; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import org.deeplearning4j.nn.workspace.ArrayType; - -import java.util.Arrays; public class GlobalPoolingLayer extends AbstractLayer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/BaseRecurrentLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/BaseRecurrentLayer.java index 3eaa12f3f..c5592bb01 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/BaseRecurrentLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/BaseRecurrentLayer.java @@ -20,6 +20,9 @@ package org.deeplearning4j.nn.layers.recurrent; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; import org.deeplearning4j.nn.api.layers.RecurrentLayer; import org.deeplearning4j.nn.conf.RNNFormat; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; @@ -27,10 +30,6 @@ import org.deeplearning4j.nn.layers.BaseLayer; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.HashMap; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; - public abstract class BaseRecurrentLayer extends BaseLayer implements RecurrentLayer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/BidirectionalLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/BidirectionalLayer.java index 803f7126d..23af4645d 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/BidirectionalLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/BidirectionalLayer.java @@ -20,6 +20,9 @@ package org.deeplearning4j.nn.layers.recurrent; +import static org.nd4j.linalg.indexing.NDArrayIndex.*; + +import java.util.*; import lombok.AllArgsConstructor; import lombok.Getter; import lombok.NonNull; @@ -44,6 +47,7 @@ import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.deeplearning4j.optimize.api.ConvexOptimizer; import org.deeplearning4j.optimize.api.TrainingListener; import org.deeplearning4j.util.TimeSeriesUtils; +import org.nd4j.common.primitives.Pair; import org.nd4j.evaluation.IEvaluation; import org.nd4j.linalg.api.memory.MemoryWorkspace; import org.nd4j.linalg.api.ndarray.INDArray; @@ -52,11 +56,6 @@ import org.nd4j.linalg.dataset.api.MultiDataSet; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator; import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.common.primitives.Pair; - -import java.util.*; - -import static org.nd4j.linalg.indexing.NDArrayIndex.*; public class BidirectionalLayer implements RecurrentLayer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTM.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTM.java index 127d50de8..4149509de 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTM.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTM.java @@ -20,6 +20,7 @@ package org.deeplearning4j.nn.layers.recurrent; +import java.util.Map; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.nn.api.MaskState; import 
org.deeplearning4j.nn.conf.CacheMode; @@ -27,12 +28,10 @@ import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.params.GravesBidirectionalLSTMParamInitializer; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.common.primitives.Pair; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; - -import java.util.Map; @Slf4j public class GravesBidirectionalLSTM diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTM.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTM.java index 91d820b37..45908fc6a 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTM.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTM.java @@ -26,11 +26,11 @@ import org.deeplearning4j.nn.conf.CacheMode; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.params.GravesLSTMParamInitializer; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.nd4j.common.base.Preconditions; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.common.primitives.Pair; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; @Deprecated @Slf4j diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTMHelper.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTMHelper.java index c262997ce..150ceff59 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTMHelper.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTMHelper.java @@ -20,16 +20,15 @@ package org.deeplearning4j.nn.layers.recurrent; +import java.util.Map; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.LayerHelper; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.common.primitives.Pair; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; - -import java.util.Map; public interface LSTMHelper extends LayerHelper { boolean checkSupported(IActivation gateActivationFn, IActivation activationFn, boolean hasPeepholeConnections); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTMHelpers.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTMHelpers.java index c7c7fa643..ce39df4fe 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTMHelpers.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTMHelpers.java @@ -20,6 +20,11 @@ package org.deeplearning4j.nn.layers.recurrent; +import static org.nd4j.linalg.indexing.NDArrayIndex.*; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; import lombok.extern.slf4j.Slf4j; import 
lombok.val; import org.deeplearning4j.exception.DL4JInvalidInputException; @@ -36,6 +41,7 @@ import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.workspace.ArrayType; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.nd4j.common.base.Preconditions; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.activations.impl.ActivationSigmoid; import org.nd4j.linalg.api.memory.MemoryWorkspace; @@ -47,13 +53,6 @@ import org.nd4j.linalg.exception.ND4JArraySizeException; import org.nd4j.linalg.exception.ND4JOpProfilerException; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.NDArrayIndex; -import org.nd4j.common.primitives.Pair; - -import java.util.Arrays; -import java.util.HashMap; -import java.util.Map; - -import static org.nd4j.linalg.indexing.NDArrayIndex.*; @Slf4j public class LSTMHelpers { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LastTimeStepLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LastTimeStepLayer.java index da5f0b782..4b3c4c58c 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LastTimeStepLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LastTimeStepLayer.java @@ -20,24 +20,23 @@ package org.deeplearning4j.nn.layers.recurrent; +import static org.nd4j.linalg.indexing.NDArrayIndex.all; +import static org.nd4j.linalg.indexing.NDArrayIndex.point; + +import java.util.Arrays; import lombok.NonNull; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.RNNFormat; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.wrapper.BaseWrapperLayer; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.deeplearning4j.util.TimeSeriesUtils; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.INDArrayIndex; -import org.nd4j.common.primitives.Pair; -import org.deeplearning4j.nn.workspace.ArrayType; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; - -import java.util.Arrays; - -import static org.nd4j.linalg.indexing.NDArrayIndex.all; -import static org.nd4j.linalg.indexing.NDArrayIndex.point; public class LastTimeStepLayer extends BaseWrapperLayer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/MaskZeroLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/MaskZeroLayer.java index c591cd18d..d572895c7 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/MaskZeroLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/MaskZeroLayer.java @@ -20,19 +20,17 @@ package org.deeplearning4j.nn.layers.recurrent; -import java.util.Arrays; +import static org.deeplearning4j.nn.conf.RNNFormat.NWC; +import java.util.Arrays; +import lombok.NonNull; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.wrapper.BaseWrapperLayer; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.common.primitives.Pair; - -import lombok.NonNull; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; - -import 
static org.deeplearning4j.nn.conf.RNNFormat.NWC; +import org.nd4j.common.primitives.Pair; +import org.nd4j.linalg.api.ndarray.INDArray; public class MaskZeroLayer extends BaseWrapperLayer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/RnnLossLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/RnnLossLayer.java index f58e054ea..f0c289da7 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/RnnLossLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/RnnLossLayer.java @@ -20,6 +20,8 @@ package org.deeplearning4j.nn.layers.recurrent; +import java.util.Arrays; +import java.util.List; import lombok.Getter; import lombok.Setter; import org.deeplearning4j.eval.Evaluation; @@ -30,19 +32,16 @@ import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.BaseLayer; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.deeplearning4j.util.TimeSeriesUtils; import org.nd4j.common.base.Preconditions; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.api.DataSet; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.lossfunctions.ILossFunction; -import org.nd4j.common.primitives.Pair; -import org.deeplearning4j.nn.workspace.ArrayType; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; - -import java.util.Arrays; -import java.util.List; public class RnnLossLayer extends BaseLayer implements IOutputLayer { @Setter @Getter protected INDArray labels; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/RnnOutputLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/RnnOutputLayer.java index f5edea8e8..b86016022 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/RnnOutputLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/RnnOutputLayer.java @@ -20,6 +20,7 @@ package org.deeplearning4j.nn.layers.recurrent; +import java.util.Arrays; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.RNNFormat; @@ -27,206 +28,257 @@ import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.BaseOutputLayer; import org.deeplearning4j.nn.params.DefaultParamInitializer; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.deeplearning4j.util.TimeSeriesUtils; import org.nd4j.common.base.Preconditions; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.lossfunctions.ILossFunction; -import org.nd4j.common.primitives.Pair; -import org.deeplearning4j.nn.workspace.ArrayType; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import java.util.Arrays; +public class RnnOutputLayer + extends BaseOutputLayer { -public class RnnOutputLayer extends BaseOutputLayer { + public RnnOutputLayer(LayerConfiguration conf, DataType dataType) { + super(conf, dataType); + } - 
public RnnOutputLayer(LayerConfiguration conf, DataType dataType) { - super(conf, dataType); + @Override + public Pair backpropGradient( + INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { + assertInputSet(true); + if (input.rank() != 3) { + throw new UnsupportedOperationException( + "Input is not rank 3. RnnOutputLayer expects rank 3 input with shape [minibatch, layerInSize, sequenceLength]." + + " Got input with rank " + + input.rank() + + " and shape " + + Arrays.toString(input.shape()) + + " - " + + layerId()); } - @Override - public Pair backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { - assertInputSet(true); - if (input.rank() != 3) { - throw new UnsupportedOperationException( - "Input is not rank 3. RnnOutputLayer expects rank 3 input with shape [minibatch, layerInSize, sequenceLength]." + - " Got input with rank " + input.rank() + " and shape " + Arrays.toString(input.shape()) + " - " + layerId()); - } + RNNFormat format = getTypedLayerConfiguration().getDataFormat(); + int td = (format == RNNFormat.NCW) ? 2 : 1; //either NCW or NWC + Preconditions.checkState( + labels.rank() == 3, + "Expected rank 3 labels array, got label array with shape %ndShape", + labels); + Preconditions.checkState( + input.size(td) == labels.size(td), + "Sequence lengths do not match for RnnOutputLayer input and labels:" + + "Arrays should be rank 3 with shape [minibatch, size, sequenceLength] - " + + "mismatch on dimension 2 (sequence length) - input=%ndShape vs. label=%ndShape.\n", + input, "\n\n", + labels); - RNNFormat format = getTypedLayerConfiguration().getDataFormat(); - int td = (format == RNNFormat.NCW) ? 2 : 1; - Preconditions.checkState(labels.rank() == 3, "Expected rank 3 labels array, got label array with shape %ndShape", labels); - Preconditions.checkState(input.size(td) == labels.size(td), "Sequence lengths do not match for RnnOutputLayer input and labels:" + - "Arrays should be rank 3 with shape [minibatch, size, sequenceLength] - mismatch on dimension 2 (sequence length) - input=%ndShape vs. 
label=%ndShape", input, labels); - - - INDArray inputTemp = input; - if (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC){ - this.input = input.permute(0, 2, 1); - } - - this.input = TimeSeriesUtils.reshape3dTo2d(input, workspaceMgr, ArrayType.BP_WORKING_MEM); - - applyDropOutIfNecessary(true, workspaceMgr); //Edge case: we skip OutputLayer forward pass during training as this isn't required to calculate gradients - - Pair gradAndEpsilonNext = super.backpropGradient(epsilon, workspaceMgr); //Also applies dropout - this.input = inputTemp; - INDArray epsilon2d = gradAndEpsilonNext.getSecond(); - - INDArray epsilon3d = TimeSeriesUtils.reshape2dTo3d(epsilon2d, input.size(0), workspaceMgr, ArrayType.ACTIVATION_GRAD); - if (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC){ - epsilon3d = epsilon3d.permute(0, 2, 1); - } - weightNoiseParams.clear(); - - //epsilon3d = backpropDropOutIfPresent(epsilon3d); - return new Pair<>(gradAndEpsilonNext.getFirst(), epsilon3d); + INDArray inputTemp = input; + if (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC) { + this.input = input.permute(0, 2, 1); } - /**{@inheritDoc} - */ - @Override - public double f1Score(INDArray examples, INDArray labels) { - if (examples.rank() == 3) - examples = TimeSeriesUtils.reshape3dTo2d(examples, LayerWorkspaceMgr.noWorkspaces(), ArrayType.ACTIVATIONS); - if (labels.rank() == 3) - labels = TimeSeriesUtils.reshape3dTo2d(labels, LayerWorkspaceMgr.noWorkspaces(), ArrayType.ACTIVATIONS); - return super.f1Score(examples, labels); + this.input = TimeSeriesUtils.reshape3dTo2d(input, workspaceMgr, ArrayType.BP_WORKING_MEM); + + applyDropOutIfNecessary( + true, + workspaceMgr); // Edge case: we skip OutputLayer forward pass during training as this isn't + // required to calculate gradients + + Pair gradAndEpsilonNext = + super.backpropGradient(epsilon, workspaceMgr); // Also applies dropout + this.input = inputTemp; + INDArray epsilon2d = gradAndEpsilonNext.getSecond(); + + INDArray epsilon3d = + TimeSeriesUtils.reshape2dTo3d( + epsilon2d, input.size(0), workspaceMgr, ArrayType.ACTIVATION_GRAD); + if (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC) { + epsilon3d = epsilon3d.permute(0, 2, 1); + } + weightNoiseParams.clear(); + + // epsilon3d = backpropDropOutIfPresent(epsilon3d); + return new Pair<>(gradAndEpsilonNext.getFirst(), epsilon3d); + } + + /** {@inheritDoc} */ + @Override + public double f1Score(INDArray examples, INDArray labels) { + if (examples.rank() == 3) + examples = + TimeSeriesUtils.reshape3dTo2d( + examples, LayerWorkspaceMgr.noWorkspaces(), ArrayType.ACTIVATIONS); + if (labels.rank() == 3) + labels = + TimeSeriesUtils.reshape3dTo2d( + labels, LayerWorkspaceMgr.noWorkspaces(), ArrayType.ACTIVATIONS); + return super.f1Score(examples, labels); + } + + public INDArray getInput() { + return input; + } + + @Override + public Layer.Type type() { + return Layer.Type.RECURRENT; + } + + @Override + protected INDArray preOutput2d(boolean training, LayerWorkspaceMgr workspaceMgr) { + assertInputSet(false); + if (input.rank() == 3) { + // Case when called from RnnOutputLayer + INDArray inputTemp = input; + input = + (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC) + ? 
input.permute(0, 2, 1) + : input; + input = TimeSeriesUtils.reshape3dTo2d(input, workspaceMgr, ArrayType.FF_WORKING_MEM); + INDArray out = super.preOutput(training, workspaceMgr); + this.input = inputTemp; + return out; + } else { + // Case when called from BaseOutputLayer + INDArray out = super.preOutput(training, workspaceMgr); + return out; + } + } + + @Override + protected INDArray getLabels2d(LayerWorkspaceMgr workspaceMgr, ArrayType arrayType) { + INDArray labels = this.labels; + if (labels.rank() == 3) { + labels = + (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC) + ? labels.permute(0, 2, 1) + : labels; + return TimeSeriesUtils.reshape3dTo2d(labels, workspaceMgr, arrayType); + } + return labels; + } + + @Override + public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) { + INDArray input = this.input; + if (input.rank() != 3) + throw new UnsupportedOperationException( + "Input must be rank 3. Got input with rank " + input.rank() + " " + layerId()); + INDArray b = getParamWithNoise(DefaultParamInitializer.BIAS_KEY, training, workspaceMgr); + INDArray W = getParamWithNoise(DefaultParamInitializer.WEIGHT_KEY, training, workspaceMgr); + + applyDropOutIfNecessary(training, workspaceMgr); + if (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC) { + input = input.permute(0, 2, 1); + } + INDArray input2d = + TimeSeriesUtils.reshape3dTo2d( + input.castTo(W.dataType()), workspaceMgr, ArrayType.FF_WORKING_MEM); + + INDArray act2d = + getTypedLayerConfiguration() + .getActivationFn() + .getActivation(input2d.mmul(W).addiRowVector(b), training); + if (maskArray != null) { + if (!maskArray.isColumnVectorOrScalar() || Arrays.equals(maskArray.shape(), act2d.shape())) { + // Per output masking + act2d.muli(maskArray.castTo(act2d.dataType())); + } else { + // Per time step masking + act2d.muliColumnVector(maskArray.castTo(act2d.dataType())); + } } - public INDArray getInput() { - return input; + INDArray ret = + TimeSeriesUtils.reshape2dTo3d(act2d, input.size(0), workspaceMgr, ArrayType.ACTIVATIONS); + if (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC) { + ret = ret.permute(0, 2, 1); + } + return ret; + } + + @Override + public void setMaskArray(INDArray maskArray) { + if (maskArray != null) { + // Two possible cases: + // (a) per time step masking - rank 2 mask array -> reshape to rank 1 (column vector) + // (b) per output masking - rank 3 mask array -> reshape to rank 2 ( + if (maskArray.rank() == 2) { + this.maskArray = + TimeSeriesUtils.reshapeTimeSeriesMaskToVector( + maskArray, LayerWorkspaceMgr.noWorkspacesImmutable(), ArrayType.INPUT); + } else if (maskArray.rank() == 3) { + this.maskArray = + TimeSeriesUtils.reshape3dTo2d( + maskArray, LayerWorkspaceMgr.noWorkspacesImmutable(), ArrayType.INPUT); + } else { + throw new UnsupportedOperationException( + "Invalid mask array: must be rank 2 or 3 (got: rank " + + maskArray.rank() + + ", shape = " + + Arrays.toString(maskArray.shape()) + + ") " + + layerId()); + } + } else { + this.maskArray = null; + } + } + + @Override + public Pair feedForwardMaskArray( + INDArray maskArray, MaskState currentMaskState, int minibatchSize) { + + // If the *input* mask array is present and active, we should use it to mask the output + if (maskArray != null && currentMaskState == MaskState.Active) { + this.inputMaskArray = + TimeSeriesUtils.reshapeTimeSeriesMaskToVector( + maskArray, LayerWorkspaceMgr.noWorkspacesImmutable(), ArrayType.INPUT); + this.inputMaskArrayState = currentMaskState; + } else { + 
this.inputMaskArray = null; + this.inputMaskArrayState = null; } - @Override - public Layer.Type type() { - return Layer.Type.RECURRENT; + return null; // Last layer in network + } + + /** + * Compute the score for each example individually, after labels and input have been set. + * + * @param fullNetRegTerm Regularization score term for the entire network (or, 0.0 to not include + * regularization) + * @return A column INDArray of shape [numExamples,1], where entry i is the score of the ith + * example + */ + @Override + public INDArray computeScoreForExamples(double fullNetRegTerm, LayerWorkspaceMgr workspaceMgr) { + // For RNN: need to sum up the score over each time step before returning. + + if (input == null || labels == null) + throw new IllegalStateException( + "Cannot calculate score without input and labels " + layerId()); + INDArray preOut = preOutput2d(false, workspaceMgr); + + ILossFunction lossFunction = getTypedLayerConfiguration().getLossFunction(); + INDArray scoreArray = + lossFunction.computeScoreArray( + getLabels2d(workspaceMgr, ArrayType.FF_WORKING_MEM), + preOut, + getTypedLayerConfiguration().getActivationFn(), + maskArray); + // scoreArray: shape [minibatch*timeSeriesLength, 1] + // Reshape it to [minibatch, timeSeriesLength] then sum over time step + + INDArray scoreArrayTs = + TimeSeriesUtils.reshapeVectorToTimeSeriesMask(scoreArray, (int) input.size(0)); + INDArray summedScores = scoreArrayTs.sum(true, 1); + + if (fullNetRegTerm != 0.0) { + summedScores.addi(fullNetRegTerm); } - @Override - protected INDArray preOutput2d(boolean training, LayerWorkspaceMgr workspaceMgr) { - assertInputSet(false); - if (input.rank() == 3) { - //Case when called from RnnOutputLayer - INDArray inputTemp = input; - input = (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC) ? input.permute(0, 2, 1):input; - input = TimeSeriesUtils.reshape3dTo2d(input, workspaceMgr, ArrayType.FF_WORKING_MEM); - INDArray out = super.preOutput(training, workspaceMgr); - this.input = inputTemp; - return out; - } else { - //Case when called from BaseOutputLayer - INDArray out = super.preOutput(training, workspaceMgr); - return out; - } - } - - @Override - protected INDArray getLabels2d(LayerWorkspaceMgr workspaceMgr, ArrayType arrayType) { - INDArray labels = this.labels; - if (labels.rank() == 3){ - labels = (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC) ? labels.permute(0, 2, 1) : labels; - return TimeSeriesUtils.reshape3dTo2d(labels, workspaceMgr, arrayType); - } - return labels; - } - - @Override - public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) { - INDArray input = this.input; - if (input.rank() != 3) - throw new UnsupportedOperationException( - "Input must be rank 3. 
Got input with rank " + input.rank() + " " + layerId()); - INDArray b = getParamWithNoise(DefaultParamInitializer.BIAS_KEY, training, workspaceMgr); - INDArray W = getParamWithNoise(DefaultParamInitializer.WEIGHT_KEY, training, workspaceMgr); - - applyDropOutIfNecessary(training, workspaceMgr); - if (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC){ - input = input.permute(0, 2, 1); - } - INDArray input2d = TimeSeriesUtils.reshape3dTo2d(input.castTo(W.dataType()), workspaceMgr, ArrayType.FF_WORKING_MEM); - - INDArray act2d = getTypedLayerConfiguration().getActivationFn().getActivation(input2d.mmul(W).addiRowVector(b), training); - if (maskArray != null) { - if(!maskArray.isColumnVectorOrScalar() || Arrays.equals(maskArray.shape(), act2d.shape())){ - //Per output masking - act2d.muli(maskArray.castTo(act2d.dataType())); - } else { - //Per time step masking - act2d.muliColumnVector(maskArray.castTo(act2d.dataType())); - } - } - - INDArray ret = TimeSeriesUtils.reshape2dTo3d(act2d, input.size(0), workspaceMgr, ArrayType.ACTIVATIONS); - if (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC){ - ret = ret.permute(0, 2, 1); - } - return ret; - } - - @Override - public void setMaskArray(INDArray maskArray) { - if (maskArray != null) { - //Two possible cases: - //(a) per time step masking - rank 2 mask array -> reshape to rank 1 (column vector) - //(b) per output masking - rank 3 mask array -> reshape to rank 2 ( - if (maskArray.rank() == 2) { - this.maskArray = TimeSeriesUtils.reshapeTimeSeriesMaskToVector(maskArray, LayerWorkspaceMgr.noWorkspacesImmutable(), ArrayType.INPUT); - } else if (maskArray.rank() == 3) { - this.maskArray = TimeSeriesUtils.reshape3dTo2d(maskArray, LayerWorkspaceMgr.noWorkspacesImmutable(), ArrayType.INPUT); - } else { - throw new UnsupportedOperationException( - "Invalid mask array: must be rank 2 or 3 (got: rank " + maskArray.rank() + ", shape = " - + Arrays.toString(maskArray.shape()) + ") " + layerId()); - } - } else { - this.maskArray = null; - } - } - - @Override - public Pair feedForwardMaskArray(INDArray maskArray, MaskState currentMaskState, - int minibatchSize) { - - //If the *input* mask array is present and active, we should use it to mask the output - if (maskArray != null && currentMaskState == MaskState.Active) { - this.inputMaskArray = TimeSeriesUtils.reshapeTimeSeriesMaskToVector(maskArray, LayerWorkspaceMgr.noWorkspacesImmutable(), ArrayType.INPUT); - this.inputMaskArrayState = currentMaskState; - } else { - this.inputMaskArray = null; - this.inputMaskArrayState = null; - } - - return null; //Last layer in network - } - - /**Compute the score for each example individually, after labels and input have been set. - * - * @param fullNetRegTerm Regularization score term for the entire network (or, 0.0 to not include regularization) - * @return A column INDArray of shape [numExamples,1], where entry i is the score of the ith example - */ - @Override - public INDArray computeScoreForExamples(double fullNetRegTerm, LayerWorkspaceMgr workspaceMgr) { - //For RNN: need to sum up the score over each time step before returning. 
- - if (input == null || labels == null) - throw new IllegalStateException("Cannot calculate score without input and labels " + layerId()); - INDArray preOut = preOutput2d(false, workspaceMgr); - - ILossFunction lossFunction = getTypedLayerConfiguration().getLossFunction(); - INDArray scoreArray = - lossFunction.computeScoreArray(getLabels2d(workspaceMgr, ArrayType.FF_WORKING_MEM), preOut, - getTypedLayerConfiguration().getActivationFn(), maskArray); - //scoreArray: shape [minibatch*timeSeriesLength, 1] - //Reshape it to [minibatch, timeSeriesLength] then sum over time step - - INDArray scoreArrayTs = TimeSeriesUtils.reshapeVectorToTimeSeriesMask(scoreArray, (int)input.size(0)); - INDArray summedScores = scoreArrayTs.sum(true, 1); - - if (fullNetRegTerm != 0.0) { - summedScores.addi(fullNetRegTerm); - } - - return summedScores; - } + return summedScores; + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/SimpleRnn.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/SimpleRnn.java index a993583d0..8014a033e 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/SimpleRnn.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/SimpleRnn.java @@ -20,6 +20,8 @@ package org.deeplearning4j.nn.layers.recurrent; +import static org.nd4j.linalg.indexing.NDArrayIndex.*; + import lombok.val; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; @@ -28,6 +30,8 @@ import org.deeplearning4j.nn.params.SimpleRnnParamInitializer; import org.deeplearning4j.nn.workspace.ArrayType; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.nd4j.common.base.Preconditions; +import org.nd4j.common.primitives.Pair; +import org.nd4j.common.primitives.Quad; import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.memory.MemoryWorkspace; @@ -38,10 +42,6 @@ import org.nd4j.linalg.api.ops.impl.transforms.custom.LayerNorm; import org.nd4j.linalg.api.ops.impl.transforms.custom.LayerNormBp; import org.nd4j.linalg.api.shape.Shape; import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.common.primitives.Pair; -import org.nd4j.common.primitives.Quad; - -import static org.nd4j.linalg.indexing.NDArrayIndex.*; public class SimpleRnn extends BaseRecurrentLayer { public static final String STATE_KEY_PREV_ACTIVATION = "prevAct"; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/TimeDistributedLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/TimeDistributedLayer.java index 9a97f6a4a..331709ab2 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/TimeDistributedLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/TimeDistributedLayer.java @@ -28,9 +28,9 @@ import org.deeplearning4j.nn.layers.wrapper.BaseWrapperLayer; import org.deeplearning4j.nn.workspace.ArrayType; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.deeplearning4j.util.TimeSeriesUtils; -import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.common.primitives.Pair; import org.nd4j.common.util.ArrayUtil; +import org.nd4j.linalg.api.ndarray.INDArray; public class TimeDistributedLayer extends BaseWrapperLayer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffGraphVertex.java 
b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffGraphVertex.java index 1cf0b1be1..234230957 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffGraphVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffGraphVertex.java @@ -20,10 +20,11 @@ package org.deeplearning4j.nn.layers.samediff; +import java.util.*; import lombok.val; +import org.deeplearning4j.nn.api.ITraininableLayerConfiguration; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; -import org.deeplearning4j.nn.api.ITraininableLayerConfiguration; import org.deeplearning4j.nn.conf.layers.samediff.SDVertexParams; import org.deeplearning4j.nn.conf.layers.samediff.SameDiffVertex; import org.deeplearning4j.nn.gradient.DefaultGradient; @@ -40,15 +41,13 @@ import org.nd4j.autodiff.samediff.internal.InferenceSession; import org.nd4j.autodiff.samediff.internal.SessionMemMgr; import org.nd4j.autodiff.util.SameDiffUtils; import org.nd4j.common.base.Preconditions; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.memory.MemoryWorkspace; import org.nd4j.linalg.api.memory.conf.WorkspaceConfiguration; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.impl.layers.ExternalErrorsFunction; import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.common.primitives.Pair; - -import java.util.*; public class SameDiffGraphVertex extends BaseGraphVertex { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java index 7ee7cd33f..36df6f472 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java @@ -20,6 +20,7 @@ package org.deeplearning4j.nn.layers.samediff; +import java.util.*; import lombok.val; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; @@ -28,6 +29,8 @@ import org.deeplearning4j.nn.conf.layers.samediff.AbstractSameDiffLayer; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.AbstractLayer; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.autodiff.samediff.array.SingleThreadArrayHolder; @@ -35,18 +38,14 @@ import org.nd4j.autodiff.samediff.internal.InferenceSession; import org.nd4j.autodiff.samediff.internal.SessionMemMgr; import org.nd4j.autodiff.util.SameDiffUtils; import org.nd4j.common.base.Preconditions; +import org.nd4j.common.primitives.Pair; +import org.nd4j.common.util.ArrayUtil; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.memory.MemoryWorkspace; import org.nd4j.linalg.api.memory.conf.WorkspaceConfiguration; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.impl.layers.ExternalErrorsFunction; import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.common.primitives.Pair; -import org.deeplearning4j.nn.workspace.ArrayType; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import org.nd4j.common.util.ArrayUtil; - -import java.util.*; public class SameDiffLayer 
extends AbstractLayer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffOutputLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffOutputLayer.java index 60d4d4c7d..ced38dd28 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffOutputLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffOutputLayer.java @@ -20,6 +20,7 @@ package org.deeplearning4j.nn.layers.samediff; +import java.util.*; import lombok.Getter; import lombok.Setter; import lombok.val; @@ -37,6 +38,7 @@ import org.nd4j.autodiff.samediff.array.SingleThreadArrayHolder; import org.nd4j.autodiff.samediff.internal.InferenceSession; import org.nd4j.autodiff.samediff.internal.SessionMemMgr; import org.nd4j.common.base.Preconditions; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.memory.MemoryWorkspace; import org.nd4j.linalg.api.memory.conf.WorkspaceConfiguration; @@ -44,9 +46,6 @@ import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.api.DataSet; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.common.primitives.Pair; - -import java.util.*; public class SameDiffOutputLayer extends AbstractLayer implements IOutputLayer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/training/CenterLossOutputLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/training/CenterLossOutputLayer.java index 7f4a65453..a19023b49 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/training/CenterLossOutputLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/training/CenterLossOutputLayer.java @@ -26,14 +26,13 @@ import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.BaseOutputLayer; import org.deeplearning4j.nn.params.CenterLossParamInitializer; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.lossfunctions.ILossFunction; -import org.nd4j.common.primitives.Pair; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import org.deeplearning4j.nn.workspace.ArrayType; - public class CenterLossOutputLayer extends BaseOutputLayer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/util/IdentityLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/util/IdentityLayer.java index 19695f550..1cfee1246 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/util/IdentityLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/util/IdentityLayer.java @@ -20,7 +20,6 @@ package org.deeplearning4j.nn.layers.util; -import lombok.NoArgsConstructor; import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.samediff.SameDiffLambdaLayer; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/util/MaskLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/util/MaskLayer.java index 
da9661565..f182ea942 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/util/MaskLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/util/MaskLayer.java @@ -20,20 +20,18 @@ package org.deeplearning4j.nn.layers.util; +import java.util.Arrays; import org.deeplearning4j.nn.api.Layer; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.layers.AbstractLayer; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Broadcast; -import org.nd4j.common.primitives.Pair; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import org.deeplearning4j.nn.workspace.ArrayType; - -import java.util.Arrays; public class MaskLayer extends AbstractLayer { private final Gradient emptyGradient = new DefaultGradient(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/wrapper/BaseWrapperLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/wrapper/BaseWrapperLayer.java index 497b08aaf..817d62778 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/wrapper/BaseWrapperLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/wrapper/BaseWrapperLayer.java @@ -25,9 +25,9 @@ import java.util.Map; import lombok.Data; import lombok.NonNull; import net.brutex.ai.dnn.api.IModel; +import org.deeplearning4j.nn.api.ITraininableLayerConfiguration; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; -import org.deeplearning4j.nn.api.ITraininableLayerConfiguration; import org.deeplearning4j.nn.conf.CacheMode; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.BaseLayerConfiguration; @@ -47,7 +47,7 @@ import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator; @Data -public abstract class BaseWrapperLayer extends AbstractLayer { +public abstract class BaseWrapperLayer extends AbstractLayer { protected Layer underlying; @@ -57,8 +57,8 @@ public abstract class BaseWrapperLayer extends AbstractLayer { } @Override - public BaseLayerConfiguration getTypedLayerConfiguration() { - return (BaseLayerConfiguration) underlying.getLayerConfiguration(); + public LayerConf_T getTypedLayerConfiguration() { + return (LayerConf_T) underlying.getLayerConfiguration(); } /** diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java index 650bad51f..7cc08a62d 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java @@ -25,7 +25,6 @@ import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import java.io.*; import java.util.*; import java.util.stream.Collectors; - import lombok.*; import lombok.extern.slf4j.Slf4j; import net.brutex.ai.dnn.api.IModel; @@ -118,7 +117,7 @@ import org.nd4j.linalg.workspace.WorkspaceUtils; */ @Slf4j // 
@JsonIdentityInfo(generator = ObjectIdGenerators.IntSequenceGenerator.class, property = "@id") -@JsonIgnoreProperties({"helper", "net", "initCalled", "iupdater", "activationFn"}) +@JsonIgnoreProperties({"helper", "net", "initCalled", "iupdater"}) public class MultiLayerNetwork extends ArtificialNeuralNetwork implements Serializable, Classifier, Layer, ITrainableLayer { @@ -713,7 +712,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork if (layer_conf instanceof BaseLayerConfiguration) ((BaseLayerConfiguration) layer_conf).setDataType(netDtype); - nParamsPerLayer[i] = layer_conf.initializer().numParams(layer_conf); + nParamsPerLayer[i] = layer_conf.numParams(); paramLength += nParamsPerLayer[i]; } log.debug( @@ -827,7 +826,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork } /** - * This method allows you to specificy GradientsAccumulator instance to be used with this model + * This method allows you to specify GradientsAccumulator instance to be used with this model *
*
* PLEASE NOTE: Do not use this method unless you understand how to use GradientsAccumulator & @@ -1570,13 +1569,13 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork if (fwdPassType == FwdPassType.STANDARD) { // Standard feed-forward case if (i > 0 - && ConvolutionUtils.layerHasConvolutionLayout(layers[i - 1].getLayerConfiguration()) - && ConvolutionUtils.layerHasConvolutionLayout(layers[i].getLayerConfiguration())) { + && Convolution2DUtils.layerHasConvolutionLayout(layers[i - 1].getLayerConfiguration()) + && Convolution2DUtils.layerHasConvolutionLayout(layers[i].getLayerConfiguration())) { CNN2DFormat preLayerFormat = - ConvolutionUtils.getFormatForLayer(layers[i - 1].getLayerConfiguration()); + Convolution2DUtils.getFormatForLayer(layers[i - 1].getLayerConfiguration()); CNN2DFormat currLayerFormat = - ConvolutionUtils.getFormatForLayer(layers[i].getLayerConfiguration()); + Convolution2DUtils.getFormatForLayer(layers[i].getLayerConfiguration()); if (preLayerFormat != currLayerFormat) { // NHWC case if (preLayerFormat == CNN2DFormat.NCHW) { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/BatchNormalizationParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/BatchNormalizationParamInitializer.java index c68403835..3538960d1 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/BatchNormalizationParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/BatchNormalizationParamInitializer.java @@ -20,6 +20,7 @@ package org.deeplearning4j.nn.params; +import java.util.*; import lombok.val; import org.deeplearning4j.nn.api.AbstractParamInitializer; import org.deeplearning4j.nn.conf.layers.BatchNormalization; @@ -27,8 +28,6 @@ import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.indexing.NDArrayIndex; -import java.util.*; - public class BatchNormalizationParamInitializer extends AbstractParamInitializer { private static final BatchNormalizationParamInitializer INSTANCE = new BatchNormalizationParamInitializer(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/CenterLossParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/CenterLossParamInitializer.java index 8a02c397e..7a05cd581 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/CenterLossParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/CenterLossParamInitializer.java @@ -21,16 +21,13 @@ package org.deeplearning4j.nn.params; -import lombok.val; -import org.deeplearning4j.nn.api.Layer; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.layers.LayerConfiguration; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.indexing.NDArrayIndex; - import java.util.Collections; import java.util.LinkedHashMap; import java.util.Map; +import lombok.val; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.indexing.NDArrayIndex; public class CenterLossParamInitializer extends DefaultParamInitializer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/Convolution3DParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/Convolution3DParamInitializer.java index b11f9f3d2..994638a65 100644 --- 
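The MultiLayerNetwork hunk above switches the layout checks to Convolution2DUtils: when two adjacent convolution layers disagree on CNN2DFormat, the activations have to be permuted between NCHW and NHWC before being fed forward. A self-contained sketch of that permutation; the helper name and tensor sizes are illustrative only:

import java.util.Arrays;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class FormatBridgeSketch {
  // NCHW -> NHWC; the opposite direction is permute(0, 3, 1, 2)
  static INDArray nchwToNhwc(INDArray nchw) {
    return nchw.permute(0, 2, 3, 1);
  }

  public static void main(String[] args) {
    INDArray act = Nd4j.rand(new int[] {2, 3, 8, 8}); // [minibatch, channels, height, width]
    System.out.println(Arrays.toString(nchwToNhwc(act).shape())); // [2, 8, 8, 3]
  }
}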
a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/Convolution3DParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/Convolution3DParamInitializer.java @@ -21,6 +21,9 @@ package org.deeplearning4j.nn.params; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.Map; import lombok.val; import org.deeplearning4j.nn.conf.layers.Convolution3D; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; @@ -28,10 +31,6 @@ import org.deeplearning4j.nn.weights.WeightInitUtil; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.indexing.NDArrayIndex; -import java.util.Collections; -import java.util.LinkedHashMap; -import java.util.Map; - public class Convolution3DParamInitializer extends ConvolutionParamInitializer { private static final Convolution3DParamInitializer INSTANCE = new Convolution3DParamInitializer(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/ConvolutionNewParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/ConvolutionNewParamInitializer.java new file mode 100644 index 000000000..e09c2610d --- /dev/null +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/ConvolutionNewParamInitializer.java @@ -0,0 +1,183 @@ +/* + * ****************************************************************************** + * * + * * + * * This program and the accompanying materials are made available under the + * * terms of the Apache License, Version 2.0 which is available at + * * https://www.apache.org/licenses/LICENSE-2.0. + * * + * * See the NOTICE file distributed with this work for additional + * * information regarding copyright ownership. + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * * License for the specific language governing permissions and limitations + * * under the License. 
+ * * + * * SPDX-License-Identifier: Apache-2.0 + * ***************************************************************************** + */ + +package org.deeplearning4j.nn.params; + + +import java.util.*; +import lombok.val; +import org.deeplearning4j.nn.api.AbstractParamInitializer; +import org.deeplearning4j.nn.conf.layers.Convolution1DNew; +import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; +import org.deeplearning4j.nn.weights.WeightInitUtil; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.indexing.NDArrayIndex; + +public class ConvolutionNewParamInitializer extends AbstractParamInitializer { + + private static final ConvolutionNewParamInitializer INSTANCE = new ConvolutionNewParamInitializer(); + + public static ConvolutionNewParamInitializer getInstance() { + return INSTANCE; + } + + + public final static String WEIGHT_KEY = DefaultParamInitializer.WEIGHT_KEY; + public final static String BIAS_KEY = DefaultParamInitializer.BIAS_KEY; + + @Override + public long numParams(LayerConfiguration l) { + return l.numParams(); + } + + @Override + public List paramKeys(LayerConfiguration layer) { + ConvolutionLayer layerConf = + (ConvolutionLayer) layer; + if(layerConf.hasBias()){ + return Arrays.asList(WEIGHT_KEY, BIAS_KEY); + } else { + return weightKeys(layer); + } + } + + @Override + public List weightKeys(LayerConfiguration layer) { + return Collections.singletonList(WEIGHT_KEY); + } + + @Override + public List biasKeys(LayerConfiguration layer) { + ConvolutionLayer layerConf = + (ConvolutionLayer) layer; + if(layerConf.hasBias()){ + return Collections.singletonList(BIAS_KEY); + } else { + return Collections.emptyList(); + } + } + + @Override + public boolean isWeightParam(LayerConfiguration layer, String key) { + return WEIGHT_KEY.equals(key); + } + + @Override + public boolean isBiasParam(LayerConfiguration layer, String key) { + return BIAS_KEY.equals(key); + } + + @Override + public Map init(LayerConfiguration conf, INDArray paramsView, boolean initializeParams) { + Convolution1DNew layer = (Convolution1DNew) conf; + if (layer.getKernelSize().length != 2) throw new IllegalArgumentException("Filter size must be == 2"); + + Map params = Collections.synchronizedMap(new LinkedHashMap()); + + Convolution1DNew layerConf = + (Convolution1DNew) conf; + + val nOut = layerConf.getNOut(); + + if(layer.hasBias()){ + //Standard case + INDArray biasView = paramsView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(0, nOut)); + INDArray weightView = paramsView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(nOut, numParams(conf))); + params.put(BIAS_KEY, createBias(conf, biasView, initializeParams)); + params.put(WEIGHT_KEY, createWeightMatrix(conf, weightView, initializeParams)); + conf.getNetConfiguration().addNetWideVariable(WEIGHT_KEY); + conf.getNetConfiguration().addNetWideVariable(BIAS_KEY); + } else { + INDArray weightView = paramsView; + params.put(WEIGHT_KEY, createWeightMatrix(conf, weightView, initializeParams)); + conf.getNetConfiguration().addNetWideVariable(WEIGHT_KEY); + } + + return params; + } + + @Override + public Map getGradientsFromFlattened(LayerConfiguration conf, INDArray gradientView) { + + Convolution1DNew layerConf = + (Convolution1DNew) conf; + + int[] kernel = layerConf.getKernelSize(); + val nIn = layerConf.getNIn(); + val nOut = layerConf.getNOut(); + + Map out = new LinkedHashMap<>(); + if(layerConf.hasBias()){ + //Standard case + INDArray biasGradientView = 
gradientView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(0, nOut)); + INDArray weightGradientView = + gradientView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(nOut, numParams(conf))) + .reshape('c', nOut, nIn, kernel[0], kernel[1]); + out.put(BIAS_KEY, biasGradientView); + out.put(WEIGHT_KEY, weightGradientView); + } else { + INDArray weightGradientView = gradientView.reshape('c', nOut, nIn, kernel[0], kernel[1]); + out.put(WEIGHT_KEY, weightGradientView); + } + return out; + } + + //1 bias per feature map + protected INDArray createBias(LayerConfiguration conf, INDArray biasView, boolean initializeParams) { + //the bias is a 1D tensor -- one bias per output feature map + Convolution1DNew layerConf = + (Convolution1DNew) conf; + if (initializeParams) + biasView.assign(layerConf.getBiasInit()); + return biasView; + } + + + protected INDArray createWeightMatrix(LayerConfiguration conf, INDArray weightView, boolean initializeParams) { + /* + Create a 4d weight matrix of: + (number of kernels, num input channels, kernel height, kernel width) + Note c order is used specifically for the CNN weights, as opposed to f order elsewhere + Inputs to the convolution layer are: + (batch size, num input feature maps, image height, image width) + */ + Convolution1DNew layerConf = + (Convolution1DNew) conf; + if (initializeParams) { + int[] kernel = layerConf.getKernelSize(); + int[] stride = layerConf.getStride(); + + val inputDepth = layerConf.getNIn(); + val outputDepth = layerConf.getNOut(); + + double fanIn = inputDepth * kernel[0] * kernel[1]; + double fanOut = outputDepth * kernel[0] * kernel[1] / ((double) stride[0] * stride[1]); + + val weightsShape = new long[] {outputDepth, inputDepth, kernel[0], kernel[1]}; + + return layerConf.getWeightInit().init(fanIn, fanOut, weightsShape, 'c', weightView); + } else { + int[] kernel = layerConf.getKernelSize(); + return WeightInitUtil.reshapeWeights( + new long[] {layerConf.getNOut(), layerConf.getNIn(), kernel[0], kernel[1]}, weightView, 'c'); + } + } +} diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/ConvolutionParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/ConvolutionParamInitializer.java index 9b53e3713..a0d9bea82 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/ConvolutionParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/ConvolutionParamInitializer.java @@ -21,6 +21,7 @@ package org.deeplearning4j.nn.params; +import java.util.*; import lombok.val; import org.deeplearning4j.nn.api.AbstractParamInitializer; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; @@ -29,8 +30,6 @@ import org.deeplearning4j.nn.weights.WeightInitUtil; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.indexing.NDArrayIndex; -import java.util.*; - public class ConvolutionParamInitializer extends AbstractParamInitializer { private static final ConvolutionParamInitializer INSTANCE = new ConvolutionParamInitializer(); @@ -51,7 +50,11 @@ public class ConvolutionParamInitializer extends AbstractParamInitializer { int[] kernel = layerConf.getKernelSize(); val nIn = layerConf.getNIn(); val nOut = layerConf.getNOut(); - return nIn * nOut * kernel[0] * kernel[1] + (layerConf.hasBias() ? nOut : 0); + var kern = 1; + for(int i : kernel) { + kern = kern * i; + } + return nIn * nOut * kern + (layerConf.hasBias() ? 
nOut : 0); } @Override diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/Deconvolution3DParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/Deconvolution3DParamInitializer.java index 6e2d2b128..1f04cc534 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/Deconvolution3DParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/Deconvolution3DParamInitializer.java @@ -21,6 +21,9 @@ package org.deeplearning4j.nn.params; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.Map; import lombok.val; import org.deeplearning4j.nn.conf.layers.Deconvolution3D; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; @@ -28,10 +31,6 @@ import org.deeplearning4j.nn.weights.WeightInitUtil; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.indexing.NDArrayIndex; -import java.util.Collections; -import java.util.LinkedHashMap; -import java.util.Map; - public class Deconvolution3DParamInitializer extends ConvolutionParamInitializer { private static final Deconvolution3DParamInitializer INSTANCE = new Deconvolution3DParamInitializer(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DeconvolutionParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DeconvolutionParamInitializer.java index 463c24ae3..e0d16c745 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DeconvolutionParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DeconvolutionParamInitializer.java @@ -20,15 +20,14 @@ package org.deeplearning4j.nn.params; +import java.util.LinkedHashMap; +import java.util.Map; import lombok.val; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.weights.WeightInitUtil; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.indexing.NDArrayIndex; -import java.util.LinkedHashMap; -import java.util.Map; - public class DeconvolutionParamInitializer extends ConvolutionParamInitializer { private static final DeconvolutionParamInitializer INSTANCE = new DeconvolutionParamInitializer(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DefaultParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DefaultParamInitializer.java index 77c2b3a5f..a804949ce 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DefaultParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DefaultParamInitializer.java @@ -20,6 +20,7 @@ package org.deeplearning4j.nn.params; +import java.util.*; import lombok.NonNull; import lombok.extern.slf4j.Slf4j; import lombok.val; @@ -27,11 +28,9 @@ import org.deeplearning4j.nn.api.AbstractParamInitializer; import org.deeplearning4j.nn.conf.layers.*; import org.deeplearning4j.nn.weights.IWeightInit; import org.deeplearning4j.nn.weights.WeightInitUtil; -import org.deeplearning4j.nn.weights.WeightInitXavier; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.indexing.NDArrayIndex; -import java.util.*; @Slf4j public class DefaultParamInitializer extends AbstractParamInitializer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DepthwiseConvolutionParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DepthwiseConvolutionParamInitializer.java 
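The new ConvolutionNewParamInitializer above and the numParams() generalisation in ConvolutionParamInitializer rest on the same arithmetic: a convolution layer stores nOut * nIn * prod(kernel) weights plus nOut biases, and the flattened parameter view is laid out as [bias | weights]. A plain-Java sketch of that count and of the fan-in/fan-out values used for weight initialization; the channel counts, kernel and stride are example values:

public class ConvParamCountSketch {
  public static void main(String[] args) {
    long nIn = 3, nOut = 16;        // input / output channels (illustrative)
    int[] kernel = {5, 1};          // a 1D convolution stored as [k, 1]
    int[] stride = {1, 1};

    long kern = 1;
    for (int k : kernel) kern *= k; // same product the patched numParams() computes

    long numParams = nIn * nOut * kern + nOut; // weights + one bias per output feature map

    // Flattened layout used by init() / getGradientsFromFlattened():
    //   [0, nOut)         -> bias view
    //   [nOut, numParams) -> weight view, reshaped 'c' to [nOut, nIn, kernel[0], kernel[1]]
    double fanIn = nIn * kern;
    double fanOut = nOut * kern / ((double) stride[0] * stride[1]);

    System.out.printf("numParams=%d fanIn=%.1f fanOut=%.1f%n", numParams, fanIn, fanOut);
  }
}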
index d1bd00449..67e8849e2 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DepthwiseConvolutionParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DepthwiseConvolutionParamInitializer.java @@ -21,6 +21,7 @@ package org.deeplearning4j.nn.params; +import java.util.*; import lombok.val; import org.deeplearning4j.nn.api.AbstractParamInitializer; import org.deeplearning4j.nn.conf.layers.DepthwiseConvolution2D; @@ -29,8 +30,6 @@ import org.deeplearning4j.nn.weights.WeightInitUtil; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.indexing.NDArrayIndex; -import java.util.*; - public class DepthwiseConvolutionParamInitializer extends AbstractParamInitializer { private static final DepthwiseConvolutionParamInitializer INSTANCE = new DepthwiseConvolutionParamInitializer(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/ElementWiseParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/ElementWiseParamInitializer.java index 665a47d7f..510d21d73 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/ElementWiseParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/ElementWiseParamInitializer.java @@ -20,18 +20,16 @@ package org.deeplearning4j.nn.params; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.Map; import lombok.val; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.FeedForwardLayer; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.weights.IWeightInit; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.indexing.NDArrayIndex; -import java.util.Collections; -import java.util.LinkedHashMap; -import java.util.Map; - public class ElementWiseParamInitializer extends DefaultParamInitializer{ private static final ElementWiseParamInitializer INSTANCE = new ElementWiseParamInitializer(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/EmptyParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/EmptyParamInitializer.java index 28d458e78..7061cc134 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/EmptyParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/EmptyParamInitializer.java @@ -20,15 +20,12 @@ package org.deeplearning4j.nn.params; -import org.deeplearning4j.nn.api.AbstractParamInitializer; -import org.deeplearning4j.nn.api.ParamInitializer; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.layers.LayerConfiguration; -import org.nd4j.linalg.api.ndarray.INDArray; - import java.util.Collections; import java.util.List; import java.util.Map; +import org.deeplearning4j.nn.api.AbstractParamInitializer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; +import org.nd4j.linalg.api.ndarray.INDArray; /** * @author Adam Gibson diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/FrozenLayerParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/FrozenLayerParamInitializer.java index 580d07402..c74def9aa 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/FrozenLayerParamInitializer.java +++ 
b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/FrozenLayerParamInitializer.java @@ -25,7 +25,6 @@ import java.util.List; import java.util.Map; import org.deeplearning4j.nn.api.AbstractParamInitializer; import org.deeplearning4j.nn.api.ParamInitializer; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.misc.FrozenLayer; import org.nd4j.linalg.api.ndarray.INDArray; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/FrozenLayerWithBackpropParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/FrozenLayerWithBackpropParamInitializer.java index 1328e28d9..37de353eb 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/FrozenLayerWithBackpropParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/FrozenLayerWithBackpropParamInitializer.java @@ -20,16 +20,14 @@ package org.deeplearning4j.nn.params; -import org.deeplearning4j.nn.api.AbstractParamInitializer; -import org.deeplearning4j.nn.api.ParamInitializer; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.layers.LayerConfiguration; -import org.deeplearning4j.nn.conf.layers.misc.FrozenLayerWithBackprop; -import org.nd4j.linalg.api.ndarray.INDArray; - import java.util.Collections; import java.util.List; import java.util.Map; +import org.deeplearning4j.nn.api.AbstractParamInitializer; +import org.deeplearning4j.nn.api.ParamInitializer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; +import org.deeplearning4j.nn.conf.layers.misc.FrozenLayerWithBackprop; +import org.nd4j.linalg.api.ndarray.INDArray; public class FrozenLayerWithBackpropParamInitializer extends AbstractParamInitializer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/GravesBidirectionalLSTMParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/GravesBidirectionalLSTMParamInitializer.java index e74d69a1a..f90e991b2 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/GravesBidirectionalLSTMParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/GravesBidirectionalLSTMParamInitializer.java @@ -20,6 +20,7 @@ package org.deeplearning4j.nn.params; +import java.util.*; import lombok.val; import org.deeplearning4j.nn.api.AbstractParamInitializer; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; @@ -30,8 +31,6 @@ import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.INDArrayIndex; import org.nd4j.linalg.indexing.NDArrayIndex; -import java.util.*; - public class GravesBidirectionalLSTMParamInitializer extends AbstractParamInitializer { private static final GravesBidirectionalLSTMParamInitializer INSTANCE = diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/GravesLSTMParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/GravesLSTMParamInitializer.java index c770ab7d2..bfb9a882a 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/GravesLSTMParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/GravesLSTMParamInitializer.java @@ -20,6 +20,7 @@ package org.deeplearning4j.nn.params; +import java.util.*; import lombok.val; import org.deeplearning4j.nn.api.AbstractParamInitializer; import 
org.deeplearning4j.nn.conf.layers.LayerConfiguration; @@ -30,8 +31,6 @@ import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.INDArrayIndex; import org.nd4j.linalg.indexing.NDArrayIndex; -import java.util.*; - public class GravesLSTMParamInitializer extends AbstractParamInitializer { private static final GravesLSTMParamInitializer INSTANCE = new GravesLSTMParamInitializer(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/PReLUParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/PReLUParamInitializer.java index 11d5638fe..e457e8a2d 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/PReLUParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/PReLUParamInitializer.java @@ -20,6 +20,10 @@ package org.deeplearning4j.nn.params; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; import lombok.val; import org.deeplearning4j.nn.api.AbstractParamInitializer; import org.deeplearning4j.nn.conf.layers.BaseLayerConfiguration; @@ -30,11 +34,6 @@ import org.deeplearning4j.nn.weights.WeightInitUtil; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.indexing.NDArrayIndex; -import java.util.Collections; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; - public class PReLUParamInitializer extends AbstractParamInitializer { public final static String WEIGHT_KEY = "W"; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/PretrainParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/PretrainParamInitializer.java index 4eb87427a..89c3bb3b3 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/PretrainParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/PretrainParamInitializer.java @@ -20,15 +20,13 @@ package org.deeplearning4j.nn.params; +import java.util.Map; import lombok.val; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.NDArrayIndex; -import java.util.Map; - /** * Pretrain weight initializer. * Has the visible bias as well as hidden and weight matrix. 
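The PretrainParamInitializer javadoc above lists its parameter set: the weight matrix and hidden bias of the default initializer plus a visible bias. Taking that description at face value, the flat parameter count works out to nIn*nOut + nOut + nIn; a tiny sketch with assumed layer sizes:

public class PretrainParamCountSketch {
  public static void main(String[] args) {
    long nIn = 784, nOut = 256;     // illustrative layer sizes
    long weights = nIn * nOut;      // weight matrix
    long hiddenBias = nOut;         // standard bias
    long visibleBias = nIn;         // pretrain-only visible bias
    System.out.println("pretrain params = " + (weights + hiddenBias + visibleBias));
  }
}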
diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java index 5ea7b5000..8cfa88427 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/SameDiffParamInitializer.java @@ -20,21 +20,20 @@ package org.deeplearning4j.nn.params; +import static org.nd4j.linalg.indexing.NDArrayIndex.interval; + +import java.util.Arrays; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; import lombok.extern.slf4j.Slf4j; import lombok.val; import org.deeplearning4j.nn.api.AbstractParamInitializer; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.samediff.AbstractSameDiffLayer; import org.deeplearning4j.nn.conf.layers.samediff.SameDiffVertex; -import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.common.util.ArrayUtil; - -import java.util.Arrays; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; - -import static org.nd4j.linalg.indexing.NDArrayIndex.interval; +import org.nd4j.linalg.api.ndarray.INDArray; @Slf4j public class SameDiffParamInitializer extends AbstractParamInitializer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/SeparableConvolutionParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/SeparableConvolutionParamInitializer.java index 58547886f..1e0e1036d 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/SeparableConvolutionParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/SeparableConvolutionParamInitializer.java @@ -21,6 +21,7 @@ package org.deeplearning4j.nn.params; +import java.util.*; import lombok.val; import org.deeplearning4j.nn.api.AbstractParamInitializer; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; @@ -29,8 +30,6 @@ import org.deeplearning4j.nn.weights.WeightInitUtil; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.indexing.NDArrayIndex; -import java.util.*; - public class SeparableConvolutionParamInitializer extends AbstractParamInitializer { private static final SeparableConvolutionParamInitializer INSTANCE = new SeparableConvolutionParamInitializer(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/SimpleRnnParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/SimpleRnnParamInitializer.java index 5b1deb4d8..630e1c803 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/SimpleRnnParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/SimpleRnnParamInitializer.java @@ -20,6 +20,9 @@ package org.deeplearning4j.nn.params; +import static org.nd4j.linalg.indexing.NDArrayIndex.interval; + +import java.util.*; import lombok.val; import org.deeplearning4j.nn.api.AbstractParamInitializer; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; @@ -27,10 +30,6 @@ import org.deeplearning4j.nn.conf.layers.recurrent.SimpleRnn; import org.deeplearning4j.nn.weights.IWeightInit; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.*; - -import static org.nd4j.linalg.indexing.NDArrayIndex.interval; - public class SimpleRnnParamInitializer extends AbstractParamInitializer { private static final 
SimpleRnnParamInitializer INSTANCE = new SimpleRnnParamInitializer(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/VariationalAutoencoderParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/VariationalAutoencoderParamInitializer.java index 362c35170..5bdb95eee 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/VariationalAutoencoderParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/VariationalAutoencoderParamInitializer.java @@ -20,6 +20,10 @@ package org.deeplearning4j.nn.params; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; import lombok.val; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder; @@ -28,11 +32,6 @@ import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.exception.ND4JArraySizeException; import org.nd4j.linalg.indexing.NDArrayIndex; -import java.util.ArrayList; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; - public class VariationalAutoencoderParamInitializer extends DefaultParamInitializer { private static final VariationalAutoencoderParamInitializer INSTANCE = new VariationalAutoencoderParamInitializer(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/WrapperLayerParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/WrapperLayerParamInitializer.java index 5744e70ad..cedf6d8be 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/WrapperLayerParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/WrapperLayerParamInitializer.java @@ -20,14 +20,13 @@ package org.deeplearning4j.nn.params; +import java.util.List; +import java.util.Map; import org.deeplearning4j.nn.api.AbstractParamInitializer; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.wrapper.BaseWrapperLayerConfiguration; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.List; -import java.util.Map; - public class WrapperLayerParamInitializer extends AbstractParamInitializer { private static final WrapperLayerParamInitializer INSTANCE = new WrapperLayerParamInitializer(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/FineTuneConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/FineTuneConfiguration.java index 6f715cd8f..fe3a244ce 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/FineTuneConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/FineTuneConfiguration.java @@ -47,6 +47,7 @@ import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.conf.layers.LayerValidation; import org.deeplearning4j.nn.conf.layers.SubsamplingLayer; +import org.deeplearning4j.nn.conf.serde.CavisMapper; import org.deeplearning4j.nn.conf.stepfunctions.StepFunction; import org.deeplearning4j.nn.conf.weightnoise.IWeightNoise; import org.deeplearning4j.nn.weights.IWeightInit; @@ -118,7 +119,7 @@ public class FineTuneConfiguration { public static FineTuneConfiguration fromJson(String json) { try { - return NeuralNetConfiguration.mapper().readValue(json, 
FineTuneConfiguration.class); + return CavisMapper.getMapper(CavisMapper.Type.JSON).readValue(json, FineTuneConfiguration.class); } catch (IOException e) { throw new RuntimeException(e); } @@ -126,7 +127,7 @@ public class FineTuneConfiguration { public static FineTuneConfiguration fromYaml(String yaml) { try { - return NeuralNetConfiguration.mapperYaml().readValue(yaml, FineTuneConfiguration.class); + return CavisMapper.getMapper(CavisMapper.Type.YAML).readValue(yaml, FineTuneConfiguration.class); } catch (IOException e) { throw new RuntimeException(e); } @@ -322,7 +323,7 @@ public class FineTuneConfiguration { public String toJson() { try { - return NeuralNetConfiguration.mapper().writeValueAsString(this); + return CavisMapper.getMapper(CavisMapper.Type.JSON).writeValueAsString(this); } catch (JsonProcessingException e) { throw new RuntimeException(e); } @@ -330,7 +331,7 @@ public class FineTuneConfiguration { public String toYaml() { try { - return NeuralNetConfiguration.mapperYaml().writeValueAsString(this); + return CavisMapper.getMapper(CavisMapper.Type.YAML).writeValueAsString(this); } catch (JsonProcessingException e) { throw new RuntimeException(e); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearning.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearning.java index 5a35a492c..47253f7b6 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearning.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearning.java @@ -20,6 +20,7 @@ package org.deeplearning4j.nn.transferlearning; +import java.util.*; import lombok.extern.slf4j.Slf4j; import lombok.val; import org.deeplearning4j.nn.conf.*; @@ -39,13 +40,11 @@ import org.deeplearning4j.nn.weights.IWeightInit; import org.deeplearning4j.nn.weights.WeightInit; import org.deeplearning4j.nn.weights.WeightInitDistribution; import org.nd4j.common.base.Preconditions; +import org.nd4j.common.primitives.Pair; +import org.nd4j.common.primitives.Triple; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.common.primitives.Pair; -import org.nd4j.common.primitives.Triple; - -import java.util.*; @Slf4j public class TransferLearning { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearningHelper.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearningHelper.java index bd6cc18a3..c75f1c7c2 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearningHelper.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/transferlearning/TransferLearningHelper.java @@ -20,6 +20,7 @@ package org.deeplearning4j.nn.transferlearning; +import java.util.*; import org.apache.commons.lang3.ArrayUtils; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.graph.ComputationGraph; @@ -33,8 +34,6 @@ import org.nd4j.linalg.dataset.MultiDataSet; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator; -import java.util.*; - public class TransferLearningHelper { private boolean isGraph = true; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/BaseMultiLayerUpdater.java 
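The FineTuneConfiguration hunks above only swap the Jackson mapper (CavisMapper.getMapper(...) instead of the NeuralNetConfiguration mappers); the public toJson()/fromJson() and toYaml()/fromYaml() entry points are unchanged. A hedged round-trip sketch, assuming the pre-existing FineTuneConfiguration.Builder and its seed(long) option are still available in this fork:

import org.deeplearning4j.nn.transferlearning.FineTuneConfiguration;

public class FineTuneRoundTripSketch {
  public static void main(String[] args) {
    FineTuneConfiguration ftc = new FineTuneConfiguration.Builder()
        .seed(12345)                 // assumed builder option
        .build();
    String json = ftc.toJson();      // now serialized through CavisMapper
    FineTuneConfiguration restored = FineTuneConfiguration.fromJson(json);
    System.out.println(json.equals(restored.toJson()));
  }
}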
b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/BaseMultiLayerUpdater.java index e8c7c1e6c..6f06a0ab9 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/BaseMultiLayerUpdater.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/BaseMultiLayerUpdater.java @@ -20,6 +20,7 @@ package org.deeplearning4j.nn.updater; +import java.util.*; import lombok.Getter; import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.nn.api.ITrainableLayer; @@ -27,22 +28,19 @@ import org.deeplearning4j.nn.api.Updater; import org.deeplearning4j.nn.conf.GradientNormalization; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.nd4j.common.base.Preconditions; import org.nd4j.linalg.api.memory.MemoryWorkspace; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.CustomOp; import org.nd4j.linalg.api.ops.DynamicCustomOp; - import org.nd4j.linalg.api.ops.impl.reduce.floating.Norm2; import org.nd4j.linalg.exception.ND4JArraySizeException; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.NDArrayIndex; -import org.deeplearning4j.nn.workspace.ArrayType; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.nd4j.linalg.learning.config.IUpdater; -import java.util.*; - @Getter public abstract class BaseMultiLayerUpdater implements Updater { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/LayerUpdater.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/LayerUpdater.java index 1dfe4a7b9..980146b21 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/LayerUpdater.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/LayerUpdater.java @@ -20,14 +20,13 @@ package org.deeplearning4j.nn.updater; +import java.util.HashMap; import lombok.extern.slf4j.Slf4j; -import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ITrainableLayer; +import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.HashMap; - @Slf4j public class LayerUpdater extends BaseMultiLayerUpdater { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/MultiLayerUpdater.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/MultiLayerUpdater.java index 1027f5003..4caa75ecc 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/MultiLayerUpdater.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/MultiLayerUpdater.java @@ -20,6 +20,7 @@ package org.deeplearning4j.nn.updater; +import java.util.HashMap; import lombok.Getter; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.nn.api.ITrainableLayer; @@ -28,8 +29,6 @@ import org.deeplearning4j.nn.api.Updater; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.HashMap; - @Getter @Slf4j public class MultiLayerUpdater extends BaseMultiLayerUpdater { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/UpdaterBlock.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/UpdaterBlock.java index 7b496468f..61f93ae5f 100644 --- 
a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/UpdaterBlock.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/UpdaterBlock.java @@ -20,6 +20,8 @@ package org.deeplearning4j.nn.updater; +import java.util.ArrayList; +import java.util.List; import lombok.AllArgsConstructor; import lombok.Data; import org.deeplearning4j.nn.api.ITrainableLayer; @@ -28,9 +30,6 @@ import org.nd4j.linalg.indexing.NDArrayIndex; import org.nd4j.linalg.learning.GradientUpdater; import org.nd4j.linalg.learning.regularization.Regularization; -import java.util.ArrayList; -import java.util.List; - @Data public class UpdaterBlock { private int paramOffsetStart; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/UpdaterCreator.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/UpdaterCreator.java index 11573daa0..8731b58c4 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/UpdaterCreator.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/UpdaterCreator.java @@ -20,8 +20,8 @@ package org.deeplearning4j.nn.updater; -import org.deeplearning4j.nn.api.Layer; import net.brutex.ai.dnn.api.IModel; +import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.updater.graph.ComputationGraphUpdater; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/graph/ComputationGraphUpdater.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/graph/ComputationGraphUpdater.java index 45ffa6159..fb742f656 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/graph/ComputationGraphUpdater.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/updater/graph/ComputationGraphUpdater.java @@ -20,15 +20,14 @@ package org.deeplearning4j.nn.updater.graph; +import java.util.Arrays; +import java.util.HashMap; import org.deeplearning4j.nn.api.ITrainableLayer; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.graph.vertex.GraphVertex; import org.deeplearning4j.nn.updater.BaseMultiLayerUpdater; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.Arrays; -import java.util.HashMap; - public class ComputationGraphUpdater extends BaseMultiLayerUpdater { protected ITrainableLayer[] orderedLayers; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/IWeightInit.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/IWeightInit.java index 676589d87..491cb6488 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/IWeightInit.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/IWeightInit.java @@ -20,13 +20,12 @@ package org.deeplearning4j.nn.weights; -import org.nd4j.linalg.api.ndarray.INDArray; import com.fasterxml.jackson.annotation.JsonAutoDetect; import com.fasterxml.jackson.annotation.JsonTypeInfo; - import java.io.Serializable; +import org.nd4j.linalg.api.ndarray.INDArray; -@JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, include = JsonTypeInfo.As.PROPERTY, property = "@class") +@JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, property = "@class") @JsonAutoDetect(fieldVisibility = JsonAutoDetect.Visibility.ANY, getterVisibility = JsonAutoDetect.Visibility.NONE, setterVisibility = JsonAutoDetect.Visibility.NONE) public interface IWeightInit extends Serializable { diff --git 
a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitDistribution.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitDistribution.java index e9da0c596..3829f5d5e 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitDistribution.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitDistribution.java @@ -20,19 +20,21 @@ package org.deeplearning4j.nn.weights; -import lombok.EqualsAndHashCode; +import lombok.*; import org.deeplearning4j.nn.conf.distribution.Distribution; import org.deeplearning4j.nn.conf.distribution.Distributions; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.rng.distribution.impl.OrthogonalDistribution; -import com.fasterxml.jackson.annotation.JsonProperty; @EqualsAndHashCode +@NoArgsConstructor public class WeightInitDistribution implements IWeightInit { - private final Distribution distribution; + @Getter @Setter + private Distribution distribution; - public WeightInitDistribution(@JsonProperty("distribution") Distribution distribution) { + + public WeightInitDistribution(@NonNull Distribution distribution) { if(distribution == null) { // Would fail later below otherwise throw new IllegalArgumentException("Must set distribution!"); @@ -40,6 +42,7 @@ public class WeightInitDistribution implements IWeightInit { this.distribution = distribution; } + @Override public INDArray init(double fanIn, double fanOut, long[] shape, char order, INDArray paramView) { //org.nd4j.linalg.api.rng.distribution.Distribution not serializable diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitIdentity.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitIdentity.java index 35e15b380..e236a791e 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitIdentity.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitIdentity.java @@ -20,16 +20,14 @@ package org.deeplearning4j.nn.weights; +import com.fasterxml.jackson.annotation.JsonProperty; +import java.util.Arrays; import lombok.Data; -import lombok.EqualsAndHashCode; import lombok.NoArgsConstructor; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.INDArrayIndex; import org.nd4j.linalg.indexing.NDArrayIndex; -import com.fasterxml.jackson.annotation.JsonProperty; - -import java.util.Arrays; @Data @NoArgsConstructor diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitUtil.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitUtil.java index fb8890237..6929538c8 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitUtil.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitUtil.java @@ -21,16 +21,14 @@ package org.deeplearning4j.nn.weights; +import java.util.Arrays; import org.apache.commons.math3.util.FastMath; +import org.nd4j.common.util.ArrayUtil; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.random.impl.TruncatedNormalDistribution; import org.nd4j.linalg.api.rng.distribution.Distribution; import org.nd4j.linalg.api.rng.distribution.impl.OrthogonalDistribution; import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.common.util.ArrayUtil; - -import java.util.Arrays; - /** * Weight initialization utility diff --git 
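The WeightInitDistribution change above replaces the @JsonProperty constructor with a no-args constructor plus getter/setter (and a @NonNull convenience constructor), while the IWeightInit.init(fanIn, fanOut, shape, order, paramView) contract stays the same. A short usage sketch; the distribution parameters, sizes and class name are illustrative:

import java.util.Arrays;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.weights.IWeightInit;
import org.deeplearning4j.nn.weights.WeightInitDistribution;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class WeightInitDistributionSketch {
  public static void main(String[] args) {
    long nIn = 4, nOut = 3;
    IWeightInit wi = new WeightInitDistribution(new NormalDistribution(0.0, 0.01));
    INDArray paramView = Nd4j.zeros(1, (int) (nIn * nOut)); // flat view filled in place
    INDArray weights = wi.init(nIn, nOut, new long[] {nIn, nOut}, 'f', paramView);
    System.out.println(Arrays.toString(weights.shape())); // [4, 3]
  }
}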
a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingNormalFanAvg.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingNormalFanAvg.java index 0c7458689..c788238d3 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingNormalFanAvg.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingNormalFanAvg.java @@ -21,9 +21,7 @@ package org.deeplearning4j.nn.weights; import lombok.Data; -import lombok.EqualsAndHashCode; import lombok.NoArgsConstructor; -import org.apache.commons.math3.util.FastMath; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.random.impl.TruncatedNormalDistribution; import org.nd4j.linalg.factory.Nd4j; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingNormalFanOut.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingNormalFanOut.java index b559db308..07649c138 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingNormalFanOut.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingNormalFanOut.java @@ -21,9 +21,7 @@ package org.deeplearning4j.nn.weights; import lombok.Data; -import lombok.EqualsAndHashCode; import lombok.NoArgsConstructor; -import org.apache.commons.math3.util.FastMath; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.random.impl.TruncatedNormalDistribution; import org.nd4j.linalg.factory.Nd4j; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingUniformFanAvg.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingUniformFanAvg.java index 7bbd172a4..97025d413 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingUniformFanAvg.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingUniformFanAvg.java @@ -21,7 +21,6 @@ package org.deeplearning4j.nn.weights; import lombok.Data; -import lombok.EqualsAndHashCode; import lombok.NoArgsConstructor; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingUniformFanIn.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingUniformFanIn.java index 95d0dee3a..6a56b902b 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingUniformFanIn.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingUniformFanIn.java @@ -21,7 +21,6 @@ package org.deeplearning4j.nn.weights; import lombok.Data; -import lombok.EqualsAndHashCode; import lombok.NoArgsConstructor; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingUniformFanOut.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingUniformFanOut.java index f062b5e5e..5befe8ef2 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingUniformFanOut.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingUniformFanOut.java @@ -21,7 
+21,6 @@ package org.deeplearning4j.nn.weights; import lombok.Data; -import lombok.EqualsAndHashCode; import lombok.NoArgsConstructor; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/embeddings/EmbeddingInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/embeddings/EmbeddingInitializer.java index afae83c80..35f6a8d48 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/embeddings/EmbeddingInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/embeddings/EmbeddingInitializer.java @@ -20,10 +20,9 @@ package org.deeplearning4j.nn.weights.embeddings; -import org.nd4j.linalg.api.ndarray.INDArray; import com.fasterxml.jackson.annotation.JsonTypeInfo; - import java.io.Serializable; +import org.nd4j.linalg.api.ndarray.INDArray; @JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, include = JsonTypeInfo.As.PROPERTY, property = "@class") public interface EmbeddingInitializer extends Serializable { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/embeddings/WeightInitEmbedding.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/embeddings/WeightInitEmbedding.java index 70227b8ca..0fe4176b9 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/embeddings/WeightInitEmbedding.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/weights/embeddings/WeightInitEmbedding.java @@ -20,14 +20,14 @@ package org.deeplearning4j.nn.weights.embeddings; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonProperty; import lombok.EqualsAndHashCode; import lombok.NonNull; import org.deeplearning4j.nn.weights.IWeightInit; import org.deeplearning4j.nn.weights.WeightInit; import org.nd4j.common.base.Preconditions; import org.nd4j.linalg.api.ndarray.INDArray; -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import com.fasterxml.jackson.annotation.JsonProperty; @JsonIgnoreProperties("nonSerializableInit") @EqualsAndHashCode diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/workspace/LayerWorkspaceMgr.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/workspace/LayerWorkspaceMgr.java index a7b972a2f..f9e5a14d4 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/workspace/LayerWorkspaceMgr.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/workspace/LayerWorkspaceMgr.java @@ -21,6 +21,7 @@ package org.deeplearning4j.nn.workspace; import com.google.common.base.Preconditions; +import java.util.*; import lombok.Getter; import lombok.NonNull; import lombok.Setter; @@ -28,9 +29,6 @@ import org.bytedeco.javacpp.Pointer; import org.nd4j.linalg.api.memory.conf.WorkspaceConfiguration; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.workspace.BaseWorkspaceMgr; -import org.nd4j.linalg.workspace.WorkspaceMgr; - -import java.util.*; public class LayerWorkspaceMgr extends BaseWorkspaceMgr { public static String CUDNN_WORKSPACE_KEY = "CUDNN_WORKSPACE"; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/Solver.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/Solver.java index 6412914c4..3d7c4588a 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/Solver.java +++ 
b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/Solver.java @@ -20,6 +20,10 @@ package org.deeplearning4j.optimize; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; @@ -34,11 +38,6 @@ import org.deeplearning4j.optimize.stepfunctions.StepFunctions; import org.nd4j.linalg.api.memory.MemoryWorkspace; import org.nd4j.linalg.factory.Nd4j; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.List; - public class Solver { private NeuralNetConfiguration conf; private Collection listeners; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/api/BaseTrainingListener.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/api/BaseTrainingListener.java index d72b836f5..7b1438f4b 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/api/BaseTrainingListener.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/api/BaseTrainingListener.java @@ -20,11 +20,10 @@ package org.deeplearning4j.optimize.api; -import net.brutex.ai.dnn.api.IModel; -import org.nd4j.linalg.api.ndarray.INDArray; - import java.util.List; import java.util.Map; +import net.brutex.ai.dnn.api.IModel; +import org.nd4j.linalg.api.ndarray.INDArray; public abstract class BaseTrainingListener implements TrainingListener { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/api/ConvexOptimizer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/api/ConvexOptimizer.java index 0d6999fce..84747356f 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/api/ConvexOptimizer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/api/ConvexOptimizer.java @@ -20,6 +20,8 @@ package org.deeplearning4j.optimize.api; +import java.io.Serializable; +import java.util.Collection; import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.nn.api.Updater; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -27,11 +29,8 @@ import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.updater.graph.ComputationGraphUpdater; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.deeplearning4j.optimize.solvers.accumulation.GradientsAccumulator; -import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.common.primitives.Pair; - -import java.io.Serializable; -import java.util.Collection; +import org.nd4j.linalg.api.ndarray.INDArray; public interface ConvexOptimizer extends Serializable { /** diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/api/IterationListener.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/api/IterationListener.java index 309f478fe..2d1777033 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/api/IterationListener.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/api/IterationListener.java @@ -21,9 +21,8 @@ package org.deeplearning4j.optimize.api; -import net.brutex.ai.dnn.api.IModel; - import java.io.Serializable; +import net.brutex.ai.dnn.api.IModel; @Deprecated public abstract class IterationListener extends BaseTrainingListener implements Serializable { diff --git 
a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/api/LineOptimizer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/api/LineOptimizer.java index e76c45954..7c91b23fd 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/api/LineOptimizer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/api/LineOptimizer.java @@ -20,11 +20,10 @@ package org.deeplearning4j.optimize.api; -import org.deeplearning4j.exception.InvalidStepException; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; - import java.io.Serializable; +import org.deeplearning4j.exception.InvalidStepException; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.linalg.api.ndarray.INDArray; public interface LineOptimizer extends Serializable { /** diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/api/StepFunction.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/api/StepFunction.java index 6d5948fec..61531c143 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/api/StepFunction.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/api/StepFunction.java @@ -20,9 +20,8 @@ package org.deeplearning4j.optimize.api; -import org.nd4j.linalg.api.ndarray.INDArray; - import java.io.Serializable; +import org.nd4j.linalg.api.ndarray.INDArray; public interface StepFunction extends Serializable { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/api/TrainingListener.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/api/TrainingListener.java index 20fe978dc..895c38a03 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/api/TrainingListener.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/api/TrainingListener.java @@ -20,14 +20,13 @@ package org.deeplearning4j.optimize.api; +import java.util.List; +import java.util.Map; import net.brutex.ai.dnn.api.IModel; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator; -import java.util.List; -import java.util.Map; - public interface TrainingListener { /** diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/Checkpoint.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/Checkpoint.java index 0ea277eb0..48a5b7c4b 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/Checkpoint.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/Checkpoint.java @@ -20,11 +20,10 @@ package org.deeplearning4j.optimize.listeners; -import lombok.AllArgsConstructor; -import lombok.Data; - import java.io.Serializable; import java.util.Arrays; +import lombok.AllArgsConstructor; +import lombok.Data; @AllArgsConstructor @Data diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/CheckpointListener.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/CheckpointListener.java index 120099da4..c19ad3ebb 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/CheckpointListener.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/CheckpointListener.java @@ -21,6 +21,10 @@ package 
org.deeplearning4j.optimize.listeners; import com.google.common.io.Files; +import java.io.*; +import java.nio.charset.Charset; +import java.util.*; +import java.util.concurrent.TimeUnit; import lombok.NonNull; import lombok.extern.slf4j.Slf4j; import net.brutex.ai.dnn.api.IModel; @@ -31,11 +35,6 @@ import org.deeplearning4j.optimize.api.BaseTrainingListener; import org.deeplearning4j.util.ModelSerializer; import org.nd4j.common.base.Preconditions; -import java.io.*; -import java.nio.charset.Charset; -import java.util.*; -import java.util.concurrent.TimeUnit; - @Slf4j public class CheckpointListener extends BaseTrainingListener implements Serializable { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/CollectScoresIterationListener.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/CollectScoresIterationListener.java index 32058d3db..a01d5de7b 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/CollectScoresIterationListener.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/CollectScoresIterationListener.java @@ -20,9 +20,6 @@ package org.deeplearning4j.optimize.listeners; -import net.brutex.ai.dnn.api.IModel; -import org.deeplearning4j.optimize.api.BaseTrainingListener; - import java.io.File; import java.io.FileOutputStream; import java.io.IOException; @@ -31,6 +28,8 @@ import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import net.brutex.ai.dnn.api.IModel; +import org.deeplearning4j.optimize.api.BaseTrainingListener; public class CollectScoresIterationListener extends BaseTrainingListener { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/CollectScoresListener.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/CollectScoresListener.java index a08b18814..bfa989446 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/CollectScoresListener.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/CollectScoresListener.java @@ -22,15 +22,13 @@ package org.deeplearning4j.optimize.listeners; import it.unimi.dsi.fastutil.doubles.DoubleArrayList; import it.unimi.dsi.fastutil.ints.IntArrayList; +import java.io.Serializable; import lombok.Data; import lombok.EqualsAndHashCode; import lombok.extern.slf4j.Slf4j; import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.optimize.api.BaseTrainingListener; - -import java.io.Serializable; - @Data @EqualsAndHashCode(callSuper = true) @Slf4j diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/ComposableIterationListener.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/ComposableIterationListener.java index 4b67fcede..eabcf1c32 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/ComposableIterationListener.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/ComposableIterationListener.java @@ -20,14 +20,13 @@ package org.deeplearning4j.optimize.listeners; -import net.brutex.ai.dnn.api.IModel; -import org.deeplearning4j.optimize.api.BaseTrainingListener; -import org.deeplearning4j.optimize.api.TrainingListener; - import java.io.Serializable; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import net.brutex.ai.dnn.api.IModel; +import 
org.deeplearning4j.optimize.api.BaseTrainingListener; +import org.deeplearning4j.optimize.api.TrainingListener; @Deprecated public class ComposableIterationListener extends BaseTrainingListener implements Serializable { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/EvaluativeListener.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/EvaluativeListener.java index f98dd0aad..da0adec26 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/EvaluativeListener.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/EvaluativeListener.java @@ -20,6 +20,7 @@ package org.deeplearning4j.optimize.listeners; +import java.util.concurrent.atomic.AtomicLong; import lombok.Getter; import lombok.NonNull; import lombok.Setter; @@ -39,8 +40,6 @@ import org.nd4j.linalg.dataset.MultiDataSet; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator; -import java.util.concurrent.atomic.AtomicLong; - @Slf4j public class EvaluativeListener extends BaseTrainingListener { protected transient ThreadLocal iterationCount = new ThreadLocal<>(); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/FailureTestingListener.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/FailureTestingListener.java index d6ac11b41..28ef81525 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/FailureTestingListener.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/FailureTestingListener.java @@ -20,6 +20,9 @@ package org.deeplearning4j.optimize.listeners; +import java.io.Serializable; +import java.net.InetAddress; +import java.util.*; import lombok.AllArgsConstructor; import lombok.Data; import lombok.EqualsAndHashCode; @@ -32,10 +35,6 @@ import org.deeplearning4j.optimize.api.TrainingListener; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; -import java.io.Serializable; -import java.net.InetAddress; -import java.util.*; - @Slf4j public class FailureTestingListener implements TrainingListener, Serializable { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/PerformanceListener.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/PerformanceListener.java index 4f287816e..4ac091ecd 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/PerformanceListener.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/PerformanceListener.java @@ -21,14 +21,6 @@ package org.deeplearning4j.optimize.listeners; import com.google.common.base.Preconditions; -import lombok.extern.slf4j.Slf4j; -import net.brutex.ai.dnn.api.IModel; -import org.deeplearning4j.nn.graph.ComputationGraph; -import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; -import org.deeplearning4j.optimize.api.BaseTrainingListener; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.factory.Nd4j; - import java.io.IOException; import java.io.ObjectInputStream; import java.io.Serializable; @@ -37,6 +29,13 @@ import java.lang.management.ManagementFactory; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import lombok.extern.slf4j.Slf4j; +import net.brutex.ai.dnn.api.IModel; +import org.deeplearning4j.nn.graph.ComputationGraph; +import 
org.deeplearning4j.nn.multilayer.MultiLayerNetwork; +import org.deeplearning4j.optimize.api.BaseTrainingListener; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.factory.Nd4j; @Slf4j public class PerformanceListener extends BaseTrainingListener implements Serializable { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/ScoreIterationListener.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/ScoreIterationListener.java index 0d65332ad..5d54d5409 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/ScoreIterationListener.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/ScoreIterationListener.java @@ -20,12 +20,11 @@ package org.deeplearning4j.optimize.listeners; +import java.io.Serializable; import lombok.extern.slf4j.Slf4j; import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.optimize.api.BaseTrainingListener; -import java.io.Serializable; - @Slf4j public class ScoreIterationListener extends BaseTrainingListener implements Serializable { private int printIterations = 10; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/ScoreToChartListener.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/ScoreToChartListener.java index 1370bbad7..b43862189 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/ScoreToChartListener.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/ScoreToChartListener.java @@ -23,10 +23,10 @@ package org.deeplearning4j.optimize.listeners; import java.io.IOException; import lombok.extern.slf4j.Slf4j; +import net.brutex.ai.dnn.api.IModel; import okhttp3.OkHttpClient; import okhttp3.Request; import okhttp3.Response; -import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.optimize.api.BaseTrainingListener; @Slf4j diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/SleepyTrainingListener.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/SleepyTrainingListener.java index 834778001..766787547 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/SleepyTrainingListener.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/SleepyTrainingListener.java @@ -20,6 +20,10 @@ package org.deeplearning4j.optimize.listeners; +import java.io.Serializable; +import java.util.List; +import java.util.Map; +import java.util.concurrent.atomic.AtomicLong; import lombok.*; import lombok.extern.slf4j.Slf4j; import net.brutex.ai.dnn.api.IModel; @@ -27,11 +31,6 @@ import org.deeplearning4j.optimize.api.BaseTrainingListener; import org.nd4j.common.util.ThreadUtils; import org.nd4j.linalg.api.ndarray.INDArray; -import java.io.Serializable; -import java.util.List; -import java.util.Map; -import java.util.concurrent.atomic.AtomicLong; - @AllArgsConstructor @NoArgsConstructor @Data diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/TimeIterationListener.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/TimeIterationListener.java index 8a947e4c0..1ba8ad5b8 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/TimeIterationListener.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/TimeIterationListener.java @@ -20,13 +20,12 @@ 
package org.deeplearning4j.optimize.listeners; -import lombok.extern.slf4j.Slf4j; -import net.brutex.ai.dnn.api.IModel; -import org.deeplearning4j.optimize.api.BaseTrainingListener; - import java.io.Serializable; import java.util.Date; import java.util.concurrent.atomic.AtomicLong; +import lombok.extern.slf4j.Slf4j; +import net.brutex.ai.dnn.api.IModel; +import org.deeplearning4j.optimize.api.BaseTrainingListener; @Slf4j public class TimeIterationListener extends BaseTrainingListener implements Serializable { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/callbacks/ModelSavingCallback.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/callbacks/ModelSavingCallback.java index cb0fe44a0..f708efc21 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/callbacks/ModelSavingCallback.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/listeners/callbacks/ModelSavingCallback.java @@ -20,17 +20,16 @@ package org.deeplearning4j.optimize.listeners.callbacks; +import java.io.File; +import java.io.IOException; import lombok.NonNull; +import net.brutex.ai.dnn.api.IModel; import org.apache.commons.io.FilenameUtils; import org.deeplearning4j.exception.DL4JInvalidConfigException; -import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.optimize.listeners.EvaluativeListener; import org.deeplearning4j.util.ModelSerializer; import org.nd4j.evaluation.IEvaluation; -import java.io.File; -import java.io.IOException; - public class ModelSavingCallback implements EvaluationCallback { protected File rootFolder; protected String template; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/BackTrackLineSearch.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/BackTrackLineSearch.java index 534807c1f..8213ac348 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/BackTrackLineSearch.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/BackTrackLineSearch.java @@ -20,10 +20,13 @@ package org.deeplearning4j.optimize.solvers; +import static org.nd4j.linalg.ops.transforms.Transforms.abs; + import net.brutex.ai.dnn.api.IModel; import org.apache.commons.math3.util.FastMath; import org.deeplearning4j.exception.InvalidStepException; import org.deeplearning4j.nn.conf.stepfunctions.NegativeGradientStepFunction; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.deeplearning4j.optimize.api.ConvexOptimizer; import org.deeplearning4j.optimize.api.LineOptimizer; import org.deeplearning4j.optimize.api.StepFunction; @@ -34,13 +37,9 @@ import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.impl.scalar.comparison.ScalarSetValue; import org.nd4j.linalg.api.ops.impl.transforms.comparison.Eps; import org.nd4j.linalg.factory.Nd4j; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static org.nd4j.linalg.ops.transforms.Transforms.abs; - - public class BackTrackLineSearch implements LineOptimizer { private static final Logger log = LoggerFactory.getLogger(BackTrackLineSearch.class); private final IModel layer; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/BaseOptimizer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/BaseOptimizer.java index d5731f04d..2087b9601 100644 --- 
a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/BaseOptimizer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/BaseOptimizer.java @@ -20,6 +20,11 @@ package org.deeplearning4j.optimize.solvers; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; import lombok.Getter; import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.exception.InvalidStepException; @@ -39,19 +44,13 @@ import org.deeplearning4j.optimize.api.TrainingListener; import org.deeplearning4j.optimize.solvers.accumulation.GradientsAccumulator; import org.deeplearning4j.optimize.stepfunctions.NegativeDefaultStepFunction; import org.deeplearning4j.optimize.stepfunctions.NegativeGradientStepFunction; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.memory.MemoryWorkspace; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.common.primitives.Pair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; - /** * Base optimizer * @author Adam Gibson diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/ConjugateGradient.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/ConjugateGradient.java index 614075e20..0464a02bc 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/ConjugateGradient.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/ConjugateGradient.java @@ -20,6 +20,7 @@ package org.deeplearning4j.optimize.solvers; +import java.util.Collection; import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.optimize.api.StepFunction; @@ -29,9 +30,6 @@ import org.nd4j.linalg.factory.Nd4j; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.Collection; - - public class ConjugateGradient extends BaseOptimizer { private static final long serialVersionUID = -1269296013474864091L; private static final Logger logger = LoggerFactory.getLogger(ConjugateGradient.class); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/LBFGS.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/LBFGS.java index 80a94c6e6..42a708d70 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/LBFGS.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/LBFGS.java @@ -20,18 +20,17 @@ package org.deeplearning4j.optimize.solvers; +import java.util.Collection; +import java.util.Iterator; +import java.util.LinkedList; import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.optimize.api.StepFunction; import org.deeplearning4j.optimize.api.TrainingListener; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.common.primitives.Pair; - -import java.util.Collection; -import java.util.Iterator; -import java.util.LinkedList; /** * LBFGS diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/LineGradientDescent.java 
b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/LineGradientDescent.java index 78ebf3231..64cc59170 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/LineGradientDescent.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/LineGradientDescent.java @@ -20,6 +20,7 @@ package org.deeplearning4j.optimize.solvers; +import java.util.Collection; import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.optimize.api.StepFunction; @@ -27,8 +28,6 @@ import org.deeplearning4j.optimize.api.TrainingListener; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; -import java.util.Collection; - public class LineGradientDescent extends BaseOptimizer { private static final long serialVersionUID = 6336124657542062284L; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/StochasticGradientDescent.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/StochasticGradientDescent.java index e0de12fe9..c09f64205 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/StochasticGradientDescent.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/StochasticGradientDescent.java @@ -20,6 +20,7 @@ package org.deeplearning4j.optimize.solvers; +import java.util.Collection; import lombok.extern.slf4j.Slf4j; import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -29,12 +30,10 @@ import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.deeplearning4j.optimize.api.StepFunction; import org.deeplearning4j.optimize.api.TrainingListener; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.memory.MemoryWorkspace; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.common.primitives.Pair; - -import java.util.Collection; @Slf4j public class StochasticGradientDescent extends BaseOptimizer { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/BasicGradientsAccumulator.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/BasicGradientsAccumulator.java index 09002e24a..3ca761135 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/BasicGradientsAccumulator.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/BasicGradientsAccumulator.java @@ -20,6 +20,13 @@ package org.deeplearning4j.optimize.solvers.accumulation; +import java.util.List; +import java.util.concurrent.BrokenBarrierException; +import java.util.concurrent.CopyOnWriteArrayList; +import java.util.concurrent.CyclicBarrier; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.locks.ReentrantReadWriteLock; import lombok.NonNull; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.optimize.api.StepFunction; @@ -27,16 +34,6 @@ import org.nd4j.linalg.api.memory.MemoryWorkspace; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; -import java.util.List; -import java.util.Queue; -import java.util.concurrent.BrokenBarrierException; -import java.util.concurrent.CopyOnWriteArrayList; -import java.util.concurrent.CyclicBarrier; 
-import java.util.concurrent.LinkedTransferQueue; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicLong; -import java.util.concurrent.locks.ReentrantReadWriteLock; - @Slf4j public class BasicGradientsAccumulator implements GradientsAccumulator { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/EncodedGradientsAccumulator.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/EncodedGradientsAccumulator.java index 16e8a97e7..a0b65aa8d 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/EncodedGradientsAccumulator.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/EncodedGradientsAccumulator.java @@ -20,6 +20,14 @@ package org.deeplearning4j.optimize.solvers.accumulation; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.locks.ReentrantLock; import lombok.Getter; import lombok.NonNull; import lombok.extern.slf4j.Slf4j; @@ -42,15 +50,6 @@ import org.nd4j.linalg.exception.ND4JIllegalStateException; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.util.AtomicThrowable; -import java.util.ArrayList; -import java.util.List; -import java.util.concurrent.BlockingQueue; -import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicLong; -import java.util.concurrent.locks.ReentrantLock; - @Slf4j public class EncodedGradientsAccumulator implements GradientsAccumulator, Registerable { public static final long DEFAULT_INITIAL_MEMORY = 100 * 1024 * 1024L; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/EncodingHandler.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/EncodingHandler.java index fc3e9a9e0..7e9de3742 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/EncodingHandler.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/EncodingHandler.java @@ -20,19 +20,7 @@ package org.deeplearning4j.optimize.solvers.accumulation; -import org.nd4j.linalg.api.buffer.DataType; import com.google.common.util.concurrent.AtomicDouble; -import lombok.NonNull; -import lombok.extern.slf4j.Slf4j; -import org.deeplearning4j.optimize.solvers.accumulation.encoding.ResidualPostProcessor; -import org.deeplearning4j.optimize.solvers.accumulation.encoding.ThresholdAlgorithm; -import org.deeplearning4j.optimize.solvers.accumulation.encoding.ThresholdAlgorithmReducer; -import org.nd4j.linalg.api.buffer.DataBuffer; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.exception.ND4JIllegalStateException; -import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.linalg.ops.transforms.Transforms; - import java.text.DecimalFormat; import java.util.Collection; import java.util.Iterator; @@ -41,6 +29,15 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; +import 
lombok.NonNull; +import lombok.extern.slf4j.Slf4j; +import org.deeplearning4j.optimize.solvers.accumulation.encoding.ResidualPostProcessor; +import org.deeplearning4j.optimize.solvers.accumulation.encoding.ThresholdAlgorithm; +import org.deeplearning4j.optimize.solvers.accumulation.encoding.ThresholdAlgorithmReducer; +import org.nd4j.linalg.api.buffer.DataType; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.ops.transforms.Transforms; @Slf4j public class EncodingHandler implements MessageHandler { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/FancyBlockingQueue.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/FancyBlockingQueue.java index 79e9d427c..bfcdbb7db 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/FancyBlockingQueue.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/FancyBlockingQueue.java @@ -20,10 +20,6 @@ package org.deeplearning4j.optimize.solvers.accumulation; -import lombok.NonNull; -import lombok.extern.slf4j.Slf4j; -import org.nd4j.common.util.ThreadUtils; - import java.util.Collection; import java.util.Iterator; import java.util.concurrent.BlockingQueue; @@ -32,6 +28,9 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.locks.ReentrantReadWriteLock; +import lombok.NonNull; +import lombok.extern.slf4j.Slf4j; +import org.nd4j.common.util.ThreadUtils; @Slf4j public class FancyBlockingQueue implements BlockingQueue, Registerable { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/GradientsAccumulator.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/GradientsAccumulator.java index a50ca59f1..fa5b8c3fd 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/GradientsAccumulator.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/GradientsAccumulator.java @@ -20,12 +20,10 @@ package org.deeplearning4j.optimize.solvers.accumulation; +import java.io.Serializable; import org.deeplearning4j.optimize.api.StepFunction; import org.nd4j.linalg.api.ndarray.INDArray; -import java.io.Serializable; -import java.util.Queue; - public interface GradientsAccumulator extends Serializable { /** diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/IndexedTail.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/IndexedTail.java index cc16c78d7..2fd9b0020 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/IndexedTail.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/IndexedTail.java @@ -20,23 +20,22 @@ package org.deeplearning4j.optimize.solvers.accumulation; -import lombok.Getter; -import lombok.NonNull; -import lombok.extern.slf4j.Slf4j; -import lombok.val; -import org.nd4j.common.base.Preconditions; -import org.nd4j.linalg.api.buffer.DataType; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.compression.ThresholdCompression; -import org.nd4j.linalg.exception.ND4JIllegalStateException; -import org.nd4j.linalg.factory.Nd4j; -import 
org.nd4j.common.primitives.AtomicBoolean; - import java.util.ArrayList; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.locks.ReentrantReadWriteLock; +import lombok.Getter; +import lombok.NonNull; +import lombok.extern.slf4j.Slf4j; +import lombok.val; +import org.nd4j.common.base.Preconditions; +import org.nd4j.common.primitives.AtomicBoolean; +import org.nd4j.linalg.api.buffer.DataType; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.compression.ThresholdCompression; +import org.nd4j.linalg.exception.ND4JIllegalStateException; +import org.nd4j.linalg.factory.Nd4j; @Slf4j public class IndexedTail { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/MessageHandler.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/MessageHandler.java index e5b970608..9f660d376 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/MessageHandler.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/MessageHandler.java @@ -20,9 +20,8 @@ package org.deeplearning4j.optimize.solvers.accumulation; -import org.nd4j.linalg.api.ndarray.INDArray; - import java.io.Serializable; +import org.nd4j.linalg.api.ndarray.INDArray; public interface MessageHandler extends Serializable { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/SmartFancyBlockingQueue.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/SmartFancyBlockingQueue.java index 9fc28cc7a..d6c86aba1 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/SmartFancyBlockingQueue.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/SmartFancyBlockingQueue.java @@ -22,18 +22,17 @@ package org.deeplearning4j.optimize.solvers.accumulation; import EDU.oswego.cs.dl.util.concurrent.ReaderPreferenceReadWriteLock; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.LinkedBlockingQueue; import lombok.NonNull; import lombok.extern.slf4j.Slf4j; import lombok.val; +import org.nd4j.common.primitives.AtomicBoolean; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.compression.ThresholdCompression; import org.nd4j.linalg.exception.ND4JIllegalStateException; import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.common.primitives.AtomicBoolean; - -import java.util.concurrent.BlockingQueue; -import java.util.concurrent.LinkedBlockingQueue; @Slf4j public class SmartFancyBlockingQueue extends FancyBlockingQueue { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/encoding/ResidualPostProcessor.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/encoding/ResidualPostProcessor.java index b4058906f..ce8983660 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/encoding/ResidualPostProcessor.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/encoding/ResidualPostProcessor.java @@ -20,9 +20,8 @@ package org.deeplearning4j.optimize.solvers.accumulation.encoding; -import org.nd4j.linalg.api.ndarray.INDArray; - import java.io.Serializable; +import 
org.nd4j.linalg.api.ndarray.INDArray; public interface ResidualPostProcessor extends Serializable, Cloneable { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/encoding/ThresholdAlgorithm.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/encoding/ThresholdAlgorithm.java index 41f8348ad..0c143e03b 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/encoding/ThresholdAlgorithm.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/optimize/solvers/accumulation/encoding/ThresholdAlgorithm.java @@ -20,9 +20,8 @@ package org.deeplearning4j.optimize.solvers.accumulation.encoding; -import org.nd4j.linalg.api.ndarray.INDArray; - import java.io.Serializable; +import org.nd4j.linalg.api.ndarray.INDArray; public interface ThresholdAlgorithm extends Serializable { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/Convolution1DUtils.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/Convolution1DUtils.java index c5305dbc0..e0d1e8b9e 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/Convolution1DUtils.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/Convolution1DUtils.java @@ -20,7 +20,7 @@ package org.deeplearning4j.util; - +import java.util.Arrays; import org.deeplearning4j.exception.DL4JInvalidConfigException; import org.deeplearning4j.exception.DL4JInvalidInputException; import org.deeplearning4j.nn.conf.CNN2DFormat; @@ -32,285 +32,334 @@ import org.nd4j.common.base.Preconditions; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.exception.ND4JArraySizeException; -import java.util.Arrays; - public class Convolution1DUtils { - private static final int ONE = 1; + private static final int ONE = 1; + private Convolution1DUtils() {} - private Convolution1DUtils() { + public static int getOutputSize( + INDArray inputData, int kernel, int strides, int padding, ConvolutionMode convolutionMode) { + return getOutputSize(inputData, kernel, strides, padding, convolutionMode, ONE); + } + + /** + * Returns true if the given layer has an {@link RNNFormat}. This is true for: {@link + * Convolution1D}, {@link Subsampling1DLayer} {@link SimpleRnn} {@link LSTM} {@link + * EmbeddingSequenceLayer} + * + * @param layer the layer to test + * @return true if the input layer has an rnn format false otherwise + */ + public static boolean hasRnnDataFormat(LayerConfiguration layer) { + return layer instanceof Convolution1D + || layer instanceof Convolution1D + || layer instanceof Subsampling1DLayer + || layer instanceof SimpleRnn + || layer instanceof LSTM + || layer instanceof EmbeddingSequenceLayer; + } + + /** + * Get the {@link RNNFormat} for the given layer. 
Throws an {@link IllegalArgumentException} if a + * layer doesn't have an rnn format + * + * @param layer the layer to get the format for + * @return the format for the layer + */ + public static RNNFormat getRnnFormatFromLayer(LayerConfiguration layer) { + Preconditions.checkState( + hasRnnDataFormat(layer), + "ILayer of type " + + layer.getClass().getName() + + " and name " + + layer.getName() + + " does not have an RNNFormat"); + if (layer instanceof SimpleRnn) { + SimpleRnn simpleRnn = (SimpleRnn) layer; + return simpleRnn.getDataFormat(); + } else if (layer instanceof Convolution1D) { + Convolution1D convolution1D = (Convolution1D) layer; + return convolution1D.getRnnDataFormat(); + } else if (layer instanceof Convolution1D) { + Convolution1D convolution1D = (Convolution1D) layer; + return convolution1D.getRnnDataFormat(); + } else if (layer instanceof Subsampling1DLayer) { + Subsampling1DLayer subsampling1DLayer = (Subsampling1DLayer) layer; + return subsampling1DLayer.getDataFormat() == CNN2DFormat.NCHW ? RNNFormat.NCW : RNNFormat.NWC; + } else if (layer instanceof LSTM) { + LSTM lstm = (LSTM) layer; + return lstm.getDataFormat(); + } else if (layer instanceof EmbeddingSequenceLayer) { + EmbeddingSequenceLayer embeddingSequenceLayer = (EmbeddingSequenceLayer) layer; + return embeddingSequenceLayer.getOutputDataFormat(); + } else { + throw new IllegalArgumentException( + "Illegal layer type " + layer.getClass().getName() + " and name " + layer.getName()); + } + } + + /** + * Reshapes the given weight array or weight gradient to work with the specified {@link RNNFormat} + * + * @param w the weight array or gradient + * @param rnnFormat the {@link RNNFormat} to use + * @return the reshaped array. + */ + public static INDArray reshapeWeightArrayOrGradientForFormat(INDArray w, RNNFormat rnnFormat) { + + if (rnnFormat == RNNFormat.NWC) + w = w.reshape(w.ordering(), w.size(0), w.size(1), w.size(2)) + .permute(2, 1, 0); // [oC, iC, k, 1] to [k, iC, oC] + else { + w = w.reshape(w.ordering(), w.size(2), w.size(1), w.size(0)); } + return w; + } - public static int getOutputSize(INDArray inputData, int kernel, int strides, int padding, - ConvolutionMode convolutionMode) { - return getOutputSize(inputData, kernel, strides, padding, convolutionMode, ONE); + /** + * Get the output size (height) for the given input data and CNN1D configuration + * + * @param inH Input size (height, or channels). 
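For the NWC/NCW weight handling in reshapeWeightArrayOrGradientForFormat above, a minimal usage sketch (the surrounding class name and the sample shape are illustrative, not taken from the patch; the helper and enum names are the ones declared in this file):

import org.deeplearning4j.nn.conf.RNNFormat;
import org.deeplearning4j.util.Convolution1DUtils;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class WeightReshapeSketch {
  public static void main(String[] args) {
    // Hypothetical Conv1D weight block: 8 output channels, 4 input channels, kernel width 3
    INDArray w = Nd4j.zeros(8, 4, 3);

    // NWC: reshape followed by permute(2, 1, 0) turns [oC, iC, k] into [k, iC, oC]
    INDArray nwc = Convolution1DUtils.reshapeWeightArrayOrGradientForFormat(w.dup(), RNNFormat.NWC);
    System.out.println(java.util.Arrays.toString(nwc.shape()));   // [3, 4, 8]

    // Any other format (NCW): a plain reshape to [k, iC, oC], no permute
    INDArray ncw = Convolution1DUtils.reshapeWeightArrayOrGradientForFormat(w.dup(), RNNFormat.NCW);
    System.out.println(java.util.Arrays.toString(ncw.shape()));   // [3, 4, 8]
  }
}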
+ * @param kernel Kernel size + * @param strides Stride + * @param padding Padding + * @param convolutionMode Convolution mode (Same, Strict, Truncate) + * @param dilation Kernel dilation + * @return Output size (width) + */ + public static long getOutputSize( + long inH, + int kernel, + int strides, + int padding, + ConvolutionMode convolutionMode, + int dilation) { + long eKernel = effectiveKernelSize(kernel, dilation); + if (convolutionMode == ConvolutionMode.Same || convolutionMode == ConvolutionMode.Causal) { + return (int) Math.ceil(inH / ((double) strides)); + } + return (inH - eKernel + 2L * padding) / strides + 1; + } + + /** + * Get the output size (height) for the given input data and CNN1D configuration + * + * @param inputData Input data + * @param kernel Kernel size + * @param strides Stride + * @param padding Padding + * @param convolutionMode Convolution mode (Same, Strict, Truncate) + * @param dilation Kernel dilation + * @return Output size (width) + */ + public static int getOutputSize( + INDArray inputData, + int kernel, + int strides, + int padding, + ConvolutionMode convolutionMode, + int dilation) { + if (inputData.size(2) > Integer.MAX_VALUE) throw new ND4JArraySizeException(); + int inH = (int) inputData.size(2); + int eKernel = effectiveKernelSize(kernel, dilation); + boolean atrous = (eKernel == kernel); + validateShapes(inputData, eKernel, strides, padding, convolutionMode, dilation, inH, atrous); + + if (convolutionMode == ConvolutionMode.Same || convolutionMode == ConvolutionMode.Causal) { + int outH = (int) Math.ceil(inH / ((double) strides)); + return outH; } - /** - * Returns true if the given layer has an - * {@link RNNFormat}. - * This is true for: - * {@link Convolution1DLayer}, - * {@link Subsampling1DLayer} - * {@link SimpleRnn} - * {@link LSTM} - * {@link EmbeddingSequenceLayer} - * @param layer the layer to test - * @return true if the input layer has an rnn format - * false otherwise - */ - public static boolean hasRnnDataFormat(LayerConfiguration layer) { - return layer instanceof Convolution1D || - layer instanceof Convolution1DLayer || - layer instanceof Subsampling1DLayer || - layer instanceof SimpleRnn || - layer instanceof LSTM || - layer instanceof EmbeddingSequenceLayer; + int outH = (inH - eKernel + 2 * padding) / strides + 1; + return outH; + } + + public static void validateShapes( + INDArray inputData, + int eKernel, + int strides, + int padding, + ConvolutionMode convolutionMode, + int dilation, + int inShape, + boolean atrous) { + + int inH = inShape; + boolean t = convolutionMode == ConvolutionMode.Truncate; + + if (t && (eKernel <= 0 || eKernel > inH + 2 * padding)) { + StringBuilder sb = new StringBuilder(); + sb.append("Invalid input data or configuration: "); + if (atrous) sb.append("effective "); + sb.append("kernel height and input height must satisfy 0 < "); + if (atrous) sb.append("effective "); + sb.append("kernel height <= input height + 2 * padding height. \nGot "); + if (atrous) sb.append("effective "); + sb.append("kernel height = ") + .append(eKernel) + .append(", input height = ") + .append(inH) + .append(" and padding height = ") + .append(padding) + .append(" which do not satisfy 0 < ") + .append(eKernel) + .append(" <= ") + .append(inH + 2 * padding) + .append(getCommonErrorMsg(inputData, eKernel, strides, padding, dilation)); + + throw new DL4JInvalidInputException(sb.toString()); } - /** - * Get the {@link RNNFormat} for the given layer. 
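To make the arithmetic in the two getOutputSize overloads concrete: Truncate and Strict use (inputLength - effectiveKernel + 2*padding)/stride + 1, while Same and Causal only look at the stride and return ceil(inputLength/stride). A small sketch using the public helpers declared in this hunk (the class name and the sample numbers are illustrative):

import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.util.Convolution1DUtils;

public class Conv1DOutputSizeSketch {
  public static void main(String[] args) {
    long inLength = 28;                              // sequence length
    int kernel = 3, stride = 1, padding = 0, dilation = 2;

    // Dilation stretches the kernel: 3 + (3 - 1) * (2 - 1) = 5
    int eKernel = Convolution1DUtils.effectiveKernelSize(kernel, dilation);

    // Truncate: (28 - 5 + 2*0) / 1 + 1 = 24
    long truncate = Convolution1DUtils.getOutputSize(
        inLength, kernel, stride, padding, ConvolutionMode.Truncate, dilation);

    // Same (and Causal): ceil(28 / 1) = 28; kernel and padding are not consulted
    long same = Convolution1DUtils.getOutputSize(
        inLength, kernel, stride, padding, ConvolutionMode.Same, dilation);

    System.out.println("eKernel=" + eKernel + " truncate=" + truncate + " same=" + same);
  }
}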
- * Throws an {@link IllegalArgumentException} - * if a layer doesn't have an rnn format - * @param layer the layer to get the format for - * @return the format for the layer - */ - public static RNNFormat getRnnFormatFromLayer(LayerConfiguration layer) { - Preconditions.checkState(hasRnnDataFormat(layer),"ILayer of type " + layer.getClass().getName() + " and name " + layer.getName() + " does not have an RNNFormat"); - if(layer instanceof SimpleRnn) { - SimpleRnn simpleRnn = (SimpleRnn) layer; - return simpleRnn.getDataFormat(); - } else if(layer instanceof Convolution1D) { - Convolution1D convolution1D = (Convolution1D) layer; - return convolution1D.getRnnDataFormat(); - } else if(layer instanceof Convolution1DLayer) { - Convolution1DLayer convolution1DLayer = (Convolution1DLayer) layer; - return convolution1DLayer.getRnnDataFormat(); - } else if(layer instanceof Subsampling1DLayer) { - Subsampling1DLayer subsampling1DLayer = (Subsampling1DLayer) layer; - return subsampling1DLayer.getDataFormat() == CNN2DFormat.NCHW ? RNNFormat.NCW : RNNFormat.NWC; - } else if(layer instanceof LSTM) { - LSTM lstm = (LSTM) layer; - return lstm.getDataFormat(); - } else if(layer instanceof EmbeddingSequenceLayer) { - EmbeddingSequenceLayer embeddingSequenceLayer = (EmbeddingSequenceLayer) layer; - return embeddingSequenceLayer.getOutputDataFormat(); - } - else { - throw new IllegalArgumentException("Illegal layer type " + layer.getClass().getName() + " and name " + layer.getName()); - } + if (convolutionMode == ConvolutionMode.Strict) { + if ((inH - eKernel + 2 * padding) % strides != 0) { + double d = (inH - eKernel + 2 * padding) / ((double) strides) + 1.0; + String str = String.format("%.2f", d); + int truncated = (int) d; + int sameSize = (int) Math.ceil(inH / ((double) strides)); + + String sb = + "Invalid input data or configuration: Combination of kernel size, " + + "stride and padding are not " + + "valid for given input height, using ConvolutionMode.Strict\n" + + "ConvolutionMode.Strict requires: output height = (input height - kernelSize + " + + "2*padding)/stride + 1 to be an integer. Got: (" + + inH + + " - " + + eKernel + + " + 2*" + + padding + + ")/" + + strides + + " + 1 = " + + str + + "\n" + + "See \"Constraints on strides\" at http://cs231n.github." + + "io/convolutional-networks/ and ConvolutionType enumeration Javadoc.\n" + + "To truncate/crop the input, such that output height = floor(" + + str + + ") = " + + truncated + + ", use ConvolutionType.Truncate.\n" + + "Alternatively use ConvolutionType.Same, which will use padding to give an " + + "output height of ceil(" + + inH + + "/" + + strides + + ")=" + + sameSize + + getCommonErrorMsg(inputData, eKernel, strides, padding, dilation); + + throw new DL4JInvalidConfigException(sb); + } } + } - /** - * Reshapes the given weight - * array or weight gradient - * to work with the specified - * {@link RNNFormat} - * @param w the weight array or gradient - * @param rnnFormat the {@link RNNFormat} to use - * @return the reshaped array. 
- */ - public static INDArray reshapeWeightArrayOrGradientForFormat(INDArray w, RNNFormat rnnFormat) { - if(rnnFormat == RNNFormat.NWC) - w = w.reshape(w.ordering(), w.size(0), w.size(1), w.size(2)).permute(2, 1, 0); //[oC, iC, k, 1] to [k, iC, oC] - else { - w = w.reshape(w.ordering(),w.size(2),w.size(1),w.size(0)); - } - - return w; + public static int effectiveKernelSize(int kernel, int dilation) { + // Determine the effective kernel size, accounting for dilation + // http://deeplearning.net/software/theano/tutorial/conv_arithmetic.html#dilated-convolutions + if (dilation == 1) { + return kernel; + } else { + return kernel + (kernel - 1) * (dilation - 1); } + } - - /** - * Get the output size (height) for the given input data and CNN1D configuration - * - * @param inH Input size (height, or channels). - * @param kernel Kernel size - * @param strides Stride - * @param padding Padding - * @param convolutionMode Convolution mode (Same, Strict, Truncate) - * @param dilation Kernel dilation - * @return Output size (width) - */ - public static long getOutputSize(long inH, int kernel, int strides, int padding, - ConvolutionMode convolutionMode, int dilation) { - long eKernel = effectiveKernelSize(kernel, dilation); - if (convolutionMode == ConvolutionMode.Same || convolutionMode == ConvolutionMode.Causal) { - return (int) Math.ceil(inH / ((double) strides)); - } - return (inH - eKernel + 2L * padding) / strides + 1; + private static String getCommonErrorMsg( + INDArray inputData, int kernel, int strides, int padding, int dilation) { + String s = + "\nInput size: [numExamples,inputDepth,inputHeight,inputWidth]=" + + Arrays.toString(inputData.shape()) + + ", inputKernel=" + + kernel; + if (dilation != 1) { + int effectiveKernel = effectiveKernelSize(kernel, dilation); + s += ", effectiveKernelGivenDilation=" + effectiveKernel; } + return s + ", stride=" + strides + ", padding=" + padding + ", dilation=" + dilation; + } - /** - * Get the output size (height) for the given input data and CNN1D configuration - * - * @param inputData Input data - * @param kernel Kernel size - * @param strides Stride - * @param padding Padding - * @param convolutionMode Convolution mode (Same, Strict, Truncate) - * @param dilation Kernel dilation - * @return Output size (width) - */ - public static int getOutputSize(INDArray inputData, int kernel, int strides, int padding, - ConvolutionMode convolutionMode, int dilation) { - if (inputData.size(2) > Integer.MAX_VALUE) - throw new ND4JArraySizeException(); - int inH = (int) inputData.size(2); - int eKernel = effectiveKernelSize(kernel, dilation); - boolean atrous = (eKernel == kernel); - validateShapes(inputData, eKernel, strides, padding, convolutionMode, dilation, inH, atrous); - - if (convolutionMode == ConvolutionMode.Same || convolutionMode == ConvolutionMode.Causal) { - int outH = (int) Math.ceil(inH / ((double) strides)); - return outH; - } - - int outH = (inH - eKernel + 2 * padding) / strides + 1; - return outH; + /** Check that the convolution mode is consistent with the padding specification */ + public static void validateConvolutionModePadding(ConvolutionMode mode, int padding) { + if (mode == ConvolutionMode.Same) { + boolean nullPadding = padding == 0; + if (!nullPadding) + throw new IllegalArgumentException( + "Padding cannot be used when using the `same' convolution mode"); } + } - public static void validateShapes(INDArray inputData, int eKernel, int strides, int padding, - ConvolutionMode convolutionMode, int dilation, int inShape, - boolean atrous) 
+  private static String getCommonErrorMsg(
+      INDArray inputData, int kernel, int strides, int padding, int dilation) {
+    String s =
+        "\nInput size: [numExamples,inputDepth,inputHeight,inputWidth]="
+            + Arrays.toString(inputData.shape())
+            + ", inputKernel="
+            + kernel;
+    if (dilation != 1) {
+      int effectiveKernel = effectiveKernelSize(kernel, dilation);
+      s += ", effectiveKernelGivenDilation=" + effectiveKernel;
     }
+    return s + ", stride=" + strides + ", padding=" + padding + ", dilation=" + dilation;
+  }
-
-    /**
-     * Get the output size (height) for the given input data and CNN1D configuration
-     *
-     * @param inputData Input data
-     * @param kernel Kernel size
-     * @param strides Stride
-     * @param padding Padding
-     * @param convolutionMode Convolution mode (Same, Strict, Truncate)
-     * @param dilation Kernel dilation
-     * @return Output size (width)
-     */
-    public static int getOutputSize(INDArray inputData, int kernel, int strides, int padding,
-                    ConvolutionMode convolutionMode, int dilation) {
-        if (inputData.size(2) > Integer.MAX_VALUE)
-            throw new ND4JArraySizeException();
-        int inH = (int) inputData.size(2);
-        int eKernel = effectiveKernelSize(kernel, dilation);
-        boolean atrous = (eKernel == kernel);
-        validateShapes(inputData, eKernel, strides, padding, convolutionMode, dilation, inH, atrous);
-
-        if (convolutionMode == ConvolutionMode.Same || convolutionMode == ConvolutionMode.Causal) {
-            int outH = (int) Math.ceil(inH / ((double) strides));
-            return outH;
-        }
-
-        int outH = (inH - eKernel + 2 * padding) / strides + 1;
-        return outH;
+  /** Check that the convolution mode is consistent with the padding specification */
+  public static void validateConvolutionModePadding(ConvolutionMode mode, int padding) {
+    if (mode == ConvolutionMode.Same) {
+      boolean nullPadding = padding == 0;
+      if (!nullPadding)
+        throw new IllegalArgumentException(
+            "Padding cannot be used when using the `same' convolution mode");
     }
+  }
-
-    public static void validateShapes(INDArray inputData, int eKernel, int strides, int padding,
-                    ConvolutionMode convolutionMode, int dilation, int inShape,
-                    boolean atrous) {
+  /**
+   * Get top padding for same mode only.
+   *
+   * @param outSize Output size
+   * @param inSize Input size
+   * @param kernel Kernel size
+   * @param strides Strides
+   * @param dilation Dilation
+   * @return Top left padding
+   */
+  public static int getSameModeTopLeftPadding(
+      int outSize, int inSize, int kernel, int strides, int dilation) {
+    int eKernel = effectiveKernelSize(kernel, dilation);
+    // Note that padBottom is 1 bigger than this if bracketed term is not divisible by 2
+    int outPad = ((outSize - 1) * strides + eKernel - inSize) / 2;
+    Preconditions.checkState(
+        outPad >= 0,
+        "Invalid padding values calculated: %s - "
+            + "layer configuration is invalid? Input size %s, output size %s, kernel %s, "
+            + "strides %s, dilation %s",
+        outPad,
+        inSize,
+        outSize,
+        kernel,
+        strides,
+        dilation);
+    return outPad;
+  }
-
-        int inH = inShape;
-        boolean t = convolutionMode == ConvolutionMode.Truncate;
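+  /**
+   * Get bottom/right padding for same mode only. This is the remainder of the total padding once
+   * the top/left share from getSameModeTopLeftPadding is taken, so it is at most 1 larger than
+   * the top/left padding.
+   *
+   * @param outSize Output size
+   * @param inSize Input size
+   * @param kernel Kernel size
+   * @param strides Strides
+   * @param dilation Dilation
+   * @return Bottom right padding
+   */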
+ - "io/convolutional-networks/ and ConvolutionType enumeration Javadoc.\n" + - "To truncate/crop the input, such that output height = floor(" + - str + ") = " + - truncated + ", use ConvolutionType.Truncate.\n" + - "Alternatively use ConvolutionType.Same, which will use padding to give an " + - "output height of ceil(" + - inH + "/" + strides + ")=" + sameSize + - getCommonErrorMsg(inputData, eKernel, strides, padding, dilation); - - throw new DL4JInvalidConfigException(sb); - } - } + /** + * Perform validation on the CNN layer kernel/stride/padding. Expect int, with values > 0 for + * kernel size and stride, and values >= 0 for padding. + * + * @param kernel Kernel size to check + * @param stride Stride to check + * @param padding Padding to check + */ + public static void validateCnn1DKernelStridePadding(int kernel, int stride, int padding) { + if (kernel <= 0) { + throw new IllegalStateException( + "Invalid kernel size: value must be positive (> 0). Got: " + kernel); } - - public static int effectiveKernelSize(int kernel, int dilation) { - //Determine the effective kernel size, accounting for dilation - //http://deeplearning.net/software/theano/tutorial/conv_arithmetic.html#dilated-convolutions - if (dilation == 1) { - return kernel; - } else { - return kernel + (kernel - 1) * (dilation - 1); - } + if (stride <= 0) { + throw new IllegalStateException( + "Invalid kernel size: value must be positive (> 0). Got: " + stride); } - - private static String getCommonErrorMsg(INDArray inputData, int kernel, int strides, int padding, int dilation) { - String s = "\nInput size: [numExamples,inputDepth,inputHeight,inputWidth]=" + Arrays.toString(inputData.shape()) - + ", inputKernel=" + kernel; - if (dilation != 1) { - int effectiveKernel = effectiveKernelSize(kernel, dilation); - s += ", effectiveKernelGivenDilation=" + effectiveKernel; - } - return s + ", stride=" + strides + ", padding=" + padding + ", dilation=" + dilation; + if (padding < 0) { + throw new IllegalStateException( + "Invalid kernel size: value must be positive (> 0). Got: " + padding); } - - - /** - * Check that the convolution mode is consistent with the padding specification - */ - public static void validateConvolutionModePadding(ConvolutionMode mode, int padding) { - if (mode == ConvolutionMode.Same) { - boolean nullPadding = padding == 0; - if (!nullPadding) - throw new IllegalArgumentException("Padding cannot be used when using the `same' convolution mode"); - - } - } - - /** - * Get top padding for same mode only. - * - * @param outSize Output size (length 2 array, height dimension first) - * @param inSize Input size (length 2 array, height dimension first) - * @param kernel Kernel size (length 2 array, height dimension first) - * @param strides Strides (length 2 array, height dimension first) - * @param dilation Dilation (length 2 array, height dimension first) - * @return Top left padding (length 2 array, height dimension first) - */ - public static int getSameModeTopLeftPadding(int outSize, int inSize, int kernel, int strides, int dilation) { - int eKernel = effectiveKernelSize(kernel, dilation); - //Note that padBottom is 1 bigger than this if bracketed term is not divisible by 2 - int outPad = ((outSize - 1) * strides + eKernel - inSize) / 2; - Preconditions.checkState(outPad >= 0, "Invalid padding values calculated: %s - " + - "layer configuration is invalid? 
-                        "layer configuration is invalid? Input size %s, output size %s, kernel %s, " +
-                        "strides %s, dilation %s", outPad, inSize, outSize, kernel, strides, dilation);
-        return outPad;
-    }
-
-    public static int getSameModeBottomRightPadding(int outSize, int inSize, int kernel, int strides, int dilation) {
-        int eKernel = effectiveKernelSize(kernel, dilation);
-        int totalPad = ((outSize - 1) * strides + eKernel - inSize);
-        int tlPad = totalPad / 2;
-        int brPad = totalPad - tlPad;
-        Preconditions.checkState(brPad >= 0, "Invalid padding values (right) calculated: %s - " +
-                        "layer configuration is invalid? Input size %s, output size %s, kernel %s, " +
-                        "strides %s, dilation %s", brPad, inSize, outSize, kernel, strides, dilation);
-        return brPad;
-    }
-
-    /**
-     * Perform validation on the CNN layer kernel/stride/padding. Expect int, with values > 0 for kernel size and
-     * stride, and values >= 0 for padding.
-     *
-     * @param kernel Kernel size to check
-     * @param stride Stride to check
-     * @param padding Padding to check
-     */
-    public static void validateCnn1DKernelStridePadding(int kernel, int stride, int padding) {
-
-        if (kernel <= 0) {
-            throw new IllegalStateException("Invalid kernel size: value must be positive (> 0). Got: " + kernel);
-        }
-        if (stride <= 0) {
-            throw new IllegalStateException("Invalid kernel size: value must be positive (> 0). Got: " + stride);
-
-        }
-        if (padding < 0) {
-            throw new IllegalStateException("Invalid kernel size: value must be positive (> 0). Got: " + padding);
-        }
-    }
-
-
+  }
 }
diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/ConvolutionUtils.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/Convolution2DUtils.java
similarity index 98%
rename from cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/ConvolutionUtils.java
rename to cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/Convolution2DUtils.java
index 887b1e0a7..ab0225e7d 100644
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/ConvolutionUtils.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/Convolution2DUtils.java
@@ -21,6 +21,7 @@
 package org.deeplearning4j.util;
 
+import java.util.Arrays;
 import lombok.NonNull;
 import lombok.val;
 import org.deeplearning4j.exception.DL4JInvalidConfigException;
@@ -44,9 +45,7 @@
 import org.nd4j.linalg.api.shape.Shape;
 import org.nd4j.linalg.exception.ND4JArraySizeException;
 import org.nd4j.linalg.factory.Nd4j;
-import java.util.Arrays;
-
-public class ConvolutionUtils {
+public class Convolution2DUtils {
 
     public static final String NCHW_NHWC_ERROR_MSG =
             "Note: Convolution layers can be configured for either NCHW (channels first)" +
                     " or NHWC (channels last) format for input images and activations.\n" +
@@ -58,7 +57,7 @@ public class ConvolutionUtils {
 
     private static final int[] ONES = new int[]{1, 1};
 
-    private ConvolutionUtils() {
+    private Convolution2DUtils() {
     }
 
     /**
@@ -199,9 +198,9 @@ public class ConvolutionUtils {
      * layer
      */
    public static CNN2DFormat getFormatForLayer(LayerConfiguration layer) {
-        if(layer instanceof Convolution1DLayer) {
-            Convolution1DLayer convolution1DLayer = (Convolution1DLayer) layer;
-            return convolution1DLayer.getDataFormat();
+        if(layer instanceof Convolution1D) {
+            Convolution1D convolution1D = (Convolution1D) layer;
+            return convolution1D.getDataFormat();
         } else if(layer instanceof ConvolutionLayer) {
            ConvolutionLayer convolutionLayer = (ConvolutionLayer) layer;
            return convolutionLayer.getConvFormat();
@@ -845,10 +844,10 @@ public class ConvolutionUtils {
         int[] s = new int[]{stride, 1};
         int[] d = new int[]{dilation, 1};
         if (cm == ConvolutionMode.Same || cm == ConvolutionMode.Causal) {
-            outSize = ConvolutionUtils.getOutputSize(reshaped4d, k, s, null, cm, d, CNN2DFormat.NCHW); //Also performs validation
+            outSize = Convolution2DUtils.getOutputSize(reshaped4d, k, s, null, cm, d, CNN2DFormat.NCHW); //Also performs validation
         } else {
             pad = new int[]{padding, 0};
-            outSize = ConvolutionUtils.getOutputSize(reshaped4d, k, s, pad, cm, d, CNN2DFormat.NCHW); //Also performs validation
+            outSize = Convolution2DUtils.getOutputSize(reshaped4d, k, s, pad, cm, d, CNN2DFormat.NCHW); //Also performs validation
         }
 
         int outH = outSize[0];
@@ -926,7 +925,7 @@ public class ConvolutionUtils {
             d = dilation;
         }
 
-        int[] outSize = ConvolutionUtils.getOutputSize(inMask, k, s, p, convolutionMode, d); //Also performs validation
+        int[] outSize = Convolution2DUtils.getOutputSize(inMask, k, s, p, convolutionMode, d); //Also performs validation
         boolean allEq = true;
         for( int i=0; i(null, null, layer); } catch (Exception e) { log.error("",e);