diff --git a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/multilayer/TestMiscFunctions.java b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/multilayer/TestMiscFunctions.java index 5cbf30f2f..6f47c1412 100644 --- a/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/multilayer/TestMiscFunctions.java +++ b/.old/deeplearning4j/deeplearning4j-scaleout/spark/dl4j-spark/src/test/java/org/deeplearning4j/spark/impl/multilayer/TestMiscFunctions.java @@ -221,7 +221,7 @@ public class TestMiscFunctions extends BaseSparkTest { int nIn = 10; NeuralNetConfiguration mlc = NeuralNetConfiguration.builder().list() - .layer(0, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder() + .layer(0, org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.builder() .reconstructionDistribution( new GaussianReconstructionDistribution(Activation.IDENTITY)) .nIn(nIn).nOut(5).encoderLayerSizes(12).decoderLayerSizes(13).build()) @@ -261,7 +261,7 @@ public class TestMiscFunctions extends BaseSparkTest { NeuralNetConfiguration mlc = NeuralNetConfiguration.builder() .list().layer(0, - new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder() + org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.builder() .reconstructionDistribution(new LossFunctionWrapper( Activation.IDENTITY, new LossMSE())) .nIn(nIn).nOut(5).encoderLayerSizes(12).decoderLayerSizes(13) diff --git a/brutex-extended-tests/src/test/java/net/brutex/gan/App.java b/brutex-extended-tests/src/test/java/net/brutex/gan/App.java index 476acb24b..532613651 100644 --- a/brutex-extended-tests/src/test/java/net/brutex/gan/App.java +++ b/brutex-extended-tests/src/test/java/net/brutex/gan/App.java @@ -96,7 +96,7 @@ public class App { private static LayerConfiguration[] genLayers() { return new 
LayerConfiguration[] { DenseLayer.builder().nIn(INPUT).nOut(X_DIM*Y_DIM*CHANNELS).weightInit(WeightInit.NORMAL).build(), - ActivationLayer.builder(new ActivationLReLU(0.2)).build(), + ActivationLayer.builder(Activation.LEAKYRELU).build(), DenseLayer.builder().nIn(X_DIM*Y_DIM*CHANNELS).nOut(X_DIM*Y_DIM).build(), ActivationLayer.builder(new ActivationLReLU(0.2)).build(), DenseLayer.builder().nIn(X_DIM*Y_DIM).nOut(X_DIM*Y_DIM).build(), diff --git a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/CNN2DTestCases.java b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/CNN2DTestCases.java index eeae08f00..779414c70 100644 --- a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/CNN2DTestCases.java +++ b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/CNN2DTestCases.java @@ -331,10 +331,10 @@ public class CNN2DTestCases { .build(), "leaky_re_lu_8") .addLayer("outputs", - new Yolo2OutputLayer.Builder() + Yolo2OutputLayer.builder() .lambdaNoObj(lambdaNoObj) .lambdaCoord(lambdaCoord) - .boundingBoxPriors(priors) + .boundingBoxes(priors) .build(), "convolution2d_9") .setOutputs("outputs") diff --git a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/RNNTestCases.java b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/RNNTestCases.java index 8be9f71ba..ceabef0b0 100644 --- a/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/RNNTestCases.java +++ b/brutex-extended-tests/src/test/java/org/deeplearning4j/integration/testcases/dl4j/RNNTestCases.java @@ -322,7 +322,7 @@ public class RNNTestCases { .updater(new Adam(5e-2)) .l1(1e-3).l2(1e-3) - .layer(0, Bidirectional.builder(LSTM.builder().activation(Activation.TANH).nOut(10).build())) + .layer(0, Bidirectional.builder(LSTM.builder().activation(Activation.TANH).nOut(10).build()).build()) 
.layer(GlobalPoolingLayer.builder().poolingType(PoolingType.AVG).build()) .layer(OutputLayer.builder().nOut(6) .lossFunction(LossFunctions.LossFunction.MCXENT) diff --git a/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/linalg/activations/Activation.java b/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/linalg/activations/Activation.java index 90063c5e3..196b18244 100644 --- a/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/linalg/activations/Activation.java +++ b/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/linalg/activations/Activation.java @@ -22,9 +22,11 @@ package org.nd4j.linalg.activations; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.activations.impl.*; +import org.nd4j.linalg.api.ndarray.INDArray; -public enum Activation { +public enum Activation implements IActivation { CUBE, ELU, HARDSIGMOID, HARDTANH, IDENTITY, LEAKYRELU, RATIONALTANH, RELU, RELU6, RRELU, SIGMOID, SOFTMAX, SOFTPLUS, SOFTSIGN, TANH, RECTIFIEDTANH, SELU, SWISH, THRESHOLDEDRELU, GELU, MISH; @@ -149,4 +151,44 @@ public enum Activation { throw new UnsupportedOperationException("Activation function not yet supported: " + this); } } + + /** + * Carry out activation function on the input array (usually known as 'preOut' or 'z') + * Implementations must overwrite "in", transform in place and return "in" + * Can support separate behaviour during test + * + * @param in input array. + * @param training true when training. + * @return transformed activation + */ + @Override + public INDArray getActivation(INDArray in, boolean training) { + return getActivationFunction().getActivation(in, training); + } + + /** + * Backpropagate the errors through the activation function, given input z and epsilon dL/da.
+ * Returns 2 INDArrays:
+ * (a) The gradient dL/dz, calculated from dL/da, and
+ * (b) The parameter gradients dL/dW, where w is the weights in the activation function. For activation functions + * with no gradients, this will be null. + * + * @param in Input, before applying the activation function (z, or 'preOut') + * @param epsilon Gradient to be backpropagated: dL/da, where L is the loss function + * @return dL/dz and dL/dW, for weights w (null if activation function has no weights) + */ + @Override + public Pair backprop(INDArray in, INDArray epsilon) { + return getActivationFunction().backprop(in, epsilon); + } + +/** +* + * @param inputSize + * @return +*/ + @Override + public int numParams(int inputSize) { + return getActivationFunction().numParams(inputSize); + } } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStopping.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStopping.java index 86c4dc366..a4923e5da 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStopping.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/earlystopping/TestEarlyStopping.java @@ -872,7 +872,7 @@ public class TestEarlyStopping extends BaseDL4JTest { .nIn(10) .nOut(10) .activation(Activation.TANH) - .gateActivationFunction(Activation.SIGMOID) + .gateActivationFunction(Activation.SIGMOID.getActivationFunction()) .dropOut(0.5) .build()) .layer(1, RnnOutputLayer.builder() diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/AttentionLayerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/AttentionLayerTest.java index bc4c3e223..286f93992 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/AttentionLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/AttentionLayerTest.java @@ -90,8 +90,8 @@ public class AttentionLayerTest extends BaseDL4JTest { .weightInit(WeightInit.XAVIER) 
.layer(LSTM.builder().nOut(layerSize).build()) .layer( projectInput ? - new SelfAttentionLayer.Builder().nOut(4).nHeads(2).projectInput(true).build() - : new SelfAttentionLayer.Builder().nHeads(1).projectInput(false).build() + SelfAttentionLayer.builder().nOut(4).nHeads(2).projectInput(true).build() + : SelfAttentionLayer.builder().nHeads(1).projectInput(false).build() ) .layer(GlobalPoolingLayer.builder().poolingType(PoolingType.MAX).build()) .layer(OutputLayer.builder().nOut(nOut).activation(Activation.SOFTMAX) @@ -151,8 +151,8 @@ public class AttentionLayerTest extends BaseDL4JTest { .list() .layer(LSTM.builder().nOut(layerSize).build()) .layer( projectInput ? - new LearnedSelfAttentionLayer.Builder().nOut(4).nHeads(2).nQueries(numQueries).projectInput(true).build() - : new LearnedSelfAttentionLayer.Builder().nHeads(1).nQueries(numQueries).projectInput(false).build() + LearnedSelfAttentionLayer.builder().nOut(4).nHeads(2).nQueries(numQueries).projectInput(true).build() + : LearnedSelfAttentionLayer.builder().nHeads(1).nQueries(numQueries).projectInput(false).build() ) .layer(GlobalPoolingLayer.builder().poolingType(PoolingType.MAX).build()) .layer(OutputLayer.builder().nOut(nOut).activation(Activation.SOFTMAX) @@ -191,8 +191,8 @@ public class AttentionLayerTest extends BaseDL4JTest { .list() .layer(LSTM.builder().nOut(layerSize).build()) .layer( projectInput ? 
- new LearnedSelfAttentionLayer.Builder().nOut(4).nHeads(2).nQueries(numQueries).projectInput(true).build() - : new LearnedSelfAttentionLayer.Builder().nHeads(1).nQueries(numQueries).projectInput(false).build() + LearnedSelfAttentionLayer.builder().nOut(4).nHeads(2).nQueries(numQueries).projectInput(true).build() + : LearnedSelfAttentionLayer.builder().nHeads(1).nQueries(numQueries).projectInput(false).build() ) .layer(GlobalPoolingLayer.builder().poolingType(PoolingType.MAX).build()) .layer(OutputLayer.builder().nOut(nOut).activation(Activation.SOFTMAX) @@ -245,7 +245,7 @@ public class AttentionLayerTest extends BaseDL4JTest { .weightInit(WeightInit.XAVIER) .list() .layer(LSTM.builder().nOut(layerSize).build()) - .layer(new RecurrentAttentionLayer.Builder().nIn(layerSize).nOut(layerSize).nHeads(1).projectInput(false).hasBias(false).build()) + .layer(RecurrentAttentionLayer.builder().nIn(layerSize).nOut(layerSize).nHeads(1).projectInput(false).hasBias(false).build()) .layer(GlobalPoolingLayer.builder().poolingType(PoolingType.AVG).build()) .layer(OutputLayer.builder().nOut(nOut).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) @@ -308,7 +308,7 @@ public class AttentionLayerTest extends BaseDL4JTest { .weightInit(WeightInit.XAVIER) .list() .layer(LSTM.builder().nOut(layerSize).build()) - .layer(new RecurrentAttentionLayer.Builder().nIn(layerSize).nOut(layerSize).nHeads(1).projectInput(false).hasBias(false).build()) + .layer(RecurrentAttentionLayer.builder().nIn(layerSize).nOut(layerSize).nHeads(1).projectInput(false).hasBias(false).build()) .layer(GlobalPoolingLayer.builder().poolingType(PoolingType.AVG).build()) .layer(OutputLayer.builder().nOut(nOut).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNNGradientCheckTest.java 
b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNNGradientCheckTest.java index c3e00ca3e..a3ef0c082 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNNGradientCheckTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNNGradientCheckTest.java @@ -363,7 +363,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest { .nOut(1) .build()) // output: (5-2+0)/1+1 = 4 .layer( - new SpaceToDepthLayer.Builder(blocks, SpaceToDepthLayer.DataFormat.NCHW) + SpaceToDepthLayer.builder().blockSize(blocks).dataFormat(CNN2DFormat.NCHW) .build()) // (mb,1,4,4) -> (mb,4,2,2) .layer( OutputLayer.builder() @@ -450,10 +450,10 @@ public class CNNGradientCheckTest extends BaseDL4JTest { ConvolutionLayer.builder(kernel) .nIn(inputDepth) .nOut(3) - .dataFormat(format) + .convFormat(format) .build()) .layer( - new SpaceToBatchLayer.Builder(blocks) + SpaceToBatchLayer.builder(blocks) .dataFormat(format) .build()) // trivial space to batch .layer( @@ -546,7 +546,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest { .layer( ConvolutionLayer.builder(kernel, stride, padding) .nIn(inputDepth) - .dataFormat(format) + .convFormat(format) .nOut(3) .build()) // output: (5-2+0)/1+1 = 4 .layer( @@ -641,7 +641,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest { 0, ConvolutionLayer.builder(kernel, stride, padding) .nIn(inputDepth) - .dataFormat(format) + .convFormat(format) .nOut(3) .build()) // output: (5-2+0)/1+1 = 4 .layer( @@ -750,7 +750,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest { 0, ConvolutionLayer.builder(kernel, stride, padding) .nIn(inputDepth) - .dataFormat(format) + .convFormat(format) .nOut(3) .build()) // output: (5-2+0)/1+1 = 4 .layer( @@ -765,7 +765,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest { .layer( 2, ConvolutionLayer.builder(kernel, stride, padding) - .dataFormat(format) + .convFormat(format) .nIn(3) .nOut(2) .build()) // Output: 
(3-2+0)/1+1 = 2 @@ -849,7 +849,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest { ConvolutionLayer.builder() .kernelSize(2, 2) .stride(1, 1) - .dataFormat(format) + .convFormat(format) .padding(0, 0) .nIn(inputDepth) .nOut(2) @@ -861,7 +861,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest { .nOut(7) .kernelSize(2, 2) .dataFormat(format) - .setInputSize(4, 4) + .inputSize(new int[]{4, 4}) .convolutionMode(ConvolutionMode.Strict) .hasBias(false) .stride(1, 1) @@ -873,7 +873,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest { .nIn(7) .nOut(2) .kernelSize(2, 2) - .dataFormat(format) + .convFormat(format) .stride(1, 1) .padding(0, 0) .build()) // (3-2+0)/1+1 = 2 @@ -959,7 +959,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest { ConvolutionLayer.builder() .kernelSize(2, 2) .stride(1, 1) - .dataFormat(format) + .convFormat(format) .padding(0, 0) .nIn(inputDepth) .nOut(2) @@ -970,7 +970,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest { .nIn(2) .nOut(2) .kernelSize(2, 2) - .dataFormat(format) + .convFormat(format) .stride(1, 1) .padding(0, 0) .build()) // (4-2+0)/1+1 = 3 @@ -980,7 +980,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest { .nIn(2) .nOut(2) .kernelSize(2, 2) - .dataFormat(format) + .convFormat(format) .stride(1, 1) .padding(0, 0) .build()) // (3-2+0)/1+1 = 2 @@ -1076,7 +1076,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest { ConvolutionLayer.builder() .name("layer 0") .kernelSize(k, k) - .dataFormat(format) + .convFormat(format) .stride(1, 1) .padding(0, 0) .nIn(inputDepth) @@ -1097,7 +1097,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest { .nIn(2) .nOut(2) .kernelSize(k, k) - .dataFormat(format) + .convFormat(format) .stride(1, 1) .padding(0, 0) .build()) @@ -1181,7 +1181,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest { ConvolutionLayer.builder() .name("layer 0") .kernelSize(k, k) - .dataFormat(format) + .convFormat(format) .stride(stride, stride) .padding(0, 0) 
.nIn(inputDepth) @@ -1297,7 +1297,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest { .layer( 0, ConvolutionLayer.builder(kernel, stride, padding) - .dataFormat(format) + .convFormat(format) .nIn(inputDepth) .nOut(3) .build()) // output: (6-2+0)/1+1 = 5 @@ -1307,7 +1307,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest { ConvolutionLayer.builder(kernel, stride, padding) .nIn(3) .nOut(3) - .dataFormat(format) + .convFormat(format) .build()) // output: (6-2+0)/1+1 = 5 .layer( 3, @@ -1436,7 +1436,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest { .name("deconvolution_2D_layer") .kernelSize(k, k) .stride(s, s) - .dataFormat(format) + .convFormat(format) .dilation(d, d) .convolutionMode(cm) .nIn(inputDepth) @@ -1530,7 +1530,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest { .stride(s, s) .dilation(d, d) .depthMultiplier(3) - .dataFormat(format) + .convFormat(format) .nIn(inputDepth) .nOut(2) .build()) @@ -1621,7 +1621,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest { .kernelSize(k, k) .stride(s, s) .dilation(d, d) - .dataFormat(format) + .convFormat(format) .nIn(inputDepth) .nOut(2) .build()); @@ -1642,7 +1642,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest { .kernelSize(k, k) .stride(s, s) .dilation(d, d) - .dataFormat(format) + .convFormat(format) .build()); } @@ -1732,14 +1732,14 @@ public class CNNGradientCheckTest extends BaseDL4JTest { .list() .layer( ConvolutionLayer.builder(kernel, stride, padding) - .dataFormat(format) + .convFormat(format) .nIn(inputDepth) .nOut(2) .build()) // output: (6-2+0)/1+1 = 5 .layer(Cropping2D.builder(crop).dataFormat(format).build()) .layer( ConvolutionLayer.builder(kernel, stride, padding) - .dataFormat(format) + .convFormat(format) .nIn(2) .nOut(2) .build()) @@ -1857,7 +1857,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest { .stride(1, 1) .nIn(nIn) .nOut(nIn) - .dataFormat(format) + .convFormat(format) .build()) .layer( DepthwiseConvolution2D.builder() 
diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CapsnetGradientCheckTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CapsnetGradientCheckTest.java index 1b6b9ab85..d2fcd533c 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CapsnetGradientCheckTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CapsnetGradientCheckTest.java @@ -82,7 +82,7 @@ public class CapsnetGradientCheckTest extends BaseDL4JTest { .seed(123) .updater(new NoOp()) .dist(new UniformDistribution(-6, 6)) - .layer(new PrimaryCapsules.Builder(primaryCapsDim, primarpCapsChannel) + .layer(PrimaryCapsules.builder(primaryCapsDim, primarpCapsChannel) .kernelSize(3, 3) .stride(2, 2) .build()) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/GlobalPoolingGradientCheckTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/GlobalPoolingGradientCheckTests.java index 9f84c6cd7..cb18de167 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/GlobalPoolingGradientCheckTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/GlobalPoolingGradientCheckTests.java @@ -131,7 +131,7 @@ public class GlobalPoolingGradientCheckTests extends BaseDL4JTest { .updater(new NoOp()) .dist(new NormalDistribution(0, 1.0)).seed(12345L).list() .layer(0, ConvolutionLayer.builder().kernelSize(2, 2).stride(1, 1) - .dataFormat(nchw ? CNN2DFormat.NCHW : CNN2DFormat.NHWC) + .convFormat(nchw ? 
CNN2DFormat.NCHW : CNN2DFormat.NHWC) .nOut(layerDepth) .build()) .layer(1, GlobalPoolingLayer.builder().poolingType(pt).build()) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTestsComputationGraph.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTestsComputationGraph.java index 1be8f012f..05deaa3d3 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTestsComputationGraph.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/GradientCheckTestsComputationGraph.java @@ -345,10 +345,10 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest { .dist(new NormalDistribution(0, 0.1)) .updater(new NoOp()).graphBuilder().addInputs("input") .addLayer("l1", ConvolutionLayer.builder().kernelSize(2, 2).stride(1, 1).padding(0, 0) - .dataFormat(format) + .convFormat(format) .nIn(2).nOut(2).activation(Activation.TANH).build(), "input") .addLayer("l2", ConvolutionLayer.builder().kernelSize(2, 2).stride(1, 1) - .padding(0, 0).dataFormat(format) + .padding(0, 0).convFormat(format) .nIn(2).nOut(2).activation(Activation.TANH).build(), "input") .addVertex("merge", new MergeVertex(), "l1", "l2") .addLayer("outputLayer", diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/RnnGradientChecks.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/RnnGradientChecks.java index c258aefa4..3419f4884 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/RnnGradientChecks.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/RnnGradientChecks.java @@ -116,7 +116,7 @@ public class RnnGradientChecks extends BaseDL4JTest { .layer(Bidirectional.builder(m, (simple ? 
SimpleRnn.builder().nIn(3).nOut(3).hasLayerNorm(hasLayerNorm).build() : - LSTM.builder().nIn(3).nOut(3).build()))) + LSTM.builder().nIn(3).nOut(3).build())).build()) .layer(RnnOutputLayer.builder().nOut(nOut).activation(Activation.SOFTMAX).build()) .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/YoloGradientCheckTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/YoloGradientCheckTests.java index 80a07a33f..0ea61a969 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/YoloGradientCheckTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/YoloGradientCheckTests.java @@ -115,12 +115,11 @@ public class YoloGradientCheckTests extends BaseDL4JTest { .activation(a) .l1(l1[i]).l2(l2[i]) .convolutionMode(ConvolutionMode.Same) - .list() .layer(ConvolutionLayer.builder().kernelSize(2, 2).stride(1, 1) - .dataFormat(format) + .convFormat(format) .nIn(depthIn).nOut(yoloDepth).build())//output: (5-2+0)/1+1 = 4 - .layer(new Yolo2OutputLayer.Builder() - .boundingBoxPriors(bbPrior) + .layer(Yolo2OutputLayer.builder() + .boundingBoxes(bbPrior) .build()) .inputType(InputType.convolutional(h, w, depthIn, format)) .build(); @@ -237,8 +236,8 @@ public class YoloGradientCheckTests extends BaseDL4JTest { .layer(ConvolutionLayer.builder().kernelSize(3,3).stride(1,1).nOut(4).build()) .layer(SubsamplingLayer.builder().kernelSize(2,2).stride(2,2).build()) .layer(ConvolutionLayer.builder().activation(Activation.IDENTITY).kernelSize(3,3).stride(1,1).nOut(depthOut).build()) - .layer(new Yolo2OutputLayer.Builder() - .boundingBoxPriors(bbPriors) + .layer(Yolo2OutputLayer.builder() + .boundingBoxes(bbPriors) .build()) .inputType(InputType.convolutional(h,w,c)) .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/MultiLayerNeuralNetConfigurationTest.java 
b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/MultiLayerNeuralNetConfigurationTest.java index 98d14efe0..9f1ebfd77 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/MultiLayerNeuralNetConfigurationTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/MultiLayerNeuralNetConfigurationTest.java @@ -437,7 +437,7 @@ public class MultiLayerNeuralNetConfigurationTest extends BaseDL4JTest { .layer(DenseLayer.builder().nIn(10).nOut(10).build()) .layer(!lossLayer ? OutputLayer.builder().nIn(10).nOut(nOut[i]) .activation(activations[i]).lossFunction(lf[i]).build() - : LossLayer.builder().lossFunction().activation(activations[i]).lossFunction(lf[i]) + : LossLayer.builder().activation(activations[i]).lossFunction(lf[i].getILossFunction()) .build()) .validateOutputLayerConfig(validate) .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/constraints/TestConstraints.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/constraints/TestConstraints.java index 226be92b6..616052892 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/constraints/TestConstraints.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/constraints/TestConstraints.java @@ -48,6 +48,7 @@ import org.nd4j.linalg.learning.config.RmsProp; import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; +import java.util.List; import java.util.Map; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -230,7 +231,8 @@ public class TestConstraints extends BaseDL4JTest { .biasInit(0.2) .layer(DenseLayer.builder().nIn(12).nOut(10) - .constrainAllParameters(lc).build()) + .allParamConstraints(List.of(lc)) + .build()) .layer(OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(8).build()) .build(); diff --git 
a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/graph/ElementWiseVertexTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/graph/ElementWiseVertexTest.java index 92aad1896..ca990ac05 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/graph/ElementWiseVertexTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/conf/graph/ElementWiseVertexTest.java @@ -201,21 +201,21 @@ public class ElementWiseVertexTest extends BaseDL4JTest { .addInputs("input1", "input2", "input3") .addLayer("dense1", DenseLayer.builder().nIn(featuresz).nOut(midsz) - .activation(new ActivationTanH()).build(), + .activation(Activation.TANH).build(), "input1") .addLayer("dense2", DenseLayer.builder().nIn(featuresz).nOut(midsz) - .activation(new ActivationTanH()).build(), + .activation(Activation.TANH).build(), "input2") .addLayer("dense3", DenseLayer.builder().nIn(featuresz).nOut(midsz) - .activation(new ActivationTanH()).build(), + .activation(Activation.TANH).build(), "input3") .addVertex("elementwiseAdd", new ElementWiseVertex(ElementWiseVertex.Op.Add), "dense1", "dense2", "dense3") .addLayer("output", OutputLayer.builder().nIn(midsz).nOut(outputsz) - .activation(new ActivationSigmoid()) + .activation(Activation.SIGMOID) .lossFunction(LossFunction.MSE).build(), "elementwiseAdd") .setOutputs("output").build(); @@ -377,21 +377,21 @@ public class ElementWiseVertexTest extends BaseDL4JTest { .addInputs("input1", "input2", "input3") .addLayer("dense1", DenseLayer.builder().nIn(featuresz).nOut(midsz) - .activation(new ActivationTanH()).build(), + .activation(Activation.TANH).build(), "input1") .addLayer("dense2", DenseLayer.builder().nIn(featuresz).nOut(midsz) - .activation(new ActivationTanH()).build(), + .activation(Activation.TANH).build(), "input2") .addLayer("dense3", DenseLayer.builder().nIn(featuresz).nOut(midsz) - .activation(new ActivationTanH()).build(), + .activation(Activation.TANH).build(), 
"input3") .addVertex("elementwiseProduct", new ElementWiseVertex(ElementWiseVertex.Op.Product), "dense1", "dense2", "dense3") .addLayer("output", OutputLayer.builder().nIn(midsz).nOut(outputsz) - .activation(new ActivationSigmoid()) + .activation(Activation.SIGMOID) .lossFunction(LossFunction.MSE).build(), "elementwiseProduct") .setOutputs("output").build(); @@ -552,17 +552,17 @@ public class ElementWiseVertexTest extends BaseDL4JTest { .addInputs("input1", "input2") .addLayer("dense1", DenseLayer.builder().nIn(featuresz).nOut(midsz) - .activation(new ActivationTanH()).build(), + .activation(Activation.TANH).build(), "input1") .addLayer("dense2", DenseLayer.builder().nIn(featuresz).nOut(midsz) - .activation(new ActivationTanH()).build(), + .activation(Activation.TANH).build(), "input2") .addVertex("elementwiseSubtract", new ElementWiseVertex(ElementWiseVertex.Op.Subtract), "dense1", "dense2") .addLayer("output", OutputLayer.builder().nIn(midsz).nOut(outputsz) - .activation(new ActivationSigmoid()) + .activation(Activation.SIGMOID) .lossFunction(LossFunction.MSE).build(), "elementwiseSubtract") .setOutputs("output").build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/dtypes/DTypeTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/dtypes/DTypeTests.java index ec83bd083..99162a472 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/dtypes/DTypeTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/dtypes/DTypeTests.java @@ -493,7 +493,7 @@ public class DTypeTests extends BaseDL4JTest { secondLast = ConvolutionLayer.builder().kernelSize(2, 2).stride(1, 1).nOut(3).activation(Activation.TANH).build(); break; case 4: - ol = new Yolo2OutputLayer.Builder().boundingBoxPriors(Nd4j.create(new double[][]{{1.0, 1.0}, {2.0, 2.0}}).castTo(networkDtype)).build(); + ol = Yolo2OutputLayer.builder().boundingBoxes(Nd4j.create(new double[][]{{1.0, 1.0}, {2.0, 
2.0}}).castTo(networkDtype)).build(); secondLast = ConvolutionLayer.builder().kernelSize(2, 2).stride(1, 1).nOut(14).activation(Activation.TANH).build(); break; default: @@ -817,8 +817,8 @@ public class DTypeTests extends BaseDL4JTest { .convolutionMode(ConvolutionMode.Same) .updater(new Adam(1e-2)) .list() - .layer(new SpaceToBatchLayer.Builder().blocks(1, 1).build()) - .layer(new SpaceToDepthLayer.Builder().blocks(2).build()) + .layer(SpaceToBatchLayer.builder().blockSize(1, 1).build()) + .layer(SpaceToDepthLayer.builder().blockSize(2).build()) .layer(OutputLayer.builder().nOut(10).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build()) .inputType(InputType.convolutional(28, 28, 5)) .build(); @@ -907,7 +907,7 @@ public class DTypeTests extends BaseDL4JTest { .layer(Bidirectional.builder(LSTM.builder().nIn(5).nOut(5).activation(Activation.TANH).build()).build()) .layer(new TimeDistributed(DenseLayer.builder().nIn(10).nOut(5).activation(Activation.TANH).build())) .layer(SimpleRnn.builder().nIn(5).nOut(5).build()) - .layer(new MaskZeroLayer.Builder().underlying(SimpleRnn.builder().nIn(5).nOut(5).build()).maskValue(0.0).build()) + .layer(MaskZeroLayer.builder().underlying(SimpleRnn.builder().nIn(5).nOut(5).build()).maskingValue(0.0).build()) .layer(secondLast) .layer(ol) .build(); @@ -986,7 +986,7 @@ public class DTypeTests extends BaseDL4JTest { .updater(new NoOp()) .dist(new UniformDistribution(-6, 6)) - .layer(new PrimaryCapsules.Builder(primaryCapsDim, primarpCapsChannel) + .layer(PrimaryCapsules.builder(primaryCapsDim, primarpCapsChannel) .kernelSize(3, 3) .stride(2, 2) .build()) @@ -1400,9 +1400,9 @@ public class DTypeTests extends BaseDL4JTest { .weightInit(WeightInit.XAVIER) .list() .layer(LSTM.builder().nOut(layerSize).build()) - .layer(new SelfAttentionLayer.Builder().nOut(8).nHeads(2).projectInput(true).build()) - .layer(new 
LearnedSelfAttentionLayer.Builder().nOut(8).nHeads(2).nQueries(numQueries).projectInput(true).build()) - .layer(new RecurrentAttentionLayer.Builder().nIn(layerSize).nOut(layerSize).nHeads(1).projectInput(false).hasBias(false).build()) + .layer(SelfAttentionLayer.builder().nOut(8).nHeads(2).projectInput(true).build()) + .layer(LearnedSelfAttentionLayer.builder().nOut(8).nHeads(2).nQueries(numQueries).projectInput(true).build()) + .layer(RecurrentAttentionLayer.builder().nIn(layerSize).nOut(layerSize).nHeads(1).projectInput(false).hasBias(false).build()) .layer(GlobalPoolingLayer.builder().poolingType(PoolingType.MAX).build()) .layer(OutputLayer.builder().nOut(nOut).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestCompGraphCNN.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestCompGraphCNN.java index 165582c16..c3d030e45 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestCompGraphCNN.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestCompGraphCNN.java @@ -161,7 +161,7 @@ public class TestCompGraphCNN extends BaseDL4JTest { imageHeight)) .addLayer("conv1", ConvolutionLayer.builder() .kernelSize(kernelHeight, kernelWidth).stride(1, 1) - .dataFormat(CNN2DFormat.NCHW) + .convFormat(CNN2DFormat.NCHW) .nIn(nChannels).nOut(2).weightInit(WeightInit.XAVIER) .activation(Activation.RELU).build(), "input") .addLayer("pool1", diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestComputationGraphNetwork.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestComputationGraphNetwork.java index 76e33fa67..d57dd17d0 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestComputationGraphNetwork.java +++ 
b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/graph/TestComputationGraphNetwork.java @@ -1163,7 +1163,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { "act") .addLayer("drop", DropoutLayer.builder(0.5).build(), "pool") .addLayer("dense", DenseLayer.builder().nIn(1).nOut(1).build(), "drop") - .addLayer("loss", LossLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) + .addLayer("loss", LossLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT.getILossFunction()) .build(), "dense") .allowDisconnected(true) .setOutputs("loss").build(); @@ -1457,7 +1457,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { .graphBuilder() .addInputs("in") .layer("0", SubsamplingLayer.builder().kernelSize(2,2).stride(2,2).build(), "in") - .layer("1", LossLayer.builder().lossFunction().activation(Activation.SIGMOID).lossFunction(LossFunctions.LossFunction.MSE).build(), "0") + .layer("1", LossLayer.builder().activation(Activation.SIGMOID).lossFunction(LossFunctions.LossFunction.MSE.getILossFunction()).build(), "0") .setOutputs("1") .setInputTypes(InputType.convolutionalFlat(28,28,1)) .build(); @@ -1791,7 +1791,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { .nIn(10).nOut(5) .activation(Activation.TANH) .dropOut(new GaussianNoise(0.05)) - .build()) + .build()).build() ,"merge") .addLayer("out1", RnnOutputLayer.builder().activation(Activation.SOFTMAX) @@ -1986,10 +1986,10 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { .updater(new Adam()) .graphBuilder() .addInputs("x_emb") - .addLayer("agg_lstm", Bidirectional.builder(CONCAT, LSTM.builder().nOut(hiddenSize/2).build()), "x_emb") + .addLayer("agg_lstm", Bidirectional.builder(CONCAT, LSTM.builder().nOut(hiddenSize/2).build()).build(), "x_emb") .addLayer("agg_att", DenseLayer.builder().nIn(100).nOut(1).activation(Activation.SOFTMAX).build(), "agg_lstm") .addVertex("att", new PreprocessorVertex(new ComposableInputPreProcessor(new 
FeedForwardToRnnPreProcessor(), new PermutePreprocessor(0,2,1), new RnnToFeedForwardPreProcessor())), "agg_att") - .addLayer("att_repeat", new RepeatVector.Builder(hiddenSize).build(),"att") + .addLayer("att_repeat", RepeatVector.builder().repetitionFactor(hiddenSize).build(),"att") .addVertex("att_trans", new PreprocessorVertex(new PermutePreprocessor(0, 2, 1)), "att_repeat") .addVertex("mult", new ElementWiseVertex(ElementWiseVertex.Op.Product), "agg_lstm", "att_trans") .addLayer("sum", GlobalPoolingLayer.builder().build(), "mult") @@ -2197,16 +2197,16 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { .addInputs("in") .layer("l0", ConvolutionLayer.builder() .nOut(16) - .dataFormat(CNN2DFormat.NHWC) + .convFormat(CNN2DFormat.NHWC) .kernelSize(2,2).stride(1,1) .build(), "in") .layer("l1", ConvolutionLayer.builder() .nOut(8) - .dataFormat(CNN2DFormat.NHWC) + .convFormat(CNN2DFormat.NHWC) .kernelSize(2,2).stride(1,1) .build(), "in") .addVertex("merge", new MergeVertex(), "l0", "l1") - .layer("out", CnnLossLayer.builder().activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE).build(), "merge") + .layer("out", CnnLossLayer.builder().activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE.getILossFunction()).build(), "merge") .setOutputs("out") .setInputTypes(InputType.convolutional(32, 32, 3, CNN2DFormat.NHWC)) .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/ActivationLayerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/ActivationLayerTest.java index 8966dda90..ae4d3520b 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/ActivationLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/ActivationLayerTest.java @@ -357,7 +357,7 @@ public class ActivationLayerTest extends BaseDL4JTest { .activation(Activation.RATIONALTANH) .layer(DenseLayer.builder().nIn(10).nOut(10).build()) - 
.layer(ActivationLayer.builder()) + .layer(ActivationLayer.builder().build()) .layer(ActivationLayer.builder().build()) .layer(ActivationLayer.builder().activation(Activation.ELU).build()) .layer( @@ -404,7 +404,7 @@ public class ActivationLayerTest extends BaseDL4JTest { .graphBuilder() .addInputs("in") .addLayer("0", DenseLayer.builder().nIn(10).nOut(10).build(), "in") - .addLayer("1", ActivationLayer.builder(), "0") + .addLayer("1", ActivationLayer.builder().build(), "0") .addLayer("2", ActivationLayer.builder().build(), "1") .addLayer("3", ActivationLayer.builder().activation(Activation.ELU).build(), "2") .addLayer( diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/capsule/CapsNetMNISTTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/capsule/CapsNetMNISTTest.java index 470f5f63d..fc92a8c18 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/capsule/CapsNetMNISTTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/capsule/CapsNetMNISTTest.java @@ -63,7 +63,7 @@ public class CapsNetMNISTTest extends BaseDL4JTest { .kernelSize(9, 9) .stride(3, 3) .build()) - .layer(new PrimaryCapsules.Builder(8, 8) + .layer(PrimaryCapsules.builder(8, 8) .kernelSize(7, 7) .stride(2, 2) .build()) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/capsule/PrimaryCapsulesTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/capsule/PrimaryCapsulesTest.java index 12f63e7ec..d8bbe5ae5 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/capsule/PrimaryCapsulesTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/capsule/PrimaryCapsulesTest.java @@ -44,7 +44,7 @@ public class PrimaryCapsulesTest extends BaseDL4JTest { @Test public void testOutputType(){ - PrimaryCapsules layer = new PrimaryCapsules.Builder(8, 8) + PrimaryCapsules layer = 
PrimaryCapsules.builder(8, 8) .kernelSize(7, 7) .stride(2, 2) .build(); @@ -57,7 +57,7 @@ public class PrimaryCapsulesTest extends BaseDL4JTest { @Test public void testInputType(){ - PrimaryCapsules layer = new PrimaryCapsules.Builder(8, 8) + PrimaryCapsules layer = PrimaryCapsules.builder(8, 8) .kernelSize(7, 7) .stride(2, 2) .build(); @@ -72,7 +72,7 @@ public class PrimaryCapsulesTest extends BaseDL4JTest { @Test public void testConfig(){ - PrimaryCapsules layer1 = new PrimaryCapsules.Builder(8, 10) + PrimaryCapsules layer1 = PrimaryCapsules.builder(8, 10) .kernelSize(5, 5) .stride(4, 4) .useLeakyReLU(0.5) @@ -84,22 +84,22 @@ public class PrimaryCapsulesTest extends BaseDL4JTest { assertArrayEquals(new int[]{4, 4}, layer1.getStride()); assertArrayEquals(new int[]{0, 0}, layer1.getPadding()); assertArrayEquals(new int[]{1, 1}, layer1.getDilation()); - assertTrue(layer1.isUseRelu()); - assertEquals(0.5, layer1.getLeak(), 0.001); + assertTrue(layer1.isUseRelU()); + assertEquals(0.5, layer1.getUseLeakyReLU(), 0.001); - PrimaryCapsules layer2 = new PrimaryCapsules.Builder(8, 10) + PrimaryCapsules layer2 = PrimaryCapsules.builder(8, 10) .kernelSize(5, 5) .stride(4, 4) .build(); - assertFalse(layer2.isUseRelu()); + assertFalse(layer2.isUseRelU()); - PrimaryCapsules layer3 = new PrimaryCapsules.Builder(8, 10) + PrimaryCapsules layer3 = PrimaryCapsules.builder(8, 10) .kernelSize(5, 5) .stride(4, 4) .useReLU() .build(); - assertTrue(layer3.isUseRelu()); - assertEquals(0, layer3.getLeak(), 0.001); + assertTrue(layer3.isUseRelU()); + assertEquals(0, layer3.getUseLeakyReLU(), 0.001); } @@ -108,7 +108,7 @@ public class PrimaryCapsulesTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .seed(123) .list() - .layer(new PrimaryCapsules.Builder(8, 10) + .layer(PrimaryCapsules.builder(8, 10) .kernelSize(5, 5) .stride(4, 4) .useLeakyReLU(0.5) diff --git 
a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvDataFormatTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvDataFormatTests.java index 829d79500..44d7380fd 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvDataFormatTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvDataFormatTests.java @@ -561,7 +561,7 @@ public class ConvDataFormatTests extends BaseDL4JTest { .kernelSize(3, 3) .stride(2, 2) .activation(Activation.TANH) - .dataFormat(format) + .convFormat(format) .nOut(3) .helperAllowFallback(false) .build(), format, cm, null); @@ -685,14 +685,14 @@ public class ConvDataFormatTests extends BaseDL4JTest { return getNetWithLayer(Deconvolution2D.builder().nOut(2) .activation(Activation.TANH) .kernelSize(2,2) - .dataFormat(format) + .convFormat(format) .stride(2,2) .build(), format, cm, null); } else { return getNetWithLayer(Deconvolution2D.builder().nOut(2) .activation(Activation.TANH) .kernelSize(2,2) - .dataFormat(format) + .convFormat(format) .stride(2,2) .build(), format, cm, null); } @@ -715,26 +715,26 @@ public class ConvDataFormatTests extends BaseDL4JTest { private MultiLayerNetwork getSpaceToDepthNet(CNN2DFormat format, boolean setOnLayerAlso) { if (setOnLayerAlso) { - return getNetWithLayer(new SpaceToDepthLayer.Builder() - .blocks(2) + return getNetWithLayer(SpaceToDepthLayer.builder() + .blockSize(2) .dataFormat(format) .build(), format, ConvolutionMode.Same, null); } else { - return getNetWithLayer(new SpaceToDepthLayer.Builder() - .blocks(2) + return getNetWithLayer(SpaceToDepthLayer.builder() + .blockSize(2) .build(), format, ConvolutionMode.Same, null); } } private MultiLayerNetwork getSpaceToBatchNet(CNN2DFormat format, boolean setOnLayerAlso) { if (setOnLayerAlso) { - return getNetWithLayer(new SpaceToBatchLayer.Builder() - .blocks(2, 2) + return 
getNetWithLayer(SpaceToBatchLayer.builder() + .blockSize(2, 2) .dataFormat(format) .build(), format, ConvolutionMode.Same, InputType.convolutional(16, 16, 3, format)); } else { - return getNetWithLayer(new SpaceToBatchLayer.Builder() - .blocks(2, 2) + return getNetWithLayer(SpaceToBatchLayer.builder() + .blockSize(2, 2) .build(), format, ConvolutionMode.Same, InputType.convolutional(16, 16, 3, format)); } } @@ -807,7 +807,7 @@ public class ConvDataFormatTests extends BaseDL4JTest { .kernelSize(3, 3) .stride(2, 2) .activation(Activation.TANH) - .dataFormat(format) + .convFormat(format) .nOut(3) .helperAllowFallback(false) .build()); @@ -988,7 +988,7 @@ public class ConvDataFormatTests extends BaseDL4JTest { switch (i){ case 0: - b.layer(ConvolutionLayer.builder().kernelSize(2,2).nIn(3).nOut(3).dataFormat(df).build()); + b.layer(ConvolutionLayer.builder().kernelSize(2,2).nIn(3).nOut(3).convFormat(df).build()); b.inputType(InputType.convolutional(12,12,3,df)); break; case 1: @@ -996,7 +996,7 @@ public class ConvDataFormatTests extends BaseDL4JTest { b.inputType(InputType.convolutional(12,12,3,df)); break; case 2: - b.layer(Deconvolution2D.builder().dataFormat(df).kernelSize(2,2).nIn(3).nOut(3).build()); + b.layer(Deconvolution2D.builder().convFormat(df).kernelSize(2,2).nIn(3).nOut(3).build()); b.inputType(InputType.convolutional(12,12,3,df)); break; case 3: diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerSetupTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerSetupTest.java index ba80796a3..f608d9341 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerSetupTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerSetupTest.java @@ -27,6 +27,7 @@ import org.deeplearning4j.BaseDL4JTest; import 
org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator; import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; import org.deeplearning4j.nn.api.OptimizationAlgorithm; +import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration.NeuralNetConfigurationBuilder; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -370,7 +371,7 @@ public class ConvolutionLayerSetupTest extends BaseDL4JTest { NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().list() .layer(ConvolutionLayer.builder(2, 2).padding(0, 0).stride(2, 2).nIn(1).nOut(3).build()) //(28-2+0)/2+1 = 14 - .layer(new SpaceToBatchLayer.Builder(blocks).build()) // Divide space dimensions by blocks, i.e. 14/2 = 7 + .layer(SpaceToBatchLayer.builder(blocks).build()) // Divide space dimensions by blocks, i.e. 14/2 = 7 .layer(OutputLayer.builder().nOut(3).activation(Activation.SOFTMAX).build()) .inputType(InputType.convolutional(28, 28, 1)); @@ -389,11 +390,11 @@ public class ConvolutionLayerSetupTest extends BaseDL4JTest { int blocks = 2; - NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().list() + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder() //(28-2+0)/2+1 = 14 -> 14x14x3 out .layer(ConvolutionLayer.builder(2, 2).padding(0, 0).stride(2, 2).nIn(1).nOut(3).build()) // Divide space dimensions by blocks, i.e. 14/2 = 7 -> 7x7x12 out (3x2x2 depth) - .layer(new SpaceToDepthLayer.Builder(blocks, SpaceToDepthLayer.DataFormat.NCHW).build()) + .layer(SpaceToDepthLayer.builder().blockSize(blocks).dataFormat(CNN2DFormat.NCHW).build()) .layer(OutputLayer.builder().nIn(3 * 2 * 2).nOut(3).activation(Activation.SOFTMAX).build()) // nIn of the next layer gets multiplied by 2*2. 
.inputType(InputType.convolutional(28, 28, 1)); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerTest.java index 353f0cfc2..87814e038 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayerTest.java @@ -20,6 +20,9 @@ package org.deeplearning4j.nn.layers.convolution; +import static org.junit.jupiter.api.Assertions.*; + +import java.util.List; import lombok.val; import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; @@ -32,9 +35,9 @@ import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.RNNFormat; import org.deeplearning4j.nn.conf.inputs.InputType; +import org.deeplearning4j.nn.conf.layers.*; import org.deeplearning4j.nn.conf.layers.Convolution1DLayer; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; -import org.deeplearning4j.nn.conf.layers.*; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.weights.WeightInit; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; @@ -55,778 +58,987 @@ import org.nd4j.linalg.learning.config.Nesterovs; import org.nd4j.linalg.lossfunctions.LossFunctions; import org.nd4j.linalg.lossfunctions.impl.LossMCXENT; -import java.util.List; - -import static org.junit.jupiter.api.Assertions.*; - /** * @author Adam Gibson */ public class ConvolutionLayerTest extends BaseDL4JTest { - @Override - public DataType getDataType(){ - return DataType.FLOAT; + private static final int kH = 2; + private static final int kW = 2; + private static final int[] strides = {1, 1}; + private static final int[] pad = {0, 0}; + private static 
final int miniBatch = 2; + private static final int inDepth = 2; + private static final int height = 3; + private static final int width = 3; + private static final int outW = 2; + private static final int outH = 2; + + ////////////////////////////////////////////////////////////////////////////////// + + private static Layer getCNNConfig( + int nIn, int nOut, int[] kernelSize, int[] stride, int[] padding) { + + ConvolutionLayer layer = + ConvolutionLayer.builder(kernelSize, stride, padding) + .nIn(nIn) + .nOut(nOut) + .activation(Activation.SIGMOID) + .build(); + + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().layer(layer).build(); + + val numParams = conf.getFlattenedLayerConfigurations().get(0).initializer().numParams(conf); + INDArray params = Nd4j.create(1, numParams); + return conf.getFlattenedLayerConfigurations() + .get(0) + .instantiate(conf, null, 0, params, true, params.dataType()); + } + + private static INDArray getInput() { + + /* + ----- Input images ----- + example 0: + channels 0 channels 1 + [ 0 1 2 [ 9 10 11 + 3 4 5 12 13 14 + 6 7 8] 15 16 17] + example 1: + [18 19 20 [27 28 29 + 21 22 23 30 31 32 + 24 25 26] 33 34 35] + */ + + INDArray input = Nd4j.create(new int[] {miniBatch, inDepth, height, width}, 'c'); + input.put( + new INDArrayIndex[] { + NDArrayIndex.point(0), NDArrayIndex.point(0), NDArrayIndex.all(), NDArrayIndex.all() + }, + Nd4j.create(new double[][] {{0, 1, 2}, {3, 4, 5}, {6, 7, 8}})); + input.put( + new INDArrayIndex[] { + NDArrayIndex.point(0), NDArrayIndex.point(1), NDArrayIndex.all(), NDArrayIndex.all() + }, + Nd4j.create(new double[][] {{9, 10, 11}, {12, 13, 14}, {15, 16, 17}})); + input.put( + new INDArrayIndex[] { + NDArrayIndex.point(1), NDArrayIndex.point(0), NDArrayIndex.all(), NDArrayIndex.all() + }, + Nd4j.create(new double[][] {{18, 19, 20}, {21, 22, 23}, {24, 25, 26}})); + input.put( + new INDArrayIndex[] { + NDArrayIndex.point(1), NDArrayIndex.point(1), NDArrayIndex.all(), NDArrayIndex.all() + }, + 
Nd4j.create(new double[][] {{27, 28, 29}, {30, 31, 32}, {33, 34, 35}})); + + return input; + } + + private static MultiLayerNetwork getCNNMLNConfig(boolean backprop, boolean pretrain) { + int outputNum = 10; + int seed = 123; + + NeuralNetConfiguration.NeuralNetConfigurationBuilder conf = + NeuralNetConfiguration.builder() + .seed(seed) + .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT) + .list() + .layer(0, ConvolutionLayer.builder(new int[] {10, 10}).nOut(6).build()) + .layer( + 1, + SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX, new int[] {2, 2}) + .stride(1, 1) + .build()) + .layer( + 2, + OutputLayer.builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) + .nOut(outputNum) + .weightInit(WeightInit.XAVIER) + .activation(Activation.SOFTMAX) + .build()) + .inputType(InputType.convolutionalFlat(28, 28, 1)); + + MultiLayerNetwork model = new MultiLayerNetwork(conf.build()); + model.init(); + + return model; + } + + @Override + public DataType getDataType() { + return DataType.FLOAT; + } + + @Test + public void testTwdFirstLayer() throws Exception { + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = + NeuralNetConfiguration.builder() + .seed(123) + .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) + .l2(2e-4) + .updater(new Nesterovs(0.9)) + .dropOut(0.5) + .list() + .layer( + 0, + ConvolutionLayer.builder(8, 8) // 16 filters kernel size 8 stride 4 + .stride(4, 4) + .nOut(16) + .dropOut(0.5) + .activation(Activation.RELU) + .weightInit(WeightInit.XAVIER) + .build()) + .layer( + 1, + ConvolutionLayer.builder(4, 4) // 32 filters kernel size 4 stride 2 + .stride(2, 2) + .nOut(32) + .dropOut(0.5) + .activation(Activation.RELU) + .weightInit(WeightInit.XAVIER) + .build()) + .layer( + 2, + DenseLayer.builder() // fully connected with 256 rectified units + .nOut(256) + .activation(Activation.RELU) + .weightInit(WeightInit.XAVIER) + .dropOut(0.5) + .build()) + .layer( + 3, + 
OutputLayer.builder(LossFunctions.LossFunction.SQUARED_LOSS) // output layer + .nOut(10) + .weightInit(WeightInit.XAVIER) + .activation(Activation.SOFTMAX) + .build()) + .inputType(InputType.convolutionalFlat(28, 28, 1)); + + DataSetIterator iter = new MnistDataSetIterator(10, 10); + NeuralNetConfiguration conf = builder.build(); + MultiLayerNetwork network = new MultiLayerNetwork(conf); + network.init(); + DataSet ds = iter.next(); + for (int i = 0; i < 5; i++) { + network.fit(ds); } + } - @Test - public void testTwdFirstLayer() throws Exception { - NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().seed(123) - .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).l2(2e-4) - .updater(new Nesterovs(0.9)).dropOut(0.5) - .list().layer(0, - ConvolutionLayer.builder(8, 8) //16 filters kernel size 8 stride 4 - .stride(4, 4).nOut(16).dropOut(0.5) - .activation(Activation.RELU).weightInit( - WeightInit.XAVIER) - .build()) - .layer(1, ConvolutionLayer.builder(4, 4) //32 filters kernel size 4 stride 2 - .stride(2, 2).nOut(32).dropOut(0.5).activation(Activation.RELU) - .weightInit(WeightInit.XAVIER).build()) - .layer(2, DenseLayer.builder() //fully connected with 256 rectified units - .nOut(256).activation(Activation.RELU).weightInit(WeightInit.XAVIER) - .dropOut(0.5).build()) - .layer(3, OutputLayer.builder(LossFunctions.LossFunction.SQUARED_LOSS) //output layer - .nOut(10).weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).build()) - .inputType(InputType.convolutionalFlat(28, 28, 1)); + @Test + public void testCNNSubComboWithMixedHW() { + int imageHeight = 20; + int imageWidth = 23; + int nChannels = 1; + int classes = 2; + int numSamples = 200; - DataSetIterator iter = new MnistDataSetIterator(10, 10); - NeuralNetConfiguration conf = builder.build(); - MultiLayerNetwork network = new MultiLayerNetwork(conf); - network.init(); - DataSet ds = iter.next(); - for( int i=0; i<5; i++ ) { - network.fit(ds); - 
} - } + int kernelHeight = 3; + int kernelWidth = 3; - @Test - public void testCNNSubComboWithMixedHW() { - int imageHeight = 20; - int imageWidth = 23; - int nChannels = 1; - int classes = 2; - int numSamples = 200; + DataSet trainInput; + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = + NeuralNetConfiguration.builder() + .seed(123) + .list() + .layer( + 0, + ConvolutionLayer.builder(kernelHeight, kernelWidth) + .stride(1, 1) + .nOut(2) + .activation(Activation.RELU) + .weightInit(WeightInit.XAVIER) + .build()) + .layer( + 1, + SubsamplingLayer.builder() + .poolingType(SubsamplingLayer.PoolingType.MAX.toPoolingType()) + .kernelSize(imageHeight - kernelHeight, 1) + .stride(1, 1) + .build()) + .layer( + 2, + OutputLayer.builder() + .nOut(classes) + .weightInit(WeightInit.XAVIER) + .activation(Activation.SOFTMAX) + .build()) + .inputType(InputType.convolutionalFlat(imageHeight, imageWidth, nChannels)); - int kernelHeight = 3; - int kernelWidth = 3; + NeuralNetConfiguration conf = builder.build(); + MultiLayerNetwork model = new MultiLayerNetwork(conf); + model.init(); - DataSet trainInput; - NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = - NeuralNetConfiguration.builder() - .seed(123) - .list() - .layer(0, ConvolutionLayer.builder(kernelHeight, kernelWidth).stride(1, 1) - .nOut(2).activation(Activation.RELU) - .weightInit(WeightInit.XAVIER).build()) - .layer(1, SubsamplingLayer.builder() - .poolingType(SubsamplingLayer.PoolingType.MAX.toPoolingType()) - .kernelSize(imageHeight - kernelHeight, 1).stride(1, 1).build()) - .layer(2, OutputLayer.builder().nOut(classes).weightInit(WeightInit.XAVIER) - .activation(Activation.SOFTMAX).build()) - .inputType(InputType.convolutionalFlat(imageHeight, imageWidth, nChannels)); + INDArray emptyFeatures = Nd4j.zeros(numSamples, imageWidth * imageHeight * nChannels); + INDArray emptyLables = Nd4j.zeros(numSamples, classes); - NeuralNetConfiguration conf = builder.build(); - MultiLayerNetwork model = 
new MultiLayerNetwork(conf); - model.init(); + trainInput = new DataSet(emptyFeatures, emptyLables); + model.fit(trainInput); + } - INDArray emptyFeatures = Nd4j.zeros(numSamples, imageWidth * imageHeight * nChannels); - INDArray emptyLables = Nd4j.zeros(numSamples, classes); + ////////////////////////////////////////////////////////////////////////////////// - trainInput = new DataSet(emptyFeatures, emptyLables); - model.fit(trainInput); - } + @Test + public void testCausal1d() { + Nd4j.getEnvironment().setVerbose(true); + Nd4j.getEnvironment().setDebug(true); + // See: Fixes: https://github.com/eclipse/deeplearning4j/issues/9060 + double learningRate = 1e-3; + long seed = 123; + long timeSteps = 72; + long vectorLength = 64; + long batchSize = 1; + INDArray arr = Nd4j.randn(batchSize, vectorLength, timeSteps); - @Test - public void testCausal1d() { - Nd4j.getEnvironment().setVerbose(true); - Nd4j.getEnvironment().setDebug(true); - //See: Fixes: https://github.com/eclipse/deeplearning4j/issues/9060 - double learningRate = 1e-3; - long seed = 123; - long timeSteps = 72; - long vectorLength = 64; - long batchSize = 1; - INDArray arr = Nd4j.randn(batchSize,vectorLength,timeSteps); + NeuralNetConfiguration build = + NeuralNetConfiguration.builder() + .seed(seed) + .activation(Activation.RELU) + .weightInit(WeightInit.NORMAL) // better init + .updater(new Adam(learningRate)) + .list() + // block 1 + .layer( + Convolution1D.builder() + .kernelSize(2) + .rnnDataFormat(RNNFormat.NCW) + .stride(1) + .nOut(14) + .convolutionMode(ConvolutionMode.Causal) + .dilation(4) + .build()) + .layer( + RnnLossLayer.builder() + .dataFormat(RNNFormat.NCW) + .activation(new ActivationSoftmax()) + .lossFunction(new LossMCXENT()) + .build()) + .inputType(InputType.recurrent(vectorLength, timeSteps, RNNFormat.NCW)) + .build(); - NeuralNetConfiguration build = NeuralNetConfiguration.builder().seed(seed) - .activation(Activation.RELU) - .weightInit(WeightInit.NORMAL) // better init - 
.updater(new Adam(learningRate)) - .list() - // block 1 - .layer(Convolution1D.builder() - .kernelSize(2) - .rnnDataFormat(RNNFormat.NCW) - .stride(1) - .nOut(14) - .convolutionMode(ConvolutionMode.Causal) - .dilation(4) - .build()) - .layer(RnnLossLayer.builder().dataFormat(RNNFormat.NCW) - .activation(new ActivationSoftmax()) - .lossFunction(new LossMCXENT()).build()) - .inputType(InputType.recurrent(vectorLength,timeSteps,RNNFormat.NCW)) - .build(); + MultiLayerNetwork network = new MultiLayerNetwork(build); + network.init(); + INDArray output = network.output(arr); + assertArrayEquals(new long[] {1, 14, 72}, output.shape()); + System.out.println(output); + } - MultiLayerNetwork network = new MultiLayerNetwork(build); - network.init(); - INDArray output = network.output(arr); - assertArrayEquals(new long[]{1,14,72},output.shape()); - System.out.println(output); - } + @Test + public void testCNNTooLargeKernel() { + assertThrows( + DL4JException.class, + () -> { + int imageHeight = 20; - @Test - public void testCNNTooLargeKernel() { - assertThrows(DL4JException.class, () -> { - int imageHeight = 20; + int imageWidth = 23; + int nChannels = 1; + int classes = 2; + int numSamples = 200; - int imageWidth = 23; - int nChannels = 1; - int classes = 2; - int numSamples = 200; + int kernelHeight = imageHeight; + int kernelWidth = imageWidth + 1; - int kernelHeight = imageHeight; - int kernelWidth = imageWidth + 1; + DataSet trainInput; + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = + NeuralNetConfiguration.builder() + .seed(123) + .list() + .layer( + 0, + ConvolutionLayer.builder( + kernelHeight, + kernelWidth) // (img-kernel+2*padding)/stride + 1: must be >= 1. 
+ // Therefore: with p=0, kernel <= img size + .stride(1, 1) + .nOut(2) + .activation(Activation.RELU) + .weightInit(WeightInit.XAVIER) + .build()) + .layer( + 1, + OutputLayer.builder() + .nOut(classes) + .weightInit(WeightInit.XAVIER) + .activation(Activation.SOFTMAX) + .build()) + .inputType(InputType.convolutionalFlat(imageHeight, imageWidth, nChannels)); - DataSet trainInput; - NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = - NeuralNetConfiguration.builder() - .seed(123) - .list() - .layer(0, ConvolutionLayer.builder(kernelHeight, kernelWidth) //(img-kernel+2*padding)/stride + 1: must be >= 1. Therefore: with p=0, kernel <= img size - .stride(1, 1).nOut(2).activation(Activation.RELU) - .weightInit(WeightInit.XAVIER).build()) - .layer(1, OutputLayer.builder().nOut(classes).weightInit(WeightInit.XAVIER) - .activation(Activation.SOFTMAX).build()) - .inputType(InputType.convolutionalFlat(imageHeight, imageWidth, nChannels)); + NeuralNetConfiguration conf = builder.build(); + MultiLayerNetwork model = new MultiLayerNetwork(conf); + model.init(); - NeuralNetConfiguration conf = builder.build(); - MultiLayerNetwork model = new MultiLayerNetwork(conf); - model.init(); + INDArray emptyFeatures = Nd4j.zeros(numSamples, imageWidth * imageHeight * nChannels); + INDArray emptyLables = Nd4j.zeros(numSamples, classes); - INDArray emptyFeatures = Nd4j.zeros(numSamples, imageWidth * imageHeight * nChannels); - INDArray emptyLables = Nd4j.zeros(numSamples, classes); - - trainInput = new DataSet(emptyFeatures, emptyLables); - model.fit(trainInput); + trainInput = new DataSet(emptyFeatures, emptyLables); + model.fit(trainInput); }); - } + } - @Test - public void testCNNZeroStride() { - assertThrows(Exception.class, () -> { - int imageHeight = 20; - int imageWidth = 23; - int nChannels = 1; - int classes = 2; - int numSamples = 200; + @Test + public void testCNNZeroStride() { + assertThrows( + Exception.class, + () -> { + int imageHeight = 20; + int imageWidth = 
23; + int nChannels = 1; + int classes = 2; + int numSamples = 200; - int kernelHeight = imageHeight; - int kernelWidth = imageWidth; + int kernelHeight = imageHeight; + int kernelWidth = imageWidth; - DataSet trainInput; - NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = - NeuralNetConfiguration.builder() - .seed(123) - .list() - .layer(0, ConvolutionLayer.builder(kernelHeight, kernelWidth).stride(1, 0) - .nOut(2).activation(Activation.RELU) - .weightInit(WeightInit.XAVIER).build()) - .layer(1, OutputLayer.builder().nOut(classes).weightInit(WeightInit.XAVIER) - .activation(Activation.SOFTMAX).build()) + DataSet trainInput; + NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = + NeuralNetConfiguration.builder() + .seed(123) + .list() + .layer( + 0, + ConvolutionLayer.builder(kernelHeight, kernelWidth) + .stride(1, 0) + .nOut(2) + .activation(Activation.RELU) + .weightInit(WeightInit.XAVIER) + .build()) + .layer( + 1, + OutputLayer.builder() + .nOut(classes) + .weightInit(WeightInit.XAVIER) + .activation(Activation.SOFTMAX) + .build()) + .inputType(InputType.convolutional(imageHeight, imageWidth, nChannels)); - .inputType(InputType.convolutional(imageHeight, imageWidth, nChannels)); + NeuralNetConfiguration conf = builder.build(); + MultiLayerNetwork model = new MultiLayerNetwork(conf); + model.init(); - NeuralNetConfiguration conf = builder.build(); - MultiLayerNetwork model = new MultiLayerNetwork(conf); - model.init(); + INDArray emptyFeatures = Nd4j.zeros(numSamples, imageWidth * imageHeight * nChannels); + INDArray emptyLables = Nd4j.zeros(numSamples, classes); - INDArray emptyFeatures = Nd4j.zeros(numSamples, imageWidth * imageHeight * nChannels); - INDArray emptyLables = Nd4j.zeros(numSamples, classes); - - trainInput = new DataSet(emptyFeatures, emptyLables); - model.fit(trainInput); + trainInput = new DataSet(emptyFeatures, emptyLables); + model.fit(trainInput); }); + } + + @Test + public void testCNNBiasInit() { + ConvolutionLayer 
cnn = ConvolutionLayer.builder().nIn(1).nOut(3).biasInit(1).build(); + + NeuralNetConfiguration conf = NeuralNetConfiguration.builder().layer(cnn).build(); + + val numParams = conf.getFlattenedLayerConfigurations().get(0).initializer().numParams(conf); + INDArray params = Nd4j.create(1, numParams); + Layer layer = + conf.getFlattenedLayerConfigurations() + .get(0) + .instantiate(conf, null, 0, params, true, params.dataType()); + + assertEquals(1, layer.getParam("b").size(0)); + } + + @Test + public void testCNNInputSetupMNIST() throws Exception { + INDArray input = getMnistData(); + Layer layer = getMNISTConfig(); + layer.activate(input, false, LayerWorkspaceMgr.noWorkspaces()); + + assertEquals(input, layer.input()); + assertArrayEquals(input.shape(), layer.input().shape()); + } + + @Test + public void testFeatureMapShapeMNIST() throws Exception { + int inputWidth = 28; + int[] stride = new int[] {1, 1}; + int[] padding = new int[] {0, 0}; + int[] kernelSize = new int[] {9, 9}; + int nChannelsIn = 1; + int depth = 20; + int featureMapWidth = (inputWidth + padding[1] * 2 - kernelSize[1]) / stride[1] + 1; + + INDArray input = getMnistData(); + + Layer layer = getCNNConfig(nChannelsIn, depth, kernelSize, stride, padding); + INDArray convActivations = layer.activate(input, false, LayerWorkspaceMgr.noWorkspaces()); + + assertEquals(featureMapWidth, convActivations.size(2)); + assertEquals(depth, convActivations.size(1)); + } + + @Test + public void testActivateResultsContained() { + Layer layer = getContainedConfig(); + INDArray input = getContainedData(); + INDArray expectedOutput = + Nd4j.create( + new float[] { + 0.98201379f, + 0.98201379f, + 0.98201379f, + 0.98201379f, + 0.99966465f, + 0.99966465f, + 0.99966465f, + 0.99966465f, + 0.98201379f, + 0.98201379f, + 0.98201379f, + 0.98201379f, + 0.99966465f, + 0.99966465f, + 0.99966465f, + 0.99966465f, + 0.98201379f, + 0.98201379f, + 0.98201379f, + 0.98201379f, + 0.99966465f, + 0.99966465f, + 0.99966465f, + 0.99966465f, + 
0.98201379f, + 0.98201379f, + 0.98201379f, + 0.98201379f, + 0.99966465f, + 0.99966465f, + 0.99966465f, + 0.99966465f + }, + new int[] {1, 2, 4, 4}); + + INDArray convActivations = layer.activate(input, false, LayerWorkspaceMgr.noWorkspaces()); + + assertArrayEquals(expectedOutput.shape(), convActivations.shape()); + assertEquals(expectedOutput, convActivations); + } + + public Layer getMNISTConfig() { + int[] kernelSize = new int[] {9, 9}; + int[] stride = new int[] {1, 1}; + int[] padding = new int[] {1, 1}; + int nChannelsIn = 1; + int depth = 20; + + return getCNNConfig(nChannelsIn, depth, kernelSize, stride, padding); + } + + public INDArray getMnistData() throws Exception { + int inputWidth = 28; + int inputHeight = 28; + int nChannelsIn = 1; + int nExamples = 5; + + DataSetIterator data = new MnistDataSetIterator(nExamples, nExamples); + DataSet mnist = data.next(); + nExamples = mnist.numExamples(); + return mnist.getFeatures().reshape(nExamples, nChannelsIn, inputHeight, inputWidth); + } + + public Layer getContainedConfig() { + int[] kernelSize = new int[] {2, 2}; + int[] stride = new int[] {2, 2}; + int[] padding = new int[] {0, 0}; + int nChannelsIn = 1; + int depth = 2; + + INDArray W = + Nd4j.create(new double[] {0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5}, new int[] {2, 1, 2, 2}); + INDArray b = Nd4j.create(new double[] {1, 1}); + Layer layer = getCNNConfig(nChannelsIn, depth, kernelSize, stride, padding); + layer.setParam("W", W); + layer.setParam("b", b); + + return layer; + } + + public INDArray getContainedData() { + INDArray ret = + Nd4j.create( + new float[] { + 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, + 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, + 4, 4, 4, 4, 4, 4 + }, + new int[] {1, 1, 8, 8}); + return ret; + } + + public INDArray getContainedCol() { + return Nd4j.create( + new float[] { + 1, 1, 1, 1, 3, 3, 3, 3, 1, 1, 1, 1, 3, 3, 3, 3, 1, 1, 1, 1, 3, 3, 3, 
3, 1, 1, 1, 1, 3, 3, + 3, 3, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, + 4, 4, 4, 4 + }, + new int[] {1, 1, 2, 2, 4, 4}); + } + + @Test + public void testCNNMLNPretrain() throws Exception { + // Note CNN does not do pretrain + int numSamples = 10; + int batchSize = 10; + DataSetIterator mnistIter = new MnistDataSetIterator(batchSize, numSamples, true); + + MultiLayerNetwork model = getCNNMLNConfig(false, true); + model.fit(mnistIter); + + mnistIter.reset(); + + MultiLayerNetwork model2 = getCNNMLNConfig(false, true); + model2.fit(mnistIter); + mnistIter.reset(); + + DataSet test = mnistIter.next(); + + Evaluation eval = new Evaluation(); + INDArray output = model.output(test.getFeatures()); + eval.eval(test.getLabels(), output); + double f1Score = eval.f1(); + + Evaluation eval2 = new Evaluation(); + INDArray output2 = model2.output(test.getFeatures()); + eval2.eval(test.getLabels(), output2); + double f1Score2 = eval2.f1(); + + assertEquals(f1Score, f1Score2, 1e-4); + } + + @Test + public void testCNNMLNBackprop() throws Exception { + int numSamples = 10; + int batchSize = 10; + DataSetIterator mnistIter = new MnistDataSetIterator(batchSize, numSamples, true); + + MultiLayerNetwork model = getCNNMLNConfig(true, false); + model.fit(mnistIter); + + MultiLayerNetwork model2 = getCNNMLNConfig(true, false); + model2.fit(mnistIter); + + mnistIter.reset(); + DataSet test = mnistIter.next(); + + Evaluation eval = new Evaluation(); + INDArray output = model.output(test.getFeatures()); + eval.eval(test.getLabels(), output); + double f1Score = eval.f1(); + + Evaluation eval2 = new Evaluation(); + INDArray output2 = model2.output(test.getFeatures()); + eval2.eval(test.getLabels(), output2); + double f1Score2 = eval2.f1(); + + assertEquals(f1Score, f1Score2, 1e-4); + } + + @Test + public void testGetSetParams() { + + MultiLayerNetwork net = getCNNMLNConfig(true, false); + + INDArray paramsOrig = net.getModelParams().dup(); + 
net.setParams(paramsOrig); + + INDArray params2 = net.getModelParams(); + + assertEquals(paramsOrig, params2); + } + + @Test + public void testCnnIm2ColReshaping() { + // This test: a bit unusual in that it tests the *assumptions* of the CNN implementation rather + // than the implementation itself + // Specifically, it tests the row and column orders after reshaping on im2col is reshaped (both + // forward and backward pass) + INDArray input = getInput(); + + // im2col in the required order: want [outW,outH,miniBatch,depthIn,kH,kW], but need to input + // [miniBatch,channels,kH,kW,outH,outW] + // given the current im2col implementation + // To get this: create an array of the order we want, permute it to the order required by im2col + // implementation, and then do im2col on that + // to get old order from required order: permute(2,3,4,5,1,2) + INDArray col = Nd4j.create(new int[] {miniBatch, outH, outW, inDepth, kH, kW}, 'c'); + INDArray col2 = col.permute(0, 3, 4, 5, 1, 2); + Convolution.im2col(input, kH, kW, strides[0], strides[1], pad[0], pad[1], false, col2); + + /* + Expected Output, im2col + - example 0 - + channels 0 channels 1 + h0,w0 h0,w1 h0,w0 h0,w1 + 0 1 1 2 9 10 10 11 + 3 4 4 5 12 13 13 14 + + h1,w0 h1,w1 h1,w0 h1,w1 + 3 4 4 5 12 13 13 14 + 6 7 7 8 15 16 16 17 + + - example 1 - + channels 0 channels 1 + h0,w0 h0,w1 h0,w0 h0,w1 + 18 19 19 20 27 28 28 29 + 21 22 22 23 30 31 31 32 + + h1,w0 h1,w1 h1,w0 h1,w1 + 21 22 22 23 30 31 31 32 + 24 25 25 26 33 34 34 35 + */ + + // Now, after reshaping im2col to 2d, we expect: + // Rows with order (wOut0,hOut0,mb0), (wOut1,hOut0,mb0), (wOut0,hOut1,mb0), (wOut1,hOut1,mb0), + // (wOut0,hOut0,mb1), ... + // Columns with order (d0,kh0,kw0), (d0,kh0,kw1), (d0,kh1,kw0), (d0,kh1,kw1), (d1,kh0,kw0), ... 
+ + INDArray reshapedCol = + Shape.newShapeNoCopy(col, new int[] {miniBatch * outH * outW, inDepth * kH * kW}, false); + + INDArray exp2d = Nd4j.create(outW * outH * miniBatch, inDepth * kH * kW); + exp2d.putRow( + 0, + Nd4j.create( + new double[] { + 0, 1, 3, 4, 9, 10, 12, 13 + })); // wOut0,hOut0,mb0 -> both depths, in order (d0,kh0,kw0), (d0,kh0,kw1), + // (d0,kh1,kw0), (d0,kh1,kw1), (d1,kh0,kw0), (d1,kh0,kw1), (d1,kh1,kw0), + // (d1,kh1,kw1) + exp2d.putRow(1, Nd4j.create(new double[] {1, 2, 4, 5, 10, 11, 13, 14})); // wOut1,hOut0,mb0 + exp2d.putRow(2, Nd4j.create(new double[] {3, 4, 6, 7, 12, 13, 15, 16})); // wOut0,hOut1,mb0 + exp2d.putRow(3, Nd4j.create(new double[] {4, 5, 7, 8, 13, 14, 16, 17})); // wOut1,hOut1,mb0 + exp2d.putRow(4, Nd4j.create(new double[] {18, 19, 21, 22, 27, 28, 30, 31})); // wOut0,hOut0,mb1 + exp2d.putRow(5, Nd4j.create(new double[] {19, 20, 22, 23, 28, 29, 31, 32})); // wOut1,hOut0,mb1 + exp2d.putRow(6, Nd4j.create(new double[] {21, 22, 24, 25, 30, 31, 33, 34})); // wOut0,hOut1,mb1 + exp2d.putRow(7, Nd4j.create(new double[] {22, 23, 25, 26, 31, 32, 34, 35})); // wOut1,hOut1,mb1 + + assertEquals(exp2d, reshapedCol); + + // Check the same thing for the backprop im2col (different order) + INDArray colBackprop = Nd4j.create(new int[] {miniBatch, outH, outW, inDepth, kH, kW}, 'c'); + INDArray colBackprop2 = colBackprop.permute(0, 3, 4, 5, 1, 2); + + Convolution.im2col(input, kH, kW, strides[0], strides[1], pad[0], pad[1], false, colBackprop2); + + INDArray reshapedColBackprop = + Shape.newShapeNoCopy( + colBackprop, new int[] {miniBatch * outH * outW, inDepth * kH * kW}, false); + + // Rows with order (mb0,h0,w0), (mb0,h0,w1), (mb0,h1,w0), (mb0,h1,w1), (mb1,h0,w0), (mb1,h0,w1), + // (mb1,h1,w0), (mb1,h1,w1) + // Columns with order (d0,kh0,kw0), (d0,kh0,kw1), (d0,kh1,kw0), (d0,kh1,kw1), (d1,kh0,kw0), ... 
+ + INDArray exp2dv2 = Nd4j.create(outW * outH * miniBatch, inDepth * kH * kW); + exp2dv2.putRow( + 0, + Nd4j.create( + new double[] { + 0, 1, 3, 4, 9, 10, 12, 13 + })); // wOut0,hOut0,mb0 -> both depths, in order (d0,kh0,kw0), (d0,kh0,kw1), + // (d0,kh1,kw0), (d0,kh1,kw1), (d1,kh0,kw0), (d1,kh0,kw1), (d1,kh1,kw0), + // (d1,kh1,kw1) + exp2dv2.putRow(1, Nd4j.create(new double[] {1, 2, 4, 5, 10, 11, 13, 14})); // wOut1,hOut0,mb0 + exp2dv2.putRow(2, Nd4j.create(new double[] {3, 4, 6, 7, 12, 13, 15, 16})); // wOut0,hOut1,mb0 + exp2dv2.putRow(3, Nd4j.create(new double[] {4, 5, 7, 8, 13, 14, 16, 17})); // wOut1,hOut1,mb0 + exp2dv2.putRow( + 4, Nd4j.create(new double[] {18, 19, 21, 22, 27, 28, 30, 31})); // wOut0,hOut0,mb1 + exp2dv2.putRow( + 5, Nd4j.create(new double[] {19, 20, 22, 23, 28, 29, 31, 32})); // wOut1,hOut0,mb1 + exp2dv2.putRow( + 6, Nd4j.create(new double[] {21, 22, 24, 25, 30, 31, 33, 34})); // wOut0,hOut1,mb1 + exp2dv2.putRow( + 7, Nd4j.create(new double[] {22, 23, 25, 26, 31, 32, 34, 35})); // wOut1,hOut1,mb1 + + assertEquals(exp2dv2, reshapedColBackprop); + } + + @Test + public void testDeltaReshaping() { + // As per above test: testing assumptions of cnn implementation... + + // Delta: initially shape [miniBatch,dOut,outH,outW] + // permute to [dOut,miniB,outH,outW] + // then reshape to [dOut,miniB*outH*outW] + // Expect columns of delta2d to be like: (mb0,h0,w0), (mb0,h0,w1), (mb1,h0,w2), (mb0,h1,w0), ... + // (mb1,...), ..., (mb2,...) 
+ int miniBatch = 3; + int depth = 2; + int outW = 3; + int outH = 3; + + /* + ----- Input delta ----- + example 0: + channels 0 channels 1 + [ 0 1 2 [ 9 10 11 + 3 4 5 12 13 14 + 6 7 8] 15 16 17] + example 1: + [18 19 20 [27 28 29 + 21 22 23 30 31 32 + 24 25 26] 33 34 35] + example 2: + [36 37 38 [45 46 47 + 39 40 41 48 49 50 + 42 43 44] 51 52 53] + */ + + INDArray deltaOrig = Nd4j.create(new int[] {miniBatch, depth, outH, outW}, 'c'); + deltaOrig.put( + new INDArrayIndex[] { + NDArrayIndex.point(0), NDArrayIndex.point(0), NDArrayIndex.all(), NDArrayIndex.all() + }, + Nd4j.create(new double[][] {{0, 1, 2}, {3, 4, 5}, {6, 7, 8}})); + deltaOrig.put( + new INDArrayIndex[] { + NDArrayIndex.point(0), NDArrayIndex.point(1), NDArrayIndex.all(), NDArrayIndex.all() + }, + Nd4j.create(new double[][] {{9, 10, 11}, {12, 13, 14}, {15, 16, 17}})); + deltaOrig.put( + new INDArrayIndex[] { + NDArrayIndex.point(1), NDArrayIndex.point(0), NDArrayIndex.all(), NDArrayIndex.all() + }, + Nd4j.create(new double[][] {{18, 19, 20}, {21, 22, 23}, {24, 25, 26}})); + deltaOrig.put( + new INDArrayIndex[] { + NDArrayIndex.point(1), NDArrayIndex.point(1), NDArrayIndex.all(), NDArrayIndex.all() + }, + Nd4j.create(new double[][] {{27, 28, 29}, {30, 31, 32}, {33, 34, 35}})); + deltaOrig.put( + new INDArrayIndex[] { + NDArrayIndex.point(2), NDArrayIndex.point(0), NDArrayIndex.all(), NDArrayIndex.all() + }, + Nd4j.create(new double[][] {{36, 37, 38}, {39, 40, 41}, {42, 43, 44}})); + deltaOrig.put( + new INDArrayIndex[] { + NDArrayIndex.point(2), NDArrayIndex.point(1), NDArrayIndex.all(), NDArrayIndex.all() + }, + Nd4j.create(new double[][] {{45, 46, 47}, {48, 49, 50}, {51, 52, 53}})); + + INDArray deltaPermute = deltaOrig.permute(1, 0, 2, 3).dup('c'); + INDArray delta2d = + Shape.newShapeNoCopy(deltaPermute, new int[] {depth, miniBatch * outW * outH}, false); + + INDArray exp = + Nd4j.create( + new double[][] { + { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 18, 19, 20, 21, 22, 23, 24, 25, 26, 36, 37, 38, 39, + 40, 
41, 42, 43, 44 + }, // depth0 + { + 9, 10, 11, 12, 13, 14, 15, 16, 17, 27, 28, 29, 30, 31, 32, 33, 34, 35, 45, 46, + 47, 48, 49, 50, 51, 52, 53 + } // depth1 + }) + .castTo(delta2d.dataType()); + + assertEquals(exp, delta2d); + } + + ////////////////////////////////////////////////////////////////////////////////// + + @Test + public void testWeightReshaping() { + // Test assumptions of weight reshaping + // Weights: originally c order, shape [outDepth, inDepth, kH, kw] + // permute (3,2,1,0) + + int depthOut = 2; + int depthIn = 3; + int kH = 2; + int kW = 2; + + /* + ----- Weights ----- + - dOut 0 - + dIn 0 dIn 1 dIn 2 + [ 0 1 [ 4 5 [ 8 9 + 2 3] 6 7] 10 11] + - dOut 1 - + [12 13 [16 17 [20 21 + 14 15] 18 19] 22 23] + */ + + INDArray weightOrig = Nd4j.create(new int[] {depthOut, depthIn, kH, kW}, 'c'); + weightOrig.put( + new INDArrayIndex[] { + NDArrayIndex.point(0), NDArrayIndex.point(0), NDArrayIndex.all(), NDArrayIndex.all() + }, + Nd4j.create(new double[][] {{0, 1}, {2, 3}})); + weightOrig.put( + new INDArrayIndex[] { + NDArrayIndex.point(0), NDArrayIndex.point(1), NDArrayIndex.all(), NDArrayIndex.all() + }, + Nd4j.create(new double[][] {{4, 5}, {6, 7}})); + weightOrig.put( + new INDArrayIndex[] { + NDArrayIndex.point(0), NDArrayIndex.point(2), NDArrayIndex.all(), NDArrayIndex.all() + }, + Nd4j.create(new double[][] {{8, 9}, {10, 11}})); + weightOrig.put( + new INDArrayIndex[] { + NDArrayIndex.point(1), NDArrayIndex.point(0), NDArrayIndex.all(), NDArrayIndex.all() + }, + Nd4j.create(new double[][] {{12, 13}, {14, 15}})); + weightOrig.put( + new INDArrayIndex[] { + NDArrayIndex.point(1), NDArrayIndex.point(1), NDArrayIndex.all(), NDArrayIndex.all() + }, + Nd4j.create(new double[][] {{16, 17}, {18, 19}})); + weightOrig.put( + new INDArrayIndex[] { + NDArrayIndex.point(1), NDArrayIndex.point(2), NDArrayIndex.all(), NDArrayIndex.all() + }, + Nd4j.create(new double[][] {{20, 21}, {22, 23}})); + + INDArray weightPermute = weightOrig.permute(3, 2, 1, 0); + INDArray 
w2d = + Shape.newShapeNoCopy(weightPermute, new int[] {depthIn * kH * kW, depthOut}, true); + + assertNotNull(w2d); + + // Expected order of weight rows, after reshaping: (kw0,kh0,din0), (kw1,kh0,din0), + // (kw0,kh1,din0), (kw1,kh1,din0), (kw0,kh0,din1), ... + INDArray wExp = + Nd4j.create( + new double[][] { + {0, 12}, {1, 13}, {2, 14}, {3, 15}, {4, 16}, {5, 17}, {6, 18}, {7, 19}, {8, 20}, + {9, 21}, {10, 22}, {11, 23} + }) + .castTo(DataType.FLOAT); + + assertEquals(wExp, w2d); + } + + @Test + public void test1dInputType() { + + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() + .convolutionMode(ConvolutionMode.Same) + .layer( + Convolution1DLayer.builder() + .nOut(3) + .kernelSize(2) + .activation(Activation.TANH) + .build()) + .layer(Subsampling1DLayer.builder().kernelSize(2).stride(2).build()) + .layer(Upsampling1D.builder().size(2).build()) + .layer(RnnOutputLayer.builder().nOut(7).activation(Activation.SOFTMAX).build()) + .inputType(InputType.recurrent(10)) + .build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + List l = conf.getLayerActivationTypes(InputType.recurrent(10)); + assertEquals(InputType.recurrent(3, -1), l.get(0)); + assertEquals(InputType.recurrent(3, -1), l.get(1)); + assertEquals(InputType.recurrent(3, -1), l.get(2)); + assertEquals(InputType.recurrent(7, -1), l.get(3)); + + List l2 = conf.getLayerActivationTypes(InputType.recurrent(10, 6)); + assertEquals(InputType.recurrent(3, 6), l2.get(0)); + assertEquals(InputType.recurrent(3, 3), l2.get(1)); + assertEquals(InputType.recurrent(3, 6), l2.get(2)); + assertEquals(InputType.recurrent(7, 6), l2.get(3)); + + INDArray in = Nd4j.create(2, 10, 6); + INDArray out = net.output(in); + assertArrayEquals(new long[] {2, 7, 6}, out.shape()); + } + + @Test + public void testDeconvBadInput() { + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() + .list() + .layer(Deconvolution2D.builder().nIn(5).nOut(3).build()) + .build(); + MultiLayerNetwork 
net = new MultiLayerNetwork(conf); + net.init(); + + INDArray badInput = Nd4j.create(DataType.FLOAT, 1, 10, 5, 5); + try { + net.output(badInput); + } catch (DL4JInvalidInputException e) { + String msg = e.getMessage(); + assertTrue( + msg.contains("Deconvolution2D") && msg.contains("input") && msg.contains("channels"), + msg); + } + } + + @Test + public void testConv1dCausalAllowed() { + Convolution1DLayer.builder().convolutionMode(ConvolutionMode.Causal).kernelSize(2).build(); + Subsampling1DLayer.builder().convolutionMode(ConvolutionMode.Causal).kernelSize(2).build(); + } + + @Test + public void testConv2dNoCausalAllowed() { + + try { + ConvolutionLayer.builder().convolutionMode(ConvolutionMode.Causal).build(); + fail("Expected exception"); + } catch (Throwable t) { + String m = t.getMessage().toLowerCase(); + assertTrue(m.contains("causal") && m.contains("1d"), m); } - @Test - public void testCNNBiasInit() { - ConvolutionLayer cnn = ConvolutionLayer.builder().nIn(1).nOut(3).biasInit(1).build(); - - NeuralNetConfiguration conf = NeuralNetConfiguration.builder().layer(cnn).build(); - - val numParams = conf.getFlattenedLayerConfigurations().get(0).initializer().numParams(conf); - INDArray params = Nd4j.create(1, numParams); - Layer layer = conf.getFlattenedLayerConfigurations().get(0).instantiate(conf, null, 0, params, true, params.dataType()); - - assertEquals(1, layer.getParam("b").size(0)); + try { + Deconvolution2D.builder().convolutionMode(ConvolutionMode.Causal).build(); + fail("Expected exception"); + } catch (Throwable t) { + String m = t.getMessage().toLowerCase(); + assertTrue(m.contains("causal") && m.contains("1d"), m); } - @Test - public void testCNNInputSetupMNIST() throws Exception { - INDArray input = getMnistData(); - Layer layer = getMNISTConfig(); - layer.activate(input, false, LayerWorkspaceMgr.noWorkspaces()); - - assertEquals(input, layer.input()); - assertArrayEquals(input.shape(), layer.input().shape()); + try { + 
DepthwiseConvolution2D.builder().convolutionMode(ConvolutionMode.Causal).build(); + fail("Expected exception"); + } catch (Throwable t) { + String m = t.getMessage().toLowerCase(); + assertTrue(m.contains("causal") && m.contains("1d"), m); } - @Test - public void testFeatureMapShapeMNIST() throws Exception { - int inputWidth = 28; - int[] stride = new int[] {1, 1}; - int[] padding = new int[] {0, 0}; - int[] kernelSize = new int[] {9, 9}; - int nChannelsIn = 1; - int depth = 20; - int featureMapWidth = (inputWidth + padding[1] * 2 - kernelSize[1]) / stride[1] + 1; - - INDArray input = getMnistData(); - - Layer layer = getCNNConfig(nChannelsIn, depth, kernelSize, stride, padding); - INDArray convActivations = layer.activate(input, false, LayerWorkspaceMgr.noWorkspaces()); - - assertEquals(featureMapWidth, convActivations.size(2)); - assertEquals(depth, convActivations.size(1)); + try { + SeparableConvolution2D.builder().convolutionMode(ConvolutionMode.Causal).build(); + fail("Expected exception"); + } catch (Throwable t) { + String m = t.getMessage().toLowerCase(); + assertTrue(m.contains("causal") && m.contains("1d"), m); } - @Test - public void testActivateResultsContained() { - Layer layer = getContainedConfig(); - INDArray input = getContainedData(); - INDArray expectedOutput = Nd4j.create(new float[] {0.98201379f, 0.98201379f, 0.98201379f, 0.98201379f, 0.99966465f, - 0.99966465f, 0.99966465f, 0.99966465f, 0.98201379f, 0.98201379f, 0.98201379f, 0.98201379f, 0.99966465f, - 0.99966465f, 0.99966465f, 0.99966465f, 0.98201379f, 0.98201379f, 0.98201379f, 0.98201379f, 0.99966465f, - 0.99966465f, 0.99966465f, 0.99966465f, 0.98201379f, 0.98201379f, 0.98201379f, 0.98201379f, 0.99966465f, - 0.99966465f, 0.99966465f, 0.99966465f}, new int[] {1, 2, 4, 4}); + try { + SubsamplingLayer.builder().convolutionMode(ConvolutionMode.Causal).build(); + fail("Expected exception"); + } catch (Throwable t) { + String m = t.getMessage().toLowerCase(); + assertTrue(m.contains("causal") && 
m.contains("1d"), m); + } + } - INDArray convActivations = layer.activate(input, false, LayerWorkspaceMgr.noWorkspaces()); - - assertArrayEquals(expectedOutput.shape(), convActivations.shape()); - assertEquals(expectedOutput, convActivations); + @Test + public void testConv3dNoCausalAllowed() { + try { + Convolution3D.builder().convolutionMode(ConvolutionMode.Causal).build(); + fail("Expected exception"); + } catch (Throwable t) { + String m = t.getMessage().toLowerCase(); + assertTrue(m.contains("causal") && m.contains("1d"), m); } - ////////////////////////////////////////////////////////////////////////////////// - - private static Layer getCNNConfig(int nIn, int nOut, int[] kernelSize, int[] stride, int[] padding) { - - ConvolutionLayer layer = ConvolutionLayer.builder(kernelSize, stride, padding).nIn(nIn).nOut(nOut) - .activation(Activation.SIGMOID).build(); - - NeuralNetConfiguration conf = NeuralNetConfiguration.builder().layer(layer).build(); - - val numParams = conf.getFlattenedLayerConfigurations().get(0).initializer().numParams(conf); - INDArray params = Nd4j.create(1, numParams); - return conf.getFlattenedLayerConfigurations().get(0).instantiate(conf, null, 0, params, true, params.dataType()); - } - - public Layer getMNISTConfig() { - int[] kernelSize = new int[] {9, 9}; - int[] stride = new int[] {1, 1}; - int[] padding = new int[] {1, 1}; - int nChannelsIn = 1; - int depth = 20; - - return getCNNConfig(nChannelsIn, depth, kernelSize, stride, padding); - - } - - public INDArray getMnistData() throws Exception { - int inputWidth = 28; - int inputHeight = 28; - int nChannelsIn = 1; - int nExamples = 5; - - DataSetIterator data = new MnistDataSetIterator(nExamples, nExamples); - DataSet mnist = data.next(); - nExamples = mnist.numExamples(); - return mnist.getFeatures().reshape(nExamples, nChannelsIn, inputHeight, inputWidth); - } - - public Layer getContainedConfig() { - int[] kernelSize = new int[] {2, 2}; - int[] stride = new int[] {2, 2}; - int[] 
padding = new int[] {0, 0}; - int nChannelsIn = 1; - int depth = 2; - - INDArray W = Nd4j.create(new double[] {0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5}, new int[] {2, 1, 2, 2}); - INDArray b = Nd4j.create(new double[] {1, 1}); - Layer layer = getCNNConfig(nChannelsIn, depth, kernelSize, stride, padding); - layer.setParam("W", W); - layer.setParam("b", b); - - return layer; - - } - - public INDArray getContainedData() { - INDArray ret = Nd4j.create(new float[] {1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, - 4, 4, 4, 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, - 4, 4, 4, 4, 4, 4, 4, 4}, new int[] {1, 1, 8, 8}); - return ret; - } - - public INDArray getContainedCol() { - return Nd4j.create(new float[] {1, 1, 1, 1, 3, 3, 3, 3, 1, 1, 1, 1, 3, 3, 3, 3, 1, 1, 1, 1, 3, 3, 3, 3, 1, 1, - 1, 1, 3, 3, 3, 3, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, - 2, 2, 4, 4, 4, 4}, new int[] {1, 1, 2, 2, 4, 4}); - } - - - - ////////////////////////////////////////////////////////////////////////////////// - - - @Test - public void testCNNMLNPretrain() throws Exception { - // Note CNN does not do pretrain - int numSamples = 10; - int batchSize = 10; - DataSetIterator mnistIter = new MnistDataSetIterator(batchSize, numSamples, true); - - MultiLayerNetwork model = getCNNMLNConfig(false, true); - model.fit(mnistIter); - - mnistIter.reset(); - - MultiLayerNetwork model2 = getCNNMLNConfig(false, true); - model2.fit(mnistIter); - mnistIter.reset(); - - DataSet test = mnistIter.next(); - - Evaluation eval = new Evaluation(); - INDArray output = model.output(test.getFeatures()); - eval.eval(test.getLabels(), output); - double f1Score = eval.f1(); - - Evaluation eval2 = new Evaluation(); - INDArray output2 = model2.output(test.getFeatures()); - eval2.eval(test.getLabels(), output2); - double f1Score2 = eval2.f1(); - - assertEquals(f1Score, f1Score2, 1e-4); - - - } - - - @Test - public void 
testCNNMLNBackprop() throws Exception { - int numSamples = 10; - int batchSize = 10; - DataSetIterator mnistIter = new MnistDataSetIterator(batchSize, numSamples, true); - - MultiLayerNetwork model = getCNNMLNConfig(true, false); - model.fit(mnistIter); - - MultiLayerNetwork model2 = getCNNMLNConfig(true, false); - model2.fit(mnistIter); - - mnistIter.reset(); - DataSet test = mnistIter.next(); - - Evaluation eval = new Evaluation(); - INDArray output = model.output(test.getFeatures()); - eval.eval(test.getLabels(), output); - double f1Score = eval.f1(); - - Evaluation eval2 = new Evaluation(); - INDArray output2 = model2.output(test.getFeatures()); - eval2.eval(test.getLabels(), output2); - double f1Score2 = eval2.f1(); - - assertEquals(f1Score, f1Score2, 1e-4); - - } - - @Test - public void testGetSetParams() { - - MultiLayerNetwork net = getCNNMLNConfig(true, false); - - INDArray paramsOrig = net.getModelParams().dup(); - net.setParams(paramsOrig); - - INDArray params2 = net.getModelParams(); - - assertEquals(paramsOrig, params2); - } - - private static final int kH = 2; - private static final int kW = 2; - private static final int[] strides = {1, 1}; - private static final int[] pad = {0, 0}; - - private static final int miniBatch = 2; - private static final int inDepth = 2; - private static final int height = 3; - private static final int width = 3; - - private static final int outW = 2; - private static final int outH = 2; - - private static INDArray getInput() { - - /* - ----- Input images ----- - example 0: - channels 0 channels 1 - [ 0 1 2 [ 9 10 11 - 3 4 5 12 13 14 - 6 7 8] 15 16 17] - example 1: - [18 19 20 [27 28 29 - 21 22 23 30 31 32 - 24 25 26] 33 34 35] - */ - - INDArray input = Nd4j.create(new int[] {miniBatch, inDepth, height, width}, 'c'); - input.put(new INDArrayIndex[] {NDArrayIndex.point(0), NDArrayIndex.point(0), NDArrayIndex.all(), - NDArrayIndex.all()}, Nd4j.create(new double[][] {{0, 1, 2}, {3, 4, 5}, {6, 7, 8}})); - input.put(new 
INDArrayIndex[] {NDArrayIndex.point(0), NDArrayIndex.point(1), NDArrayIndex.all(), - NDArrayIndex.all()}, Nd4j.create(new double[][] {{9, 10, 11}, {12, 13, 14}, {15, 16, 17}})); - input.put(new INDArrayIndex[] {NDArrayIndex.point(1), NDArrayIndex.point(0), NDArrayIndex.all(), - NDArrayIndex.all()}, Nd4j.create(new double[][] {{18, 19, 20}, {21, 22, 23}, {24, 25, 26}})); - input.put(new INDArrayIndex[] {NDArrayIndex.point(1), NDArrayIndex.point(1), NDArrayIndex.all(), - NDArrayIndex.all()}, Nd4j.create(new double[][] {{27, 28, 29}, {30, 31, 32}, {33, 34, 35}})); - - return input; - } - - @Test - public void testCnnIm2ColReshaping() { - //This test: a bit unusual in that it tests the *assumptions* of the CNN implementation rather than the implementation itself - //Specifically, it tests the row and column orders after reshaping on im2col is reshaped (both forward and backward pass) - INDArray input = getInput(); - - //im2col in the required order: want [outW,outH,miniBatch,depthIn,kH,kW], but need to input [miniBatch,channels,kH,kW,outH,outW] - // given the current im2col implementation - //To get this: create an array of the order we want, permute it to the order required by im2col implementation, and then do im2col on that - //to get old order from required order: permute(2,3,4,5,1,2) - INDArray col = Nd4j.create(new int[] {miniBatch, outH, outW, inDepth, kH, kW}, 'c'); - INDArray col2 = col.permute(0, 3, 4, 5, 1, 2); - Convolution.im2col(input, kH, kW, strides[0], strides[1], pad[0], pad[1], false, col2); - - /* - Expected Output, im2col - - example 0 - - channels 0 channels 1 - h0,w0 h0,w1 h0,w0 h0,w1 - 0 1 1 2 9 10 10 11 - 3 4 4 5 12 13 13 14 - - h1,w0 h1,w1 h1,w0 h1,w1 - 3 4 4 5 12 13 13 14 - 6 7 7 8 15 16 16 17 - - - example 1 - - channels 0 channels 1 - h0,w0 h0,w1 h0,w0 h0,w1 - 18 19 19 20 27 28 28 29 - 21 22 22 23 30 31 31 32 - - h1,w0 h1,w1 h1,w0 h1,w1 - 21 22 22 23 30 31 31 32 - 24 25 25 26 33 34 34 35 - */ - - //Now, after reshaping im2col to 2d, we 
expect: - //Rows with order (wOut0,hOut0,mb0), (wOut1,hOut0,mb0), (wOut0,hOut1,mb0), (wOut1,hOut1,mb0), (wOut0,hOut0,mb1), ... - //Columns with order (d0,kh0,kw0), (d0,kh0,kw1), (d0,kh1,kw0), (d0,kh1,kw1), (d1,kh0,kw0), ... - - INDArray reshapedCol = Shape.newShapeNoCopy(col, new int[] {miniBatch * outH * outW, inDepth * kH * kW}, false); - - INDArray exp2d = Nd4j.create(outW * outH * miniBatch, inDepth * kH * kW); - exp2d.putRow(0, Nd4j.create(new double[] {0, 1, 3, 4, 9, 10, 12, 13})); //wOut0,hOut0,mb0 -> both depths, in order (d0,kh0,kw0), (d0,kh0,kw1), (d0,kh1,kw0), (d0,kh1,kw1), (d1,kh0,kw0), (d1,kh0,kw1), (d1,kh1,kw0), (d1,kh1,kw1) - exp2d.putRow(1, Nd4j.create(new double[] {1, 2, 4, 5, 10, 11, 13, 14})); //wOut1,hOut0,mb0 - exp2d.putRow(2, Nd4j.create(new double[] {3, 4, 6, 7, 12, 13, 15, 16})); //wOut0,hOut1,mb0 - exp2d.putRow(3, Nd4j.create(new double[] {4, 5, 7, 8, 13, 14, 16, 17})); //wOut1,hOut1,mb0 - exp2d.putRow(4, Nd4j.create(new double[] {18, 19, 21, 22, 27, 28, 30, 31})); //wOut0,hOut0,mb1 - exp2d.putRow(5, Nd4j.create(new double[] {19, 20, 22, 23, 28, 29, 31, 32})); //wOut1,hOut0,mb1 - exp2d.putRow(6, Nd4j.create(new double[] {21, 22, 24, 25, 30, 31, 33, 34})); //wOut0,hOut1,mb1 - exp2d.putRow(7, Nd4j.create(new double[] {22, 23, 25, 26, 31, 32, 34, 35})); //wOut1,hOut1,mb1 - - assertEquals(exp2d, reshapedCol); - - //Check the same thing for the backprop im2col (different order) - INDArray colBackprop = Nd4j.create(new int[] {miniBatch, outH, outW, inDepth, kH, kW}, 'c'); - INDArray colBackprop2 = colBackprop.permute(0, 3, 4, 5, 1, 2); - - Convolution.im2col(input, kH, kW, strides[0], strides[1], pad[0], pad[1], false, colBackprop2); - - INDArray reshapedColBackprop = Shape.newShapeNoCopy(colBackprop, - new int[] {miniBatch * outH * outW, inDepth * kH * kW}, false); - - //Rows with order (mb0,h0,w0), (mb0,h0,w1), (mb0,h1,w0), (mb0,h1,w1), (mb1,h0,w0), (mb1,h0,w1), (mb1,h1,w0), (mb1,h1,w1) - //Columns with order (d0,kh0,kw0), (d0,kh0,kw1), 
(d0,kh1,kw0), (d0,kh1,kw1), (d1,kh0,kw0), ... - - INDArray exp2dv2 = Nd4j.create(outW * outH * miniBatch, inDepth * kH * kW); - exp2dv2.putRow(0, Nd4j.create(new double[] {0, 1, 3, 4, 9, 10, 12, 13})); //wOut0,hOut0,mb0 -> both depths, in order (d0,kh0,kw0), (d0,kh0,kw1), (d0,kh1,kw0), (d0,kh1,kw1), (d1,kh0,kw0), (d1,kh0,kw1), (d1,kh1,kw0), (d1,kh1,kw1) - exp2dv2.putRow(1, Nd4j.create(new double[] {1, 2, 4, 5, 10, 11, 13, 14})); //wOut1,hOut0,mb0 - exp2dv2.putRow(2, Nd4j.create(new double[] {3, 4, 6, 7, 12, 13, 15, 16})); //wOut0,hOut1,mb0 - exp2dv2.putRow(3, Nd4j.create(new double[] {4, 5, 7, 8, 13, 14, 16, 17})); //wOut1,hOut1,mb0 - exp2dv2.putRow(4, Nd4j.create(new double[] {18, 19, 21, 22, 27, 28, 30, 31})); //wOut0,hOut0,mb1 - exp2dv2.putRow(5, Nd4j.create(new double[] {19, 20, 22, 23, 28, 29, 31, 32})); //wOut1,hOut0,mb1 - exp2dv2.putRow(6, Nd4j.create(new double[] {21, 22, 24, 25, 30, 31, 33, 34})); //wOut0,hOut1,mb1 - exp2dv2.putRow(7, Nd4j.create(new double[] {22, 23, 25, 26, 31, 32, 34, 35})); //wOut1,hOut1,mb1 - - assertEquals(exp2dv2, reshapedColBackprop); - } - - @Test - public void testDeltaReshaping() { - //As per above test: testing assumptions of cnn implementation... - - //Delta: initially shape [miniBatch,dOut,outH,outW] - //permute to [dOut,miniB,outH,outW] - //then reshape to [dOut,miniB*outH*outW] - //Expect columns of delta2d to be like: (mb0,h0,w0), (mb0,h0,w1), (mb1,h0,w2), (mb0,h1,w0), ... (mb1,...), ..., (mb2,...) 
- int miniBatch = 3; - int depth = 2; - int outW = 3; - int outH = 3; - - /* - ----- Input delta ----- - example 0: - channels 0 channels 1 - [ 0 1 2 [ 9 10 11 - 3 4 5 12 13 14 - 6 7 8] 15 16 17] - example 1: - [18 19 20 [27 28 29 - 21 22 23 30 31 32 - 24 25 26] 33 34 35] - example 2: - [36 37 38 [45 46 47 - 39 40 41 48 49 50 - 42 43 44] 51 52 53] - */ - - INDArray deltaOrig = Nd4j.create(new int[] {miniBatch, depth, outH, outW}, 'c'); - deltaOrig.put(new INDArrayIndex[] {NDArrayIndex.point(0), NDArrayIndex.point(0), NDArrayIndex.all(), - NDArrayIndex.all()}, Nd4j.create(new double[][] {{0, 1, 2}, {3, 4, 5}, {6, 7, 8}})); - deltaOrig.put(new INDArrayIndex[] {NDArrayIndex.point(0), NDArrayIndex.point(1), NDArrayIndex.all(), - NDArrayIndex.all()}, Nd4j.create(new double[][] {{9, 10, 11}, {12, 13, 14}, {15, 16, 17}})); - deltaOrig.put(new INDArrayIndex[] {NDArrayIndex.point(1), NDArrayIndex.point(0), NDArrayIndex.all(), - NDArrayIndex.all()}, Nd4j.create(new double[][] {{18, 19, 20}, {21, 22, 23}, {24, 25, 26}})); - deltaOrig.put(new INDArrayIndex[] {NDArrayIndex.point(1), NDArrayIndex.point(1), NDArrayIndex.all(), - NDArrayIndex.all()}, Nd4j.create(new double[][] {{27, 28, 29}, {30, 31, 32}, {33, 34, 35}})); - deltaOrig.put(new INDArrayIndex[] {NDArrayIndex.point(2), NDArrayIndex.point(0), NDArrayIndex.all(), - NDArrayIndex.all()}, Nd4j.create(new double[][] {{36, 37, 38}, {39, 40, 41}, {42, 43, 44}})); - deltaOrig.put(new INDArrayIndex[] {NDArrayIndex.point(2), NDArrayIndex.point(1), NDArrayIndex.all(), - NDArrayIndex.all()}, Nd4j.create(new double[][] {{45, 46, 47}, {48, 49, 50}, {51, 52, 53}})); - - - INDArray deltaPermute = deltaOrig.permute(1, 0, 2, 3).dup('c'); - INDArray delta2d = Shape.newShapeNoCopy(deltaPermute, new int[] {depth, miniBatch * outW * outH}, false); - - INDArray exp = Nd4j.create(new double[][] { - {0, 1, 2, 3, 4, 5, 6, 7, 8, 18, 19, 20, 21, 22, 23, 24, 25, 26, 36, 37, 38, 39, 40, 41, 42, 43, - 44}, //depth0 - {9, 10, 11, 12, 13, 14, 15, 16, 
17, 27, 28, 29, 30, 31, 32, 33, 34, 35, 45, 46, 47, 48, 49, 50, - 51, 52, 53} //depth1 - }).castTo(delta2d.dataType()); - - assertEquals(exp, delta2d); - } - - @Test - public void testWeightReshaping() { - //Test assumptions of weight reshaping - //Weights: originally c order, shape [outDepth, inDepth, kH, kw] - //permute (3,2,1,0) - - int depthOut = 2; - int depthIn = 3; - int kH = 2; - int kW = 2; - - /* - ----- Weights ----- - - dOut 0 - - dIn 0 dIn 1 dIn 2 - [ 0 1 [ 4 5 [ 8 9 - 2 3] 6 7] 10 11] - - dOut 1 - - [12 13 [16 17 [20 21 - 14 15] 18 19] 22 23] - */ - - INDArray weightOrig = Nd4j.create(new int[] {depthOut, depthIn, kH, kW}, 'c'); - weightOrig.put(new INDArrayIndex[] {NDArrayIndex.point(0), NDArrayIndex.point(0), NDArrayIndex.all(), - NDArrayIndex.all()}, Nd4j.create(new double[][] {{0, 1}, {2, 3}})); - weightOrig.put(new INDArrayIndex[] {NDArrayIndex.point(0), NDArrayIndex.point(1), NDArrayIndex.all(), - NDArrayIndex.all()}, Nd4j.create(new double[][] {{4, 5}, {6, 7}})); - weightOrig.put(new INDArrayIndex[] {NDArrayIndex.point(0), NDArrayIndex.point(2), NDArrayIndex.all(), - NDArrayIndex.all()}, Nd4j.create(new double[][] {{8, 9}, {10, 11}})); - weightOrig.put(new INDArrayIndex[] {NDArrayIndex.point(1), NDArrayIndex.point(0), NDArrayIndex.all(), - NDArrayIndex.all()}, Nd4j.create(new double[][] {{12, 13}, {14, 15}})); - weightOrig.put(new INDArrayIndex[] {NDArrayIndex.point(1), NDArrayIndex.point(1), NDArrayIndex.all(), - NDArrayIndex.all()}, Nd4j.create(new double[][] {{16, 17}, {18, 19}})); - weightOrig.put(new INDArrayIndex[] {NDArrayIndex.point(1), NDArrayIndex.point(2), NDArrayIndex.all(), - NDArrayIndex.all()}, Nd4j.create(new double[][] {{20, 21}, {22, 23}})); - - INDArray weightPermute = weightOrig.permute(3, 2, 1, 0); - INDArray w2d = Shape.newShapeNoCopy(weightPermute, new int[] {depthIn * kH * kW, depthOut}, true); - - assertNotNull(w2d); - - //Expected order of weight rows, after reshaping: (kw0,kh0,din0), (kw1,kh0,din0), (kw0,kh1,din0), 
(kw1,kh1,din0), (kw0,kh0,din1), ... - INDArray wExp = Nd4j.create(new double[][] {{0, 12}, {1, 13}, {2, 14}, {3, 15}, {4, 16}, {5, 17}, {6, 18}, - {7, 19}, {8, 20}, {9, 21}, {10, 22}, {11, 23}}).castTo(DataType.FLOAT); - - assertEquals(wExp, w2d); - } - - ////////////////////////////////////////////////////////////////////////////////// - - private static MultiLayerNetwork getCNNMLNConfig(boolean backprop, boolean pretrain) { - int outputNum = 10; - int seed = 123; - - NeuralNetConfiguration.NeuralNetConfigurationBuilder conf = - NeuralNetConfiguration.builder().seed(seed) - .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT).list() - .layer(0, ConvolutionLayer.builder(new int[] {10, 10}).nOut(6).build()) - .layer(1, SubsamplingLayer.builder(SubsamplingLayer.PoolingType.MAX, - new int[] {2, 2}).stride(1, 1).build()) - .layer(2, OutputLayer.builder( - LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) - .nOut(outputNum).weightInit(WeightInit.XAVIER) - .activation(Activation.SOFTMAX).build()) - .inputType(InputType.convolutionalFlat(28, 28, 1)); - - MultiLayerNetwork model = new MultiLayerNetwork(conf.build()); - model.init(); - - return model; - } - - - - @Test - public void test1dInputType(){ - - NeuralNetConfiguration conf = NeuralNetConfiguration.builder() - .convolutionMode(ConvolutionMode.Same) - - .layer(Convolution1DLayer.builder().nOut(3).kernelSize(2).activation(Activation.TANH).build()) - .layer(Subsampling1DLayer.builder().kernelSize(2).stride(2).build()) - .layer(Upsampling1D.builder().size(2).build()) - .layer(RnnOutputLayer.builder().nOut(7).activation(Activation.SOFTMAX).build()) - .inputType(InputType.recurrent(10)) - .build(); - - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - - List l = conf.getLayerActivationTypes(InputType.recurrent(10)); - assertEquals(InputType.recurrent(3, -1), l.get(0)); - assertEquals(InputType.recurrent(3, -1), l.get(1)); - assertEquals(InputType.recurrent(3, -1), l.get(2)); - 
assertEquals(InputType.recurrent(7, -1), l.get(3)); - - List l2 = conf.getLayerActivationTypes(InputType.recurrent(10, 6)); - assertEquals(InputType.recurrent(3, 6), l2.get(0)); - assertEquals(InputType.recurrent(3, 3), l2.get(1)); - assertEquals(InputType.recurrent(3, 6), l2.get(2)); - assertEquals(InputType.recurrent(7, 6), l2.get(3)); - - - INDArray in = Nd4j.create(2, 10, 6); - INDArray out = net.output(in); - assertArrayEquals(new long[]{2,7,6}, out.shape()); - } - - @Test - public void testDeconvBadInput(){ - NeuralNetConfiguration conf = NeuralNetConfiguration.builder() - .list() - .layer(Deconvolution2D.builder().nIn(5).nOut(3).build()) - .build(); - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - - INDArray badInput = Nd4j.create(DataType.FLOAT, 1, 10, 5, 5); - try { - net.output(badInput); - } catch (DL4JInvalidInputException e){ - String msg = e.getMessage(); - assertTrue(msg.contains("Deconvolution2D") && msg.contains("input") && msg.contains("channels"), msg); - } - } - - @Test - public void testConv1dCausalAllowed(){ - Convolution1DLayer.builder().convolutionMode(ConvolutionMode.Causal).kernelSize(2).build(); - Subsampling1DLayer.builder().convolutionMode(ConvolutionMode.Causal).kernelSize(2).build(); - } - - @Test - public void testConv2dNoCausalAllowed(){ - - try{ - ConvolutionLayer.builder().convolutionMode(ConvolutionMode.Causal).build(); - fail("Expected exception"); - } catch (Throwable t){ - String m = t.getMessage().toLowerCase(); - assertTrue(m.contains("causal") && m.contains("1d"), m); - } - - try{ - Deconvolution2D.builder().convolutionMode(ConvolutionMode.Causal).build(); - fail("Expected exception"); - } catch (Throwable t){ - String m = t.getMessage().toLowerCase(); - assertTrue(m.contains("causal") && m.contains("1d"), m); - } - - try{ - DepthwiseConvolution2D.builder().convolutionMode(ConvolutionMode.Causal).build(); - fail("Expected exception"); - } catch (Throwable t){ - String m = t.getMessage().toLowerCase(); 
- assertTrue(m.contains("causal") && m.contains("1d"), m); - } - - try{ - SeparableConvolution2D.builder().convolutionMode(ConvolutionMode.Causal).build(); - fail("Expected exception"); - } catch (Throwable t){ - String m = t.getMessage().toLowerCase(); - assertTrue(m.contains("causal") && m.contains("1d"), m); - } - - try{ - SubsamplingLayer.builder().convolutionMode(ConvolutionMode.Causal).build(); - fail("Expected exception"); - } catch (Throwable t){ - String m = t.getMessage().toLowerCase(); - assertTrue(m.contains("causal") && m.contains("1d"), m); - } - } - - @Test - public void testConv3dNoCausalAllowed(){ - try{ - Convolution3D.builder().convolutionMode(ConvolutionMode.Causal).build(); - fail("Expected exception"); - } catch (Throwable t){ - String m = t.getMessage().toLowerCase(); - assertTrue(m.contains("causal") && m.contains("1d"), m); - } - - try{ - Subsampling3DLayer.builder().convolutionMode(ConvolutionMode.Causal).build(); - fail("Expected exception"); - } catch (Throwable t){ - String m = t.getMessage().toLowerCase(); - assertTrue(m.contains("causal") && m.contains("1d"), m); - } + try { + Subsampling3DLayer.builder().convolutionMode(ConvolutionMode.Causal).build(); + fail("Expected exception"); + } catch (Throwable t) { + String m = t.getMessage().toLowerCase(); + assertTrue(m.contains("causal") && m.contains("1d"), m); } + } } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/LocallyConnectedLayerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/LocallyConnectedLayerTest.java index 93ffb0be4..6fda18385 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/LocallyConnectedLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/LocallyConnectedLayerTest.java @@ -71,7 +71,7 @@ public class LocallyConnectedLayerTest extends BaseDL4JTest { 
.layer(LocallyConnected2D.builder().kernelSize(8, 8).nIn(3) .stride(4, 4).nOut(16).dropOut(0.5) .convolutionMode(ConvolutionMode.Strict) - .setInputSize(28, 28) + .inputSize(28, 28) .activation(Activation.RELU).weightInit( WeightInit.XAVIER) .build()) @@ -94,11 +94,10 @@ public class LocallyConnectedLayerTest extends BaseDL4JTest { NeuralNetConfiguration.NeuralNetConfigurationBuilder builder = NeuralNetConfiguration.builder().seed(123) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).l2(2e-4) .updater(new Nesterovs(0.9)).dropOut(0.5) - .list() .layer(LocallyConnected1D.builder().kernelSize(4).nIn(3) .stride(1).nOut(16).dropOut(0.5) .convolutionMode(ConvolutionMode.Strict) - .setInputSize(28) + .inputSize(28) .activation(Activation.RELU).weightInit( WeightInit.XAVIER) .build()) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/SpaceToDepthTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/SpaceToDepthTest.java index 1c47e1b2d..b8548ea33 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/SpaceToDepthTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/convolution/SpaceToDepthTest.java @@ -61,7 +61,7 @@ public class SpaceToDepthTest extends BaseDL4JTest { private Layer getSpaceToDepthLayer() { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer).seed(123) - .layer(new SpaceToDepthLayer.Builder(blockSize, dataFormat).build()).build(); + .layer(SpaceToDepthLayer.builder().blockSize(blockSize).dataFormat(dataFormat.toFormat()).build()).build(); return conf.getFlattenedLayerConfigurations().get(0).instantiate(conf, null, 0, null, true, Nd4j.defaultFloatingPointType()); } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/TestCustomActivation.java 
b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/TestCustomActivation.java index 91b695e91..7627b0657 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/TestCustomActivation.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/TestCustomActivation.java @@ -20,6 +20,8 @@ package org.deeplearning4j.nn.layers.custom; +import static org.junit.jupiter.api.Assertions.assertEquals; + import org.deeplearning4j.BaseDL4JTest; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.layers.DenseLayer; @@ -30,31 +32,38 @@ import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.learning.config.Sgd; import org.nd4j.linalg.lossfunctions.LossFunctions; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - public class TestCustomActivation extends BaseDL4JTest { - @Test - public void testCustomActivationFn() { - //Second: let's create a MultiLayerCofiguration with one, and check JSON and YAML config actually works... + @Test + public void testCustomActivationFn() { + // Second: let's create a MultiLayerCofiguration with one, and check JSON and YAML config + // actually works... 
- NeuralNetConfiguration conf = NeuralNetConfiguration.builder().updater(new Sgd(0.1)).list() - .layer(0, DenseLayer.builder().nIn(10).nOut(10).activation(new CustomActivation()).build()) - .layer(1, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(10).nOut(10).build()) - .build(); + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() + .updater(new Sgd(0.1)) - String json = conf.toJson(); - String yaml = conf.toYaml(); + .layer( + 0, DenseLayer.builder().nIn(10).nOut(10).activation(new CustomActivation()).build()) + .layer( + 1, + OutputLayer.builder() + .lossFunction(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX) + .nIn(10) + .nOut(10) + .build()) + .build(); -// System.out.println(json); + String json = conf.toJson(); + String yaml = conf.toYaml(); - NeuralNetConfiguration confFromJson = NeuralNetConfiguration.fromJson(json); - assertEquals(conf, confFromJson); + // System.out.println(json); - NeuralNetConfiguration confFromYaml = NeuralNetConfiguration.fromYaml(yaml); - assertEquals(conf, confFromYaml); - - } + NeuralNetConfiguration confFromJson = NeuralNetConfiguration.fromJson(json); + assertEquals(conf, confFromJson); + NeuralNetConfiguration confFromYaml = NeuralNetConfiguration.fromYaml(yaml); + assertEquals(conf, confFromYaml); + } } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/TestCustomLayers.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/TestCustomLayers.java index a88c16bf3..7e35d071b 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/TestCustomLayers.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/custom/TestCustomLayers.java @@ -119,7 +119,7 @@ public class TestCustomLayers extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345).list() .layer(0, 
DenseLayer.builder().nIn(10).nOut(10).build()) - .layer(1, new CustomOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) + .layer(1, CustomOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX) .nIn(10).nOut(10).build()) .build(); @@ -172,7 +172,7 @@ public class TestCustomLayers extends BaseDL4JTest { ComputationGraphConfiguration conf = NeuralNetConfiguration.builder().seed(12345) .graphBuilder().addInputs("in") .addLayer("0", DenseLayer.builder().nIn(10).nOut(10).build(), "in").addLayer("1", - new CustomOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT).nIn(10) + CustomOutputLayer.builder().lossFunction(LossFunctions.LossFunction.MCXENT).nIn(10) .nOut(10).activation(Activation.SOFTMAX).build(), "0") .setOutputs("1").build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/objdetect/TestYolo2OutputLayer.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/objdetect/TestYolo2OutputLayer.java index c63f7f99e..56a8fee95 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/objdetect/TestYolo2OutputLayer.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/objdetect/TestYolo2OutputLayer.java @@ -91,8 +91,8 @@ public class TestYolo2OutputLayer extends BaseDL4JTest { .l2(0.01) .list() .layer(ConvolutionLayer.builder().nIn(depth).nOut(depth).kernelSize(1,1).build()) - .layer(new Yolo2OutputLayer.Builder() - .boundingBoxPriors(bbPrior) + .layer(Yolo2OutputLayer.builder() + .boundingBoxes(bbPrior) .build()) .build(); @@ -179,8 +179,8 @@ public class TestYolo2OutputLayer extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() .layer(ConvolutionLayer.builder().nIn(1).nOut(1).kernelSize(1,1).build()) - .layer(new Yolo2OutputLayer.Builder() - .boundingBoxPriors(bbPrior) + .layer(Yolo2OutputLayer.builder() + .boundingBoxes(bbPrior) .build()) .build(); 
@@ -337,8 +337,8 @@ public class TestYolo2OutputLayer extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() .layer(ConvolutionLayer.builder().kernelSize(3,3).stride(1,1).nIn(3).nOut(3).build()) - .layer(new Yolo2OutputLayer.Builder() - .boundingBoxPriors(bbPriors) + .layer(Yolo2OutputLayer.builder() + .boundingBoxes(bbPriors) .build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -506,8 +506,8 @@ public class TestYolo2OutputLayer extends BaseDL4JTest { .layer(ConvolutionLayer.builder().kernelSize(5,5).stride(2,2).nOut(256).build()) .layer(SubsamplingLayer.builder().kernelSize(2,2).stride(2,2)/*.poolingType(SubsamplingLayer.PoolingType.AVG)*/.build()) .layer(ConvolutionLayer.builder().activation(Activation.IDENTITY).kernelSize(5,5).stride(1,1).nOut(depthOut).build()) - .layer(new Yolo2OutputLayer.Builder() - .boundingBoxPriors(bbPriors) + .layer(Yolo2OutputLayer.builder() + .boundingBoxes(bbPriors) .build()) .inputType(InputType.convolutional(h,w,c)) .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/RnnDataFormatTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/RnnDataFormatTests.java index 9728afc85..423fe9e42 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/RnnDataFormatTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/RnnDataFormatTests.java @@ -209,7 +209,7 @@ public class RnnDataFormatTests extends BaseDL4JTest { return getNetWithLayer(GravesBidirectionalLSTM.builder().nOut(3) .dataFormat(format).build(), format, lastTimeStep, maskZeros); } else { - return getNetWithLayer(new GravesBidirectionalLSTM.Builder().nOut(3).build(), format, lastTimeStep, maskZeros); + return getNetWithLayer(GravesBidirectionalLSTM.builder().nOut(3).build(), format, lastTimeStep, maskZeros); } } private MultiLayerNetwork getGravesLstmNet(RNNFormat 
format, boolean setOnLayerAlso, boolean lastTimeStep, boolean maskZeros) { @@ -240,7 +240,7 @@ public class RnnDataFormatTests extends BaseDL4JTest { } private MultiLayerNetwork getNetWithLayer(LayerConfiguration layer, RNNFormat format, boolean lastTimeStep, boolean maskZeros) { if (maskZeros){ - layer = new MaskZeroLayer.Builder().setMaskValue(0.).setUnderlying(layer).build(); + layer = MaskZeroLayer.builder().maskingValue(0.).underlying(layer).build(); } if(lastTimeStep){ layer = new LastTimeStep(layer); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestRecurrentWeightInit.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestRecurrentWeightInit.java index e612e66e5..7581e1db9 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestRecurrentWeightInit.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestRecurrentWeightInit.java @@ -27,6 +27,7 @@ import org.deeplearning4j.nn.conf.layers.GravesLSTM; import org.deeplearning4j.nn.conf.layers.LSTM; import org.deeplearning4j.nn.conf.layers.recurrent.SimpleRnn; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; +import org.deeplearning4j.nn.weights.WeightInitDistribution; import org.junit.jupiter.api.Test; import org.nd4j.linalg.api.ndarray.INDArray; @@ -48,17 +49,17 @@ public class TestRecurrentWeightInit extends BaseDL4JTest { switch (i) { case 0: b.layer(LSTM.builder().nIn(10).nOut(10) - .weightInitRecurrent(new UniformDistribution(2, 3)) + .weightInitRecurrent(new WeightInitDistribution(new UniformDistribution(2, 3))) .build()); break; case 1: b.layer(GravesLSTM.builder().nIn(10).nOut(10) - .weightInitRecurrent(new UniformDistribution(2, 3)) + .weightInitRecurrent(new WeightInitDistribution(new UniformDistribution(2, 3))) .build()); break; case 2: b.layer(SimpleRnn.builder().nIn(10).nOut(10) - .weightInitRecurrent(new UniformDistribution(2, 
3)).build()); + .weightInitRecurrent(new WeightInitDistribution(new UniformDistribution(2, 3))).build()); break; default: throw new RuntimeException(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestTimeDistributed.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestTimeDistributed.java index 3cbcf59eb..be057ca2b 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestTimeDistributed.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestTimeDistributed.java @@ -145,8 +145,8 @@ public class TestTimeDistributed extends BaseDL4JTest { l2 = SimpleRnn.builder().nOut(5).build(); break; case 2: - l0 = Bidirectional.builder(LSTM.builder().nOut(5).build()); - l2 = Bidirectional.builder(LSTM.builder().nOut(5).build()); + l0 = Bidirectional.builder(LSTM.builder().nOut(5).build()).build(); + l2 = Bidirectional.builder(LSTM.builder().nOut(5).build()).build(); break; default: throw new RuntimeException("Not implemented: " + rnnType); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffConv.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffConv.java index 3ed79a8ac..44f216a86 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffConv.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffConv.java @@ -67,7 +67,7 @@ public class TestSameDiffConv extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() - .layer(new SameDiffConv.Builder().nIn(nIn).nOut(nOut).kernelSize(kH, kW).build()) + .layer(SameDiffConv.builder().nIn(nIn).nOut(nOut).kernelSize(kH, kW).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -131,7 +131,7 @@ public class TestSameDiffConv extends BaseDL4JTest { 
.dataType(DataType.DOUBLE) .seed(12345) .list() - .layer(new SameDiffConv.Builder() + .layer(SameDiffConv.builder() .weightInit(WeightInit.XAVIER) .nIn(nIn) .nOut(nOut) @@ -142,7 +142,7 @@ public class TestSameDiffConv extends BaseDL4JTest { .activation(a) .hasBias(hasBias) .build()) - .layer(new SameDiffConv.Builder() + .layer(SameDiffConv.builder() .weightInit(WeightInit.XAVIER) .nIn(nOut) .nOut(nOut) @@ -273,7 +273,7 @@ public class TestSameDiffConv extends BaseDL4JTest { .trainingWorkspaceMode(workspaces ? WorkspaceMode.ENABLED : WorkspaceMode.NONE) .inferenceWorkspaceMode(workspaces ? WorkspaceMode.ENABLED : WorkspaceMode.NONE) .list() - .layer(new SameDiffConv.Builder() + .layer(SameDiffConv.builder() .weightInit(WeightInit.XAVIER) .nIn(nIn) .nOut(nOut) @@ -284,7 +284,7 @@ public class TestSameDiffConv extends BaseDL4JTest { .activation(Activation.TANH) .hasBias(hasBias) .build()) - .layer(new SameDiffConv.Builder() + .layer(SameDiffConv.builder() .weightInit(WeightInit.XAVIER) .nIn(nOut) .nOut(nOut) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffDense.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffDense.java index 29fc421ae..14a8d1fb6 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffDense.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffDense.java @@ -65,7 +65,7 @@ public class TestSameDiffDense extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .list() - .layer(new SameDiffDense.Builder().nIn(nIn).nOut(nOut).build()) + .layer(SameDiffDense.builder().nIn(nIn).nOut(nOut).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); @@ -106,7 +106,7 @@ public class TestSameDiffDense extends BaseDL4JTest { .inferenceWorkspaceMode(wsm) .trainingWorkspaceMode(wsm) .list() - .layer(new 
SameDiffDense.Builder().nIn(nIn).nOut(nOut) + .layer(SameDiffDense.builder().nIn(nIn).nOut(nOut) .activation(a) .build()) .build(); @@ -178,10 +178,10 @@ public class TestSameDiffDense extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .seed(12345) .list() - .layer(new SameDiffDense.Builder().nIn(nIn).nOut(nOut) + .layer(SameDiffDense.builder().nIn(nIn).nOut(nOut) .weightInit(WeightInit.XAVIER) .activation(a).build()) - .layer(new SameDiffDense.Builder().nIn(nOut).nOut(nOut) + .layer(SameDiffDense.builder().nIn(nOut).nOut(nOut) .weightInit(WeightInit.XAVIER) .activation(a).build()) .layer(OutputLayer.builder().nIn(nOut).nOut(nOut) @@ -267,7 +267,7 @@ public class TestSameDiffDense extends BaseDL4JTest { .trainingWorkspaceMode(workspaces ? WorkspaceMode.ENABLED : WorkspaceMode.NONE) .inferenceWorkspaceMode(workspaces ? WorkspaceMode.ENABLED : WorkspaceMode.NONE) .list() - .layer(new SameDiffDense.Builder().nIn(nIn).nOut(nOut) + .layer(SameDiffDense.builder().nIn(nIn).nOut(nOut) .activation(a) .build()) .layer(OutputLayer.builder().nIn(nOut).nOut(nOut).activation(Activation.SOFTMAX) @@ -357,8 +357,8 @@ public class TestSameDiffDense extends BaseDL4JTest { .inferenceWorkspaceMode(wsm) .updater(new Adam(0.1)) .list() - .layer(new SameDiffDense.Builder().nIn(nIn).nOut(5).activation(Activation.TANH).build()) - .layer(new SameDiffDense.Builder().nIn(5).nOut(5).activation(Activation.TANH).build()) + .layer(SameDiffDense.builder().nIn(nIn).nOut(5).activation(Activation.TANH).build()) + .layer(SameDiffDense.builder().nIn(5).nOut(5).activation(Activation.TANH).build()) .layer(OutputLayer.builder().nIn(5).nOut(nOut).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); @@ -428,8 +428,8 @@ public class TestSameDiffDense extends BaseDL4JTest { .trainingWorkspaceMode(workspaces ? WorkspaceMode.ENABLED : WorkspaceMode.NONE) .inferenceWorkspaceMode(workspaces ? 
WorkspaceMode.ENABLED : WorkspaceMode.NONE) .list() - .layer(new SameDiffDense.Builder().nIn(nIn).nOut(nOut).activation(a).build()) - .layer(new SameDiffDense.Builder().nIn(nOut).nOut(nOut).activation(a).build()) + .layer(SameDiffDense.builder().nIn(nIn).nOut(nOut).activation(a).build()) + .layer(SameDiffDense.builder().nIn(nOut).nOut(nOut).activation(a).build()) .layer(OutputLayer.builder().nIn(nOut).nOut(nOut).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) //.inputType(InputType.feedForward(nIn)) //TODO diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffOutput.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffOutput.java index 90322b49d..2dc3df4e0 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffOutput.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffOutput.java @@ -60,7 +60,7 @@ public class TestSameDiffOutput extends BaseDL4JTest { .updater(new Adam(0.01)) .list() .layer(DenseLayer.builder().nIn(5).nOut(5).activation(Activation.TANH).build()) - .layer(LossLayer.builder().lossFunction().activation(Activation.IDENTITY).lossFunction(LossFunctions.LossFunction.MSE).build()) + .layer(LossLayer.builder().activation(Activation.IDENTITY).lossFunction(LossFunctions.LossFunction.MSE.getILossFunction()).build()) .build(); MultiLayerNetwork netSD = new MultiLayerNetwork(confSD); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffConv.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffConv.java index e1432e14e..35f87c35a 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffConv.java +++ 
b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffConv.java @@ -21,6 +21,7 @@ package org.deeplearning4j.nn.layers.samediff.testlayers; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -45,52 +46,62 @@ import java.util.*; @Data @EqualsAndHashCode(callSuper = true) @JsonIgnoreProperties({"paramShapes"}) +@NoArgsConstructor +@SuperBuilder public class SameDiffConv extends SameDiffLayer { + public static abstract class SameDiffConvBuilder> extends + SameDiffLayerBuilder { + public B kernelSize(int... k) { + this.kernelSize$value = k; + this.kernelSize$set = true; + return self(); + } + + public B stride(int... s) { + this.stride$value = s; + this.stride$set = true; + return self(); + } + + public B padding(int... p) { + this.padding$value = p; + this.padding$set = true; + return self(); + } + } private static final List WEIGHT_KEYS = Collections.singletonList(ConvolutionParamInitializer.WEIGHT_KEY); private static final List BIAS_KEYS = Collections.singletonList(ConvolutionParamInitializer.BIAS_KEY); //Order to match 'vanilla' conv layer implementation, for easy comparison private static final List PARAM_KEYS = Arrays.asList(ConvolutionParamInitializer.BIAS_KEY, ConvolutionParamInitializer.WEIGHT_KEY); - private long nIn; - private long nOut; - private Activation activation; - private int[] kernel; - private int[] stride; - private int[] padding; - private ConvolutionMode cm; - private int[] dilation; - private boolean hasBias; - protected SameDiffConv(Builder b) { - super(b); - this.nIn = b.nIn; - this.nOut = b.nOut; - this.activation = b.activation; - this.kernel = b.kernel; - this.stride = b.stride; - this.padding = b.padding; - this.cm = b.cm; - this.dilation = b.dilation; - this.hasBias = b.hasBias; - } + private int nIn; + private int nOut; + 
@Builder.Default private Activation activation = Activation.TANH; + @Builder.Default private int[] kernelSize = new int[]{2, 2}; + + @Builder.Default private int[] stride = new int[]{1, 1}; + @Builder.Default private int[] padding = new int[]{0, 0}; + @Builder.Default private int[] dilation = new int[]{1, 1}; + @Builder.Default private ConvolutionMode convolutionMode = ConvolutionMode.Same; + @Builder.Default private boolean hasBias = true; + + - private SameDiffConv(){ - //No arg constructor for Jackson/JSON serialization - } @Override public InputType getOutputType(int layerIndex, InputType inputType) { InputType.InputTypeConvolutional c = (InputType.InputTypeConvolutional) inputType; - return InputTypeUtil.getOutputTypeCnnLayers(inputType, kernel, stride, padding, new int[]{1, 1}, - cm, nOut, layerIndex, getName(), SameDiffConv.class); + return InputTypeUtil.getOutputTypeCnnLayers(inputType, kernelSize, stride, padding, new int[]{1, 1}, + convolutionMode, nOut, layerIndex, getName(), SameDiffConv.class); } @Override public void setNIn(InputType inputType, boolean override) { if (nIn <= 0 || override) { InputType.InputTypeConvolutional c = (InputType.InputTypeConvolutional) inputType; - this.nIn = c.getChannels(); + this.nIn = (int) c.getChannels(); } } @@ -102,7 +113,7 @@ public class SameDiffConv extends SameDiffLayer { @Override public void defineParameters(SDLayerParams params) { params.clear(); - val weightsShape = new long[]{kernel[0], kernel[1], nIn, nOut}; //[kH, kW, iC, oC] in libnd4j + val weightsShape = new long[]{kernelSize[0], kernelSize[1], nIn, nOut}; //[kH, kW, iC, oC] in libnd4j params.addWeightParam(ConvolutionParamInitializer.WEIGHT_KEY, weightsShape); if(hasBias) { val biasShape = new long[]{1, nOut}; @@ -113,8 +124,8 @@ public class SameDiffConv extends SameDiffLayer { @Override public void initializeParameters(Map params) { try(MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) { - double fanIn = nIn * kernel[0] * 
kernel[1]; - double fanOut = nOut * kernel[0] * kernel[1] / ((double) stride[0] * stride[1]); + double fanIn = nIn * kernelSize[0] * kernelSize[1]; + double fanOut = nOut * kernelSize[0] * kernelSize[1] / ((double) stride[0] * stride[1]); for (Map.Entry e : params.entrySet()) { if(paramWeightInit != null && paramWeightInit.containsKey(e.getKey())){ paramWeightInit.get(e.getKey()).init(fanIn, fanOut, e.getValue().shape(), 'c', e.getValue()); @@ -135,11 +146,11 @@ public class SameDiffConv extends SameDiffLayer { SDVariable w = paramTable.get(ConvolutionParamInitializer.WEIGHT_KEY); Conv2DConfig c = Conv2DConfig.builder() - .kH(kernel[0]).kW(kernel[1]) + .kH(kernelSize[0]).kW(kernelSize[1]) .pH(padding[0]).pW(padding[1]) .sH(stride[0]).sW(stride[1]) .dH(dilation[0]).dW(dilation[1]) - .isSameMode(this.cm == ConvolutionMode.Same) + .isSameMode(this.convolutionMode == ConvolutionMode.Same) .build(); SDVariable conv = null; @@ -159,72 +170,10 @@ public class SameDiffConv extends SameDiffLayer { if (activation == null) { activation = SameDiffLayerUtils.fromIActivation(clone.getActivation()); } - if (cm == null) { - cm = clone.getConvolutionMode(); + if (convolutionMode == null) { + convolutionMode = clone.getConvolutionMode(); } } - public static class Builder extends SameDiffLayer.Builder { - private int nIn; - private int nOut; - private Activation activation = Activation.TANH; - private int[] kernel = new int[]{2, 2}; - - private int[] stride = new int[]{1, 1}; - private int[] padding = new int[]{0, 0}; - private int[] dilation = new int[]{1, 1}; - private ConvolutionMode cm = ConvolutionMode.Same; - private boolean hasBias = true; - - public Builder nIn(int nIn) { - this.nIn = nIn; - return this; - } - - public Builder nOut(int nOut) { - this.nOut = nOut; - return this; - } - - public Builder activation(Activation activation) { - this.activation = activation; - return this; - } - - public Builder kernelSize(int... 
k) { - this.kernel = k; - return this; - } - - public Builder stride(int... s) { - this.stride = s; - return this; - } - - public Builder padding(int... p) { - this.padding = p; - return this; - } - - public Builder convolutionMode(ConvolutionMode cm) { - this.cm = cm; - return this; - } - - public Builder dilation(int... d) { - this.dilation = d; - return this; - } - - public Builder hasBias(boolean hasBias){ - this.hasBias = hasBias; - return this; - } - - @Override - public SameDiffConv build() { - return new SameDiffConv(this); - } - } } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffDense.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffDense.java index e1799443d..1f47a3b7b 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffDense.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffDense.java @@ -22,6 +22,8 @@ package org.deeplearning4j.nn.layers.samediff.testlayers; import lombok.Data; import lombok.EqualsAndHashCode; +import lombok.NoArgsConstructor; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -40,30 +42,22 @@ import java.util.*; @Data @EqualsAndHashCode(callSuper = true, exclude = {"paramShapes"}) +@NoArgsConstructor() @JsonIgnoreProperties("paramShapes") +@SuperBuilder public class SameDiffDense extends SameDiffLayer { private static final List W_KEYS = Collections.singletonList(DefaultParamInitializer.WEIGHT_KEY); private static final List B_KEYS = Collections.singletonList(DefaultParamInitializer.BIAS_KEY); private static final List PARAM_KEYS = Arrays.asList(DefaultParamInitializer.WEIGHT_KEY, DefaultParamInitializer.BIAS_KEY); - private Map paramShapes; + private 
final Map paramShapes = new HashMap<>(); private long nIn; private long nOut; private Activation activation; - protected SameDiffDense(Builder builder) { - super(builder); - nIn = builder.nIn; - nOut = builder.nOut; - activation = builder.activation; - } - - private SameDiffDense(){ - //No op constructor for Jackson - } @Override public InputType getOutputType(int layerIndex, InputType inputType) { @@ -128,31 +122,5 @@ public class SameDiffDense extends SameDiffLayer { return 'f'; } - public static class Builder extends SameDiffLayer.Builder { - private int nIn; - private int nOut; - - private Activation activation; - - public Builder nIn(int nIn){ - this.nIn = nIn; - return this; - } - - public Builder nOut(int nOut){ - this.nOut = nOut; - return this; - } - - public Builder activation(Activation activation){ - this.activation = activation; - return this; - } - - @Override - public SameDiffDense build() { - return new SameDiffDense(this); - } - } } diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/variational/TestVAE.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/variational/TestVAE.java index 7dceb4a2c..6e16cacc2 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/variational/TestVAE.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/layers/variational/TestVAE.java @@ -58,7 +58,7 @@ public class TestVAE extends BaseDL4JTest { NeuralNetConfiguration mlc = NeuralNetConfiguration.builder() - .layer(0, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder() + .layer(0, org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.builder() .nIn(10).nOut(5).encoderLayerSizes(12).decoderLayerSizes(13) .build()) .build(); @@ -95,7 +95,7 @@ public class TestVAE extends BaseDL4JTest { for (int i = 0; i < encLayerSizes.length; i++) { NeuralNetConfiguration mlc = NeuralNetConfiguration.builder().list().layer(0, - new 
org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder().nIn(10) + org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.builder().nIn(10) .nOut(5).encoderLayerSizes(encLayerSizes[i]).decoderLayerSizes(13).build()) .build(); @@ -121,7 +121,7 @@ public class TestVAE extends BaseDL4JTest { int inputSize = 3; NeuralNetConfiguration mlc = NeuralNetConfiguration.builder().list() - .layer(0, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder() + .layer(0, org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.builder() .nIn(inputSize).nOut(4).encoderLayerSizes(5).decoderLayerSizes(6).build()) .build(); @@ -159,7 +159,7 @@ public class TestVAE extends BaseDL4JTest { public void testParamGradientOrderAndViews() { Nd4j.getRandom().setSeed(12345); NeuralNetConfiguration mlc = NeuralNetConfiguration.builder().list() - .layer(0, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder() + .layer(0, org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.builder() .nIn(10).nOut(5).encoderLayerSizes(12, 13).decoderLayerSizes(14, 15).build()) .build(); @@ -217,7 +217,7 @@ public class TestVAE extends BaseDL4JTest { Nd4j.getRandom().setSeed(12345); NeuralNetConfiguration mlc = NeuralNetConfiguration.builder().seed(12345).list() - .layer(0, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder() + .layer(0, org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.builder() .nIn(10).nOut(5).encoderLayerSizes(12, 13).decoderLayerSizes(14, 15).build()) .layer(1, OutputLayer.builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(5).nOut(6) .activation(new ActivationTanH()).build()) @@ -269,22 +269,22 @@ public class TestVAE extends BaseDL4JTest { public void testJsonYaml() { NeuralNetConfiguration config = NeuralNetConfiguration.builder().seed(12345).list() - .layer(0, new 
org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder() + .layer(0, org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.builder() .reconstructionDistribution(new GaussianReconstructionDistribution(Activation.IDENTITY)) .nIn(3).nOut(4).encoderLayerSizes(5).decoderLayerSizes(6).build()) - .layer(1, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder() + .layer(1, org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.builder() .reconstructionDistribution(new GaussianReconstructionDistribution(Activation.TANH)) .nIn(7).nOut(8).encoderLayerSizes(9).decoderLayerSizes(10).build()) - .layer(2, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder() + .layer(2, org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.builder() .reconstructionDistribution(new BernoulliReconstructionDistribution()).nIn(11) .nOut(12).encoderLayerSizes(13).decoderLayerSizes(14).build()) - .layer(3, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder() + .layer(3, org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.builder() .reconstructionDistribution(new ExponentialReconstructionDistribution(Activation.TANH)) .nIn(11).nOut(12).encoderLayerSizes(13).decoderLayerSizes(14).build()) - .layer(4, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder() + .layer(4, org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.builder() .lossFunction(new ActivationTanH(), LossFunctions.LossFunction.MSE).nIn(11) .nOut(12).encoderLayerSizes(13).decoderLayerSizes(14).build()) - .layer(5, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder() + .layer(5, org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.builder() .reconstructionDistribution(new CompositeReconstructionDistribution.Builder() .addDistribution(5, new GaussianReconstructionDistribution()) 
.addDistribution(5, diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/TestMemoryReports.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/TestMemoryReports.java index 4c2fd5f64..2909a7521 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/TestMemoryReports.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/TestMemoryReports.java @@ -59,7 +59,7 @@ public class TestMemoryReports extends BaseDL4JTest { l.add(new Pair<>(DropoutLayer.builder().nIn(20).nOut(20).build(), InputType.feedForward(20))); l.add(new Pair<>(EmbeddingLayer.builder().nIn(1).nOut(20).build(), InputType.feedForward(20))); l.add(new Pair<>(OutputLayer.builder().nIn(20).nOut(20).build(), InputType.feedForward(20))); - l.add(new Pair<>(LossLayer.builder().lossFunction().build(), InputType.feedForward(20))); + l.add(new Pair<>(LossLayer.builder().build(), InputType.feedForward(20))); //RNN layers: l.add(new Pair<>(GravesLSTM.builder().nIn(20).nOut(20).build(), InputType.recurrent(20, 30))); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/WorkspaceTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/WorkspaceTests.java index d3a04cedd..becd89ab2 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/WorkspaceTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/misc/WorkspaceTests.java @@ -469,7 +469,7 @@ public class WorkspaceTests extends BaseDL4JTest { .addLayer("a", GravesLSTM.builder().nOut(300).activation(Activation.HARDTANH).build(), "embeddings") .addVertex("b", new LastTimeStepVertex("in"), "a") .addLayer("c", DenseLayer.builder().nOut(300).activation(Activation.HARDTANH).build(), "b") - .addLayer("output", LossLayer.builder().lossFunction().lossFunction(LossFunctions.LossFunction.COSINE_PROXIMITY).build(), "c") + .addLayer("output", 
LossLayer.builder().lossFunction(LossFunctions.LossFunction.COSINE_PROXIMITY.getILossFunction()).build(), "c") .setOutputs("output") .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTest.java index b6212d00a..0db9170fa 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/multilayer/MultiLayerTest.java @@ -1455,10 +1455,10 @@ public class MultiLayerTest extends BaseDL4JTest { NeuralNetConfiguration conf = NeuralNetConfiguration.builder() .l2(0.01) - .list() + .layer(ConvolutionLayer.builder().nIn(depth).nOut(depth).kernelSize(1, 1).build()) - .layer(new Yolo2OutputLayer.Builder() - .boundingBoxPriors(bbPrior) + .layer(Yolo2OutputLayer.builder() + .boundingBoxes(bbPrior) .build()) .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningCompGraphTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningCompGraphTest.java index 60e6a695c..55af24b0c 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningCompGraphTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningCompGraphTest.java @@ -500,10 +500,10 @@ public class TransferLearningCompGraphTest extends BaseDL4JTest { .addInputs(inputName) .setOutputs(outputName) .setInputTypes(InputType.inferInputTypes(input)) - .addLayer(firstConv, new Convolution2D.Builder(3, 3) + .addLayer(firstConv, Convolution2D.builder(3, 3) .nOut(10) .build(), inputName) - .addLayer(secondConv, new Convolution2D.Builder(1, 1) + .addLayer(secondConv, Convolution2D.builder(1, 1) .nOut(3) .build(), firstConv) .addLayer(outputName, OutputLayer.builder() @@ -546,11 
+546,11 @@ public class TransferLearningCompGraphTest extends BaseDL4JTest { .addInputs(inputName) .setOutputs(outputName) .setInputTypes(InputType.inferInputTypes(input)) - .addLayer(changeNoutName, new Convolution2D.Builder(1, 1) + .addLayer(changeNoutName, Convolution2D.builder(1, 1) .nOut(10) .build(), inputName) .addLayer(poolName, SubsamplingLayer.builder(1,1).build(), changeNoutName) - .addLayer(afterPoolName, new Convolution2D.Builder(1, 1) + .addLayer(afterPoolName, Convolution2D.builder(1, 1) .nOut(7) .build(), poolName) .addLayer(outputName, OutputLayer.builder() @@ -583,7 +583,7 @@ public class TransferLearningCompGraphTest extends BaseDL4JTest { .graphBuilder() .addInputs("in") .layer("l0", LSTM.builder().nIn(5).nOut(5).build(), "in") - .layer("l1", new RecurrentAttentionLayer.Builder().nHeads(1).headSize(5).nIn(5).nOut(5).build(), "l0") + .layer("l1", RecurrentAttentionLayer.builder().nHeads(1).headSize(5).nIn(5).nOut(5).build(), "l0") .layer("out", RnnOutputLayer.builder().nIn(5).nOut(5).activation(Activation.SOFTMAX).build(), "l1") .setOutputs("out") .build(); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningHelperTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningHelperTest.java index 1cb19495a..9b3bc0c11 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningHelperTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningHelperTest.java @@ -28,7 +28,6 @@ import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.graph.MergeVertex; import org.deeplearning4j.nn.conf.graph.SubsetVertex; import org.deeplearning4j.nn.conf.layers.DenseLayer; -import org.deeplearning4j.nn.conf.layers.DenseLayer.Builder; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.graph.ComputationGraph; 
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; @@ -214,9 +213,9 @@ public class TransferLearningHelperTest extends BaseDL4JTest { MultiLayerNetwork modelToFineTune = new MultiLayerNetwork( (NeuralNetConfiguration) overallConf.clone() - .layer(0, new Builder().nIn(4).nOut(3).build()) - .layer(1, new Builder().nIn(3).nOut(2).build()) - .layer(2, new Builder().nIn(2).nOut(3).build()) + .layer(0, DenseLayer.builder().nIn(4).nOut(3).build()) + .layer(1, DenseLayer.builder().nIn(3).nOut(2).build()) + .layer(2, DenseLayer.builder().nIn(2).nOut(3).build()) .layer(3, OutputLayer.builder( LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(3).nOut(3) .build()) @@ -233,7 +232,7 @@ public class TransferLearningHelperTest extends BaseDL4JTest { Nd4j.hstack(modelToFineTune.getLayer(2).getParams(), modelToFineTune.getLayer(3).getParams()); MultiLayerNetwork notFrozen = new MultiLayerNetwork( (NeuralNetConfiguration) overallConf.clone().list() - .layer(0, new Builder().nIn(2).nOut(3).build()) + .layer(0, DenseLayer.builder().nIn(2).nOut(3).build()) .layer(1, OutputLayer.builder( LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(3).nOut(3) .build()) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningMLNTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningMLNTest.java index 98f9377e4..1776d0d70 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningMLNTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/transferlearning/TransferLearningMLNTest.java @@ -32,7 +32,7 @@ import org.deeplearning4j.nn.conf.distribution.ConstantDistribution; import org.deeplearning4j.nn.conf.distribution.NormalDistribution; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.*; -import org.deeplearning4j.nn.conf.layers.DenseLayer.Builder; + import 
org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor; import org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor; import org.deeplearning4j.nn.conf.preprocessor.RnnToCnnPreProcessor; @@ -74,7 +74,7 @@ public class TransferLearningMLNTest extends BaseDL4JTest { MultiLayerNetwork modelToFineTune = new MultiLayerNetwork( (NeuralNetConfiguration) confToChange.list() - .layer(0, new Builder().nIn(4).nOut(3).build()) + .layer(0, DenseLayer.builder().nIn(4).nOut(3).build()) .layer(1, OutputLayer.builder( LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(3).nOut(3) .build()) @@ -101,7 +101,7 @@ public class TransferLearningMLNTest extends BaseDL4JTest { .updater(new RmsProp(0.5)).l2(0.4); MultiLayerNetwork expectedModel = new MultiLayerNetwork((NeuralNetConfiguration) confSet.list() - .layer(0, new Builder().nIn(4).nOut(3).build()) + .layer(0, DenseLayer.builder().nIn(4).nOut(3).build()) .layer(1, OutputLayer.builder( LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(3).nOut(3) .build()) @@ -651,8 +651,8 @@ public class TransferLearningMLNTest extends BaseDL4JTest { .weightInit(new ConstantDistribution(666)) .list() .inputType(InputType.inferInputTypes(input)[0]) - .layer(new Convolution2D.Builder(3, 3).nOut(10).build()) - .layer(new Convolution2D.Builder(1, 1).nOut(3).build()) + .layer(Convolution2D.builder(3, 3).nOut(10).build()) + .layer(Convolution2D.builder(1, 1).nOut(3).build()) .layer(OutputLayer.builder().nOut(2).lossFunction(LossFunctions.LossFunction.MSE) .build()).build()); net.init(); @@ -682,9 +682,9 @@ public class TransferLearningMLNTest extends BaseDL4JTest { MultiLayerNetwork net = new MultiLayerNetwork( NeuralNetConfiguration.builder() .list() .inputType(InputType.inferInputTypes(input)[0]) - .layer(new Convolution2D.Builder(1, 1).nOut(10).build()) + .layer(Convolution2D.builder(1, 1).nOut(10).build()) .layer(SubsamplingLayer.builder(1,1).build()) - .layer(new Convolution2D.Builder(1, 1).nOut(7).build()) + 
.layer(Convolution2D.builder(1, 1).nOut(7).build()) .layer(OutputLayer.builder().activation(Activation.SOFTMAX).nOut(2).build()) .build()); net.init(); @@ -712,7 +712,7 @@ public class TransferLearningMLNTest extends BaseDL4JTest { .weightInit(WeightInit.XAVIER) .list() .layer(LSTM.builder().nOut(8).build()) - .layer( new SelfAttentionLayer.Builder().nOut(4).nHeads(2).projectInput(true).build()) + .layer( SelfAttentionLayer.builder().nOut(4).nHeads(2).projectInput(true).build()) .layer(GlobalPoolingLayer.builder().poolingType(PoolingType.MAX).build()) .layer(OutputLayer.builder().nOut(2).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/weights/WeightInitIdentityTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/weights/WeightInitIdentityTest.java index c32e550f3..5dc7bd0a3 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/weights/WeightInitIdentityTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/nn/weights/WeightInitIdentityTest.java @@ -52,7 +52,7 @@ public class WeightInitIdentityTest extends BaseDL4JTest { .graphBuilder() .addInputs(inputName) .setOutputs(output) - .layer(conv, new Convolution1DLayer.Builder(7) + .layer(conv, Convolution1DLayer.builder(7) .convolutionMode(ConvolutionMode.Same) .nOut(input.size(1)) .weightInit(new WeightInitIdentity()) @@ -115,7 +115,7 @@ public class WeightInitIdentityTest extends BaseDL4JTest { .weightInit(new WeightInitIdentity()) .activation(new ActivationIdentity()) .build(), inputName) - .layer(output, new Cnn3DLossLayer.Builder(Convolution3D.DataFormat.NCDHW).activation(new ActivationIdentity()).build(), conv) + .layer(output, Cnn3DLossLayer.builder().dataFormat(Convolution3D.DataFormat.NCDHW).activation(new ActivationIdentity()).build(), conv) .build()); graph.init(); diff --git 
a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/ModelGuesserTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/ModelGuesserTest.java index 05da239af..fb8ba0fb1 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/ModelGuesserTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/util/ModelGuesserTest.java @@ -249,7 +249,7 @@ public class ModelGuesserTest extends BaseDL4JTest { int nOut = 6; NeuralNetConfiguration conf = NeuralNetConfiguration.builder().seed(12345).l1(0.01).l2(0.01) - .updater(new Sgd(0.1)).activation(Activation.TANH).weightInit(WeightInit.XAVIER).list() + .updater(new Sgd(0.1)).activation(Activation.TANH).weightInit(WeightInit.XAVIER) .layer(0, DenseLayer.builder().nIn(nIn).nOut(20).build()) .layer(1, DenseLayer.builder().nIn(20).nOut(30).build()).layer(2, OutputLayer.builder() .lossFunction(LossFunctions.LossFunction.MSE).nIn(30).nOut(nOut).build()) diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasSpaceToDepth.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasSpaceToDepth.java index 0b99b511b..81e5d0a77 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasSpaceToDepth.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasSpaceToDepth.java @@ -56,10 +56,10 @@ public class KerasSpaceToDepth extends KerasLayer { // TODO: we hard-code block size here to import YOLO9000. This size is not available as property // in the hdf5 file outside of the serialized lambda function (that we can't really well deserialize). 
- SpaceToDepthLayer.Builder builder = new SpaceToDepthLayer.Builder() - .blocks(2) + var builder = SpaceToDepthLayer.builder() + .blockSize(2) //the default data format is tensorflow/NWHC for keras import - .dataFormat(SpaceToDepthLayer.DataFormat.NHWC) + .dataFormat(SpaceToDepthLayer.DataFormat.NHWC.toFormat()) .name(name); this.layer = builder.build(); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasUpsampling3D.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasUpsampling3D.java index 3c83220a0..042207a8c 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasUpsampling3D.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasUpsampling3D.java @@ -63,7 +63,7 @@ public class KerasUpsampling3D extends KerasLayer { int[] size = KerasConvolutionUtils.getUpsamplingSizeFromConfig(layerConfig, 3, conf); // TODO: make sure to allow different sizes. 
- Upsampling3D.Builder builder = new Upsampling3D.Builder() + var builder = Upsampling3D.builder() .name(this.name) .dropOut(this.dropout) .size(size[0]); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/custom/KerasLRN.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/custom/KerasLRN.java index 6d788b4c1..78abac569 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/custom/KerasLRN.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/custom/KerasLRN.java @@ -59,7 +59,7 @@ public class KerasLRN extends KerasLayer { super(layerConfig, enforceTrainingConfig); Map lrnParams = KerasLayerUtils.getInnerLayerConfigFromConfig(layerConfig, conf); - LocalResponseNormalization.Builder builder = LocalResponseNormalization.builder().name(this.name) + var builder = LocalResponseNormalization.builder().name(this.name) .dropOut(this.dropout).alpha((double) lrnParams.get("alpha")) .beta((double) lrnParams.get("beta")).k((int) lrnParams.get("k")).n((int) lrnParams.get("n")); this.layer = builder.build(); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/local/KerasLocallyConnected1D.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/local/KerasLocallyConnected1D.java index a79ad4aa1..39e881326 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/local/KerasLocallyConnected1D.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/local/KerasLocallyConnected1D.java @@ -33,6 +33,7 @@ import org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasConvolu import org.deeplearning4j.nn.modelimport.keras.utils.KerasConstraintUtils; import 
org.deeplearning4j.nn.params.ConvolutionParamInitializer; import org.deeplearning4j.nn.weights.IWeightInit; +import org.deeplearning4j.nn.weights.WeightInit; import org.nd4j.linalg.api.ndarray.INDArray; import java.util.HashMap; @@ -98,7 +99,7 @@ public class KerasLocallyConnected1D extends KerasConvolution { LocallyConnected1D.LocallyConnected1DBuilder builder = LocallyConnected1D.builder().name(this.name) .nOut(getNOutFromConfig(layerConfig, conf)).dropOut(this.dropout) .activation(getActivationFromConfig(layerConfig, conf)) - .weightInit(conf.getKERAS_PARAM_NAME_W(), init) + .weightInit(WeightInit.valueOf(conf.getKERAS_PARAM_NAME_W())) .l1(this.weightL1Regularization).l2(this.weightL2Regularization) .convolutionMode(getConvolutionModeFromConfig(layerConfig, conf)) .kernelSize(getKernelSizeFromConfig(layerConfig, 1, conf, kerasMajorVersion)[0]) diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/local/KerasLocallyConnected2D.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/local/KerasLocallyConnected2D.java index ba923f75e..1644f9f6f 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/local/KerasLocallyConnected2D.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/local/KerasLocallyConnected2D.java @@ -99,7 +99,7 @@ public class KerasLocallyConnected2D extends KerasConvolution { LocallyConnected2D.LocallyConnected2DBuilder builder = LocallyConnected2D.builder().name(this.name) .nOut(getNOutFromConfig(layerConfig, conf)).dropOut(this.dropout) .activation(getActivationFromConfig(layerConfig, conf)) - .weightInit(conf.getKERAS_PARAM_NAME_W(), init) + .weightInit(init.enumValue()) .l1(this.weightL1Regularization).l2(this.weightL2Regularization) .convolutionMode(getConvolutionModeFromConfig(layerConfig, conf)) .kernelSize(getKernelSizeFromConfig(layerConfig, 
2, conf, kerasMajorVersion)) diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/normalization/KerasBatchNormalization.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/normalization/KerasBatchNormalization.java index 23de00c39..82016be49 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/normalization/KerasBatchNormalization.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/normalization/KerasBatchNormalization.java @@ -130,12 +130,14 @@ public class KerasBatchNormalization extends KerasLayer { BatchNormalization.BatchNormalizationBuilder builder =BatchNormalization.builder() .name(this.name) .dropOut(this.dropout) - .minibatch(true) + + .isMinibatch(true) .lockGammaBeta(false) .useLogStd(false) .decay(getMomentumFromConfig(layerConfig)) .eps(getEpsFromConfig(layerConfig)); if (betaConstraint != null) + builder.constrainBeta(betaConstraint); if (gammaConstraint != null) builder.constrainGamma(gammaConstraint); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/KerasModelImportTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/KerasModelImportTest.java index 60be152c8..38d0d4f67 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/KerasModelImportTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/KerasModelImportTest.java @@ -58,11 +58,11 @@ public class KerasModelImportTest extends BaseDL4JTest { MultiLayerNetwork model = loadModel("modelimport/keras/weights/conv2dnchw/simpleconv2d.hdf5"); List layerConfigs = model.getNetConfiguration().getFlattenedLayerConfigurations(); ConvolutionLayer convolutionLayer = 
(ConvolutionLayer) layerConfigs.get(0); - assertEquals(CNN2DFormat.NCHW,convolutionLayer.getDataFormat()); + assertEquals(CNN2DFormat.NCHW,convolutionLayer.getConvFormat()); SubsamplingLayer subsamplingLayer = (SubsamplingLayer) layerConfigs.get(1); assertEquals(CNN2DFormat.NHWC,subsamplingLayer.getDataFormat()); ConvolutionLayer convolutionLayer1 = (ConvolutionLayer) layerConfigs.get(2); - assertEquals(CNN2DFormat.NHWC,convolutionLayer1.getDataFormat()); + assertEquals(CNN2DFormat.NHWC,convolutionLayer1.getConvFormat()); model.output(Nd4j.zeros(1,1,28,28)); assertNotNull(model); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasYolo9000PredictTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasYolo9000PredictTest.java index a5ab1f512..acf06120d 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasYolo9000PredictTest.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasYolo9000PredictTest.java @@ -60,8 +60,8 @@ public class KerasYolo9000PredictTest extends BaseDL4JTest { ComputationGraph model = new TransferLearning.GraphBuilder(graph) .addLayer("outputs", - new org.deeplearning4j.nn.conf.layers.objdetect.Yolo2OutputLayer.Builder() - .boundingBoxPriors(priors) + org.deeplearning4j.nn.conf.layers.objdetect.Yolo2OutputLayer.builder() + .boundingBoxes(priors) .build(), "conv2d_23") .setOutputs("outputs") diff --git a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/local/KerasLocallyConnected1DTest.java b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/local/KerasLocallyConnected1DTest.java index 3248748ec..195039c8c 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/local/KerasLocallyConnected1DTest.java +++ 
b/cavis-dnn/cavis-dnn-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/local/KerasLocallyConnected1DTest.java @@ -126,7 +126,7 @@ public class KerasLocallyConnected1DTest extends BaseDL4JTest { assertEquals(L1_REGULARIZATION, KerasTestUtils.getL1(layer), 0.0); assertEquals(L2_REGULARIZATION, KerasTestUtils.getL2(layer), 0.0); assertEquals(new Dropout(DROPOUT_DL4J), layer.getDropOut()); - assertEquals(KERNEL_SIZE, layer.getKernel()); + assertEquals(KERNEL_SIZE, layer.getKernelSize()); assertEquals(STRIDE, layer.getStride()); assertEquals(N_OUT, layer.getNOut()); assertEquals(ConvolutionMode.Truncate, layer.getConvolutionMode()); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/ILayerConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/ILayerConfiguration.java index 1462661bb..e3414d39e 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/ILayerConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/net/brutex/ai/dnn/api/ILayerConfiguration.java @@ -21,6 +21,7 @@ package net.brutex.ai.dnn.api; + public interface ILayerConfiguration { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetBaseBuilderConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetBaseBuilderConfiguration.java index 5a6c88fb9..f7111b153 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetBaseBuilderConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetBaseBuilderConfiguration.java @@ -561,13 +561,12 @@ public abstract class NeuralNetBaseBuilderConfiguration implements INeuralNetwor List innerConfigurations$value = new ArrayList<>(); // initialize with an empty list - public B activation(IActivation activation) { + public B activation(Activation activation) { this.activation = activation; return self(); } - - public B activation(Activation activation) { 
- this.activation = activation.getActivationFunction(); + public B activation(IActivation activation) { + this.activation = activation; return self(); } /** diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/AttentionVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/AttentionVertex.java index 1a5a30e72..e50e1aae5 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/AttentionVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/graph/AttentionVertex.java @@ -157,9 +157,9 @@ public class AttentionVertex extends SameDiffVertex { val Wv = paramTable.get(WEIGHT_KEY_VALUE_PROJECTION); val Wo = paramTable.get(WEIGHT_KEY_OUT_PROJECTION); - attention = sameDiff.nn.multiHeadDotProductAttention(getLayerName(), queries, keys, values, Wq, Wk, Wv, Wo, mask, true); + attention = sameDiff.nn.multiHeadDotProductAttention(getName(), queries, keys, values, Wq, Wk, Wv, Wo, mask, true); }else{ - attention = sameDiff.nn.dotProductAttention(getLayerName(), queries, keys, values, mask, true); + attention = sameDiff.nn.dotProductAttention(getName(), queries, keys, values, mask, true); } if(maskVars != null){ diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ActivationLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ActivationLayer.java index 625835b65..48aaacded 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ActivationLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ActivationLayer.java @@ -53,11 +53,11 @@ public class ActivationLayer extends NoParamLayer { public static ActivationLayerBuilder builder(Activation activation) { return innerBuilder().activation(activation); } - public static ActivationLayerBuilder builder(IActivation activation) { return innerBuilder().activation(activation); } + public static 
ActivationLayerBuilder builder() { return innerBuilder(); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BatchNormalization.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BatchNormalization.java index a5b3a2c87..095d5b3bd 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BatchNormalization.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BatchNormalization.java @@ -25,6 +25,7 @@ import java.util.List; import java.util.Map; import lombok.*; import lombok.experimental.SuperBuilder; +import org.deeplearning4j.nn.api.layers.LayerConstraint; import net.brutex.ai.dnn.api.LayerType; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; @@ -80,19 +81,38 @@ public class BatchNormalization extends FeedForwardLayer { @lombok.Builder.Default protected boolean isMinibatch = true; /** - * Used only when 'true' is passed to {@link #lockGammaBeta(boolean)}. Value is not used otherwise.
Default: + * Used only when 'true' is passed to {@link BatchNormalizationBuilder#lockGammaBeta(boolean)}. Value is not used otherwise.
Default: * 1.0 * * @param gamma Gamma parameter for all activations, used only with locked gamma/beta configuration mode */ @lombok.Builder.Default protected double gamma = 1.0; /** - * Used only when 'true' is passed to {@link #lockGammaBeta(boolean)}. Value is not used otherwise.
Default: + * Used only when 'true' is passed to {@link BatchNormalizationBuilder#lockGammaBeta(boolean)}. Value is not used otherwise.
Default: * 0.0 * * @param beta Beta parameter for all activations, used only with locked gamma/beta configuration mode */ @lombok.Builder.Default protected double beta = 0.0; + /** + * Set constraints to be applied to the beta parameter of this batch normalisation layer. Default: no + * constraints.
Constraints can be used to enforce certain conditions (non-negativity of parameters, + * max-norm regularization, etc). These constraints are applied at each iteration, after the parameters have + * been updated. + * + */ + protected List betaConstraints; + + /** + * Set constraints to be applied to the gamma parameter of this batch normalisation layer. Default: no + * constraints.
Constraints can be used to enforce certain conditions (non-negativity of parameters, + * max-norm regularization, etc). These constraints are applied at each iteration, after the parameters have + * been updated. + * + */ + protected List gammaConstraints; + + /** * When using CuDNN or MKLDNN and an error is encountered, should fallback to the non-helper implementation be allowed? * If set to false, an exception in the helper will be propagated back to the user. If true, the built-in @@ -298,6 +318,15 @@ public class BatchNormalization extends FeedForwardLayer { this.cudnnAllowFallback$set = true; return self(); } + + public B constrainBeta(LayerConstraint ... constraints) { + this.betaConstraints = List.of(constraints); + return self(); + } + public B constrainGamma(LayerConstraint ... constraints) { + this.gammaConstraints = List.of(constraints); + return self(); + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1DLayer.java index f8b87c974..ecbba495e 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1DLayer.java @@ -51,6 +51,16 @@ import org.nd4j.linalg.api.ndarray.INDArray; @SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder") public class Convolution1DLayer extends ConvolutionLayer { @Builder.Default private RNNFormat rnnDataFormat = RNNFormat.NCW; + /** + * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last). + * See {@link CNN2DFormat} for more details.
+ * Default: NCHW + * + * @param format Format for activations (in and out) + */ + @Builder.Default + protected CNN2DFormat dataFormat = + CNN2DFormat.NCHW; // default value for legacy serialization reasons /** * Size of the convolution * diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution3D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution3D.java index 0dc5a5f02..086fcb677 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution3D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution3D.java @@ -26,10 +26,7 @@ import lombok.*; import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; -import org.deeplearning4j.nn.conf.CNN2DFormat; -import org.deeplearning4j.nn.conf.ConvolutionMode; -import org.deeplearning4j.nn.conf.InputPreProcessor; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.*; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.layers.convolution.Convolution3DLayer; import org.deeplearning4j.nn.params.Convolution3DParamInitializer; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java index ecdd6b1c2..8aaea0d98 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java @@ -65,6 +65,18 @@ public class ConvolutionLayer extends FeedForwardLayer { * details Default is {@link ConvolutionMode}.Truncate. 
*/ @Builder.Default protected ConvolutionMode convolutionMode = ConvolutionMode.Truncate; + + /** + * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last). + * See {@link CNN2DFormat} for more details.
+ * Default: NCHW + * + * @param format Format for activations (in and out) + */ + @Builder.Default + protected CNN2DFormat convFormat = + CNN2DFormat.NCHW; // default value for legacy serialization reasons + /** * Kernel dilation. Default: {1, 1}, which is standard convolutions. Used for implementing dilated * convolutions, which are also known as atrous convolutions. @@ -86,16 +98,7 @@ public class ConvolutionLayer extends FeedForwardLayer { * false, the built-in (non-CuDNN) implementation for ConvolutionLayer will be used */ @Builder.Default protected boolean cudnnAllowFallback = true; - /** - * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last). - * See {@link CNN2DFormat} for more details.
- * Default: NCHW - * - * @param format Format for activations (in and out) - */ - @Builder.Default - protected CNN2DFormat dataFormat = - CNN2DFormat.NCHW; // default value for legacy serialization reasons + /** Defaults to "PREFER_FASTEST", but "NO_WORKSPACE" uses less memory. */ @Builder.Default protected AlgoMode cudnnAlgoMode = AlgoMode.PREFER_FASTEST; @@ -179,7 +182,7 @@ public class ConvolutionLayer extends FeedForwardLayer { nOut, layerIndex, getName(), - dataFormat, + convFormat, ConvolutionLayer.class); } @@ -196,11 +199,11 @@ public class ConvolutionLayer extends FeedForwardLayer { if (!defaultValueOverriden || nIn <= 0 || override) { InputType.InputTypeConvolutional c = (InputType.InputTypeConvolutional) inputType; this.nIn = c.getChannels(); - this.dataFormat = ((InputType.InputTypeConvolutional) inputType).getFormat(); + this.convFormat = ((InputType.InputTypeConvolutional) inputType).getFormat(); } - if (dataFormat == null || override) - this.dataFormat = ((InputType.InputTypeConvolutional) inputType).getFormat(); + if (convFormat == null || override) + this.convFormat = ((InputType.InputTypeConvolutional) inputType).getFormat(); } @Override diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DepthwiseConvolution2D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DepthwiseConvolution2D.java index 11319f8e0..06980d565 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DepthwiseConvolution2D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DepthwiseConvolution2D.java @@ -53,6 +53,16 @@ public class DepthwiseConvolution2D extends ConvolutionLayer { */ @Builder.Default protected int depthMultiplier = 1; + /** + * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last). + * See {@link CNN2DFormat} for more details.
+ * Default: NCHW + * + * @param format Format for activations (in and out) + */ + @Builder.Default + protected CNN2DFormat dataFormat = + CNN2DFormat.NCHW; // default value for legacy serialization reasons /** * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last). * See {@link CNN2DFormat} for more details.
diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingLayer.java index d774ff656..82d80ab84 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/EmbeddingLayer.java @@ -36,6 +36,7 @@ import org.deeplearning4j.nn.weights.embeddings.ArrayEmbeddingInitializer; import org.deeplearning4j.nn.weights.embeddings.EmbeddingInitializer; import org.deeplearning4j.nn.weights.embeddings.WeightInitEmbedding; import org.deeplearning4j.optimize.api.TrainingListener; +import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.activations.impl.ActivationIdentity; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; @@ -63,7 +64,7 @@ public class EmbeddingLayer extends FeedForwardLayer { */ public static EmbeddingLayerBuilder builder() { return innerBuilder() - .activation(new ActivationIdentity()); + .activation(Activation.IDENTITY); } public static abstract class EmbeddingLayerBuilder> diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerConfiguration.java index 356a50e13..179ecd5da 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerConfiguration.java @@ -88,7 +88,7 @@ public abstract class LayerConfiguration * Activation#getActivationFunction()} but not vice versa. The default is Identity Activation. 
*/ @Builder.Default - @Getter @Setter private IActivation activation = new ActivationIdentity(); + @Getter @Setter private IActivation activation = Activation.IDENTITY; /** * Get the activation interface (function) from the activation. The activation must have been set @@ -333,9 +333,9 @@ public abstract class LayerConfiguration runInheritance(getNetConfiguration()); } - public static abstract class LayerConfigurationBuilder> { + public static abstract class LayerConfigurationBuilder> { public B activation(Activation activation) { - this.activation$value = activation.getActivationFunction(); + this.activation$value = activation; this.activation$set = true; return self(); } @@ -344,6 +344,7 @@ public abstract class LayerConfiguration this.activation$set = true; return self(); } + public B dropOut(double d) { this.dropOut = new Dropout(d); return self(); @@ -352,6 +353,14 @@ public abstract class LayerConfiguration this.dropOut = d; return self(); } + + public B constrainBias(LayerConstraint constraint) { + return this.biasConstraints(List.of(constraint)); + } + + public B constrainWeights(LayerConstraint constraint) { + return this.weightConstraints(List.of(constraint)); + } } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LearnedSelfAttentionLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LearnedSelfAttentionLayer.java index db2da5133..f56dc79ee 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LearnedSelfAttentionLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LearnedSelfAttentionLayer.java @@ -20,7 +20,9 @@ package org.deeplearning4j.nn.conf.layers; +import java.util.Map; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.RNNFormat; @@ -32,223 +34,161 @@ import 
org.nd4j.autodiff.samediff.SDIndex; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.common.base.Preconditions; +import org.nd4j.common.primitives.Pair; import org.nd4j.linalg.api.memory.MemoryWorkspace; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.common.primitives.Pair; - -import java.util.Map; @Data @EqualsAndHashCode(callSuper = true) +@SuperBuilder(buildMethodName = "initBuild") public class LearnedSelfAttentionLayer extends SameDiffLayer { - private long nIn; - private long nOut; - private int nHeads; - private long headSize; - private boolean projectInput; - private int nQueries; + private static final String WEIGHT_KEY_QUERY_PROJECTION = "Wq"; + private static final String WEIGHT_KEY_KEY_PROJECTION = "Wk"; + private static final String WEIGHT_KEY_VALUE_PROJECTION = "Wv"; + private static final String WEIGHT_KEY_OUT_PROJECTION = "Wo"; + private static final String WEIGHT_QUERIES = "Q"; + /** Number of inputs to the layer (input size) */ + private int nIn; + /** Number of outputs (output size) */ + private int nOut; + /** Number of Attention Heads */ + private int nHeads; + /** Size of attention heads */ + private int headSize; + /** Project input before applying attention or not. 
*/ + private boolean projectInput; + /** Number of queries to learn */ + private int nQueries; - private static final String WEIGHT_KEY_QUERY_PROJECTION = "Wq"; - private static final String WEIGHT_KEY_KEY_PROJECTION = "Wk"; - private static final String WEIGHT_KEY_VALUE_PROJECTION = "Wv"; - private static final String WEIGHT_KEY_OUT_PROJECTION = "Wo"; - private static final String WEIGHT_QUERIES = "Q"; + private LearnedSelfAttentionLayer() { + /*No arg constructor for serialization*/ + } - private LearnedSelfAttentionLayer(){/*No arg constructor for serialization*/} + @Override + public InputPreProcessor getPreProcessorForInputType(InputType inputType) { + return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType, RNNFormat.NCW, getName()); + } - protected LearnedSelfAttentionLayer(Builder builder){ - super(builder); - nIn = builder.nIn; - nOut = builder.nOut; - nHeads = builder.nHeads; - headSize = builder.headSize == 0 ? nOut / nHeads : builder.headSize; - projectInput = builder.projectInput; - nQueries = builder.nQueries; + @Override + public void setNIn(InputType inputType, boolean override) { + if (inputType == null || inputType.getType() != InputType.Type.RNN) { + throw new IllegalStateException( + "Invalid input for Learned Self Attention layer (layer name = \"" + + getName() + + "\"): expect RNN input type with size > 0. 
Got: " + + inputType); } - @Override - public InputPreProcessor getPreProcessorForInputType(InputType inputType) { - return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType, RNNFormat.NCW, getName()); + if (nIn <= 0 || override) { + InputType.InputTypeRecurrent r = (InputType.InputTypeRecurrent) inputType; + this.nIn = (int) r.getSize(); + } + } + + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + if (inputType == null || inputType.getType() != InputType.Type.RNN) { + throw new IllegalStateException( + "Invalid input for Learned Self Attention layer (layer index = " + + layerIndex + + ", layer name = \"" + + getName() + + "\"): expect RNN input type with size > 0. Got: " + + inputType); } - @Override - public void setNIn(InputType inputType, boolean override) { - if (inputType == null || inputType.getType() != InputType.Type.RNN) { - throw new IllegalStateException("Invalid input for Learned Self Attention layer (layer name = \"" + getName() - + "\"): expect RNN input type with size > 0. Got: " + inputType); - } - - if (nIn <= 0 || override) { - InputType.InputTypeRecurrent r = (InputType.InputTypeRecurrent) inputType; - this.nIn = r.getSize(); - } + if (projectInput) { + return InputType.recurrent(nOut, nQueries); + } else { + return InputType.recurrent(nIn, nQueries); } + } - @Override - public InputType getOutputType(int layerIndex, InputType inputType) { - if (inputType == null || inputType.getType() != InputType.Type.RNN) { - throw new IllegalStateException("Invalid input for Learned Self Attention layer (layer index = " + layerIndex - + ", layer name = \"" + getName() + "\"): expect RNN input type with size > 0. 
Got: " - + inputType); - } + @Override + public void defineParameters(SDLayerParams params) { + params.clear(); - if(projectInput){ - return InputType.recurrent(nOut, nQueries); - }else{ - return InputType.recurrent(nIn, nQueries); - } + params.addWeightParam(WEIGHT_QUERIES, 1, nIn, nQueries); + + if (projectInput) { + params.addWeightParam(WEIGHT_KEY_QUERY_PROJECTION, nHeads, headSize, nIn); + params.addWeightParam(WEIGHT_KEY_KEY_PROJECTION, nHeads, headSize, nIn); + params.addWeightParam(WEIGHT_KEY_VALUE_PROJECTION, nHeads, headSize, nIn); + params.addWeightParam(WEIGHT_KEY_OUT_PROJECTION, nHeads * headSize, nOut); } + } - @Override - public void defineParameters(SDLayerParams params) { - params.clear(); - - params.addWeightParam(WEIGHT_QUERIES, 1, nIn, nQueries); - - if(projectInput){ - params.addWeightParam(WEIGHT_KEY_QUERY_PROJECTION, nHeads, headSize, nIn); - params.addWeightParam(WEIGHT_KEY_KEY_PROJECTION, nHeads, headSize, nIn); - params.addWeightParam(WEIGHT_KEY_VALUE_PROJECTION, nHeads, headSize, nIn); - params.addWeightParam(WEIGHT_KEY_OUT_PROJECTION, nHeads * headSize, nOut); + @Override + public void initializeParameters(Map params) { + try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) { + for (Map.Entry e : params.entrySet()) { + if (e.getKey().equals(WEIGHT_KEY_OUT_PROJECTION)) { + WeightInitUtil.initWeights( + nIn, headSize, e.getValue().shape(), weightInit, null, 'c', e.getValue()); + } else if (e.getKey().equals(WEIGHT_QUERIES)) { + WeightInitUtil.initWeights( + nIn, nQueries, e.getValue().shape(), weightInit, null, 'c', e.getValue()); + } else { + WeightInitUtil.initWeights( + nHeads * headSize, nOut, e.getValue().shape(), weightInit, null, 'c', e.getValue()); } + } } + } - @Override - public void initializeParameters(Map params) { - try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) { - for (Map.Entry e : params.entrySet()) { - if(e.getKey().equals(WEIGHT_KEY_OUT_PROJECTION)){ - 
WeightInitUtil.initWeights(nIn, headSize, e.getValue().shape(), weightInit, null, 'c', e.getValue()); - }else if(e.getKey().equals(WEIGHT_QUERIES)){ - WeightInitUtil.initWeights(nIn, nQueries, e.getValue().shape(), weightInit, null, 'c', e.getValue()); - }else{ - WeightInitUtil.initWeights(nHeads * headSize, nOut, e.getValue().shape(), weightInit, null, 'c', e.getValue()); - } - } - } + @Override + public SDVariable defineLayer( + SameDiff sameDiff, + SDVariable layerInput, + Map paramTable, + SDVariable mask) { + val baseQueries = paramTable.get(WEIGHT_QUERIES); + val batchSize = layerInput.shape().get(SDIndex.point(0)); + val tileAxis = + sameDiff.scatterUpdate( + sameDiff.onesLike(layerInput.shape()), sameDiff.constant(0), batchSize); + + val queries = sameDiff.tile(baseQueries, tileAxis); + + if (projectInput) { + val Wq = paramTable.get(WEIGHT_KEY_QUERY_PROJECTION); + val Wk = paramTable.get(WEIGHT_KEY_KEY_PROJECTION); + val Wv = paramTable.get(WEIGHT_KEY_VALUE_PROJECTION); + val Wo = paramTable.get(WEIGHT_KEY_OUT_PROJECTION); + + return sameDiff.nn.multiHeadDotProductAttention( + getName(), queries, layerInput, layerInput, Wq, Wk, Wv, Wo, mask, true); + } else { + return sameDiff.nn.dotProductAttention( + getName(), queries, layerInput, layerInput, mask, true); } + } + @Override + public Pair feedForwardMaskArray( + INDArray maskArray, MaskState currentMaskState, int minibatchSize) { + // No further mask propagation here, as the results have taken any mask into account, like in a + // global pooling layer + return null; + } - @Override - public SDVariable defineLayer(SameDiff sameDiff, SDVariable layerInput, Map paramTable, SDVariable mask) { - val baseQueries = paramTable.get(WEIGHT_QUERIES); - val batchSize = layerInput.shape().get(SDIndex.point(0)); - val tileAxis = sameDiff.scatterUpdate(sameDiff.onesLike(layerInput.shape()), sameDiff.constant(0), batchSize); + public abstract static class LearnedSelfAttentionLayerBuilder< + C extends 
LearnedSelfAttentionLayer, B extends LearnedSelfAttentionLayerBuilder> + extends SameDiffLayerBuilder { + public C build() { + Preconditions.checkArgument( + this.projectInput || this.nHeads == 1, "projectInput must be true when nHeads != 1"); + Preconditions.checkArgument( + this.projectInput || nIn == nOut, "nIn must be equal to nOut when projectInput is false"); + Preconditions.checkArgument( + !this.projectInput || nOut != 0, "nOut must be specified when projectInput is true"); + Preconditions.checkArgument( + this.nOut % nHeads == 0 || headSize > 0, + "nOut isn't divided by nHeads cleanly. Specify the headSize manually."); + Preconditions.checkArgument(this.nQueries > 0, "You must set numQueries."); - val queries = sameDiff.tile(baseQueries, tileAxis); - - if(projectInput){ - val Wq = paramTable.get(WEIGHT_KEY_QUERY_PROJECTION); - val Wk = paramTable.get(WEIGHT_KEY_KEY_PROJECTION); - val Wv = paramTable.get(WEIGHT_KEY_VALUE_PROJECTION); - val Wo = paramTable.get(WEIGHT_KEY_OUT_PROJECTION); - - return sameDiff.nn.multiHeadDotProductAttention(getName(), queries, layerInput, layerInput, Wq, Wk, Wv, Wo, mask, true); - }else{ - return sameDiff.nn.dotProductAttention(getName(), queries, layerInput, layerInput, mask, true); - } - } - - @Override - public Pair feedForwardMaskArray(INDArray maskArray, MaskState currentMaskState, int minibatchSize) { - // No further mask propagation here, as the results have taken any mask into account, like in a global pooling layer - return null; - } - - @Getter - @Setter - public static class Builder extends SameDiffLayer.Builder { - - /** - * Number of inputs to the layer (input size) - */ - private int nIn; - - /** - * Number of outputs (output size) - */ - private int nOut; - - /** - * Number of Attention Heads - */ - private int nHeads; - - /** - * Size of attention heads - */ - private int headSize; - - /** - * Project input before applying attention or not. 
- */ - private boolean projectInput; - - - /** - * Number of queries to learn - */ - private int nQueries; - - /** - * @param nIn Number of inputs to the layer (input size) - */ - public Builder nIn(int nIn) { - this.nIn = nIn; - return this; - } - - /** - * @param nOut Number of outputs (output size) - */ - public Builder nOut(int nOut) { - this.nOut = nOut; - return this; - } - - /** - * Number of Attention Heads - */ - public Builder nHeads(int nHeads){ - this.nHeads = nHeads; - return this; - } - - /** - * Size of attention heads - */ - public Builder headSize(int headSize){ - this.headSize = headSize; - return this; - } - - /** - * Project input before applying attention or not. - */ - public Builder projectInput(boolean projectInput){ - this.projectInput = projectInput; - return this; - } - - /** - * Number of queries to learn - */ - public Builder nQueries(int nQueries){ - this.nQueries = nQueries; - return this; - } - - @Override - @SuppressWarnings("unchecked") - public LearnedSelfAttentionLayer build() { - Preconditions.checkArgument(this.projectInput || this.nHeads == 1, "projectInput must be true when nHeads != 1"); - Preconditions.checkArgument(this.projectInput || nIn == nOut, "nIn must be equal to nOut when projectInput is false"); - Preconditions.checkArgument(!this.projectInput || nOut != 0, "nOut must be specified when projectInput is true"); - Preconditions.checkArgument(this.nOut % nHeads == 0 || headSize > 0, "nOut isn't divided by nHeads cleanly. 
Specify the headSize manually."); - Preconditions.checkArgument(this.nQueries > 0, "You must set numQueries."); - - return new LearnedSelfAttentionLayer(this); - } + return initBuild(); } + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected2D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected2D.java index 138ecfd73..2d5b448a5 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected2D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected2D.java @@ -41,6 +41,7 @@ import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.enums.PadMode; import org.nd4j.linalg.activations.Activation; +import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.api.memory.MemoryWorkspace; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; @@ -333,6 +334,11 @@ public class LocallyConnected2D extends SameDiffLayer { return self(); } + public B inputSize(int ... size) { + this.inputSize = size; + return self(); + } + public B stride(int ... 
stride) { this.stride$value = ValidationUtils.validate2NonNegative(stride, false, "stride"); this.stride$set = true; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PrimaryCapsules.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PrimaryCapsules.java index 8617adf7d..9edb50322 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PrimaryCapsules.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/PrimaryCapsules.java @@ -20,7 +20,9 @@ package org.deeplearning4j.nn.conf.layers; +import java.util.Map; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.inputs.InputType.InputTypeConvolutional; @@ -37,388 +39,387 @@ import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.impl.layers.convolution.config.Conv2DConfig; import org.nd4j.linalg.factory.Nd4j; -import java.util.Map; - @Data @NoArgsConstructor @EqualsAndHashCode(callSuper = true) +@SuperBuilder(buildMethodName = "initBuild", builderMethodName = "innerBuilder") public class PrimaryCapsules extends SameDiffLayer { - private int[] kernelSize; - private int[] stride; - private int[] padding; - private int[] dilation; - private int inputChannels; - private int channels; + private static final String WEIGHT_PARAM = "weight"; + private static final String BIAS_PARAM = "bias"; + /** + * Sets the kernel size of the 2d convolution + * + * @param kernelSize + * @return + */ + @Builder.Default private int[] kernelSize = new int[] {9, 9}; + /** + * Sets the stride of the 2d convolution + * + * @param stride + * @return + */ + @Builder.Default private int[] stride = new int[] {2, 2}; + /** + * Sets the padding of the 2d convolution + * + * @param padding + * @return + */ + @Builder.Default private int[] padding = new int[] {0, 0}; + /** + * 
Sets the dilation of the 2d convolution + * + * @param dilation + * @return + */ + @Builder.Default private int[] dilation = new int[] {1, 1}; - private boolean hasBias; + private int inputChannels; + /** + * Sets the number of channels to use in the 2d convolution. + * + *

Note that the actual number of channels is channels * capsuleDimensions + * + *

Does the same thing as nOut() + * + * @param channels + * @return + */ + @Builder.Default private int channels = 32; - private int capsules; - private int capsuleDimensions; + @Builder.Default private boolean hasBias = true; + /** + * Usually inferred automatically. + * + * @param capsules + * @return + */ + private int capsules; + /** + * Sets the number of dimensions to use in the capsules. + * + * @param capsuleDimensions + * @return + */ + private int capsuleDimensions; + /** + * The convolution mode to use in the 2d convolution + * + * @param convolutionMode + * @return + */ + @Builder.Default private ConvolutionMode convolutionMode = ConvolutionMode.Truncate; + /** + * Whether to use a ReLU activation on the 2d convolution + * + * @param useRelu + * @return + */ + @Builder.Default private boolean useRelU = false; + /** + * Use a LeakyReLU activation on the 2d convolution + * + * @param leak the alpha value for the LeakyReLU activation. + * @return + */ + @Builder.Default private double useLeakyReLU = 0; - private ConvolutionMode convolutionMode = ConvolutionMode.Truncate; + public static PrimaryCapsulesBuilder builder() { + return innerBuilder(); + } - private boolean useRelu = false; - private double leak = 0; + public static PrimaryCapsulesBuilder builder( + int capsuleDimensions, + int channels, + int[] kernelSize, + int[] stride, + int[] padding, + int[] dilation, + ConvolutionMode convolutionMode) { + return innerBuilder() + .capsuleDimensions(capsuleDimensions) + .channels(channels) + .kernelSize(kernelSize) + .stride(stride) + .padding(padding) + .dilation(dilation) + .convolutionMode(convolutionMode); + } - private static final String WEIGHT_PARAM = "weight"; - private static final String BIAS_PARAM = "bias"; + public static PrimaryCapsulesBuilder builder( + int capsuleDimensions, + int channels, + int[] kernelSize, + int[] stride, + int[] padding, + int[] dilation) { + return innerBuilder() + .capsuleDimensions(capsuleDimensions) + 
.channels(channels) + .kernelSize(kernelSize) + .stride(stride) + .padding(padding) + .dilation(dilation); + } - public PrimaryCapsules(Builder builder){ - super(builder); + public static PrimaryCapsulesBuilder builder( + int capsuleDimensions, int channels, int[] kernelSize, int[] stride, int[] padding) { + return innerBuilder() + .capsuleDimensions(capsuleDimensions) + .channels(channels) + .kernelSize(kernelSize) + .stride(stride) + .padding(padding); + } - this.kernelSize = builder.kernelSize; - this.stride = builder.stride; - this.padding = builder.padding; - this.dilation = builder.dilation; - this.channels = builder.channels; - this.hasBias = builder.hasBias; - this.capsules = builder.capsules; - this.capsuleDimensions = builder.capsuleDimensions; - this.convolutionMode = builder.convolutionMode; - this.useRelu = builder.useRelu; - this.leak = builder.leak; + public static PrimaryCapsulesBuilder builder( + int capsuleDimensions, int channels, int[] kernelSize, int[] stride) { + return innerBuilder() + .capsuleDimensions(capsuleDimensions) + .channels(channels) + .kernelSize(kernelSize) + .stride(stride); + } - if(capsuleDimensions <= 0 || channels <= 0){ - throw new IllegalArgumentException("Invalid configuration for Primary Capsules (layer name = \"" - + name + "\"):" - + " capsuleDimensions and channels must be > 0. Got: " - + capsuleDimensions + ", " + channels); - } + public static PrimaryCapsulesBuilder builder( + int capsuleDimensions, int channels, int[] kernelSize) { + return innerBuilder() + .capsuleDimensions(capsuleDimensions) + .channels(channels) + .kernelSize(kernelSize); + } - if(capsules < 0){ - throw new IllegalArgumentException("Invalid configuration for Capsule ILayer (layer name = \"" - + name + "\"):" - + " capsules must be >= 0 if set. 
Got: " - + capsules); - } + public static PrimaryCapsulesBuilder builder(int capsuleDimensions, int channels) { + return innerBuilder().capsuleDimensions(capsuleDimensions).channels(channels); + } + @Override + public SDVariable defineLayer( + SameDiff SD, SDVariable input, Map paramTable, SDVariable mask) { + Conv2DConfig conf = + Conv2DConfig.builder() + .kH(kernelSize[0]) + .kW(kernelSize[1]) + .sH(stride[0]) + .sW(stride[1]) + .pH(padding[0]) + .pW(padding[1]) + .dH(dilation[0]) + .dW(dilation[1]) + .isSameMode(convolutionMode == ConvolutionMode.Same) + .build(); + + SDVariable conved; + + if (hasBias) { + conved = SD.cnn.conv2d(input, paramTable.get(WEIGHT_PARAM), paramTable.get(BIAS_PARAM), conf); + } else { + conved = SD.cnn.conv2d(input, paramTable.get(WEIGHT_PARAM), conf); } - @Override - public SDVariable defineLayer(SameDiff SD, SDVariable input, Map paramTable, SDVariable mask) { - Conv2DConfig conf = Conv2DConfig.builder() - .kH(kernelSize[0]).kW(kernelSize[1]) - .sH(stride[0]).sW(stride[1]) - .pH(padding[0]).pW(padding[1]) - .dH(dilation[0]).dW(dilation[1]) - .isSameMode(convolutionMode == ConvolutionMode.Same) - .build(); - - SDVariable conved; - - if(hasBias){ - conved = SD.cnn.conv2d(input, paramTable.get(WEIGHT_PARAM), paramTable.get(BIAS_PARAM), conf); - } else { - conved = SD.cnn.conv2d(input, paramTable.get(WEIGHT_PARAM), conf); - } - - if(useRelu){ - if(leak == 0) { - conved = SD.nn.relu(conved, 0); - } else { - conved = SD.nn.leakyRelu(conved, leak); - } - } - - SDVariable reshaped = conved.reshape(-1, capsules, capsuleDimensions); - return CapsuleUtils.squash(SD, reshaped, 2); + if (useRelU) { + if (useLeakyReLU == 0) { + conved = SD.nn.relu(conved, 0); + } else { + conved = SD.nn.leakyRelu(conved, useLeakyReLU); + } } - @Override - public void defineParameters(SDLayerParams params) { - params.clear(); - params.addWeightParam(WEIGHT_PARAM, - kernelSize[0], kernelSize[1], inputChannels, (long) capsuleDimensions * channels); + SDVariable 
reshaped = conved.reshape(-1, capsules, capsuleDimensions); + return CapsuleUtils.squash(SD, reshaped, 2); + } - if(hasBias){ - params.addBiasParam(BIAS_PARAM, (long) capsuleDimensions * channels); + @Override + public void defineParameters(SDLayerParams params) { + params.clear(); + params.addWeightParam( + WEIGHT_PARAM, + kernelSize[0], + kernelSize[1], + inputChannels, + (long) capsuleDimensions * channels); + + if (hasBias) { + params.addBiasParam(BIAS_PARAM, (long) capsuleDimensions * channels); + } + } + + @Override + public void initializeParameters(Map params) { + try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) { + for (Map.Entry e : params.entrySet()) { + if (BIAS_PARAM.equals(e.getKey())) { + e.getValue().assign(0); + } else if (WEIGHT_PARAM.equals(e.getKey())) { + double fanIn = inputChannels * kernelSize[0] * kernelSize[1]; + double fanOut = + capsuleDimensions + * channels + * kernelSize[0] + * kernelSize[1] + / ((double) stride[0] * stride[1]); + WeightInitUtil.initWeights( + fanIn, fanOut, e.getValue().shape(), weightInit, null, 'c', e.getValue()); } + } + } + } + + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + if (inputType == null || inputType.getType() != Type.CNN) { + throw new IllegalStateException( + "Invalid input for Primary Capsules layer (layer name = \"" + + name + + "\"): expect CNN input. 
Got: " + + inputType); } - @Override - public void initializeParameters(Map params) { - try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) { - for (Map.Entry e : params.entrySet()) { - if (BIAS_PARAM.equals(e.getKey())) { - e.getValue().assign(0); - } else if(WEIGHT_PARAM.equals(e.getKey())){ - double fanIn = inputChannels * kernelSize[0] * kernelSize[1]; - double fanOut = capsuleDimensions * channels * kernelSize[0] * kernelSize[1] / ((double) stride[0] * stride[1]); - WeightInitUtil.initWeights(fanIn, fanOut, e.getValue().shape(), weightInit, null, 'c', - e.getValue()); - } - } - } + if (capsules > 0) { + return InputType.recurrent(capsules, capsuleDimensions); + } else { + + InputTypeConvolutional out = + (InputTypeConvolutional) + InputTypeUtil.getOutputTypeCnnLayers( + inputType, + kernelSize, + stride, + padding, + dilation, + convolutionMode, + (long) capsuleDimensions * channels, + -1, + getName(), + PrimaryCapsules.class); + + return InputType.recurrent( + (int) (out.getChannels() * out.getHeight() * out.getWidth() / capsuleDimensions), + capsuleDimensions); + } + } + + @Override + public void setNIn(InputType inputType, boolean override) { + if (inputType == null || inputType.getType() != Type.CNN) { + throw new IllegalStateException( + "Invalid input for Primary Capsules layer (layer name = \"" + + name + + "\"): expect CNN input. Got: " + + inputType); } - @Override - public InputType getOutputType(int layerIndex, InputType inputType) { - if (inputType == null || inputType.getType() != Type.CNN) { - throw new IllegalStateException("Invalid input for Primary Capsules layer (layer name = \"" - + name + "\"): expect CNN input. 
Got: " + inputType); - } + InputTypeConvolutional ci = (InputTypeConvolutional) inputType; - if(capsules > 0){ - return InputType.recurrent(capsules, capsuleDimensions); - } else { + this.inputChannels = (int) ci.getChannels(); - InputTypeConvolutional out = (InputTypeConvolutional) InputTypeUtil - .getOutputTypeCnnLayers(inputType, kernelSize, stride, padding, dilation, convolutionMode, - (long) capsuleDimensions * channels, -1, getName(), PrimaryCapsules.class); + if (capsules <= 0 || override) { - return InputType.recurrent((int) (out.getChannels() * out.getHeight() * out.getWidth() / capsuleDimensions), - capsuleDimensions); - } + InputTypeConvolutional out = + (InputTypeConvolutional) + InputTypeUtil.getOutputTypeCnnLayers( + inputType, + kernelSize, + stride, + padding, + dilation, + convolutionMode, + (long) capsuleDimensions * channels, + -1, + getName(), + PrimaryCapsules.class); + + this.capsules = + (int) (out.getChannels() * out.getHeight() * out.getWidth() / capsuleDimensions); + } + } + + public abstract static class PrimaryCapsulesBuilder< + C extends PrimaryCapsules, B extends PrimaryCapsulesBuilder> + extends SameDiffLayerBuilder { + + public B kernelSize(int... kernelSize) { + this.kernelSize$value = ValidationUtils.validate2NonNegative(kernelSize, true, "kernelSize"); + this.kernelSize$set = true; + return self(); } - @Override - public void setNIn(InputType inputType, boolean override) { - if (inputType == null || inputType.getType() != Type.CNN) { - throw new IllegalStateException("Invalid input for Primary Capsules layer (layer name = \"" - + name + "\"): expect CNN input. 
Got: " + inputType); - } - - InputTypeConvolutional ci = (InputTypeConvolutional) inputType; - - this.inputChannels = (int) ci.getChannels(); - - if(capsules <= 0 || override) { - - InputTypeConvolutional out = (InputTypeConvolutional) InputTypeUtil - .getOutputTypeCnnLayers(inputType, kernelSize, stride, padding, dilation, convolutionMode, - (long) capsuleDimensions * channels, -1, getName(), PrimaryCapsules.class); - - this.capsules = (int) (out.getChannels() * out.getHeight() * out.getWidth() / capsuleDimensions); - } + public B stride(int... stride) { + this.stride$value = ValidationUtils.validate2NonNegative(stride, true, "stride"); + this.stride$set = true; + return self(); } - @Getter - @Setter - public static class Builder extends SameDiffLayer.Builder{ - - @Setter(AccessLevel.NONE) - private int[] kernelSize = new int[]{9, 9}; - - @Setter(AccessLevel.NONE) - private int[] stride = new int[]{2, 2}; - - @Setter(AccessLevel.NONE) - private int[] padding = new int[]{0, 0}; - - @Setter(AccessLevel.NONE) - private int[] dilation = new int[]{1, 1}; - - private int channels = 32; - - private boolean hasBias = true; - - private int capsules; - private int capsuleDimensions; - - private ConvolutionMode convolutionMode = ConvolutionMode.Truncate; - - private boolean useRelu = false; - private double leak = 0; - - - public void setKernelSize(int... kernelSize){ - this.kernelSize = ValidationUtils.validate2NonNegative(kernelSize, true, "kernelSize"); - } - - public void setStride(int... stride){ - this.stride = ValidationUtils.validate2NonNegative(stride, true, "stride"); - } - - public void setPadding(int... padding){ - this.padding = ValidationUtils.validate2NonNegative(padding, true, "padding"); - } - - public void setDilation(int... 
dilation){ - this.dilation = ValidationUtils.validate2NonNegative(dilation, true, "dilation"); - } - - - public Builder(int capsuleDimensions, int channels, - int[] kernelSize, int[] stride, int[] padding, int[] dilation, - ConvolutionMode convolutionMode){ - this.capsuleDimensions = capsuleDimensions; - this.channels = channels; - this.setKernelSize(kernelSize); - this.setStride(stride); - this.setPadding(padding); - this.setDilation(dilation); - this.convolutionMode = convolutionMode; - } - - public Builder(int capsuleDimensions, int channels, - int[] kernelSize, int[] stride, int[] padding, int[] dilation){ - this(capsuleDimensions, channels, kernelSize, stride, padding, dilation, ConvolutionMode.Truncate); - } - - public Builder(int capsuleDimensions, int channels, - int[] kernelSize, int[] stride, int[] padding){ - this(capsuleDimensions, channels, kernelSize, stride, padding, new int[]{1, 1}, ConvolutionMode.Truncate); - } - - public Builder(int capsuleDimensions, int channels, - int[] kernelSize, int[] stride){ - this(capsuleDimensions, channels, kernelSize, stride, new int[]{0, 0}, new int[]{1, 1}, ConvolutionMode.Truncate); - } - - public Builder(int capsuleDimensions, int channels, - int[] kernelSize){ - this(capsuleDimensions, channels, kernelSize, new int[]{2, 2}, new int[]{0, 0}, new int[]{1, 1}, ConvolutionMode.Truncate); - } - - public Builder(int capsuleDimensions, int channels){ - this(capsuleDimensions, channels, new int[]{9, 9}, new int[]{2, 2}, new int[]{0, 0}, new int[]{1, 1}, ConvolutionMode.Truncate); - } - - /** - * Sets the kernel size of the 2d convolution - * - * @see ConvolutionLayer.Builder#kernelSize(int...) - * @param kernelSize - * @return - */ - public Builder kernelSize(int... kernelSize){ - this.setKernelSize(kernelSize); - return this; - } - - /** - * Sets the stride of the 2d convolution - * - * @see ConvolutionLayer.Builder#stride(int...) - * @param stride - * @return - */ - public Builder stride(int... 
stride){ - this.setStride(stride); - return this; - } - - /** - * Sets the padding of the 2d convolution - * - * @see ConvolutionLayer.Builder#padding(int...) - * @param padding - * @return - */ - public Builder padding(int... padding){ - this.setPadding(padding); - return this; - } - - /** - * Sets the dilation of the 2d convolution - * - * @see ConvolutionLayer.Builder#dilation(int...) - * @param dilation - * @return - */ - public Builder dilation(int... dilation){ - this.setDilation(dilation); - return this; - } - - /** - * Sets the number of channels to use in the 2d convolution. - * - * Note that the actual number of channels is channels * capsuleDimensions - * - * Does the same thing as nOut() - * - * @param channels - * @return - */ - public Builder channels(int channels){ - this.channels = channels; - return this; - } - - /** - * Sets the number of channels to use in the 2d convolution. - * - * Note that the actual number of channels is channels * capsuleDimensions - * - * Does the same thing as channels() - * - * @param nOut - * @return - */ - public Builder nOut(int nOut){ - return channels(nOut); - } - - /** - * Sets the number of dimensions to use in the capsules. - * @param capsuleDimensions - * @return - */ - public Builder capsuleDimensions(int capsuleDimensions){ - this.capsuleDimensions = capsuleDimensions; - return this; - } - - /** - * Usually inferred automatically. 
- * @param capsules - * @return - */ - public Builder capsules(int capsules){ - this.capsules = capsules; - return this; - } - - public Builder hasBias(boolean hasBias){ - this.hasBias = hasBias; - return this; - } - - /** - * The convolution mode to use in the 2d convolution - * @param convolutionMode - * @return - */ - public Builder convolutionMode(ConvolutionMode convolutionMode){ - this.convolutionMode = convolutionMode; - return this; - } - - /** - * Whether to use a ReLU activation on the 2d convolution - * @param useRelu - * @return - */ - public Builder useReLU(boolean useRelu){ - this.useRelu = useRelu; - return this; - } - - /** - * Use a ReLU activation on the 2d convolution - * @return - */ - public Builder useReLU(){ - return useReLU(true); - } - - /** - * Use a LeakyReLU activation on the 2d convolution - * @param leak the alpha value for the LeakyReLU activation. - * @return - */ - public Builder useLeakyReLU(double leak){ - this.useRelu = true; - this.leak = leak; - return this; - } - - @Override - public E build() { - return (E) new PrimaryCapsules(this); - } + public B padding(int... padding) { + this.padding$value = ValidationUtils.validate2NonNegative(padding, true, "padding"); + this.padding$set = true; + return self(); } + + public B dilation(int... dilation) { + this.dilation$value = ValidationUtils.validate2NonNegative(dilation, true, "dilation"); + this.dilation$set = true; + return self(); + } + /** + * Sets the number of channels to use in the 2d convolution. + * + *

Note that the actual number of channels is channels * capsuleDimensions + * + *

Does the same thing as channels() + * + * @param nOut + * @return + */ + public B nOut(int nOut) { + return channels(nOut); + } + /** + * Use a ReLU activation on the 2d convolution + * + * @return + */ + public B useReLU() { + return useRelU(true); + } + + /** + * Use a LeakyReLU activation on the 2d convolution. Implies {@link #useReLU()} set true. + * + * @param leak the alpha value for the LeakyReLU activation. + * @return + */ + public B useLeakyReLU(double leak) { + this.useRelU(true); + this.useLeakyReLU$value = leak; + this.useLeakyReLU$set = true; + return self(); + } + + public C build() { + C l = initBuild(); + if (capsuleDimensions <= 0 || channels$value <= 0) { + throw new IllegalArgumentException( + "Invalid configuration for Primary Capsules (layer name = \"" + + l.getName() + + "\"):" + + " capsuleDimensions and channels must be > 0. Got: " + + capsuleDimensions + + ", " + + channels$value); + } + + if (capsules < 0) { + throw new IllegalArgumentException( + "Invalid configuration for Capsule ILayer (layer name = \"" + + l.getName() + + "\"):" + + " capsules must be >= 0 if set. 
Got: " + + capsules); + } + return l; + } + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RecurrentAttentionLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RecurrentAttentionLayer.java index 67a821b3c..ec2ed14bc 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RecurrentAttentionLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RecurrentAttentionLayer.java @@ -21,6 +21,7 @@ package org.deeplearning4j.nn.conf.layers; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.RNNFormat; @@ -41,15 +42,63 @@ import org.nd4j.linalg.factory.Nd4j; import java.util.Map; @Data +@NoArgsConstructor @EqualsAndHashCode(callSuper = true) +@SuperBuilder(buildMethodName = "initBuild") public class RecurrentAttentionLayer extends SameDiffLayer { - private long nIn; - private long nOut; + + public static abstract class RecurrentAttentionLayerBuilder> + extends SameDiffLayerBuilder { + + public C build() { + Preconditions.checkArgument(this.projectInput$value || this.nHeads == 1, "projectInput must be true when nHeads != 1"); + Preconditions.checkArgument(this.projectInput$value || nIn == nOut, "nIn must be equal to nOut when projectInput is false"); + Preconditions.checkArgument(!this.projectInput$value || nOut != 0, "nOut must be specified when projectInput is true"); + Preconditions.checkArgument(this.nOut % nHeads == 0 || headSize > 0, "nOut isn't divided by nHeads cleanly. 
Specify the headSize manually."); + + C l = initBuild(); + return l; + } + } + + /** + * Number of inputs to the layer (input size) + */ + private int nIn; + + /** + * Number of outputs (output size) + */ + private int nOut; + + /** + * Number of Attention Heads + */ private int nHeads; - private long headSize; - private boolean projectInput; - private Activation activation; - private boolean hasBias; + + /** + * Size of attention heads + */ + private int headSize; + + /** + * Project input before applying attention or not. + */ + @Builder.Default + private boolean projectInput = true; + + /** + * If true (default is true) the layer will have a bias + */ + @Builder.Default + private boolean hasBias = true; + + /** + * Activation function for the layer + */ + @Builder.Default + private Activation activation = Activation.TANH; + private static final String WEIGHT_KEY_QUERY_PROJECTION = "Wq"; private static final String WEIGHT_KEY_KEY_PROJECTION = "Wk"; @@ -60,18 +109,7 @@ public class RecurrentAttentionLayer extends SameDiffLayer { private static final String RECURRENT_WEIGHT_KEY = SimpleRnnParamInitializer.RECURRENT_WEIGHT_KEY; private int timeSteps; - private RecurrentAttentionLayer(){/*No arg constructor for serialization*/} - protected RecurrentAttentionLayer(Builder builder){ - super(builder); - nIn = builder.nIn; - nOut = builder.nOut; - nHeads = builder.nHeads; - headSize = builder.headSize == 0 ? 
nOut / nHeads : builder.headSize; - projectInput = builder.projectInput; - activation = builder.activation; - hasBias = builder.hasBias; - } @Override public InputPreProcessor getPreProcessorForInputType(InputType inputType) { @@ -87,7 +125,7 @@ public class RecurrentAttentionLayer extends SameDiffLayer { if (nIn <= 0 || override) { InputType.InputTypeRecurrent r = (InputType.InputTypeRecurrent) inputType; - this.nIn = r.getSize(); + this.nIn = (int) r.getSize(); } } @@ -206,109 +244,5 @@ public class RecurrentAttentionLayer extends SameDiffLayer { return sameDiff.concat(2, outputSlices); } - @Getter - @Setter - public static class Builder extends SameDiffLayer.Builder { - /** - * Number of inputs to the layer (input size) - */ - private int nIn; - - /** - * Number of outputs (output size) - */ - private int nOut; - - /** - * Number of Attention Heads - */ - private int nHeads; - - /** - * Size of attention heads - */ - private int headSize; - - /** - * Project input before applying attention or not. - */ - private boolean projectInput = true; - - /** - * If true (default is true) the layer will have a bias - */ - private boolean hasBias = true; - - /** - * Activation function for the layer - */ - private Activation activation = Activation.TANH; - - /** - * @param nIn Number of inputs to the layer (input size) - */ - public Builder nIn(int nIn) { - this.nIn = nIn; - return this; - } - - /** - * @param nOut Number of outputs (output size) - */ - public Builder nOut(int nOut) { - this.nOut = nOut; - return this; - } - - /** - * Number of Attention Heads - */ - public Builder nHeads(int nHeads){ - this.nHeads = nHeads; - return this; - } - - /** - * Size of attention heads - */ - public Builder headSize(int headSize){ - this.headSize = headSize; - return this; - } - - /** - * Project input before applying attention or not. 
- */ - public Builder projectInput(boolean projectInput){ - this.projectInput = projectInput; - return this; - } - - /** - * @param hasBias If true (default is true) the layer will have a bias - */ - public Builder hasBias(boolean hasBias) { - this.hasBias = hasBias; - return this; - } - - /** - * @param activation Activation function for the layer - */ - public Builder activation(Activation activation) { - this.activation = activation; - return this; - } - - @Override - @SuppressWarnings("unchecked") - public RecurrentAttentionLayer build() { - Preconditions.checkArgument(this.projectInput || this.nHeads == 1, "projectInput must be true when nHeads != 1"); - Preconditions.checkArgument(this.projectInput || nIn == nOut, "nIn must be equal to nOut when projectInput is false"); - Preconditions.checkArgument(!this.projectInput || nOut != 0, "nOut must be specified when projectInput is true"); - Preconditions.checkArgument(this.nOut % nHeads == 0 || headSize > 0, "nOut isn't divided by nHeads cleanly. 
Specify the headSize manually."); - return new RecurrentAttentionLayer(this); - } - } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SelfAttentionLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SelfAttentionLayer.java index ab7947201..2f399d5e1 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SelfAttentionLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SelfAttentionLayer.java @@ -20,7 +20,9 @@ package org.deeplearning4j.nn.conf.layers; +import java.util.Map; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.RNNFormat; import org.deeplearning4j.nn.conf.inputs.InputType; @@ -34,186 +36,130 @@ import org.nd4j.linalg.api.memory.MemoryWorkspace; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; -import java.util.Map; - @Data @EqualsAndHashCode(callSuper = true) +@NoArgsConstructor() +@SuperBuilder(buildMethodName = "initBuild") public class SelfAttentionLayer extends SameDiffLayer { - private long nIn; - private long nOut; - private int nHeads; - private long headSize; - private boolean projectInput; + private static final String WEIGHT_KEY_QUERY_PROJECTION = "Wq"; + private static final String WEIGHT_KEY_KEY_PROJECTION = "Wk"; + private static final String WEIGHT_KEY_VALUE_PROJECTION = "Wv"; + private static final String WEIGHT_KEY_OUT_PROJECTION = "Wo"; + /** Number of inputs to the layer (input size) */ + private int nIn; + /** Number of outputs (output size) */ + private int nOut; + /** Number of Attention Heads */ + private int nHeads; + /** Size of attention heads */ + private int headSize; + /** Project input before applying attention or not. 
*/ + private boolean projectInput; - private static final String WEIGHT_KEY_QUERY_PROJECTION = "Wq"; - private static final String WEIGHT_KEY_KEY_PROJECTION = "Wk"; - private static final String WEIGHT_KEY_VALUE_PROJECTION = "Wv"; - private static final String WEIGHT_KEY_OUT_PROJECTION = "Wo"; + @Override + public InputPreProcessor getPreProcessorForInputType(InputType inputType) { + return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType, RNNFormat.NCW, getName()); + } - private SelfAttentionLayer(){/*No arg constructor for serialization*/} - - protected SelfAttentionLayer(Builder builder){ - super(builder); - nIn = builder.nIn; - nOut = builder.nOut; - nHeads = builder.nHeads; - headSize = builder.headSize == 0 ? nOut / nHeads : builder.headSize; - projectInput = builder.projectInput; + @Override + public void setNIn(InputType inputType, boolean override) { + if (inputType == null || inputType.getType() != InputType.Type.RNN) { + throw new IllegalStateException( + "Invalid input for Self Attention layer (layer name = \"" + + getName() + + "\"): expect RNN input type with size > 0. Got: " + + inputType); } - @Override - public InputPreProcessor getPreProcessorForInputType(InputType inputType) { - return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType, RNNFormat.NCW, getName()); + if (nIn <= 0 || override) { + InputType.InputTypeRecurrent r = (InputType.InputTypeRecurrent) inputType; + this.nIn = (int) r.getSize(); + } + } + + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + if (inputType == null || inputType.getType() != InputType.Type.RNN) { + throw new IllegalStateException( + "Invalid input for Self Attention layer (layer index = " + + layerIndex + + ", layer name = \"" + + getName() + + "\"): expect RNN input type with size > 0. 
Got: " + + inputType); } - @Override - public void setNIn(InputType inputType, boolean override) { - if (inputType == null || inputType.getType() != InputType.Type.RNN) { - throw new IllegalStateException("Invalid input for Self Attention layer (layer name = \"" + getName() - + "\"): expect RNN input type with size > 0. Got: " + inputType); - } + InputType.InputTypeRecurrent itr = (InputType.InputTypeRecurrent) inputType; - if (nIn <= 0 || override) { - InputType.InputTypeRecurrent r = (InputType.InputTypeRecurrent) inputType; - this.nIn = r.getSize(); - } + if (projectInput) { + return InputType.recurrent(nOut, itr.getTimeSeriesLength()); + } else { + return InputType.recurrent(nIn, itr.getTimeSeriesLength()); } + } - @Override - public InputType getOutputType(int layerIndex, InputType inputType) { - if (inputType == null || inputType.getType() != InputType.Type.RNN) { - throw new IllegalStateException("Invalid input for Self Attention layer (layer index = " + layerIndex - + ", layer name = \"" + getName() + "\"): expect RNN input type with size > 0. 
Got: " - + inputType); - } + @Override + public void defineParameters(SDLayerParams params) { + params.clear(); - InputType.InputTypeRecurrent itr = (InputType.InputTypeRecurrent) inputType; - - if(projectInput){ - return InputType.recurrent(nOut, itr.getTimeSeriesLength()); - }else{ - return InputType.recurrent(nIn, itr.getTimeSeriesLength()); - } + if (projectInput) { + params.addWeightParam(WEIGHT_KEY_QUERY_PROJECTION, nHeads, headSize, nIn); + params.addWeightParam(WEIGHT_KEY_KEY_PROJECTION, nHeads, headSize, nIn); + params.addWeightParam(WEIGHT_KEY_VALUE_PROJECTION, nHeads, headSize, nIn); + params.addWeightParam(WEIGHT_KEY_OUT_PROJECTION, nHeads * headSize, nOut); } + } - @Override - public void defineParameters(SDLayerParams params) { - params.clear(); - - if(projectInput){ - params.addWeightParam(WEIGHT_KEY_QUERY_PROJECTION, nHeads, headSize, nIn); - params.addWeightParam(WEIGHT_KEY_KEY_PROJECTION, nHeads, headSize, nIn); - params.addWeightParam(WEIGHT_KEY_VALUE_PROJECTION, nHeads, headSize, nIn); - params.addWeightParam(WEIGHT_KEY_OUT_PROJECTION, nHeads * headSize, nOut); + @Override + public void initializeParameters(Map params) { + try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) { + for (Map.Entry e : params.entrySet()) { + if (e.getKey().equals(WEIGHT_KEY_OUT_PROJECTION)) { + WeightInitUtil.initWeights( + nIn, headSize, e.getValue().shape(), weightInit, null, 'c', e.getValue()); + } else { + WeightInitUtil.initWeights( + nHeads * headSize, nOut, e.getValue().shape(), weightInit, null, 'c', e.getValue()); } + } } + } - @Override - public void initializeParameters(Map params) { - try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) { - for (Map.Entry e : params.entrySet()) { - if(e.getKey().equals(WEIGHT_KEY_OUT_PROJECTION)){ - WeightInitUtil.initWeights(nIn, headSize, e.getValue().shape(), weightInit, null, 'c', e.getValue()); - }else{ - WeightInitUtil.initWeights(nHeads * headSize, nOut, 
e.getValue().shape(), weightInit, null, 'c', e.getValue()); - } - } - } + @Override + public SDVariable defineLayer( + SameDiff sameDiff, + SDVariable layerInput, + Map paramTable, + SDVariable mask) { + if (projectInput) { + val Wq = paramTable.get(WEIGHT_KEY_QUERY_PROJECTION); + val Wk = paramTable.get(WEIGHT_KEY_KEY_PROJECTION); + val Wv = paramTable.get(WEIGHT_KEY_VALUE_PROJECTION); + val Wo = paramTable.get(WEIGHT_KEY_OUT_PROJECTION); + + return sameDiff.nn.multiHeadDotProductAttention( + getName(), layerInput, layerInput, layerInput, Wq, Wk, Wv, Wo, mask, true); + } else { + return sameDiff.nn.dotProductAttention( + getName(), layerInput, layerInput, layerInput, mask, true); } + } + public abstract static class SelfAttentionLayerBuilder< + C extends SelfAttentionLayer, B extends SelfAttentionLayerBuilder> + extends SameDiffLayerBuilder { + public C build() { + Preconditions.checkArgument( + this.projectInput || this.nHeads == 1, "projectInput must be true when nHeads != 1"); + Preconditions.checkArgument( + this.projectInput || nIn == nOut, "nIn must be equal to nOut when projectInput is false"); + Preconditions.checkArgument( + !this.projectInput || nOut != 0, "nOut must be specified when projectInput is true"); + Preconditions.checkArgument( + this.nOut % nHeads == 0 || headSize > 0, + "nOut isn't divided by nHeads cleanly. 
Specify the headSize manually."); - @Override - public SDVariable defineLayer(SameDiff sameDiff, SDVariable layerInput, Map paramTable, SDVariable mask) { - if(projectInput){ - val Wq = paramTable.get(WEIGHT_KEY_QUERY_PROJECTION); - val Wk = paramTable.get(WEIGHT_KEY_KEY_PROJECTION); - val Wv = paramTable.get(WEIGHT_KEY_VALUE_PROJECTION); - val Wo = paramTable.get(WEIGHT_KEY_OUT_PROJECTION); - - return sameDiff.nn.multiHeadDotProductAttention(getName(), layerInput, layerInput, layerInput, Wq, Wk, Wv, Wo, mask, true); - }else{ - return sameDiff.nn.dotProductAttention(getName(), layerInput, layerInput, layerInput, mask, true); - } - } - - - @Getter - @Setter - public static class Builder extends SameDiffLayer.Builder { - - /** - * Number of inputs to the layer (input size) - */ - private int nIn; - - /** - * Number of outputs (output size) - */ - private int nOut; - - /** - * Number of Attention Heads - */ - private int nHeads; - - /** - * Size of attention heads - */ - private int headSize; - - /** - * Project input before applying attention or not. - */ - private boolean projectInput; - - /** - * @param nIn Number of inputs to the layer (input size) - */ - public Builder nIn(int nIn) { - this.nIn = nIn; - return this; - } - - /** - * @param nOut Number of outputs (output size) - */ - public Builder nOut(int nOut) { - this.nOut = nOut; - return this; - } - - /** - * Number of Attention Heads - */ - public Builder nHeads(int nHeads){ - this.nHeads = nHeads; - return this; - } - - /** - * Size of attention heads - */ - public Builder headSize(int headSize){ - this.headSize = headSize; - return this; - } - - /** - * Project input before applying attention or not. 
- */ - public Builder projectInput(boolean projectInput){ - this.projectInput = projectInput; - return this; - } - - @Override - @SuppressWarnings("unchecked") - public SelfAttentionLayer build() { - Preconditions.checkArgument(this.projectInput || this.nHeads == 1, "projectInput must be true when nHeads != 1"); - Preconditions.checkArgument(this.projectInput || nIn == nOut, "nIn must be equal to nOut when projectInput is false"); - Preconditions.checkArgument(!this.projectInput || nOut != 0, "nOut must be specified when projectInput is true"); - Preconditions.checkArgument(this.nOut % nHeads == 0 || headSize > 0, "nOut isn't divided by nHeads cleanly. Specify the headSize manually."); - return new SelfAttentionLayer(this); - } + return initBuild(); } + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SeparableConvolution2D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SeparableConvolution2D.java index bc036193c..c6ed235ab 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SeparableConvolution2D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SeparableConvolution2D.java @@ -63,7 +63,16 @@ public class SeparableConvolution2D extends ConvolutionLayer { * @return Builder */ @Builder.Default private int depthMultiplier = 1; - + /** + * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last). + * See {@link CNN2DFormat} for more details.
+ * Default: NCHW + * + * @param format Format for activations (in and out) + */ + @Builder.Default + protected CNN2DFormat dataFormat = + CNN2DFormat.NCHW; // default value for legacy serialization reasons public static SeparableConvolution2DBuilder builder() { return innerBuilder(); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToBatchLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToBatchLayer.java index 13bbe63c1..88a7b2444 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToBatchLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToBatchLayer.java @@ -20,7 +20,10 @@ package org.deeplearning4j.nn.conf.layers; +import java.util.Collection; +import java.util.Map; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.InputPreProcessor; @@ -35,195 +38,160 @@ import org.nd4j.common.base.Preconditions; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -import java.util.Collection; -import java.util.Map; - @Data @NoArgsConstructor @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@SuperBuilder(builderMethodName = "innerBuilder") public class SpaceToBatchLayer extends NoParamLayer { - // TODO: throw error when block and padding dims don't match + /** + * Block size for SpaceToBatch layer. Should be a length 2 array for the height and width + * dimensions + */ + protected int[] blockSize; + /** A 2d array, with format [[padTop, padBottom], [padLeft, padRight]] */ + @Builder.Default protected int[][] padding = new int[][] {{0, 0}, {0, 0}}; + /** + * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last). + * See {@link CNN2DFormat} for more details.
+ * Default: NCHW + * + * @param format Format for activations (in and out) + */ + @Builder.Default protected CNN2DFormat dataFormat = CNN2DFormat.NCHW; - protected int[] blocks; - protected int[][] padding; - protected CNN2DFormat format = CNN2DFormat.NCHW; + public static SpaceToBatchLayerBuilder builder() { + return innerBuilder(); + } + // TODO: throw error when block and padding dims don't match + /** + * @param blocks Block size for SpaceToBatch layer. Should be a length 2 array for the height and + * width dimensions + */ + public static SpaceToBatchLayerBuilder builder(int[] blocks) { + return innerBuilder().blockSize(blocks); + } - protected SpaceToBatchLayer(Builder builder) { - super(builder); - this.blocks = builder.blocks; - this.padding = builder.padding; - this.format = builder.format; + /** + * @param blocks Block size for SpaceToBatch layer. Should be a length 2 array for the height and + * width dimensions + * @param padding Padding - should be a 2d array, with format [[padTop, padBottom], [padLeft, + * padRight]] + */ + public static SpaceToBatchLayerBuilder builder(int[] blocks, int[][] padding) { + return innerBuilder().blockSize(blocks).padding(padding); + } + + @Override + public SpaceToBatchLayer clone() { + return (SpaceToBatchLayer) super.clone(); + } + + @Override + public org.deeplearning4j.nn.api.Layer instantiate( + NeuralNetConfiguration conf, + Collection trainingListeners, + int layerIndex, + INDArray layerParamsView, + boolean initializeParams, + DataType networkDataType) { + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + + org.deeplearning4j.nn.layers.convolution.SpaceToBatch ret = + new org.deeplearning4j.nn.layers.convolution.SpaceToBatch(lconf, networkDataType); + ret.addTrainingListeners(trainingListeners); + ret.setIndex(layerIndex); + ret.setParamsViewArray(layerParamsView); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); + ret.setParamTable(paramTable); + 
ret.setLayerConfiguration(lconf); + return ret; + } + + @Override + public LayerMemoryReport getMemoryReport(InputType inputType) { + InputType.InputTypeConvolutional c = (InputType.InputTypeConvolutional) inputType; + InputType.InputTypeConvolutional outputType = + (InputType.InputTypeConvolutional) getOutputType(-1, inputType); + + return new LayerMemoryReport.Builder(name, SpaceToBatchLayer.class, inputType, outputType) + .standardMemory(0, 0) // No params + .cacheMemory( + MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) // No caching + .build(); + } + + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + if (inputType == null || inputType.getType() != InputType.Type.CNN) { + throw new IllegalStateException( + "Invalid input for Subsampling layer (layer name=\"" + + getName() + + "\"): Expected CNN input, got " + + inputType); + } + InputType.InputTypeConvolutional i = (InputType.InputTypeConvolutional) inputType; + return InputType.convolutional( + (i.getHeight() + padding[0][0] + padding[0][1]) / blockSize[0], + (i.getWidth() + padding[1][0] + padding[1][1]) / blockSize[1], + i.getChannels(), + i.getFormat()); + } + + @Override + public ParamInitializer initializer() { + return EmptyParamInitializer.getInstance(); + } + + @Override + public void setNIn(InputType inputType, boolean override) { + Preconditions.checkState( + inputType.getType() == InputType.Type.CNN, + "Only CNN input types can be used with SpaceToBatchLayer, got %s", + inputType); + this.dataFormat = ((InputType.InputTypeConvolutional) inputType).getFormat(); + } + + @Override + public InputPreProcessor getPreProcessorForInputType(InputType inputType) { + if (inputType == null) { + throw new IllegalStateException( + "Invalid input for space to batch layer (layer name=\"" + + getName() + + "\"): input is null"); + } + return InputTypeUtil.getPreProcessorForInputTypeCnnLayers(inputType, getName()); + } + + @Override + public boolean 
isPretrainParam(String paramName) { + throw new UnsupportedOperationException("SpaceToBatchLayer does not contain parameters"); + } + + public abstract static class SpaceToBatchLayerBuilder< + C extends SpaceToBatchLayer, B extends SpaceToBatchLayerBuilder> + extends NoParamLayerBuilder { + /** + * @param blocks Block size for SpaceToBatch layer. Should be a length 2 array for the height + * and width dimensions + * @return + */ + public B blockSize(int... blocks) { + this.blockSize = ValidationUtils.validate2NonNegative(blocks, false, "blocks"); + return self(); } - @Override - public SpaceToBatchLayer clone() { - return (SpaceToBatchLayer) super.clone(); + /** + * @param padding Padding - should be a 2d array, with format [[padTop, padBottom], [padLeft, + * padRight]] + * @return + */ + public B padding(int[][] padding) { + this.padding$value = ValidationUtils.validate2x2NonNegative(padding, "padding"); + this.padding$set = true; + return self(); } - - @Override - public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, - Collection trainingListeners, int layerIndex, INDArray layerParamsView, - boolean initializeParams, DataType networkDataType) { - LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); - - org.deeplearning4j.nn.layers.convolution.SpaceToBatch ret = - new org.deeplearning4j.nn.layers.convolution.SpaceToBatch(lconf, networkDataType); - ret.addTrainingListeners(trainingListeners); - ret.setIndex(layerIndex); - ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(this, layerParamsView, initializeParams); - ret.setParamTable(paramTable); - ret.setLayerConfiguration(lconf); - return ret; - } - - @Override - public LayerMemoryReport getMemoryReport(InputType inputType) { - InputType.InputTypeConvolutional c = (InputType.InputTypeConvolutional) inputType; - InputType.InputTypeConvolutional outputType = (InputType.InputTypeConvolutional) getOutputType(-1, inputType); - - return 
new LayerMemoryReport.Builder(name, SpaceToBatchLayer.class, inputType, outputType) - .standardMemory(0, 0) //No params - .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching - .build(); - } - - @Override - public InputType getOutputType(int layerIndex, InputType inputType) { - if (inputType == null || inputType.getType() != InputType.Type.CNN) { - throw new IllegalStateException("Invalid input for Subsampling layer (layer name=\"" + getName() - + "\"): Expected CNN input, got " + inputType); - } - InputType.InputTypeConvolutional i = (InputType.InputTypeConvolutional) inputType; - return InputType.convolutional((i.getHeight() + padding[0][0] + padding[0][1]) / blocks[0], - (i.getWidth() + padding[1][0] + padding[1][1]) / blocks[1], i.getChannels(), i.getFormat()); - } - - @Override - public ParamInitializer initializer() { - return EmptyParamInitializer.getInstance(); - } - - - @Override - public void setNIn(InputType inputType, boolean override) { - Preconditions.checkState(inputType.getType() == InputType.Type.CNN, "Only CNN input types can be used with SpaceToBatchLayer, got %s", inputType); - this.format = ((InputType.InputTypeConvolutional)inputType).getFormat(); - } - - @Override - public InputPreProcessor getPreProcessorForInputType(InputType inputType) { - if (inputType == null) { - throw new IllegalStateException("Invalid input for space to batch layer (layer name=\"" + getName() - + "\"): input is null"); - } - return InputTypeUtil.getPreProcessorForInputTypeCnnLayers(inputType, getName()); - } - - @Override - public boolean isPretrainParam(String paramName) { - throw new UnsupportedOperationException("SpaceToBatchLayer does not contain parameters"); - } - - - @NoArgsConstructor - @Getter - @Setter - public static class Builder> extends LayerConfiguration.Builder { - - /** - * Block size for SpaceToBatch layer. 
Should be a length 2 array for the height and width - * dimensions - */ - @Setter(AccessLevel.NONE) - protected int[] blocks; - - /** - * A 2d array, with format [[padTop, padBottom], [padLeft, padRight]] - */ - protected int[][] padding; - - protected CNN2DFormat format = CNN2DFormat.NCHW; - - /** - * @param blocks Block size for SpaceToBatch layer. Should be a length 2 array for the height and width - * dimensions - */ - public void setBlocks(int... blocks) { - this.blocks = ValidationUtils.validate2NonNegative(blocks, false, "blocks"); - } - - /** - * @param padding Padding - should be a 2d array, with format [[padTop, padBottom], [padLeft, padRight]] - */ - public void setPadding(int[][] padding) { - this.padding = ValidationUtils.validate2x2NonNegative(padding, "padding"); - } - - - /** - * @param blocks Block size for SpaceToBatch layer. Should be a length 2 array for the height and width - * dimensions - */ - public Builder(int[] blocks) { - this.setBlocks(blocks); - this.setPadding(new int[][] {{0, 0}, {0, 0}}); - } - - /** - * @param blocks Block size for SpaceToBatch layer. Should be a length 2 array for the height and width - * dimensions - * @param padding Padding - should be a 2d array, with format [[padTop, padBottom], [padLeft, padRight]] - */ - public Builder(int[] blocks, int[][] padding) { - this.setBlocks(blocks); - this.setPadding(padding); - } - - /** - * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last). - * See {@link CNN2DFormat} for more details.
- * Default: NCHW - * @param format Format for activations (in and out) - */ - public T dataFormat(CNN2DFormat format){ - this.format = format; - return (T)this; - } - - /** - * @param blocks Block size for SpaceToBatch layer. Should be a length 2 array for the height and width - * dimensions - */ - public T blocks(int... blocks) { - this.setBlocks(blocks); - return (T) this; - } - - /** - * @param padding Padding - should be a 2d array, with format [[padTop, padBottom], [padLeft, padRight]] - */ - public T padding(int[][] padding) { - this.setPadding(padding); - return (T) this; - } - - @Override - public T name(String layerName) { - this.setLayerName(layerName); - return (T) this; - } - - @Override - @SuppressWarnings("unchecked") - public SpaceToBatchLayer build() { - if(padding == null) - setPadding(new int[][] {{0, 0}, {0, 0}}); - return new SpaceToBatchLayer(this); - } - } - + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToDepthLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToDepthLayer.java index 114fc2753..6b884a5d0 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToDepthLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SpaceToDepthLayer.java @@ -21,6 +21,7 @@ package org.deeplearning4j.nn.conf.layers; import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.InputPreProcessor; @@ -40,6 +41,7 @@ import java.util.Map; @NoArgsConstructor @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) +@SuperBuilder public class SpaceToDepthLayer extends NoParamLayer { /** @@ -53,16 +55,20 @@ public class SpaceToDepthLayer extends NoParamLayer { return this == NCHW ? 
CNN2DFormat.NCHW : CNN2DFormat.NHWC; } } - + /** + * @param blockSize Block size + */ protected int blockSize; - protected CNN2DFormat dataFormat; + /** + * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last). + * See {@link CNN2DFormat} for more details.
+ * Default: NCHW + * @param dataFormat Format for activations (in and out) + */ + @Builder.Default + protected CNN2DFormat dataFormat = CNN2DFormat.NCHW; - protected SpaceToDepthLayer(Builder builder) { - super(builder); - this.setBlockSize(builder.blockSize); - this.setDataFormat(builder.dataFormat); - } @Override public SpaceToDepthLayer clone() { @@ -74,7 +80,7 @@ public class SpaceToDepthLayer extends NoParamLayer { Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); - +runInheritance(); org.deeplearning4j.nn.layers.convolution.SpaceToDepth ret = new org.deeplearning4j.nn.layers.convolution.SpaceToDepth(lconf, networkDataType); ret.addTrainingListeners(trainingListeners); @@ -133,78 +139,5 @@ public class SpaceToDepthLayer extends NoParamLayer { } - @NoArgsConstructor - @Getter - @Setter - public static class Builder> extends LayerConfiguration.Builder { - - protected int blockSize; - - /** - * Data format for input activations. Note DL4J uses NCHW in most cases - */ - protected CNN2DFormat dataFormat = CNN2DFormat.NCHW; - - /** - * @param blockSize Block size - */ - public Builder(int blockSize) { - this.setBlockSize(blockSize); - } - - /** - * @param blockSize Block size - * @param dataFormat Data format for input activations. Note DL4J uses NCHW in most cases - */ - @Deprecated - public Builder(int blockSize, DataFormat dataFormat) { - this(blockSize, dataFormat.toFormat()); - } - - public Builder(int blockSize, CNN2DFormat dataFormat) { - this.setBlockSize(blockSize); - this.setDataFormat(dataFormat); - } - - /** - * @param blockSize Block size - */ - public T blocks(int blockSize) { - this.setBlockSize(blockSize); - return (T) this; - } - - /** - * @param dataFormat Data format for input activations. 
Note DL4J uses NCHW in most cases - * @deprecated Use {@link #dataFormat(CNN2DFormat)} - */ - @Deprecated - public T dataFormat(DataFormat dataFormat) { - return dataFormat(dataFormat.toFormat()); - } - - /** - * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last). - * See {@link CNN2DFormat} for more details.
- * Default: NCHW - * @param dataFormat Format for activations (in and out) - */ - public T dataFormat(CNN2DFormat dataFormat) { - this.setDataFormat(dataFormat); - return (T) this; - } - - @Override - public T name(String layerName) { - this.setLayerName(layerName); - return (T) this; - } - - @Override - @SuppressWarnings("unchecked") - public SpaceToDepthLayer build() { - return new SpaceToDepthLayer(this); - } - } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/Yolo2OutputLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/Yolo2OutputLayer.java index b7e88d8be..b67ac6a8b 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/Yolo2OutputLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/objdetect/Yolo2OutputLayer.java @@ -20,10 +20,14 @@ package org.deeplearning4j.nn.conf.layers.objdetect; -import lombok.Data; -import lombok.EqualsAndHashCode; -import lombok.Getter; -import lombok.Setter; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import lombok.*; +import lombok.experimental.SuperBuilder; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.conf.CNN2DFormat; @@ -41,218 +45,139 @@ import org.nd4j.linalg.learning.regularization.Regularization; import org.nd4j.linalg.lossfunctions.ILossFunction; import org.nd4j.linalg.lossfunctions.impl.LossL2; import org.nd4j.serde.jackson.shaded.NDArrayTextSerializer; -import com.fasterxml.jackson.databind.annotation.JsonDeserialize; -import com.fasterxml.jackson.databind.annotation.JsonSerialize; - -import java.util.Arrays; -import java.util.Collection; -import java.util.List; -import java.util.Map; @Data 
@EqualsAndHashCode(callSuper = false) +@SuperBuilder(buildMethodName = "initBuild") public class Yolo2OutputLayer extends LayerConfiguration { - private double lambdaCoord; - private double lambdaNoObj; - private ILossFunction lossPositionScale; - private ILossFunction lossClassPredictions; - @JsonSerialize(using = NDArrayTextSerializer.class) - @JsonDeserialize(using = BoundingBoxesDeserializer.class) - private INDArray boundingBoxes; + /** + * Loss function coefficient for position and size/scale components of the loss function. Default + * (as per paper): 5 + */ + @Builder.Default private double lambdaCoord = 5; + /** + * Loss function coefficient for the "no object confidence" components of the loss function. + * Default (as per paper): 0.5 + */ + @Builder.Default private double lambdaNoObj = 0.5; + /** Loss function for position/scale component of the loss function */ + @Builder.Default private ILossFunction lossPositionScale = new LossL2(); + /** + * Loss function for the class predictions - defaults to L2 loss (i.e., sum of squared errors, as + * per the paper), however Loss MCXENT could also be used (which is more common for + * classification). + */ + @Builder.Default private ILossFunction lossClassPredictions = new LossL2(); + ; + /** + * Bounding box priors dimensions [width, height]. For N bounding boxes, input has shape [rows, + * columns] = [N, 2] Note that dimensions should be specified as fraction of grid size. For + * example, a network with 13x13 output, a value of 1.0 would correspond to one grid cell; a value + * of 13 would correspond to the entire image. 
+ */ + @JsonSerialize(using = NDArrayTextSerializer.class) + @JsonDeserialize(using = BoundingBoxesDeserializer.class) + @Builder.Default + private INDArray boundingBoxes; - private CNN2DFormat format = CNN2DFormat.NCHW; //Default for serialization of old formats + @Builder.Default + private CNN2DFormat format = CNN2DFormat.NCHW; // Default for serialization of old formats - private Yolo2OutputLayer() { - //No-arg constructor for Jackson JSON - } + private Yolo2OutputLayer() { + // No-arg constructor for Jackson JSON + } - private Yolo2OutputLayer(Builder builder) { - super(builder); - this.lambdaCoord = builder.lambdaCoord; - this.lambdaNoObj = builder.lambdaNoObj; - this.lossPositionScale = builder.lossPositionScale; - this.lossClassPredictions = builder.lossClassPredictions; - this.boundingBoxes = builder.boundingBoxes; - } + @Override + public Layer instantiate( + NeuralNetConfiguration conf, + Collection trainingListeners, + int layerIndex, + INDArray layerParamsView, + boolean initializeParams, + DataType networkDataType) { + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); - @Override - public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, - int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { - LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + org.deeplearning4j.nn.layers.objdetect.Yolo2OutputLayer ret = + new org.deeplearning4j.nn.layers.objdetect.Yolo2OutputLayer(lconf, networkDataType); + ret.addTrainingListeners(trainingListeners); + ret.setIndex(layerIndex); + ret.setParamsViewArray(layerParamsView); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); + ret.setParamTable(paramTable); + ret.setLayerConfiguration(lconf); + return ret; + } - org.deeplearning4j.nn.layers.objdetect.Yolo2OutputLayer ret = - new org.deeplearning4j.nn.layers.objdetect.Yolo2OutputLayer(lconf, networkDataType); 
- ret.addTrainingListeners(trainingListeners); - ret.setIndex(layerIndex); - ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(this, layerParamsView, initializeParams); - ret.setParamTable(paramTable); - ret.setLayerConfiguration(lconf); - return ret; - } + @Override + public ParamInitializer initializer() { + return EmptyParamInitializer.getInstance(); + } - @Override - public ParamInitializer initializer() { - return EmptyParamInitializer.getInstance(); - } + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + return inputType; // Same shape output as input + } - @Override - public InputType getOutputType(int layerIndex, InputType inputType) { - return inputType; //Same shape output as input - } + @Override + public void setNIn(InputType inputType, boolean override) { + InputType.InputTypeConvolutional c = (InputType.InputTypeConvolutional) inputType; + this.format = c.getFormat(); + } - @Override - public void setNIn(InputType inputType, boolean override) { - InputType.InputTypeConvolutional c = (InputType.InputTypeConvolutional) inputType; - this.format = c.getFormat(); - } - - @Override - public InputPreProcessor getPreProcessorForInputType(InputType inputType) { - switch (inputType.getType()) { - case FF: - case RNN: - throw new UnsupportedOperationException("Cannot use FF or RNN input types"); - case CNN: - return null; - case CNNFlat: - InputType.InputTypeConvolutionalFlat cf = (InputType.InputTypeConvolutionalFlat) inputType; - return new FeedForwardToCnnPreProcessor(cf.getHeight(), cf.getWidth(), cf.getDepth()); - default: - return null; - } - } - - @Override - public List getRegularizationByParam(String paramName) { - //Not applicable + @Override + public InputPreProcessor getPreProcessorForInputType(InputType inputType) { + switch (inputType.getType()) { + case FF: + case RNN: + throw new UnsupportedOperationException("Cannot use FF or RNN input types"); + case CNN: + return null; + case 
CNNFlat: + InputType.InputTypeConvolutionalFlat cf = (InputType.InputTypeConvolutionalFlat) inputType; + return new FeedForwardToCnnPreProcessor(cf.getHeight(), cf.getWidth(), cf.getDepth()); + default: return null; } + } - @Override - public boolean isPretrainParam(String paramName) { - return false; //No params - } - @Override - public LayerMemoryReport getMemoryReport(InputType inputType) { - long numValues = inputType.arrayElementsPerExample(); - - //This is a VERY rough estimate... - return new LayerMemoryReport.Builder(name, Yolo2OutputLayer.class, inputType, inputType) - .standardMemory(0, 0) //No params - .workingMemory(0, numValues, 0, 6 * numValues).cacheMemory(0, 0) //No cache - .build(); - } - - @Getter - @Setter - public static class Builder extends LayerConfiguration.Builder { - - /** - * Loss function coefficient for position and size/scale components of the loss function. Default (as per - * paper): 5 - * - */ - private double lambdaCoord = 5; - - /** - * Loss function coefficient for the "no object confidence" components of the loss function. Default (as per - * paper): 0.5 - * - */ - private double lambdaNoObj = 0.5; - - /** - * Loss function for position/scale component of the loss function - * - */ - private ILossFunction lossPositionScale = new LossL2(); - - /** - * Loss function for the class predictions - defaults to L2 loss (i.e., sum of squared errors, as per the - * paper), however Loss MCXENT could also be used (which is more common for classification). - * - */ - private ILossFunction lossClassPredictions = new LossL2(); - - /** - * Bounding box priors dimensions [width, height]. For N bounding boxes, input has shape [rows, columns] = [N, - * 2] Note that dimensions should be specified as fraction of grid size. For example, a network with 13x13 - * output, a value of 1.0 would correspond to one grid cell; a value of 13 would correspond to the entire - * image. 
- * - */ - private INDArray boundingBoxes; - - /** - * Loss function coefficient for position and size/scale components of the loss function. Default (as per - * paper): 5 - * - * @param lambdaCoord Lambda value for size/scale component of loss function - */ - public Builder lambdaCoord(double lambdaCoord) { - this.setLambdaCoord(lambdaCoord); - return this; - } - - /** - * Loss function coefficient for the "no object confidence" components of the loss function. Default (as per - * paper): 0.5 - * - * @param lambdaNoObj Lambda value for no-object (confidence) component of the loss function - */ - public Builder lambdaNoObj(double lambdaNoObj) { - this.setLambdaNoObj(lambdaNoObj); - return this; - } - - /** - * Loss function for position/scale component of the loss function - * - * @param lossPositionScale Loss function for position/scale - */ - public Builder lossPositionScale(ILossFunction lossPositionScale) { - this.setLossPositionScale(lossPositionScale); - return this; - } - - /** - * Loss function for the class predictions - defaults to L2 loss (i.e., sum of squared errors, as per the - * paper), however Loss MCXENT could also be used (which is more common for classification). - * - * @param lossClassPredictions Loss function for the class prediction error component of the YOLO loss function - */ - public Builder lossClassPredictions(ILossFunction lossClassPredictions) { - this.setLossClassPredictions(lossClassPredictions); - return this; - } - - /** - * Bounding box priors dimensions [width, height]. For N bounding boxes, input has shape [rows, columns] = [N, - * 2] Note that dimensions should be specified as fraction of grid size. For example, a network with 13x13 - * output, a value of 1.0 would correspond to one grid cell; a value of 13 would correspond to the entire - * image. 
- * - * @param boundingBoxes Bounding box prior dimensions (width, height) - */ - public Builder boundingBoxPriors(INDArray boundingBoxes) { - this.setBoundingBoxes(boundingBoxes); - return this; - } - - @Override - public Yolo2OutputLayer build() { - if (boundingBoxes == null) { - throw new IllegalStateException("Bounding boxes have not been set"); - } - - if (boundingBoxes.rank() != 2 || boundingBoxes.size(1) != 2) { - throw new IllegalStateException("Bounding box priors must have shape [nBoxes, 2]. Has shape: " - + Arrays.toString(boundingBoxes.shape())); - } - - return new Yolo2OutputLayer(this); - } + @Override + public List getRegularizationByParam(String paramName) { + // Not applicable + return null; + } + + @Override + public boolean isPretrainParam(String paramName) { + return false; // No params + } + + @Override + public LayerMemoryReport getMemoryReport(InputType inputType) { + long numValues = inputType.arrayElementsPerExample(); + + // This is a VERY rough estimate... + return new LayerMemoryReport.Builder(name, Yolo2OutputLayer.class, inputType, inputType) + .standardMemory(0, 0) // No params + .workingMemory(0, numValues, 0, 6 * numValues) + .cacheMemory(0, 0) // No cache + .build(); + } + + public static abstract class Yolo2OutputLayerBuilder< + C extends Yolo2OutputLayer, B extends Yolo2OutputLayerBuilder> + extends LayerConfigurationBuilder { + public C build() { + if (boundingBoxes$value == null) { + throw new IllegalStateException("Bounding boxes have not been set"); + } + + if (boundingBoxes$value.rank() != 2 || boundingBoxes$value.size(1) != 2) { + throw new IllegalStateException( + "Bounding box priors must have shape [nBoxes, 2]. 
Has shape: " + + Arrays.toString(boundingBoxes$value.shape())); + } + return initBuild(); } + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/SimpleRnn.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/SimpleRnn.java index c89782a92..65866d427 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/SimpleRnn.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/SimpleRnn.java @@ -48,7 +48,7 @@ public class SimpleRnn extends BaseRecurrentLayer { * If true (default = false): enable layer normalization on this layer * */ - @lombok.Builder.Default @Accessors + @lombok.Builder.Default @Accessors @Getter private boolean hasLayerNorm = false; diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java index 8dd744c9a..86a8038dc 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java @@ -20,6 +20,9 @@ package org.deeplearning4j.nn.conf.layers.samediff; +import java.util.Collection; +import java.util.List; +import java.util.Map; import lombok.*; import lombok.experimental.SuperBuilder; import lombok.extern.slf4j.Slf4j; @@ -44,11 +47,6 @@ import org.nd4j.linalg.learning.regularization.L2Regularization; import org.nd4j.linalg.learning.regularization.Regularization; import org.nd4j.linalg.learning.regularization.WeightDecay; -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; -import java.util.Map; - @Slf4j @Data @EqualsAndHashCode(callSuper = true, doNotUseGetters = true) @@ -56,291 +54,324 @@ import java.util.Map; @NoArgsConstructor public 
abstract class AbstractSameDiffLayer extends LayerConfiguration { + /** + * The regularization for the parameters (excluding biases) - for example {@link WeightDecay} + * + *

-- SETTER -- Set the regularization for the parameters (excluding biases) - for example + * {@link WeightDecay} + * + * @param regularization Regularization to apply for the network parameters/weights (excluding + * biases) + */ + protected List regularization; + /** + * The regularization for the biases only - for example {@link WeightDecay} -- SETTER -- Set the + * regularization for the biases only - for example {@link WeightDecay} + * + * @param regularizationBias Regularization to apply for the network biases only + */ + protected List regularizationBias; + /** + * Gradient updater. For example, {@link org.nd4j.linalg.learning.config.Adam} or {@link + * org.nd4j.linalg.learning.config.Nesterovs} + * + * @param updater Updater to use + */ + protected @Getter @Setter IUpdater updater; + /** + * Gradient updater configuration, for the biases only. If not set, biases will use the updater as + * set by {@link #setUpdater(IUpdater)} + * + * @param biasUpdater Updater to use for bias parameters + */ + protected @Getter @Setter IUpdater biasUpdater; + + protected GradientNormalization gradientNormalization; + protected double gradientNormalizationThreshold = Double.NaN; + + private SDLayerParams layerParams; + + @Override + public List getRegularizationByParam(String paramName) { + if (layerParams.isWeightParam(paramName)) { + return regularization; + } else if (layerParams.isBiasParam(paramName)) { + return regularizationBias; + } + return null; + } + + public SDLayerParams getLayerParams() { + if (layerParams == null) { + layerParams = new SDLayerParams(); + defineParameters(layerParams); + } + return layerParams; + } + + @Override + public void setNIn(InputType inputType, boolean override) { + // Default implementation: no-op + } + + @Override + public InputPreProcessor getPreProcessorForInputType(InputType inputType) { + // Default implementation: no-op + return null; + } + + public void applyGlobalConfigToLayer( + 
NeuralNetConfiguration.NeuralNetConfigurationBuilder globalConfig) { + // Default implementation: no op + } + + /** + * Define the parameters for the network. Use {@link SDLayerParams#addWeightParam(String, + * long...)} and {@link SDLayerParams#addBiasParam(String, long...)} + * + * @param params Object used to set parameters for this layer + */ + public abstract void defineParameters(SDLayerParams params); + + /** + * Set the initial parameter values for this layer, if required + * + * @param params Parameter arrays that may be initialized + */ + public abstract void initializeParameters(Map params); + + @Override + public abstract org.deeplearning4j.nn.api.Layer instantiate( + NeuralNetConfiguration conf, + Collection trainingListeners, + int layerIndex, + INDArray layerParamsView, + boolean initializeParams, + DataType networkDataType); + + // ================================================================================================================== + + @Override + public ParamInitializer initializer() { + return SameDiffParamInitializer.getInstance(); + } + + @Override + public IUpdater getUpdaterByParam(String paramName) { + if (biasUpdater != null && initializer().isBiasParam(this, paramName)) { + return biasUpdater; + } else if (initializer().isBiasParam(this, paramName) + || initializer().isWeightParam(this, paramName)) { + return updater; + } + throw new IllegalStateException("Unknown parameter key: " + paramName); + } + + @Override + public boolean isPretrainParam(String paramName) { + return false; + } + + @Override + public LayerMemoryReport getMemoryReport(InputType inputType) { + return new LayerMemoryReport(); // TODO + } + + /** + * Returns the memory layout ('c' or 'f' order - i.e., row/column major) of the parameters. 
In + * most cases, this can/should be left + * + * @param param Name of the parameter + * @return Memory layout ('c' or 'f') of the parameter + */ + public char paramReshapeOrder(String param) { + return 'c'; + } + + protected void initWeights(int fanIn, int fanOut, WeightInit weightInit, INDArray array) { + WeightInitUtil.initWeights( + fanIn, fanOut, array.shape(), weightInit, null, paramReshapeOrder(null), array); + } + + public void applyGlobalConfig(NeuralNetConfiguration.NeuralNetConfigurationBuilder b) { + NeuralNetConfiguration bConf = b.build(); + if (regularization == null || regularization.isEmpty()) { + regularization = bConf.getRegularization(); + } + if (regularizationBias == null || regularizationBias.isEmpty()) { + regularizationBias = bConf.getRegularizationBias(); + } + if (updater == null) { + updater = bConf.getUpdater(); + } + if (biasUpdater == null) { + biasUpdater = bConf.getBiasUpdater(); + } + if (gradientNormalization == null) { + gradientNormalization = bConf.getGradientNormalization(); + } + if (Double.isNaN(gradientNormalizationThreshold)) { + gradientNormalizationThreshold = bConf.getGradientNormalizationThreshold(); + } + + applyGlobalConfigToLayer(b); + } + + /** + * This method generates an "all ones" mask array for use in the SameDiff model when none is + * provided. + * + * @param input Input to the layer + * @return A mask array - should be same datatype as the input (usually) + */ + public INDArray onesMaskForInput(INDArray input) { + if (input.rank() == 2) { + return Nd4j.ones(input.dataType(), input.size(0), 1); + } else if (input.rank() == 3) { + return Nd4j.ones( + input.dataType(), + input.size(0), + input.size(2)); // mask: [mb, length] vs. input [mb, nIn, length] + } else if (input.rank() == 4) { + // CNN style - return [mb, 1, 1, 1] for broadcast... + return Nd4j.ones(input.dataType(), input.size(0), 1, 1, 1); + } else if (input.rank() == 5) { + // CNN3D style - return [mb, 1, 1, 1, 1] for broadcast... 
+ return Nd4j.ones(input.dataType(), input.size(0), 1, 1, 1, 1); + } else { + throw new IllegalStateException( + "When using masking with rank 1 or 6+ inputs, the onesMaskForInput method must be implemented, " + + "in order to determine the correct mask shape for this layer"); + } + } + + public abstract static class AbstractSameDiffLayerBuilder< + C extends AbstractSameDiffLayer, B extends AbstractSameDiffLayerBuilder> + extends LayerConfigurationBuilder { /** - * The regularization for the parameters (excluding biases) - for example {@link WeightDecay} + * L1 regularization coefficient (weights only). Use {@link #l1Bias(double)} to configure the l1 + * regularization coefficient for the bias. + */ + public B l1(double l1) { + // Check if existing L1 exists; if so, replace it + NetworkUtils.removeInstances(this.regularization, L1Regularization.class); + if (l1 > 0.0) { + this.regularization.add(new L1Regularization(l1)); + } + return self(); + } + + /** + * L2 regularization coefficient (weights only). Use {@link #l2Bias(double)} to configure the l2 + * regularization coefficient for the bias.
+ * Note: Generally, {@link WeightDecay} (set via {@link #weightDecay(double,boolean)}) + * should be preferred to L2 regularization. See {@link WeightDecay} javadoc for further + * details.
+ */ + public B l2(double l2) { + // Check if existing L2 exists; if so, replace it. Also remove weight decay - it doesn't make + // sense to use both + NetworkUtils.removeInstances(this.regularization, L2Regularization.class); + if (l2 > 0.0) { + NetworkUtils.removeInstancesWithWarning( + this.regularization, + WeightDecay.class, + "WeightDecay regularization removed: incompatible with added L2 regularization"); + this.regularization.add(new L2Regularization(l2)); + } + return self(); + } + + /** L1 regularization coefficient for the bias. Default: 0. See also {@link #l1(double)} */ + public B l1Bias(double l1Bias) { + NetworkUtils.removeInstances(this.regularizationBias, L1Regularization.class); + if (l1Bias > 0.0) { + this.regularizationBias.add(new L1Regularization(l1Bias)); + } + return self(); + } + + /** + * L2 regularization coefficient for the bias. Default: 0. See also {@link #l2(double)}
+ * Note: Generally, {@link WeightDecay} (set via {@link #weightDecayBias(double,boolean)}) + * should be preferred to L2 regularization. See {@link WeightDecay} javadoc for further + * details.
+ */ + public B l2Bias(double l2Bias) { + NetworkUtils.removeInstances(this.regularizationBias, L2Regularization.class); + if (l2Bias > 0.0) { + NetworkUtils.removeInstancesWithWarning( + this.regularizationBias, + WeightDecay.class, + "WeightDecay bias regularization removed: incompatible with added L2 regularization"); + this.regularizationBias.add(new L2Regularization(l2Bias)); + } + return self(); + } + + /** + * Add weight decay regularization for the network parameters (excluding biases).
+ * This applies weight decay with the learning rate multiplied in - see {@link WeightDecay} + * for more details.
* - * -- SETTER -- - * Set the regularization for the parameters (excluding biases) - for example {@link WeightDecay} - * @param regularization Regularization to apply for the network parameters/weights (excluding biases) + * @param coefficient Weight decay regularization coefficient + * @see #weightDecay(double, boolean) */ - protected List regularization; + public B weightDecay(double coefficient) { + return weightDecay(coefficient, true); + } + /** - * The regularization for the biases only - for example {@link WeightDecay} - * -- SETTER -- - * Set the regularization for the biases only - for example {@link WeightDecay} - * @param regularizationBias Regularization to apply for the network biases only - */ - protected List regularizationBias; - /** - * Gradient updater. For example, {@link org.nd4j.linalg.learning.config.Adam} or {@link - * org.nd4j.linalg.learning.config.Nesterovs} + * Add weight decay regularization for the network parameters (excluding biases). See {@link + * WeightDecay} for more details.
* - * @param updater Updater to use + * @param coefficient Weight decay regularization coefficient + * @param applyLR Whether the learning rate should be multiplied in when performing weight decay + * updates. See {@link WeightDecay} for more details. + * @see #weightDecay(double, boolean) */ - protected @Getter @Setter IUpdater updater; + public B weightDecay(double coefficient, boolean applyLR) { + // Check if existing weight decay if it exists; if so, replace it. Also remove L2 - it doesn't + // make sense to use both + NetworkUtils.removeInstances(this.regularization, WeightDecay.class); + if (coefficient > 0.0) { + NetworkUtils.removeInstancesWithWarning( + this.regularization, + L2Regularization.class, + "L2 regularization removed: incompatible with added WeightDecay regularization"); + this.regularization.add(new WeightDecay(coefficient, applyLR)); + } + return self(); + } + /** - * Gradient updater configuration, for the biases only. If not set, biases will use the updater as set by {@link - * #setUpdater(IUpdater)} + * Weight decay for the biases only - see {@link #weightDecay(double)} for more details. This + * applies weight decay with multiplying the learning rate.
* - * @param biasUpdater Updater to use for bias parameters + * @param coefficient Weight decay regularization coefficient + * @see #weightDecayBias(double, boolean) */ - protected @Getter @Setter IUpdater biasUpdater; - - - protected GradientNormalization gradientNormalization; - protected double gradientNormalizationThreshold = Double.NaN; - - private SDLayerParams layerParams; - - @Override - public List getRegularizationByParam(String paramName) { - if(layerParams.isWeightParam(paramName)){ - return regularization; - } else if(layerParams.isBiasParam(paramName)){ - return regularizationBias; - } - return null; - } - - public SDLayerParams getLayerParams() { - if (layerParams == null) { - layerParams = new SDLayerParams(); - defineParameters(layerParams); - } - return layerParams; - } - - @Override - public void setNIn(InputType inputType, boolean override) { - //Default implementation: no-op - } - - @Override - public InputPreProcessor getPreProcessorForInputType(InputType inputType) { - //Default implementation: no-op - return null; - } - - - public void applyGlobalConfigToLayer(NeuralNetConfiguration.NeuralNetConfigurationBuilder globalConfig) { - //Default implementation: no op + public B weightDecayBias(double coefficient) { + return weightDecayBias(coefficient, true); } /** - * Define the parameters for the network. Use {@link SDLayerParams#addWeightParam(String, long...)} and {@link - * SDLayerParams#addBiasParam(String, long...)} + * Weight decay for the biases only - see {@link #weightDecay(double)} for more details
* - * @param params Object used to set parameters for this layer + * @param coefficient Weight decay regularization coefficient */ - public abstract void defineParameters(SDLayerParams params); - - /** - * Set the initial parameter values for this layer, if required - * - * @param params Parameter arrays that may be initialized - */ - public abstract void initializeParameters(Map params); - - @Override - public abstract org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, - Collection trainingListeners, int layerIndex, INDArray layerParamsView, - boolean initializeParams, DataType networkDataType); - - //================================================================================================================== - - @Override - public ParamInitializer initializer() { - return SameDiffParamInitializer.getInstance(); - } - - @Override - public IUpdater getUpdaterByParam(String paramName) { - if (biasUpdater != null && initializer().isBiasParam(this, paramName)) { - return biasUpdater; - } else if (initializer().isBiasParam(this, paramName) || initializer().isWeightParam(this, paramName)) { - return updater; - } - throw new IllegalStateException("Unknown parameter key: " + paramName); - } - - @Override - public boolean isPretrainParam(String paramName) { - return false; - } - - @Override - public LayerMemoryReport getMemoryReport(InputType inputType) { - return new LayerMemoryReport(); //TODO - } - - /** - * Returns the memory layout ('c' or 'f' order - i.e., row/column major) of the parameters. 
In most cases, this - * can/should be left - * - * @param param Name of the parameter - * @return Memory layout ('c' or 'f') of the parameter - */ - public char paramReshapeOrder(String param) { - return 'c'; - } - - protected void initWeights(int fanIn, int fanOut, WeightInit weightInit, INDArray array) { - WeightInitUtil.initWeights(fanIn, fanOut, array.shape(), weightInit, null, paramReshapeOrder(null), array); - } - - public void applyGlobalConfig(NeuralNetConfiguration.NeuralNetConfigurationBuilder b) { - NeuralNetConfiguration bConf = b.build(); - if (regularization == null || regularization.isEmpty()) { - regularization = bConf.getRegularization(); - } - if (regularizationBias == null || regularizationBias.isEmpty()) { - regularizationBias = bConf.getRegularizationBias(); - } - if (updater == null) { - updater = bConf.getUpdater(); - } - if (biasUpdater == null) { - biasUpdater = bConf.getBiasUpdater(); - } - if (gradientNormalization == null) { - gradientNormalization = bConf.getGradientNormalization(); - } - if (Double.isNaN(gradientNormalizationThreshold)) { - gradientNormalizationThreshold = bConf.getGradientNormalizationThreshold(); - } - - applyGlobalConfigToLayer(b); - } - - /** - * This method generates an "all ones" mask array for use in the SameDiff model when none is provided. - * @param input Input to the layer - * @return A mask array - should be same datatype as the input (usually) - */ - public INDArray onesMaskForInput(INDArray input){ - if(input.rank() == 2){ - return Nd4j.ones(input.dataType(), input.size(0), 1); - } else if(input.rank() == 3){ - return Nd4j.ones(input.dataType(), input.size(0), input.size(2)); //mask: [mb, length] vs. input [mb, nIn, length] - } else if(input.rank() == 4){ - //CNN style - return [mb, 1, 1, 1] for broadcast... - return Nd4j.ones(input.dataType(), input.size(0), 1, 1, 1); - } else if(input.rank() == 5){ - //CNN3D style - return [mb, 1, 1, 1, 1] for broadcast... 
- return Nd4j.ones(input.dataType(), input.size(0), 1, 1, 1, 1); - } else { - throw new IllegalStateException("When using masking with rank 1 or 6+ inputs, the onesMaskForInput method must be implemented, " + - "in order to determine the correct mask shape for this layer"); - } - } - - public static abstract class AbstractSameDiffLayerBuilder> { - /** - * L1 regularization coefficient (weights only). Use {@link #l1Bias(double)} to configure the l1 regularization - * coefficient for the bias. - */ - public B l1(double l1) { - //Check if existing L1 exists; if so, replace it - NetworkUtils.removeInstances(this.regularization, L1Regularization.class); - if(l1 > 0.0) { - this.regularization.add(new L1Regularization(l1)); - } - return self(); - } - - /** - * L2 regularization coefficient (weights only). Use {@link #l2Bias(double)} to configure the l2 regularization - * coefficient for the bias.
- * Note: Generally, {@link WeightDecay} (set via {@link #weightDecay(double,boolean)} should be preferred to - * L2 regularization. See {@link WeightDecay} javadoc for further details.
- */ - public B l2(double l2) { - //Check if existing L2 exists; if so, replace it. Also remove weight decay - it doesn't make sense to use both - NetworkUtils.removeInstances(this.regularization, L2Regularization.class); - if(l2 > 0.0) { - NetworkUtils.removeInstancesWithWarning(this.regularization, WeightDecay.class, "WeightDecay regularization removed: incompatible with added L2 regularization"); - this.regularization.add(new L2Regularization(l2)); - } - return self(); - } - - /** - * L1 regularization coefficient for the bias. Default: 0. See also {@link #l1(double)} - */ - public B l1Bias(double l1Bias) { - NetworkUtils.removeInstances(this.regularizationBias, L1Regularization.class); - if(l1Bias > 0.0) { - this.regularizationBias.add(new L1Regularization(l1Bias)); - } - return self(); - } - - /** - * L2 regularization coefficient for the bias. Default: 0. See also {@link #l2(double)}
- * Note: Generally, {@link WeightDecay} (set via {@link #weightDecayBias(double,boolean)} should be preferred to - * L2 regularization. See {@link WeightDecay} javadoc for further details.
- */ - public B l2Bias(double l2Bias) { - NetworkUtils.removeInstances(this.regularizationBias, L2Regularization.class); - if(l2Bias > 0.0) { - NetworkUtils.removeInstancesWithWarning(this.regularizationBias, WeightDecay.class, "WeightDecay bias regularization removed: incompatible with added L2 regularization"); - this.regularizationBias.add(new L2Regularization(l2Bias)); - } - return self(); - } - - /** - * Add weight decay regularization for the network parameters (excluding biases).
- * This applies weight decay with multiplying the learning rate - see {@link WeightDecay} for more details.
- * - * @param coefficient Weight decay regularization coefficient - * @see #weightDecay(double, boolean) - */ - public B weightDecay(double coefficient) { - return weightDecay(coefficient, true); - } - - /** - * Add weight decay regularization for the network parameters (excluding biases). See {@link WeightDecay} for more details.
- * - * @param coefficient Weight decay regularization coefficient - * @param applyLR Whether the learning rate should be multiplied in when performing weight decay updates. See {@link WeightDecay} for more details. - * @see #weightDecay(double, boolean) - */ - public B weightDecay(double coefficient, boolean applyLR) { - //Check if existing weight decay if it exists; if so, replace it. Also remove L2 - it doesn't make sense to use both - NetworkUtils.removeInstances(this.regularization, WeightDecay.class); - if(coefficient > 0.0) { - NetworkUtils.removeInstancesWithWarning(this.regularization, L2Regularization.class, "L2 regularization removed: incompatible with added WeightDecay regularization"); - this.regularization.add(new WeightDecay(coefficient, applyLR)); - } - return self(); - } - - /** - * Weight decay for the biases only - see {@link #weightDecay(double)} for more details. - * This applies weight decay with multiplying the learning rate.
- * - * @param coefficient Weight decay regularization coefficient - * @see #weightDecayBias(double, boolean) - */ - public B weightDecayBias(double coefficient) { - return weightDecayBias(coefficient, true); - } - - /** - * Weight decay for the biases only - see {@link #weightDecay(double)} for more details
- * - * @param coefficient Weight decay regularization coefficient - */ - public B weightDecayBias(double coefficient, boolean applyLR) { - //Check if existing weight decay if it exists; if so, replace it. Also remove L2 - it doesn't make sense to use both - NetworkUtils.removeInstances(this.regularizationBias, WeightDecay.class); - if(coefficient > 0.0) { - NetworkUtils.removeInstancesWithWarning(this.regularizationBias, L2Regularization.class, "L2 bias regularization removed: incompatible with added WeightDecay regularization"); - this.regularizationBias.add(new WeightDecay(coefficient, applyLR)); - } - return self(); - } + public B weightDecayBias(double coefficient, boolean applyLR) { + // Check if existing weight decay if it exists; if so, replace it. Also remove L2 - it doesn't + // make sense to use both + NetworkUtils.removeInstances(this.regularizationBias, WeightDecay.class); + if (coefficient > 0.0) { + NetworkUtils.removeInstancesWithWarning( + this.regularizationBias, + L2Regularization.class, + "L2 bias regularization removed: incompatible with added WeightDecay regularization"); + this.regularizationBias.add(new WeightDecay(coefficient, applyLR)); + } + return self(); } + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffVertex.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffVertex.java index accc675d0..9d6144c90 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffVertex.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffVertex.java @@ -176,7 +176,7 @@ public abstract class SameDiffVertex extends GraphVertex implements ITraininable } @Override - public String getLayerName() { + public String getName() { return name; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/VariationalAutoencoder.java 
b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/VariationalAutoencoder.java index be7be75c8..0c4419a76 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/VariationalAutoencoder.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/variational/VariationalAutoencoder.java @@ -285,5 +285,14 @@ public class VariationalAutoencoder extends BasePretrainNetwork { super.nOut(nOut); return self(); } + + public B pzxActivationFunction(IActivation activation) { + this.pzxActivationFunction$value = activation; + this.pzxActivationFunction$set = true; + return self(); + } + public B pzxActivationFunction(Activation activation) { + return this.pzxActivationFunction(activation.getActivationFunction()); + } } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayer.java index 3bb40171a..be4aea4fe 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayer.java @@ -107,7 +107,7 @@ public class ConvolutionLayer extends BaseLayer p = preOutput4d(true, true, workspaceMgr); INDArray z = p.getFirst(); - CNN2DFormat f = getTypedLayerConfiguration().getDataFormat(); + CNN2DFormat f = getTypedLayerConfiguration().getConvFormat(); if(f != CNN2DFormat.NCHW){ z = z.permute(0,3,1,2); //NHWC to NCHW } @@ -159,7 +159,7 @@ public class ConvolutionLayer extends BaseLayer backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { assertInputSet(true); - CNN2DFormat format = getTypedLayerConfiguration().getDataFormat(); + CNN2DFormat format = getTypedLayerConfiguration().getConvFormat(); boolean nchw = format == CNN2DFormat.NCHW; if (input.rank() != 4) { throw new 
DL4JInvalidInputException("Got rank " + input.rank() + " array as input to Convolution layer with shape " + Arrays.toString(input.shape()) - + ". Expected rank 4 array with shape " + getTypedLayerConfiguration().getDataFormat().dimensionNames() + ". " + + ". Expected rank 4 array with shape " + getTypedLayerConfiguration().getConvFormat().dimensionNames() + ". " + layerId()); } INDArray bias; @@ -158,7 +158,7 @@ public class DepthwiseConvolution2DLayer extends ConvolutionLayer { throw new DL4JInvalidInputException("Got rank " + input.rank() + " array as input to DepthwiseConvolution2D (layer name = " + layerName + ", layer index = " + index + ") with shape " + Arrays.toString(input.shape()) + ". " - + "Expected rank 4 array with shape " + getTypedLayerConfiguration().getDataFormat().dimensionNames() + "." + + "Expected rank 4 array with shape " + getTypedLayerConfiguration().getConvFormat().dimensionNames() + "." + (input.rank() == 2 ? " (Wrong input type (see InputType.convolutionalFlat()) or wrong data type?)" : "") + " " + layerId()); @@ -166,7 +166,7 @@ public class DepthwiseConvolution2DLayer extends ConvolutionLayer { INDArray input = this.input.castTo(dataType); //no-op if correct dtype - CNN2DFormat format = getTypedLayerConfiguration().getDataFormat(); + CNN2DFormat format = getTypedLayerConfiguration().getConvFormat(); boolean nchw = format == CNN2DFormat.NCHW; long inDepth = depthWiseWeights.size(2); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SeparableConvolution2DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SeparableConvolution2DLayer.java index c6a9bba63..60533ee2a 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SeparableConvolution2DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SeparableConvolution2DLayer.java @@ -63,7 +63,7 @@ public class SeparableConvolution2DLayer 
extends ConvolutionLayer { if (input.rank() != 4) { throw new DL4JInvalidInputException("Got rank " + input.rank() + " array as input to SubsamplingLayer with shape " + Arrays.toString(input.shape()) - + ". Expected rank 4 array with shape " + getTypedLayerConfiguration().getDataFormat().dimensionNames() + ". " + + ". Expected rank 4 array with shape " + getTypedLayerConfiguration().getConvFormat().dimensionNames() + ". " + layerId()); } INDArray bias; @@ -74,7 +74,7 @@ public class SeparableConvolution2DLayer extends ConvolutionLayer { INDArray input = this.input.castTo(dataType); - CNN2DFormat format = getTypedLayerConfiguration().getDataFormat(); + CNN2DFormat format = getTypedLayerConfiguration().getConvFormat(); boolean nchw = format == CNN2DFormat.NCHW; long miniBatch = input.size(0); @@ -167,7 +167,7 @@ public class SeparableConvolution2DLayer extends ConvolutionLayer { getParamWithNoise(SeparableConvolutionParamInitializer.POINT_WISE_WEIGHT_KEY, training, workspaceMgr); INDArray input = this.input.castTo(dataType); - if(getTypedLayerConfiguration().getDataFormat() == CNN2DFormat.NHWC) { + if(getTypedLayerConfiguration().getConvFormat() == CNN2DFormat.NHWC) { input = input.permute(0,3,1,2).dup(); } @@ -182,7 +182,7 @@ public class SeparableConvolution2DLayer extends ConvolutionLayer { throw new DL4JInvalidInputException("Got rank " + input.rank() + " array as input to SeparableConvolution2D (layer name = " + layerName + ", layer index = " + index + ") with shape " + Arrays.toString(input.shape()) + ". " - + "Expected rank 4 array with shape " + getTypedLayerConfiguration().getDataFormat().dimensionNames() + "." + + "Expected rank 4 array with shape " + getTypedLayerConfiguration().getConvFormat().dimensionNames() + "." + (input.rank() == 2 ? 
" (Wrong input type (see InputType.convolutionalFlat()) or wrong data type?)" : "") @@ -199,7 +199,7 @@ public class SeparableConvolution2DLayer extends ConvolutionLayer { String s = "Cannot do forward pass in SeparableConvolution2D layer (layer name = " + layerName + ", layer index = " + index + "): input array channels does not match CNN layer configuration" - + " (data format = " + getTypedLayerConfiguration().getDataFormat() + ", data input channels = " + input.size(1) + ", [minibatch,inputDepth,height,width]=" + + " (data format = " + getTypedLayerConfiguration().getConvFormat() + ", data input channels = " + input.size(1) + ", [minibatch,inputDepth,height,width]=" + Arrays.toString(input.shape()) + "; expected" + " input channels = " + inDepth + ") " + layerId(); @@ -287,7 +287,7 @@ public class SeparableConvolution2DLayer extends ConvolutionLayer { .build(); Nd4j.getExecutioner().exec(op); - if(getTypedLayerConfiguration().getDataFormat() == CNN2DFormat.NHWC) { + if(getTypedLayerConfiguration().getConvFormat() == CNN2DFormat.NHWC) { output = output.permute(0,2,3,1); //NCHW to NHWC } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SpaceToBatch.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SpaceToBatch.java index 1e5c7b270..371511075 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SpaceToBatch.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/SpaceToBatch.java @@ -47,7 +47,7 @@ public class SpaceToBatch extends AbstractLayer