From 4dc5a116b6affab315079be6b9b0668b50a5727c Mon Sep 17 00:00:00 2001 From: brian Date: Tue, 25 Jul 2023 10:59:46 +0200 Subject: [PATCH] Fixing tests Signed-off-by: brian --- .../ai/nd4j/tests/ExploreParamsTest.java | 167 +++ .../src/test/java/net/brutex/gan/App.java | 49 +- .../java/net/brutex/gan/MnistSimpleGAN.java | 8 +- .../org/nd4j/linalg/api/ndarray/INDArray.java | 14 +- .../java/org/nd4j/linalg/factory/Nd4j.java | 2 +- .../gradientcheck/BNGradientCheckTest.java | 2 +- .../gradientcheck/CNN1DGradientCheckTest.java | 3 +- .../CNN1DNewGradientCheckTest.java | 811 ++++++++++++ .../gradientcheck/CNNGradientCheckTest.java | 4 +- .../KerasAtrousConvolution2D.java | 3 +- .../convolutional/KerasConvolution2D.java | 3 +- .../nn/conf/NeuralNetConfiguration.java | 15 +- .../org/deeplearning4j/nn/conf/RNNFormat.java | 10 +- .../conf/layers/AbstractConvolutionLayer.java | 142 ++ .../conf/layers/BaseLayerConfiguration.java | 10 + .../nn/conf/layers/CapsuleLayer.java | 1 + .../nn/conf/layers/Convolution1D.java | 19 +- .../nn/conf/layers/Convolution1DNew.java | 250 ++++ .../nn/conf/layers/ConvolutionLayer.java | 37 +- .../nn/conf/layers/Deconvolution2D.java | 189 +-- .../nn/conf/layers/DenseLayer.java | 1 + .../nn/conf/layers/LayerConfiguration.java | 15 +- .../nn/conf/layers/LocallyConnected1D.java | 4 + .../nn/conf/layers/NoParamLayer.java | 17 +- .../nn/conf/layers/Subsampling1DLayer.java | 98 +- .../nn/conf/layers/SubsamplingLayer.java | 38 +- .../nn/conf/layers/misc/FrozenLayer.java | 1 + .../samediff/AbstractSameDiffLayer.java | 36 +- .../nn/layers/AbstractLayer.java | 32 +- .../deeplearning4j/nn/layers/BaseLayer.java | 1 + .../deeplearning4j/nn/layers/FrozenLayer.java | 3 + .../convolution/Convolution1DLayer.java | 311 +++-- .../convolution/Convolution1DNewLayer.java | 226 ++++ .../layers/convolution/ConvolutionLayer.java | 1155 ++++++++++------- .../convolution/ConvolutionNewLayer.java | 753 +++++++++++ .../layers/feedforward/dense/DenseLayer.java | 2 +- .../nn/layers/recurrent/RnnOutputLayer.java | 411 +++--- .../nn/layers/wrapper/BaseWrapperLayer.java | 6 +- .../nn/multilayer/MultiLayerNetwork.java | 2 +- .../ConvolutionNewParamInitializer.java | 183 +++ .../util/Convolution1DUtils.java | 560 ++++---- 41 files changed, 4285 insertions(+), 1309 deletions(-) create mode 100644 brutex-extended-tests/src/test/java/net/brutex/ai/nd4j/tests/ExploreParamsTest.java create mode 100644 cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN1DNewGradientCheckTest.java create mode 100644 cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AbstractConvolutionLayer.java create mode 100644 cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1DNew.java create mode 100644 cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Convolution1DNewLayer.java create mode 100644 cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionNewLayer.java create mode 100644 cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/ConvolutionNewParamInitializer.java diff --git a/brutex-extended-tests/src/test/java/net/brutex/ai/nd4j/tests/ExploreParamsTest.java b/brutex-extended-tests/src/test/java/net/brutex/ai/nd4j/tests/ExploreParamsTest.java new file mode 100644 index 000000000..1b0d7b840 --- /dev/null +++ b/brutex-extended-tests/src/test/java/net/brutex/ai/nd4j/tests/ExploreParamsTest.java @@ -0,0 +1,167 @@ +/* + * + * 
****************************************************************************** + * * + * * This program and the accompanying materials are made available under the + * * terms of the Apache License, Version 2.0 which is available at + * * https://www.apache.org/licenses/LICENSE-2.0. + * * + * * See the NOTICE file distributed with this work for additional + * * information regarding copyright ownership. + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * * License for the specific language governing permissions and limitations + * * under the License. + * * + * * SPDX-License-Identifier: Apache-2.0 + * ***************************************************************************** + * + */ + +package net.brutex.ai.nd4j.tests; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import lombok.extern.slf4j.Slf4j; +import org.deeplearning4j.datasets.iterator.INDArrayDataSetIterator; +import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator; +import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.layers.DenseLayer; +import org.deeplearning4j.nn.conf.layers.OutputLayer; +import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; +import org.junit.jupiter.api.Test; +import org.nd4j.common.primitives.Pair; +import org.nd4j.evaluation.classification.Evaluation; +import org.nd4j.linalg.activations.Activation; +import org.nd4j.linalg.api.buffer.DataType; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.lossfunctions.LossFunctions; + +@Slf4j +public class ExploreParamsTest { + + @Test + public void testParam() { + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() + .seed(12345) + .dataType(DataType.DOUBLE) + .layer( + DenseLayer.builder().nIn(4).nOut(30).name("1. Dense").activation(Activation.TANH)) + .layer(DenseLayer.builder().nIn(30).nOut(10).name("2. 
Dense")) + // .layer(FrozenLayer.builder(DenseLayer.builder().nOut(6).build()).build()) + + .layer( + OutputLayer.builder() + .nOut(3) + .lossFunction(LossFunctions.LossFunction.MSE) + .activation(Activation.SOFTMAX)) + .build(); + MultiLayerNetwork nn = new MultiLayerNetwork(conf); + nn.init(); + log.info(nn.summary()); + // INDArray input = Nd4j.rand(10,4); + INDArray labels = Nd4j.zeros(9, 3); + + INDArray input = + Nd4j.create( + new double[][] { + {5.15, 3.5, 1.4, 0.21}, // setosa + {4.9, 3.2, 1.4, 0.2}, // setosa + {4.7, 3.2, 1.23, 0.2}, // setosa + {7, 3.25, 4.7, 1.41}, // versicolor + {6.4, 3.2, 4.54, 1.5}, // versicolor + {6.9, 3.1, 4.92, 1.5}, // versicolor + {7.7, 3, 6.1, 2.3}, // virginica + {6.3, 3.4, 5.6, 2.45}, // virginica + {6.4, 3.12, 5.5, 1.8} // virginica + }); + + labels.putScalar(0, 1); + labels.putScalar(3, 1); + labels.putScalar(6, 1); + labels.putScalar(10, 1); + labels.putScalar(13, 1); + labels.putScalar(16, 1); + labels.putScalar(20, 1); + labels.putScalar(23, 1); + labels.putScalar(26, 1); + + IrisDataSetIterator iter = new IrisDataSetIterator(); + //Iterable> it = List.of(new Pair(input, labels)); + List l = new ArrayList<>(); + for (int i=0; i< input.rows(); i++) { + l.add(new Pair(input.getRow(i), labels.getRow(i))); + } + Iterable> it = l; + INDArrayDataSetIterator diter = new INDArrayDataSetIterator(it, 1); + + for (int i = 0; i < 100; i++) { + // nn.fit(input, labels); + // nn.fit( input, labels); + nn.fit(diter); + // nn.feedForward(input); + if(i%20==0) log.info("Score: {}", nn.getScore()); + } + + Evaluation eval = nn.evaluate(iter, List.of("setosa", "vericolor", "virginica")); + + log.info("\n{}", eval.stats()); + } + + @Test + public void testParam2() throws IOException { + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() + .seed(12345) + .layer( + DenseLayer.builder().nIn(784).nOut(20).name("1. Dense")) + .layer(DenseLayer.builder().nIn(20).nOut(10).name("2. Dense")) + .layer( + OutputLayer.builder() + .nOut(10) + .lossFunction(LossFunctions.LossFunction.MSE) + .activation(Activation.SOFTMAX)) + .build(); + MultiLayerNetwork nn = new MultiLayerNetwork(conf); + nn.init(); + log.info(nn.summary()); + + NeuralNetConfiguration conf2 = + NeuralNetConfiguration.builder() + .seed(12345) + .layer( + DenseLayer.builder().nIn(784).nOut(20).name("1. Dense").dropOut(0.7)) + .layer(DenseLayer.builder().nIn(20).nOut(10).name("2. Dense")) + .layer( + OutputLayer.builder() + .nOut(10) + .lossFunction(LossFunctions.LossFunction.MSE) + .activation(Activation.SOFTMAX)) + .build(); + MultiLayerNetwork nn2 = new MultiLayerNetwork(conf2); + nn2.init(); + log.info(nn2.summary()); + + + + MnistDataSetIterator iter = new MnistDataSetIterator(10, 500); + MnistDataSetIterator iter2 = new MnistDataSetIterator(10, 50); + + + for (int i = 0; i < 200; i++) { + nn.fit(iter); + nn2.fit(iter); + if(i%20==0) log.info("Score: {} vs. 
{}", nn.getScore(), nn2.getScore()); + } + + Evaluation eval = nn.evaluate(iter2); + Evaluation eval2 = nn2.evaluate(iter2); + + log.info("\n{} \n{}", eval.stats(), eval2.stats()); + } +} diff --git a/brutex-extended-tests/src/test/java/net/brutex/gan/App.java b/brutex-extended-tests/src/test/java/net/brutex/gan/App.java index 5d06092ab..4243c46e2 100644 --- a/brutex-extended-tests/src/test/java/net/brutex/gan/App.java +++ b/brutex-extended-tests/src/test/java/net/brutex/gan/App.java @@ -45,6 +45,7 @@ import org.datavec.image.transform.PipelineImageTransform; import org.datavec.image.transform.ResizeImageTransform; import org.datavec.image.transform.ShowImageTransform; import org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator; +import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; import org.deeplearning4j.nn.conf.GradientNormalization; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.distribution.Distribution; @@ -65,6 +66,7 @@ import org.deeplearning4j.optimize.listeners.ScoreToChartListener; import org.junit.jupiter.api.Test; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.activations.impl.ActivationLReLU; +import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; @@ -80,11 +82,11 @@ public class App { private static final int X_DIM = 20 ; private static final int Y_DIM = 20; - private static final int CHANNELS = 1; - private static final int batchSize = 10; + private static final int CHANNELS = 3; + private static final int batchSize = 50; private static final int INPUT = 128; - private static final int OUTPUT_PER_PANEL = 4; + private static final int OUTPUT_PER_PANEL = 16; private static final int ARRAY_SIZE_PER_SAMPLE = X_DIM*Y_DIM*CHANNELS; private static final IUpdater UPDATER = Adam.builder().learningRate(LEARNING_RATE).beta1(0.5).build(); @@ -146,7 +148,7 @@ public class App { ActivationLayer.builder(new ActivationLReLU(0.2)).build(), DropoutLayer.builder(1 - 0.5).build(), - OutputLayer.builder().name("dis-output").lossFunction(LossFunction.XENT).nIn(X_DIM*Y_DIM).nOut(1).activation(Activation.SIGMOID).build() + OutputLayer.builder().name("dis-output").lossFunction(LossFunction.MCXENT).nIn(X_DIM*Y_DIM).nOut(1).activation(Activation.SIGMOID).build() }; } @@ -196,6 +198,7 @@ public class App { .activation( Activation.IDENTITY ) .layersFromArray( layers ) .inputType( InputType.convolutional(X_DIM, Y_DIM, CHANNELS)) + .dataType(DataType.FLOAT) .build(); ((NeuralNetConfiguration) conf).init(); return conf; @@ -212,9 +215,9 @@ public class App { log.info("\u001B[32m Some \u001B[1m green \u001B[22m text \u001B[0m \u001B[7m Inverted\u001B[0m "); Nd4j.getMemoryManager().setAutoGcWindow(500); -// MnistDataSetIterator trainData = new MnistDataSetIterator(128, true, 45); - // FileSplit fileSplit = new FileSplit(new File("c:/users/brian/downloads/flowers"), NativeImageLoader.getALLOWED_FORMATS()); - FileSplit fileSplit = new FileSplit(new File("c:/users/brian/downloads/humans"), NativeImageLoader.getALLOWED_FORMATS()); + //MnistDataSetIterator trainData = new MnistDataSetIterator(128, true, 45); + //FileSplit fileSplit = new FileSplit(new File("c:/users/brian/downloads/flowers"), NativeImageLoader.getALLOWED_FORMATS()); + FileSplit fileSplit = new FileSplit(new File("c:/users/brian/downloads/humans"), NativeImageLoader.getALLOWED_FORMATS()); ImageTransform transform = 
new ColorConversionTransform(new Random(42), 7 ); @@ -223,7 +226,7 @@ public class App { ImageTransform transform3 = new ResizeImageTransform(X_DIM, Y_DIM); ImageTransform tr = new PipelineImageTransform.Builder() - .addImageTransform(transform) //convert to GREY SCALE + //.addImageTransform(transform) //convert to GREY SCALE .addImageTransform(transform3) //.addImageTransform(transform2) .build(); @@ -270,10 +273,10 @@ public class App { break; } - if(i%20 == 0) { - // frame2 = visualize(new INDArray[]{real}, batchSize, - // frame2 == null ? new JFrame() : frame2, true); //real has batchsize number of images - } + //if(i%20 == 0) { + frame2 = visualize(new INDArray[]{real}, batchSize, + frame2 == null ? new JFrame() : frame2, true); //real has batchsize number of images + //} real.divi(255f); // int batchSize = (int) real.shape()[0]; @@ -290,7 +293,7 @@ public class App { DataSet data = DataSet.merge(Arrays.asList(realSet, fakeSet)); dis.fit(data); - dis.fit(data); + //dis.fit(data); // Update the discriminator in the GAN network updateGan(gen, dis, gan); @@ -298,7 +301,7 @@ public class App { //gan.fit(new DataSet(Nd4j.rand(batchSize, INPUT), Nd4j.zeros(batchSize, 1))); gan.fit(new DataSet(Nd4j.rand(batchSize, CHANNELS, X_DIM, Y_DIM), Nd4j.zeros(batchSize, 1))); - + //Visualize and reporting if (j % 10 == 1) { System.out.println("Iteration " + j + " Visualizing..."); INDArray[] samples = batchSize > OUTPUT_PER_PANEL ? new INDArray[OUTPUT_PER_PANEL] : new INDArray[batchSize]; @@ -320,11 +323,16 @@ public class App { frame = visualize(samples, 1, frame == null ? new JFrame() : frame, false); //each samples only has 1 image, thus batchElements=1 } } - trainData.reset(); + if (trainData.resetSupported()) { + trainData.reset(); + } else { + log.error("Trainingdata {} does not support reset.", trainData.toString()); + } + // Copy the GANs generator to gen. + updateGen(gen, gan); } - // Copy the GANs generator to gen. 
- updateGen(gen, gan); + gen.save(new File("mnist-mlp-generator.dlj")); } @@ -383,7 +391,12 @@ public class App { } private static JLabel getImage(INDArray tensor, int batchElement, boolean isOrig) { - final BufferedImage bi = new BufferedImage(X_DIM, Y_DIM, BufferedImage.TYPE_BYTE_GRAY); + final BufferedImage bi; + if(CHANNELS>1) { + bi = new BufferedImage(X_DIM, Y_DIM, BufferedImage.TYPE_INT_RGB); //need to change here based on channels + } else { + bi = new BufferedImage(X_DIM, Y_DIM, BufferedImage.TYPE_BYTE_GRAY); //need to change here based on channels + } final int imageSize = X_DIM * Y_DIM; final int offset = batchElement * imageSize; int pxl = offset * CHANNELS; //where to start in the INDArray diff --git a/brutex-extended-tests/src/test/java/net/brutex/gan/MnistSimpleGAN.java b/brutex-extended-tests/src/test/java/net/brutex/gan/MnistSimpleGAN.java index d467f46a4..2cfddb795 100644 --- a/brutex-extended-tests/src/test/java/net/brutex/gan/MnistSimpleGAN.java +++ b/brutex-extended-tests/src/test/java/net/brutex/gan/MnistSimpleGAN.java @@ -24,12 +24,14 @@ package net.brutex.gan; import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; import org.deeplearning4j.nn.conf.GradientNormalization; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.ActivationLayer; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.DropoutLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.weights.WeightInit; +import org.junit.jupiter.api.Test; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.activations.impl.ActivationLReLU; import org.nd4j.linalg.api.ndarray.INDArray; @@ -98,7 +100,10 @@ public class MnistSimpleGAN { return new MultiLayerNetwork(discConf); } - + @Test + public void runTest() throws Exception { + main(null); + } public static void main(String[] args) throws Exception { GAN gan = new GAN.Builder() .generator(MnistSimpleGAN::getGenerator) @@ -108,6 +113,7 @@ public class MnistSimpleGAN { .updater(UPDATER) .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer) .gradientNormalizationThreshold(100) + .build(); Nd4j.getMemoryManager().setAutoGcWindow(15 * 1000); diff --git a/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/linalg/api/ndarray/INDArray.java b/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/linalg/api/ndarray/INDArray.java index f4d4b200e..680d7e945 100644 --- a/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/linalg/api/ndarray/INDArray.java +++ b/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/linalg/api/ndarray/INDArray.java @@ -2385,11 +2385,15 @@ public interface INDArray extends Serializable, AutoCloseable { */ long[] stride(); - /** - * Return the ordering (fortran or c 'f' and 'c' respectively) of this ndarray - * @return the ordering of this ndarray - */ - char ordering(); + /** + * Return the ordering (fortran or c 'f' and 'c' respectively) of this ndarray

+ * C is the contiguous layout. Mathematically speaking, row major.<br>
+ * F is the Fortran contiguous layout. Mathematically speaking, column major.<br>
+ * See <a href="https://en.wikipedia.org/wiki/Row-_and_column-major_order">Row- and column-major order</a>.<br>
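+ *
+ * A minimal illustrative sketch (assuming the standard shape-plus-ordering factory overloads):
+ * <pre>{@code
+ * INDArray c = Nd4j.create(new int[]{2, 3}, 'c'); // row-major ('c') layout
+ * INDArray f = Nd4j.create(new int[]{2, 3}, 'f'); // column-major ('f') layout
+ * char order = c.ordering();                      // returns 'c'
+ * }</pre>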
+ * + * @return the ordering of this ndarray + */ + char ordering(); /** * Returns the size along a specified dimension diff --git a/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/linalg/factory/Nd4j.java b/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/linalg/factory/Nd4j.java index dc4fa4cc4..5895f70e2 100644 --- a/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/linalg/factory/Nd4j.java +++ b/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/linalg/factory/Nd4j.java @@ -5121,7 +5121,7 @@ public class Nd4j { Nd4j.backend = backend; updateNd4jContext(); props = Nd4jContext.getInstance().getConf(); - logger.info("Properties for Nd4jContext " + props); + log.debug("Properties for Nd4jContext {}", props); PropertyParser pp = new PropertyParser(props); String otherDtype = pp.toString(ND4JSystemProperties.DTYPE); diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/BNGradientCheckTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/BNGradientCheckTest.java index 4ec963b3c..4c9c0c3e1 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/BNGradientCheckTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/BNGradientCheckTest.java @@ -122,7 +122,7 @@ public class BNGradientCheckTest extends BaseDL4JTest { .dataType(DataType.DOUBLE) .updater(new NoOp()).seed(12345L) .dist(new NormalDistribution(0, 2)).list() - .layer(0, ConvolutionLayer.builder().kernelSize(2, 2).stride(1, 1).nIn(depth).nOut(2) + .layer(0, Convolution2D.builder().kernelSize(2, 2).stride(1, 1).nIn(depth).nOut(2) .activation(Activation.IDENTITY).build()) .layer(1,BatchNormalization.builder().useLogStd(useLogStd).build()) .layer(2, ActivationLayer.builder().activation(Activation.TANH).build()) diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN1DGradientCheckTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN1DGradientCheckTest.java index 294b32dd5..b843f576c 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN1DGradientCheckTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN1DGradientCheckTest.java @@ -91,7 +91,6 @@ public class CNN1DGradientCheckTest extends BaseDL4JTest { .updater(new NoOp()) .dist(new NormalDistribution(0, 1)) .convolutionMode(ConvolutionMode.Same) - .list() .layer( Convolution1D.builder() .activation(afn) @@ -435,7 +434,6 @@ public class CNN1DGradientCheckTest extends BaseDL4JTest { .updater(new NoOp()) .dist(new NormalDistribution(0, 1)) .convolutionMode(ConvolutionMode.Same) - .list() .layer( 0, Convolution1D.builder() @@ -461,6 +459,7 @@ public class CNN1DGradientCheckTest extends BaseDL4JTest { .stride(stride) .padding(padding) .pnorm(pnorm) + .name("SubsamplingLayer") .build()) .layer( 3, diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN1DNewGradientCheckTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN1DNewGradientCheckTest.java new file mode 100644 index 000000000..453f12190 --- /dev/null +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN1DNewGradientCheckTest.java @@ -0,0 +1,811 @@ +/* + * ****************************************************************************** + * * + * * + * * This program and the accompanying materials are made available under the + * * terms of the Apache License, Version 2.0 which is available at + * * 
https://www.apache.org/licenses/LICENSE-2.0. + * * + * * See the NOTICE file distributed with this work for additional + * * information regarding copyright ownership. + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * * License for the specific language governing permissions and limitations + * * under the License. + * * + * * SPDX-License-Identifier: Apache-2.0 + * ***************************************************************************** + */ + +package org.deeplearning4j.gradientcheck; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import lombok.extern.slf4j.Slf4j; +import org.deeplearning4j.BaseDL4JTest; +import org.deeplearning4j.TestUtils; +import org.deeplearning4j.datasets.iterator.INDArrayDataSetIterator; +import org.deeplearning4j.nn.conf.ConvolutionMode; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.RNNFormat; +import org.deeplearning4j.nn.conf.distribution.NormalDistribution; +import org.deeplearning4j.nn.conf.inputs.InputType; +import org.deeplearning4j.nn.conf.layers.*; +import org.deeplearning4j.nn.conf.layers.convolutional.Cropping1D; +import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; +import org.deeplearning4j.util.Convolution1DUtils; +import org.junit.jupiter.api.Test; +import org.nd4j.common.primitives.Pair; +import org.nd4j.evaluation.classification.Evaluation; +import org.nd4j.linalg.activations.Activation; +import org.nd4j.linalg.api.buffer.DataType; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.dataset.DataSet; +import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; +import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.linalg.indexing.NDArrayIndex; +import org.nd4j.linalg.learning.config.NoOp; +import org.nd4j.linalg.lossfunctions.LossFunctions; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +@Slf4j +public class CNN1DNewGradientCheckTest extends BaseDL4JTest { + private static final boolean PRINT_RESULTS = true; + private static final boolean RETURN_ON_FIRST_FAILURE = false; + private static final double DEFAULT_EPS = 1e-6; + private static final double DEFAULT_MAX_REL_ERROR = 1e-3; + private static final double DEFAULT_MIN_ABS_ERROR = 1e-8; + + static { + Nd4j.setDataType(DataType.DOUBLE); + } + + @Test + public void testCnn1D() { + int minibatchSize = 4; + int[] dataChannels = {4, 10}; //the input + int[] kernels = {2,4,5,8}; + int stride = 2; + int padding = 3; + int seriesLength = 300; + + for (int kernel : kernels) { + for (int dChannels : dataChannels) { + int numLabels = ((seriesLength + (2 * padding) - kernel) / stride) + 1; + final NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() + .dataType(DataType.DOUBLE) + .updater(new NoOp()) + .dist(new NormalDistribution(0, 1)) + .convolutionMode(ConvolutionMode.Same) + .layer( + Convolution1DNew.builder() + .activation(Activation.RELU) + .kernelSize(kernel) + .stride(stride) + .padding(padding) + .nIn(dChannels) // channels + .nOut(3) + .rnnDataFormat(RNNFormat.NCW) + .build()) + .layer( + RnnOutputLayer.builder() + .lossFunction(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX) + .nOut(4) + .build()) + .inputType(InputType.recurrent(dChannels, seriesLength)) + .build(); + + INDArray input = 
Nd4j.rand(minibatchSize, dChannels, seriesLength); + INDArray labels = Nd4j.zeros(minibatchSize, 4, numLabels); + for (int i = 0; i < minibatchSize; i++) { + for (int j = 0; j < numLabels; j++) { + labels.putScalar(new int[] {i, i % 4, j}, 1.0); + } + } + final MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + String msg = + "Minibatch=" + + minibatchSize + + ", activationFn=" + + Activation.RELU + + ", kernel = " + + kernel; + + System.out.println(msg); + for (int j = 0; j < net.getnLayers(); j++) + System.out.println("ILayer " + j + " # params: " + net.getLayer(j).numParams()); +/** + List> iter = new java.util.ArrayList<>(Collections.emptyList()); + iter.add(new Pair<>(input, labels)); + for(int x=0;x<100; x++) net.fit(input, labels); + Evaluation eval = net.evaluate(new INDArrayDataSetIterator(iter,2), Arrays.asList(new String[]{"One", "Two", "Three", "Four"})); + // net.fit(input, labels); + eval.eval(labels, net.output(input)); + + **/ + boolean gradOK = + GradientCheckUtil.checkGradients( + net, + DEFAULT_EPS, + DEFAULT_MAX_REL_ERROR, + DEFAULT_MIN_ABS_ERROR, + PRINT_RESULTS, + RETURN_ON_FIRST_FAILURE, + input, + labels); + + assertTrue(gradOK, msg); + TestUtils.testModelSerialization(net); + } + } + + + } + + + @Test + public void testCnn1DWithLocallyConnected1D() { + Nd4j.getRandom().setSeed(1337); + + int[] minibatchSizes = {2, 3}; + int length = 25; + int convNIn = 18; + int convNOut1 = 3; + int convNOut2 = 4; + int finalNOut = 4; + + int[] kernels = {1,2,4}; + int stride = 1; + int padding = 0; + + Activation[] activations = {Activation.SIGMOID}; + + for (Activation afn : activations) { + for (int minibatchSize : minibatchSizes) { + for (int kernel : kernels) { + INDArray input = Nd4j.rand(minibatchSize, convNIn, length); + INDArray labels = Nd4j.zeros(minibatchSize, finalNOut, length); + for (int i = 0; i < minibatchSize; i++) { + for (int j = 0; j < length; j++) { + labels.putScalar(new int[] {i, i % finalNOut, j}, 1.0); + } + } + + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() + .dataType(DataType.DOUBLE) + .updater(new NoOp()) + .dist(new NormalDistribution(0, 1)) + .convolutionMode(ConvolutionMode.Same) + .layer( + Convolution1DNew.builder() + .activation(afn) + .kernelSize(kernel) + .stride(stride) + .padding(padding) + .nIn(convNIn) + .nOut(convNOut1) + .rnnDataFormat(RNNFormat.NCW) + .build()) + .layer( + LocallyConnected1D.builder() + .activation(afn) + .kernelSize(kernel) + .stride(stride) + .padding(padding) + .nIn(convNOut1) + .nOut(convNOut2) + .hasBias(false) + .build()) + .layer( + RnnOutputLayer.builder() + .lossFunction(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX) + .nOut(finalNOut) + .build()) + .inputType(InputType.recurrent(convNIn, length)) + .build(); + + String json = conf.toJson(); + NeuralNetConfiguration c2 = NeuralNetConfiguration.fromJson(json); + assertEquals(conf, c2); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + String msg = + "Minibatch=" + minibatchSize + ", activationFn=" + afn + ", kernel = " + kernel; + + if (PRINT_RESULTS) { + System.out.println(msg); + // for (int j = 0; j < net.getnLayers(); j++) + // System.out.println("ILayer " + j + " # params: " + + // net.getLayer(j).numParams()); + } + + boolean gradOK = + GradientCheckUtil.checkGradients( + net, + DEFAULT_EPS, + DEFAULT_MAX_REL_ERROR, + DEFAULT_MIN_ABS_ERROR, + PRINT_RESULTS, + RETURN_ON_FIRST_FAILURE, + input, + labels); + + assertTrue(gradOK, msg); + + TestUtils.testModelSerialization(net); 
+ } + } + } + } + + @Test + public void testCnn1DWithCropping1D() { + Nd4j.getRandom().setSeed(1337); + + int[] minibatchSizes = {1, 3}; + int length = 7; + int convNIn = 2; + int convNOut1 = 3; + int convNOut2 = 4; + int finalNOut = 4; + + int[] kernels = {1, 2, 4}; + int stride = 1; + + int padding = 0; + int cropping = 1; + int croppedLength = length - 2 * cropping; + + Activation[] activations = {Activation.SIGMOID}; + SubsamplingLayer.PoolingType[] poolingTypes = + new SubsamplingLayer.PoolingType[] { + SubsamplingLayer.PoolingType.MAX, + SubsamplingLayer.PoolingType.AVG, + SubsamplingLayer.PoolingType.PNORM + }; + + for (Activation afn : activations) { + for (SubsamplingLayer.PoolingType poolingType : poolingTypes) { + for (int minibatchSize : minibatchSizes) { + for (int kernel : kernels) { + INDArray input = Nd4j.rand(minibatchSize, convNIn, length); + INDArray labels = Nd4j.zeros(minibatchSize, finalNOut, croppedLength); + for (int i = 0; i < minibatchSize; i++) { + for (int j = 0; j < croppedLength; j++) { + labels.putScalar(new int[] {i, i % finalNOut, j}, 1.0); + } + } + + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() + .dataType(DataType.DOUBLE) + .updater(new NoOp()) + .dist(new NormalDistribution(0, 1)) + .convolutionMode(ConvolutionMode.Same) + .layer( + Convolution1DNew.builder() + .activation(afn) + .kernelSize(kernel) + .stride(stride) + .padding(padding) + .nOut(convNOut1) + .build()) + .layer(Cropping1D.builder(cropping).build()) + .layer( + Convolution1DNew.builder() + .activation(afn) + .kernelSize(kernel) + .stride(stride) + .padding(padding) + .nOut(convNOut2) + .build()) + .layer( + RnnOutputLayer.builder() + .lossFunction(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX) + .nOut(finalNOut) + .build()) + .inputType(InputType.recurrent(convNIn, length, RNNFormat.NCW)) + .build(); + + String json = conf.toJson(); + NeuralNetConfiguration c2 = NeuralNetConfiguration.fromJson(json); + assertEquals(conf, c2); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + String msg = + "PoolingType=" + + poolingType + + ", minibatch=" + + minibatchSize + + ", activationFn=" + + afn + + ", kernel = " + + kernel; + + if (PRINT_RESULTS) { + System.out.println(msg); + // for (int j = 0; j < net.getnLayers(); j++) + // System.out.println("ILayer " + j + " # params: " + + // net.getLayer(j).numParams()); + } + + boolean gradOK = + GradientCheckUtil.checkGradients( + net, + DEFAULT_EPS, + DEFAULT_MAX_REL_ERROR, + DEFAULT_MIN_ABS_ERROR, + PRINT_RESULTS, + RETURN_ON_FIRST_FAILURE, + input, + labels); + + assertTrue(gradOK, msg); + + TestUtils.testModelSerialization(net); + } + } + } + } + } + + @Test + public void testCnn1DWithZeroPadding1D() { + Nd4j.getRandom().setSeed(1337); + + int[] minibatchSizes = {1, 3}; + int length = 7; + int convNIn = 2; + int convNOut1 = 3; + int convNOut2 = 4; + int finalNOut = 4; + + int[] kernels = {1, 2, 4}; + int stride = 1; + int pnorm = 2; + + int padding = 0; + int zeroPadding = 2; + int paddedLength = length + 2 * zeroPadding; + + Activation[] activations = {Activation.SIGMOID}; + SubsamplingLayer.PoolingType[] poolingTypes = + new SubsamplingLayer.PoolingType[] { + SubsamplingLayer.PoolingType.MAX, + SubsamplingLayer.PoolingType.AVG, + SubsamplingLayer.PoolingType.PNORM + }; + + for (Activation afn : activations) { + for (SubsamplingLayer.PoolingType poolingType : poolingTypes) { + for (int minibatchSize : minibatchSizes) { + for (int kernel : kernels) { + INDArray input = 
Nd4j.rand(minibatchSize, convNIn, length); + INDArray labels = Nd4j.zeros(minibatchSize, finalNOut, paddedLength); + for (int i = 0; i < minibatchSize; i++) { + for (int j = 0; j < paddedLength; j++) { + labels.putScalar(new int[] {i, i % finalNOut, j}, 1.0); + } + } + + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() + .dataType(DataType.DOUBLE) + .updater(new NoOp()) + .dist(new NormalDistribution(0, 1)) + .convolutionMode(ConvolutionMode.Same) + .layer( + Convolution1DNew.builder() + .activation(afn) + .kernelSize(2, kernel) + .stride(stride) + .padding(padding) + .nOut(convNOut1) + .build()) + .layer(ZeroPadding1DLayer.builder(zeroPadding).build()) + .layer( + Convolution1DNew.builder() + .activation(afn) + .kernelSize(kernel) + .stride(stride) + .padding(padding) + .nOut(convNOut2) + .build()) + .layer(ZeroPadding1DLayer.builder(0).build()) + .layer( + Subsampling1DLayer.builder(poolingType) + .kernelSize(kernel) + .stride(stride) + .padding(padding) + .pnorm(pnorm) + .build()) + .layer( + RnnOutputLayer.builder() + .lossFunction(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX) + .nOut(finalNOut) + .build()) + .inputType(InputType.recurrent(convNIn, length, RNNFormat.NCW)) + .build(); + + String json = conf.toJson(); + NeuralNetConfiguration c2 = NeuralNetConfiguration.fromJson(json); + assertEquals(conf, c2); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + String msg = + "PoolingType=" + + poolingType + + ", minibatch=" + + minibatchSize + + ", activationFn=" + + afn + + ", kernel = " + + kernel; + + if (PRINT_RESULTS) { + System.out.println(msg); + // for (int j = 0; j < net.getnLayers(); j++) + // System.out.println("ILayer " + j + " # params: " + + // net.getLayer(j).numParams()); + } + + boolean gradOK = + GradientCheckUtil.checkGradients( + net, + DEFAULT_EPS, + DEFAULT_MAX_REL_ERROR, + DEFAULT_MIN_ABS_ERROR, + PRINT_RESULTS, + RETURN_ON_FIRST_FAILURE, + input, + labels); + + assertTrue(gradOK, msg); + TestUtils.testModelSerialization(net); + } + } + } + } + } + + @Test + public void testCnn1DWithSubsampling1D() { + Nd4j.getRandom().setSeed(12345); + + int[] minibatchSizes = {1, 3}; + int length = 7; + int convNIn = 2; + int convNOut1 = 3; + int convNOut2 = 4; + int finalNOut = 4; + + int[] kernels = {1, 2, 4}; + int stride = 1; + int padding = 0; + int pnorm = 2; + + Activation[] activations = {Activation.SIGMOID, Activation.TANH}; + SubsamplingLayer.PoolingType[] poolingTypes = + new SubsamplingLayer.PoolingType[] { + SubsamplingLayer.PoolingType.MAX, + SubsamplingLayer.PoolingType.AVG, + SubsamplingLayer.PoolingType.PNORM + }; + + for (Activation afn : activations) { + for (SubsamplingLayer.PoolingType poolingType : poolingTypes) { + for (int minibatchSize : minibatchSizes) { + for (int kernel : kernels) { + INDArray input = Nd4j.rand(minibatchSize, convNIn, length); + INDArray labels = Nd4j.zeros(minibatchSize, finalNOut, length); + for (int i = 0; i < minibatchSize; i++) { + for (int j = 0; j < length; j++) { + labels.putScalar(new int[] {i, i % finalNOut, j}, 1.0); + } + } + + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() + .dataType(DataType.DOUBLE) + .updater(new NoOp()) + .dist(new NormalDistribution(0, 1)) + .convolutionMode(ConvolutionMode.Same) + .layer( + 0, + Convolution1DNew.builder() + .activation(afn) + .kernelSize(kernel) + .stride(stride) + .padding(padding) + .nOut(convNOut1) + .build()) + .layer( + 1, + Convolution1DNew.builder() + .activation(afn) + .kernelSize(kernel) + 
.stride(stride) + .padding(padding) + .nOut(convNOut2) + .build()) + .layer( + 2, + Subsampling1DLayer.builder(poolingType) + .kernelSize(kernel) + .stride(stride) + .padding(padding) + .pnorm(pnorm) + .name("SubsamplingLayer") + .build()) + .layer( + 3, + RnnOutputLayer.builder() + .lossFunction(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX) + .nOut(finalNOut) + .build()) + .inputType(InputType.recurrent(convNIn, length, RNNFormat.NCW)) + .build(); + + String json = conf.toJson(); + NeuralNetConfiguration c2 = NeuralNetConfiguration.fromJson(json); + assertEquals(conf, c2); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + String msg = + "PoolingType=" + + poolingType + + ", minibatch=" + + minibatchSize + + ", activationFn=" + + afn + + ", kernel = " + + kernel; + + if (PRINT_RESULTS) { + System.out.println(msg); + // for (int j = 0; j < net.getnLayers(); j++) + // System.out.println("ILayer " + j + " # params: " + + // net.getLayer(j).numParams()); + } + + boolean gradOK = + GradientCheckUtil.checkGradients( + net, + DEFAULT_EPS, + DEFAULT_MAX_REL_ERROR, + DEFAULT_MIN_ABS_ERROR, + PRINT_RESULTS, + RETURN_ON_FIRST_FAILURE, + input, + labels); + + assertTrue(gradOK, msg); + TestUtils.testModelSerialization(net); + } + } + } + } + } + + @Test + public void testCnn1dWithMasking() { + int length = 12; + int convNIn = 2; + int convNOut1 = 3; + int convNOut2 = 4; + int finalNOut = 3; + + int pnorm = 2; + + SubsamplingLayer.PoolingType[] poolingTypes = + new SubsamplingLayer.PoolingType[] { + SubsamplingLayer.PoolingType.MAX, SubsamplingLayer.PoolingType.AVG + }; + + for (SubsamplingLayer.PoolingType poolingType : poolingTypes) { + for (ConvolutionMode cm : + new ConvolutionMode[] {ConvolutionMode.Same, ConvolutionMode.Truncate}) { + for (int stride : new int[] {1, 2}) { + String s = cm + ", stride=" + stride + ", pooling=" + poolingType; + log.info("Starting test: " + s); + Nd4j.getRandom().setSeed(12345); + + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() + .dataType(DataType.DOUBLE) + .updater(new NoOp()) + .activation(Activation.TANH) + .dist(new NormalDistribution(0, 1)) + .convolutionMode(cm) + .seed(12345) + .layer( + Convolution1DNew.builder() + .kernelSize(2) + .rnnDataFormat(RNNFormat.NCW) + .stride(stride) + .nIn(convNIn) + .nOut(convNOut1) + .build()) + .layer( + Subsampling1DLayer.builder(poolingType) + .kernelSize(2) + .stride(stride) + .pnorm(pnorm) + .build()) + .layer( + Convolution1DNew.builder() + .kernelSize(2) + .rnnDataFormat(RNNFormat.NCW) + .stride(stride) + .nIn(convNOut1) + .nOut(convNOut2) + .build()) + .layer(GlobalPoolingLayer.builder().poolingType(PoolingType.AVG).build()) + .layer( + OutputLayer.builder() + .lossFunction(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX) + .nOut(finalNOut) + .build()) + .inputType(InputType.recurrent(convNIn, length)) + .build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + INDArray f = Nd4j.rand(2, convNIn, length); + INDArray fm = Nd4j.create(2, length); + fm.get(NDArrayIndex.point(0), NDArrayIndex.all()).assign(1); + fm.get(NDArrayIndex.point(1), NDArrayIndex.interval(0, 6)).assign(1); + + INDArray label = TestUtils.randomOneHot(2, finalNOut); + + boolean gradOK = + GradientCheckUtil.checkGradients( + new GradientCheckUtil.MLNConfig().net(net).input(f).labels(label).inputMask(fm)); + + assertTrue(gradOK, s); + TestUtils.testModelSerialization(net); + + // TODO also check that masked step values don't impact forward 
pass, score or gradients + + DataSet ds = new DataSet(f, label, fm, null); + double scoreBefore = net.score(ds); + net.setInput(f); + net.setLabels(label); + net.setLayerMaskArrays(fm, null); + net.computeGradientAndScore(); + INDArray gradBefore = net.getFlattenedGradients().dup(); + f.putScalar(1, 0, 10, 10.0); + f.putScalar(1, 1, 11, 20.0); + double scoreAfter = net.score(ds); + net.setInput(f); + net.setLabels(label); + net.setLayerMaskArrays(fm, null); + net.computeGradientAndScore(); + INDArray gradAfter = net.getFlattenedGradients().dup(); + + assertEquals(scoreBefore, scoreAfter, 1e-6); + assertEquals(gradBefore, gradAfter); + } + } + } + } + + @Test + public void testCnn1Causal() throws Exception { + int convNIn = 2; + int convNOut1 = 3; + int convNOut2 = 4; + int finalNOut = 3; + + int[] lengths = {11, 12, 13, 9, 10, 11}; + int[] kernels = {2, 3, 2, 4, 2, 3}; + int[] dilations = {1, 1, 2, 1, 2, 1}; + int[] strides = {1, 2, 1, 2, 1, 1}; + boolean[] masks = {false, true, false, true, false, true}; + boolean[] hasB = {true, false, true, false, true, true}; + for (int i = 0; i < lengths.length; i++) { + int length = lengths[i]; + int k = kernels[i]; + int d = dilations[i]; + int st = strides[i]; + boolean mask = masks[i]; + boolean hasBias = hasB[i]; + // TODO has bias + String s = "k=" + k + ", s=" + st + " d=" + d + ", seqLen=" + length; + log.info("Starting test: " + s); + Nd4j.getRandom().setSeed(12345); + + NeuralNetConfiguration conf = + NeuralNetConfiguration.builder() + .dataType(DataType.DOUBLE) + .updater(new NoOp()) + .activation(Activation.TANH) + .weightInit(new NormalDistribution(0, 1)) + .seed(12345) + .layer( + Convolution1DNew.builder() + .kernelSize(k) + .dilation(d) + .hasBias(hasBias) + .convolutionMode(ConvolutionMode.Causal) + .stride(st) + .nOut(convNOut1) + .build()) + .layer( + Convolution1DNew.builder() + .kernelSize(k) + .dilation(d) + .convolutionMode(ConvolutionMode.Causal) + .stride(st) + .nOut(convNOut2) + .build()) + .layer( + RnnOutputLayer.builder() + .lossFunction(LossFunctions.LossFunction.MCXENT) + .activation(Activation.SOFTMAX) + .nOut(finalNOut) + .build()) + .inputType(InputType.recurrent(convNIn, length, RNNFormat.NCW)) + .build(); + + MultiLayerNetwork net = new MultiLayerNetwork(conf); + net.init(); + + INDArray f = Nd4j.rand(DataType.DOUBLE, 2, convNIn, length); + INDArray fm = null; + if (mask) { + fm = Nd4j.create(2, length); + fm.get(NDArrayIndex.point(0), NDArrayIndex.all()).assign(1); + fm.get(NDArrayIndex.point(1), NDArrayIndex.interval(0, length - 2)).assign(1); + } + + long outSize1 = Convolution1DUtils.getOutputSize(length, k, st, 0, ConvolutionMode.Causal, d); + long outSize2 = + Convolution1DUtils.getOutputSize(outSize1, k, st, 0, ConvolutionMode.Causal, d); + + INDArray label = TestUtils.randomOneHotTimeSeries(2, finalNOut, (int) outSize2); + + String msg = + "Minibatch=" + + 1 + + ", activationFn=" + + Activation.RELU + + ", kernel = " + + k; + + System.out.println(msg); + for (int j = 0; j < net.getnLayers(); j++) + System.out.println("ILayer " + j + " # params: " + net.getLayer(j).numParams()); + + boolean gradOK = + GradientCheckUtil.checkGradients( + new GradientCheckUtil.MLNConfig().net(net).input(f).labels(label).inputMask(fm)); + + assertTrue(gradOK, s); + TestUtils.testModelSerialization(net); + } + } +} diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNNGradientCheckTest.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNNGradientCheckTest.java index 
a3ef0c082..f071baca5 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNNGradientCheckTest.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNNGradientCheckTest.java @@ -108,8 +108,8 @@ public class CNNGradientCheckTest extends BaseDL4JTest { .updater(new NoOp()) .weightInit(WeightInit.XAVIER) .seed(12345L) - .list() - .layer(0, ConvolutionLayer.builder(1, 1).nOut(6).activation(afn).build()) + + .layer(0, Convolution2D.builder().kernelSize(1).stride(1).nOut(6).activation(afn).build()) .layer(1, OutputLayer.builder(lf).activation(outputActivation).nOut(3).build()) .inputType(InputType.convolutionalFlat(1, 4, 1)); diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasAtrousConvolution2D.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasAtrousConvolution2D.java index 419d74490..6f6fa25b3 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasAtrousConvolution2D.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasAtrousConvolution2D.java @@ -24,6 +24,7 @@ import lombok.val; import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.inputs.InputType; +import org.deeplearning4j.nn.conf.layers.Convolution2D; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; @@ -85,7 +86,7 @@ public class KerasAtrousConvolution2D extends KerasConvolution { IWeightInit init = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_INIT(), enforceTrainingConfig, conf, kerasMajorVersion); - val builder = ConvolutionLayer.builder().name(this.name) + val builder = Convolution2D.builder().name(this.name) .nOut(getNOutFromConfig(layerConfig, conf)).dropOut(this.dropout) .activation(getIActivationFromConfig(layerConfig, conf)) .weightInit(init) diff --git a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution2D.java b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution2D.java index 7eb2c62a0..ff1e73819 100644 --- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution2D.java +++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution2D.java @@ -28,6 +28,7 @@ import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.inputs.InputType; +import org.deeplearning4j.nn.conf.layers.Convolution2D; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; @@ -95,7 +96,7 @@ public class KerasConvolution2D extends KerasConvolution { LayerConstraint weightConstraint = KerasConstraintUtils.getConstraintsFromConfig( 
layerConfig, conf.getLAYER_FIELD_W_CONSTRAINT(), conf, kerasMajorVersion); - final var builder = ConvolutionLayer.builder().name(this.name) + final var builder = Convolution2D.builder().name(this.name) .nOut(getNOutFromConfig(layerConfig, conf)).dropOut(this.dropout) .activation(getIActivationFromConfig(layerConfig, conf)) .weightInit(init) diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java index f7a586313..fcdb56125 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java @@ -222,6 +222,14 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { // TODO do not put inside self to avoid serialization issues // innerConfigurations.add(0, this); //put this configuration at first place + + getLayerConfigurations().stream() + .forEach( + lconf -> + lconf.setNetConfiguration( + this)); // set this as net config for all layers (defined in here, not stacked + + /** * Inherit network wide configuration setting to those layer configurations that do not have an * individual setting (nor a default) @@ -230,11 +238,6 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { lconf.runInheritance(); } - getLayerConfigurations().stream() - .forEach( - lconf -> - lconf.setNetConfiguration( - this)); // set this as net config for all layers (defined in here, not stacked // Validate BackpropType setting if ((tbpttBackLength != DEFAULT_TBPTT_LENGTH || tbpttFwdLength != DEFAULT_TBPTT_LENGTH) @@ -326,7 +329,7 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration { LayerConfiguration layer = getFlattenedLayerConfigurations().get(i - 1); // convolution 1d is an edge case where it has rnn input type but the filters // should be the output - if (layer instanceof Convolution1D) { + if (layer instanceof Convolution1D || layer instanceof Convolution1DNew) { if (l instanceof DenseLayer && getInputType() instanceof InputType.InputTypeRecurrent) { FeedForwardLayer feedForwardLayer = (FeedForwardLayer) l; if (getInputType() instanceof InputType.InputTypeRecurrent) { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/RNNFormat.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/RNNFormat.java index 84a2d1c3a..918bcf696 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/RNNFormat.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/RNNFormat.java @@ -21,7 +21,13 @@ package org.deeplearning4j.nn.conf; +/** + * N is the batch size
+ * C is the number of feature maps (that is, the number of channels)<br>
+ * H is the image height (not used for 1D conv, as this is an RNN format)<br>
+ * W is the image width
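+ *
+ * Illustrative example (hypothetical sizes): a minibatch of 32 sequences with 8 features and 100 time steps
+ * is shaped [32, 8, 100] in NCW and [32, 100, 8] in NWC:
+ * <pre>{@code
+ * INDArray ncw = Nd4j.rand(DataType.FLOAT, 32, 8, 100); // [batch, channels, width]
+ * INDArray nwc = ncw.permute(0, 2, 1);                  // [batch, width, channels]
+ * }</pre>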
+ * **/ public enum RNNFormat implements DataFormat { - NCW, - NWC + /** n=batch size; c=channels/ features; w=width **/ NCW, + /** n=batch size; w=width; c=channels/ features **/ NWC } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AbstractConvolutionLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AbstractConvolutionLayer.java new file mode 100644 index 000000000..dc90bb8f3 --- /dev/null +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AbstractConvolutionLayer.java @@ -0,0 +1,142 @@ +/* + * + * ****************************************************************************** + * * + * * This program and the accompanying materials are made available under the + * * terms of the Apache License, Version 2.0 which is available at + * * https://www.apache.org/licenses/LICENSE-2.0. + * * + * * See the NOTICE file distributed with this work for additional + * * information regarding copyright ownership. + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * * License for the specific language governing permissions and limitations + * * under the License. + * * + * * SPDX-License-Identifier: Apache-2.0 + * ***************************************************************************** + * + */ + +package org.deeplearning4j.nn.conf.layers; + +import java.util.Arrays; + +import com.fasterxml.jackson.annotation.JsonIgnore; +import lombok.*; +import lombok.experimental.Accessors; +import lombok.experimental.SuperBuilder; +import lombok.extern.slf4j.Slf4j; +import org.deeplearning4j.nn.conf.CNN2DFormat; +import org.deeplearning4j.nn.conf.ConvolutionMode; +import org.deeplearning4j.util.ValidationUtils; + +/** + * ConvolutionLayer nIn in the input layer is the number of channels nOut is the number of filters + * to be used in the net or in other words the channels The builder specifies the filter/kernel + * size, the stride and padding The pooling layer takes the kernel size + * + *

Supports multiple dimensions: In a 1D CNN, the kernel moves in one direction. Input and output data of + * a 1D CNN are 2-dimensional. Mostly used on time-series data. + * + *<br>

In a 2D CNN, the kernel moves in two directions. Input and output data of a 2D CNN are 3-dimensional. + * Mostly used on image data. + * + *<br>

In 3D CNN, kernel moves in 3 directions. Input and output data of 3D CNN is 4 dimensional. + * Mostly used on 3D Image data (MRI, CT Scans, Video). + */ +@ToString(callSuper = true) +@NoArgsConstructor +@EqualsAndHashCode(callSuper = true) +@Slf4j +@SuperBuilder +public abstract class AbstractConvolutionLayer extends FeedForwardLayer { + /** The kernel of this convolution with size in each n-dimensions */ + @Getter private int[] kernelSize; + /** The stride */ + @Getter private int[] stride; + /** The padding */ + @Getter private int[] padding; + /** The dilation */ + @Getter private int[] dilation; + /** If true (default): include bias parameters in the model. False: no bias. */ + @Builder.Default + @Getter + @Accessors(fluent = true) + @Setter + private boolean hasBias = true; + /** + * Set the convolution mode for the Convolution layer. See {@link ConvolutionMode} for more + * details Default is {@link ConvolutionMode}.Truncate. + */ + @Builder.Default @Getter @Setter + private ConvolutionMode convolutionMode = ConvolutionMode.Truncate; + /** + * When using CuDNN and an error is encountered, should fallback to the non-CuDNN implementatation + * be allowed? If set to false, an exception in CuDNN will be propagated back to the user. If + * false, the built-in (non-CuDNN) implementation for ConvolutionLayer will be used + */ + @Getter @Setter @Builder.Default private boolean cudnnAllowFallback = true; + + /** Defaults to "PREFER_FASTEST", but "NO_WORKSPACE" uses less memory. */ + @Getter @Setter @Builder.Default private ConvolutionLayer.AlgoMode cudnnAlgoMode = ConvolutionLayer.AlgoMode.PREFER_FASTEST; + + @Getter @Setter private ConvolutionLayer.FwdAlgo cudnnFwdAlgo; + @Getter @Setter private ConvolutionLayer.BwdFilterAlgo cudnnBwdFilterAlgo; + @Getter @Setter private ConvolutionLayer.BwdDataAlgo cudnnBwdDataAlgo; + + /** + * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last). + * See {@link CNN2DFormat} for more details.
+ * Default: NCHW + * + * @param format Format for activations (in and out) + */ + @Builder.Default @Getter @Setter + private CNN2DFormat convFormat = + CNN2DFormat.NCHW; // default value for legacy serialization reasons + + + /** + * Number of parameters this layer has a result of its configuration. + * + * @return number or parameters + */ + @Override + public long numParams() { + var kern = 1; + for (int i : getKernelSize()) { + kern = kern * i; + } + return nIn * nOut * kern + (hasBias() ? nOut : 0); + } + + public abstract static class AbstractConvolutionLayerBuilder< + C extends AbstractConvolutionLayer, B extends AbstractConvolutionLayerBuilder> + extends FeedForwardLayerBuilder { + + public B kernelSize(int @NonNull ... kernelSize) { + if (this.kernelSize != null) { + log.warn("You are setting the kernel more than once, last call with override prior calls."); + } + this.kernelSize = kernelSize; + return self(); + } + + public B stride(int @NonNull ... stride) { + this.stride = stride; + return self(); + } + + public B padding(int @NonNull ... padding) { + this.padding = padding; + return self(); + } + + public B dilation(int @NonNull ... dilation) { + this.dilation = dilation; + return self(); + } + } +} diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseLayerConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseLayerConfiguration.java index 9f0bf7798..502e5a5e4 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseLayerConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseLayerConfiguration.java @@ -52,6 +52,16 @@ import org.nd4j.linalg.learning.regularization.WeightDecay; @SuperBuilder public abstract class BaseLayerConfiguration extends LayerConfiguration implements ITraininableLayerConfiguration, Serializable, Cloneable { + /** + * Number of parameters this layer has a result of its configuration. This default implementation + * calls {@link #initializer()}.numParams( this ). + * + * @return number or parameters + */ + @Override + public long numParams() { + return initializer().numParams(this); + } /** * Set constraints to be applied to all layers. Default: no constraints.
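Note on the numParams() implementations added above: for AbstractConvolutionLayer the count is the weight tensor
(nIn * nOut * product of the kernel dimensions) plus one bias per output map when hasBias() is true. A quick
sanity check with illustrative values (not taken from the tests in this patch):

    long nIn = 2, nOut = 3;
    int[] kernelSize = {4};                    // 1D kernel of width 4
    long kern = 1;
    for (int k : kernelSize) kern *= k;        // product over all kernel dimensions
    long numParams = nIn * nOut * kern + nOut; // 2 * 3 * 4 + 3 = 27 parameters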
diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CapsuleLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CapsuleLayer.java index 89c38520e..de7b1bf86 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CapsuleLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CapsuleLayer.java @@ -45,6 +45,7 @@ import org.nd4j.linalg.factory.Nd4j; @NoArgsConstructor public class CapsuleLayer extends SameDiffLayer { + private static final String WEIGHT_PARAM = "weight"; private static final String BIAS_PARAM = "bias"; /** diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1D.java index ad686088b..21ab00585 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1D.java @@ -36,22 +36,17 @@ import org.deeplearning4j.util.ValidationUtils; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; -/* + //TODO: We will eventually want to NOT subclass off of ConvolutionLayer. //Currently, we just subclass off the ConvolutionLayer and hard code the "width" dimension to 1 - * This approach treats a multivariate time series with L timesteps and +/** + * This approach treats a multivariate time series with L time steps and * P variables as an L x 1 x P image (L rows high, 1 column wide, P * channels deep). The kernel should be H> + C extends Convolution1D, B extends Convolution1DBuilder> extends ConvolutionLayerBuilder { @Override diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1DNew.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1DNew.java new file mode 100644 index 000000000..93f8f2065 --- /dev/null +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1DNew.java @@ -0,0 +1,250 @@ +/* + * ****************************************************************************** + * * + * * + * * This program and the accompanying materials are made available under the + * * terms of the Apache License, Version 2.0 which is available at + * * https://www.apache.org/licenses/LICENSE-2.0. + * * + * * See the NOTICE file distributed with this work for additional + * * information regarding copyright ownership. + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * * License for the specific language governing permissions and limitations + * * under the License. 
+ * * + * * SPDX-License-Identifier: Apache-2.0 + * ***************************************************************************** + */ + +package org.deeplearning4j.nn.conf.layers; + +import java.util.Collection; +import java.util.Map; +import lombok.*; +import lombok.experimental.SuperBuilder; +import lombok.extern.jackson.Jacksonized; +import lombok.extern.slf4j.Slf4j; +import org.deeplearning4j.nn.api.ParamInitializer; +import org.deeplearning4j.nn.conf.CNN2DFormat; +import org.deeplearning4j.nn.conf.InputPreProcessor; +import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.RNNFormat; +import org.deeplearning4j.nn.conf.inputs.InputType; +import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; +import org.deeplearning4j.nn.params.ConvolutionNewParamInitializer; +import org.deeplearning4j.optimize.api.TrainingListener; +import org.deeplearning4j.util.Convolution1DUtils; +import org.deeplearning4j.util.ValidationUtils; +import org.nd4j.linalg.api.buffer.DataType; +import org.nd4j.linalg.api.ndarray.INDArray; + +// TODO: We will eventually want to NOT subclass off of ConvolutionLayer. +// Currently, we just subclass off the ConvolutionLayer and hard code the "width" dimension to 1 + +/** + * This approach treats a multivariate time series with L time steps and P variables as an L x 1 x P + * image (L rows high, 1 column wide, P channels deep). The kernel should be HIn 1D CNN, kernel moves in 1 direction. The kernel has 2-dimensions. Input and output data of + * 1D CNN is 2-dimensional. Mostly used on Time-Series data. + */ +@Data +@Slf4j +@ToString(callSuper = true) +@EqualsAndHashCode(callSuper = true) +@Jacksonized +@SuperBuilder +public class Convolution1DNew extends AbstractConvolutionLayer { + + /** + * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last). + * See {@link CNN2DFormat} for more details.
+ * Default: NCHW + * + * @param format Format for activations (in and out) + */ + @Builder.Default + protected CNN2DFormat dataFormat = + CNN2DFormat.NCHW; // default value for legacy serialization reasons + + @Builder.Default private RNNFormat rnnDataFormat = RNNFormat.NCW; + + @Override + public ParamInitializer initializer() { + return ConvolutionNewParamInitializer.getInstance(); + } + + @Override + public org.deeplearning4j.nn.api.Layer instantiate( + NeuralNetConfiguration conf, + Collection trainingListeners, + int layerIndex, + INDArray layerParamsView, + boolean initializeParams, + DataType networkDataType) { + setNetConfiguration(conf); + LayerValidation.assertNInNOutSet("Convolution1D", getName(), layerIndex, getNIn(), getNOut()); + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + /* + Object ret; + try { + ret = lconf.getCanConfigure() + .getConstructor(LayerConfiguration.class, DataType.class) + .newInstance(new Object[] { lconf, networkDataType }); + } catch (Exception e) { + throw new RuntimeException(e); + + */ + org.deeplearning4j.nn.layers.convolution.Convolution1DNewLayer ret = + new org.deeplearning4j.nn.layers.convolution.Convolution1DNewLayer(lconf, networkDataType); + + ret.addTrainingListeners(trainingListeners); + ret.setIndex(layerIndex); + ret.setParamsViewArray(layerParamsView); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); + ret.setParamTable(paramTable); + ret.setLayerConfiguration(this); + return ret; + } + + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + if (inputType == null || inputType.getType() != InputType.Type.RNN) { + throw new IllegalStateException( + "Invalid input for 1D CNN layer (layer index = " + + layerIndex + + ", layer name = \"" + + getName() + + "\"): expect RNN input type with size > 0. Got: " + + inputType); + } + InputType.InputTypeRecurrent it = (InputType.InputTypeRecurrent) inputType; + long inputTsLength = it.getTimeSeriesLength(); + long outLength; + if (inputTsLength < 0) { + // Probably: user did InputType.recurrent(x) without specifying sequence length + outLength = -1; + } else { + outLength = + Convolution1DUtils.getOutputSize( + inputTsLength, + getKernelSize()[0], + getStride()[0], + getPadding()[0], + getConvolutionMode(), + getDilation()[0]); + } + + return InputType.recurrent(nOut, outLength, rnnDataFormat); + } + + @Override + public void setNIn(InputType inputType, boolean override) { + if (inputType == null || inputType.getType() != InputType.Type.RNN) { + throw new IllegalStateException( + "Invalid input for 1D CNN layer (layer name = \"" + + getName() + + "\"): expect RNN input type with size > 0. Got: " + + inputType); + } + + InputType.InputTypeRecurrent r = (InputType.InputTypeRecurrent) inputType; + if (nIn <= 0 || override) { + this.nIn = r.getSize(); + } + if (this.rnnDataFormat == null || override) this.rnnDataFormat = r.getFormat(); + + if (this.dataFormat == null || override) + this.dataFormat = rnnDataFormat == RNNFormat.NCW ? 
CNN2DFormat.NCHW : CNN2DFormat.NHWC; + } + + @Override + public InputPreProcessor getPreProcessorForInputType(InputType inputType) { + if (inputType == null) { + throw new IllegalStateException( + "Invalid input for Convolution1D layer (layer name=\"" + + getName() + + "\"): input is null"); + } + + return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType, rnnDataFormat, getName()); + } + + /** + * This is a report of the estimated memory consumption for the given layer + * + * @param inputType Input type to the layer. Memory consumption is often a function of the input + * type + * @return Memory report for the layer + */ + @Override + public LayerMemoryReport getMemoryReport(InputType inputType) { + return null; + } + + protected boolean allowCausal() { + return true; + } + + private static final class Convolution1DNewBuilderImpl + extends Convolution1DNewBuilder { + public Convolution1DNew build() { + Convolution1DNew l = new Convolution1DNew(this); + if (l.getDilation() == null) { + dilation(1, 1); + } + if (l.getPadding() == null) { + padding(0); + } + l = new Convolution1DNew(this); + + Convolution1DUtils.validateConvolutionModePadding(l.getConvolutionMode(), l.getPadding()[0]); + Convolution1DUtils.validateCnn1DKernelStridePadding( + l.getKernelSize()[0], l.getStride()[0], l.getPadding()[0]); + l.initializeConstraints(); + return l; + } + } + + public abstract static class Convolution1DNewBuilder< + C extends Convolution1DNew, B extends Convolution1DNewBuilder> + extends AbstractConvolutionLayerBuilder { + private int dimensions(Class arrayType) { + return arrayType.isArray() ? 1 + dimensions(arrayType.getComponentType()) : 0; + } + + @Override + public B kernelSize(int @NonNull ... kernel) { + // Todo, we always provide arrays, but only first element is really used + if (dimensions(kernel.getClass()) > 1) + log.warn( + "Kernel size has '{}' dimensions, only using first dimensions for 1D convolution layer.", + dimensions(kernel.getClass())); + super.kernelSize( + ValidationUtils.validate1NonNegative(new int[] {kernel[0]}, "kernelSize")[0], 1); + return self(); + } + + public B padding(int @NonNull ... padding) { + // Todo, we always provide arrays, but only first element is really used + super.padding(ValidationUtils.validate1NonNegative(new int[] {padding[0]}, "padding")); + + return self(); + } + + public B dilation(int @NonNull ... dilation) { + // Todo, we always provide arrays, but only first element is really used + super.dilation(ValidationUtils.validate1NonNegative(new int[] {dilation[0]}, "dilation")); + return self(); + } + + public B stride(int @NonNull ... 
stride) { + // Todo, we always provide arrays, but only first element is really used + super.stride(ValidationUtils.validate1NonNegative(new int[] {stride[0]}, "stride")[0], 1); + return self(); + } + } +} diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java index 8435701ff..0e76f5776 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java @@ -61,6 +61,23 @@ import org.nd4j.linalg.api.ndarray.INDArray; @EqualsAndHashCode(callSuper = true) @SuperBuilder(builderMethodName = "innerBuilder") public class ConvolutionLayer extends FeedForwardLayer { + + public static ConvolutionLayerBuilder builder() { + return innerBuilder(); + } + + public static ConvolutionLayerBuilder builder(int... kernelSize) { + return innerBuilder().kernelSize(kernelSize); + } + + public static ConvolutionLayerBuilder builder(int[] kernelSize, int[] stride) { + return innerBuilder().kernelSize(kernelSize).stride(stride); + } + + public static ConvolutionLayerBuilder builder( + int[] kernelSize, int[] stride, int[] padding) { + return innerBuilder().kernelSize(kernelSize).stride(stride).padding(padding); + } /** * Size of the convolution rows/columns * @@ -122,23 +139,6 @@ public class ConvolutionLayer extends FeedForwardLayer { @Builder.Default @JsonIgnore @EqualsAndHashCode.Exclude @Getter @Setter private boolean defaultValueOverriden = false; - public static ConvolutionLayerBuilder builder() { - return innerBuilder(); - } - - public static ConvolutionLayerBuilder builder(int... kernelSize) { - return innerBuilder().kernelSize(kernelSize); - } - - public static ConvolutionLayerBuilder builder(int[] kernelSize, int[] stride) { - return innerBuilder().kernelSize(kernelSize).stride(stride); - } - - public static ConvolutionLayerBuilder builder( - int[] kernelSize, int[] stride, int[] padding) { - return innerBuilder().kernelSize(kernelSize).stride(stride).padding(padding); - } - public boolean hasBias() { return hasBias; } @@ -429,6 +429,7 @@ public class ConvolutionLayer extends FeedForwardLayer { } } + /* private static final class ConvolutionLayerBuilderImpl extends ConvolutionLayerBuilder { public ConvolutionLayer build() { @@ -473,6 +474,6 @@ public class ConvolutionLayer extends FeedForwardLayer { return l; } } - +*/ } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution2D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution2D.java index 06da29a8e..62fab4f7f 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution2D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution2D.java @@ -38,110 +38,131 @@ import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; /** - * Deconvolution2D layer nIn in the input layer is the number of channels nOut is the number of filters to be used - * in the net or in other words the channels The builder specifies the filter/kernel size, the stride and padding - * The pooling layer takes the kernel size + * Deconvolution2D layer nIn in the input layer is the number of channels nOut is the number of + * filters to be used in the net or in other words the channels The builder specifies the + * filter/kernel 
size, the stride and padding The pooling layer takes the kernel size */ @Data @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) -@Jacksonized @SuperBuilder(builderMethodName = "innerBuilder") +@Jacksonized +@SuperBuilder public class Deconvolution2D extends ConvolutionLayer { + @Builder.Default private CNN2DFormat format = CNN2DFormat.NCHW; -@Builder.Default -private CNN2DFormat format = CNN2DFormat.NCHW; - protected boolean allowCausal() { - //Causal convolution - allowed for 1D only - return false; + protected boolean allowCausal() { + // Causal convolution - allowed for 1D only + return false; + } + + public boolean hasBias() { + return isHasBias(); + } + + @Override + public Deconvolution2D clone() { + Deconvolution2D clone = (Deconvolution2D) super.clone(); + if (clone.getKernelSize() != null) { + clone.setKernelSize(clone.getKernelSize().clone()); + } + if (clone.getStride() != null) { + clone.setStride(clone.getStride().clone()); + } + if (clone.getPadding() != null) { + clone.setPadding(clone.getPadding().clone()); + } + return clone; + } + + @Override + public Layer instantiate( + NeuralNetConfiguration conf, + Collection trainingListeners, + int layerIndex, + INDArray layerParamsView, + boolean initializeParams, + DataType networkDataType) { + setNetConfiguration(conf); + LayerValidation.assertNInNOutSet("Deconvolution2D", getName(), layerIndex, getNIn(), getNOut()); + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + runInheritance(); + + org.deeplearning4j.nn.layers.convolution.Deconvolution2DLayer ret = + new org.deeplearning4j.nn.layers.convolution.Deconvolution2DLayer(lconf, networkDataType); + + ret.addTrainingListeners(trainingListeners); + ret.setIndex(layerIndex); + ret.setParamsViewArray(layerParamsView); + Map paramTable = initializer().init(this, layerParamsView, initializeParams); + ret.setParamTable(paramTable); + ret.setLayerConfiguration(lconf); + return ret; + } + + @Override + public ParamInitializer initializer() { + return DeconvolutionParamInitializer.getInstance(); + } + + @Override + public InputType getOutputType(int layerIndex, InputType inputType) { + if (inputType == null || inputType.getType() != InputType.Type.CNN) { + throw new IllegalStateException( + "Invalid input for Convolution layer (layer name=\"" + + getName() + + "\"): Expected CNN input, got " + + inputType); } - private static final class Deconvolution2DBuilderImpl extends Deconvolution2DBuilder { - public Deconvolution2D build() { - Deconvolution2D l = new Deconvolution2D(this); - l.initializeConstraints(); - return l; - } + return InputTypeUtil.getOutputTypeDeconvLayer( + inputType, + getKernelSize(), + getStride(), + getPadding(), + getDilation(), + getConvolutionMode(), + nOut, + layerIndex, + getName(), + Deconvolution2DLayer.class); + } + + private static final class Deconvolution2DBuilderImpl + extends Deconvolution2DBuilder { + public Deconvolution2D build() { + Deconvolution2D l = new Deconvolution2D(this); + l.initializeConstraints(); + return l; } - public static abstract class Deconvolution2DBuilder> extends ConvolutionLayerBuilder { + } + public abstract static class Deconvolution2DBuilder< + C extends Deconvolution2D, B extends Deconvolution2DBuilder> + extends ConvolutionLayerBuilder { - - @Override - public B kernelSize(int... kernelSize) { - super.kernelSize(ValidationUtils.validate2NonNegative(kernelSize, false, "kernelSize")); - return self(); - } - @Override - public B stride(int... 
stride) { - super.stride(ValidationUtils.validate2NonNegative(stride, false, "stride")); - return self(); - } - @Override - public B padding(int... padding) { - super.padding(ValidationUtils.validate2NonNegative(padding, false, "padding")); - return self(); - } - @Override - public B dilation(int... dilation) { - super.dilation(ValidationUtils.validate2NonNegative(dilation, false, "dilation")); - return self(); - } - } - public boolean hasBias() { - return isHasBias(); + @Override + public B kernelSize(int... kernelSize) { + super.kernelSize(ValidationUtils.validate2NonNegative(kernelSize, false, "kernelSize")); + return self(); } @Override - public Deconvolution2D clone() { - Deconvolution2D clone = (Deconvolution2D) super.clone(); - if (clone.getKernelSize() != null) { - clone.setKernelSize( clone.getKernelSize().clone()); - } - if (clone.getStride() != null) { - clone.setStride( clone.getStride().clone()); - } - if (clone.getPadding() != null) { - clone.setPadding( clone.getPadding().clone()); - } - return clone; + public B stride(int... stride) { + super.stride(ValidationUtils.validate2NonNegative(stride, false, "stride")); + return self(); } @Override - public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, - int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { - setNetConfiguration(conf); - LayerValidation.assertNInNOutSet("Deconvolution2D", getName(), layerIndex, getNIn(), getNOut()); - LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); - runInheritance(); - - org.deeplearning4j.nn.layers.convolution.Deconvolution2DLayer ret = - new org.deeplearning4j.nn.layers.convolution.Deconvolution2DLayer(lconf, networkDataType); - - ret.addTrainingListeners(trainingListeners); - ret.setIndex(layerIndex); - ret.setParamsViewArray(layerParamsView); - Map paramTable = initializer().init(this, layerParamsView, initializeParams); - ret.setParamTable(paramTable); - ret.setLayerConfiguration(lconf); - return ret; + public B padding(int... padding) { + super.padding(ValidationUtils.validate2NonNegative(padding, false, "padding")); + return self(); } @Override - public ParamInitializer initializer() { - return DeconvolutionParamInitializer.getInstance(); + public B dilation(int... 
dilation) { + super.dilation(ValidationUtils.validate2NonNegative(dilation, false, "dilation")); + return self(); } - - @Override - public InputType getOutputType(int layerIndex, InputType inputType) { - if (inputType == null || inputType.getType() != InputType.Type.CNN) { - throw new IllegalStateException("Invalid input for Convolution layer (layer name=\"" + getName() - + "\"): Expected CNN input, got " + inputType); - } - - return InputTypeUtil.getOutputTypeDeconvLayer(inputType, getKernelSize(), getStride(), getPadding(), getDilation(), getConvolutionMode(), - nOut, layerIndex, getName(), Deconvolution2DLayer.class); - } - - + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DenseLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DenseLayer.java index 7044a3987..752435358 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DenseLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DenseLayer.java @@ -63,6 +63,7 @@ public class DenseLayer extends FeedForwardLayer { LayerValidation.assertNInNOutSet( "DenseLayerConfiguration", getName(), layerIndex, getNIn(), getNOut()); LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + lconf.setNetConfiguration(conf); runInheritance(); org.deeplearning4j.nn.layers.feedforward.dense.DenseLayer ret = diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerConfiguration.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerConfiguration.java index e63f4efdd..5c04fa32c 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerConfiguration.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerConfiguration.java @@ -31,6 +31,7 @@ import lombok.experimental.SuperBuilder; import lombok.extern.slf4j.Slf4j; import net.brutex.ai.dnn.api.ILayerConfiguration; import net.brutex.ai.dnn.api.LayerType; +import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.InputPreProcessor; @@ -56,7 +57,7 @@ import org.nd4j.linalg.learning.regularization.Regularization; @NoArgsConstructor // @JsonIdentityInfo(generator= ObjectIdGenerators.IntSequenceGenerator.class, property="@id") @Slf4j -@SuperBuilder +@SuperBuilder(toBuilder = true) public abstract class LayerConfiguration implements ILayerConfiguration, Serializable, Cloneable { // ITrainableLayerConfiguration @@ -66,10 +67,20 @@ public abstract class LayerConfiguration @Getter @Setter protected List biasConstraints; @Getter @Setter protected List constraints; @Getter @Setter protected IWeightNoise weightNoise; - @Builder.Default private @Getter @Setter LinkedHashSet variables = new LinkedHashSet<>(); + @Builder.Default private @Getter @Setter @NonNull LinkedHashSet variables = new LinkedHashSet<>(); @Getter @Setter private IDropout dropOut; /** The type of the layer, basically defines the base class and its properties */ @Builder.Default @Getter @Setter @NonNull private LayerType type = LayerType.UNKNOWN; + + /** + * Number of parameters this layer has a result of its configuration + * @return number or parameters + */ + public long numParams() { + return initializer().numParams(this); + } + + /** * A reference to the neural net configuration. 
This field is excluded from json serialization as * well as from equals check to avoid circular referenced. diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected1D.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected1D.java index 990892655..8367bc965 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected1D.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected1D.java @@ -22,6 +22,8 @@ package org.deeplearning4j.nn.conf.layers; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import java.util.*; + +import com.fasterxml.jackson.annotation.JsonProperty; import lombok.*; import lombok.experimental.SuperBuilder; import lombok.extern.jackson.Jacksonized; @@ -59,10 +61,12 @@ public class LocallyConnected1D extends SameDiffLayer { /** * @param nIn Number of inputs to the layer (input size) */ + @JsonProperty("nIn") private long nIn; /** * @param nOut Number of outputs (output size) */ + @JsonProperty("nOut") private long nOut; /** * @param activation Activation function for the layer diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/NoParamLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/NoParamLayer.java index a6242ea0c..f05fe3c36 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/NoParamLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/NoParamLayer.java @@ -34,6 +34,16 @@ import org.nd4j.linalg.learning.regularization.Regularization; @SuperBuilder public abstract class NoParamLayer extends LayerConfiguration { + /** + * Number of parameters this layer. This will always return 0 + * + * @return 0 + */ + @Override + public long numParams() { + return 0; + } + @Override public ParamInitializer initializer() { return EmptyParamInitializer.getInstance(); @@ -58,6 +68,7 @@ public abstract class NoParamLayer extends LayerConfiguration { /** * Will always return no-Op updater. 
+ * * @return */ @Override @@ -65,7 +76,7 @@ public abstract class NoParamLayer extends LayerConfiguration { return Updater.NONE.getIUpdaterWithDefaultConfig(); } - public static abstract class NoParamLayerBuilder> - extends LayerConfigurationBuilder - {} + public abstract static class NoParamLayerBuilder< + C extends NoParamLayer, B extends NoParamLayerBuilder> + extends LayerConfigurationBuilder {} } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling1DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling1DLayer.java index 0188b30d2..582c8b1b4 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling1DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling1DLayer.java @@ -23,6 +23,7 @@ package org.deeplearning4j.nn.conf.layers; import java.util.Collection; import java.util.Map; import lombok.EqualsAndHashCode; +import lombok.NonNull; import lombok.ToString; import lombok.experimental.SuperBuilder; import lombok.extern.jackson.Jacksonized; @@ -35,6 +36,7 @@ import org.deeplearning4j.optimize.api.TrainingListener; import org.deeplearning4j.util.Convolution1DUtils; import org.deeplearning4j.util.Convolution2DUtils; import org.deeplearning4j.util.ValidationUtils; +import org.jetbrains.annotations.NotNull; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; @@ -50,9 +52,91 @@ import org.nd4j.linalg.api.ndarray.INDArray; @ToString(callSuper = true) @EqualsAndHashCode(callSuper = true) @Jacksonized -@SuperBuilder +@SuperBuilder(builderMethodName = "innerBuilder") public class Subsampling1DLayer extends SubsamplingLayer { + public static Subsampling1DLayerBuilder builder() { + return innerBuilder(); + } + + public static Subsampling1DLayerBuilder builder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType) { + return innerBuilder() + .poolingType(poolingType); + } + + public static Subsampling1DLayerBuilder builder(PoolingType poolingType) { + return innerBuilder() + .poolingType(poolingType.toPoolingType()); + } + + public static Subsampling1DLayerBuilder builder(int... 
kernelSize) { + return innerBuilder() + .kernelSize(kernelSize); + } + + public static Subsampling1DLayerBuilder builder(int[] kernelSize, int[] stride) { + return innerBuilder() + .kernelSize(kernelSize) + .stride(stride); + } + + public static Subsampling1DLayerBuilder builder(int[] kernelSize, int[] stride, int[] padding) { + return innerBuilder() + .kernelSize(kernelSize) + .stride(stride) + .padding(padding); + } + + public static Subsampling1DLayerBuilder builder(PoolingType poolingType, int[] kernelSize, int[] stride, int[] padding) { + return innerBuilder() + .poolingType(poolingType.toPoolingType()) + .kernelSize(kernelSize) + .stride(stride) + .padding(padding) + ; + } + + public static Subsampling1DLayerBuilder builder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType, int[] kernelSize, int[] stride, int[] padding) { + return innerBuilder() + .poolingType(poolingType) + .kernelSize(kernelSize) + .stride(stride) + .padding(padding) + ; + } + + public static Subsampling1DLayerBuilder builder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType, int[] kernelSize) { + return innerBuilder() + .poolingType(poolingType) + .kernelSize(kernelSize) + ; + } + + public static Subsampling1DLayerBuilder builder(PoolingType poolingType, int[] kernelSize) { + return innerBuilder() + .poolingType(poolingType.toPoolingType()) + .kernelSize(kernelSize) + ; + } + + + public static Subsampling1DLayerBuilder builder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType, int[] kernelSize, int[] stride) { + return innerBuilder() + .poolingType(poolingType) + .kernelSize(kernelSize) + .stride(stride) + ; + } + + public static Subsampling1DLayerBuilder builder(PoolingType poolingType, int[] kernelSize, int[] stride) { + return innerBuilder() + .poolingType(poolingType.toPoolingType()) + .kernelSize(kernelSize) + .stride(stride) + ; + } + + @Override public org.deeplearning4j.nn.api.Layer instantiate( NeuralNetConfiguration conf, @@ -176,20 +260,20 @@ public class Subsampling1DLayer extends SubsamplingLayer { * @return */ @Override - public B kernelSize(int... kernelSize) { - super.kernelSize( ValidationUtils.validate1NonNegative(kernelSize, "kernelSize")[0]); + public B kernelSize(int @NonNull ... kernelSize) { + super.kernelSize(ValidationUtils.validate1NonNegative(new int[]{kernelSize[0]}, "kernelSize")[0]); //fix width = 1 return self(); } @Override - public B stride(int... stride) { - super.stride( ValidationUtils.validate1NonNegative(stride, "stride")[0]); + public B stride(@NotNull int... stride) { + super.stride( ValidationUtils.validate1NonNegative(new int[]{stride[0]}, "stride")[0]); return self(); } @Override - public B padding(int... padding) { - super.padding( ValidationUtils.validate1NonNegative(padding, "padding")[0]); + public B padding(@NotNull int... 
padding) { + super.padding( ValidationUtils.validate1NonNegative(new int[]{padding[0]}, "padding")); return self(); } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java index f102c912c..9dfb38036 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java @@ -27,10 +27,7 @@ import lombok.*; import lombok.experimental.SuperBuilder; import lombok.extern.jackson.Jacksonized; import org.deeplearning4j.nn.api.ParamInitializer; -import org.deeplearning4j.nn.conf.CNN2DFormat; -import org.deeplearning4j.nn.conf.ConvolutionMode; -import org.deeplearning4j.nn.conf.InputPreProcessor; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.conf.*; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.conf.memory.MemoryReport; @@ -84,7 +81,8 @@ public class SubsamplingLayer extends NoParamLayer { * @param padding padding in the height and width dimensions */ @Builder.Default protected int[] padding = new int[] {0, 0}; - protected int pnorm; + + protected int pnorm; @Builder.Default protected double eps = 1e-8; /** * When using CuDNN or MKLDNN and an error is encountered, should fallback to the non-helper @@ -104,6 +102,7 @@ public class SubsamplingLayer extends NoParamLayer { */ protected @Builder.Default CNN2DFormat dataFormat = CNN2DFormat.NCHW; // default value for legacy reasons + protected @Builder.Default RNNFormat rnnFormat = RNNFormat.NCW; /** * When doing average pooling, should the padding values be included in the divisor or not?
* Not applicable for max and p-norm pooling.
@@ -127,6 +126,7 @@ public class SubsamplingLayer extends NoParamLayer { * average pooling */ @Builder.Default protected boolean avgPoolIncludePadInDivisor = true; + /** * Kernel dilation. Default: {1, 1}, which is standard convolutions. Used for implementing dilated * convolutions, which are also known as atrous convolutions.
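Dilation enlarges the receptive field without adding weights: each kernel tap is spaced d elements apart, so the effective kernel size grows to k + (k - 1) * (d - 1). A rough sketch of how that feeds into the usual output-size formula for ConvolutionMode.Truncate, with made-up numbers (the Convolution1DUtils/Convolution2DUtils helpers referenced in this patch also handle Same and Causal modes):

    // Hypothetical: 1D input of length 28, kernel 3, stride 1, padding 0, dilation 2.
    int in = 28, k = 3, s = 1, p = 0, d = 2;
    int effectiveK = k + (k - 1) * (d - 1);      // 3 + 2 * 1 = 5
    int out = (in + 2 * p - effectiveK) / s + 1; // (28 - 5) / 1 + 1 = 24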
@@ -301,7 +301,7 @@ public class SubsamplingLayer extends NoParamLayer { public void setNIn(InputType inputType, boolean override) { // No op: subsampling layer doesn't have nIn value if (!defaultValueOverridden || override) { - this.dataFormat = ((InputType.InputTypeConvolutional) inputType).getFormat(); + this.rnnFormat = ((InputType.InputTypeRecurrent) inputType).getFormat(); defaultValueOverridden = true; } } @@ -355,14 +355,6 @@ public class SubsamplingLayer extends NoParamLayer { .build(); } - public int getPnorm() { - return pnorm; - } - - public double getEps() { - return eps; - } - public enum PoolingType { MAX, AVG, @@ -394,33 +386,33 @@ public class SubsamplingLayer extends NoParamLayer { return self(); } - public B eps(int eps) { + public B eps(double eps) { ValidationUtils.validateNonNegative(eps, "eps"); this.eps$value = eps; this.eps$set = true; return self(); } - public B kernelSize(int... kernelSize) { - this.kernelSize$value = ValidationUtils.validate2NonNegative(kernelSize, false, "kernelSize"); + public B kernelSize(int @NonNull... kernelSize) { + this.kernelSize$value = ValidationUtils.validate2NonNegative(kernelSize, true, "kernelSize"); this.kernelSize$set = true; return self(); } - public B stride(int... stride) { - this.stride$value = ValidationUtils.validate2NonNegative(stride, false, "stride"); + public B stride(int @NonNull ... stride) { + this.stride$value = ValidationUtils.validate2NonNegative(stride, true, "stride"); this.stride$set = true; return self(); } - public B padding(int... padding) { - this.padding$value = ValidationUtils.validate2NonNegative(padding, false, "padding"); + public B padding(int @NonNull ... padding) { + this.padding$value = ValidationUtils.validate2NonNegative(padding, true, "padding"); this.padding$set = true; return self(); } - public B dilation(int... dilation) { - this.dilation$value = ValidationUtils.validate2NonNegative(dilation, false, "dilation"); + public B dilation(int @NonNull ... 
dilation) { + this.dilation$value = ValidationUtils.validate2NonNegative(dilation, true, "dilation"); this.dilation$set = true; return self(); } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/FrozenLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/FrozenLayer.java index 0f8e2aaeb..e8133dadb 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/FrozenLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/FrozenLayer.java @@ -74,6 +74,7 @@ public class FrozenLayer extends LayerConfiguration { boolean initializeParams, DataType networkDataType) { + innerConfiguration.setNetConfiguration(conf); // Need to be able to instantiate a layer, from a config - for JSON -> net type situations org.deeplearning4j.nn.api.Layer underlying = innerConfiguration.instantiate( diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java index 12239673e..6419df8c0 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java @@ -20,6 +20,7 @@ package org.deeplearning4j.nn.conf.layers.samediff; +import com.fasterxml.jackson.annotation.JsonIgnore; import java.util.Collection; import java.util.List; import java.util.Map; @@ -52,7 +53,8 @@ import org.nd4j.linalg.learning.regularization.WeightDecay; @EqualsAndHashCode(callSuper = true, doNotUseGetters = true) @NoArgsConstructor @SuperBuilder -public abstract class AbstractSameDiffLayer extends LayerConfiguration { +public abstract class AbstractSameDiffLayer extends LayerConfiguration + implements org.deeplearning4j.nn.api.ITraininableLayerConfiguration { /** * The regularization for the parameters (excluding biases) - for example {@link WeightDecay} @@ -63,16 +65,14 @@ public abstract class AbstractSameDiffLayer extends LayerConfiguration { * @param regularization Regularization to apply for the network parameters/weights (excluding * biases) */ - @Getter - protected List regularization; + @Getter protected List regularization; /** * The regularization for the biases only - for example {@link WeightDecay} -- SETTER -- Set the * regularization for the biases only - for example {@link WeightDecay} * * @param regularizationBias Regularization to apply for the network biases only */ - @Getter - protected List regularizationBias; + @Getter protected List regularizationBias; /** * Gradient updater. 
For example, {@link org.nd4j.linalg.learning.config.Adam} or {@link * org.nd4j.linalg.learning.config.Nesterovs} @@ -87,21 +87,23 @@ public abstract class AbstractSameDiffLayer extends LayerConfiguration { * @param biasUpdater Updater to use for bias parameters */ protected @Getter @Setter IUpdater biasUpdater; -@Getter @Setter - protected GradientNormalization gradientNormalization; -@Getter @Setter - protected double gradientNormalizationThreshold = Double.NaN; -@Getter @Setter - private SDLayerParams layerParams; + + @Getter @Setter protected GradientNormalization gradientNormalization; + @Getter @Setter protected double gradientNormalizationThreshold = Double.NaN; + @Getter @Setter private SDLayerParams layerParams; + + @Getter @Setter private DataType dataType; @Override public void runInheritance(@NotNull NeuralNetConfiguration conf) { super.runInheritance(conf); - if (this.biasUpdater == null ) this.biasUpdater = conf.getBiasUpdater(); + if (this.biasUpdater == null) this.biasUpdater = conf.getBiasUpdater(); if (this.updater == null) this.updater = conf.getUpdater(); - if (this.regularizationBias == null || regularizationBias.isEmpty()) this.regularizationBias = conf.getRegularizationBias(); - if (this.regularization == null || regularization.isEmpty()) this.regularization = conf.getRegularization(); - // if( this.weightInit == null) this.weightInit = conf.getWeightInit(); + if (this.regularizationBias == null || regularizationBias.isEmpty()) + this.regularizationBias = conf.getRegularizationBias(); + if (this.regularization == null || regularization.isEmpty()) + this.regularization = conf.getRegularization(); + // if( this.weightInit == null) this.weightInit = conf.getWeightInit(); if (this.gradientNormalization == null) this.gradientNormalization = conf.getGradientNormalization(); // if(this.weightInit == null) this.weightInit = conf.getWeightInit(); @@ -109,6 +111,7 @@ public abstract class AbstractSameDiffLayer extends LayerConfiguration { this.gradientNormalizationThreshold = conf.getGradientNormalizationThreshold(); } } + @Override public List getRegularizationByParam(String paramName) { if (layerParams.isWeightParam(paramName)) { @@ -119,6 +122,7 @@ public abstract class AbstractSameDiffLayer extends LayerConfiguration { return null; } + @JsonIgnore public SDLayerParams getLayerParams() { if (layerParams == null) { layerParams = new SDLayerParams(); @@ -138,7 +142,6 @@ public abstract class AbstractSameDiffLayer extends LayerConfiguration { return null; } - /** * Define the parameters for the network. Use {@link SDLayerParams#addWeightParam(String, * long...)} and {@link SDLayerParams#addBiasParam(String, long...)} @@ -207,7 +210,6 @@ public abstract class AbstractSameDiffLayer extends LayerConfiguration { fanIn, fanOut, array.shape(), weightInit, null, paramReshapeOrder(null), array); } - /** * This method generates an "all ones" mask array for use in the SameDiff model when none is * provided. 
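The SDLayerParams methods cited in the javadoc above, addWeightParam(String, long...) and addBiasParam(String, long...), are the hook a concrete SameDiff layer uses to declare its parameter shapes. A minimal, purely illustrative override, assuming a dense-style layer with fields nIn and nOut (the parameter names and shapes here are hypothetical):

    // Hypothetical parameter definition for a custom SameDiff layer.
    @Override
    public void defineParameters(SDLayerParams params) {
      params.addWeightParam("W", nIn, nOut); // weight matrix, shape [nIn, nOut]
      params.addBiasParam("b", 1, nOut);     // bias row vector, shape [1, nOut]
    }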
diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java index 85dbc685e..006b87250 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java @@ -52,8 +52,8 @@ import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator; /** A layer with input and output, no parameters or gradients */ @NoArgsConstructor(force = true) @Slf4j -//@JsonIdentityInfo(generator = ObjectIdGenerators.IntSequenceGenerator.class, property = "@id") -//@JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, property = "__class") +// @JsonIdentityInfo(generator = ObjectIdGenerators.IntSequenceGenerator.class, property = "@id") +// @JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, property = "__class") public abstract class AbstractLayer implements Layer { private final @Getter List variables = new ArrayList<>(); @@ -80,10 +80,8 @@ public abstract class AbstractLayer impl protected DataType dataType; protected @Getter @Setter int iterationCount; protected @Getter @Setter int epochCount; - @JsonIgnore - private @Getter @Setter IModel net; - @JsonIgnore - @Getter @Setter @NonNull private NeuralNetConfiguration netConfiguration; + @JsonIgnore private @Getter @Setter IModel net; + @JsonIgnore @Getter @Setter @NonNull private NeuralNetConfiguration netConfiguration; public AbstractLayer(@NonNull LayerConfiguration layerConf, @NonNull DataType dataType) { //noinspection unchecked @@ -95,19 +93,18 @@ public abstract class AbstractLayer impl } this.dataType = dataType; if (layerConfiguration.getNetConfiguration() == null) { - throw new RuntimeException("You cannot create a layer from a layer configuration, that is not part of any neural network configuration."); - } + throw new RuntimeException( + "You cannot create a layer from a layer configuration, that is not part of any neural network configuration."); + } this.net = layerConfiguration.getNetConfiguration().getNet(); } public void addTrainingListeners(TrainingListener... 
listeners) { - if(listeners != null) - trainingListeners.addAll(List.of(listeners)); + if (listeners != null) trainingListeners.addAll(List.of(listeners)); } public void addTrainingListeners(Collection listeners) { - if(listeners != null) - trainingListeners.addAll(listeners); + if (listeners != null) trainingListeners.addAll(listeners); } @Override @@ -471,7 +468,7 @@ public abstract class AbstractLayer impl @Override public int getInputMiniBatchSize() { - if(input==null) return 0; + if (input == null) return 0; return (int) input.size(0); } @@ -565,8 +562,9 @@ public abstract class AbstractLayer impl */ @Override public void setParamTable(Map paramTable) { - log.warn("Using setParamTable on this layer {} has no effect.", getLayerConfiguration().getName()); - //throw new RuntimeException("Not implemented"); + log.warn( + "Using setParamTable on this layer {} has no effect.", getLayerConfiguration().getName()); + // throw new RuntimeException("Not implemented"); } /** @@ -578,7 +576,7 @@ public abstract class AbstractLayer impl */ @Override public Map getParamTable(boolean isBackprop) { -// throw new RuntimeException("Not implemented"); + // throw new RuntimeException("Not implemented"); return null; } @@ -590,7 +588,7 @@ public abstract class AbstractLayer impl */ @Override public INDArray getParams() { - //throw new RuntimeException("Not implemented"); + // throw new RuntimeException("Not implemented"); return null; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java index 8c092b4b1..926e8e887 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java @@ -662,6 +662,7 @@ public abstract class BaseLayer */ public boolean hasBias() { // Overridden by layers supporting no bias mode: dense, output, convolutional, embedding + //return true; return true; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/FrozenLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/FrozenLayer.java index 1b5d0ebbb..f0e21309e 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/FrozenLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/FrozenLayer.java @@ -24,6 +24,7 @@ import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.nn.api.ITraininableLayerConfiguration; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.conf.CacheMode; +import org.deeplearning4j.nn.conf.layers.BaseLayerConfiguration; import org.deeplearning4j.nn.conf.misc.DummyConfig; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; @@ -88,6 +89,8 @@ public class FrozenLayer extends BaseWrapperLayer { return underlying.activate(input, false, workspaceMgr); } + + @Override public void fit() { if (!logFit) { diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Convolution1DLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Convolution1DLayer.java index 2679bb665..03a395ec0 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Convolution1DLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Convolution1DLayer.java @@ -47,163 +47,188 @@ import 
org.nd4j.linalg.factory.Broadcast; import org.nd4j.linalg.factory.Nd4j; public class Convolution1DLayer extends ConvolutionLayer { - public Convolution1DLayer(LayerConfiguration conf, DataType dataType) { - super(conf, dataType); + public Convolution1DLayer(LayerConfiguration conf, DataType dataType) { + super(conf, dataType); + } + + @Override + public Pair backpropGradient( + INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { + assertInputSet(true); + if (epsilon.rank() != 3) + throw new DL4JInvalidInputException( + "Got rank " + + epsilon.rank() + + " array as epsilon for Convolution1D backprop with shape " + + Arrays.toString(epsilon.shape()) + + ". Expected rank 3 array with shape [minibatchSize, features, length]. " + + layerId()); + Pair fwd = preOutput(false, true, workspaceMgr); + IActivation afn = getTypedLayerConfiguration().getActivationFn(); + INDArray delta = + afn.backprop(fwd.getFirst(), epsilon).getFirst(); // TODO handle activation function params + + org.deeplearning4j.nn.conf.layers.ConvolutionLayer c = getTypedLayerConfiguration(); + Conv1DConfig conf = + Conv1DConfig.builder() + .k(c.getKernelSize()[0]) + .s(c.getStride()[0]) + .d(c.getDilation()[0]) + .p(c.getPadding()[0]) + .dataFormat(Conv1DConfig.NCW) + .paddingMode(Convolution2DUtils.paddingModeForConvolutionMode(convolutionMode)) + .build(); + + INDArray w = + Convolution1DUtils.reshapeWeightArrayOrGradientForFormat( + getParam(ConvolutionParamInitializer.WEIGHT_KEY), RNNFormat.NCW); + + INDArray[] inputArrs; + INDArray[] outputArrs; + INDArray wg = + Convolution1DUtils.reshapeWeightArrayOrGradientForFormat( + gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY), getRnnDataFormat()); + INDArray epsOut = + workspaceMgr.createUninitialized( + ArrayType.ACTIVATION_GRAD, input.dataType(), input.shape()); + INDArray input = this.input.castTo(dataType); + if (getTypedLayerConfiguration().getRnnDataFormat() == RNNFormat.NWC) { + input = input.permute(0, 2, 1); // NHWC to NCHW } - - @Override - public Pair backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { - assertInputSet(true); - if (epsilon.rank() != 3) - throw new DL4JInvalidInputException("Got rank " + epsilon.rank() - + " array as epsilon for Convolution1D backprop with shape " - + Arrays.toString(epsilon.shape()) - + ". Expected rank 3 array with shape [minibatchSize, features, length]. 
" + layerId()); - Pair fwd = preOutput(false,true,workspaceMgr); - IActivation afn = getTypedLayerConfiguration().getActivationFn(); - INDArray delta = afn.backprop(fwd.getFirst(), epsilon).getFirst(); //TODO handle activation function params - - Convolution1D c = getTypedLayerConfiguration(); - Conv1DConfig conf = Conv1DConfig.builder() - .k(c.getKernelSize()[0]) - .s(c.getStride()[0]) - .d(c.getDilation()[0]) - .p(c.getPadding()[0]) - .dataFormat(Conv1DConfig.NCW) - .paddingMode(Convolution2DUtils.paddingModeForConvolutionMode(convolutionMode)) - .build(); - - INDArray w = Convolution1DUtils.reshapeWeightArrayOrGradientForFormat( - getParam(ConvolutionParamInitializer.WEIGHT_KEY), - RNNFormat.NCW); - - INDArray[] inputArrs; - INDArray[] outputArrs; - INDArray wg = Convolution1DUtils.reshapeWeightArrayOrGradientForFormat( - gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY), - getRnnDataFormat()); - INDArray epsOut = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, input.dataType(), input.shape()); - INDArray input = this.input.castTo(dataType); - if(getTypedLayerConfiguration().getRnnDataFormat() == RNNFormat.NWC) { - input = input.permute(0,2,1); //NHWC to NCHW - } - - if(getTypedLayerConfiguration().hasBias()) { - INDArray b = getParam(ConvolutionParamInitializer.BIAS_KEY); - b = b.reshape(b.length()); - inputArrs = new INDArray[]{input, w, b, delta}; - INDArray bg = gradientViews.get(ConvolutionParamInitializer.BIAS_KEY); - bg = bg.reshape(bg.length()); - outputArrs = new INDArray[]{epsOut, wg, bg}; - } else { - inputArrs = new INDArray[]{input, w, delta}; - outputArrs = new INDArray[]{epsOut, wg}; - } - - Conv1DDerivative op = new Conv1DDerivative(inputArrs, outputArrs, conf); - Nd4j.exec(op); - - Gradient retGradient = new DefaultGradient(); - if(getTypedLayerConfiguration().hasBias()) { - retGradient.setGradientFor(ConvolutionParamInitializer.BIAS_KEY, gradientViews.get(ConvolutionParamInitializer.BIAS_KEY)); - } - retGradient.setGradientFor(ConvolutionParamInitializer.WEIGHT_KEY, gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY), 'c'); - if (getRnnDataFormat() == RNNFormat.NWC) { - epsOut = epsOut.permute(0, 2, 1); - } - return new Pair<>(retGradient, epsOut); + if (getTypedLayerConfiguration().hasBias()) { + INDArray b = getParam(ConvolutionParamInitializer.BIAS_KEY); + b = b.reshape(b.length()); + inputArrs = new INDArray[] {input, w, b, delta}; + INDArray bg = gradientViews.get(ConvolutionParamInitializer.BIAS_KEY); + bg = bg.reshape(bg.length()); + outputArrs = new INDArray[] {epsOut, wg, bg}; + } else { + inputArrs = new INDArray[] {input, w, delta}; + outputArrs = new INDArray[] {epsOut, wg}; } - @Override - protected Pair preOutput4d(boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) { - Pair preOutput = super.preOutput(true, forBackprop, workspaceMgr); - INDArray p3d = preOutput.getFirst(); - INDArray p = preOutput.getFirst().reshape(p3d.size(0), p3d.size(1), p3d.size(2), 1); - preOutput.setFirst(p); - return preOutput; + Conv1DDerivative op = new Conv1DDerivative(inputArrs, outputArrs, conf); + Nd4j.exec(op); + + Gradient retGradient = new DefaultGradient(); + if (getTypedLayerConfiguration().hasBias()) { + retGradient.setGradientFor( + ConvolutionParamInitializer.BIAS_KEY, + gradientViews.get(ConvolutionParamInitializer.BIAS_KEY)); + } + retGradient.setGradientFor( + ConvolutionParamInitializer.WEIGHT_KEY, + gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY), + 'c'); + if (getRnnDataFormat() == RNNFormat.NWC) { + 
epsOut = epsOut.permute(0, 2, 1); + } + return new Pair<>(retGradient, epsOut); + } + + @Override + protected Pair preOutput4d( + boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) { + Pair preOutput = super.preOutput(true, forBackprop, workspaceMgr); + INDArray p3d = preOutput.getFirst(); + INDArray p = preOutput.getFirst().reshape(p3d.size(0), p3d.size(1), p3d.size(2), 1); + preOutput.setFirst(p); + return preOutput; + } + + @Override + protected Pair preOutput( + boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) { + assertInputSet(false); + + INDArray input = this.input.castTo(dataType); + if (getTypedLayerConfiguration().getRnnDataFormat() == RNNFormat.NWC) { + input = input.permute(0, 2, 1); // NHWC to NCHW } - @Override - protected Pair preOutput(boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) { - assertInputSet(false); + org.deeplearning4j.nn.conf.layers.ConvolutionLayer c = getTypedLayerConfiguration(); + Conv1DConfig conf = + Conv1DConfig.builder() + .k(c.getKernelSize()[0]) + .s(c.getStride()[0]) + .d(c.getDilation()[0]) + .p(c.getPadding()[0]) + .dataFormat(Conv1DConfig.NCW) + .paddingMode(Convolution2DUtils.paddingModeForConvolutionMode(convolutionMode)) + .build(); - INDArray input = this.input.castTo(dataType); - if(getTypedLayerConfiguration().getRnnDataFormat() == RNNFormat.NWC) { - input = input.permute(0,2,1); //NHWC to NCHW - } + INDArray w = + Convolution1DUtils.reshapeWeightArrayOrGradientForFormat( + getParam(ConvolutionParamInitializer.WEIGHT_KEY), RNNFormat.NCW); - Convolution1D c = getTypedLayerConfiguration(); - Conv1DConfig conf = Conv1DConfig.builder() - .k(c.getKernelSize()[0]) - .s(c.getStride()[0]) - .d(c.getDilation()[0]) - .p(c.getPadding()[0]) - .dataFormat(Conv1DConfig.NCW) - .paddingMode(Convolution2DUtils.paddingModeForConvolutionMode(convolutionMode)) - .build(); - - - INDArray w = Convolution1DUtils.reshapeWeightArrayOrGradientForFormat( - getParam(ConvolutionParamInitializer.WEIGHT_KEY) - ,RNNFormat.NCW); - - - INDArray[] inputs; - if(getTypedLayerConfiguration().hasBias()) { - INDArray b = getParam(ConvolutionParamInitializer.BIAS_KEY); - b = b.reshape(b.length()); - inputs = new INDArray[]{input, w, b}; - } else { - inputs = new INDArray[]{input, w}; - } - - Conv1D op = new Conv1D(inputs, null, conf); - List outShape = op.calculateOutputShape(); - op.setOutputArgument(0, Nd4j.create(outShape.get(0), false)); - Nd4j.exec(op); - INDArray output = op.getOutputArgument(0); - - if(getRnnDataFormat() == RNNFormat.NWC) { - output = output.permute(0,2,1); - } - - return new Pair<>(output, null); + INDArray[] inputs; + if (getTypedLayerConfiguration().hasBias()) { + INDArray b = getParam(ConvolutionParamInitializer.BIAS_KEY); + b = b.reshape(b.length()); + inputs = new INDArray[] {input, w, b}; + } else { + inputs = new INDArray[] {input, w}; } + Conv1D op = new Conv1D(inputs, null, conf); + List outShape = op.calculateOutputShape(); + op.setOutputArgument(0, Nd4j.create(outShape.get(0), false)); + Nd4j.exec(op); + INDArray output = op.getOutputArgument(0); - @Override - public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) { - INDArray act4d = super.activate(training, workspaceMgr); - INDArray act3d = act4d.rank() > 3 ? 
- act4d.reshape(act4d.size(0), act4d.size(1), act4d.size(2)) : act4d; - - if(maskArray != null) { - INDArray maskOut = feedForwardMaskArray(maskArray, MaskState.Active, (int)act3d.size(0)).getFirst(); - Preconditions.checkState(act3d.size(0) == maskOut.size(0) && act3d.size(2) == maskOut.size(1), - "Activations dimensions (0,2) and mask dimensions (0,1) don't match: Activations %s, Mask %s", - act3d.shape(), maskOut.shape()); - Broadcast.mul(act3d, maskOut, act3d, 0, 2); - } - - return workspaceMgr.leverageTo(ArrayType.ACTIVATIONS, act3d); //Should be zero copy most of the time + if (getRnnDataFormat() == RNNFormat.NWC) { + output = output.permute(0, 2, 1); } - @Override - public Pair feedForwardMaskArray(INDArray maskArray, MaskState currentMaskState, - int minibatchSize) { - INDArray reduced = Convolution2DUtils.cnn1dMaskReduction(maskArray, getTypedLayerConfiguration().getKernelSize()[0], - getTypedLayerConfiguration().getStride()[0], getTypedLayerConfiguration().getPadding()[0], getTypedLayerConfiguration().getDilation()[0], - getTypedLayerConfiguration().getConvolutionMode()); - return new Pair<>(reduced, currentMaskState); + return new Pair<>(output, null); + } + + @Override + public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) { + INDArray act4d = super.activate(training, workspaceMgr); + INDArray act3d = + act4d.rank() > 3 ? act4d.reshape(act4d.size(0), act4d.size(1), act4d.size(2)) : act4d; + + if (maskArray != null) { + INDArray maskOut = + feedForwardMaskArray(maskArray, MaskState.Active, (int) act3d.size(0)).getFirst(); + Preconditions.checkState( + act3d.size(0) == maskOut.size(0) && act3d.size(2) == maskOut.size(1), + "Activations dimensions (0,2) and mask dimensions (0,1) don't match: Activations %s, Mask %s", + act3d.shape(), + maskOut.shape()); + Broadcast.mul(act3d, maskOut, act3d, 0, 2); } - @Override - public Convolution1D getTypedLayerConfiguration() { - return (Convolution1D)layerConfiguration; - } + return workspaceMgr.leverageTo( + ArrayType.ACTIVATIONS, act3d); // Should be zero copy most of the time + } - private RNNFormat getRnnDataFormat(){ - return getTypedLayerConfiguration().getRnnDataFormat(); - } + @Override + public Pair feedForwardMaskArray( + INDArray maskArray, MaskState currentMaskState, int minibatchSize) { + INDArray reduced = + Convolution2DUtils.cnn1dMaskReduction( + maskArray, + getTypedLayerConfiguration().getKernelSize()[0], + getTypedLayerConfiguration().getStride()[0], + getTypedLayerConfiguration().getPadding()[0], + getTypedLayerConfiguration().getDilation()[0], + getTypedLayerConfiguration().getConvolutionMode()); + return new Pair<>(reduced, currentMaskState); + } + + private RNNFormat getRnnDataFormat() { + return getTypedLayerConfiguration().getRnnDataFormat(); + } + +/** +* + * @return +*/ + @Override + public Convolution1D getTypedLayerConfiguration() { + return (Convolution1D) super.getTypedLayerConfiguration(); + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Convolution1DNewLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Convolution1DNewLayer.java new file mode 100644 index 000000000..91e4fbe3c --- /dev/null +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Convolution1DNewLayer.java @@ -0,0 +1,226 @@ +/* + * ****************************************************************************** + * * + * * + * * This program and the accompanying materials are made available under the + * * terms 
of the Apache License, Version 2.0 which is available at + * * https://www.apache.org/licenses/LICENSE-2.0. + * * + * * See the NOTICE file distributed with this work for additional + * * information regarding copyright ownership. + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * * License for the specific language governing permissions and limitations + * * under the License. + * * + * * SPDX-License-Identifier: Apache-2.0 + * ***************************************************************************** + */ + +package org.deeplearning4j.nn.layers.convolution; + +import java.util.Arrays; +import java.util.List; +import org.deeplearning4j.exception.DL4JInvalidInputException; +import org.deeplearning4j.nn.api.MaskState; +import org.deeplearning4j.nn.conf.RNNFormat; +import org.deeplearning4j.nn.conf.layers.Convolution1DNew; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; +import org.deeplearning4j.nn.gradient.DefaultGradient; +import org.deeplearning4j.nn.gradient.Gradient; +import org.deeplearning4j.nn.params.ConvolutionParamInitializer; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.deeplearning4j.util.Convolution1DUtils; +import org.deeplearning4j.util.Convolution2DUtils; +import org.nd4j.common.base.Preconditions; +import org.nd4j.common.primitives.Pair; +import org.nd4j.linalg.activations.IActivation; +import org.nd4j.linalg.api.buffer.DataType; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.api.ops.impl.layers.convolution.Conv1D; +import org.nd4j.linalg.api.ops.impl.layers.convolution.Conv1DDerivative; +import org.nd4j.linalg.api.ops.impl.layers.convolution.config.Conv1DConfig; +import org.nd4j.linalg.api.shape.LongShapeDescriptor; +import org.nd4j.linalg.factory.Broadcast; +import org.nd4j.linalg.factory.Nd4j; + +public class Convolution1DNewLayer + extends ConvolutionNewLayer { + public Convolution1DNewLayer(LayerConfiguration conf, DataType dataType) { + super(conf, dataType); + } + + @Override + public Pair backpropGradient( + INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { + assertInputSet(true); + if (epsilon.rank() != 3) + throw new DL4JInvalidInputException( + "Got rank " + + epsilon.rank() + + " array as epsilon for Convolution1D backprop with shape " + + Arrays.toString(epsilon.shape()) + + ". Expected rank 3 array with shape [minibatchSize, features, length]. 
" + + layerId()); + Pair fwd = preOutput(false, true, workspaceMgr); + IActivation afn = getTypedLayerConfiguration().getActivationFn(); + INDArray delta = + afn.backprop(fwd.getFirst(), epsilon).getFirst(); // TODO handle activation function params + + Convolution1DNew c = getTypedLayerConfiguration(); + Conv1DConfig conf = + Conv1DConfig.builder() + .k(c.getKernelSize()[0]) + .s(c.getStride()[0]) + .d(c.getDilation()[0]) + .p(c.getPadding()[0]) + .dataFormat(Conv1DConfig.NCW) + .paddingMode(Convolution2DUtils.paddingModeForConvolutionMode(convolutionMode)) + .build(); + + INDArray w = + Convolution1DUtils.reshapeWeightArrayOrGradientForFormat( + getParam(ConvolutionParamInitializer.WEIGHT_KEY), RNNFormat.NCW); + + INDArray[] inputArrs; + INDArray[] outputArrs; + INDArray wg = + Convolution1DUtils.reshapeWeightArrayOrGradientForFormat( + gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY), getRnnDataFormat()); + INDArray epsOut = + workspaceMgr.createUninitialized( + ArrayType.ACTIVATION_GRAD, input.dataType(), input.shape()); + INDArray input = this.input.castTo(dataType); + if (getTypedLayerConfiguration().getRnnDataFormat() == RNNFormat.NWC) { + input = input.permute(0, 2, 1); // NHWC to NCHW + } + + if (getTypedLayerConfiguration().hasBias()) { + INDArray b = getParam(ConvolutionParamInitializer.BIAS_KEY); + b = b.reshape(b.length()); + inputArrs = new INDArray[] {input, w, b, delta}; + INDArray bg = gradientViews.get(ConvolutionParamInitializer.BIAS_KEY); + bg = bg.reshape(bg.length()); + outputArrs = new INDArray[] {epsOut, wg, bg}; + } else { + inputArrs = new INDArray[] {input, w, delta}; + outputArrs = new INDArray[] {epsOut, wg}; + } + + Conv1DDerivative op = new Conv1DDerivative(inputArrs, outputArrs, conf); + Nd4j.exec(op); + + Gradient retGradient = new DefaultGradient(); + if (getTypedLayerConfiguration().hasBias()) { + retGradient.setGradientFor( + ConvolutionParamInitializer.BIAS_KEY, + gradientViews.get(ConvolutionParamInitializer.BIAS_KEY)); + } + retGradient.setGradientFor( + ConvolutionParamInitializer.WEIGHT_KEY, + gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY), + 'c'); + if (getRnnDataFormat() == RNNFormat.NWC) { + epsOut = epsOut.permute(0, 2, 1); + } + return new Pair<>(retGradient, epsOut); + } + + @Override + protected Pair preOutput4d( + boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) { + Pair preOutput = super.preOutput(true, forBackprop, workspaceMgr); + INDArray p3d = preOutput.getFirst(); + INDArray p = preOutput.getFirst().reshape(p3d.size(0), p3d.size(1), p3d.size(2), 1); + preOutput.setFirst(p); + return preOutput; + } + + @Override + protected Pair preOutput( + boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) { + assertInputSet(false); + + INDArray input = this.input.castTo(dataType); + if (getTypedLayerConfiguration().getRnnDataFormat() == RNNFormat.NWC) { + input = input.permute(0, 2, 1); // NHWC to NCHW + } + + Convolution1DNew c = getTypedLayerConfiguration(); + Conv1DConfig conf = + Conv1DConfig.builder() + .k(c.getKernelSize()[0]) + .s(c.getStride()[0]) + .d(c.getDilation()[0]) + .p(c.getPadding()[0]) + .dataFormat(Conv1DConfig.NCW) + .paddingMode(Convolution2DUtils.paddingModeForConvolutionMode(convolutionMode)) + .build(); + + INDArray w = + Convolution1DUtils.reshapeWeightArrayOrGradientForFormat( + getParam(ConvolutionParamInitializer.WEIGHT_KEY), RNNFormat.NCW); + + INDArray[] inputs; + if (getTypedLayerConfiguration().hasBias()) { + INDArray b = 
getParam(ConvolutionParamInitializer.BIAS_KEY); + b = b.reshape(b.length()); + inputs = new INDArray[] {input, w, b}; + } else { + inputs = new INDArray[] {input, w}; + } + + Conv1D op = new Conv1D(inputs, null, conf); + List outShape = op.calculateOutputShape(); + op.setOutputArgument(0, Nd4j.create(outShape.get(0), false)); + Nd4j.exec(op); + INDArray output = op.getOutputArgument(0); + + if (getRnnDataFormat() == RNNFormat.NWC) { + output = output.permute(0, 2, 1); + } + + return new Pair<>(output, null); + } + + @Override + public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) { + INDArray act4d = super.activate(training, workspaceMgr); + INDArray act3d = + act4d.rank() > 3 ? act4d.reshape(act4d.size(0), act4d.size(1), act4d.size(2)) : act4d; + + if (maskArray != null) { + INDArray maskOut = + feedForwardMaskArray(maskArray, MaskState.Active, (int) act3d.size(0)).getFirst(); + Preconditions.checkState( + act3d.size(0) == maskOut.size(0) && act3d.size(2) == maskOut.size(1), + "Activations dimensions (0,2) and mask dimensions (0,1) don't match: Activations %s, Mask %s", + act3d.shape(), + maskOut.shape()); + Broadcast.mul(act3d, maskOut, act3d, 0, 2); + } + + return workspaceMgr.leverageTo( + ArrayType.ACTIVATIONS, act3d); // Should be zero copy most of the time + } + + @Override + public Pair feedForwardMaskArray( + INDArray maskArray, MaskState currentMaskState, int minibatchSize) { + INDArray reduced = + Convolution2DUtils.cnn1dMaskReduction( + maskArray, + getTypedLayerConfiguration().getKernelSize()[0], + getTypedLayerConfiguration().getStride()[0], + getTypedLayerConfiguration().getPadding()[0], + getTypedLayerConfiguration().getDilation()[0], + getTypedLayerConfiguration().getConvolutionMode()); + return new Pair<>(reduced, currentMaskState); + } + + private RNNFormat getRnnDataFormat() { + return getTypedLayerConfiguration().getRnnDataFormat(); + } +} diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayer.java index 56cfd1be9..bcaa68930 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayer.java @@ -20,7 +20,6 @@ package org.deeplearning4j.nn.layers.convolution; - import java.util.Arrays; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.common.config.DL4JClassLoading; @@ -29,6 +28,7 @@ import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.CacheMode; import org.deeplearning4j.nn.conf.ConvolutionMode; +import org.deeplearning4j.nn.conf.layers.Convolution1DNew; import org.deeplearning4j.nn.conf.layers.LayerConfiguration; import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.Gradient; @@ -51,551 +51,698 @@ import org.nd4j.linalg.exception.ND4JOpProfilerException; import org.nd4j.linalg.factory.Nd4j; @Slf4j -public class ConvolutionLayer extends BaseLayer { +public class ConvolutionLayer + extends BaseLayer { - protected INDArray i2d; - protected ConvolutionHelper helper = null; - protected int helperCountFail = 0; - protected ConvolutionMode convolutionMode; - protected transient INDArray dummyBias; //Used only when: hasBias == false AND helpers are used - protected transient INDArray dummyBiasGrad; //As above + protected INDArray i2d; 
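+  // i2d: cached 2d im2col view built in preOutput (populated only when the FF_CACHE workspace is
+  // active); re-used during backprop instead of recomputing the column buffer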
+ protected ConvolutionHelper helper = null; + protected int helperCountFail = 0; + protected ConvolutionMode convolutionMode; + protected transient INDArray dummyBias; // Used only when: hasBias == false AND helpers are used + protected transient INDArray dummyBiasGrad; // As above - public ConvolutionLayer(LayerConfiguration conf, DataType dataType) { - super(conf, dataType); - initializeHelper(); - convolutionMode = ((org.deeplearning4j.nn.conf.layers.ConvolutionLayer) conf).getConvolutionMode(); + public ConvolutionLayer(LayerConfiguration conf, DataType dataType) { + super(conf, dataType); + initializeHelper(); + if (conf instanceof Convolution1DNew) { + convolutionMode = + ((Convolution1DNew) conf).getConvolutionMode(); + } else + if (conf instanceof org.deeplearning4j.nn.conf.layers.ConvolutionLayer) { + convolutionMode = + ((org.deeplearning4j.nn.conf.layers.ConvolutionLayer) conf).getConvolutionMode(); } - void initializeHelper() { - String backend = Nd4j.getExecutioner().getEnvironmentInformation().getProperty("backend"); - if("CUDA".equalsIgnoreCase(backend)) { - helper = DL4JClassLoading.createNewInstance( - "org.deeplearning4j.cuda.convolution.CudnnConvolutionHelper", - ConvolutionHelper.class, - dataType); - log.debug("CudnnConvolutionHelper successfully initialized"); - if (!helper.checkSupported()) { - helper = null; - } - } else if("CPU".equalsIgnoreCase(backend)){ - helper = new MKLDNNConvHelper(dataType); - log.trace("Created MKLDNNConvHelper, layer {}", getTypedLayerConfiguration().getName()); - } + } - if (helper != null && !helper.checkSupported()) { - log.debug("Removed helper {} as not supported", helper.getClass()); - helper = null; - } + void initializeHelper() { + String backend = Nd4j.getExecutioner().getEnvironmentInformation().getProperty("backend"); + if ("CUDA".equalsIgnoreCase(backend)) { + helper = + DL4JClassLoading.createNewInstance( + "org.deeplearning4j.cuda.convolution.CudnnConvolutionHelper", + ConvolutionHelper.class, + dataType); + log.debug("CudnnConvolutionHelper successfully initialized"); + if (!helper.checkSupported()) { + helper = null; + } + } else if ("CPU".equalsIgnoreCase(backend)) { + helper = new MKLDNNConvHelper(dataType); + log.trace("Created MKLDNNConvHelper, layer {}", getTypedLayerConfiguration().getName()); } - @Override - public Type type() { - return Type.CONVOLUTIONAL; + if (helper != null && !helper.checkSupported()) { + log.debug("Removed helper {} as not supported", helper.getClass()); + helper = null; + } + } + + @Override + public Type type() { + return Type.CONVOLUTIONAL; + } + + @Override + public Pair backpropGradient( + INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { + assertInputSet(true); + INDArray weights = + getParamWithNoise(ConvolutionParamInitializer.WEIGHT_KEY, true, workspaceMgr); + INDArray bias = getParamWithNoise(ConvolutionParamInitializer.BIAS_KEY, true, workspaceMgr); + + INDArray input = this.input.castTo(dataType); // No op if correct type + if (epsilon.dataType() != dataType) epsilon = epsilon.castTo(dataType); + + INDArray origInput = input; + INDArray origEps = epsilon; + if (getTypedLayerConfiguration().getConvFormat() != CNN2DFormat.NCHW) { + input = input.permute(0, 3, 1, 2); // NHWC to NCHW + epsilon = epsilon.permute(0, 3, 1, 2); // NHWC to NCHW } - @Override - public Pair backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { - assertInputSet(true); - INDArray weights = getParamWithNoise(ConvolutionParamInitializer.WEIGHT_KEY, true, workspaceMgr); - INDArray bias = 
getParamWithNoise(ConvolutionParamInitializer.BIAS_KEY, true, workspaceMgr); + long miniBatch = input.size(0); + int inH = (int) input.size(2); + int inW = (int) input.size(3); - INDArray input = this.input.castTo(dataType); //No op if correct type - if(epsilon.dataType() != dataType) - epsilon = epsilon.castTo(dataType); + long outDepth = weights.size(0); + long inDepth = weights.size(1); + int kH = (int) weights.size(2); + int kW = (int) weights.size(3); - INDArray origInput = input; - INDArray origEps = epsilon; - if(getTypedLayerConfiguration().getConvFormat() != CNN2DFormat.NCHW) { - input = input.permute(0,3,1,2); //NHWC to NCHW - epsilon = epsilon.permute(0,3,1,2); //NHWC to NCHW + int[] dilation = getTypedLayerConfiguration().getDilation(); + int[] kernel = getTypedLayerConfiguration().getKernelSize(); + int[] strides = getTypedLayerConfiguration().getStride(); + int[] pad; + int[] outSize; + if (convolutionMode == ConvolutionMode.Same) { + outSize = + Convolution2DUtils.getOutputSize( + input, + kernel, + strides, + null, + convolutionMode, + dilation, + CNN2DFormat.NCHW); // Also performs validation + pad = + Convolution2DUtils.getSameModeTopLeftPadding( + outSize, new int[] {inH, inW}, kernel, strides, dilation); + } else { + pad = getTypedLayerConfiguration().getPadding(); + outSize = + Convolution2DUtils.getOutputSize( + input, + kernel, + strides, + pad, + convolutionMode, + dilation, + CNN2DFormat.NCHW); // Also performs validation + } + + int outH = outSize[0]; + int outW = outSize[1]; + + INDArray biasGradView = gradientViews.get(ConvolutionParamInitializer.BIAS_KEY); + INDArray weightGradView = + gradientViews.get( + ConvolutionParamInitializer.WEIGHT_KEY); // 4d, c order. Shape: [outDepth,inDepth,kH,kW] + INDArray weightGradView2df = + Shape.newShapeNoCopy(weightGradView, new long[] {outDepth, inDepth * kH * kW}, false) + .transpose(); + + INDArray delta; + IActivation afn = getTypedLayerConfiguration().getActivationFn(); + + Pair p = preOutput4d(true, true, workspaceMgr); + INDArray z = p.getFirst(); + CNN2DFormat f = getTypedLayerConfiguration().getConvFormat(); + if (f != CNN2DFormat.NCHW) { + z = z.permute(0, 3, 1, 2); // NHWC to NCHW + } + delta = afn.backprop(z, epsilon).getFirst(); // TODO handle activation function params + + if (helper != null + && (helperCountFail == 0 || !getTypedLayerConfiguration().isCudnnAllowFallback())) { + INDArray helperDelta = delta; + if (getTypedLayerConfiguration().getConvFormat() == CNN2DFormat.NHWC) + helperDelta = delta.permute(0, 2, 3, 1); // NCHW to NHWC + + if (!hasBias() && !(helper instanceof MKLDNNConvHelper)) { + // MKL-DNN supports no bias, CuDNN doesn't + if (dummyBiasGrad == null) { + try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { + dummyBiasGrad = Nd4j.create(1, getTypedLayerConfiguration().getNOut()); + } + } + biasGradView = dummyBiasGrad; + } + + Pair ret = null; + try { + ret = + helper.backpropGradient( + origInput, + weights, + bias, + helperDelta, + kernel, + strides, + pad, + biasGradView, + weightGradView, + afn, + getTypedLayerConfiguration().getCudnnAlgoMode(), + getTypedLayerConfiguration().getCudnnBwdFilterAlgo(), + getTypedLayerConfiguration().getCudnnBwdDataAlgo(), + convolutionMode, + dilation, + getTypedLayerConfiguration().getConvFormat(), + workspaceMgr); + } catch (ND4JOpProfilerException e) { + throw e; // NaN panic etc for debugging + } catch (Exception e) { + if (e.getMessage().contains("Failed to allocate")) { + // This is a memory exception - don't fallback to 
built-in implementation + throw e; } - - long miniBatch = input.size(0); - int inH = (int) input.size(2); - int inW = (int) input.size(3); - - long outDepth = weights.size(0); - long inDepth = weights.size(1); - int kH = (int) weights.size(2); - int kW = (int) weights.size(3); - - int[] dilation = getTypedLayerConfiguration().getDilation(); - int[] kernel = getTypedLayerConfiguration().getKernelSize(); - int[] strides = getTypedLayerConfiguration().getStride(); - int[] pad; - int[] outSize; - if (convolutionMode == ConvolutionMode.Same) { - outSize = Convolution2DUtils.getOutputSize(input, kernel, strides, null, convolutionMode, dilation, CNN2DFormat.NCHW); //Also performs validation - pad = Convolution2DUtils.getSameModeTopLeftPadding(outSize, new int[] {inH, inW}, kernel, strides, dilation); + if (getTypedLayerConfiguration().isCudnnAllowFallback()) { + helperCountFail++; + if (helper instanceof MKLDNNConvHelper) { + log.warn("MKL-DNN execution failed - falling back on built-in implementation", e); + } else { + log.warn("CuDNN execution failed - falling back on built-in implementation", e); + } } else { - pad = getTypedLayerConfiguration().getPadding(); - outSize = Convolution2DUtils.getOutputSize(input, kernel, strides, pad, convolutionMode, dilation, CNN2DFormat.NCHW); //Also performs validation + throw new RuntimeException( + "Error during ConvolutionLayer MKL/CuDNN helper backprop - isCudnnAllowFallback() is set to false", + e); } + } - int outH = outSize[0]; - int outW = outSize[1]; - - - INDArray biasGradView = gradientViews.get(ConvolutionParamInitializer.BIAS_KEY); - INDArray weightGradView = gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY); //4d, c order. Shape: [outDepth,inDepth,kH,kW] - INDArray weightGradView2df = Shape - .newShapeNoCopy(weightGradView, new long[]{outDepth, inDepth * kH * kW}, false).transpose(); - - - - INDArray delta; - IActivation afn = getTypedLayerConfiguration().getActivationFn(); - - Pair p = preOutput4d(true, true, workspaceMgr); - INDArray z = p.getFirst(); - CNN2DFormat f = getTypedLayerConfiguration().getConvFormat(); - if(f != CNN2DFormat.NCHW){ - z = z.permute(0,3,1,2); //NHWC to NCHW - } - delta = afn.backprop(z, epsilon).getFirst(); //TODO handle activation function params - - if (helper != null && (helperCountFail == 0 || !getTypedLayerConfiguration().isCudnnAllowFallback())) { - INDArray helperDelta = delta; - if(getTypedLayerConfiguration().getConvFormat() == CNN2DFormat.NHWC) - helperDelta = delta.permute(0,2,3,1); //NCHW to NHWC - - if(!hasBias() && !(helper instanceof MKLDNNConvHelper)){ - //MKL-DNN supports no bias, CuDNN doesn't - if(dummyBiasGrad == null){ - try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { - dummyBiasGrad = Nd4j.create(1, getTypedLayerConfiguration().getNOut()); - } - } - biasGradView = dummyBiasGrad; - } - - Pair ret = null; - try { - ret = helper.backpropGradient(origInput, weights, bias, helperDelta, kernel, strides, - pad, biasGradView, weightGradView, afn, - getTypedLayerConfiguration().getCudnnAlgoMode(), getTypedLayerConfiguration().getCudnnBwdFilterAlgo(), getTypedLayerConfiguration().getCudnnBwdDataAlgo(), - convolutionMode, dilation, getTypedLayerConfiguration().getConvFormat(), workspaceMgr); - } catch (ND4JOpProfilerException e){ - throw e; //NaN panic etc for debugging - } catch (Exception e){ - if(e.getMessage().contains("Failed to allocate")){ - //This is a memory exception - don't fallback to built-in implementation - throw e; - } - - 
if(getTypedLayerConfiguration().isCudnnAllowFallback()){ - helperCountFail++; - if(helper instanceof MKLDNNConvHelper){ - log.warn("MKL-DNN execution failed - falling back on built-in implementation",e); - } else { - log.warn("CuDNN execution failed - falling back on built-in implementation",e); - } - } else { - throw new RuntimeException("Error during ConvolutionLayer MKL/CuDNN helper backprop - isCudnnAllowFallback() is set to false", e); - } - } - - if (ret != null) { - //Backprop dropout, if present - INDArray gradPostDropout = ret.getRight(); - gradPostDropout = backpropDropOutIfPresent(gradPostDropout); - ret.setSecond(gradPostDropout); - return ret; - } - } - - delta = delta.permute(1, 0, 2, 3); //To shape: [outDepth,miniBatch,outH,outW] - - //Note: due to the permute in preOut, and the fact that we essentially do a preOut.muli(epsilon), this reshape - // should be zero-copy; only possible exception being sometimes with the "identity" activation case - INDArray delta2d = delta.reshape('c', outDepth, miniBatch * outH * outW); //Shape.newShapeNoCopy(delta,new int[]{outDepth,miniBatch*outH*outW},false); - - //Do im2col, but with order [miniB,outH,outW,depthIn,kH,kW]; but need to input [miniBatch,channels,kH,kW,outH,outW] given the current im2col implementation - //To get this: create an array of the order we want, permute it to the order required by im2col implementation, and then do im2col on that - //to get old order from required order: permute(0,3,4,5,1,2) - INDArray im2col2d = p.getSecond(); //Re-use im2col2d array from forward pass if available; recalculate if not - if (im2col2d == null) { - INDArray col = Nd4j.createUninitialized(dataType, new long[] {miniBatch, outH, outW, inDepth, kH, kW}, 'c'); - INDArray col2 = col.permute(0, 3, 4, 5, 1, 2); - Convolution.im2col(input, kH, kW, strides[0], strides[1], pad[0], pad[1], dilation[0], dilation[1], - convolutionMode == ConvolutionMode.Same, col2); - //Shape im2col to 2d. Due to the permuting above, this should be a zero-copy reshape - im2col2d = col.reshape('c', miniBatch * outH * outW, inDepth * kH * kW); - } - - //Calculate weight gradients, using cc->c mmul. - //weightGradView2df is f order, but this is because it's transposed from c order - //Here, we are using the fact that AB = (B^T A^T)^T; output here (post transpose) is in c order, not usual f order - Nd4j.gemm(im2col2d, delta2d, weightGradView2df, true, true, 1.0, 0.0); - - //Flatten 4d weights to 2d... this again is a zero-copy op (unless weights are not originally in c order for some reason) - INDArray wPermuted = weights.permute(3, 2, 1, 0); //Start with c order weights, switch order to f order - INDArray w2d = wPermuted.reshape('f', inDepth * kH * kW, outDepth); - - //Calculate epsilons for layer below, in 2d format (note: this is in 'image patch' format before col2im reduction) - //Note: cc -> f mmul here, then reshape to 6d in f order - INDArray epsNext2d = w2d.mmul(delta2d); //TODO can we reuse im2col array instead of allocating new result array? - INDArray eps6d = Shape.newShapeNoCopy(epsNext2d, new long[] {kW, kH, inDepth, outW, outH, miniBatch}, true); - - //Calculate epsilonNext by doing im2col reduction. 
- //Current col2im implementation expects input with order: [miniBatch,channels,kH,kW,outH,outW] - //currently have [kH,kW,inDepth,outW,outH,miniBatch] -> permute first - eps6d = eps6d.permute(5, 2, 1, 0, 4, 3); - INDArray epsNextOrig = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, eps6d.dataType(), new long[] {inDepth, miniBatch, inH, inW}, 'c'); - - //Note: we are execute col2im in a way that the output array should be used in a stride 1 muli in the layer below... (same strides as zs/activations) - INDArray epsNext = epsNextOrig.permute(1, 0, 2, 3); - Convolution.col2im(eps6d, epsNext, strides[0], strides[1], pad[0], pad[1], inH, inW, dilation[0], dilation[1]); - - Gradient retGradient = new DefaultGradient(); - if(getTypedLayerConfiguration().hasBias()){ - delta2d.sum(biasGradView, 1); //biasGradView is initialized/zeroed first in sum op - retGradient.setGradientFor(ConvolutionParamInitializer.BIAS_KEY, biasGradView); - } - retGradient.setGradientFor(ConvolutionParamInitializer.WEIGHT_KEY, weightGradView, 'c'); - - weightNoiseParams.clear(); - - epsNext = backpropDropOutIfPresent(epsNext); - - if(getTypedLayerConfiguration().getConvFormat()!= CNN2DFormat.NCHW){ - epsNext = epsNext.permute(0,2,3,1); //NCHW to NHWC - } - - return new Pair<>(retGradient, epsNext); + if (ret != null) { + // Backprop dropout, if present + INDArray gradPostDropout = ret.getRight(); + gradPostDropout = backpropDropOutIfPresent(gradPostDropout); + ret.setSecond(gradPostDropout); + return ret; + } } - /** - * preOutput4d: Used so that ConvolutionLayer subclasses (such as Convolution1D) can maintain their standard - * non-4d preOutput method, while overriding this to return 4d activations (for use in backprop) without modifying - * the public API - */ - protected Pair preOutput4d(boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) { - return preOutput(training, forBackprop, workspaceMgr); + delta = delta.permute(1, 0, 2, 3); // To shape: [outDepth,miniBatch,outH,outW] + + // Note: due to the permute in preOut, and the fact that we essentially do a + // preOut.muli(epsilon), this reshape + // should be zero-copy; only possible exception being sometimes with the "identity" activation + // case + INDArray delta2d = + delta.reshape( + 'c', + outDepth, + miniBatch * outH + * outW); // Shape.newShapeNoCopy(delta,new + // int[]{outDepth,miniBatch*outH*outW},false); + + // Do im2col, but with order [miniB,outH,outW,depthIn,kH,kW]; but need to input + // [miniBatch,channels,kH,kW,outH,outW] given the current im2col implementation + // To get this: create an array of the order we want, permute it to the order required by im2col + // implementation, and then do im2col on that + // to get old order from required order: permute(0,3,4,5,1,2) + INDArray im2col2d = + p.getSecond(); // Re-use im2col2d array from forward pass if available; recalculate if not + if (im2col2d == null) { + INDArray col = + Nd4j.createUninitialized( + dataType, new long[] {miniBatch, outH, outW, inDepth, kH, kW}, 'c'); + INDArray col2 = col.permute(0, 3, 4, 5, 1, 2); + Convolution.im2col( + input, + kH, + kW, + strides[0], + strides[1], + pad[0], + pad[1], + dilation[0], + dilation[1], + convolutionMode == ConvolutionMode.Same, + col2); + // Shape im2col to 2d. 
Due to the permuting above, this should be a zero-copy reshape + im2col2d = col.reshape('c', miniBatch * outH * outW, inDepth * kH * kW); } - protected void validateInputRank() { - //Input validation: expect rank 4 matrix - if (input.rank() != 4) { - String layerName = layerConfiguration.getName(); - if (layerName == null) - layerName = "(not named)"; - throw new DL4JInvalidInputException("Got rank " + input.rank() - + " array as input to ConvolutionLayer (layer name = " + layerName + ", layer index = " - + index + ") with shape " + Arrays.toString(input.shape()) + ". " - + "Expected rank 4 array with shape [minibatchSize, layerInputDepth, inputHeight, inputWidth]." - + (input.rank() == 2 - ? " (Wrong input type (see InputType.convolutionalFlat()) or wrong data type?)" - : "") - + " " + layerId()); - } + // Calculate weight gradients, using cc->c mmul. + // weightGradView2df is f order, but this is because it's transposed from c order + // Here, we are using the fact that AB = (B^T A^T)^T; output here (post transpose) is in c + // order, not usual f order + Nd4j.gemm(im2col2d, delta2d, weightGradView2df, true, true, 1.0, 0.0); + + // Flatten 4d weights to 2d... this again is a zero-copy op (unless weights are not originally + // in c order for some reason) + INDArray wPermuted = + weights.permute(3, 2, 1, 0); // Start with c order weights, switch order to f order + INDArray w2d = wPermuted.reshape('f', inDepth * kH * kW, outDepth); + + // Calculate epsilons for layer below, in 2d format (note: this is in 'image patch' format + // before col2im reduction) + // Note: cc -> f mmul here, then reshape to 6d in f order + INDArray epsNext2d = + w2d.mmul(delta2d); // TODO can we reuse im2col array instead of allocating new result array? + INDArray eps6d = + Shape.newShapeNoCopy(epsNext2d, new long[] {kW, kH, inDepth, outW, outH, miniBatch}, true); + + // Calculate epsilonNext by doing im2col reduction. + // Current col2im implementation expects input with order: [miniBatch,channels,kH,kW,outH,outW] + // currently have [kH,kW,inDepth,outW,outH,miniBatch] -> permute first + eps6d = eps6d.permute(5, 2, 1, 0, 4, 3); + INDArray epsNextOrig = + workspaceMgr.createUninitialized( + ArrayType.ACTIVATION_GRAD, + eps6d.dataType(), + new long[] {inDepth, miniBatch, inH, inW}, + 'c'); + + // Note: we are execute col2im in a way that the output array should be used in a stride 1 muli + // in the layer below... (same strides as zs/activations) + INDArray epsNext = epsNextOrig.permute(1, 0, 2, 3); + Convolution.col2im( + eps6d, epsNext, strides[0], strides[1], pad[0], pad[1], inH, inW, dilation[0], dilation[1]); + + Gradient retGradient = new DefaultGradient(); + if (getTypedLayerConfiguration().hasBias()) { + delta2d.sum(biasGradView, 1); // biasGradView is initialized/zeroed first in sum op + retGradient.setGradientFor(ConvolutionParamInitializer.BIAS_KEY, biasGradView); + } + retGradient.setGradientFor(ConvolutionParamInitializer.WEIGHT_KEY, weightGradView, 'c'); + + weightNoiseParams.clear(); + + epsNext = backpropDropOutIfPresent(epsNext); + + if (getTypedLayerConfiguration().getConvFormat() != CNN2DFormat.NCHW) { + epsNext = epsNext.permute(0, 2, 3, 1); // NCHW to NHWC } - protected void validateInputDepth(long inDepth) { - CNN2DFormat format = getTypedLayerConfiguration().getConvFormat(); - int dim = format == CNN2DFormat.NHWC ? 
3 : 1; - if (input.size(dim) != inDepth) { - String layerName = layerConfiguration.getName(); - if (layerName == null) - layerName = "(not named)"; + return new Pair<>(retGradient, epsNext); + } - String s = "Cannot do forward pass in Convolution layer (layer name = " + layerName - + ", layer index = " + index + "): input array channels does not match CNN layer configuration" - + " (data format = " + format + ", data input channels = " + input.size(dim) + ", " + getTypedLayerConfiguration().getConvFormat().dimensionNames() - + "=" + Arrays.toString(input.shape()) + "; expected" + " input channels = " + inDepth + ") " - + layerId(); + /** + * preOutput4d: Used so that ConvolutionLayer subclasses (such as Convolution1D) can maintain + * their standard non-4d preOutput method, while overriding this to return 4d activations (for use + * in backprop) without modifying the public API + */ + protected Pair preOutput4d( + boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) { + return preOutput(training, forBackprop, workspaceMgr); + } - int dimIfWrongFormat = format == CNN2DFormat.NHWC ? 1 : 3; - if(input.size(dimIfWrongFormat) == inDepth){ - //User might have passed NCHW data to a NHWC net, or vice versa? - s += "\n" + Convolution2DUtils.NCHW_NHWC_ERROR_MSG; - } + protected void validateInputRank() { + // Input validation: expect rank 4 matrix + if (input.rank() != 4) { + String layerName = layerConfiguration.getName(); + if (layerName == null) layerName = "(not named)"; + throw new DL4JInvalidInputException( + "Got rank " + + input.rank() + + " array as input to ConvolutionLayer (layer name = " + + layerName + + ", layer index = " + + index + + ") with shape " + + Arrays.toString(input.shape()) + + ". " + + "Expected rank 4 array with shape [minibatchSize, layerInputDepth, inputHeight, inputWidth]." + + (input.rank() == 2 + ? " (Wrong input type (see InputType.convolutionalFlat()) or wrong data type?)" + : "") + + " " + + layerId()); + } + } + protected void validateInputDepth(long inDepth) { + CNN2DFormat format = getTypedLayerConfiguration().getConvFormat(); + int dim = format == CNN2DFormat.NHWC ? 3 : 1; + if (input.size(dim) != inDepth) { + String layerName = layerConfiguration.getName(); + if (layerName == null) layerName = "(not named)"; - throw new DL4JInvalidInputException(s); - } + String s = + "Cannot do forward pass in Convolution layer (layer name = " + + layerName + + ", layer index = " + + index + + "): input array channels does not match CNN layer configuration" + + " (data format = " + + format + + ", data input channels = " + + input.size(dim) + + ", " + + getTypedLayerConfiguration().getConvFormat().dimensionNames() + + "=" + + Arrays.toString(input.shape()) + + "; expected" + + " input channels = " + + inDepth + + ") " + + layerId(); + + int dimIfWrongFormat = format == CNN2DFormat.NHWC ? 1 : 3; + if (input.size(dimIfWrongFormat) == inDepth) { + // User might have passed NCHW data to a NHWC net, or vice versa? + s += "\n" + Convolution2DUtils.NCHW_NHWC_ERROR_MSG; + } + + throw new DL4JInvalidInputException(s); + } + } + + /** + * PreOutput method that also returns the im2col2d array (if being called for backprop), as this + * can be re-used instead of being calculated again. + * + * @param training Train or test time (impacts dropout) + * @param forBackprop If true: return the im2col2d array for re-use during backprop. False: return + * null for second pair entry. Note that it may still be null in the case of CuDNN and the + * like. 
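+   * @param workspaceMgr Workspace manager used to allocate the returned pre-output array and the
+   *     optional cached im2col buffer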
+ * @return Pair of arrays: preOutput (activations) and optionally the im2col2d array + */ + protected Pair preOutput( + boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) { + assertInputSet(false); + INDArray bias = getParamWithNoise(ConvolutionParamInitializer.BIAS_KEY, training, workspaceMgr); + INDArray weights = + getParamWithNoise(ConvolutionParamInitializer.WEIGHT_KEY, training, workspaceMgr); + + validateInputRank(); + + INDArray input = this.input.castTo(dataType); + INDArray inputOrig = input; + if (getTypedLayerConfiguration().getConvFormat() == CNN2DFormat.NHWC) { + input = input.permute(0, 3, 1, 2).dup(); // NHWC to NCHW } - /** - * PreOutput method that also returns the im2col2d array (if being called for backprop), as this can be re-used - * instead of being calculated again. - * - * @param training Train or test time (impacts dropout) - * @param forBackprop If true: return the im2col2d array for re-use during backprop. False: return null for second - * pair entry. Note that it may still be null in the case of CuDNN and the like. - * @return Pair of arrays: preOutput (activations) and optionally the im2col2d array - */ - protected Pair preOutput(boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) { - assertInputSet(false); - INDArray bias = getParamWithNoise(ConvolutionParamInitializer.BIAS_KEY, training, workspaceMgr); - INDArray weights = getParamWithNoise(ConvolutionParamInitializer.WEIGHT_KEY, training, workspaceMgr); + long miniBatch = input.size(0); + long outDepth = weights.size(0); + long inDepth = weights.size(1); + validateInputDepth(inDepth); - validateInputRank(); + long kH = weights.size(2); + long kW = weights.size(3); - INDArray input = this.input.castTo(dataType); - INDArray inputOrig = input; - if(getTypedLayerConfiguration().getConvFormat() == CNN2DFormat.NHWC) { - input = input.permute(0,3,1,2).dup(); //NHWC to NCHW + int[] dilation = getTypedLayerConfiguration().getDilation(); + int[] kernel = getTypedLayerConfiguration().getKernelSize(); + int[] strides = getTypedLayerConfiguration().getStride(); + + int[] pad; + int[] outSize; + if (convolutionMode == ConvolutionMode.Same) { + outSize = + Convolution2DUtils.getOutputSize( + input, + kernel, + strides, + null, + convolutionMode, + dilation, + CNN2DFormat.NCHW); // Note: hardcoded to NCHW due to permute earlier in this method + + if (input.size(2) > Integer.MAX_VALUE || input.size(3) > Integer.MAX_VALUE) + throw new ND4JArraySizeException(); + int[] inWidthHeight; + // if(layerConf().getCnn2dDataFormat() == CNN2DFormat.NCHW) + // TODO: Switch hardcoded state later. 
For now, convolution is implemented as + // switch to NCHW then permute back for NWHC + inWidthHeight = new int[] {(int) input.size(2), (int) input.size(3)}; + + /* else if(layerConf().getCnn2dDataFormat() == CNN2DFormat.NHWC) { + inWidthHeight = new int[] {(int) input.size(1), (int) input.size(2)}; + } + else + throw new IllegalStateException("No data format configured!");*/ + pad = + Convolution2DUtils.getSameModeTopLeftPadding( + outSize, inWidthHeight, kernel, strides, dilation); + } else { + pad = getTypedLayerConfiguration().getPadding(); + outSize = + Convolution2DUtils.getOutputSize( + input, + kernel, + strides, + pad, + convolutionMode, + dilation, + CNN2DFormat.NCHW); // Note: hardcoded to NCHW due to permute earlier in this method + } + + int outH = outSize[0]; + int outW = outSize[1]; + + if (helper != null + && (helperCountFail == 0 || !getTypedLayerConfiguration().isCudnnAllowFallback())) { + if (preOutput != null && forBackprop) { + return new Pair<>(preOutput, null); + } + + // For no-bias convolutional layers: use an empty (all 0s) value for biases + if (!hasBias()) { + if (dummyBias == null) { + try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { + dummyBias = Nd4j.create(1, getTypedLayerConfiguration().getNOut()); + } + } + bias = dummyBias; + } + + INDArray ret = null; + try { + ret = + helper.preOutput( + inputOrig, + weights, + bias, + kernel, + strides, + pad, + getTypedLayerConfiguration().getCudnnAlgoMode(), + getTypedLayerConfiguration().getCudnnFwdAlgo(), + convolutionMode, + dilation, + getTypedLayerConfiguration().getConvFormat(), + workspaceMgr); + } catch (ND4JOpProfilerException e) { + throw e; // NaN panic etc for debugging + } catch (Exception e) { + if (e.getMessage() != null && e.getMessage().contains("Failed to allocate")) { + // This is a memory exception - don't fallback to built-in implementation + throw e; } - long miniBatch = input.size(0); - long outDepth = weights.size(0); - long inDepth = weights.size(1); - validateInputDepth(inDepth); - - long kH = weights.size(2); - long kW = weights.size(3); - - - int[] dilation = getTypedLayerConfiguration().getDilation(); - int[] kernel = getTypedLayerConfiguration().getKernelSize(); - int[] strides = getTypedLayerConfiguration().getStride(); - - - - int[] pad; - int[] outSize; - if (convolutionMode == ConvolutionMode.Same) { - outSize = Convolution2DUtils.getOutputSize( - input, - kernel, - strides, - null, - convolutionMode, - dilation, - CNN2DFormat.NCHW); //Note: hardcoded to NCHW due to permute earlier in this method - - if (input.size(2) > Integer.MAX_VALUE || input.size(3) > Integer.MAX_VALUE) - throw new ND4JArraySizeException(); - int[] inWidthHeight; - // if(layerConf().getCnn2dDataFormat() == CNN2DFormat.NCHW) - //TODO: Switch hardcoded state later. 
For now, convolution is implemented as - //switch to NCHW then permute back for NWHC - inWidthHeight = new int[] {(int) input.size(2), (int) input.size(3)}; - - /* else if(layerConf().getCnn2dDataFormat() == CNN2DFormat.NHWC) { - inWidthHeight = new int[] {(int) input.size(1), (int) input.size(2)}; - } - else - throw new IllegalStateException("No data format configured!");*/ - pad = Convolution2DUtils.getSameModeTopLeftPadding( - outSize, - inWidthHeight, - kernel, - strides, - dilation); + if (getTypedLayerConfiguration().isCudnnAllowFallback()) { + helperCountFail++; + if (helper instanceof MKLDNNConvHelper) { + log.warn("MKL-DNN execution failed - falling back on built-in implementation", e); + } else { + log.warn("CuDNN execution failed - falling back on built-in implementation", e); + } } else { - pad = getTypedLayerConfiguration().getPadding(); - outSize = Convolution2DUtils.getOutputSize( - input, - kernel, - strides, - pad, - convolutionMode, - dilation, - CNN2DFormat.NCHW); //Note: hardcoded to NCHW due to permute earlier in this method + throw new RuntimeException( + "Error during ConvolutionLayer MKL/CuDNN helper forward pass - isCudnnAllowFallback() is set to false", + e); } - - int outH = outSize[0]; - int outW = outSize[1]; - - - if (helper != null && (helperCountFail == 0 || !getTypedLayerConfiguration().isCudnnAllowFallback())) { - if (preOutput != null && forBackprop) { - return new Pair<>(preOutput, null); - } - - //For no-bias convolutional layers: use an empty (all 0s) value for biases - if(!hasBias()){ - if(dummyBias == null){ - try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { - dummyBias = Nd4j.create(1, getTypedLayerConfiguration().getNOut()); - } - } - bias = dummyBias; - } - - INDArray ret = null; - try { - ret = helper.preOutput(inputOrig, weights, bias, kernel, strides, pad, getTypedLayerConfiguration().getCudnnAlgoMode(), - getTypedLayerConfiguration().getCudnnFwdAlgo(), convolutionMode, dilation, getTypedLayerConfiguration().getConvFormat(), workspaceMgr); - } catch (ND4JOpProfilerException e){ - throw e; //NaN panic etc for debugging - } catch (Exception e){ - if(e.getMessage() != null && e.getMessage().contains("Failed to allocate")){ - //This is a memory exception - don't fallback to built-in implementation - throw e; - } - - if(getTypedLayerConfiguration().isCudnnAllowFallback()) { - helperCountFail++; - if(helper instanceof MKLDNNConvHelper) { - log.warn("MKL-DNN execution failed - falling back on built-in implementation",e); - } else { - log.warn("CuDNN execution failed - falling back on built-in implementation",e); - } - } else { - throw new RuntimeException("Error during ConvolutionLayer MKL/CuDNN helper forward pass - isCudnnAllowFallback() is set to false", e); - } - } - if (ret != null) { - return new Pair<>(ret, null); - } - } - - if (preOutput != null && i2d != null && forBackprop) { - return new Pair<>(preOutput, i2d); - } - - //im2col in the required order: want [outW,outH,miniBatch,depthIn,kH,kW], but need to input [miniBatch,channels,kH,kW,outH,outW] given the current im2col implementation - //To get this: create an array of the order we want, permute it to the order required by im2col implementation, and then do im2col on that - //to get old order from required order: permute(0,3,4,5,1,2) - //Post reshaping: rows are such that minibatch varies slowest, outW fastest as we step through the rows post-reshape - INDArray col = Nd4j.createUninitialized(weights.dataType(), new long[] {miniBatch, outH, outW, inDepth, kH, 
kW}, 'c'); - int[] permute = new int[]{0, 3, 4, 5, 1, 2}; - INDArray col2 = col.permute(permute); - INDArray im2ColIn = input.castTo(col2.dataType()); //No op if already (for example) float - if (kH > Integer.MAX_VALUE || kW > Integer.MAX_VALUE) - throw new ND4JArraySizeException(); - Convolution.im2col( - im2ColIn, - (int)kH, - (int)kW, - strides[0], strides[1], - pad[0], pad[1], - dilation[0], dilation[1], - convolutionMode == ConvolutionMode.Same, - col2); - - - INDArray im2col2d = Shape.newShapeNoCopy(col, new long[] {miniBatch * outH * outW, inDepth * kH * kW}, false); - - //Current order of weights: [depthOut,depthIn,kH,kW], c order - //Permute to give [kW,kH,depthIn,depthOut], f order - //Reshape to give [kW*kH*depthIn, depthOut]. This should always be zero-copy reshape, unless weights aren't in c order for some reason - INDArray permutedW = weights.permute(3, 2, 1, 0); - INDArray reshapedW = permutedW.reshape('f', kW * kH * inDepth, outDepth); - - //Do the MMUL; c and f orders in, f order out. output shape: [miniBatch*outH*outW,depthOut] - INDArray z = workspaceMgr.createUninitialized(ArrayType.ACTIVATIONS, weights.dataType(), new long[]{im2col2d.size(0), reshapedW.size(1)}, 'f'); - im2col2d.mmuli(reshapedW, z); - - //Add biases, before reshaping. Note that biases are [1,depthOut] and currently z is [miniBatch*outH*outW,depthOut] -> addiRowVector - if(getTypedLayerConfiguration().hasBias()){ - z.addiRowVector(bias); - } - - //Now, reshape to [outW,outH,miniBatch,outDepth], and permute to have correct output order: [miniBatch,outDepth,outH,outW]; - z = Shape.newShapeNoCopy(z, new long[] {outW, outH, miniBatch, outDepth}, true); - z = z.permute(2, 3, 1, 0); - - if (training && cacheMode != CacheMode.NONE && workspaceMgr.hasConfiguration(ArrayType.FF_CACHE) && workspaceMgr.isWorkspaceOpen(ArrayType.FF_CACHE)) { - try (MemoryWorkspace wsB = workspaceMgr.notifyScopeBorrowed(ArrayType.FF_CACHE)) { - i2d = im2col2d.unsafeDuplication(); - } - } - - if(getTypedLayerConfiguration().getConvFormat() == CNN2DFormat.NHWC) { - z = z.permute(0,2,3,1); //NCHW to NHWC - z = workspaceMgr.dup(ArrayType.ACTIVATIONS, z); - } - - return new Pair<>(z, forBackprop ? 
im2col2d : null); + } + if (ret != null) { + return new Pair<>(ret, null); + } } - @Override - public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) { - if (input == null) { - throw new IllegalArgumentException("Cannot perform forward pass with null input " + layerId()); + if (preOutput != null && i2d != null && forBackprop) { + return new Pair<>(preOutput, i2d); + } + + // im2col in the required order: want [outW,outH,miniBatch,depthIn,kH,kW], but need to input + // [miniBatch,channels,kH,kW,outH,outW] given the current im2col implementation + // To get this: create an array of the order we want, permute it to the order required by im2col + // implementation, and then do im2col on that + // to get old order from required order: permute(0,3,4,5,1,2) + // Post reshaping: rows are such that minibatch varies slowest, outW fastest as we step through + // the rows post-reshape + INDArray col = + Nd4j.createUninitialized( + weights.dataType(), new long[] {miniBatch, outH, outW, inDepth, kH, kW}, 'c'); + int[] permute = new int[] {0, 3, 4, 5, 1, 2}; + INDArray col2 = col.permute(permute); + INDArray im2ColIn = input.castTo(col2.dataType()); // No op if already (for example) float + if (kH > Integer.MAX_VALUE || kW > Integer.MAX_VALUE) throw new ND4JArraySizeException(); + Convolution.im2col( + im2ColIn, + (int) kH, + (int) kW, + strides[0], + strides[1], + pad[0], + pad[1], + dilation[0], + dilation[1], + convolutionMode == ConvolutionMode.Same, + col2); + + INDArray im2col2d = + Shape.newShapeNoCopy(col, new long[] {miniBatch * outH * outW, inDepth * kH * kW}, false); + + // Current order of weights: [depthOut,depthIn,kH,kW], c order + // Permute to give [kW,kH,depthIn,depthOut], f order + // Reshape to give [kW*kH*depthIn, depthOut]. This should always be zero-copy reshape, unless + // weights aren't in c order for some reason + INDArray permutedW = weights.permute(3, 2, 1, 0); + INDArray reshapedW = permutedW.reshape('f', kW * kH * inDepth, outDepth); + + // Do the MMUL; c and f orders in, f order out. output shape: [miniBatch*outH*outW,depthOut] + INDArray z = + workspaceMgr.createUninitialized( + ArrayType.ACTIVATIONS, + weights.dataType(), + new long[] {im2col2d.size(0), reshapedW.size(1)}, + 'f'); + im2col2d.mmuli(reshapedW, z); + + // Add biases, before reshaping. Note that biases are [1,depthOut] and currently z is + // [miniBatch*outH*outW,depthOut] -> addiRowVector + if (getTypedLayerConfiguration().hasBias()) { + z.addiRowVector(bias); + } + + // Now, reshape to [outW,outH,miniBatch,outDepth], and permute to have correct output order: + // [miniBatch,outDepth,outH,outW]; + z = Shape.newShapeNoCopy(z, new long[] {outW, outH, miniBatch, outDepth}, true); + z = z.permute(2, 3, 1, 0); + + if (training + && cacheMode != CacheMode.NONE + && workspaceMgr.hasConfiguration(ArrayType.FF_CACHE) + && workspaceMgr.isWorkspaceOpen(ArrayType.FF_CACHE)) { + try (MemoryWorkspace wsB = workspaceMgr.notifyScopeBorrowed(ArrayType.FF_CACHE)) { + i2d = im2col2d.unsafeDuplication(); + } + } + + if (getTypedLayerConfiguration().getConvFormat() == CNN2DFormat.NHWC) { + z = z.permute(0, 2, 3, 1); // NCHW to NHWC + z = workspaceMgr.dup(ArrayType.ACTIVATIONS, z); + } + + return new Pair<>(z, forBackprop ? 
im2col2d : null); + } + + @Override + public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) { + if (input == null) { + throw new IllegalArgumentException( + "Cannot perform forward pass with null input " + layerId()); + } + + if (cacheMode == null) cacheMode = CacheMode.NONE; + + applyDropOutIfNecessary(training, workspaceMgr); + + INDArray z = preOutput(training, false, workspaceMgr).getFirst(); + + // we do cache only if cache workspace exists. Skip otherwise + if (training + && cacheMode != CacheMode.NONE + && workspaceMgr.hasConfiguration(ArrayType.FF_CACHE) + && workspaceMgr.isWorkspaceOpen(ArrayType.FF_CACHE)) { + try (MemoryWorkspace wsB = workspaceMgr.notifyScopeBorrowed(ArrayType.FF_CACHE)) { + preOutput = z.unsafeDuplication(); + } + } + + // String afn = conf.getLayer().getActivationFunction(); + IActivation afn = getTypedLayerConfiguration().getActivationFn(); + + if (helper != null + && Shape.strideDescendingCAscendingF(z) + && (helperCountFail == 0 || !getTypedLayerConfiguration().isCudnnAllowFallback())) { + INDArray ret = null; + try { + ret = helper.activate(z, getTypedLayerConfiguration().getActivationFn(), training); + } catch (ND4JOpProfilerException e) { + throw e; // NaN panic etc for debugging + } catch (Exception e) { + if (e.getMessage() != null && e.getMessage().contains("Failed to allocate")) { + // This is a memory exception - don't fallback to built-in implementation + throw e; } - if (cacheMode == null) - cacheMode = CacheMode.NONE; - - applyDropOutIfNecessary(training, workspaceMgr); - - INDArray z = preOutput(training, false, workspaceMgr).getFirst(); - - // we do cache only if cache workspace exists. Skip otherwise - if (training && cacheMode != CacheMode.NONE && workspaceMgr.hasConfiguration(ArrayType.FF_CACHE) && workspaceMgr.isWorkspaceOpen(ArrayType.FF_CACHE)) { - try (MemoryWorkspace wsB = workspaceMgr.notifyScopeBorrowed(ArrayType.FF_CACHE)) { - preOutput = z.unsafeDuplication(); - } + if (getTypedLayerConfiguration().isCudnnAllowFallback()) { + helperCountFail++; + if (helper instanceof MKLDNNConvHelper) { + log.warn("MKL-DNN execution failed - falling back on built-in implementation", e); + } else { + log.warn("CuDNN execution failed - falling back on built-in implementation", e); + } + } else { + throw new RuntimeException( + "Error during ConvolutionLayer MKL/CuDNN helper forward pass - isCudnnAllowFallback() is set to false", + e); } + } - //String afn = conf.getLayer().getActivationFunction(); - IActivation afn = getTypedLayerConfiguration().getActivationFn(); - - if (helper != null && Shape.strideDescendingCAscendingF(z) && (helperCountFail == 0 || !getTypedLayerConfiguration().isCudnnAllowFallback())) { - INDArray ret = null; - try { - ret = helper.activate(z, getTypedLayerConfiguration().getActivationFn(), training); - } catch (ND4JOpProfilerException e){ - throw e; //NaN panic etc for debugging - } catch (Exception e) { - if (e.getMessage() != null && e.getMessage().contains("Failed to allocate")) { - //This is a memory exception - don't fallback to built-in implementation - throw e; - } - - if (getTypedLayerConfiguration().isCudnnAllowFallback()) { - helperCountFail++; - if (helper instanceof MKLDNNConvHelper) { - log.warn("MKL-DNN execution failed - falling back on built-in implementation", e); - } else { - log.warn("CuDNN execution failed - falling back on built-in implementation", e); - } - } else { - throw new RuntimeException("Error during ConvolutionLayer MKL/CuDNN helper forward pass - isCudnnAllowFallback() is 
set to false", e); - } - } - - if (ret != null) { - return ret; - } - } - - INDArray activation = afn.getActivation(z, training); - return activation; + if (ret != null) { + return ret; + } } - @Override - public boolean hasBias() { - return getTypedLayerConfiguration().hasBias(); - } - - @Override - public boolean isPretrainLayer() { - return false; - } - - @Override - public LayerHelper getHelper() { - return helper; - } - - @Override - public void fit(INDArray input, LayerWorkspaceMgr workspaceMgr) { - throw new UnsupportedOperationException("Not supported"); - } - - @Override - public void setParamsTable(INDArray paramsTable) { - //Override, as base layer does f order parameter flattening by default - setParams(paramsTable, 'c'); - } - - @Override - public Pair feedForwardMaskArray(INDArray maskArray, MaskState currentMaskState, int minibatchSize) { - if (maskArray == null) { - //For same mode (with stride 1): output activations size is always same size as input activations size -> mask array is same size - return new Pair<>(maskArray, currentMaskState); - } - - INDArray outMask = Convolution2DUtils.cnn2dMaskReduction(maskArray, getTypedLayerConfiguration().getKernelSize(), getTypedLayerConfiguration().getStride(), - getTypedLayerConfiguration().getPadding(), getTypedLayerConfiguration().getDilation(), getTypedLayerConfiguration().getConvolutionMode()); - return new Pair<>(outMask, currentMaskState); + INDArray activation = afn.getActivation(z, training); + return activation; + } + + @Override + public boolean hasBias() { + return getTypedLayerConfiguration().hasBias(); + } + + @Override + public boolean isPretrainLayer() { + return false; + } + + @Override + public LayerHelper getHelper() { + return helper; + } + + @Override + public void fit(INDArray input, LayerWorkspaceMgr workspaceMgr) { + throw new UnsupportedOperationException("Not supported"); + } + + @Override + public void setParamsTable(INDArray paramsTable) { + // Override, as base layer does f order parameter flattening by default + setParams(paramsTable, 'c'); + } + + @Override + public Pair feedForwardMaskArray( + INDArray maskArray, MaskState currentMaskState, int minibatchSize) { + if (maskArray == null) { + // For same mode (with stride 1): output activations size is always same size as input + // activations size -> mask array is same size + return new Pair<>(maskArray, currentMaskState); } + INDArray outMask = + Convolution2DUtils.cnn2dMaskReduction( + maskArray, + getTypedLayerConfiguration().getKernelSize(), + getTypedLayerConfiguration().getStride(), + getTypedLayerConfiguration().getPadding(), + getTypedLayerConfiguration().getDilation(), + getTypedLayerConfiguration().getConvolutionMode()); + return new Pair<>(outMask, currentMaskState); + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionNewLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionNewLayer.java new file mode 100644 index 000000000..18606f39d --- /dev/null +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionNewLayer.java @@ -0,0 +1,753 @@ +/* + * ****************************************************************************** + * * + * * + * * This program and the accompanying materials are made available under the + * * terms of the Apache License, Version 2.0 which is available at + * * https://www.apache.org/licenses/LICENSE-2.0. 
+ * * + * * See the NOTICE file distributed with this work for additional + * * information regarding copyright ownership. + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * * License for the specific language governing permissions and limitations + * * under the License. + * * + * * SPDX-License-Identifier: Apache-2.0 + * ***************************************************************************** + */ + +package org.deeplearning4j.nn.layers.convolution; + +import java.util.Arrays; + +import lombok.extern.slf4j.Slf4j; +import org.deeplearning4j.common.config.DL4JClassLoading; +import org.deeplearning4j.exception.DL4JInvalidInputException; +import org.deeplearning4j.nn.api.MaskState; +import org.deeplearning4j.nn.conf.CNN2DFormat; +import org.deeplearning4j.nn.conf.CacheMode; +import org.deeplearning4j.nn.conf.ConvolutionMode; +import org.deeplearning4j.nn.conf.layers.Convolution1DNew; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; +import org.deeplearning4j.nn.gradient.DefaultGradient; +import org.deeplearning4j.nn.gradient.Gradient; +import org.deeplearning4j.nn.layers.BaseLayer; +import org.deeplearning4j.nn.layers.LayerHelper; +import org.deeplearning4j.nn.layers.mkldnn.MKLDNNConvHelper; +import org.deeplearning4j.nn.params.ConvolutionParamInitializer; +import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.deeplearning4j.util.Convolution2DUtils; +import org.nd4j.common.primitives.Pair; +import org.nd4j.linalg.activations.IActivation; +import org.nd4j.linalg.api.buffer.DataType; +import org.nd4j.linalg.api.memory.MemoryWorkspace; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.api.shape.Shape; +import org.nd4j.linalg.convolution.Convolution; +import org.nd4j.linalg.exception.ND4JArraySizeException; +import org.nd4j.linalg.exception.ND4JOpProfilerException; +import org.nd4j.linalg.factory.Nd4j; + +@Slf4j +public class ConvolutionNewLayer< + LayerConf_T extends org.deeplearning4j.nn.conf.layers.Convolution1DNew> + extends BaseLayer { + + protected INDArray i2d; + protected ConvolutionHelper helper = null; + protected int helperCountFail = 0; + protected ConvolutionMode convolutionMode; + protected transient INDArray dummyBias; // Used only when: hasBias == false AND helpers are used + protected transient INDArray dummyBiasGrad; // As above + + + public ConvolutionNewLayer(LayerConfiguration conf, DataType dataType) { + super(conf, dataType); + initializeHelper(); + if (conf instanceof Convolution1DNew) { + convolutionMode = ((Convolution1DNew) conf).getConvolutionMode(); + } else if (conf instanceof org.deeplearning4j.nn.conf.layers.ConvolutionLayer) { + convolutionMode = + ((org.deeplearning4j.nn.conf.layers.ConvolutionLayer) conf).getConvolutionMode(); + } + } + + void initializeHelper() { + String backend = Nd4j.getExecutioner().getEnvironmentInformation().getProperty("backend"); + if ("CUDA".equalsIgnoreCase(backend)) { + helper = + DL4JClassLoading.createNewInstance( + "org.deeplearning4j.cuda.convolution.CudnnConvolutionHelper", + ConvolutionHelper.class, + dataType); + log.debug("CudnnConvolutionHelper successfully initialized"); + if (!helper.checkSupported()) { + helper = null; + } + } else if ("CPU".equalsIgnoreCase(backend)) { + helper = new MKLDNNConvHelper(dataType); + log.trace("Created 
MKLDNNConvHelper, layer {}", getTypedLayerConfiguration().getName()); + } + + if (helper != null && !helper.checkSupported()) { + log.debug("Removed helper {} as not supported", helper.getClass()); + helper = null; + } + } + + @Override + public Type type() { + return Type.CONVOLUTIONAL; + } + +/** +* + * @return +*/ + @Override + public Convolution1DNew getTypedLayerConfiguration() { + return super.getTypedLayerConfiguration(); + } + + @Override + public Pair backpropGradient( + INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { + assertInputSet(true); + INDArray weights = + getParamWithNoise(ConvolutionParamInitializer.WEIGHT_KEY, true, workspaceMgr); + INDArray bias = getParamWithNoise(ConvolutionParamInitializer.BIAS_KEY, true, workspaceMgr); + + INDArray input = this.input.castTo(dataType); // No op if correct type + if (epsilon.dataType() != dataType) epsilon = epsilon.castTo(dataType); + + INDArray origInput = input; + INDArray origEps = epsilon; + if (getTypedLayerConfiguration().getConvFormat() != CNN2DFormat.NCHW) { + input = input.permute(0, 3, 1, 2); // NHWC to NCHW + epsilon = epsilon.permute(0, 3, 1, 2); // NHWC to NCHW + } + + long miniBatch = input.size(0); + int inH = (int) input.size(2); + int inW = (int) input.size(3); + + long outDepth = weights.size(0); + long inDepth = weights.size(1); + int kH = (int) weights.size(2); + int kW = (int) weights.size(3); + + int[] dilation = getTypedLayerConfiguration().getDilation(); + int[] kernel = getTypedLayerConfiguration().getKernelSize(); + int[] strides = getTypedLayerConfiguration().getStride(); + int[] pad; + int[] outSize; + if (convolutionMode == ConvolutionMode.Same) { + outSize = + Convolution2DUtils.getOutputSize( + input, + kernel, + strides, + null, + convolutionMode, + dilation, + CNN2DFormat.NCHW); // Also performs validation + pad = + Convolution2DUtils.getSameModeTopLeftPadding( + outSize, new int[] {inH, inW}, kernel, strides, dilation); + } else { + pad = getTypedLayerConfiguration().getPadding(); + outSize = + Convolution2DUtils.getOutputSize( + input, + kernel, + strides, + pad, + convolutionMode, + dilation, + CNN2DFormat.NCHW); // Also performs validation + } + + int outH = outSize[0]; + int outW = outSize[1]; + + INDArray biasGradView = gradientViews.get(ConvolutionParamInitializer.BIAS_KEY); + INDArray weightGradView = + gradientViews.get( + ConvolutionParamInitializer.WEIGHT_KEY); // 4d, c order. 
Shape: [outDepth,inDepth,kH,kW] + INDArray weightGradView2df = + Shape.newShapeNoCopy(weightGradView, new long[] {outDepth, inDepth * kH * kW}, false) + .transpose(); + + INDArray delta; + IActivation afn = getTypedLayerConfiguration().getActivationFn(); + + Pair p = preOutput4d(true, true, workspaceMgr); + INDArray z = p.getFirst(); + CNN2DFormat f = getTypedLayerConfiguration().getConvFormat(); + if (f != CNN2DFormat.NCHW) { + z = z.permute(0, 3, 1, 2); // NHWC to NCHW + } + delta = afn.backprop(z, epsilon).getFirst(); // TODO handle activation function params + + if (helper != null + && (helperCountFail == 0 || !getTypedLayerConfiguration().isCudnnAllowFallback())) { + INDArray helperDelta = delta; + if (getTypedLayerConfiguration().getConvFormat() == CNN2DFormat.NHWC) + helperDelta = delta.permute(0, 2, 3, 1); // NCHW to NHWC + + if (!hasBias() && !(helper instanceof MKLDNNConvHelper)) { + // MKL-DNN supports no bias, CuDNN doesn't + if (dummyBiasGrad == null) { + try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { + dummyBiasGrad = Nd4j.create(1, getTypedLayerConfiguration().getNOut()); + } + } + biasGradView = dummyBiasGrad; + } + + Pair ret = null; + try { + ret = + helper.backpropGradient( + origInput, + weights, + bias, + helperDelta, + kernel, + strides, + pad, + biasGradView, + weightGradView, + afn, + getTypedLayerConfiguration().getCudnnAlgoMode(), + getTypedLayerConfiguration().getCudnnBwdFilterAlgo(), + getTypedLayerConfiguration().getCudnnBwdDataAlgo(), + convolutionMode, + dilation, + getTypedLayerConfiguration().getConvFormat(), + workspaceMgr); + } catch (ND4JOpProfilerException e) { + throw e; // NaN panic etc for debugging + } catch (Exception e) { + if (e.getMessage().contains("Failed to allocate")) { + // This is a memory exception - don't fallback to built-in implementation + throw e; + } + + if (getTypedLayerConfiguration().isCudnnAllowFallback()) { + helperCountFail++; + if (helper instanceof MKLDNNConvHelper) { + log.warn("MKL-DNN execution failed - falling back on built-in implementation", e); + } else { + log.warn("CuDNN execution failed - falling back on built-in implementation", e); + } + } else { + throw new RuntimeException( + "Error during ConvolutionLayer MKL/CuDNN helper backprop - isCudnnAllowFallback() is set to false", + e); + } + } + + if (ret != null) { + // Backprop dropout, if present + INDArray gradPostDropout = ret.getRight(); + gradPostDropout = backpropDropOutIfPresent(gradPostDropout); + ret.setSecond(gradPostDropout); + return ret; + } + } + + delta = delta.permute(1, 0, 2, 3); // To shape: [outDepth,miniBatch,outH,outW] + + // Note: due to the permute in preOut, and the fact that we essentially do a + // preOut.muli(epsilon), this reshape + // should be zero-copy; only possible exception being sometimes with the "identity" activation + // case + INDArray delta2d = + delta.reshape('c', outDepth, miniBatch * outH * outW); // Shape.newShapeNoCopy(delta,new + // int[]{outDepth,miniBatch*outH*outW},false); + + // Do im2col, but with order [miniB,outH,outW,depthIn,kH,kW]; but need to input + // [miniBatch,channels,kH,kW,outH,outW] given the current im2col implementation + // To get this: create an array of the order we want, permute it to the order required by im2col + // implementation, and then do im2col on that + // to get old order from required order: permute(0,3,4,5,1,2) + INDArray im2col2d = + p.getSecond(); // Re-use im2col2d array from forward pass if available; recalculate if not + if (im2col2d == null) { + 
INDArray col = + Nd4j.createUninitialized( + dataType, new long[] {miniBatch, outH, outW, inDepth, kH, kW}, 'c'); + INDArray col2 = col.permute(0, 3, 4, 5, 1, 2); + Convolution.im2col( + input, + kH, + kW, + strides[0], + strides[1], + pad[0], + pad[1], + dilation[0], + dilation[1], + convolutionMode == ConvolutionMode.Same, + col2); + // Shape im2col to 2d. Due to the permuting above, this should be a zero-copy reshape + im2col2d = col.reshape('c', miniBatch * outH * outW, inDepth * kH * kW); + } + + // Calculate weight gradients, using cc->c mmul. + // weightGradView2df is f order, but this is because it's transposed from c order + // Here, we are using the fact that AB = (B^T A^T)^T; output here (post transpose) is in c + // order, not usual f order + Nd4j.gemm(im2col2d, delta2d, weightGradView2df, true, true, 1.0, 0.0); + + // Flatten 4d weights to 2d... this again is a zero-copy op (unless weights are not originally + // in c order for some reason) + INDArray wPermuted = + weights.permute(3, 2, 1, 0); // Start with c order weights, switch order to f order + INDArray w2d = wPermuted.reshape('f', inDepth * kH * kW, outDepth); + + // Calculate epsilons for layer below, in 2d format (note: this is in 'image patch' format + // before col2im reduction) + // Note: cc -> f mmul here, then reshape to 6d in f order + INDArray epsNext2d = + w2d.mmul(delta2d); // TODO can we reuse im2col array instead of allocating new result array? + INDArray eps6d = + Shape.newShapeNoCopy(epsNext2d, new long[] {kW, kH, inDepth, outW, outH, miniBatch}, true); + + // Calculate epsilonNext by doing im2col reduction. + // Current col2im implementation expects input with order: [miniBatch,channels,kH,kW,outH,outW] + // currently have [kH,kW,inDepth,outW,outH,miniBatch] -> permute first + eps6d = eps6d.permute(5, 2, 1, 0, 4, 3); + INDArray epsNextOrig = + workspaceMgr.createUninitialized( + ArrayType.ACTIVATION_GRAD, + eps6d.dataType(), + new long[] {inDepth, miniBatch, inH, inW}, + 'c'); + + // Note: we are execute col2im in a way that the output array should be used in a stride 1 muli + // in the layer below... 
(same strides as zs/activations) + INDArray epsNext = epsNextOrig.permute(1, 0, 2, 3); + Convolution.col2im( + eps6d, epsNext, strides[0], strides[1], pad[0], pad[1], inH, inW, dilation[0], dilation[1]); + + Gradient retGradient = new DefaultGradient(); + if (getTypedLayerConfiguration().hasBias()) { + delta2d.sum(biasGradView, 1); // biasGradView is initialized/zeroed first in sum op + retGradient.setGradientFor(ConvolutionParamInitializer.BIAS_KEY, biasGradView); + } + retGradient.setGradientFor(ConvolutionParamInitializer.WEIGHT_KEY, weightGradView, 'c'); + + weightNoiseParams.clear(); + + epsNext = backpropDropOutIfPresent(epsNext); + + if (getTypedLayerConfiguration().getConvFormat() != CNN2DFormat.NCHW) { + epsNext = epsNext.permute(0, 2, 3, 1); // NCHW to NHWC + } + + return new Pair<>(retGradient, epsNext); + } + + /** + * preOutput4d: Used so that ConvolutionLayer subclasses (such as Convolution1D) can maintain + * their standard non-4d preOutput method, while overriding this to return 4d activations (for use + * in backprop) without modifying the public API + */ + protected Pair preOutput4d( + boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) { + return preOutput(training, forBackprop, workspaceMgr); + } + + protected void validateInputRank() { + // Input validation: expect rank 4 matrix + if (input.rank() != 4) { + String layerName = layerConfiguration.getName(); + if (layerName == null) layerName = "(not named)"; + throw new DL4JInvalidInputException( + "Got rank " + + input.rank() + + " array as input to ConvolutionLayer (layer name = " + + layerName + + ", layer index = " + + index + + ") with shape " + + Arrays.toString(input.shape()) + + ". " + + "Expected rank 4 array with shape [minibatchSize, layerInputDepth, inputHeight, inputWidth]." + + (input.rank() == 2 + ? " (Wrong input type (see InputType.convolutionalFlat()) or wrong data type?)" + : "") + + " " + + layerId()); + } + } + + protected void validateInputDepth(long inDepth) { + CNN2DFormat format = getTypedLayerConfiguration().getConvFormat(); + int dim = format == CNN2DFormat.NHWC ? 3 : 1; + if (input.size(dim) != inDepth) { + String layerName = layerConfiguration.getName(); + if (layerName == null) layerName = "(not named)"; + + String s = + "Cannot do forward pass in Convolution layer (layer name = " + + layerName + + ", layer index = " + + index + + "): input array channels does not match CNN layer configuration" + + " (data format = " + + format + + ", data input channels = " + + input.size(dim) + + ", " + + getTypedLayerConfiguration().getConvFormat().dimensionNames() + + "=" + + Arrays.toString(input.shape()) + + "; expected" + + " input channels = " + + inDepth + + ") " + + layerId(); + + int dimIfWrongFormat = format == CNN2DFormat.NHWC ? 1 : 3; + if (input.size(dimIfWrongFormat) == inDepth) { + // User might have passed NCHW data to a NHWC net, or vice versa? + s += "\n" + Convolution2DUtils.NCHW_NHWC_ERROR_MSG; + } + + throw new DL4JInvalidInputException(s); + } + } + + /** + * PreOutput method that also returns the im2col2d array (if being called for backprop), as this + * can be re-used instead of being calculated again. + * + * @param training Train or test time (impacts dropout) + * @param forBackprop If true: return the im2col2d array for re-use during backprop. False: return + * null for second pair entry. Note that it may still be null in the case of CuDNN and the + * like. 
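+   *     When non-null, backpropGradient() re-uses this cached array and skips recomputing im2col;
+   *     when it is null (for example after a CuDNN/MKL-DNN helper forward pass), im2col is
+   *     recalculated during backprop.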
+ * @return Pair of arrays: preOutput (activations) and optionally the im2col2d array + */ + protected Pair preOutput( + boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) { + assertInputSet(false); + INDArray bias = getParamWithNoise(ConvolutionParamInitializer.BIAS_KEY, training, workspaceMgr); + INDArray weights = + getParamWithNoise(ConvolutionParamInitializer.WEIGHT_KEY, training, workspaceMgr); + + validateInputRank(); + + INDArray input = this.input.castTo(dataType); + INDArray inputOrig = input; + if (getTypedLayerConfiguration().getConvFormat() == CNN2DFormat.NHWC) { + input = input.permute(0, 3, 1, 2).dup(); // NHWC to NCHW + } + + long miniBatch = input.size(0); + long outDepth = weights.size(0); + long inDepth = weights.size(1); + validateInputDepth(inDepth); + + long kH = weights.size(2); + long kW = weights.size(3); + + int[] dilation = getTypedLayerConfiguration().getDilation(); + int[] kernel = getTypedLayerConfiguration().getKernelSize(); + int[] strides = getTypedLayerConfiguration().getStride(); + + int[] pad; + int[] outSize; + if (convolutionMode == ConvolutionMode.Same) { + outSize = + Convolution2DUtils.getOutputSize( + input, + kernel, + strides, + null, + convolutionMode, + dilation, + CNN2DFormat.NCHW); // Note: hardcoded to NCHW due to permute earlier in this method + + if (input.size(2) > Integer.MAX_VALUE || input.size(3) > Integer.MAX_VALUE) + throw new ND4JArraySizeException(); + int[] inWidthHeight; + // if(layerConf().getCnn2dDataFormat() == CNN2DFormat.NCHW) + // TODO: Switch hardcoded state later. For now, convolution is implemented as + // switch to NCHW then permute back for NWHC + inWidthHeight = new int[] {(int) input.size(2), (int) input.size(3)}; + + /* else if(layerConf().getCnn2dDataFormat() == CNN2DFormat.NHWC) { + inWidthHeight = new int[] {(int) input.size(1), (int) input.size(2)}; + } + else + throw new IllegalStateException("No data format configured!");*/ + pad = + Convolution2DUtils.getSameModeTopLeftPadding( + outSize, inWidthHeight, kernel, strides, dilation); + } else { + pad = getTypedLayerConfiguration().getPadding(); + outSize = + Convolution2DUtils.getOutputSize( + input, + kernel, + strides, + pad, + convolutionMode, + dilation, + CNN2DFormat.NCHW); // Note: hardcoded to NCHW due to permute earlier in this method + } + + int outH = outSize[0]; + int outW = outSize[1]; + + if (helper != null + && (helperCountFail == 0 || !getTypedLayerConfiguration().isCudnnAllowFallback())) { + if (preOutput != null && forBackprop) { + return new Pair<>(preOutput, null); + } + + // For no-bias convolutional layers: use an empty (all 0s) value for biases + if (!hasBias()) { + if (dummyBias == null) { + try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) { + dummyBias = Nd4j.create(1, getTypedLayerConfiguration().getNOut()); + } + } + bias = dummyBias; + } + + INDArray ret = null; + try { + ret = + helper.preOutput( + inputOrig, + weights, + bias, + kernel, + strides, + pad, + getTypedLayerConfiguration().getCudnnAlgoMode(), + getTypedLayerConfiguration().getCudnnFwdAlgo(), + convolutionMode, + dilation, + getTypedLayerConfiguration().getConvFormat(), + workspaceMgr); + } catch (ND4JOpProfilerException e) { + throw e; // NaN panic etc for debugging + } catch (Exception e) { + if (e.getMessage() != null && e.getMessage().contains("Failed to allocate")) { + // This is a memory exception - don't fallback to built-in implementation + throw e; + } + + if (getTypedLayerConfiguration().isCudnnAllowFallback()) { + 
helperCountFail++; + if (helper instanceof MKLDNNConvHelper) { + log.warn("MKL-DNN execution failed - falling back on built-in implementation", e); + } else { + log.warn("CuDNN execution failed - falling back on built-in implementation", e); + } + } else { + throw new RuntimeException( + "Error during ConvolutionLayer MKL/CuDNN helper forward pass - isCudnnAllowFallback() is set to false", + e); + } + } + if (ret != null) { + return new Pair<>(ret, null); + } + } + + if (preOutput != null && i2d != null && forBackprop) { + return new Pair<>(preOutput, i2d); + } + + // im2col in the required order: want [outW,outH,miniBatch,depthIn,kH,kW], but need to input + // [miniBatch,channels,kH,kW,outH,outW] given the current im2col implementation + // To get this: create an array of the order we want, permute it to the order required by im2col + // implementation, and then do im2col on that + // to get old order from required order: permute(0,3,4,5,1,2) + // Post reshaping: rows are such that minibatch varies slowest, outW fastest as we step through + // the rows post-reshape + INDArray col = + Nd4j.createUninitialized( + weights.dataType(), new long[] {miniBatch, outH, outW, inDepth, kH, kW}, 'c'); + int[] permute = new int[] {0, 3, 4, 5, 1, 2}; + INDArray col2 = col.permute(permute); + INDArray im2ColIn = input.castTo(col2.dataType()); // No op if already (for example) float + if (kH > Integer.MAX_VALUE || kW > Integer.MAX_VALUE) throw new ND4JArraySizeException(); + Convolution.im2col( + im2ColIn, + (int) kH, + (int) kW, + strides[0], + strides[1], + pad[0], + pad[1], + dilation[0], + dilation[1], + convolutionMode == ConvolutionMode.Same, + col2); + + INDArray im2col2d = + Shape.newShapeNoCopy(col, new long[] {miniBatch * outH * outW, inDepth * kH * kW}, false); + + // Current order of weights: [depthOut,depthIn,kH,kW], c order + // Permute to give [kW,kH,depthIn,depthOut], f order + // Reshape to give [kW*kH*depthIn, depthOut]. This should always be zero-copy reshape, unless + // weights aren't in c order for some reason + INDArray permutedW = weights.permute(3, 2, 1, 0); + INDArray reshapedW = permutedW.reshape('f', kW * kH * inDepth, outDepth); + + // Do the MMUL; c and f orders in, f order out. output shape: [miniBatch*outH*outW,depthOut] + INDArray z = + workspaceMgr.createUninitialized( + ArrayType.ACTIVATIONS, + weights.dataType(), + new long[] {im2col2d.size(0), reshapedW.size(1)}, + 'f'); + im2col2d.mmuli(reshapedW, z); + + // Add biases, before reshaping. Note that biases are [1,depthOut] and currently z is + // [miniBatch*outH*outW,depthOut] -> addiRowVector + if (getTypedLayerConfiguration().hasBias()) { + z.addiRowVector(bias); + } + + // Now, reshape to [outW,outH,miniBatch,outDepth], and permute to have correct output order: + // [miniBatch,outDepth,outH,outW]; + z = Shape.newShapeNoCopy(z, new long[] {outW, outH, miniBatch, outDepth}, true); + z = z.permute(2, 3, 1, 0); + + if (training + && cacheMode != CacheMode.NONE + && workspaceMgr.hasConfiguration(ArrayType.FF_CACHE) + && workspaceMgr.isWorkspaceOpen(ArrayType.FF_CACHE)) { + try (MemoryWorkspace wsB = workspaceMgr.notifyScopeBorrowed(ArrayType.FF_CACHE)) { + i2d = im2col2d.unsafeDuplication(); + } + } + + if (getTypedLayerConfiguration().getConvFormat() == CNN2DFormat.NHWC) { + z = z.permute(0, 2, 3, 1); // NCHW to NHWC + z = workspaceMgr.dup(ArrayType.ACTIVATIONS, z); + } + + return new Pair<>(z, forBackprop ? 
im2col2d : null); + } + + @Override + public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) { + if (input == null) { + throw new IllegalArgumentException( + "Cannot perform forward pass with null input " + layerId()); + } + + if (cacheMode == null) cacheMode = CacheMode.NONE; + + applyDropOutIfNecessary(training, workspaceMgr); + + INDArray z = preOutput(training, false, workspaceMgr).getFirst(); + + // we do cache only if cache workspace exists. Skip otherwise + if (training + && cacheMode != CacheMode.NONE + && workspaceMgr.hasConfiguration(ArrayType.FF_CACHE) + && workspaceMgr.isWorkspaceOpen(ArrayType.FF_CACHE)) { + try (MemoryWorkspace wsB = workspaceMgr.notifyScopeBorrowed(ArrayType.FF_CACHE)) { + preOutput = z.unsafeDuplication(); + } + } + + // String afn = conf.getLayer().getActivationFunction(); + IActivation afn = getTypedLayerConfiguration().getActivationFn(); + + if (helper != null + && Shape.strideDescendingCAscendingF(z) + && (helperCountFail == 0 || !getTypedLayerConfiguration().isCudnnAllowFallback())) { + INDArray ret = null; + try { + ret = helper.activate(z, getTypedLayerConfiguration().getActivationFn(), training); + } catch (ND4JOpProfilerException e) { + throw e; // NaN panic etc for debugging + } catch (Exception e) { + if (e.getMessage() != null && e.getMessage().contains("Failed to allocate")) { + // This is a memory exception - don't fallback to built-in implementation + throw e; + } + + if (getTypedLayerConfiguration().isCudnnAllowFallback()) { + helperCountFail++; + if (helper instanceof MKLDNNConvHelper) { + log.warn("MKL-DNN execution failed - falling back on built-in implementation", e); + } else { + log.warn("CuDNN execution failed - falling back on built-in implementation", e); + } + } else { + throw new RuntimeException( + "Error during ConvolutionLayer MKL/CuDNN helper forward pass - isCudnnAllowFallback() is set to false", + e); + } + } + + if (ret != null) { + return ret; + } + } + + INDArray activation = afn.getActivation(z, training); + return activation; + } + + @Override + public boolean hasBias() { + return getTypedLayerConfiguration().hasBias(); + } + + @Override + public boolean isPretrainLayer() { + return false; + } + + @Override + public LayerHelper getHelper() { + return helper; + } + + @Override + public void fit(INDArray input, LayerWorkspaceMgr workspaceMgr) { + throw new UnsupportedOperationException("Not supported"); + } + + @Override + public void setParamsTable(INDArray paramsTable) { + // Override, as base layer does f order parameter flattening by default + setParams(paramsTable, 'c'); + } + + @Override + public Pair feedForwardMaskArray( + INDArray maskArray, MaskState currentMaskState, int minibatchSize) { + if (maskArray == null) { + // For same mode (with stride 1): output activations size is always same size as input + // activations size -> mask array is same size + return new Pair<>(maskArray, currentMaskState); + } + + INDArray outMask = + Convolution2DUtils.cnn2dMaskReduction( + maskArray, + getTypedLayerConfiguration().getKernelSize(), + getTypedLayerConfiguration().getStride(), + getTypedLayerConfiguration().getPadding(), + getTypedLayerConfiguration().getDilation(), + getTypedLayerConfiguration().getConvolutionMode()); + return new Pair<>(outMask, currentMaskState); + } +} diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/dense/DenseLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/dense/DenseLayer.java index 
799fafb78..11f48357e 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/dense/DenseLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/dense/DenseLayer.java @@ -47,7 +47,7 @@ public class DenseLayer extends BaseLayer { +public class RnnOutputLayer + extends BaseOutputLayer { - public RnnOutputLayer(LayerConfiguration conf, DataType dataType) { - super(conf, dataType); + public RnnOutputLayer(LayerConfiguration conf, DataType dataType) { + super(conf, dataType); + } + + @Override + public Pair backpropGradient( + INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { + assertInputSet(true); + if (input.rank() != 3) { + throw new UnsupportedOperationException( + "Input is not rank 3. RnnOutputLayer expects rank 3 input with shape [minibatch, layerInSize, sequenceLength]." + + " Got input with rank " + + input.rank() + + " and shape " + + Arrays.toString(input.shape()) + + " - " + + layerId()); } - @Override - public Pair backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { - assertInputSet(true); - if (input.rank() != 3) { - throw new UnsupportedOperationException( - "Input is not rank 3. RnnOutputLayer expects rank 3 input with shape [minibatch, layerInSize, sequenceLength]." + - " Got input with rank " + input.rank() + " and shape " + Arrays.toString(input.shape()) + " - " + layerId()); - } + RNNFormat format = getTypedLayerConfiguration().getDataFormat(); + int td = (format == RNNFormat.NCW) ? 2 : 1; //either NCW or NWC + Preconditions.checkState( + labels.rank() == 3, + "Expected rank 3 labels array, got label array with shape %ndShape", + labels); + Preconditions.checkState( + input.size(td) == labels.size(td), + "Sequence lengths do not match for RnnOutputLayer input and labels:" + + "Arrays should be rank 3 with shape [minibatch, size, sequenceLength] - " + + "mismatch on dimension 2 (sequence length) - input=%ndShape vs. label=%ndShape.\n", + input, "\n\n", + labels); - RNNFormat format = getTypedLayerConfiguration().getDataFormat(); - int td = (format == RNNFormat.NCW) ? 2 : 1; - Preconditions.checkState(labels.rank() == 3, "Expected rank 3 labels array, got label array with shape %ndShape", labels); - Preconditions.checkState(input.size(td) == labels.size(td), "Sequence lengths do not match for RnnOutputLayer input and labels:" + - "Arrays should be rank 3 with shape [minibatch, size, sequenceLength] - mismatch on dimension 2 (sequence length) - input=%ndShape vs. 
label=%ndShape", input, labels); - - - INDArray inputTemp = input; - if (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC){ - this.input = input.permute(0, 2, 1); - } - - this.input = TimeSeriesUtils.reshape3dTo2d(input, workspaceMgr, ArrayType.BP_WORKING_MEM); - - applyDropOutIfNecessary(true, workspaceMgr); //Edge case: we skip OutputLayer forward pass during training as this isn't required to calculate gradients - - Pair gradAndEpsilonNext = super.backpropGradient(epsilon, workspaceMgr); //Also applies dropout - this.input = inputTemp; - INDArray epsilon2d = gradAndEpsilonNext.getSecond(); - - INDArray epsilon3d = TimeSeriesUtils.reshape2dTo3d(epsilon2d, input.size(0), workspaceMgr, ArrayType.ACTIVATION_GRAD); - if (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC){ - epsilon3d = epsilon3d.permute(0, 2, 1); - } - weightNoiseParams.clear(); - - //epsilon3d = backpropDropOutIfPresent(epsilon3d); - return new Pair<>(gradAndEpsilonNext.getFirst(), epsilon3d); + INDArray inputTemp = input; + if (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC) { + this.input = input.permute(0, 2, 1); } - /**{@inheritDoc} - */ - @Override - public double f1Score(INDArray examples, INDArray labels) { - if (examples.rank() == 3) - examples = TimeSeriesUtils.reshape3dTo2d(examples, LayerWorkspaceMgr.noWorkspaces(), ArrayType.ACTIVATIONS); - if (labels.rank() == 3) - labels = TimeSeriesUtils.reshape3dTo2d(labels, LayerWorkspaceMgr.noWorkspaces(), ArrayType.ACTIVATIONS); - return super.f1Score(examples, labels); + this.input = TimeSeriesUtils.reshape3dTo2d(input, workspaceMgr, ArrayType.BP_WORKING_MEM); + + applyDropOutIfNecessary( + true, + workspaceMgr); // Edge case: we skip OutputLayer forward pass during training as this isn't + // required to calculate gradients + + Pair gradAndEpsilonNext = + super.backpropGradient(epsilon, workspaceMgr); // Also applies dropout + this.input = inputTemp; + INDArray epsilon2d = gradAndEpsilonNext.getSecond(); + + INDArray epsilon3d = + TimeSeriesUtils.reshape2dTo3d( + epsilon2d, input.size(0), workspaceMgr, ArrayType.ACTIVATION_GRAD); + if (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC) { + epsilon3d = epsilon3d.permute(0, 2, 1); + } + weightNoiseParams.clear(); + + // epsilon3d = backpropDropOutIfPresent(epsilon3d); + return new Pair<>(gradAndEpsilonNext.getFirst(), epsilon3d); + } + + /** {@inheritDoc} */ + @Override + public double f1Score(INDArray examples, INDArray labels) { + if (examples.rank() == 3) + examples = + TimeSeriesUtils.reshape3dTo2d( + examples, LayerWorkspaceMgr.noWorkspaces(), ArrayType.ACTIVATIONS); + if (labels.rank() == 3) + labels = + TimeSeriesUtils.reshape3dTo2d( + labels, LayerWorkspaceMgr.noWorkspaces(), ArrayType.ACTIVATIONS); + return super.f1Score(examples, labels); + } + + public INDArray getInput() { + return input; + } + + @Override + public Layer.Type type() { + return Layer.Type.RECURRENT; + } + + @Override + protected INDArray preOutput2d(boolean training, LayerWorkspaceMgr workspaceMgr) { + assertInputSet(false); + if (input.rank() == 3) { + // Case when called from RnnOutputLayer + INDArray inputTemp = input; + input = + (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC) + ? 
input.permute(0, 2, 1) + : input; + input = TimeSeriesUtils.reshape3dTo2d(input, workspaceMgr, ArrayType.FF_WORKING_MEM); + INDArray out = super.preOutput(training, workspaceMgr); + this.input = inputTemp; + return out; + } else { + // Case when called from BaseOutputLayer + INDArray out = super.preOutput(training, workspaceMgr); + return out; + } + } + + @Override + protected INDArray getLabels2d(LayerWorkspaceMgr workspaceMgr, ArrayType arrayType) { + INDArray labels = this.labels; + if (labels.rank() == 3) { + labels = + (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC) + ? labels.permute(0, 2, 1) + : labels; + return TimeSeriesUtils.reshape3dTo2d(labels, workspaceMgr, arrayType); + } + return labels; + } + + @Override + public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) { + INDArray input = this.input; + if (input.rank() != 3) + throw new UnsupportedOperationException( + "Input must be rank 3. Got input with rank " + input.rank() + " " + layerId()); + INDArray b = getParamWithNoise(DefaultParamInitializer.BIAS_KEY, training, workspaceMgr); + INDArray W = getParamWithNoise(DefaultParamInitializer.WEIGHT_KEY, training, workspaceMgr); + + applyDropOutIfNecessary(training, workspaceMgr); + if (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC) { + input = input.permute(0, 2, 1); + } + INDArray input2d = + TimeSeriesUtils.reshape3dTo2d( + input.castTo(W.dataType()), workspaceMgr, ArrayType.FF_WORKING_MEM); + + INDArray act2d = + getTypedLayerConfiguration() + .getActivationFn() + .getActivation(input2d.mmul(W).addiRowVector(b), training); + if (maskArray != null) { + if (!maskArray.isColumnVectorOrScalar() || Arrays.equals(maskArray.shape(), act2d.shape())) { + // Per output masking + act2d.muli(maskArray.castTo(act2d.dataType())); + } else { + // Per time step masking + act2d.muliColumnVector(maskArray.castTo(act2d.dataType())); + } } - public INDArray getInput() { - return input; + INDArray ret = + TimeSeriesUtils.reshape2dTo3d(act2d, input.size(0), workspaceMgr, ArrayType.ACTIVATIONS); + if (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC) { + ret = ret.permute(0, 2, 1); + } + return ret; + } + + @Override + public void setMaskArray(INDArray maskArray) { + if (maskArray != null) { + // Two possible cases: + // (a) per time step masking - rank 2 mask array -> reshape to rank 1 (column vector) + // (b) per output masking - rank 3 mask array -> reshape to rank 2 ( + if (maskArray.rank() == 2) { + this.maskArray = + TimeSeriesUtils.reshapeTimeSeriesMaskToVector( + maskArray, LayerWorkspaceMgr.noWorkspacesImmutable(), ArrayType.INPUT); + } else if (maskArray.rank() == 3) { + this.maskArray = + TimeSeriesUtils.reshape3dTo2d( + maskArray, LayerWorkspaceMgr.noWorkspacesImmutable(), ArrayType.INPUT); + } else { + throw new UnsupportedOperationException( + "Invalid mask array: must be rank 2 or 3 (got: rank " + + maskArray.rank() + + ", shape = " + + Arrays.toString(maskArray.shape()) + + ") " + + layerId()); + } + } else { + this.maskArray = null; + } + } + + @Override + public Pair feedForwardMaskArray( + INDArray maskArray, MaskState currentMaskState, int minibatchSize) { + + // If the *input* mask array is present and active, we should use it to mask the output + if (maskArray != null && currentMaskState == MaskState.Active) { + this.inputMaskArray = + TimeSeriesUtils.reshapeTimeSeriesMaskToVector( + maskArray, LayerWorkspaceMgr.noWorkspacesImmutable(), ArrayType.INPUT); + this.inputMaskArrayState = currentMaskState; + } else { + 
this.inputMaskArray = null; + this.inputMaskArrayState = null; } - @Override - public Layer.Type type() { - return Layer.Type.RECURRENT; + return null; // Last layer in network + } + + /** + * Compute the score for each example individually, after labels and input have been set. + * + * @param fullNetRegTerm Regularization score term for the entire network (or, 0.0 to not include + * regularization) + * @return A column INDArray of shape [numExamples,1], where entry i is the score of the ith + * example + */ + @Override + public INDArray computeScoreForExamples(double fullNetRegTerm, LayerWorkspaceMgr workspaceMgr) { + // For RNN: need to sum up the score over each time step before returning. + + if (input == null || labels == null) + throw new IllegalStateException( + "Cannot calculate score without input and labels " + layerId()); + INDArray preOut = preOutput2d(false, workspaceMgr); + + ILossFunction lossFunction = getTypedLayerConfiguration().getLossFunction(); + INDArray scoreArray = + lossFunction.computeScoreArray( + getLabels2d(workspaceMgr, ArrayType.FF_WORKING_MEM), + preOut, + getTypedLayerConfiguration().getActivationFn(), + maskArray); + // scoreArray: shape [minibatch*timeSeriesLength, 1] + // Reshape it to [minibatch, timeSeriesLength] then sum over time step + + INDArray scoreArrayTs = + TimeSeriesUtils.reshapeVectorToTimeSeriesMask(scoreArray, (int) input.size(0)); + INDArray summedScores = scoreArrayTs.sum(true, 1); + + if (fullNetRegTerm != 0.0) { + summedScores.addi(fullNetRegTerm); } - @Override - protected INDArray preOutput2d(boolean training, LayerWorkspaceMgr workspaceMgr) { - assertInputSet(false); - if (input.rank() == 3) { - //Case when called from RnnOutputLayer - INDArray inputTemp = input; - input = (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC) ? input.permute(0, 2, 1):input; - input = TimeSeriesUtils.reshape3dTo2d(input, workspaceMgr, ArrayType.FF_WORKING_MEM); - INDArray out = super.preOutput(training, workspaceMgr); - this.input = inputTemp; - return out; - } else { - //Case when called from BaseOutputLayer - INDArray out = super.preOutput(training, workspaceMgr); - return out; - } - } - - @Override - protected INDArray getLabels2d(LayerWorkspaceMgr workspaceMgr, ArrayType arrayType) { - INDArray labels = this.labels; - if (labels.rank() == 3){ - labels = (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC) ? labels.permute(0, 2, 1) : labels; - return TimeSeriesUtils.reshape3dTo2d(labels, workspaceMgr, arrayType); - } - return labels; - } - - @Override - public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) { - INDArray input = this.input; - if (input.rank() != 3) - throw new UnsupportedOperationException( - "Input must be rank 3. 
Got input with rank " + input.rank() + " " + layerId()); - INDArray b = getParamWithNoise(DefaultParamInitializer.BIAS_KEY, training, workspaceMgr); - INDArray W = getParamWithNoise(DefaultParamInitializer.WEIGHT_KEY, training, workspaceMgr); - - applyDropOutIfNecessary(training, workspaceMgr); - if (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC){ - input = input.permute(0, 2, 1); - } - INDArray input2d = TimeSeriesUtils.reshape3dTo2d(input.castTo(W.dataType()), workspaceMgr, ArrayType.FF_WORKING_MEM); - - INDArray act2d = getTypedLayerConfiguration().getActivationFn().getActivation(input2d.mmul(W).addiRowVector(b), training); - if (maskArray != null) { - if(!maskArray.isColumnVectorOrScalar() || Arrays.equals(maskArray.shape(), act2d.shape())){ - //Per output masking - act2d.muli(maskArray.castTo(act2d.dataType())); - } else { - //Per time step masking - act2d.muliColumnVector(maskArray.castTo(act2d.dataType())); - } - } - - INDArray ret = TimeSeriesUtils.reshape2dTo3d(act2d, input.size(0), workspaceMgr, ArrayType.ACTIVATIONS); - if (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC){ - ret = ret.permute(0, 2, 1); - } - return ret; - } - - @Override - public void setMaskArray(INDArray maskArray) { - if (maskArray != null) { - //Two possible cases: - //(a) per time step masking - rank 2 mask array -> reshape to rank 1 (column vector) - //(b) per output masking - rank 3 mask array -> reshape to rank 2 ( - if (maskArray.rank() == 2) { - this.maskArray = TimeSeriesUtils.reshapeTimeSeriesMaskToVector(maskArray, LayerWorkspaceMgr.noWorkspacesImmutable(), ArrayType.INPUT); - } else if (maskArray.rank() == 3) { - this.maskArray = TimeSeriesUtils.reshape3dTo2d(maskArray, LayerWorkspaceMgr.noWorkspacesImmutable(), ArrayType.INPUT); - } else { - throw new UnsupportedOperationException( - "Invalid mask array: must be rank 2 or 3 (got: rank " + maskArray.rank() + ", shape = " - + Arrays.toString(maskArray.shape()) + ") " + layerId()); - } - } else { - this.maskArray = null; - } - } - - @Override - public Pair feedForwardMaskArray(INDArray maskArray, MaskState currentMaskState, - int minibatchSize) { - - //If the *input* mask array is present and active, we should use it to mask the output - if (maskArray != null && currentMaskState == MaskState.Active) { - this.inputMaskArray = TimeSeriesUtils.reshapeTimeSeriesMaskToVector(maskArray, LayerWorkspaceMgr.noWorkspacesImmutable(), ArrayType.INPUT); - this.inputMaskArrayState = currentMaskState; - } else { - this.inputMaskArray = null; - this.inputMaskArrayState = null; - } - - return null; //Last layer in network - } - - /**Compute the score for each example individually, after labels and input have been set. - * - * @param fullNetRegTerm Regularization score term for the entire network (or, 0.0 to not include regularization) - * @return A column INDArray of shape [numExamples,1], where entry i is the score of the ith example - */ - @Override - public INDArray computeScoreForExamples(double fullNetRegTerm, LayerWorkspaceMgr workspaceMgr) { - //For RNN: need to sum up the score over each time step before returning. 
- - if (input == null || labels == null) - throw new IllegalStateException("Cannot calculate score without input and labels " + layerId()); - INDArray preOut = preOutput2d(false, workspaceMgr); - - ILossFunction lossFunction = getTypedLayerConfiguration().getLossFunction(); - INDArray scoreArray = - lossFunction.computeScoreArray(getLabels2d(workspaceMgr, ArrayType.FF_WORKING_MEM), preOut, - getTypedLayerConfiguration().getActivationFn(), maskArray); - //scoreArray: shape [minibatch*timeSeriesLength, 1] - //Reshape it to [minibatch, timeSeriesLength] then sum over time step - - INDArray scoreArrayTs = TimeSeriesUtils.reshapeVectorToTimeSeriesMask(scoreArray, (int)input.size(0)); - INDArray summedScores = scoreArrayTs.sum(true, 1); - - if (fullNetRegTerm != 0.0) { - summedScores.addi(fullNetRegTerm); - } - - return summedScores; - } + return summedScores; + } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/wrapper/BaseWrapperLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/wrapper/BaseWrapperLayer.java index 7c5c621a9..817d62778 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/wrapper/BaseWrapperLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/wrapper/BaseWrapperLayer.java @@ -47,7 +47,7 @@ import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator; @Data -public abstract class BaseWrapperLayer extends AbstractLayer { +public abstract class BaseWrapperLayer extends AbstractLayer { protected Layer underlying; @@ -57,8 +57,8 @@ public abstract class BaseWrapperLayer extends AbstractLayer { } @Override - public BaseLayerConfiguration getTypedLayerConfiguration() { - return (BaseLayerConfiguration) underlying.getLayerConfiguration(); + public LayerConf_T getTypedLayerConfiguration() { + return (LayerConf_T) underlying.getLayerConfiguration(); } /** diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java index 5342864a2..7cc08a62d 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java @@ -712,7 +712,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork if (layer_conf instanceof BaseLayerConfiguration) ((BaseLayerConfiguration) layer_conf).setDataType(netDtype); - nParamsPerLayer[i] = layer_conf.initializer().numParams(layer_conf); + nParamsPerLayer[i] = layer_conf.numParams(); paramLength += nParamsPerLayer[i]; } log.debug( diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/ConvolutionNewParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/ConvolutionNewParamInitializer.java new file mode 100644 index 000000000..e09c2610d --- /dev/null +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/ConvolutionNewParamInitializer.java @@ -0,0 +1,183 @@ +/* + * ****************************************************************************** + * * + * * + * * This program and the accompanying materials are made available under the + * * terms of the Apache License, Version 2.0 which is available at + * * https://www.apache.org/licenses/LICENSE-2.0. 
+ * * + * * See the NOTICE file distributed with this work for additional + * * information regarding copyright ownership. + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * * License for the specific language governing permissions and limitations + * * under the License. + * * + * * SPDX-License-Identifier: Apache-2.0 + * ***************************************************************************** + */ + +package org.deeplearning4j.nn.params; + + +import java.util.*; +import lombok.val; +import org.deeplearning4j.nn.api.AbstractParamInitializer; +import org.deeplearning4j.nn.conf.layers.Convolution1DNew; +import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; +import org.deeplearning4j.nn.conf.layers.LayerConfiguration; +import org.deeplearning4j.nn.weights.WeightInitUtil; +import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.indexing.NDArrayIndex; + +public class ConvolutionNewParamInitializer extends AbstractParamInitializer { + + private static final ConvolutionNewParamInitializer INSTANCE = new ConvolutionNewParamInitializer(); + + public static ConvolutionNewParamInitializer getInstance() { + return INSTANCE; + } + + + public final static String WEIGHT_KEY = DefaultParamInitializer.WEIGHT_KEY; + public final static String BIAS_KEY = DefaultParamInitializer.BIAS_KEY; + + @Override + public long numParams(LayerConfiguration l) { + return l.numParams(); + } + + @Override + public List paramKeys(LayerConfiguration layer) { + ConvolutionLayer layerConf = + (ConvolutionLayer) layer; + if(layerConf.hasBias()){ + return Arrays.asList(WEIGHT_KEY, BIAS_KEY); + } else { + return weightKeys(layer); + } + } + + @Override + public List weightKeys(LayerConfiguration layer) { + return Collections.singletonList(WEIGHT_KEY); + } + + @Override + public List biasKeys(LayerConfiguration layer) { + ConvolutionLayer layerConf = + (ConvolutionLayer) layer; + if(layerConf.hasBias()){ + return Collections.singletonList(BIAS_KEY); + } else { + return Collections.emptyList(); + } + } + + @Override + public boolean isWeightParam(LayerConfiguration layer, String key) { + return WEIGHT_KEY.equals(key); + } + + @Override + public boolean isBiasParam(LayerConfiguration layer, String key) { + return BIAS_KEY.equals(key); + } + + @Override + public Map init(LayerConfiguration conf, INDArray paramsView, boolean initializeParams) { + Convolution1DNew layer = (Convolution1DNew) conf; + if (layer.getKernelSize().length != 2) throw new IllegalArgumentException("Filter size must be == 2"); + + Map params = Collections.synchronizedMap(new LinkedHashMap()); + + Convolution1DNew layerConf = + (Convolution1DNew) conf; + + val nOut = layerConf.getNOut(); + + if(layer.hasBias()){ + //Standard case + INDArray biasView = paramsView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(0, nOut)); + INDArray weightView = paramsView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(nOut, numParams(conf))); + params.put(BIAS_KEY, createBias(conf, biasView, initializeParams)); + params.put(WEIGHT_KEY, createWeightMatrix(conf, weightView, initializeParams)); + conf.getNetConfiguration().addNetWideVariable(WEIGHT_KEY); + conf.getNetConfiguration().addNetWideVariable(BIAS_KEY); + } else { + INDArray weightView = paramsView; + params.put(WEIGHT_KEY, createWeightMatrix(conf, weightView, initializeParams)); + 
conf.getNetConfiguration().addNetWideVariable(WEIGHT_KEY); + } + + return params; + } + + @Override + public Map getGradientsFromFlattened(LayerConfiguration conf, INDArray gradientView) { + + Convolution1DNew layerConf = + (Convolution1DNew) conf; + + int[] kernel = layerConf.getKernelSize(); + val nIn = layerConf.getNIn(); + val nOut = layerConf.getNOut(); + + Map out = new LinkedHashMap<>(); + if(layerConf.hasBias()){ + //Standard case + INDArray biasGradientView = gradientView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(0, nOut)); + INDArray weightGradientView = + gradientView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(nOut, numParams(conf))) + .reshape('c', nOut, nIn, kernel[0], kernel[1]); + out.put(BIAS_KEY, biasGradientView); + out.put(WEIGHT_KEY, weightGradientView); + } else { + INDArray weightGradientView = gradientView.reshape('c', nOut, nIn, kernel[0], kernel[1]); + out.put(WEIGHT_KEY, weightGradientView); + } + return out; + } + + //1 bias per feature map + protected INDArray createBias(LayerConfiguration conf, INDArray biasView, boolean initializeParams) { + //the bias is a 1D tensor -- one bias per output feature map + Convolution1DNew layerConf = + (Convolution1DNew) conf; + if (initializeParams) + biasView.assign(layerConf.getBiasInit()); + return biasView; + } + + + protected INDArray createWeightMatrix(LayerConfiguration conf, INDArray weightView, boolean initializeParams) { + /* + Create a 4d weight matrix of: + (number of kernels, num input channels, kernel height, kernel width) + Note c order is used specifically for the CNN weights, as opposed to f order elsewhere + Inputs to the convolution layer are: + (batch size, num input feature maps, image height, image width) + */ + Convolution1DNew layerConf = + (Convolution1DNew) conf; + if (initializeParams) { + int[] kernel = layerConf.getKernelSize(); + int[] stride = layerConf.getStride(); + + val inputDepth = layerConf.getNIn(); + val outputDepth = layerConf.getNOut(); + + double fanIn = inputDepth * kernel[0] * kernel[1]; + double fanOut = outputDepth * kernel[0] * kernel[1] / ((double) stride[0] * stride[1]); + + val weightsShape = new long[] {outputDepth, inputDepth, kernel[0], kernel[1]}; + + return layerConf.getWeightInit().init(fanIn, fanOut, weightsShape, 'c', weightView); + } else { + int[] kernel = layerConf.getKernelSize(); + return WeightInitUtil.reshapeWeights( + new long[] {layerConf.getNOut(), layerConf.getNIn(), kernel[0], kernel[1]}, weightView, 'c'); + } + } +} diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/Convolution1DUtils.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/Convolution1DUtils.java index 2f0e9bef5..e0d1e8b9e 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/Convolution1DUtils.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/Convolution1DUtils.java @@ -20,7 +20,6 @@ package org.deeplearning4j.util; - import java.util.Arrays; import org.deeplearning4j.exception.DL4JInvalidConfigException; import org.deeplearning4j.exception.DL4JInvalidInputException; @@ -35,281 +34,332 @@ import org.nd4j.linalg.exception.ND4JArraySizeException; public class Convolution1DUtils { - private static final int ONE = 1; + private static final int ONE = 1; + private Convolution1DUtils() {} - private Convolution1DUtils() { + public static int getOutputSize( + INDArray inputData, int kernel, int strides, int padding, ConvolutionMode convolutionMode) { + return getOutputSize(inputData, kernel, 
strides, padding, convolutionMode, ONE); + } + + /** + * Returns true if the given layer has an {@link RNNFormat}. This is true for: {@link + * Convolution1D}, {@link Subsampling1DLayer} {@link SimpleRnn} {@link LSTM} {@link + * EmbeddingSequenceLayer} + * + * @param layer the layer to test + * @return true if the input layer has an rnn format false otherwise + */ + public static boolean hasRnnDataFormat(LayerConfiguration layer) { + return layer instanceof Convolution1D + || layer instanceof Convolution1D + || layer instanceof Subsampling1DLayer + || layer instanceof SimpleRnn + || layer instanceof LSTM + || layer instanceof EmbeddingSequenceLayer; + } + + /** + * Get the {@link RNNFormat} for the given layer. Throws an {@link IllegalArgumentException} if a + * layer doesn't have an rnn format + * + * @param layer the layer to get the format for + * @return the format for the layer + */ + public static RNNFormat getRnnFormatFromLayer(LayerConfiguration layer) { + Preconditions.checkState( + hasRnnDataFormat(layer), + "ILayer of type " + + layer.getClass().getName() + + " and name " + + layer.getName() + + " does not have an RNNFormat"); + if (layer instanceof SimpleRnn) { + SimpleRnn simpleRnn = (SimpleRnn) layer; + return simpleRnn.getDataFormat(); + } else if (layer instanceof Convolution1D) { + Convolution1D convolution1D = (Convolution1D) layer; + return convolution1D.getRnnDataFormat(); + } else if (layer instanceof Convolution1D) { + Convolution1D convolution1D = (Convolution1D) layer; + return convolution1D.getRnnDataFormat(); + } else if (layer instanceof Subsampling1DLayer) { + Subsampling1DLayer subsampling1DLayer = (Subsampling1DLayer) layer; + return subsampling1DLayer.getDataFormat() == CNN2DFormat.NCHW ? RNNFormat.NCW : RNNFormat.NWC; + } else if (layer instanceof LSTM) { + LSTM lstm = (LSTM) layer; + return lstm.getDataFormat(); + } else if (layer instanceof EmbeddingSequenceLayer) { + EmbeddingSequenceLayer embeddingSequenceLayer = (EmbeddingSequenceLayer) layer; + return embeddingSequenceLayer.getOutputDataFormat(); + } else { + throw new IllegalArgumentException( + "Illegal layer type " + layer.getClass().getName() + " and name " + layer.getName()); + } + } + + /** + * Reshapes the given weight array or weight gradient to work with the specified {@link RNNFormat} + * + * @param w the weight array or gradient + * @param rnnFormat the {@link RNNFormat} to use + * @return the reshaped array. + */ + public static INDArray reshapeWeightArrayOrGradientForFormat(INDArray w, RNNFormat rnnFormat) { + + if (rnnFormat == RNNFormat.NWC) + w = w.reshape(w.ordering(), w.size(0), w.size(1), w.size(2)) + .permute(2, 1, 0); // [oC, iC, k, 1] to [k, iC, oC] + else { + w = w.reshape(w.ordering(), w.size(2), w.size(1), w.size(0)); } + return w; + } - public static int getOutputSize(INDArray inputData, int kernel, int strides, int padding, - ConvolutionMode convolutionMode) { - return getOutputSize(inputData, kernel, strides, padding, convolutionMode, ONE); + /** + * Get the output size (height) for the given input data and CNN1D configuration + * + * @param inH Input size (height, or channels). 
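+   *     For example, inH=10, kernel=3, strides=1, padding=0 and dilation=1 gives
+   *     (10 - 3 + 2*0)/1 + 1 = 8 in Strict/Truncate mode, and ceil(10/1) = 10 in Same/Causal mode.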
+ * @param kernel Kernel size + * @param strides Stride + * @param padding Padding + * @param convolutionMode Convolution mode (Same, Strict, Truncate) + * @param dilation Kernel dilation + * @return Output size (width) + */ + public static long getOutputSize( + long inH, + int kernel, + int strides, + int padding, + ConvolutionMode convolutionMode, + int dilation) { + long eKernel = effectiveKernelSize(kernel, dilation); + if (convolutionMode == ConvolutionMode.Same || convolutionMode == ConvolutionMode.Causal) { + return (int) Math.ceil(inH / ((double) strides)); + } + return (inH - eKernel + 2L * padding) / strides + 1; + } + + /** + * Get the output size (height) for the given input data and CNN1D configuration + * + * @param inputData Input data + * @param kernel Kernel size + * @param strides Stride + * @param padding Padding + * @param convolutionMode Convolution mode (Same, Strict, Truncate) + * @param dilation Kernel dilation + * @return Output size (width) + */ + public static int getOutputSize( + INDArray inputData, + int kernel, + int strides, + int padding, + ConvolutionMode convolutionMode, + int dilation) { + if (inputData.size(2) > Integer.MAX_VALUE) throw new ND4JArraySizeException(); + int inH = (int) inputData.size(2); + int eKernel = effectiveKernelSize(kernel, dilation); + boolean atrous = (eKernel == kernel); + validateShapes(inputData, eKernel, strides, padding, convolutionMode, dilation, inH, atrous); + + if (convolutionMode == ConvolutionMode.Same || convolutionMode == ConvolutionMode.Causal) { + int outH = (int) Math.ceil(inH / ((double) strides)); + return outH; } - /** - * Returns true if the given layer has an - * {@link RNNFormat}. - * This is true for: - * {@link Convolution1D}, - * {@link Subsampling1DLayer} - * {@link SimpleRnn} - * {@link LSTM} - * {@link EmbeddingSequenceLayer} - * @param layer the layer to test - * @return true if the input layer has an rnn format - * false otherwise - */ - public static boolean hasRnnDataFormat(LayerConfiguration layer) { - return layer instanceof Convolution1D || - layer instanceof Convolution1D || - layer instanceof Subsampling1DLayer || - layer instanceof SimpleRnn || - layer instanceof LSTM || - layer instanceof EmbeddingSequenceLayer; + int outH = (inH - eKernel + 2 * padding) / strides + 1; + return outH; + } + + public static void validateShapes( + INDArray inputData, + int eKernel, + int strides, + int padding, + ConvolutionMode convolutionMode, + int dilation, + int inShape, + boolean atrous) { + + int inH = inShape; + boolean t = convolutionMode == ConvolutionMode.Truncate; + + if (t && (eKernel <= 0 || eKernel > inH + 2 * padding)) { + StringBuilder sb = new StringBuilder(); + sb.append("Invalid input data or configuration: "); + if (atrous) sb.append("effective "); + sb.append("kernel height and input height must satisfy 0 < "); + if (atrous) sb.append("effective "); + sb.append("kernel height <= input height + 2 * padding height. \nGot "); + if (atrous) sb.append("effective "); + sb.append("kernel height = ") + .append(eKernel) + .append(", input height = ") + .append(inH) + .append(" and padding height = ") + .append(padding) + .append(" which do not satisfy 0 < ") + .append(eKernel) + .append(" <= ") + .append(inH + 2 * padding) + .append(getCommonErrorMsg(inputData, eKernel, strides, padding, dilation)); + + throw new DL4JInvalidInputException(sb.toString()); } - /** - * Get the {@link RNNFormat} for the given layer. 
- * Throws an {@link IllegalArgumentException} - * if a layer doesn't have an rnn format - * @param layer the layer to get the format for - * @return the format for the layer - */ - public static RNNFormat getRnnFormatFromLayer(LayerConfiguration layer) { - Preconditions.checkState(hasRnnDataFormat(layer),"ILayer of type " + layer.getClass().getName() + " and name " + layer.getName() + " does not have an RNNFormat"); - if(layer instanceof SimpleRnn) { - SimpleRnn simpleRnn = (SimpleRnn) layer; - return simpleRnn.getDataFormat(); - } else if(layer instanceof Convolution1D) { - Convolution1D convolution1D = (Convolution1D) layer; - return convolution1D.getRnnDataFormat(); - } else if(layer instanceof Convolution1D) { - Convolution1D convolution1D = (Convolution1D) layer; - return convolution1D.getRnnDataFormat(); - } else if(layer instanceof Subsampling1DLayer) { - Subsampling1DLayer subsampling1DLayer = (Subsampling1DLayer) layer; - return subsampling1DLayer.getDataFormat() == CNN2DFormat.NCHW ? RNNFormat.NCW : RNNFormat.NWC; - } else if(layer instanceof LSTM) { - LSTM lstm = (LSTM) layer; - return lstm.getDataFormat(); - } else if(layer instanceof EmbeddingSequenceLayer) { - EmbeddingSequenceLayer embeddingSequenceLayer = (EmbeddingSequenceLayer) layer; - return embeddingSequenceLayer.getOutputDataFormat(); - } - else { - throw new IllegalArgumentException("Illegal layer type " + layer.getClass().getName() + " and name " + layer.getName()); - } + if (convolutionMode == ConvolutionMode.Strict) { + if ((inH - eKernel + 2 * padding) % strides != 0) { + double d = (inH - eKernel + 2 * padding) / ((double) strides) + 1.0; + String str = String.format("%.2f", d); + int truncated = (int) d; + int sameSize = (int) Math.ceil(inH / ((double) strides)); + + String sb = + "Invalid input data or configuration: Combination of kernel size, " + + "stride and padding are not " + + "valid for given input height, using ConvolutionMode.Strict\n" + + "ConvolutionMode.Strict requires: output height = (input height - kernelSize + " + + "2*padding)/stride + 1 to be an integer. Got: (" + + inH + + " - " + + eKernel + + " + 2*" + + padding + + ")/" + + strides + + " + 1 = " + + str + + "\n" + + "See \"Constraints on strides\" at http://cs231n.github." + + "io/convolutional-networks/ and ConvolutionType enumeration Javadoc.\n" + + "To truncate/crop the input, such that output height = floor(" + + str + + ") = " + + truncated + + ", use ConvolutionType.Truncate.\n" + + "Alternatively use ConvolutionType.Same, which will use padding to give an " + + "output height of ceil(" + + inH + + "/" + + strides + + ")=" + + sameSize + + getCommonErrorMsg(inputData, eKernel, strides, padding, dilation); + + throw new DL4JInvalidConfigException(sb); + } } + } - /** - * Reshapes the given weight - * array or weight gradient - * to work with the specified - * {@link RNNFormat} - * @param w the weight array or gradient - * @param rnnFormat the {@link RNNFormat} to use - * @return the reshaped array. 
- */ - public static INDArray reshapeWeightArrayOrGradientForFormat(INDArray w, RNNFormat rnnFormat) { - if(rnnFormat == RNNFormat.NWC) - w = w.reshape(w.ordering(), w.size(0), w.size(1), w.size(2)).permute(2, 1, 0); //[oC, iC, k, 1] to [k, iC, oC] - else { - w = w.reshape(w.ordering(),w.size(2),w.size(1),w.size(0)); - } - - return w; + public static int effectiveKernelSize(int kernel, int dilation) { + // Determine the effective kernel size, accounting for dilation + // http://deeplearning.net/software/theano/tutorial/conv_arithmetic.html#dilated-convolutions + if (dilation == 1) { + return kernel; + } else { + return kernel + (kernel - 1) * (dilation - 1); } + } - - /** - * Get the output size (height) for the given input data and CNN1D configuration - * - * @param inH Input size (height, or channels). - * @param kernel Kernel size - * @param strides Stride - * @param padding Padding - * @param convolutionMode Convolution mode (Same, Strict, Truncate) - * @param dilation Kernel dilation - * @return Output size (width) - */ - public static long getOutputSize(long inH, int kernel, int strides, int padding, - ConvolutionMode convolutionMode, int dilation) { - long eKernel = effectiveKernelSize(kernel, dilation); - if (convolutionMode == ConvolutionMode.Same || convolutionMode == ConvolutionMode.Causal) { - return (int) Math.ceil(inH / ((double) strides)); - } - return (inH - eKernel + 2L * padding) / strides + 1; + private static String getCommonErrorMsg( + INDArray inputData, int kernel, int strides, int padding, int dilation) { + String s = + "\nInput size: [numExamples,inputDepth,inputHeight,inputWidth]=" + + Arrays.toString(inputData.shape()) + + ", inputKernel=" + + kernel; + if (dilation != 1) { + int effectiveKernel = effectiveKernelSize(kernel, dilation); + s += ", effectiveKernelGivenDilation=" + effectiveKernel; } + return s + ", stride=" + strides + ", padding=" + padding + ", dilation=" + dilation; + } - /** - * Get the output size (height) for the given input data and CNN1D configuration - * - * @param inputData Input data - * @param kernel Kernel size - * @param strides Stride - * @param padding Padding - * @param convolutionMode Convolution mode (Same, Strict, Truncate) - * @param dilation Kernel dilation - * @return Output size (width) - */ - public static int getOutputSize(INDArray inputData, int kernel, int strides, int padding, - ConvolutionMode convolutionMode, int dilation) { - if (inputData.size(2) > Integer.MAX_VALUE) - throw new ND4JArraySizeException(); - int inH = (int) inputData.size(2); - int eKernel = effectiveKernelSize(kernel, dilation); - boolean atrous = (eKernel == kernel); - validateShapes(inputData, eKernel, strides, padding, convolutionMode, dilation, inH, atrous); - - if (convolutionMode == ConvolutionMode.Same || convolutionMode == ConvolutionMode.Causal) { - int outH = (int) Math.ceil(inH / ((double) strides)); - return outH; - } - - int outH = (inH - eKernel + 2 * padding) / strides + 1; - return outH; + /** Check that the convolution mode is consistent with the padding specification */ + public static void validateConvolutionModePadding(ConvolutionMode mode, int padding) { + if (mode == ConvolutionMode.Same) { + boolean nullPadding = padding == 0; + if (!nullPadding) + throw new IllegalArgumentException( + "Padding cannot be used when using the `same' convolution mode"); } + } - public static void validateShapes(INDArray inputData, int eKernel, int strides, int padding, - ConvolutionMode convolutionMode, int dilation, int inShape, - boolean atrous) 
+  /**
+   * Get top padding for same mode only.
+   *
+   * @param outSize Output size (height/sequence dimension)
+   * @param inSize Input size (height/sequence dimension)
+   * @param kernel Kernel size
+   * @param strides Stride
+   * @param dilation Kernel dilation
+   * @return Top/left padding
+   */
+  public static int getSameModeTopLeftPadding(
+      int outSize, int inSize, int kernel, int strides, int dilation) {
+    int eKernel = effectiveKernelSize(kernel, dilation);
+    // Note that padBottom is 1 bigger than this if bracketed term is not divisible by 2
+    int outPad = ((outSize - 1) * strides + eKernel - inSize) / 2;
+    Preconditions.checkState(
+        outPad >= 0,
+        "Invalid padding values calculated: %s - "
+            + "layer configuration is invalid? Input size %s, output size %s, kernel %s, "
+            + "strides %s, dilation %s",
+        outPad,
+        inSize,
+        outSize,
+        kernel,
+        strides,
+        dilation);
+    return outPad;
+  }
-        int inH = inShape;
-        boolean t = convolutionMode == ConvolutionMode.Truncate;
+  public static int getSameModeBottomRightPadding(
+      int outSize, int inSize, int kernel, int strides, int dilation) {
+    int eKernel = effectiveKernelSize(kernel, dilation);
+    int totalPad = ((outSize - 1) * strides + eKernel - inSize);
+    int tlPad = totalPad / 2;
+    int brPad = totalPad - tlPad;
+    Preconditions.checkState(
+        brPad >= 0,
+        "Invalid padding values (right) calculated: %s - "
+            + "layer configuration is invalid? Input size %s, output size %s, kernel %s, "
+            + "strides %s, dilation %s",
+        brPad,
+        inSize,
+        outSize,
+        kernel,
+        strides,
+        dilation);
+    return brPad;
+  }
-        if (t && (eKernel <= 0 || eKernel > inH + 2 * padding)) {
-            StringBuilder sb = new StringBuilder();
-            sb.append("Invalid input data or configuration: ");
-            if (atrous) sb.append("effective ");
-            sb.append("kernel height and input height must satisfy 0 < ");
-            if (atrous) sb.append("effective ");
-            sb.append("kernel height <= input height + 2 * padding height. \nGot ");
-            if (atrous) sb.append("effective ");
-            sb.append("kernel height = ").append(eKernel).append(", input height = ").append(inH)
-                            .append(" and padding height = ").append(padding).append(" which do not satisfy 0 < ")
-                            .append(eKernel).append(" <= ").append(inH + 2 * padding)
-                            .append(getCommonErrorMsg(inputData, eKernel, strides, padding, dilation));
-
-            throw new DL4JInvalidInputException(sb.toString());
-        }
-
-
-        if (convolutionMode == ConvolutionMode.Strict) {
-            if ((inH - eKernel + 2 * padding) % strides != 0) {
-                double d = (inH - eKernel + 2 * padding) / ((double) strides) + 1.0;
-                String str = String.format("%.2f", d);
-                int truncated = (int) d;
-                int sameSize = (int) Math.ceil(inH / ((double) strides));
-
-                String sb = "Invalid input data or configuration: Combination of kernel size, " +
-                            "stride and padding are not " +
-                            "valid for given input height, using ConvolutionMode.Strict\n" +
-                            "ConvolutionMode.Strict requires: output height = (input height - kernelSize + " +
-                            "2*padding)/stride + 1 to be an integer. Got: (" +
-                            inH + " - " + eKernel + " + 2*" + padding + ")/" +
-                            strides + " + 1 = " +
-                            str + "\n" + "See \"Constraints on strides\" at http://cs231n.github." +
+ - "io/convolutional-networks/ and ConvolutionType enumeration Javadoc.\n" + - "To truncate/crop the input, such that output height = floor(" + - str + ") = " + - truncated + ", use ConvolutionType.Truncate.\n" + - "Alternatively use ConvolutionType.Same, which will use padding to give an " + - "output height of ceil(" + - inH + "/" + strides + ")=" + sameSize + - getCommonErrorMsg(inputData, eKernel, strides, padding, dilation); - - throw new DL4JInvalidConfigException(sb); - } - } + /** + * Perform validation on the CNN layer kernel/stride/padding. Expect int, with values > 0 for + * kernel size and stride, and values >= 0 for padding. + * + * @param kernel Kernel size to check + * @param stride Stride to check + * @param padding Padding to check + */ + public static void validateCnn1DKernelStridePadding(int kernel, int stride, int padding) { + if (kernel <= 0) { + throw new IllegalStateException( + "Invalid kernel size: value must be positive (> 0). Got: " + kernel); } - - public static int effectiveKernelSize(int kernel, int dilation) { - //Determine the effective kernel size, accounting for dilation - //http://deeplearning.net/software/theano/tutorial/conv_arithmetic.html#dilated-convolutions - if (dilation == 1) { - return kernel; - } else { - return kernel + (kernel - 1) * (dilation - 1); - } + if (stride <= 0) { + throw new IllegalStateException( + "Invalid kernel size: value must be positive (> 0). Got: " + stride); } - - private static String getCommonErrorMsg(INDArray inputData, int kernel, int strides, int padding, int dilation) { - String s = "\nInput size: [numExamples,inputDepth,inputHeight,inputWidth]=" + Arrays.toString(inputData.shape()) - + ", inputKernel=" + kernel; - if (dilation != 1) { - int effectiveKernel = effectiveKernelSize(kernel, dilation); - s += ", effectiveKernelGivenDilation=" + effectiveKernel; - } - return s + ", stride=" + strides + ", padding=" + padding + ", dilation=" + dilation; + if (padding < 0) { + throw new IllegalStateException( + "Invalid kernel size: value must be positive (> 0). Got: " + padding); } - - - /** - * Check that the convolution mode is consistent with the padding specification - */ - public static void validateConvolutionModePadding(ConvolutionMode mode, int padding) { - if (mode == ConvolutionMode.Same) { - boolean nullPadding = padding == 0; - if (!nullPadding) - throw new IllegalArgumentException("Padding cannot be used when using the `same' convolution mode"); - - } - } - - /** - * Get top padding for same mode only. - * - * @param outSize Output size (length 2 array, height dimension first) - * @param inSize Input size (length 2 array, height dimension first) - * @param kernel Kernel size (length 2 array, height dimension first) - * @param strides Strides (length 2 array, height dimension first) - * @param dilation Dilation (length 2 array, height dimension first) - * @return Top left padding (length 2 array, height dimension first) - */ - public static int getSameModeTopLeftPadding(int outSize, int inSize, int kernel, int strides, int dilation) { - int eKernel = effectiveKernelSize(kernel, dilation); - //Note that padBottom is 1 bigger than this if bracketed term is not divisible by 2 - int outPad = ((outSize - 1) * strides + eKernel - inSize) / 2; - Preconditions.checkState(outPad >= 0, "Invalid padding values calculated: %s - " + - "layer configuration is invalid? 
-                        "strides %s, dilation %s", outPad, inSize, outSize, kernel, strides, dilation);
-        return outPad;
-    }
-
-    public static int getSameModeBottomRightPadding(int outSize, int inSize, int kernel, int strides, int dilation) {
-        int eKernel = effectiveKernelSize(kernel, dilation);
-        int totalPad = ((outSize - 1) * strides + eKernel - inSize);
-        int tlPad = totalPad / 2;
-        int brPad = totalPad - tlPad;
-        Preconditions.checkState(brPad >= 0, "Invalid padding values (right) calculated: %s - " +
-                        "layer configuration is invalid? Input size %s, output size %s, kernel %s, " +
-                        "strides %s, dilation %s", brPad, inSize, outSize, kernel, strides, dilation);
-        return brPad;
-    }
-
-    /**
-     * Perform validation on the CNN layer kernel/stride/padding. Expect int, with values > 0 for kernel size and
-     * stride, and values >= 0 for padding.
-     *
-     * @param kernel  Kernel size to check
-     * @param stride  Stride to check
-     * @param padding Padding to check
-     */
-    public static void validateCnn1DKernelStridePadding(int kernel, int stride, int padding) {
-
-        if (kernel <= 0) {
-            throw new IllegalStateException("Invalid kernel size: value must be positive (> 0). Got: " + kernel);
-        }
-        if (stride <= 0) {
-            throw new IllegalStateException("Invalid kernel size: value must be positive (> 0). Got: " + stride);
-
-        }
-        if (padding < 0) {
-            throw new IllegalStateException("Invalid kernel size: value must be positive (> 0). Got: " + padding);
-        }
-    }
-
-
+  }
 }
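
Note on the arithmetic in the hunk above (illustrative only, not part of the patch): the new helpers reduce to a few integer formulas - effective kernel k_eff = k + (k - 1)*(d - 1), Same/Causal output length ceil(in/stride) with total padding (out - 1)*stride + k_eff - in split into left/right halves, and Truncate/Strict output length (in - k_eff + 2*padding)/stride + 1. The following standalone sketch mirrors those formulas with assumed values (input length 28, kernel 3, stride 1, dilation 2); the class name and numbers are hypothetical and chosen only to make the arithmetic easy to check by hand.

// Illustrative sketch - mirrors the formulas in Convolution1DUtils, not DL4J API calls.
public class Conv1DArithmeticSketch {

  static int effectiveKernelSize(int kernel, int dilation) {
    // k_eff = k + (k - 1) * (d - 1); dilation 1 leaves the kernel unchanged
    return dilation == 1 ? kernel : kernel + (kernel - 1) * (dilation - 1);
  }

  public static void main(String[] args) {
    int inLength = 28;  // input sequence length (assumed)
    int kernel = 3;
    int stride = 1;
    int dilation = 2;

    int eKernel = effectiveKernelSize(kernel, dilation);           // 3 + 2*1 = 5

    // ConvolutionMode.Same / Causal: output length depends only on the stride
    int outSame = (int) Math.ceil(inLength / (double) stride);     // ceil(28/1) = 28

    // Padding required to reach that Same-mode output length
    int totalPad = (outSame - 1) * stride + eKernel - inLength;    // 27 + 5 - 28 = 4
    int padLeft = totalPad / 2;                                    // 2
    int padRight = totalPad - padLeft;                             // 2

    // ConvolutionMode.Truncate / Strict with explicit padding p = 0
    int padding = 0;
    int outTruncate = (inLength - eKernel + 2 * padding) / stride + 1;  // (28 - 5)/1 + 1 = 24

    System.out.printf("effective kernel=%d, same out=%d (pad %d/%d), truncate out=%d%n",
        eKernel, outSame, padLeft, padRight, outTruncate);
  }
}

Running the sketch prints effective kernel=5, same out=28 (pad 2/2), truncate out=24, which matches the values the patched getOutputSize/getSameMode*Padding helpers would compute for the same configuration.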