Fixing tests

Signed-off-by: brian <brian@brutex.de>
2023-07-25 10:59:46 +02:00 · 2023-07-25 10:59:46 +02:00 · 4dc5a116b6
commit 4dc5a116b6
parent 997143b9dd
41 changed files with 4285 additions and 1309 deletions
--- a/brutex-extended-tests/src/test/java/net/brutex/ai/nd4j/tests/ExploreParamsTest.java
+++ b/brutex-extended-tests/src/test/java/net/brutex/ai/nd4j/tests/ExploreParamsTest.java
@ -0,0 +1,167 @@
+/*
+ *
+ *    ******************************************************************************
+ *    *
+ *    * This program and the accompanying materials are made available under the
+ *    * terms of the Apache License, Version 2.0 which is available at
+ *    * https://www.apache.org/licenses/LICENSE-2.0.
+ *    *
+ *    *  See the NOTICE file distributed with this work for additional
+ *    *  information regarding copyright ownership.
+ *    * Unless required by applicable law or agreed to in writing, software
+ *    * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ *    * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *    * License for the specific language governing permissions and limitations
+ *    * under the License.
+ *    *
+ *    * SPDX-License-Identifier: Apache-2.0
+ *    *****************************************************************************
+ *
+ */
+
+package net.brutex.ai.nd4j.tests;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import lombok.extern.slf4j.Slf4j;
+import org.deeplearning4j.datasets.iterator.INDArrayDataSetIterator;
+import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator;
+import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator;
+import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
+import org.deeplearning4j.nn.conf.layers.DenseLayer;
+import org.deeplearning4j.nn.conf.layers.OutputLayer;
+import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
+import org.junit.jupiter.api.Test;
+import org.nd4j.common.primitives.Pair;
+import org.nd4j.evaluation.classification.Evaluation;
+import org.nd4j.linalg.activations.Activation;
+import org.nd4j.linalg.api.buffer.DataType;
+import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.lossfunctions.LossFunctions;
+
+@Slf4j
+public class ExploreParamsTest {
+
+  @Test
+  public void testParam() {
+    NeuralNetConfiguration conf =
+        NeuralNetConfiguration.builder()
+            .seed(12345)
+                .dataType(DataType.DOUBLE)
+            .layer(
+                DenseLayer.builder().nIn(4).nOut(30).name("1. Dense").activation(Activation.TANH))
+           .layer(DenseLayer.builder().nIn(30).nOut(10).name("2. Dense"))
+            //  .layer(FrozenLayer.builder(DenseLayer.builder().nOut(6).build()).build())
+
+            .layer(
+                OutputLayer.builder()
+                    .nOut(3)
+                    .lossFunction(LossFunctions.LossFunction.MSE)
+                    .activation(Activation.SOFTMAX))
+            .build();
+    MultiLayerNetwork nn = new MultiLayerNetwork(conf);
+    nn.init();
+    log.info(nn.summary());
+    // INDArray input = Nd4j.rand(10,4);
+    INDArray labels = Nd4j.zeros(9, 3);
+
+    INDArray input =
+        Nd4j.create(
+            new double[][] {
+              {5.15, 3.5, 1.4, 0.21},     // setosa
+              {4.9, 3.2, 1.4, 0.2},       // setosa
+              {4.7, 3.2, 1.23, 0.2},      // setosa
+              {7, 3.25, 4.7, 1.41},       // versicolor
+              {6.4, 3.2, 4.54, 1.5},      // versicolor
+              {6.9, 3.1, 4.92, 1.5},      // versicolor
+              {7.7, 3, 6.1, 2.3},         // virginica
+              {6.3, 3.4, 5.6, 2.45},      // virginica
+              {6.4, 3.12, 5.5, 1.8}       // virginica
+            });
+
+    labels.putScalar(0, 1);
+    labels.putScalar(3, 1);
+    labels.putScalar(6, 1);
+    labels.putScalar(10, 1);
+    labels.putScalar(13, 1);
+    labels.putScalar(16, 1);
+    labels.putScalar(20, 1);
+    labels.putScalar(23, 1);
+    labels.putScalar(26, 1);
+
+    IrisDataSetIterator iter = new IrisDataSetIterator();
+    //Iterable<Pair<INDArray, INDArray>> it = List.of(new Pair<INDArray, INDArray>(input, labels));
+    List l = new ArrayList<>();
+    for (int i=0; i< input.rows(); i++) {
+      l.add(new Pair(input.getRow(i), labels.getRow(i)));
+    }
+    Iterable<Pair<INDArray, INDArray>> it = l;
+    INDArrayDataSetIterator diter = new INDArrayDataSetIterator(it, 1);
+
+    for (int i = 0; i < 100; i++) {
+      // nn.fit(input, labels);
+      // nn.fit( input, labels);
+      nn.fit(diter);
+      // nn.feedForward(input);
+      if(i%20==0) log.info("Score: {}", nn.getScore());
+    }
+
+    Evaluation eval = nn.evaluate(iter, List.of("setosa", "vericolor", "virginica"));
+
+    log.info("\n{}", eval.stats());
+  }
+
+  @Test
+  public void testParam2() throws IOException {
+    NeuralNetConfiguration conf =
+            NeuralNetConfiguration.builder()
+                    .seed(12345)
+                    .layer(
+                            DenseLayer.builder().nIn(784).nOut(20).name("1. Dense"))
+                    .layer(DenseLayer.builder().nIn(20).nOut(10).name("2. Dense"))
+                    .layer(
+                            OutputLayer.builder()
+                                    .nOut(10)
+                                    .lossFunction(LossFunctions.LossFunction.MSE)
+                                    .activation(Activation.SOFTMAX))
+                    .build();
+    MultiLayerNetwork nn = new MultiLayerNetwork(conf);
+    nn.init();
+    log.info(nn.summary());
+
+    NeuralNetConfiguration conf2 =
+            NeuralNetConfiguration.builder()
+                    .seed(12345)
+                    .layer(
+                            DenseLayer.builder().nIn(784).nOut(20).name("1. Dense").dropOut(0.7))
+                    .layer(DenseLayer.builder().nIn(20).nOut(10).name("2. Dense"))
+                    .layer(
+                            OutputLayer.builder()
+                                    .nOut(10)
+                                    .lossFunction(LossFunctions.LossFunction.MSE)
+                                    .activation(Activation.SOFTMAX))
+                    .build();
+    MultiLayerNetwork nn2 = new MultiLayerNetwork(conf2);
+    nn2.init();
+    log.info(nn2.summary());
+
+
+
+    MnistDataSetIterator iter = new MnistDataSetIterator(10, 500);
+    MnistDataSetIterator iter2 = new MnistDataSetIterator(10, 50);
+
+
+    for (int i = 0; i < 200; i++) {
+      nn.fit(iter);
+      nn2.fit(iter);
+      if(i%20==0) log.info("Score: {} vs. {}", nn.getScore(), nn2.getScore());
+    }
+
+    Evaluation eval = nn.evaluate(iter2);
+    Evaluation eval2 = nn2.evaluate(iter2);
+
+    log.info("\n{} \n{}", eval.stats(), eval2.stats());
+  }
+}
--- a/brutex-extended-tests/src/test/java/net/brutex/gan/App.java
+++ b/brutex-extended-tests/src/test/java/net/brutex/gan/App.java
@ -45,6 +45,7 @@ import org.datavec.image.transform.PipelineImageTransform;
 import org.datavec.image.transform.ResizeImageTransform;
 import org.datavec.image.transform.ShowImageTransform;
 import org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator;
+import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator;
 import org.deeplearning4j.nn.conf.GradientNormalization;
 import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
 import org.deeplearning4j.nn.conf.distribution.Distribution;
@ -65,6 +66,7 @@ import org.deeplearning4j.optimize.listeners.ScoreToChartListener;
 import org.junit.jupiter.api.Test;
 import org.nd4j.linalg.activations.Activation;
 import org.nd4j.linalg.activations.impl.ActivationLReLU;
+import org.nd4j.linalg.api.buffer.DataType;
 import org.nd4j.linalg.api.ndarray.INDArray;
 import org.nd4j.linalg.dataset.DataSet;
 import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
@ -80,11 +82,11 @@ public class App {

  private static final int X_DIM = 20 ;
  private static final int Y_DIM = 20;
-  private static final int CHANNELS = 1;
-  private static final int batchSize = 10;
+  private static final int CHANNELS = 3;
+  private static final int batchSize = 50;
  private static final int INPUT = 128;

-  private static final int OUTPUT_PER_PANEL = 4;
+  private static final int OUTPUT_PER_PANEL = 16;

  private static final int ARRAY_SIZE_PER_SAMPLE = X_DIM*Y_DIM*CHANNELS;
  private static final IUpdater UPDATER = Adam.builder().learningRate(LEARNING_RATE).beta1(0.5).build();
@ -146,7 +148,7 @@ public class App {
        ActivationLayer.builder(new ActivationLReLU(0.2)).build(),
        DropoutLayer.builder(1 - 0.5).build(),

-        OutputLayer.builder().name("dis-output").lossFunction(LossFunction.XENT).nIn(X_DIM*Y_DIM).nOut(1).activation(Activation.SIGMOID).build()
+        OutputLayer.builder().name("dis-output").lossFunction(LossFunction.MCXENT).nIn(X_DIM*Y_DIM).nOut(1).activation(Activation.SIGMOID).build()
    };
  }

@ -196,6 +198,7 @@ public class App {
        .activation( Activation.IDENTITY )
        .layersFromArray(  layers  )
        .inputType( InputType.convolutional(X_DIM, Y_DIM, CHANNELS))
+            .dataType(DataType.FLOAT)
        .build();
 ((NeuralNetConfiguration) conf).init();
    return conf;
@ -212,9 +215,9 @@ public class App {
    log.info("\u001B[32m  Some \u001B[1m green \u001B[22m text \u001B[0m \u001B[7m Inverted\u001B[0m   ");
    Nd4j.getMemoryManager().setAutoGcWindow(500);

-//    MnistDataSetIterator trainData = new MnistDataSetIterator(128, true, 45);
-  //  FileSplit fileSplit = new FileSplit(new File("c:/users/brian/downloads/flowers"), NativeImageLoader.getALLOWED_FORMATS());
-    FileSplit fileSplit = new FileSplit(new File("c:/users/brian/downloads/humans"), NativeImageLoader.getALLOWED_FORMATS());
+   //MnistDataSetIterator trainData = new MnistDataSetIterator(128, true, 45);
+   //FileSplit fileSplit = new FileSplit(new File("c:/users/brian/downloads/flowers"), NativeImageLoader.getALLOWED_FORMATS());
+   FileSplit fileSplit = new FileSplit(new File("c:/users/brian/downloads/humans"), NativeImageLoader.getALLOWED_FORMATS());


    ImageTransform transform = new ColorConversionTransform(new Random(42), 7 );
@ -223,7 +226,7 @@ public class App {
    ImageTransform transform3 = new ResizeImageTransform(X_DIM, Y_DIM);

    ImageTransform tr = new PipelineImageTransform.Builder()
-        .addImageTransform(transform) //convert to GREY SCALE
+        //.addImageTransform(transform) //convert to GREY SCALE
        .addImageTransform(transform3)
        //.addImageTransform(transform2)
        .build();
@ -270,10 +273,10 @@ public class App {
        break;
        }

-        if(i%20 == 0) {
-         // frame2 = visualize(new INDArray[]{real}, batchSize,
-         //     frame2 == null ? new JFrame() : frame2, true); //real has batchsize number of images
-        }
+        //if(i%20 == 0) {
+         frame2 = visualize(new INDArray[]{real}, batchSize,
+         frame2 == null ? new JFrame() : frame2, true); //real has batchsize number of images
+        //}
       real.divi(255f);

 //        int batchSize = (int) real.shape()[0];
@ -290,7 +293,7 @@ public class App {
        DataSet data = DataSet.merge(Arrays.asList(realSet, fakeSet));

        dis.fit(data);
-        dis.fit(data);
+        //dis.fit(data);

        // Update the discriminator in the GAN network
        updateGan(gen, dis, gan);
@ -298,7 +301,7 @@ public class App {
        //gan.fit(new DataSet(Nd4j.rand(batchSize, INPUT), Nd4j.zeros(batchSize, 1)));
        gan.fit(new DataSet(Nd4j.rand(batchSize, CHANNELS, X_DIM, Y_DIM), Nd4j.zeros(batchSize, 1)));

-
+        //Visualize and reporting
        if (j % 10 == 1) {
          System.out.println("Iteration " + j + " Visualizing...");
          INDArray[] samples = batchSize > OUTPUT_PER_PANEL ? new INDArray[OUTPUT_PER_PANEL] : new INDArray[batchSize];
@ -320,11 +323,16 @@ public class App {
          frame = visualize(samples, 1, frame == null ? new JFrame() : frame, false); //each samples only has 1 image, thus batchElements=1
        }
      }
-      trainData.reset();
+      if (trainData.resetSupported()) {
+          trainData.reset();
+      } else {
+          log.error("Trainingdata {} does not support reset.", trainData.toString());
+      }
+        // Copy the GANs generator to gen.
+        updateGen(gen, gan);
    }

-    // Copy the GANs generator to gen.
-    updateGen(gen, gan);
+

    gen.save(new File("mnist-mlp-generator.dlj"));
  }
@ -383,7 +391,12 @@ public class App {
  }

  private static JLabel getImage(INDArray tensor, int batchElement, boolean isOrig) {
-    final BufferedImage bi = new BufferedImage(X_DIM, Y_DIM, BufferedImage.TYPE_BYTE_GRAY);
+    final BufferedImage bi;
+    if(CHANNELS>1) {
+        bi = new BufferedImage(X_DIM, Y_DIM, BufferedImage.TYPE_INT_RGB); //need to change here based on channels
+    } else {
+        bi = new BufferedImage(X_DIM, Y_DIM, BufferedImage.TYPE_BYTE_GRAY); //need to change here based on channels
+    }
    final int imageSize = X_DIM * Y_DIM;
    final int offset = batchElement * imageSize;
    int pxl = offset * CHANNELS; //where to start in the INDArray
--- a/brutex-extended-tests/src/test/java/net/brutex/gan/MnistSimpleGAN.java
+++ b/brutex-extended-tests/src/test/java/net/brutex/gan/MnistSimpleGAN.java
@ -24,12 +24,14 @@ package net.brutex.gan;
 import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator;
 import org.deeplearning4j.nn.conf.GradientNormalization;
 import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
+import org.deeplearning4j.nn.conf.inputs.InputType;
 import org.deeplearning4j.nn.conf.layers.ActivationLayer;
 import org.deeplearning4j.nn.conf.layers.DenseLayer;
 import org.deeplearning4j.nn.conf.layers.DropoutLayer;
 import org.deeplearning4j.nn.conf.layers.OutputLayer;
 import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
 import org.deeplearning4j.nn.weights.WeightInit;
+import org.junit.jupiter.api.Test;
 import org.nd4j.linalg.activations.Activation;
 import org.nd4j.linalg.activations.impl.ActivationLReLU;
 import org.nd4j.linalg.api.ndarray.INDArray;
@ -98,7 +100,10 @@ public class MnistSimpleGAN {

    return new MultiLayerNetwork(discConf);
  }
-
+  /**
+   * JUnit entry point that runs the example by delegating to {@link #main(String[])}.
+   *
+   * @throws Exception whatever {@code main} throws, propagated unchanged
+   */
+  @Test
+  public void runTest() throws Exception {
+    // Pass an empty argument array instead of null so main never receives a null args reference.
+    main(new String[0]);
+  }
  public static void main(String[] args) throws Exception {
    GAN gan = new GAN.Builder()
        .generator(MnistSimpleGAN::getGenerator)
@ -108,6 +113,7 @@ public class MnistSimpleGAN {
        .updater(UPDATER)
        .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer)
        .gradientNormalizationThreshold(100)
+
        .build();

    Nd4j.getMemoryManager().setAutoGcWindow(15 * 1000);
--- a/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/linalg/api/ndarray/INDArray.java
+++ b/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/linalg/api/ndarray/INDArray.java
@ -2385,11 +2385,15 @@ public interface INDArray extends Serializable, AutoCloseable {
     */
    long[] stride();

-    /**
-     * Return the ordering (fortran or c  'f' and 'c' respectively) of this ndarray
-     * @return the ordering of this ndarray
-     */
-    char ordering();
+  /**
+   * Returns the ordering of this ndarray: {@code 'c'} for C order or {@code 'f'} for Fortran
+   * order.<br><br>
+   * C is the C-contiguous layout; mathematically speaking, row major.<br>
+   * F is the Fortran-contiguous layout; mathematically speaking, column major.<br>
+   * See <a href="https://en.wikipedia.org/wiki/Row-_and_column-major_order">Row- and column-major
+   * order</a>.
+   *
+   * @return the ordering of this ndarray
+   */
+  char ordering();

    /**
     * Returns the size along a specified dimension
--- a/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/linalg/factory/Nd4j.java
+++ b/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/linalg/factory/Nd4j.java
@ -5121,7 +5121,7 @@ public class Nd4j {
            Nd4j.backend = backend;
            updateNd4jContext();
            props = Nd4jContext.getInstance().getConf();
-            logger.info("Properties for Nd4jContext " + props);
+            log.debug("Properties for Nd4jContext {}", props);
            PropertyParser pp = new PropertyParser(props);

            String otherDtype = pp.toString(ND4JSystemProperties.DTYPE);
--- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/BNGradientCheckTest.java
+++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/BNGradientCheckTest.java
@ -122,7 +122,7 @@ public class BNGradientCheckTest extends BaseDL4JTest {
                    .dataType(DataType.DOUBLE)
                    .updater(new NoOp()).seed(12345L)
                    .dist(new NormalDistribution(0, 2)).list()
-                    .layer(0, ConvolutionLayer.builder().kernelSize(2, 2).stride(1, 1).nIn(depth).nOut(2)
+                    .layer(0, Convolution2D.builder().kernelSize(2, 2).stride(1, 1).nIn(depth).nOut(2)
                            .activation(Activation.IDENTITY).build())
                    .layer(1,BatchNormalization.builder().useLogStd(useLogStd).build())
                    .layer(2, ActivationLayer.builder().activation(Activation.TANH).build())
--- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN1DGradientCheckTest.java
+++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN1DGradientCheckTest.java
@ -91,7 +91,6 @@ public class CNN1DGradientCheckTest extends BaseDL4JTest {
                  .updater(new NoOp())
                  .dist(new NormalDistribution(0, 1))
                  .convolutionMode(ConvolutionMode.Same)
-                  .list()
                  .layer(
                      Convolution1D.builder()
                          .activation(afn)
@ -435,7 +434,6 @@ public class CNN1DGradientCheckTest extends BaseDL4JTest {
                    .updater(new NoOp())
                    .dist(new NormalDistribution(0, 1))
                    .convolutionMode(ConvolutionMode.Same)
-                    .list()
                    .layer(
                        0,
                        Convolution1D.builder()
@ -461,6 +459,7 @@ public class CNN1DGradientCheckTest extends BaseDL4JTest {
                            .stride(stride)
                            .padding(padding)
                            .pnorm(pnorm)
+                                .name("SubsamplingLayer")
                            .build())
                    .layer(
                        3,
--- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN1DNewGradientCheckTest.java
+++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNN1DNewGradientCheckTest.java
@ -0,0 +1,811 @@
+/*
+ *  ******************************************************************************
+ *  *
+ *  *
+ *  * This program and the accompanying materials are made available under the
+ *  * terms of the Apache License, Version 2.0 which is available at
+ *  * https://www.apache.org/licenses/LICENSE-2.0.
+ *  *
+ *  *  See the NOTICE file distributed with this work for additional
+ *  *  information regarding copyright ownership.
+ *  * Unless required by applicable law or agreed to in writing, software
+ *  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ *  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  * License for the specific language governing permissions and limitations
+ *  * under the License.
+ *  *
+ *  * SPDX-License-Identifier: Apache-2.0
+ *  *****************************************************************************
+ */
+
+package org.deeplearning4j.gradientcheck;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import lombok.extern.slf4j.Slf4j;
+import org.deeplearning4j.BaseDL4JTest;
+import org.deeplearning4j.TestUtils;
+import org.deeplearning4j.datasets.iterator.INDArrayDataSetIterator;
+import org.deeplearning4j.nn.conf.ConvolutionMode;
+import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
+import org.deeplearning4j.nn.conf.RNNFormat;
+import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
+import org.deeplearning4j.nn.conf.inputs.InputType;
+import org.deeplearning4j.nn.conf.layers.*;
+import org.deeplearning4j.nn.conf.layers.convolutional.Cropping1D;
+import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
+import org.deeplearning4j.util.Convolution1DUtils;
+import org.junit.jupiter.api.Test;
+import org.nd4j.common.primitives.Pair;
+import org.nd4j.evaluation.classification.Evaluation;
+import org.nd4j.linalg.activations.Activation;
+import org.nd4j.linalg.api.buffer.DataType;
+import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.dataset.DataSet;
+import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
+import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.indexing.NDArrayIndex;
+import org.nd4j.linalg.learning.config.NoOp;
+import org.nd4j.linalg.lossfunctions.LossFunctions;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+@Slf4j
+public class CNN1DNewGradientCheckTest extends BaseDL4JTest {
+  // Shared settings handed to GradientCheckUtil.checkGradients by every test in this class,
+  // in this order: eps, max relative error, min absolute error, print flag, return-on-first-failure.
+  // PRINT_RESULTS additionally guards the per-configuration console output of the tests.
+  private static final boolean PRINT_RESULTS = true;
+  private static final boolean RETURN_ON_FIRST_FAILURE = false;
+  private static final double DEFAULT_EPS = 1e-6;
+  private static final double DEFAULT_MAX_REL_ERROR = 1e-3;
+  private static final double DEFAULT_MIN_ABS_ERROR = 1e-8;
+
+  // Switch the ND4J data type to DOUBLE once, when this test class is loaded.
+  static {
+    Nd4j.setDataType(DataType.DOUBLE);
+  }
+
+  /**
+   * Gradient-checks a network made of a {@code Convolution1DNew} layer followed by an
+   * {@code RnnOutputLayer} for every combination of kernel size and input channel count, and
+   * afterwards passes each network to {@code TestUtils.testModelSerialization}.
+   */
+  @Test
+  public void testCnn1D() {
+    int minibatchSize = 4;
+    int[] dataChannels = {4, 10}; // number of input channels
+    int[] kernels = {2, 4, 5, 8};
+    int stride = 2;
+    int padding = 3;
+    int seriesLength = 300;
+
+    for (int kernel : kernels) {
+      for (int dChannels : dataChannels) {
+        // Number of time steps in the label array, derived from kernel, stride and padding.
+        // NOTE(review): the network below is configured with ConvolutionMode.Same; confirm that
+        // this explicit-padding formula matches the output length Convolution1DNew produces.
+        int numLabels = ((seriesLength + (2 * padding) - kernel) / stride) + 1;
+        final NeuralNetConfiguration conf =
+            NeuralNetConfiguration.builder()
+                .dataType(DataType.DOUBLE)
+                .updater(new NoOp())
+                .dist(new NormalDistribution(0, 1))
+                .convolutionMode(ConvolutionMode.Same)
+                .layer(
+                    Convolution1DNew.builder()
+                        .activation(Activation.RELU)
+                        .kernelSize(kernel)
+                        .stride(stride)
+                        .padding(padding)
+                        .nIn(dChannels) // channels
+                        .nOut(3)
+                        .rnnDataFormat(RNNFormat.NCW)
+                        .build())
+                .layer(
+                    RnnOutputLayer.builder()
+                        .lossFunction(LossFunctions.LossFunction.MCXENT)
+                        .activation(Activation.SOFTMAX)
+                        .nOut(4)
+                        .build())
+                .inputType(InputType.recurrent(dChannels, seriesLength))
+                .build();
+
+        INDArray input = Nd4j.rand(minibatchSize, dChannels, seriesLength);
+
+        // One-hot labels: example i is assigned class (i % 4) at every time step.
+        INDArray labels = Nd4j.zeros(minibatchSize, 4, numLabels);
+        for (int i = 0; i < minibatchSize; i++) {
+          for (int j = 0; j < numLabels; j++) {
+            labels.putScalar(new int[] {i, i % 4, j}, 1.0);
+          }
+        }
+
+        final MultiLayerNetwork net = new MultiLayerNetwork(conf);
+        net.init();
+
+        String msg =
+            "Minibatch="
+                + minibatchSize
+                + ", activationFn="
+                + Activation.RELU
+                + ", kernel = "
+                + kernel;
+
+        // Guarded like the other tests in this class; PRINT_RESULTS is a constant.
+        if (PRINT_RESULTS) {
+          System.out.println(msg);
+          for (int j = 0; j < net.getnLayers(); j++) {
+            System.out.println("ILayer " + j + " # params: " + net.getLayer(j).numParams());
+          }
+        }
+
+        boolean gradOK =
+            GradientCheckUtil.checkGradients(
+                net,
+                DEFAULT_EPS,
+                DEFAULT_MAX_REL_ERROR,
+                DEFAULT_MIN_ABS_ERROR,
+                PRINT_RESULTS,
+                RETURN_ON_FIRST_FAILURE,
+                input,
+                labels);
+
+        assertTrue(gradOK, msg);
+        TestUtils.testModelSerialization(net);
+      }
+    }
+  }
+
+
+  /**
+   * Gradient-checks a {@code Convolution1DNew} -> {@code LocallyConnected1D} ->
+   * {@code RnnOutputLayer} stack for each minibatch size and kernel size. For every combination
+   * the configuration is also round-tripped through JSON and the network is passed to
+   * {@code TestUtils.testModelSerialization}.
+   */
+  @Test
+  public void testCnn1DWithLocallyConnected1D() {
+    Nd4j.getRandom().setSeed(1337);
+
+    final int seriesLength = 25;
+    final int inChannels = 18;
+    final int convOut = 3;
+    final int locallyConnectedOut = 4;
+    final int numClasses = 4;
+    final int stride = 1;
+    final int padding = 0;
+
+    final int[] batchSizes = {2, 3};
+    final int[] kernelSizes = {1, 2, 4};
+    final Activation[] activationFns = {Activation.SIGMOID};
+
+    for (Activation activationFn : activationFns) {
+      for (int batchSize : batchSizes) {
+        for (int kernelSize : kernelSizes) {
+          // Random features are drawn before the network is initialised; keep this order.
+          INDArray features = Nd4j.rand(batchSize, inChannels, seriesLength);
+
+          // One-hot targets: every time step of example e is assigned class (e % numClasses).
+          INDArray targets = Nd4j.zeros(batchSize, numClasses, seriesLength);
+          for (int example = 0; example < batchSize; example++) {
+            for (int step = 0; step < seriesLength; step++) {
+              targets.putScalar(new int[] {example, example % numClasses, step}, 1.0);
+            }
+          }
+
+          NeuralNetConfiguration configuration =
+              NeuralNetConfiguration.builder()
+                  .dataType(DataType.DOUBLE)
+                  .updater(new NoOp())
+                  .dist(new NormalDistribution(0, 1))
+                  .convolutionMode(ConvolutionMode.Same)
+                  .layer(
+                      Convolution1DNew.builder()
+                          .activation(activationFn)
+                          .kernelSize(kernelSize)
+                          .stride(stride)
+                          .padding(padding)
+                          .nIn(inChannels)
+                          .nOut(convOut)
+                          .rnnDataFormat(RNNFormat.NCW)
+                          .build())
+                  .layer(
+                      LocallyConnected1D.builder()
+                          .activation(activationFn)
+                          .kernelSize(kernelSize)
+                          .stride(stride)
+                          .padding(padding)
+                          .nIn(convOut)
+                          .nOut(locallyConnectedOut)
+                          .hasBias(false)
+                          .build())
+                  .layer(
+                      RnnOutputLayer.builder()
+                          .lossFunction(LossFunctions.LossFunction.MCXENT)
+                          .activation(Activation.SOFTMAX)
+                          .nOut(numClasses)
+                          .build())
+                  .inputType(InputType.recurrent(inChannels, seriesLength))
+                  .build();
+
+          // The configuration must survive a JSON round trip unchanged.
+          NeuralNetConfiguration restored =
+              NeuralNetConfiguration.fromJson(configuration.toJson());
+          assertEquals(configuration, restored);
+
+          MultiLayerNetwork network = new MultiLayerNetwork(configuration);
+          network.init();
+
+          String description =
+              "Minibatch="
+                  + batchSize
+                  + ", activationFn="
+                  + activationFn
+                  + ", kernel = "
+                  + kernelSize;
+
+          if (PRINT_RESULTS) {
+            System.out.println(description);
+          }
+
+          boolean gradientsMatch =
+              GradientCheckUtil.checkGradients(
+                  network,
+                  DEFAULT_EPS,
+                  DEFAULT_MAX_REL_ERROR,
+                  DEFAULT_MIN_ABS_ERROR,
+                  PRINT_RESULTS,
+                  RETURN_ON_FIRST_FAILURE,
+                  features,
+                  targets);
+
+          assertTrue(gradientsMatch, description);
+
+          TestUtils.testModelSerialization(network);
+        }
+      }
+    }
+  }
+
+  /**
+   * Gradient-checks a {@code Convolution1DNew} -> {@code Cropping1D} -> {@code Convolution1DNew}
+   * -> {@code RnnOutputLayer} stack for each minibatch size and kernel size. For every
+   * combination the configuration is also round-tripped through JSON and the network is passed
+   * to {@code TestUtils.testModelSerialization}.
+   */
+  @Test
+  public void testCnn1DWithCropping1D() {
+    Nd4j.getRandom().setSeed(1337);
+
+    int[] minibatchSizes = {1, 3};
+    int length = 7;
+    int convNIn = 2;
+    int convNOut1 = 3;
+    int convNOut2 = 4;
+    int finalNOut = 4;
+
+    int[] kernels = {1, 2, 4};
+    int stride = 1;
+
+    int padding = 0;
+    int cropping = 1;
+    // The label array is sized for a sequence shortened by `cropping` steps at both ends.
+    int croppedLength = length - 2 * cropping;
+
+    Activation[] activations = {Activation.SIGMOID};
+    // NOTE(review): poolingType only appears in the assertion message below; the network
+    // configuration contains no subsampling layer, so each (minibatch, kernel) combination is
+    // simply run once per pooling type. Confirm whether this loop is still wanted.
+    SubsamplingLayer.PoolingType[] poolingTypes =
+        new SubsamplingLayer.PoolingType[] {
+          SubsamplingLayer.PoolingType.MAX,
+          SubsamplingLayer.PoolingType.AVG,
+          SubsamplingLayer.PoolingType.PNORM
+        };
+
+    for (Activation afn : activations) {
+      for (SubsamplingLayer.PoolingType poolingType : poolingTypes) {
+        for (int minibatchSize : minibatchSizes) {
+          for (int kernel : kernels) {
+            INDArray input = Nd4j.rand(minibatchSize, convNIn, length);
+            // One-hot labels: example i is assigned class (i % finalNOut) at every time step.
+            INDArray labels = Nd4j.zeros(minibatchSize, finalNOut, croppedLength);
+            for (int i = 0; i < minibatchSize; i++) {
+              for (int j = 0; j < croppedLength; j++) {
+                labels.putScalar(new int[] {i, i % finalNOut, j}, 1.0);
+              }
+            }
+
+            // Neither convolution layer sets nIn here; presumably it is derived from inputType
+            // -- TODO confirm.
+            NeuralNetConfiguration conf =
+                NeuralNetConfiguration.builder()
+                    .dataType(DataType.DOUBLE)
+                    .updater(new NoOp())
+                    .dist(new NormalDistribution(0, 1))
+                    .convolutionMode(ConvolutionMode.Same)
+                    .layer(
+                        Convolution1DNew.builder()
+                            .activation(afn)
+                            .kernelSize(kernel)
+                            .stride(stride)
+                            .padding(padding)
+                            .nOut(convNOut1)
+                            .build())
+                    .layer(Cropping1D.builder(cropping).build())
+                    .layer(
+                        Convolution1DNew.builder()
+                            .activation(afn)
+                            .kernelSize(kernel)
+                            .stride(stride)
+                            .padding(padding)
+                            .nOut(convNOut2)
+                            .build())
+                    .layer(
+                        RnnOutputLayer.builder()
+                            .lossFunction(LossFunctions.LossFunction.MCXENT)
+                            .activation(Activation.SOFTMAX)
+                            .nOut(finalNOut)
+                            .build())
+                    .inputType(InputType.recurrent(convNIn, length, RNNFormat.NCW))
+                    .build();
+
+            // The configuration must survive a JSON round trip unchanged.
+            String json = conf.toJson();
+            NeuralNetConfiguration c2 = NeuralNetConfiguration.fromJson(json);
+            assertEquals(conf, c2);
+
+            MultiLayerNetwork net = new MultiLayerNetwork(conf);
+            net.init();
+
+            // Identifies the failing combination in the assertion below.
+            String msg =
+                "PoolingType="
+                    + poolingType
+                    + ", minibatch="
+                    + minibatchSize
+                    + ", activationFn="
+                    + afn
+                    + ", kernel = "
+                    + kernel;
+
+            if (PRINT_RESULTS) {
+              System.out.println(msg);
+              //                            for (int j = 0; j < net.getnLayers(); j++)
+              //                                System.out.println("ILayer " + j + " # params: " +
+              // net.getLayer(j).numParams());
+            }
+
+            boolean gradOK =
+                GradientCheckUtil.checkGradients(
+                    net,
+                    DEFAULT_EPS,
+                    DEFAULT_MAX_REL_ERROR,
+                    DEFAULT_MIN_ABS_ERROR,
+                    PRINT_RESULTS,
+                    RETURN_ON_FIRST_FAILURE,
+                    input,
+                    labels);
+
+            assertTrue(gradOK, msg);
+
+            TestUtils.testModelSerialization(net);
+          }
+        }
+      }
+    }
+  }
+
+  /**
+   * Gradient check for a 1D CNN that contains zero-padding layers.
+   *
+   * <p>For every combination of activation, pooling type, minibatch size and kernel size the test
+   * builds a network (Convolution1DNew, ZeroPadding1DLayer, Convolution1DNew, ZeroPadding1DLayer,
+   * Subsampling1DLayer, RnnOutputLayer), verifies that the configuration is equal after a JSON
+   * round trip, runs {@code GradientCheckUtil.checkGradients} and finally calls {@code
+   * TestUtils.testModelSerialization}.
+   */
+  @Test
+  public void testCnn1DWithZeroPadding1D() {
+    Nd4j.getRandom().setSeed(1337);
+
+    int[] minibatchSizes = {1, 3};
+    int length = 7;
+    int convNIn = 2;
+    int convNOut1 = 3;
+    int convNOut2 = 4;
+    int finalNOut = 4;
+
+    int[] kernels = {1, 2, 4};
+    int stride = 1;
+    int pnorm = 2;
+
+    int padding = 0;
+    int zeroPadding = 2;
+    // Time dimension used for the labels: the input length plus zeroPadding counted twice.
+    int paddedLength = length + 2 * zeroPadding;
+
+    Activation[] activations = {Activation.SIGMOID};
+    SubsamplingLayer.PoolingType[] poolingTypes =
+        new SubsamplingLayer.PoolingType[] {
+          SubsamplingLayer.PoolingType.MAX,
+          SubsamplingLayer.PoolingType.AVG,
+          SubsamplingLayer.PoolingType.PNORM
+        };
+
+    for (Activation afn : activations) {
+      for (SubsamplingLayer.PoolingType poolingType : poolingTypes) {
+        for (int minibatchSize : minibatchSizes) {
+          for (int kernel : kernels) {
+            INDArray input = Nd4j.rand(minibatchSize, convNIn, length);
+            INDArray labels = Nd4j.zeros(minibatchSize, finalNOut, paddedLength);
+            // One-hot labels: example i gets class (i % finalNOut) at every time step.
+            for (int i = 0; i < minibatchSize; i++) {
+              for (int j = 0; j < paddedLength; j++) {
+                labels.putScalar(new int[] {i, i % finalNOut, j}, 1.0);
+              }
+            }
+
+            NeuralNetConfiguration conf =
+                NeuralNetConfiguration.builder()
+                    .dataType(DataType.DOUBLE)
+                    .updater(new NoOp())
+                    .dist(new NormalDistribution(0, 1))
+                    .convolutionMode(ConvolutionMode.Same)
+                    .layer(
+                        Convolution1DNew.builder()
+                            .activation(afn)
+                            // NOTE(review): two kernel values are passed here, while the other
+                            // Convolution1DNew layers in these tests pass a single value; confirm
+                            // that this is intended.
+                            .kernelSize(2, kernel)
+                            .stride(stride)
+                            .padding(padding)
+                            .nOut(convNOut1)
+                            .build())
+                    .layer(ZeroPadding1DLayer.builder(zeroPadding).build())
+                    .layer(
+                        Convolution1DNew.builder()
+                            .activation(afn)
+                            .kernelSize(kernel)
+                            .stride(stride)
+                            .padding(padding)
+                            .nOut(convNOut2)
+                            .build())
+                    // NOTE(review): the padding argument is 0 here, presumably making this layer
+                    // a no-op; confirm intent.
+                    .layer(ZeroPadding1DLayer.builder(0).build())
+                    .layer(
+                        Subsampling1DLayer.builder(poolingType)
+                            .kernelSize(kernel)
+                            .stride(stride)
+                            .padding(padding)
+                            .pnorm(pnorm)
+                            .build())
+                    .layer(
+                        RnnOutputLayer.builder()
+                            .lossFunction(LossFunctions.LossFunction.MCXENT)
+                            .activation(Activation.SOFTMAX)
+                            .nOut(finalNOut)
+                            .build())
+                    .inputType(InputType.recurrent(convNIn, length, RNNFormat.NCW))
+                    .build();
+
+            // The configuration must be equal to itself after a JSON round trip.
+            String json = conf.toJson();
+            NeuralNetConfiguration c2 = NeuralNetConfiguration.fromJson(json);
+            assertEquals(conf, c2);
+
+            MultiLayerNetwork net = new MultiLayerNetwork(conf);
+            net.init();
+
+            // Identifies the failing parameter combination in the assertion message.
+            String msg =
+                "PoolingType="
+                    + poolingType
+                    + ", minibatch="
+                    + minibatchSize
+                    + ", activationFn="
+                    + afn
+                    + ", kernel = "
+                    + kernel;
+
+            if (PRINT_RESULTS) {
+              System.out.println(msg);
+              //                            for (int j = 0; j < net.getnLayers(); j++)
+              //                                System.out.println("ILayer " + j + " # params: " +
+              // net.getLayer(j).numParams());
+            }
+
+            boolean gradOK =
+                GradientCheckUtil.checkGradients(
+                    net,
+                    DEFAULT_EPS,
+                    DEFAULT_MAX_REL_ERROR,
+                    DEFAULT_MIN_ABS_ERROR,
+                    PRINT_RESULTS,
+                    RETURN_ON_FIRST_FAILURE,
+                    input,
+                    labels);
+
+            assertTrue(gradOK, msg);
+            TestUtils.testModelSerialization(net);
+          }
+        }
+      }
+    }
+  }
+
+  /**
+   * Gradient check for a 1D CNN with a subsampling (pooling) layer.
+   *
+   * <p>For every combination of activation, pooling type, minibatch size and kernel size the test
+   * builds a four-layer network (two Convolution1DNew layers, a Subsampling1DLayer and an
+   * RnnOutputLayer, added with explicit layer indices 0 to 3), verifies that the configuration is
+   * equal after a JSON round trip, runs {@code GradientCheckUtil.checkGradients} and finally calls
+   * {@code TestUtils.testModelSerialization}.
+   */
+  @Test
+  public void testCnn1DWithSubsampling1D() {
+    Nd4j.getRandom().setSeed(12345);
+
+    int[] minibatchSizes = {1, 3};
+    int length = 7;
+    int convNIn = 2;
+    int convNOut1 = 3;
+    int convNOut2 = 4;
+    int finalNOut = 4;
+
+    int[] kernels = {1, 2, 4};
+    int stride = 1;
+    int padding = 0;
+    int pnorm = 2;
+
+    Activation[] activations = {Activation.SIGMOID, Activation.TANH};
+    SubsamplingLayer.PoolingType[] poolingTypes =
+        new SubsamplingLayer.PoolingType[] {
+          SubsamplingLayer.PoolingType.MAX,
+          SubsamplingLayer.PoolingType.AVG,
+          SubsamplingLayer.PoolingType.PNORM
+        };
+
+    for (Activation afn : activations) {
+      for (SubsamplingLayer.PoolingType poolingType : poolingTypes) {
+        for (int minibatchSize : minibatchSizes) {
+          for (int kernel : kernels) {
+            INDArray input = Nd4j.rand(minibatchSize, convNIn, length);
+            INDArray labels = Nd4j.zeros(minibatchSize, finalNOut, length);
+            // One-hot labels: example i gets class (i % finalNOut) at every time step.
+            for (int i = 0; i < minibatchSize; i++) {
+              for (int j = 0; j < length; j++) {
+                labels.putScalar(new int[] {i, i % finalNOut, j}, 1.0);
+              }
+            }
+
+            NeuralNetConfiguration conf =
+                NeuralNetConfiguration.builder()
+                    .dataType(DataType.DOUBLE)
+                    .updater(new NoOp())
+                    .dist(new NormalDistribution(0, 1))
+                    .convolutionMode(ConvolutionMode.Same)
+                    .layer(
+                        0,
+                        Convolution1DNew.builder()
+                            .activation(afn)
+                            .kernelSize(kernel)
+                            .stride(stride)
+                            .padding(padding)
+                            .nOut(convNOut1)
+                            .build())
+                    .layer(
+                        1,
+                        Convolution1DNew.builder()
+                            .activation(afn)
+                            .kernelSize(kernel)
+                            .stride(stride)
+                            .padding(padding)
+                            .nOut(convNOut2)
+                            .build())
+                    .layer(
+                        2,
+                        Subsampling1DLayer.builder(poolingType)
+                            .kernelSize(kernel)
+                            .stride(stride)
+                            .padding(padding)
+                            .pnorm(pnorm)
+                            .name("SubsamplingLayer")
+                            .build())
+                    .layer(
+                        3,
+                        RnnOutputLayer.builder()
+                            .lossFunction(LossFunctions.LossFunction.MCXENT)
+                            .activation(Activation.SOFTMAX)
+                            .nOut(finalNOut)
+                            .build())
+                    .inputType(InputType.recurrent(convNIn, length, RNNFormat.NCW))
+                    .build();
+
+            // The configuration must be equal to itself after a JSON round trip.
+            String json = conf.toJson();
+            NeuralNetConfiguration c2 = NeuralNetConfiguration.fromJson(json);
+            assertEquals(conf, c2);
+
+            MultiLayerNetwork net = new MultiLayerNetwork(conf);
+            net.init();
+
+            // Identifies the failing parameter combination in the assertion message.
+            String msg =
+                "PoolingType="
+                    + poolingType
+                    + ", minibatch="
+                    + minibatchSize
+                    + ", activationFn="
+                    + afn
+                    + ", kernel = "
+                    + kernel;
+
+            if (PRINT_RESULTS) {
+              System.out.println(msg);
+              //                            for (int j = 0; j < net.getnLayers(); j++)
+              //                                System.out.println("ILayer " + j + " # params: " +
+              // net.getLayer(j).numParams());
+            }
+
+            boolean gradOK =
+                GradientCheckUtil.checkGradients(
+                    net,
+                    DEFAULT_EPS,
+                    DEFAULT_MAX_REL_ERROR,
+                    DEFAULT_MIN_ABS_ERROR,
+                    PRINT_RESULTS,
+                    RETURN_ON_FIRST_FAILURE,
+                    input,
+                    labels);
+
+            assertTrue(gradOK, msg);
+            TestUtils.testModelSerialization(net);
+          }
+        }
+      }
+    }
+  }
+
+  /**
+   * Gradient check for a 1D CNN that is fed a feature mask.
+   *
+   * <p>For each pooling type (MAX, AVG), convolution mode (Same, Truncate) and stride (1, 2) the
+   * test builds a network (Convolution1DNew, Subsampling1DLayer, Convolution1DNew,
+   * GlobalPoolingLayer, OutputLayer), runs the gradient check with the mask and calls {@code
+   * TestUtils.testModelSerialization}. It then overwrites two feature values and asserts that the
+   * score and the flattened gradients are unchanged.
+   */
+  @Test
+  public void testCnn1dWithMasking() {
+    int length = 12;
+    int convNIn = 2;
+    int convNOut1 = 3;
+    int convNOut2 = 4;
+    int finalNOut = 3;
+
+    int pnorm = 2;
+
+    SubsamplingLayer.PoolingType[] poolingTypes =
+        new SubsamplingLayer.PoolingType[] {
+          SubsamplingLayer.PoolingType.MAX, SubsamplingLayer.PoolingType.AVG
+        };
+
+    for (SubsamplingLayer.PoolingType poolingType : poolingTypes) {
+      for (ConvolutionMode cm :
+          new ConvolutionMode[] {ConvolutionMode.Same, ConvolutionMode.Truncate}) {
+        for (int stride : new int[] {1, 2}) {
+          // Label of the current parameter combination, used for logging and assertion messages.
+          String s = cm + ", stride=" + stride + ", pooling=" + poolingType;
+          log.info("Starting test: " + s);
+          // Re-seed on every iteration so each combination starts from the same RNG state.
+          Nd4j.getRandom().setSeed(12345);
+
+          NeuralNetConfiguration conf =
+              NeuralNetConfiguration.builder()
+                  .dataType(DataType.DOUBLE)
+                  .updater(new NoOp())
+                  .activation(Activation.TANH)
+                  .dist(new NormalDistribution(0, 1))
+                  .convolutionMode(cm)
+                  .seed(12345)
+                  .layer(
+                      Convolution1DNew.builder()
+                          .kernelSize(2)
+                          .rnnDataFormat(RNNFormat.NCW)
+                          .stride(stride)
+                          .nIn(convNIn)
+                          .nOut(convNOut1)
+                          .build())
+                  .layer(
+                      Subsampling1DLayer.builder(poolingType)
+                          .kernelSize(2)
+                          .stride(stride)
+                          .pnorm(pnorm)
+                          .build())
+                  .layer(
+                      Convolution1DNew.builder()
+                          .kernelSize(2)
+                          .rnnDataFormat(RNNFormat.NCW)
+                          .stride(stride)
+                          .nIn(convNOut1)
+                          .nOut(convNOut2)
+                          .build())
+                  .layer(GlobalPoolingLayer.builder().poolingType(PoolingType.AVG).build())
+                  .layer(
+                      OutputLayer.builder()
+                          .lossFunction(LossFunctions.LossFunction.MCXENT)
+                          .activation(Activation.SOFTMAX)
+                          .nOut(finalNOut)
+                          .build())
+                  .inputType(InputType.recurrent(convNIn, length))
+                  .build();
+
+          MultiLayerNetwork net = new MultiLayerNetwork(conf);
+          net.init();
+
+          INDArray f = Nd4j.rand(2, convNIn, length);
+          // Feature mask: every time step of example 0 is set to 1; for example 1 only the
+          // interval(0, 6) range is set to 1.
+          INDArray fm = Nd4j.create(2, length);
+          fm.get(NDArrayIndex.point(0), NDArrayIndex.all()).assign(1);
+          fm.get(NDArrayIndex.point(1), NDArrayIndex.interval(0, 6)).assign(1);
+
+          INDArray label = TestUtils.randomOneHot(2, finalNOut);
+
+          boolean gradOK =
+              GradientCheckUtil.checkGradients(
+                  new GradientCheckUtil.MLNConfig().net(net).input(f).labels(label).inputMask(fm));
+
+          assertTrue(gradOK, s);
+          TestUtils.testModelSerialization(net);
+
+          // TODO also check that masked step values don't impact forward pass, score or gradients
+
+          // Compute score and gradients, overwrite two feature values of example 1 at last-axis
+          // indices 10 and 11 (outside the range set to 1 in the mask), then recompute both.
+          DataSet ds = new DataSet(f, label, fm, null);
+          double scoreBefore = net.score(ds);
+          net.setInput(f);
+          net.setLabels(label);
+          net.setLayerMaskArrays(fm, null);
+          net.computeGradientAndScore();
+          INDArray gradBefore = net.getFlattenedGradients().dup();
+          f.putScalar(1, 0, 10, 10.0);
+          f.putScalar(1, 1, 11, 20.0);
+          double scoreAfter = net.score(ds);
+          net.setInput(f);
+          net.setLabels(label);
+          net.setLayerMaskArrays(fm, null);
+          net.computeGradientAndScore();
+          INDArray gradAfter = net.getFlattenedGradients().dup();
+
+          // The overwritten values must change neither the score nor the gradients.
+          assertEquals(scoreBefore, scoreAfter, 1e-6);
+          assertEquals(gradBefore, gradAfter);
+        }
+      }
+    }
+  }
+
+  /**
+   * Gradient check for two stacked causal 1D convolutions followed by an RNN output layer.
+   *
+   * <p>Six parameter sets are tested; set {@code i} takes the sequence length, kernel size,
+   * dilation, stride, "use feature mask" flag and "first layer has bias" flag from index {@code i}
+   * of the parallel arrays below. The label length is obtained by applying {@code
+   * Convolution1DUtils.getOutputSize} once per convolution layer. Each network is gradient-checked
+   * and then passed to {@code TestUtils.testModelSerialization}.
+   *
+   * @throws Exception if a called test utility throws
+   */
+  @Test
+  public void testCnn1Causal() throws Exception {
+    int convNIn = 2;
+    int convNOut1 = 3;
+    int convNOut2 = 4;
+    int finalNOut = 3;
+    // Number of examples in the features, mask and labels; also reported in the diagnostic output.
+    int minibatchSize = 2;
+    // Network-wide activation; kept in one place so the diagnostic output cannot drift from it.
+    Activation afn = Activation.TANH;
+
+    int[] lengths = {11, 12, 13, 9, 10, 11};
+    int[] kernels = {2, 3, 2, 4, 2, 3};
+    int[] dilations = {1, 1, 2, 1, 2, 1};
+    int[] strides = {1, 2, 1, 2, 1, 1};
+    boolean[] masks = {false, true, false, true, false, true};
+    boolean[] hasB = {true, false, true, false, true, true};
+    for (int i = 0; i < lengths.length; i++) {
+      int length = lengths[i];
+      int k = kernels[i];
+      int d = dilations[i];
+      int st = strides[i];
+      boolean mask = masks[i];
+      boolean hasBias = hasB[i];
+      // TODO has bias
+      String s = "k=" + k + ", s=" + st + " d=" + d + ", seqLen=" + length;
+      log.info("Starting test: " + s);
+      // Re-seed on every iteration so each parameter set starts from the same RNG state.
+      Nd4j.getRandom().setSeed(12345);
+
+      NeuralNetConfiguration conf =
+          NeuralNetConfiguration.builder()
+              .dataType(DataType.DOUBLE)
+              .updater(new NoOp())
+              .activation(afn)
+              .weightInit(new NormalDistribution(0, 1))
+              .seed(12345)
+              .layer(
+                  Convolution1DNew.builder()
+                      .kernelSize(k)
+                      .dilation(d)
+                      .hasBias(hasBias)
+                      .convolutionMode(ConvolutionMode.Causal)
+                      .stride(st)
+                      .nOut(convNOut1)
+                      .build())
+              .layer(
+                  Convolution1DNew.builder()
+                      .kernelSize(k)
+                      .dilation(d)
+                      .convolutionMode(ConvolutionMode.Causal)
+                      .stride(st)
+                      .nOut(convNOut2)
+                      .build())
+              .layer(
+                  RnnOutputLayer.builder()
+                      .lossFunction(LossFunctions.LossFunction.MCXENT)
+                      .activation(Activation.SOFTMAX)
+                      .nOut(finalNOut)
+                      .build())
+              .inputType(InputType.recurrent(convNIn, length, RNNFormat.NCW))
+              .build();
+
+      MultiLayerNetwork net = new MultiLayerNetwork(conf);
+      net.init();
+
+      INDArray f = Nd4j.rand(DataType.DOUBLE, minibatchSize, convNIn, length);
+      // Optional feature mask: every step of example 0 is set to 1; for example 1 only the
+      // interval(0, length - 2) range is set to 1.
+      INDArray fm = null;
+      if (mask) {
+        fm = Nd4j.create(minibatchSize, length);
+        fm.get(NDArrayIndex.point(0), NDArrayIndex.all()).assign(1);
+        fm.get(NDArrayIndex.point(1), NDArrayIndex.interval(0, length - 2)).assign(1);
+      }
+
+      // Sequence length after each of the two causal convolutions; the labels use the second one.
+      long outSize1 = Convolution1DUtils.getOutputSize(length, k, st, 0, ConvolutionMode.Causal, d);
+      long outSize2 =
+          Convolution1DUtils.getOutputSize(outSize1, k, st, 0, ConvolutionMode.Causal, d);
+
+      INDArray label = TestUtils.randomOneHotTimeSeries(minibatchSize, finalNOut, (int) outSize2);
+
+      // Report the minibatch size and activation that are actually used by this test.
+      String msg =
+          "Minibatch="
+              + minibatchSize
+              + ", activationFn="
+              + afn
+              + ", kernel = "
+              + k;
+
+      System.out.println(msg);
+      for (int j = 0; j < net.getnLayers(); j++)
+        System.out.println("ILayer " + j + " # params: " + net.getLayer(j).numParams());
+
+      boolean gradOK =
+          GradientCheckUtil.checkGradients(
+              new GradientCheckUtil.MLNConfig().net(net).input(f).labels(label).inputMask(fm));
+
+      assertTrue(gradOK, s);
+      TestUtils.testModelSerialization(net);
+    }
+  }
+}
--- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNNGradientCheckTest.java
+++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/gradientcheck/CNNGradientCheckTest.java
@ -108,8 +108,8 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
                  .updater(new NoOp())
                  .weightInit(WeightInit.XAVIER)
                  .seed(12345L)
-                  .list()
-                  .layer(0, ConvolutionLayer.builder(1, 1).nOut(6).activation(afn).build())
+
+                  .layer(0, Convolution2D.builder().kernelSize(1).stride(1).nOut(6).activation(afn).build())
                  .layer(1, OutputLayer.builder(lf).activation(outputActivation).nOut(3).build())
                  .inputType(InputType.convolutionalFlat(1, 4, 1));

--- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasAtrousConvolution2D.java
+++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasAtrousConvolution2D.java
@ -24,6 +24,7 @@ import lombok.val;
 import org.deeplearning4j.nn.api.layers.LayerConstraint;
 import org.deeplearning4j.nn.conf.CNN2DFormat;
 import org.deeplearning4j.nn.conf.inputs.InputType;
+import org.deeplearning4j.nn.conf.layers.Convolution2D;
 import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
 import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
 import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException;
@ -85,7 +86,7 @@ public class KerasAtrousConvolution2D extends KerasConvolution {
        IWeightInit init = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_INIT(),
                enforceTrainingConfig, conf, kerasMajorVersion);

-        val builder = ConvolutionLayer.builder().name(this.name)
+        val builder = Convolution2D.builder().name(this.name)
                .nOut(getNOutFromConfig(layerConfig, conf)).dropOut(this.dropout)
                .activation(getIActivationFromConfig(layerConfig, conf))
                .weightInit(init)
--- a/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution2D.java
+++ b/cavis-dnn/cavis-dnn-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution2D.java
@ -28,6 +28,7 @@ import org.deeplearning4j.nn.api.layers.LayerConstraint;
 import org.deeplearning4j.nn.conf.CNN2DFormat;
 import org.deeplearning4j.nn.conf.InputPreProcessor;
 import org.deeplearning4j.nn.conf.inputs.InputType;
+import org.deeplearning4j.nn.conf.layers.Convolution2D;
 import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
 import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
 import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException;
@ -95,7 +96,7 @@ public class KerasConvolution2D extends KerasConvolution {
        LayerConstraint weightConstraint = KerasConstraintUtils.getConstraintsFromConfig(
                layerConfig, conf.getLAYER_FIELD_W_CONSTRAINT(), conf, kerasMajorVersion);

-        final var builder = ConvolutionLayer.builder().name(this.name)
+        final var builder = Convolution2D.builder().name(this.name)
                .nOut(getNOutFromConfig(layerConfig, conf)).dropOut(this.dropout)
                .activation(getIActivationFromConfig(layerConfig, conf))
                .weightInit(init)
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/NeuralNetConfiguration.java
@ -222,6 +222,14 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration {
    // TODO do not put inside self to avoid serialization issues
    // innerConfigurations.add(0, this); //put this configuration at first place

+
+    getLayerConfigurations().stream()
+            .forEach(
+                    lconf ->
+                            lconf.setNetConfiguration(
+                                    this)); // set this as net config for all layers (defined in here, not stacked
+
+
    /**
     * Inherit network wide configuration setting to those layer configurations that do not have an
     * individual setting (nor a default)
@ -230,11 +238,6 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration {
      lconf.runInheritance();
    }

-    getLayerConfigurations().stream()
-        .forEach(
-            lconf ->
-                lconf.setNetConfiguration(
-                    this)); // set this as net config for all layers (defined in here, not stacked

    // Validate BackpropType setting
    if ((tbpttBackLength != DEFAULT_TBPTT_LENGTH || tbpttFwdLength != DEFAULT_TBPTT_LENGTH)
@ -326,7 +329,7 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration {
          LayerConfiguration layer = getFlattenedLayerConfigurations().get(i - 1);
          // convolution 1d is an edge case where it has rnn input type but the filters
          // should be the output
-          if (layer instanceof Convolution1D) {
+          if (layer instanceof Convolution1D || layer instanceof Convolution1DNew) {
            if (l instanceof DenseLayer && getInputType() instanceof InputType.InputTypeRecurrent) {
              FeedForwardLayer feedForwardLayer = (FeedForwardLayer) l;
              if (getInputType() instanceof InputType.InputTypeRecurrent) {
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/RNNFormat.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/RNNFormat.java
@ -21,7 +21,13 @@

 package org.deeplearning4j.nn.conf;

+/**
+ * N is the batch size<br/>
+ * C is the number of feature maps (that is, the number of channels)<br/>
+ * H is the image height (not used for 1D conv as this is an RNN format)<br/>
+ * W is the image width<br/>
+ */
 public enum RNNFormat implements DataFormat {
-    NCW,
-    NWC
+    /** n=batch size; c=channels/ features; w=width **/ NCW,
+    /** n=batch size; w=width; c=channels/ features **/ NWC
 }
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AbstractConvolutionLayer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AbstractConvolutionLayer.java
@ -0,0 +1,142 @@
+/*
+ *
+ *    ******************************************************************************
+ *    *
+ *    * This program and the accompanying materials are made available under the
+ *    * terms of the Apache License, Version 2.0 which is available at
+ *    * https://www.apache.org/licenses/LICENSE-2.0.
+ *    *
+ *    *  See the NOTICE file distributed with this work for additional
+ *    *  information regarding copyright ownership.
+ *    * Unless required by applicable law or agreed to in writing, software
+ *    * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ *    * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *    * License for the specific language governing permissions and limitations
+ *    * under the License.
+ *    *
+ *    * SPDX-License-Identifier: Apache-2.0
+ *    *****************************************************************************
+ *
+ */
+
+package org.deeplearning4j.nn.conf.layers;
+
+import java.util.Arrays;
+
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import lombok.*;
+import lombok.experimental.Accessors;
+import lombok.experimental.SuperBuilder;
+import lombok.extern.slf4j.Slf4j;
+import org.deeplearning4j.nn.conf.CNN2DFormat;
+import org.deeplearning4j.nn.conf.ConvolutionMode;
+import org.deeplearning4j.util.ValidationUtils;
+
+/**
+ * Abstract base configuration shared by convolution layers.
+ *
+ * <p>{@code nIn} in the input layer is the number of channels; {@code nOut} is the number of
+ * filters to be used in the net or, in other words, the channels. The builder specifies the
+ * filter/kernel size, the stride and the padding. The pooling layer takes the kernel size.
+ *
+ * <p>Supports multiple dimensions: In 1D CNN, kernel moves in 1 direction. Input and output data of
+ * 1D CNN is 2 dimensional. Mostly used on Time-Series data.
+ *
+ * <p>In 2D CNN, kernel moves in 2 directions. Input and output data of 2D CNN is 3 dimensional.
+ * Mostly used on Image data.
+ *
+ * <p>In 3D CNN, kernel moves in 3 directions. Input and output data of 3D CNN is 4 dimensional.
+ * Mostly used on 3D Image data (MRI, CT Scans, Video).
+ */
+@ToString(callSuper = true)
+@NoArgsConstructor
+@EqualsAndHashCode(callSuper = true)
+@Slf4j
+@SuperBuilder
+public abstract class AbstractConvolutionLayer extends FeedForwardLayer {
+  /** The kernel of this convolution, with one size entry per dimension. */
+  @Getter private int[] kernelSize;
+  /** The stride */
+  @Getter private int[] stride;
+  /** The padding */
+  @Getter private int[] padding;
+  /** The dilation */
+  @Getter private int[] dilation;
+  /** If true (default): include bias parameters in the model. False: no bias. */
+  @Builder.Default
+  @Getter
+  @Accessors(fluent = true)
+  @Setter
+  private boolean hasBias = true;
+  /**
+   * Set the convolution mode for the Convolution layer. See {@link ConvolutionMode} for more
+   * details. Default is {@link ConvolutionMode#Truncate}.
+   */
+  @Builder.Default @Getter @Setter
+  private ConvolutionMode convolutionMode = ConvolutionMode.Truncate;
+  /**
+   * When using CuDNN and an error is encountered, should fallback to the non-CuDNN implementation
+   * be allowed? If set to false, an exception in CuDNN will be propagated back to the user. If
+   * true (default), the built-in (non-CuDNN) implementation for ConvolutionLayer will be used.
+   */
+  @Getter @Setter @Builder.Default private boolean cudnnAllowFallback = true;
+
+  /** Defaults to "PREFER_FASTEST", but "NO_WORKSPACE" uses less memory. */
+  @Getter @Setter @Builder.Default private ConvolutionLayer.AlgoMode cudnnAlgoMode = ConvolutionLayer.AlgoMode.PREFER_FASTEST;
+
+  @Getter @Setter private ConvolutionLayer.FwdAlgo cudnnFwdAlgo;
+  @Getter @Setter private ConvolutionLayer.BwdFilterAlgo cudnnBwdFilterAlgo;
+  @Getter @Setter private ConvolutionLayer.BwdDataAlgo cudnnBwdDataAlgo;
+
+  /**
+   * The data format for the CNN activations (in and out) - NCHW (channels first) or NHWC (channels
+   * last). See {@link CNN2DFormat} for more details.<br>
+   * Default: NCHW
+   */
+  @Builder.Default @Getter @Setter
+  private CNN2DFormat convFormat =
+          CNN2DFormat.NCHW; // default value for legacy serialization reasons
+
+
+  /**
+   * Number of parameters this layer has as a result of its configuration: {@code nIn * nOut}
+   * multiplied by the product of all kernel dimensions, plus {@code nOut} bias parameters when
+   * {@link #hasBias()} is true.
+   *
+   * @return number of parameters
+   */
+  @Override
+  public long numParams() {
+    // Accumulate in a long so the product of the kernel dimensions cannot overflow int arithmetic.
+    long kernelVolume = 1L;
+    for (int dim : getKernelSize()) {
+      kernelVolume *= dim;
+    }
+    return nIn * nOut * kernelVolume + (hasBias() ? nOut : 0);
+  }
+
+  /**
+   * Builder base class adding varargs setters for kernel size, stride, padding and dilation.
+   *
+   * @param <C> concrete layer type being built
+   * @param <B> concrete builder type, returned by every setter via {@code self()}
+   */
+  public abstract static class AbstractConvolutionLayerBuilder<
+          C extends AbstractConvolutionLayer, B extends AbstractConvolutionLayerBuilder<C, B>>
+      extends FeedForwardLayerBuilder<C, B> {
+
+    /**
+     * Sets the kernel size. A warning is logged when a kernel size was already set, because the
+     * new value replaces the earlier one.
+     *
+     * @param kernelSize kernel size values, must not be null
+     * @return this builder
+     */
+    public B kernelSize(int @NonNull ... kernelSize) {
+      if (this.kernelSize != null) {
+        log.warn("You are setting the kernel more than once, last call will override prior calls.");
+      }
+      this.kernelSize = kernelSize;
+      return self();
+    }
+
+    /**
+     * Sets the stride.
+     *
+     * @param stride stride values, must not be null
+     * @return this builder
+     */
+    public B stride(int @NonNull ... stride) {
+      this.stride = stride;
+      return self();
+    }
+
+    /**
+     * Sets the padding.
+     *
+     * @param padding padding values, must not be null
+     * @return this builder
+     */
+    public B padding(int @NonNull ... padding) {
+      this.padding = padding;
+      return self();
+    }
+
+    /**
+     * Sets the dilation.
+     *
+     * @param dilation dilation values, must not be null
+     * @return this builder
+     */
+    public B dilation(int @NonNull ... dilation) {
+      this.dilation = dilation;
+      return self();
+    }
+  }
+}
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseLayerConfiguration.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BaseLayerConfiguration.java
@ -52,6 +52,16 @@ import org.nd4j.linalg.learning.regularization.WeightDecay;
@SuperBuilder
 public abstract class BaseLayerConfiguration extends LayerConfiguration
    implements ITraininableLayerConfiguration, Serializable, Cloneable {
+  /**
+   * Number of parameters this layer has as a result of its configuration. This default
+   * implementation delegates to {@code initializer().numParams(this)}.
+   *
+   * @return number of parameters
+   */
+  @Override
+  public long numParams() {
+    return initializer().numParams(this);
+  }

  /**
   * Set constraints to be applied to all layers. Default: no constraints.<br>
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CapsuleLayer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CapsuleLayer.java
@ -45,6 +45,7 @@ import org.nd4j.linalg.factory.Nd4j;
@NoArgsConstructor
 public class CapsuleLayer extends SameDiffLayer {

+
    private static final String WEIGHT_PARAM = "weight";
    private static final String BIAS_PARAM = "bias";
    /**
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1D.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1D.java
@ -36,22 +36,17 @@ import org.deeplearning4j.util.ValidationUtils;
 import org.nd4j.linalg.api.buffer.DataType;
 import org.nd4j.linalg.api.ndarray.INDArray;

-/*
+
 //TODO: We will eventually want to NOT subclass off of ConvolutionLayer.
 //Currently, we just subclass off the ConvolutionLayer and hard code the "width" dimension to 1
- * This approach treats a multivariate time series with L timesteps and
+/**
+ * This approach treats a multivariate time series with L time steps and
 * P variables as an L x 1 x P image (L rows high, 1 column wide, P
 * channels deep). The kernel should be H<L pixels high and W=1 pixels
 * wide.
-
-In 1D CNN, kernel moves in 1 direction.
-Input and output data of 1D CNN is 2 dimensional. Mostly used on Time-Series data.
-
-In 2D CNN, kernel moves in 2 directions.
-Input and output data of 2D CNN is 3 dimensional. Mostly used on Image data.
-
-In 3D CNN, kernel moves in 3 directions.
-Input and output data of 3D CNN is 4 dimensional. Mostly used on 3D Image data (MRI, CT Scans, Video).
+ *
+ * In 1D CNN, kernel moves in 1 direction. The kernel has 2-dimensions.
+ * Input and output data of 1D CNN is 2-dimensional. Mostly used on Time-Series data.
 */
@Data
@ToString(callSuper = true)
@ -223,7 +218,7 @@ public class Convolution1D extends ConvolutionLayer {
  }

  public abstract static class Convolution1DBuilder<
-          C extends ConvolutionLayer, B extends Convolution1DBuilder<C, B>>
+          C extends Convolution1D, B extends Convolution1DBuilder<C, B>>
      extends ConvolutionLayerBuilder<C, B> {

    @Override
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1DNew.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Convolution1DNew.java
@ -0,0 +1,250 @@
+/*
+ *  ******************************************************************************
+ *  *
+ *  *
+ *  * This program and the accompanying materials are made available under the
+ *  * terms of the Apache License, Version 2.0 which is available at
+ *  * https://www.apache.org/licenses/LICENSE-2.0.
+ *  *
+ *  *  See the NOTICE file distributed with this work for additional
+ *  *  information regarding copyright ownership.
+ *  * Unless required by applicable law or agreed to in writing, software
+ *  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ *  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  * License for the specific language governing permissions and limitations
+ *  * under the License.
+ *  *
+ *  * SPDX-License-Identifier: Apache-2.0
+ *  *****************************************************************************
+ */
+
+package org.deeplearning4j.nn.conf.layers;
+
+import java.util.Collection;
+import java.util.Map;
+import lombok.*;
+import lombok.experimental.SuperBuilder;
+import lombok.extern.jackson.Jacksonized;
+import lombok.extern.slf4j.Slf4j;
+import org.deeplearning4j.nn.api.ParamInitializer;
+import org.deeplearning4j.nn.conf.CNN2DFormat;
+import org.deeplearning4j.nn.conf.InputPreProcessor;
+import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
+import org.deeplearning4j.nn.conf.RNNFormat;
+import org.deeplearning4j.nn.conf.inputs.InputType;
+import org.deeplearning4j.nn.conf.memory.LayerMemoryReport;
+import org.deeplearning4j.nn.params.ConvolutionNewParamInitializer;
+import org.deeplearning4j.optimize.api.TrainingListener;
+import org.deeplearning4j.util.Convolution1DUtils;
+import org.deeplearning4j.util.ValidationUtils;
+import org.nd4j.linalg.api.buffer.DataType;
+import org.nd4j.linalg.api.ndarray.INDArray;
+
+// TODO: We will eventually want to NOT subclass off of ConvolutionLayer.
+// Currently, we just subclass off the ConvolutionLayer and hard code the "width" dimension to 1
+
+/**
+ * This approach treats a multivariate time series with L time steps and P variables as an L x 1 x P
+ * image (L rows high, 1 column wide, P channels deep). The kernel should be {@code H < L} pixels
+ * high and {@code W = 1} pixels wide.
+ *
+ * <p>In a 1D CNN, the kernel moves in one direction. The kernel has 2 dimensions. Input and output
+ * data of a 1D CNN are 2-dimensional. Mostly used on time-series data.
+ */
+@Data
+@Slf4j
+@ToString(callSuper = true)
+@EqualsAndHashCode(callSuper = true)
+@Jacksonized
+@SuperBuilder
+public class Convolution1DNew extends AbstractConvolutionLayer {
+
+  /**
+   * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last).
+   * See {@link CNN2DFormat} for more details.<br>
+   * Default: NCHW
+   *
+   * @param format Format for activations (in and out)
+   */
+  @Builder.Default
+  protected CNN2DFormat dataFormat =
+      CNN2DFormat.NCHW; // default value for legacy serialization reasons
+
+  @Builder.Default private RNNFormat rnnDataFormat = RNNFormat.NCW;
+
+  @Override
+  public ParamInitializer initializer() {
+    return ConvolutionNewParamInitializer.getInstance();
+  }
+
+  /**
+   * Creates the runtime layer for this configuration.
+   *
+   * <p>Stores {@code conf} on this configuration, asserts that nIn and nOut are set, builds a
+   * {@code Convolution1DNewLayer} from the flattened layer configuration found at {@code
+   * layerIndex}, and then wires in the listeners, index, parameter view and parameter table.
+   *
+   * @param conf network configuration; also the source of the flattened layer configurations
+   * @param trainingListeners listeners added to the created layer
+   * @param layerIndex index set on the layer and used to look up its flattened configuration
+   * @param layerParamsView parameter view handed to the layer and to {@code initializer().init}
+   * @param initializeParams passed through unchanged to {@code initializer().init}
+   * @param networkDataType data type passed to the layer constructor
+   * @return the created layer
+   */
+  @Override
+  public org.deeplearning4j.nn.api.Layer instantiate(
+      NeuralNetConfiguration conf,
+      Collection<TrainingListener> trainingListeners,
+      int layerIndex,
+      INDArray layerParamsView,
+      boolean initializeParams,
+      DataType networkDataType) {
+    setNetConfiguration(conf);
+    LayerValidation.assertNInNOutSet("Convolution1D", getName(), layerIndex, getNIn(), getNOut());
+    LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
+    // A reflective construction via lconf.getCanConfigure().getConstructor(LayerConfiguration.class,
+    // DataType.class) was sketched here but is disabled; the layer is constructed directly instead.
+    org.deeplearning4j.nn.layers.convolution.Convolution1DNewLayer ret =
+        new org.deeplearning4j.nn.layers.convolution.Convolution1DNewLayer(lconf, networkDataType);
+
+    ret.addTrainingListeners(trainingListeners);
+    ret.setIndex(layerIndex);
+    ret.setParamsViewArray(layerParamsView);
+    // The parameter table is initialized against this configuration, not against lconf
+    Map<String, INDArray> paramTable = initializer().init(this, layerParamsView, initializeParams);
+    ret.setParamTable(paramTable);
+    // NOTE(review): the layer is constructed with lconf but its configuration is then set to
+    // `this` - presumably both refer to the same object; confirm.
+    ret.setLayerConfiguration(this);
+    return ret;
+  }
+
+  /**
+   * Computes the output type of this layer for a recurrent input.
+   *
+   * @param layerIndex index of this layer, used only in the error message
+   * @param inputType input type; must be non-null and of type RNN
+   * @return recurrent type with {@code nOut} channels; the length is -1 when the input length is
+   *     negative, otherwise the value computed by {@code Convolution1DUtils.getOutputSize}
+   * @throws IllegalStateException if {@code inputType} is null or not of type RNN
+   */
+  @Override
+  public InputType getOutputType(int layerIndex, InputType inputType) {
+    boolean isRecurrent = inputType != null && inputType.getType() == InputType.Type.RNN;
+    if (!isRecurrent) {
+      throw new IllegalStateException(
+          "Invalid input for 1D CNN layer (layer index = "
+              + layerIndex
+              + ", layer name = \""
+              + getName()
+              + "\"): expect RNN input type with size > 0. Got: "
+              + inputType);
+    }
+
+    long seqLength = ((InputType.InputTypeRecurrent) inputType).getTimeSeriesLength();
+    // Negative length: probably InputType.recurrent(x) without a sequence length - propagate -1
+    long outLength =
+        seqLength < 0
+            ? -1
+            : Convolution1DUtils.getOutputSize(
+                seqLength,
+                getKernelSize()[0],
+                getStride()[0],
+                getPadding()[0],
+                getConvolutionMode(),
+                getDilation()[0]);
+
+    return InputType.recurrent(nOut, outLength, rnnDataFormat);
+  }
+
+  /**
+   * Adopts nIn and the data formats from a recurrent input type.
+   *
+   * @param inputType input type; must be non-null and of type RNN
+   * @param override when true, nIn and both formats are overwritten even if already set
+   * @throws IllegalStateException if {@code inputType} is null or not of type RNN
+   */
+  @Override
+  public void setNIn(InputType inputType, boolean override) {
+    boolean isRecurrent = inputType != null && inputType.getType() == InputType.Type.RNN;
+    if (!isRecurrent) {
+      throw new IllegalStateException(
+          "Invalid input for 1D CNN layer (layer name = \""
+              + getName()
+              + "\"): expect RNN input type with size > 0. Got: "
+              + inputType);
+    }
+
+    InputType.InputTypeRecurrent recurrent = (InputType.InputTypeRecurrent) inputType;
+    if (override || nIn <= 0) {
+      this.nIn = recurrent.getSize();
+    }
+    if (override || this.rnnDataFormat == null) {
+      this.rnnDataFormat = recurrent.getFormat();
+    }
+    // The CNN format is derived from the (possibly just updated) RNN format
+    if (override || this.dataFormat == null) {
+      if (rnnDataFormat == RNNFormat.NCW) {
+        this.dataFormat = CNN2DFormat.NCHW;
+      } else {
+        this.dataFormat = CNN2DFormat.NHWC;
+      }
+    }
+  }
+
+  /**
+   * Looks up the input pre-processor for the given input type.
+   *
+   * @param inputType input type; must not be null
+   * @return result of {@code InputTypeUtil.getPreprocessorForInputTypeRnnLayers} for this layer's
+   *     RNN data format and name
+   * @throws IllegalStateException if {@code inputType} is null
+   */
+  @Override
+  public InputPreProcessor getPreProcessorForInputType(InputType inputType) {
+    if (inputType != null) {
+      return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(
+          inputType, rnnDataFormat, getName());
+    }
+
+    throw new IllegalStateException(
+        "Invalid input for Convolution1D layer (layer name=\"" + getName() + "\"): input is null");
+  }
+
+  /**
+   * Memory report for this layer. No estimate is computed here: this implementation always returns
+   * {@code null}, regardless of the input type.
+   *
+   * @param inputType Input type to the layer (not used by this implementation)
+   * @return always {@code null}
+   */
+  @Override
+  public LayerMemoryReport getMemoryReport(InputType inputType) {
+    // NOTE(review): any caller that dereferences the result will fail with a
+    // NullPointerException - confirm whether a real estimate is required for this layer.
+    return null;
+  }
+
+  /**
+   * Always returns {@code true} for this 1D layer.
+   *
+   * @return {@code true}
+   */
+  protected boolean allowCausal() {
+    return true;
+  }
+
+  /**
+   * Concrete builder for {@link Convolution1DNew}. Fills in dilation and padding when the built
+   * instance reports them as {@code null}, validates the 1D settings and initializes constraints.
+   */
+  private static final class Convolution1DNewBuilderImpl
+      extends Convolution1DNewBuilder<Convolution1DNew, Convolution1DNewBuilderImpl> {
+    /**
+     * Builds and validates the layer configuration.
+     *
+     * @return the validated layer configuration with constraints initialized
+     */
+    public Convolution1DNew build() {
+      // First pass: this instance is only inspected to see which values are still null
+      Convolution1DNew l = new Convolution1DNew(this);
+      if (l.getDilation() == null) {
+        // Two values are passed, but this builder's dilation(...) only forwards the first one
+        dilation(1, 1);
+      }
+      if (l.getPadding() == null) {
+        padding(0);
+      }
+      // Second pass: rebuild so that the values set on the builder above are picked up
+      l = new Convolution1DNew(this);
+
+      Convolution1DUtils.validateConvolutionModePadding(l.getConvolutionMode(), l.getPadding()[0]);
+      Convolution1DUtils.validateCnn1DKernelStridePadding(
+          l.getKernelSize()[0], l.getStride()[0], l.getPadding()[0]);
+      l.initializeConstraints();
+      return l;
+    }
+  }
+
+  /**
+   * Builder for {@link Convolution1DNew}. Each size setter accepts varargs, but only the first
+   * element of the supplied array is forwarded to the parent builder.
+   */
+  public abstract static class Convolution1DNewBuilder<
+          C extends Convolution1DNew, B extends Convolution1DNewBuilder<C, B>>
+      extends AbstractConvolutionLayerBuilder<C, B> {
+
+    /**
+     * Sets the kernel size. Only {@code kernel[0]} is used; the second value passed to the parent
+     * builder is fixed to 1.
+     *
+     * @param kernel kernel size; a warning is logged when more than one value is supplied
+     * @return this builder
+     */
+    @Override
+    public B kernelSize(int @NonNull ... kernel) {
+      // Todo, we always provide arrays, but only first element is really used
+      // The argument is always a one-dimensional int[], so the array rank can never exceed 1;
+      // what has to be checked is how many values were supplied.
+      if (kernel.length > 1) {
+        log.warn(
+            "Kernel size has '{}' dimensions, only using first dimensions for 1D convolution layer.",
+            kernel.length);
+      }
+      super.kernelSize(
+          ValidationUtils.validate1NonNegative(new int[] {kernel[0]}, "kernelSize")[0], 1);
+      return self();
+    }
+
+    /**
+     * Sets the padding. Only {@code padding[0]} is used.
+     *
+     * @param padding padding values; elements after the first are ignored
+     * @return this builder
+     */
+    public B padding(int @NonNull ... padding) {
+      // Todo, we always provide arrays, but only first element is really used
+      super.padding(ValidationUtils.validate1NonNegative(new int[] {padding[0]}, "padding"));
+
+      return self();
+    }
+
+    /**
+     * Sets the dilation. Only {@code dilation[0]} is used.
+     *
+     * @param dilation dilation values; elements after the first are ignored
+     * @return this builder
+     */
+    public B dilation(int @NonNull ... dilation) {
+      // Todo, we always provide arrays, but only first element is really used
+      super.dilation(ValidationUtils.validate1NonNegative(new int[] {dilation[0]}, "dilation"));
+      return self();
+    }
+
+    /**
+     * Sets the stride. Only {@code stride[0]} is used; the second value passed to the parent
+     * builder is fixed to 1.
+     *
+     * @param stride stride values; elements after the first are ignored
+     * @return this builder
+     */
+    public B stride(int @NonNull ... stride) {
+      // Todo, we always provide arrays, but only first element is really used
+      super.stride(ValidationUtils.validate1NonNegative(new int[] {stride[0]}, "stride")[0], 1);
+      return self();
+    }
+  }
+}
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java
@ -61,6 +61,23 @@ import org.nd4j.linalg.api.ndarray.INDArray;
@EqualsAndHashCode(callSuper = true)
@SuperBuilder(builderMethodName = "innerBuilder")
 public class ConvolutionLayer extends FeedForwardLayer {
+
+  public static ConvolutionLayerBuilder<?, ?> builder() {
+    return innerBuilder();
+  }
+
+  public static ConvolutionLayerBuilder<?, ?> builder(int... kernelSize) {
+    return innerBuilder().kernelSize(kernelSize);
+  }
+
+  public static ConvolutionLayerBuilder<?, ?> builder(int[] kernelSize, int[] stride) {
+    return innerBuilder().kernelSize(kernelSize).stride(stride);
+  }
+
+  public static ConvolutionLayerBuilder<?, ?> builder(
+          int[] kernelSize, int[] stride, int[] padding) {
+    return innerBuilder().kernelSize(kernelSize).stride(stride).padding(padding);
+  }
  /**
   * Size of the convolution rows/columns
   *
@ -122,23 +139,6 @@ public class ConvolutionLayer extends FeedForwardLayer {
  @Builder.Default @JsonIgnore @EqualsAndHashCode.Exclude @Getter @Setter
  private boolean defaultValueOverriden = false;

-  public static ConvolutionLayerBuilder<?, ?> builder() {
-    return innerBuilder();
-  }
-
-  public static ConvolutionLayerBuilder<?, ?> builder(int... kernelSize) {
-    return innerBuilder().kernelSize(kernelSize);
-  }
-
-  public static ConvolutionLayerBuilder<?, ?> builder(int[] kernelSize, int[] stride) {
-    return innerBuilder().kernelSize(kernelSize).stride(stride);
-  }
-
-  public static ConvolutionLayerBuilder<?, ?> builder(
-      int[] kernelSize, int[] stride, int[] padding) {
-    return innerBuilder().kernelSize(kernelSize).stride(stride).padding(padding);
-  }
-
  public boolean hasBias() {
    return hasBias;
  }
@ -429,6 +429,7 @@ public class ConvolutionLayer extends FeedForwardLayer {
    }
  }

+  /*
  private static final class ConvolutionLayerBuilderImpl
      extends ConvolutionLayerBuilder<ConvolutionLayer, ConvolutionLayerBuilderImpl> {
    public ConvolutionLayer build() {
@ -473,6 +474,6 @@ public class ConvolutionLayer extends FeedForwardLayer {
      return l;
    }
  }
-
+*/

 }
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution2D.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Deconvolution2D.java
@ -38,110 +38,131 @@ import org.nd4j.linalg.api.buffer.DataType;
 import org.nd4j.linalg.api.ndarray.INDArray;

 /**
- * Deconvolution2D layer nIn in the input layer is the number of channels nOut is the number of filters to be used
- * in the net or in other words the channels The builder specifies the filter/kernel size, the stride and padding
- * The pooling layer takes the kernel size
+ * Deconvolution2D layer nIn in the input layer is the number of channels nOut is the number of
+ * filters to be used in the net or in other words the channels The builder specifies the
+ * filter/kernel size, the stride and padding The pooling layer takes the kernel size
 */
@Data
@ToString(callSuper = true)
@EqualsAndHashCode(callSuper = true)
-@Jacksonized @SuperBuilder(builderMethodName = "innerBuilder")
+@Jacksonized
+@SuperBuilder
 public class Deconvolution2D extends ConvolutionLayer {

+  @Builder.Default private CNN2DFormat format = CNN2DFormat.NCHW;

-@Builder.Default
-private CNN2DFormat format = CNN2DFormat.NCHW;
-    protected boolean allowCausal() {
-        //Causal convolution - allowed for 1D only
-        return false;
+  protected boolean allowCausal() {
+    // Causal convolution - allowed for 1D only
+    return false;
+  }
+
+  public boolean hasBias() {
+    return isHasBias();
+  }
+
+  @Override
+  public Deconvolution2D clone() {
+    Deconvolution2D clone = (Deconvolution2D) super.clone();
+    if (clone.getKernelSize() != null) {
+      clone.setKernelSize(clone.getKernelSize().clone());
+    }
+    if (clone.getStride() != null) {
+      clone.setStride(clone.getStride().clone());
+    }
+    if (clone.getPadding() != null) {
+      clone.setPadding(clone.getPadding().clone());
+    }
+    return clone;
+  }
+
+  @Override
+  public Layer instantiate(
+      NeuralNetConfiguration conf,
+      Collection<TrainingListener> trainingListeners,
+      int layerIndex,
+      INDArray layerParamsView,
+      boolean initializeParams,
+      DataType networkDataType) {
+    setNetConfiguration(conf);
+    LayerValidation.assertNInNOutSet("Deconvolution2D", getName(), layerIndex, getNIn(), getNOut());
+    LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
+    runInheritance();
+
+    org.deeplearning4j.nn.layers.convolution.Deconvolution2DLayer ret =
+        new org.deeplearning4j.nn.layers.convolution.Deconvolution2DLayer(lconf, networkDataType);
+
+    ret.addTrainingListeners(trainingListeners);
+    ret.setIndex(layerIndex);
+    ret.setParamsViewArray(layerParamsView);
+    Map<String, INDArray> paramTable = initializer().init(this, layerParamsView, initializeParams);
+    ret.setParamTable(paramTable);
+    ret.setLayerConfiguration(lconf);
+    return ret;
+  }
+
+  @Override
+  public ParamInitializer initializer() {
+    return DeconvolutionParamInitializer.getInstance();
+  }
+
+  @Override
+  public InputType getOutputType(int layerIndex, InputType inputType) {
+    if (inputType == null || inputType.getType() != InputType.Type.CNN) {
+      throw new IllegalStateException(
+          "Invalid input for Convolution layer (layer name=\""
+              + getName()
+              + "\"): Expected CNN input, got "
+              + inputType);
    }

-    private static final class Deconvolution2DBuilderImpl extends Deconvolution2DBuilder<Deconvolution2D, Deconvolution2DBuilderImpl> {
-        public Deconvolution2D build() {
-            Deconvolution2D l = new Deconvolution2D(this);
-            l.initializeConstraints();
-            return l;
-        }
+    return InputTypeUtil.getOutputTypeDeconvLayer(
+        inputType,
+        getKernelSize(),
+        getStride(),
+        getPadding(),
+        getDilation(),
+        getConvolutionMode(),
+        nOut,
+        layerIndex,
+        getName(),
+        Deconvolution2DLayer.class);
+  }
+
+  private static final class Deconvolution2DBuilderImpl
+      extends Deconvolution2DBuilder<Deconvolution2D, Deconvolution2DBuilderImpl> {
+    public Deconvolution2D build() {
+      Deconvolution2D l = new Deconvolution2D(this);
+      l.initializeConstraints();
+      return l;
    }
-    public static abstract class Deconvolution2DBuilder<C extends Deconvolution2D, B extends Deconvolution2DBuilder<C, B>> extends ConvolutionLayerBuilder<C, B> {
+  }

+  public abstract static class Deconvolution2DBuilder<
+          C extends Deconvolution2D, B extends Deconvolution2DBuilder<C, B>>
+      extends ConvolutionLayerBuilder<C, B> {

-
-        @Override
-        public B kernelSize(int... kernelSize) {
-            super.kernelSize(ValidationUtils.validate2NonNegative(kernelSize, false, "kernelSize"));
-            return self();
-        }
-        @Override
-        public B stride(int... stride) {
-            super.stride(ValidationUtils.validate2NonNegative(stride, false, "stride"));
-            return self();
-        }
-        @Override
-        public B padding(int... padding) {
-            super.padding(ValidationUtils.validate2NonNegative(padding, false, "padding"));
-            return self();
-        }
-        @Override
-        public B dilation(int... dilation) {
-            super.dilation(ValidationUtils.validate2NonNegative(dilation, false, "dilation"));
-            return self();
-        }
-    }
-    public boolean hasBias() {
-        return isHasBias();
+    @Override
+    public B kernelSize(int... kernelSize) {
+      super.kernelSize(ValidationUtils.validate2NonNegative(kernelSize, false, "kernelSize"));
+      return self();
    }

    @Override
-    public Deconvolution2D clone() {
-        Deconvolution2D clone = (Deconvolution2D) super.clone();
-        if (clone.getKernelSize() != null) {
-            clone.setKernelSize( clone.getKernelSize().clone());
-        }
-        if (clone.getStride() != null) {
-            clone.setStride( clone.getStride().clone());
-        }
-        if (clone.getPadding() != null) {
-            clone.setPadding( clone.getPadding().clone());
-        }
-        return clone;
+    public B stride(int... stride) {
+      super.stride(ValidationUtils.validate2NonNegative(stride, false, "stride"));
+      return self();
    }

    @Override
-    public Layer instantiate(NeuralNetConfiguration conf, Collection<TrainingListener> trainingListeners,
-                             int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) {
-        setNetConfiguration(conf);
-        LayerValidation.assertNInNOutSet("Deconvolution2D", getName(), layerIndex, getNIn(), getNOut());
-        LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
-        runInheritance();
-
-        org.deeplearning4j.nn.layers.convolution.Deconvolution2DLayer ret =
-                        new org.deeplearning4j.nn.layers.convolution.Deconvolution2DLayer(lconf, networkDataType);
-
-        ret.addTrainingListeners(trainingListeners);
-        ret.setIndex(layerIndex);
-        ret.setParamsViewArray(layerParamsView);
-        Map<String, INDArray> paramTable = initializer().init(this, layerParamsView, initializeParams);
-        ret.setParamTable(paramTable);
-        ret.setLayerConfiguration(lconf);
-        return ret;
+    public B padding(int... padding) {
+      super.padding(ValidationUtils.validate2NonNegative(padding, false, "padding"));
+      return self();
    }

    @Override
-    public ParamInitializer initializer() {
-        return DeconvolutionParamInitializer.getInstance();
+    public B dilation(int... dilation) {
+      super.dilation(ValidationUtils.validate2NonNegative(dilation, false, "dilation"));
+      return self();
    }
-
-    @Override
-    public InputType getOutputType(int layerIndex, InputType inputType) {
-        if (inputType == null || inputType.getType() != InputType.Type.CNN) {
-            throw new IllegalStateException("Invalid input for Convolution layer (layer name=\"" + getName()
-                            + "\"): Expected CNN input, got " + inputType);
-        }
-
-        return InputTypeUtil.getOutputTypeDeconvLayer(inputType, getKernelSize(), getStride(), getPadding(), getDilation(), getConvolutionMode(),
-                        nOut, layerIndex, getName(), Deconvolution2DLayer.class);
-    }
-
-
+  }
 }
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DenseLayer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/DenseLayer.java
@ -63,6 +63,7 @@ public class DenseLayer extends FeedForwardLayer {
    LayerValidation.assertNInNOutSet(
        "DenseLayerConfiguration", getName(), layerIndex, getNIn(), getNOut());
    LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
+    lconf.setNetConfiguration(conf);
    runInheritance();

    org.deeplearning4j.nn.layers.feedforward.dense.DenseLayer ret =
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerConfiguration.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LayerConfiguration.java
@ -31,6 +31,7 @@ import lombok.experimental.SuperBuilder;
 import lombok.extern.slf4j.Slf4j;
 import net.brutex.ai.dnn.api.ILayerConfiguration;
 import net.brutex.ai.dnn.api.LayerType;
+import org.deeplearning4j.nn.api.Layer;
 import org.deeplearning4j.nn.api.ParamInitializer;
 import org.deeplearning4j.nn.api.layers.LayerConstraint;
 import org.deeplearning4j.nn.conf.InputPreProcessor;
@ -56,7 +57,7 @@ import org.nd4j.linalg.learning.regularization.Regularization;
@NoArgsConstructor
 // @JsonIdentityInfo(generator= ObjectIdGenerators.IntSequenceGenerator.class, property="@id")
@Slf4j
-@SuperBuilder
+@SuperBuilder(toBuilder = true)
 public abstract class LayerConfiguration
    implements ILayerConfiguration, Serializable, Cloneable { // ITrainableLayerConfiguration

@ -66,10 +67,20 @@ public abstract class LayerConfiguration
  @Getter @Setter protected List<LayerConstraint> biasConstraints;
  @Getter @Setter protected List<LayerConstraint> constraints;
  @Getter @Setter protected IWeightNoise weightNoise;
-  @Builder.Default private @Getter @Setter LinkedHashSet<String> variables = new LinkedHashSet<>();
+  @Builder.Default private @Getter @Setter @NonNull LinkedHashSet<String> variables = new LinkedHashSet<>();
  @Getter @Setter private IDropout dropOut;
  /** The type of the layer, basically defines the base class and its properties */
  @Builder.Default @Getter @Setter @NonNull private LayerType type = LayerType.UNKNOWN;
+
+  /**
+   * Number of parameters this layer has as a result of its configuration.
+   *
+   * @return number of parameters, as reported by {@code initializer().numParams(this)}
+   */
+  public long numParams() {
+    return initializer().numParams(this);
+  }
+
+
  /**
   * A reference to the neural net configuration. This field is excluded from json serialization as
   * well as from equals check to avoid circular referenced.
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected1D.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocallyConnected1D.java
@ -22,6 +22,8 @@ package org.deeplearning4j.nn.conf.layers;

 import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
 import java.util.*;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
 import lombok.*;
 import lombok.experimental.SuperBuilder;
 import lombok.extern.jackson.Jacksonized;
@ -59,10 +61,12 @@ public class LocallyConnected1D extends SameDiffLayer {
  /**
   * @param nIn Number of inputs to the layer (input size)
   */
+  @JsonProperty("nIn")
  private long nIn;
  /**
   * @param nOut Number of outputs (output size)
   */
+  @JsonProperty("nOut")
  private long nOut;
  /**
   * @param activation Activation function for the layer
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/NoParamLayer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/NoParamLayer.java
@ -34,6 +34,16 @@ import org.nd4j.linalg.learning.regularization.Regularization;
@SuperBuilder
 public abstract class NoParamLayer extends LayerConfiguration {

+  /**
+   * Number of parameters of this layer. This always returns 0; the initializer is not consulted.
+   *
+   * @return 0
+   */
+  @Override
+  public long numParams() {
+    return 0;
+  }
+
  @Override
  public ParamInitializer initializer() {
    return EmptyParamInitializer.getInstance();
@ -58,6 +68,7 @@ public abstract class NoParamLayer extends LayerConfiguration {

  /**
   * Will always return no-Op updater.
+   *
   * @return
   */
  @Override
@ -65,7 +76,7 @@ public abstract class NoParamLayer extends LayerConfiguration {
    return Updater.NONE.getIUpdaterWithDefaultConfig();
  }

-  public static abstract class NoParamLayerBuilder<C extends NoParamLayer, B extends NoParamLayerBuilder<C,B>>
-    extends LayerConfigurationBuilder<C,B>
-  {}
+  public abstract static class NoParamLayerBuilder<
+          C extends NoParamLayer, B extends NoParamLayerBuilder<C, B>>
+      extends LayerConfigurationBuilder<C, B> {}
 }
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling1DLayer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling1DLayer.java
@ -23,6 +23,7 @@ package org.deeplearning4j.nn.conf.layers;
 import java.util.Collection;
 import java.util.Map;
 import lombok.EqualsAndHashCode;
+import lombok.NonNull;
 import lombok.ToString;
 import lombok.experimental.SuperBuilder;
 import lombok.extern.jackson.Jacksonized;
@ -35,6 +36,7 @@ import org.deeplearning4j.optimize.api.TrainingListener;
 import org.deeplearning4j.util.Convolution1DUtils;
 import org.deeplearning4j.util.Convolution2DUtils;
 import org.deeplearning4j.util.ValidationUtils;
+import org.jetbrains.annotations.NotNull;
 import org.nd4j.linalg.api.buffer.DataType;
 import org.nd4j.linalg.api.ndarray.INDArray;

@ -50,9 +52,91 @@ import org.nd4j.linalg.api.ndarray.INDArray;
@ToString(callSuper = true)
@EqualsAndHashCode(callSuper = true)
@Jacksonized
-@SuperBuilder
+@SuperBuilder(builderMethodName = "innerBuilder")
 public class Subsampling1DLayer extends SubsamplingLayer {

+  public static Subsampling1DLayerBuilder<?, ?> builder() {
+    return innerBuilder();
+  }
+
+  public static Subsampling1DLayerBuilder<?, ?> builder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType) {
+    return innerBuilder()
+            .poolingType(poolingType);
+  }
+
+  public static Subsampling1DLayerBuilder<?, ?> builder(PoolingType poolingType) {
+    return innerBuilder()
+            .poolingType(poolingType.toPoolingType());
+  }
+
+  public static Subsampling1DLayerBuilder<?, ?> builder(int... kernelSize) {
+    return innerBuilder()
+            .kernelSize(kernelSize);
+  }
+
+  public static Subsampling1DLayerBuilder<?, ?> builder(int[] kernelSize, int[] stride) {
+    return innerBuilder()
+            .kernelSize(kernelSize)
+            .stride(stride);
+  }
+
+  public static Subsampling1DLayerBuilder<?, ?> builder(int[] kernelSize, int[] stride, int[] padding) {
+    return innerBuilder()
+            .kernelSize(kernelSize)
+            .stride(stride)
+            .padding(padding);
+  }
+
+  public static Subsampling1DLayerBuilder<?, ?> builder(PoolingType poolingType, int[] kernelSize, int[] stride, int[] padding) {
+    return innerBuilder()
+            .poolingType(poolingType.toPoolingType())
+            .kernelSize(kernelSize)
+            .stride(stride)
+            .padding(padding)
+            ;
+  }
+
+  public static Subsampling1DLayerBuilder<?, ?> builder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType, int[] kernelSize, int[] stride, int[] padding) {
+    return innerBuilder()
+            .poolingType(poolingType)
+            .kernelSize(kernelSize)
+            .stride(stride)
+            .padding(padding)
+            ;
+  }
+
+  public static Subsampling1DLayerBuilder<?, ?> builder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType, int[] kernelSize) {
+    return innerBuilder()
+            .poolingType(poolingType)
+            .kernelSize(kernelSize)
+            ;
+  }
+
+  public static Subsampling1DLayerBuilder<?, ?> builder(PoolingType poolingType, int[] kernelSize) {
+    return innerBuilder()
+            .poolingType(poolingType.toPoolingType())
+            .kernelSize(kernelSize)
+            ;
+  }
+
+
+  public static Subsampling1DLayerBuilder<?, ?> builder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType, int[] kernelSize, int[] stride) {
+    return innerBuilder()
+            .poolingType(poolingType)
+            .kernelSize(kernelSize)
+            .stride(stride)
+            ;
+  }
+
+  public static Subsampling1DLayerBuilder<?, ?> builder(PoolingType poolingType, int[] kernelSize, int[] stride) {
+    return innerBuilder()
+            .poolingType(poolingType.toPoolingType())
+            .kernelSize(kernelSize)
+            .stride(stride)
+            ;
+  }
+
+
  @Override
  public org.deeplearning4j.nn.api.Layer instantiate(
      NeuralNetConfiguration conf,
@ -176,20 +260,20 @@ public class Subsampling1DLayer extends SubsamplingLayer {
   * @return
 */
    @Override
-    public B kernelSize(int... kernelSize) {
-      super.kernelSize( ValidationUtils.validate1NonNegative(kernelSize, "kernelSize")[0]);
+    public B kernelSize(int @NonNull ... kernelSize) {
+      super.kernelSize(ValidationUtils.validate1NonNegative(new int[]{kernelSize[0]}, "kernelSize")[0]); //fix width = 1
      return self();
    }

    @Override
-    public B stride(int... stride) {
-      super.stride( ValidationUtils.validate1NonNegative(stride, "stride")[0]);
+    public B stride(int @NonNull ... stride) {
+      // Same Lombok @NonNull form as kernelSize(...); only the first element is validated and used
+      super.stride( ValidationUtils.validate1NonNegative(new int[]{stride[0]}, "stride")[0]);
      return self();
    }

    @Override
-    public B padding(int... padding) {
-      super.padding( ValidationUtils.validate1NonNegative(padding, "padding")[0]);
+    public B padding(int @NonNull ... padding) {
+      // Same Lombok @NonNull form as kernelSize(...); only the first element is validated and used
+      super.padding( ValidationUtils.validate1NonNegative(new int[]{padding[0]}, "padding"));
      return self();
    }
  }
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java
@ -27,10 +27,7 @@ import lombok.*;
 import lombok.experimental.SuperBuilder;
 import lombok.extern.jackson.Jacksonized;
 import org.deeplearning4j.nn.api.ParamInitializer;
-import org.deeplearning4j.nn.conf.CNN2DFormat;
-import org.deeplearning4j.nn.conf.ConvolutionMode;
-import org.deeplearning4j.nn.conf.InputPreProcessor;
-import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
+import org.deeplearning4j.nn.conf.*;
 import org.deeplearning4j.nn.conf.inputs.InputType;
 import org.deeplearning4j.nn.conf.memory.LayerMemoryReport;
 import org.deeplearning4j.nn.conf.memory.MemoryReport;
@ -84,7 +81,8 @@ public class SubsamplingLayer extends NoParamLayer {
   * @param padding padding in the height and width dimensions
   */
  @Builder.Default protected int[] padding = new int[] {0, 0};
-  protected int pnorm;
+
+   protected int pnorm;
  @Builder.Default protected double eps = 1e-8;
  /**
   * When using CuDNN or MKLDNN and an error is encountered, should fallback to the non-helper
@ -104,6 +102,7 @@ public class SubsamplingLayer extends NoParamLayer {
   */
  protected @Builder.Default CNN2DFormat dataFormat =
      CNN2DFormat.NCHW; // default value for legacy reasons
+  protected @Builder.Default RNNFormat rnnFormat = RNNFormat.NCW;
  /**
   * When doing average pooling, should the padding values be included in the divisor or not?<br>
   * Not applicable for max and p-norm pooling.<br>
@ -127,6 +126,7 @@ public class SubsamplingLayer extends NoParamLayer {
   *     average pooling
   */
  @Builder.Default protected boolean avgPoolIncludePadInDivisor = true;
+
  /**
   * Kernel dilation. Default: {1, 1}, which is standard convolutions. Used for implementing dilated
   * convolutions, which are also known as atrous convolutions.<br>
@ -301,7 +301,7 @@ public class SubsamplingLayer extends NoParamLayer {
  public void setNIn(InputType inputType, boolean override) {
    // No op: subsampling layer doesn't have nIn value
    if (!defaultValueOverridden || override) {
-      this.dataFormat = ((InputType.InputTypeConvolutional) inputType).getFormat();
+      // Take the data format from whichever input type is supplied. An unconditional cast to
+      // InputTypeRecurrent throws ClassCastException for convolutional input, which this method
+      // accepted before the recurrent format was introduced.
+      if (inputType instanceof InputType.InputTypeConvolutional) {
+        this.dataFormat = ((InputType.InputTypeConvolutional) inputType).getFormat();
+      } else {
+        // Any other input type still goes through the cast, so unsupported types keep failing
+        // with ClassCastException instead of being accepted silently.
+        this.rnnFormat = ((InputType.InputTypeRecurrent) inputType).getFormat();
+      }
      defaultValueOverridden = true;
    }
  }
@ -355,14 +355,6 @@ public class SubsamplingLayer extends NoParamLayer {
        .build();
  }

-  public int getPnorm() {
-    return pnorm;
-  }
-
-  public double getEps() {
-    return eps;
-  }
-
  public enum PoolingType {
    MAX,
    AVG,
@ -394,33 +386,33 @@ public class SubsamplingLayer extends NoParamLayer {
      return self();
    }

-    public B eps(int eps) {
+    public B eps(double eps) {
+      // Validation runs first, so nothing is stored if validateNonNegative rejects the value
+      // (presumably for eps < 0 - confirm against ValidationUtils). The value is then recorded
+      // together with its "explicitly set" flag, and the builder itself is returned.
      ValidationUtils.validateNonNegative(eps, "eps");
      this.eps$value = eps;
      this.eps$set = true;
      return self();
    }

-    public B kernelSize(int... kernelSize) {
-      this.kernelSize$value = ValidationUtils.validate2NonNegative(kernelSize, false, "kernelSize");
+    public B kernelSize(int @NonNull... kernelSize) {
+      // Validate before touching builder state, so a rejected argument leaves the builder as is.
+      final int[] checkedKernel =
+          ValidationUtils.validate2NonNegative(kernelSize, true, "kernelSize");
+      this.kernelSize$value = checkedKernel;
      this.kernelSize$set = true;
      return self();
    }

-    public B stride(int... stride) {
-      this.stride$value = ValidationUtils.validate2NonNegative(stride, false, "stride");
+    public B stride(int @NonNull ... stride) {
+      // Validate before touching builder state, so a rejected argument leaves the builder as is.
+      final int[] checkedStride = ValidationUtils.validate2NonNegative(stride, true, "stride");
+      this.stride$value = checkedStride;
      this.stride$set = true;
      return self();
    }

-    public B padding(int... padding) {
-      this.padding$value = ValidationUtils.validate2NonNegative(padding, false, "padding");
+    public B padding(int @NonNull ... padding) {
+      // Validate before touching builder state, so a rejected argument leaves the builder as is.
+      final int[] checkedPadding = ValidationUtils.validate2NonNegative(padding, true, "padding");
+      this.padding$value = checkedPadding;
      this.padding$set = true;
      return self();
    }

-    public B dilation(int... dilation) {
-      this.dilation$value = ValidationUtils.validate2NonNegative(dilation, false, "dilation");
+    public B dilation(int @NonNull ... dilation) {
+      // Validate before touching builder state, so a rejected argument leaves the builder as is.
+      final int[] checkedDilation =
+          ValidationUtils.validate2NonNegative(dilation, true, "dilation");
+      this.dilation$value = checkedDilation;
      this.dilation$set = true;
      return self();
    }
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/FrozenLayer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/misc/FrozenLayer.java
@ -74,6 +74,7 @@ public class FrozenLayer extends LayerConfiguration {
      boolean initializeParams,
      DataType networkDataType) {

+    innerConfiguration.setNetConfiguration(conf);
    // Need to be able to instantiate a layer, from a config - for JSON -> net type situations
    org.deeplearning4j.nn.api.Layer underlying =
        innerConfiguration.instantiate(
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/AbstractSameDiffLayer.java
@ -20,6 +20,7 @@

 package org.deeplearning4j.nn.conf.layers.samediff;

+import com.fasterxml.jackson.annotation.JsonIgnore;
 import java.util.Collection;
 import java.util.List;
 import java.util.Map;
@ -52,7 +53,8 @@ import org.nd4j.linalg.learning.regularization.WeightDecay;
@EqualsAndHashCode(callSuper = true, doNotUseGetters = true)
@NoArgsConstructor
@SuperBuilder
-public abstract class AbstractSameDiffLayer extends LayerConfiguration {
+public abstract class AbstractSameDiffLayer extends LayerConfiguration
+    implements org.deeplearning4j.nn.api.ITraininableLayerConfiguration {

  /**
   * The regularization for the parameters (excluding biases) - for example {@link WeightDecay}
@ -63,16 +65,14 @@ public abstract class AbstractSameDiffLayer extends LayerConfiguration {
   * @param regularization Regularization to apply for the network parameters/weights (excluding
   *     biases)
   */
-  @Getter
-  protected List<Regularization> regularization;
+  @Getter protected List<Regularization> regularization;
  /**
   * The regularization for the biases only - for example {@link WeightDecay} -- SETTER -- Set the
   * regularization for the biases only - for example {@link WeightDecay}
   *
   * @param regularizationBias Regularization to apply for the network biases only
   */
-  @Getter
-  protected List<Regularization> regularizationBias;
+  @Getter protected List<Regularization> regularizationBias;
  /**
   * Gradient updater. For example, {@link org.nd4j.linalg.learning.config.Adam} or {@link
   * org.nd4j.linalg.learning.config.Nesterovs}
@ -87,21 +87,23 @@ public abstract class AbstractSameDiffLayer extends LayerConfiguration {
   * @param biasUpdater Updater to use for bias parameters
   */
  protected @Getter @Setter IUpdater biasUpdater;
-@Getter  @Setter
-  protected GradientNormalization gradientNormalization;
-@Getter @Setter
-  protected double gradientNormalizationThreshold = Double.NaN;
-@Getter @Setter
-  private SDLayerParams layerParams;
+
+  @Getter @Setter protected GradientNormalization gradientNormalization;
+  @Getter @Setter protected double gradientNormalizationThreshold = Double.NaN;
+  @Getter @Setter private SDLayerParams layerParams;
+
+  @Getter @Setter private DataType dataType;

  @Override
  public void runInheritance(@NotNull NeuralNetConfiguration conf) {
    super.runInheritance(conf);
-    if (this.biasUpdater == null ) this.biasUpdater = conf.getBiasUpdater();
+    if (this.biasUpdater == null) this.biasUpdater = conf.getBiasUpdater();
    if (this.updater == null) this.updater = conf.getUpdater();
-    if (this.regularizationBias == null || regularizationBias.isEmpty()) this.regularizationBias = conf.getRegularizationBias();
-    if (this.regularization == null || regularization.isEmpty()) this.regularization = conf.getRegularization();
-   // if( this.weightInit == null) this.weightInit = conf.getWeightInit();
+    if (this.regularizationBias == null || regularizationBias.isEmpty())
+      this.regularizationBias = conf.getRegularizationBias();
+    if (this.regularization == null || regularization.isEmpty())
+      this.regularization = conf.getRegularization();
+    // if( this.weightInit == null) this.weightInit = conf.getWeightInit();
    if (this.gradientNormalization == null)
      this.gradientNormalization = conf.getGradientNormalization();
    // if(this.weightInit == null) this.weightInit = conf.getWeightInit();
@ -109,6 +111,7 @@ public abstract class AbstractSameDiffLayer extends LayerConfiguration {
      this.gradientNormalizationThreshold = conf.getGradientNormalizationThreshold();
    }
  }
+
  @Override
  public List<Regularization> getRegularizationByParam(String paramName) {
    if (layerParams.isWeightParam(paramName)) {
@ -119,6 +122,7 @@ public abstract class AbstractSameDiffLayer extends LayerConfiguration {
    return null;
  }

+  @JsonIgnore
  public SDLayerParams getLayerParams() {
    if (layerParams == null) {
      layerParams = new SDLayerParams();
@ -138,7 +142,6 @@ public abstract class AbstractSameDiffLayer extends LayerConfiguration {
    return null;
  }

-
  /**
   * Define the parameters for the network. Use {@link SDLayerParams#addWeightParam(String,
   * long...)} and {@link SDLayerParams#addBiasParam(String, long...)}
@ -207,7 +210,6 @@ public abstract class AbstractSameDiffLayer extends LayerConfiguration {
        fanIn, fanOut, array.shape(), weightInit, null, paramReshapeOrder(null), array);
  }

-
  /**
   * This method generates an "all ones" mask array for use in the SameDiff model when none is
   * provided.
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java
@ -52,8 +52,8 @@ import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator;
 /** A layer with input and output, no parameters or gradients */
@NoArgsConstructor(force = true)
@Slf4j
-//@JsonIdentityInfo(generator = ObjectIdGenerators.IntSequenceGenerator.class, property = "@id")
-//@JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, property = "__class")
+// @JsonIdentityInfo(generator = ObjectIdGenerators.IntSequenceGenerator.class, property = "@id")
+// @JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, property = "__class")
 public abstract class AbstractLayer<LayerConf_T extends LayerConfiguration> implements Layer {

  private final @Getter List<String> variables = new ArrayList<>();
@ -80,10 +80,8 @@ public abstract class AbstractLayer<LayerConf_T extends LayerConfiguration> impl
  protected DataType dataType;
  protected @Getter @Setter int iterationCount;
  protected @Getter @Setter int epochCount;
-  @JsonIgnore
-  private @Getter @Setter IModel net;
-  @JsonIgnore
-  @Getter @Setter @NonNull private NeuralNetConfiguration netConfiguration;
+  @JsonIgnore private @Getter @Setter IModel net;
+  @JsonIgnore @Getter @Setter @NonNull private NeuralNetConfiguration netConfiguration;

  public AbstractLayer(@NonNull LayerConfiguration layerConf, @NonNull DataType dataType) {
    //noinspection unchecked
@ -95,19 +93,18 @@ public abstract class AbstractLayer<LayerConf_T extends LayerConfiguration> impl
    }
    this.dataType = dataType;
    if (layerConfiguration.getNetConfiguration() == null) {
-      throw new RuntimeException("You cannot create a layer from a layer configuration, that is not part of any neural network configuration.");
-      }
+      throw new RuntimeException(
+          "You cannot create a layer from a layer configuration, that is not part of any neural network configuration.");
+    }
    this.net = layerConfiguration.getNetConfiguration().getNet();
  }

  public void addTrainingListeners(TrainingListener... listeners) {
-    if(listeners != null)
-    trainingListeners.addAll(List.of(listeners));
+    // A null array is ignored; nothing is registered in that case.
+    if (listeners == null) {
+      return;
+    }
+    trainingListeners.addAll(List.of(listeners));
  }

  public void addTrainingListeners(Collection<TrainingListener> listeners) {
-    if(listeners != null)
-    trainingListeners.addAll(listeners);
+    // A null collection is ignored; nothing is registered in that case.
+    if (listeners == null) {
+      return;
+    }
+    trainingListeners.addAll(listeners);
  }

  @Override
@ -471,7 +468,7 @@ public abstract class AbstractLayer<LayerConf_T extends LayerConfiguration> impl

  @Override
  public int getInputMiniBatchSize() {
-    if(input==null) return 0;
+    if (input == null) {
+      // No input has been set, so there is no mini-batch dimension to read.
+      return 0;
+    }
    return (int) input.size(0);
  }

@ -565,8 +562,9 @@ public abstract class AbstractLayer<LayerConf_T extends LayerConfiguration> impl
   */
  @Override
  public void setParamTable(Map<String, INDArray> paramTable) {
-    log.warn("Using setParamTable on this layer {} has no effect.", getLayerConfiguration().getName());
-    //throw new RuntimeException("Not implemented");
+    // The supplied table is not stored; the call is only reported as a warning and never throws.
+    final String layerName = getLayerConfiguration().getName();
+    log.warn("Using setParamTable on this layer {} has no effect.", layerName);
  }

  /**
@ -578,7 +576,7 @@ public abstract class AbstractLayer<LayerConf_T extends LayerConfiguration> impl
   */
  @Override
  public Map<String, INDArray> getParamTable(boolean isBackprop) {
-//    throw new RuntimeException("Not implemented");
+    // This implementation has no parameter table to expose: it returns null rather than
+    // throwing, and isBackprop is not consulted.
    return null;
  }

@ -590,7 +588,7 @@ public abstract class AbstractLayer<LayerConf_T extends LayerConfiguration> impl
   */
  @Override
  public INDArray getParams() {
-    //throw new RuntimeException("Not implemented");
+    // This implementation has no parameter array to expose: it returns null rather than throwing.
    return null;
  }

--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java
@ -662,6 +662,7 @@ public abstract class BaseLayer<LayerConfT extends BaseLayerConfiguration>
   */
  public boolean hasBias() {
    // Overridden by layers supporting no bias mode: dense, output, convolutional, embedding
+    // Default answer for every layer that does not override this method: a bias is present.
    return true;
  }

--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/FrozenLayer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/FrozenLayer.java
@ -24,6 +24,7 @@ import lombok.extern.slf4j.Slf4j;
 import org.deeplearning4j.nn.api.ITraininableLayerConfiguration;
 import org.deeplearning4j.nn.api.Layer;
 import org.deeplearning4j.nn.conf.CacheMode;
+import org.deeplearning4j.nn.conf.layers.BaseLayerConfiguration;
 import org.deeplearning4j.nn.conf.misc.DummyConfig;
 import org.deeplearning4j.nn.gradient.DefaultGradient;
 import org.deeplearning4j.nn.gradient.Gradient;
@ -88,6 +89,8 @@ public class FrozenLayer extends BaseWrapperLayer {
        return underlying.activate(input, false, workspaceMgr);
    }

+
+
    @Override
    public void fit() {
        if (!logFit) {
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Convolution1DLayer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Convolution1DLayer.java
@ -47,163 +47,188 @@ import org.nd4j.linalg.factory.Broadcast;
 import org.nd4j.linalg.factory.Nd4j;

 public class Convolution1DLayer extends ConvolutionLayer {
-    public Convolution1DLayer(LayerConfiguration conf, DataType dataType) {
-        super(conf, dataType);
+  /**
+   * Creates the layer by passing both arguments straight to the superclass constructor.
+   *
+   * @param conf layer configuration handed to the superclass
+   * @param dataType data type handed to the superclass
+   */
+  public Convolution1DLayer(LayerConfiguration conf, DataType dataType) {
+    super(conf, dataType);
+  }
+
+  /**
+   * Backpropagates through the 1D convolution by executing a {@link Conv1DDerivative} op.
+   *
+   * <p>The pre-activation output is recomputed via {@code preOutput}, the activation function's
+   * backprop turns {@code epsilon} into the delta, and the op writes the input gradient plus the
+   * weight (and, when the configuration has a bias, bias) gradients into the gradient views.
+   * The op is always configured as NCW; NWC input is permuted before the call and the resulting
+   * epsilon is permuted back afterwards.
+   *
+   * @param epsilon gradient with respect to this layer's output; must be rank 3
+   * @param workspaceMgr workspace manager used for the forward pass and to allocate the result
+   * @return the parameter gradients together with the gradient with respect to the input
+   * @throws DL4JInvalidInputException if {@code epsilon} is not rank 3
+   */
+  @Override
+  public Pair<Gradient, INDArray> backpropGradient(
+      INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
+    assertInputSet(true);
+    if (epsilon.rank() != 3)
+      throw new DL4JInvalidInputException(
+          "Got rank "
+              + epsilon.rank()
+              + " array as epsilon for Convolution1D backprop with shape "
+              + Arrays.toString(epsilon.shape())
+              + ". Expected rank 3 array with shape [minibatchSize, features, length]. "
+              + layerId());
+    Pair<INDArray, INDArray> fwd = preOutput(false, true, workspaceMgr);
+    IActivation afn = getTypedLayerConfiguration().getActivationFn();
+    INDArray delta =
+        afn.backprop(fwd.getFirst(), epsilon).getFirst(); // TODO handle activation function params
+
+    org.deeplearning4j.nn.conf.layers.ConvolutionLayer c = getTypedLayerConfiguration();
+    Conv1DConfig conf =
+        Conv1DConfig.builder()
+            .k(c.getKernelSize()[0])
+            .s(c.getStride()[0])
+            .d(c.getDilation()[0])
+            .p(c.getPadding()[0])
+            .dataFormat(Conv1DConfig.NCW)
+            .paddingMode(Convolution2DUtils.paddingModeForConvolutionMode(convolutionMode))
+            .build();
+
+    INDArray w =
+        Convolution1DUtils.reshapeWeightArrayOrGradientForFormat(
+            getParam(ConvolutionParamInitializer.WEIGHT_KEY), RNNFormat.NCW);
+
+    INDArray[] inputArrs;
+    INDArray[] outputArrs;
+    // NOTE(review): the weights above are reshaped for NCW while the weight gradient below is
+    // reshaped for the configured format - confirm this asymmetry is intended.
+    INDArray wg =
+        Convolution1DUtils.reshapeWeightArrayOrGradientForFormat(
+            gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY), getRnnDataFormat());
+    // `input` here is still the field: the local variable of the same name is declared on the
+    // next statement. NOTE(review): the buffer therefore takes the un-permuted shape and the
+    // un-cast data type of the field - confirm this is what the op expects for NWC input.
+    INDArray epsOut =
+        workspaceMgr.createUninitialized(
+            ArrayType.ACTIVATION_GRAD, input.dataType(), input.shape());
+    INDArray input = this.input.castTo(dataType);
+    if (getTypedLayerConfiguration().getRnnDataFormat() == RNNFormat.NWC) {
+      input = input.permute(0, 2, 1); // NWC to NCW
    }

-
-    @Override
-    public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
-        assertInputSet(true);
-        if (epsilon.rank() != 3)
-            throw new DL4JInvalidInputException("Got rank " + epsilon.rank()
-                    + " array as epsilon for Convolution1D backprop with shape "
-                    + Arrays.toString(epsilon.shape())
-                    + ". Expected rank 3 array with shape [minibatchSize, features, length]. " + layerId());
-        Pair<INDArray,INDArray> fwd = preOutput(false,true,workspaceMgr);
-        IActivation afn = getTypedLayerConfiguration().getActivationFn();
-        INDArray delta = afn.backprop(fwd.getFirst(), epsilon).getFirst(); //TODO handle activation function params
-
-        Convolution1D c = getTypedLayerConfiguration();
-        Conv1DConfig conf = Conv1DConfig.builder()
-                .k(c.getKernelSize()[0])
-                .s(c.getStride()[0])
-                .d(c.getDilation()[0])
-                .p(c.getPadding()[0])
-                .dataFormat(Conv1DConfig.NCW)
-                .paddingMode(Convolution2DUtils.paddingModeForConvolutionMode(convolutionMode))
-                .build();
-
-        INDArray w = Convolution1DUtils.reshapeWeightArrayOrGradientForFormat(
-                getParam(ConvolutionParamInitializer.WEIGHT_KEY),
-                RNNFormat.NCW);
-
-        INDArray[] inputArrs;
-        INDArray[] outputArrs;
-        INDArray wg = Convolution1DUtils.reshapeWeightArrayOrGradientForFormat(
-                gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY),
-                getRnnDataFormat());
-        INDArray epsOut = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, input.dataType(), input.shape());
-        INDArray input = this.input.castTo(dataType);
-        if(getTypedLayerConfiguration().getRnnDataFormat() == RNNFormat.NWC) {
-            input = input.permute(0,2,1); //NHWC to NCHW
-        }
-
-        if(getTypedLayerConfiguration().hasBias()) {
-            INDArray b = getParam(ConvolutionParamInitializer.BIAS_KEY);
-            b = b.reshape(b.length());
-            inputArrs = new INDArray[]{input, w, b, delta};
-            INDArray bg = gradientViews.get(ConvolutionParamInitializer.BIAS_KEY);
-            bg = bg.reshape(bg.length());
-            outputArrs = new INDArray[]{epsOut, wg, bg};
-        } else {
-            inputArrs = new INDArray[]{input, w, delta};
-            outputArrs = new INDArray[]{epsOut, wg};
-        }
-
-        Conv1DDerivative op = new Conv1DDerivative(inputArrs, outputArrs, conf);
-        Nd4j.exec(op);
-
-        Gradient retGradient = new DefaultGradient();
-        if(getTypedLayerConfiguration().hasBias()) {
-            retGradient.setGradientFor(ConvolutionParamInitializer.BIAS_KEY, gradientViews.get(ConvolutionParamInitializer.BIAS_KEY));
-        }
-        retGradient.setGradientFor(ConvolutionParamInitializer.WEIGHT_KEY, gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY), 'c');
-        if (getRnnDataFormat() == RNNFormat.NWC) {
-            epsOut = epsOut.permute(0, 2, 1);
-        }
-        return new Pair<>(retGradient, epsOut);
+    // The op's argument lists differ only by the bias (input side) and the bias gradient
+    // (output side).
+    if (getTypedLayerConfiguration().hasBias()) {
+      INDArray b = getParam(ConvolutionParamInitializer.BIAS_KEY);
+      b = b.reshape(b.length());
+      inputArrs = new INDArray[] {input, w, b, delta};
+      INDArray bg = gradientViews.get(ConvolutionParamInitializer.BIAS_KEY);
+      bg = bg.reshape(bg.length());
+      outputArrs = new INDArray[] {epsOut, wg, bg};
+    } else {
+      inputArrs = new INDArray[] {input, w, delta};
+      outputArrs = new INDArray[] {epsOut, wg};
    }

-    @Override
-    protected Pair<INDArray, INDArray> preOutput4d(boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) {
-        Pair<INDArray,INDArray> preOutput = super.preOutput(true, forBackprop, workspaceMgr);
-        INDArray p3d = preOutput.getFirst();
-        INDArray p = preOutput.getFirst().reshape(p3d.size(0), p3d.size(1), p3d.size(2), 1);
-        preOutput.setFirst(p);
-        return preOutput;
+    Conv1DDerivative op = new Conv1DDerivative(inputArrs, outputArrs, conf);
+    Nd4j.exec(op);
+
+    // The returned gradient object references the gradient views the op has just written to.
+    Gradient retGradient = new DefaultGradient();
+    if (getTypedLayerConfiguration().hasBias()) {
+      retGradient.setGradientFor(
+          ConvolutionParamInitializer.BIAS_KEY,
+          gradientViews.get(ConvolutionParamInitializer.BIAS_KEY));
+    }
+    retGradient.setGradientFor(
+        ConvolutionParamInitializer.WEIGHT_KEY,
+        gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY),
+        'c');
+    if (getRnnDataFormat() == RNNFormat.NWC) {
+      epsOut = epsOut.permute(0, 2, 1);
+    }
+    return new Pair<>(retGradient, epsOut);
+  }
+
+  /**
+   * Runs the superclass pre-output computation and appends a trailing dimension of size 1 to its
+   * first element, using that array's first three dimension sizes.
+   *
+   * <p>NOTE(review): {@code training} is not forwarded; the superclass call always receives
+   * {@code true} - confirm this is intended.
+   *
+   * @param training not read by this method
+   * @param forBackprop forwarded to the superclass call
+   * @param workspaceMgr forwarded to the superclass call
+   * @return the superclass result with its first element reshaped to four dimensions
+   */
+  @Override
+  protected Pair<INDArray, INDArray> preOutput4d(
+      boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) {
+    final Pair<INDArray, INDArray> result = super.preOutput(true, forBackprop, workspaceMgr);
+    final INDArray rank3 = result.getFirst();
+    result.setFirst(rank3.reshape(rank3.size(0), rank3.size(1), rank3.size(2), 1));
+    return result;
+  }
+
+  /**
+   * Computes the pre-activation output of the 1D convolution by executing a {@link Conv1D} op.
+   *
+   * <p>The input is cast to the layer's data type; the op is always configured as NCW, so NWC
+   * input is permuted before the call and the op's output is permuted back afterwards. Kernel
+   * size, stride, dilation and padding are taken from index 0 of the configured arrays, and the
+   * bias is passed to the op only when the configuration has one.
+   *
+   * @param training not read by this method
+   * @param forBackprop not read by this method
+   * @param workspaceMgr not read by this method
+   * @return the op output as first element; the second element is always {@code null}
+   */
+  @Override
+  protected Pair<INDArray, INDArray> preOutput(
+      boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) {
+    assertInputSet(false);
+
+    INDArray input = this.input.castTo(dataType);
+    if (getTypedLayerConfiguration().getRnnDataFormat() == RNNFormat.NWC) {
+      input = input.permute(0, 2, 1); // NWC to NCW
    }

-    @Override
-    protected Pair<INDArray,INDArray> preOutput(boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) {
-        assertInputSet(false);
+    org.deeplearning4j.nn.conf.layers.ConvolutionLayer c = getTypedLayerConfiguration();
+    Conv1DConfig conf =
+        Conv1DConfig.builder()
+            .k(c.getKernelSize()[0])
+            .s(c.getStride()[0])
+            .d(c.getDilation()[0])
+            .p(c.getPadding()[0])
+            .dataFormat(Conv1DConfig.NCW)
+            .paddingMode(Convolution2DUtils.paddingModeForConvolutionMode(convolutionMode))
+            .build();

-        INDArray input = this.input.castTo(dataType);
-        if(getTypedLayerConfiguration().getRnnDataFormat() == RNNFormat.NWC) {
-            input = input.permute(0,2,1); //NHWC to NCHW
-        }
+    INDArray w =
+        Convolution1DUtils.reshapeWeightArrayOrGradientForFormat(
+            getParam(ConvolutionParamInitializer.WEIGHT_KEY), RNNFormat.NCW);

-        Convolution1D c = getTypedLayerConfiguration();
-        Conv1DConfig conf = Conv1DConfig.builder()
-                .k(c.getKernelSize()[0])
-                .s(c.getStride()[0])
-                .d(c.getDilation()[0])
-                .p(c.getPadding()[0])
-                .dataFormat(Conv1DConfig.NCW)
-                .paddingMode(Convolution2DUtils.paddingModeForConvolutionMode(convolutionMode))
-                .build();
-
-
-        INDArray w = Convolution1DUtils.reshapeWeightArrayOrGradientForFormat(
-                getParam(ConvolutionParamInitializer.WEIGHT_KEY)
-                ,RNNFormat.NCW);
-
-
-        INDArray[] inputs;
-        if(getTypedLayerConfiguration().hasBias()) {
-            INDArray b = getParam(ConvolutionParamInitializer.BIAS_KEY);
-            b = b.reshape(b.length());
-            inputs = new INDArray[]{input, w, b};
-        } else {
-            inputs = new INDArray[]{input, w};
-        }
-
-        Conv1D op = new Conv1D(inputs, null, conf);
-        List<LongShapeDescriptor> outShape = op.calculateOutputShape();
-        op.setOutputArgument(0, Nd4j.create(outShape.get(0), false));
-        Nd4j.exec(op);
-        INDArray output = op.getOutputArgument(0);
-
-        if(getRnnDataFormat() == RNNFormat.NWC) {
-            output = output.permute(0,2,1);
-        }
-
-        return new Pair<>(output, null);
+    INDArray[] inputs;
+    if (getTypedLayerConfiguration().hasBias()) {
+      INDArray b = getParam(ConvolutionParamInitializer.BIAS_KEY);
+      b = b.reshape(b.length());
+      inputs = new INDArray[] {input, w, b};
+    } else {
+      inputs = new INDArray[] {input, w};
    }

+    // The op is created without outputs; the output array is allocated from the shape the op
+    // itself reports and attached before execution.
+    Conv1D op = new Conv1D(inputs, null, conf);
+    List<LongShapeDescriptor> outShape = op.calculateOutputShape();
+    op.setOutputArgument(0, Nd4j.create(outShape.get(0), false));
+    Nd4j.exec(op);
+    INDArray output = op.getOutputArgument(0);

-    @Override
-    public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) {
-        INDArray act4d = super.activate(training, workspaceMgr);
-        INDArray act3d = act4d.rank() > 3 ?
-                act4d.reshape(act4d.size(0), act4d.size(1), act4d.size(2)) : act4d;
-
-        if(maskArray != null) {
-            INDArray maskOut = feedForwardMaskArray(maskArray, MaskState.Active, (int)act3d.size(0)).getFirst();
-            Preconditions.checkState(act3d.size(0) == maskOut.size(0) && act3d.size(2) == maskOut.size(1),
-                    "Activations dimensions (0,2) and mask dimensions (0,1) don't match: Activations %s, Mask %s",
-                    act3d.shape(), maskOut.shape());
-            Broadcast.mul(act3d, maskOut, act3d, 0, 2);
-        }
-
-        return workspaceMgr.leverageTo(ArrayType.ACTIVATIONS, act3d);   //Should be zero copy most of the time
+    if (getRnnDataFormat() == RNNFormat.NWC) {
+      output = output.permute(0, 2, 1);
    }

-    @Override
-    public Pair<INDArray, MaskState> feedForwardMaskArray(INDArray maskArray, MaskState currentMaskState,
-                                                          int minibatchSize) {
-        INDArray reduced = Convolution2DUtils.cnn1dMaskReduction(maskArray, getTypedLayerConfiguration().getKernelSize()[0],
-                getTypedLayerConfiguration().getStride()[0], getTypedLayerConfiguration().getPadding()[0], getTypedLayerConfiguration().getDilation()[0],
-                getTypedLayerConfiguration().getConvolutionMode());
-        return new Pair<>(reduced, currentMaskState);
+    return new Pair<>(output, null);
+  }
+
+  /**
+   * Computes the activations through the superclass, collapses them to rank 3 when they come
+   * back with a higher rank, and multiplies them in place by the reduced mask when a mask array
+   * is set.
+   *
+   * @param training forwarded to the superclass call
+   * @param workspaceMgr used for the superclass call and to leverage the result
+   * @return the (optionally masked) activations, leveraged to the activations workspace
+   */
+  @Override
+  public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) {
+    INDArray activations = super.activate(training, workspaceMgr);
+    if (activations.rank() > 3) {
+      // Keep only the first three dimension sizes.
+      activations =
+          activations.reshape(activations.size(0), activations.size(1), activations.size(2));
+    }
+
+    if (maskArray != null) {
+      final int miniBatch = (int) activations.size(0);
+      final INDArray reducedMask =
+          feedForwardMaskArray(maskArray, MaskState.Active, miniBatch).getFirst();
+      final boolean shapesAgree =
+          activations.size(0) == reducedMask.size(0) && activations.size(2) == reducedMask.size(1);
+      Preconditions.checkState(
+          shapesAgree,
+          "Activations dimensions (0,2) and mask dimensions (0,1) don't match: Activations %s, Mask %s",
+          activations.shape(),
+          reducedMask.shape());
+      // In-place multiply: the activations array is both an input and the result buffer.
+      Broadcast.mul(activations, reducedMask, activations, 0, 2);
    }

-    @Override
-    public Convolution1D getTypedLayerConfiguration() {
-        return (Convolution1D)layerConfiguration;
-    }
+    // Should be zero copy most of the time
+    return workspaceMgr.leverageTo(ArrayType.ACTIVATIONS, activations);
+  }

-    private RNNFormat getRnnDataFormat(){
-        return getTypedLayerConfiguration().getRnnDataFormat();
-    }
+  /**
+   * Reduces the incoming mask using index 0 of this layer's kernel size, stride, padding and
+   * dilation together with its convolution mode; the mask state is passed through unchanged.
+   *
+   * @param maskArray mask to reduce
+   * @param currentMaskState mask state, returned as-is
+   * @param minibatchSize not read by this implementation
+   * @return the reduced mask paired with {@code currentMaskState}
+   */
+  @Override
+  public Pair<INDArray, MaskState> feedForwardMaskArray(
+      INDArray maskArray, MaskState currentMaskState, int minibatchSize) {
+    final Convolution1D layerConf = getTypedLayerConfiguration();
+    final INDArray reducedMask =
+        Convolution2DUtils.cnn1dMaskReduction(
+            maskArray,
+            layerConf.getKernelSize()[0],
+            layerConf.getStride()[0],
+            layerConf.getPadding()[0],
+            layerConf.getDilation()[0],
+            layerConf.getConvolutionMode());
+    return new Pair<>(reducedMask, currentMaskState);
+  }
+
+  /**
+   * Shortcut for reading the RNN data format from the typed layer configuration.
+   *
+   * @return the format reported by the layer configuration
+   */
+  private RNNFormat getRnnDataFormat() {
+    return getTypedLayerConfiguration().getRnnDataFormat();
+  }
+
+  /**
+   * Narrows the superclass's typed layer configuration to {@link Convolution1D}.
+   *
+   * @return the layer configuration obtained from the superclass, cast to {@link Convolution1D}
+   */
+  @Override
+  public Convolution1D getTypedLayerConfiguration() {
+    return (Convolution1D) super.getTypedLayerConfiguration();
+  }
 }
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Convolution1DNewLayer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/Convolution1DNewLayer.java
@ -0,0 +1,226 @@
+/*
+ *  ******************************************************************************
+ *  *
+ *  *
+ *  * This program and the accompanying materials are made available under the
+ *  * terms of the Apache License, Version 2.0 which is available at
+ *  * https://www.apache.org/licenses/LICENSE-2.0.
+ *  *
+ *  *  See the NOTICE file distributed with this work for additional
+ *  *  information regarding copyright ownership.
+ *  * Unless required by applicable law or agreed to in writing, software
+ *  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ *  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  * License for the specific language governing permissions and limitations
+ *  * under the License.
+ *  *
+ *  * SPDX-License-Identifier: Apache-2.0
+ *  *****************************************************************************
+ */
+
+package org.deeplearning4j.nn.layers.convolution;
+
+import java.util.Arrays;
+import java.util.List;
+import org.deeplearning4j.exception.DL4JInvalidInputException;
+import org.deeplearning4j.nn.api.MaskState;
+import org.deeplearning4j.nn.conf.RNNFormat;
+import org.deeplearning4j.nn.conf.layers.Convolution1DNew;
+import org.deeplearning4j.nn.conf.layers.LayerConfiguration;
+import org.deeplearning4j.nn.gradient.DefaultGradient;
+import org.deeplearning4j.nn.gradient.Gradient;
+import org.deeplearning4j.nn.params.ConvolutionParamInitializer;
+import org.deeplearning4j.nn.workspace.ArrayType;
+import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr;
+import org.deeplearning4j.util.Convolution1DUtils;
+import org.deeplearning4j.util.Convolution2DUtils;
+import org.nd4j.common.base.Preconditions;
+import org.nd4j.common.primitives.Pair;
+import org.nd4j.linalg.activations.IActivation;
+import org.nd4j.linalg.api.buffer.DataType;
+import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.api.ops.impl.layers.convolution.Conv1D;
+import org.nd4j.linalg.api.ops.impl.layers.convolution.Conv1DDerivative;
+import org.nd4j.linalg.api.ops.impl.layers.convolution.config.Conv1DConfig;
+import org.nd4j.linalg.api.shape.LongShapeDescriptor;
+import org.nd4j.linalg.factory.Broadcast;
+import org.nd4j.linalg.factory.Nd4j;
+
+/**
+ * Layer implementation for a 1D convolution driven by a {@link Convolution1DNew} configuration.
+ *
+ * <p>The forward and backward passes are executed through the ND4J {@link Conv1D} and {@link
+ * Conv1DDerivative} ops, always configured for the NCW layout. When the configured {@link
+ * RNNFormat} is NWC the input is permuted before the op runs and the result is permuted back.
+ */
+public class Convolution1DNewLayer<Layer_ConfT extends Convolution1DNew>
+    extends ConvolutionNewLayer<Layer_ConfT> {
+  public Convolution1DNewLayer(LayerConfiguration conf, DataType dataType) {
+    super(conf, dataType);
+  }
+
+  /**
+   * Builds the op configuration shared by the forward ({@link Conv1D}) and backward ({@link
+   * Conv1DDerivative}) passes from the first kernel/stride/dilation/padding entry of the layer
+   * configuration. The data format is always NCW; callers permute NWC data themselves.
+   */
+  private Conv1DConfig buildConv1DConfig() {
+    Convolution1DNew c = getTypedLayerConfiguration();
+    return Conv1DConfig.builder()
+        .k(c.getKernelSize()[0])
+        .s(c.getStride()[0])
+        .d(c.getDilation()[0])
+        .p(c.getPadding()[0])
+        .dataFormat(Conv1DConfig.NCW)
+        .paddingMode(Convolution2DUtils.paddingModeForConvolutionMode(convolutionMode))
+        .build();
+  }
+
+  /**
+   * Computes parameter gradients and the epsilon for the layer below using {@link
+   * Conv1DDerivative}.
+   *
+   * <p>The op writes the weight (and, if present, bias) gradients directly into the gradient
+   * views of this layer.
+   *
+   * @param epsilon rank-3 gradient with respect to this layer's activations
+   * @param workspaceMgr workspace manager used to allocate the returned epsilon
+   * @return pair of (gradient holding the weight and optional bias gradient views, epsilon for the
+   *     layer below)
+   * @throws DL4JInvalidInputException if {@code epsilon} is not rank 3
+   */
+  @Override
+  public Pair<Gradient, INDArray> backpropGradient(
+      INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
+    assertInputSet(true);
+    if (epsilon.rank() != 3) {
+      throw new DL4JInvalidInputException(
+          "Got rank "
+              + epsilon.rank()
+              + " array as epsilon for Convolution1D backprop with shape "
+              + Arrays.toString(epsilon.shape())
+              + ". Expected rank 3 array with shape [minibatchSize, features, length]. "
+              + layerId());
+    }
+    Pair<INDArray, INDArray> fwd = preOutput(false, true, workspaceMgr);
+    IActivation afn = getTypedLayerConfiguration().getActivationFn();
+    INDArray delta =
+        afn.backprop(fwd.getFirst(), epsilon).getFirst(); // TODO handle activation function params
+
+    Conv1DConfig conf = buildConv1DConfig();
+
+    INDArray w =
+        Convolution1DUtils.reshapeWeightArrayOrGradientForFormat(
+            getParam(ConvolutionParamInitializer.WEIGHT_KEY), RNNFormat.NCW);
+
+    INDArray[] inputArrs;
+    INDArray[] outputArrs;
+    INDArray wg =
+        Convolution1DUtils.reshapeWeightArrayOrGradientForFormat(
+            gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY), getRnnDataFormat());
+    // NOTE(review): epsOut takes the data type and shape of the stored layer input, i.e. before
+    // the cast and before any NWC -> NCW permute below, and delta is handed to the op without a
+    // permute. Confirm this is intended when the layer runs on NWC data.
+    INDArray epsOut =
+        workspaceMgr.createUninitialized(
+            ArrayType.ACTIVATION_GRAD, this.input.dataType(), this.input.shape());
+    INDArray input = this.input.castTo(dataType);
+    if (getRnnDataFormat() == RNNFormat.NWC) {
+      input = input.permute(0, 2, 1); // NWC to NCW
+    }
+
+    boolean hasBias = getTypedLayerConfiguration().hasBias();
+    if (hasBias) {
+      INDArray b = getParam(ConvolutionParamInitializer.BIAS_KEY);
+      b = b.reshape(b.length());
+      inputArrs = new INDArray[] {input, w, b, delta};
+      INDArray bg = gradientViews.get(ConvolutionParamInitializer.BIAS_KEY);
+      bg = bg.reshape(bg.length());
+      outputArrs = new INDArray[] {epsOut, wg, bg};
+    } else {
+      inputArrs = new INDArray[] {input, w, delta};
+      outputArrs = new INDArray[] {epsOut, wg};
+    }
+
+    Conv1DDerivative op = new Conv1DDerivative(inputArrs, outputArrs, conf);
+    Nd4j.exec(op);
+
+    Gradient retGradient = new DefaultGradient();
+    if (hasBias) {
+      retGradient.setGradientFor(
+          ConvolutionParamInitializer.BIAS_KEY,
+          gradientViews.get(ConvolutionParamInitializer.BIAS_KEY));
+    }
+    retGradient.setGradientFor(
+        ConvolutionParamInitializer.WEIGHT_KEY,
+        gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY),
+        'c');
+    if (getRnnDataFormat() == RNNFormat.NWC) {
+      epsOut = epsOut.permute(0, 2, 1);
+    }
+    return new Pair<>(retGradient, epsOut);
+  }
+
+  /**
+   * Returns the superclass pre-output with a trailing singleton dimension appended, so that a
+   * rank-3 result becomes rank 4.
+   *
+   * <p>NOTE(review): the {@code training} argument is not forwarded; the superclass is always
+   * called with {@code training = true}. Confirm this is intentional.
+   */
+  @Override
+  protected Pair<INDArray, INDArray> preOutput4d(
+      boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) {
+    Pair<INDArray, INDArray> preOutput = super.preOutput(true, forBackprop, workspaceMgr);
+    INDArray p3d = preOutput.getFirst();
+    INDArray p = p3d.reshape(p3d.size(0), p3d.size(1), p3d.size(2), 1);
+    preOutput.setFirst(p);
+    return preOutput;
+  }
+
+  /**
+   * Runs the forward convolution through the {@link Conv1D} op.
+   *
+   * @param training not used by this implementation
+   * @param forBackprop not used by this implementation
+   * @param workspaceMgr not used by this implementation
+   * @return pair whose first entry is the op output (permuted back when the layer format is NWC)
+   *     and whose second entry is always {@code null}
+   */
+  @Override
+  protected Pair<INDArray, INDArray> preOutput(
+      boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) {
+    assertInputSet(false);
+
+    INDArray input = this.input.castTo(dataType);
+    if (getRnnDataFormat() == RNNFormat.NWC) {
+      input = input.permute(0, 2, 1); // NWC to NCW
+    }
+
+    Conv1DConfig conf = buildConv1DConfig();
+
+    INDArray w =
+        Convolution1DUtils.reshapeWeightArrayOrGradientForFormat(
+            getParam(ConvolutionParamInitializer.WEIGHT_KEY), RNNFormat.NCW);
+
+    INDArray[] inputs;
+    if (getTypedLayerConfiguration().hasBias()) {
+      INDArray b = getParam(ConvolutionParamInitializer.BIAS_KEY);
+      b = b.reshape(b.length());
+      inputs = new INDArray[] {input, w, b};
+    } else {
+      inputs = new INDArray[] {input, w};
+    }
+
+    // The op is created without outputs; the output array is allocated from the shape the op
+    // itself reports and attached before execution.
+    Conv1D op = new Conv1D(inputs, null, conf);
+    List<LongShapeDescriptor> outShape = op.calculateOutputShape();
+    op.setOutputArgument(0, Nd4j.create(outShape.get(0), false));
+    Nd4j.exec(op);
+    INDArray output = op.getOutputArgument(0);
+
+    if (getRnnDataFormat() == RNNFormat.NWC) {
+      output = output.permute(0, 2, 1); // NCW back to NWC
+    }
+
+    return new Pair<>(output, null);
+  }
+
+  /**
+   * Computes the activations via the superclass, drops a trailing fourth dimension if one is
+   * present, and, when a mask array is set, multiplies the activations in place by the reduced
+   * mask broadcast along dimensions 0 and 2.
+   */
+  @Override
+  public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) {
+    INDArray act4d = super.activate(training, workspaceMgr);
+    INDArray act3d =
+        act4d.rank() > 3 ? act4d.reshape(act4d.size(0), act4d.size(1), act4d.size(2)) : act4d;
+
+    if (maskArray != null) {
+      INDArray maskOut =
+          feedForwardMaskArray(maskArray, MaskState.Active, (int) act3d.size(0)).getFirst();
+      Preconditions.checkState(
+          act3d.size(0) == maskOut.size(0) && act3d.size(2) == maskOut.size(1),
+          "Activations dimensions (0,2) and mask dimensions (0,1) don't match: Activations %s, Mask %s",
+          act3d.shape(),
+          maskOut.shape());
+      Broadcast.mul(act3d, maskOut, act3d, 0, 2);
+    }
+
+    return workspaceMgr.leverageTo(
+        ArrayType.ACTIVATIONS, act3d); // Should be zero copy most of the time
+  }
+
+  /**
+   * Passes the incoming mask to {@code Convolution2DUtils.cnn1dMaskReduction} together with this
+   * layer's first kernel/stride/padding/dilation entries and convolution mode.
+   *
+   * @param maskArray mask array for the layer input
+   * @param currentMaskState mask state, returned unchanged
+   * @param minibatchSize not used by this implementation
+   * @return pair of the reduced mask and {@code currentMaskState}
+   */
+  @Override
+  public Pair<INDArray, MaskState> feedForwardMaskArray(
+      INDArray maskArray, MaskState currentMaskState, int minibatchSize) {
+    Convolution1DNew c = getTypedLayerConfiguration();
+    INDArray reduced =
+        Convolution2DUtils.cnn1dMaskReduction(
+            maskArray,
+            c.getKernelSize()[0],
+            c.getStride()[0],
+            c.getPadding()[0],
+            c.getDilation()[0],
+            c.getConvolutionMode());
+    return new Pair<>(reduced, currentMaskState);
+  }
+
+  /** Returns the RNN data format held by this layer's configuration. */
+  private RNNFormat getRnnDataFormat() {
+    return getTypedLayerConfiguration().getRnnDataFormat();
+  }
+}
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayer.java
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionNewLayer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionNewLayer.java
@ -0,0 +1,753 @@
+/*
+ *  ******************************************************************************
+ *  *
+ *  *
+ *  * This program and the accompanying materials are made available under the
+ *  * terms of the Apache License, Version 2.0 which is available at
+ *  * https://www.apache.org/licenses/LICENSE-2.0.
+ *  *
+ *  *  See the NOTICE file distributed with this work for additional
+ *  *  information regarding copyright ownership.
+ *  * Unless required by applicable law or agreed to in writing, software
+ *  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ *  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  * License for the specific language governing permissions and limitations
+ *  * under the License.
+ *  *
+ *  * SPDX-License-Identifier: Apache-2.0
+ *  *****************************************************************************
+ */
+
+package org.deeplearning4j.nn.layers.convolution;
+
+import java.util.Arrays;
+
+import lombok.extern.slf4j.Slf4j;
+import org.deeplearning4j.common.config.DL4JClassLoading;
+import org.deeplearning4j.exception.DL4JInvalidInputException;
+import org.deeplearning4j.nn.api.MaskState;
+import org.deeplearning4j.nn.conf.CNN2DFormat;
+import org.deeplearning4j.nn.conf.CacheMode;
+import org.deeplearning4j.nn.conf.ConvolutionMode;
+import org.deeplearning4j.nn.conf.layers.Convolution1DNew;
+import org.deeplearning4j.nn.conf.layers.LayerConfiguration;
+import org.deeplearning4j.nn.gradient.DefaultGradient;
+import org.deeplearning4j.nn.gradient.Gradient;
+import org.deeplearning4j.nn.layers.BaseLayer;
+import org.deeplearning4j.nn.layers.LayerHelper;
+import org.deeplearning4j.nn.layers.mkldnn.MKLDNNConvHelper;
+import org.deeplearning4j.nn.params.ConvolutionParamInitializer;
+import org.deeplearning4j.nn.workspace.ArrayType;
+import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr;
+import org.deeplearning4j.util.Convolution2DUtils;
+import org.nd4j.common.primitives.Pair;
+import org.nd4j.linalg.activations.IActivation;
+import org.nd4j.linalg.api.buffer.DataType;
+import org.nd4j.linalg.api.memory.MemoryWorkspace;
+import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.api.shape.Shape;
+import org.nd4j.linalg.convolution.Convolution;
+import org.nd4j.linalg.exception.ND4JArraySizeException;
+import org.nd4j.linalg.exception.ND4JOpProfilerException;
+import org.nd4j.linalg.factory.Nd4j;
+
+@Slf4j
+public class ConvolutionNewLayer<
+        LayerConf_T extends org.deeplearning4j.nn.conf.layers.Convolution1DNew>
+    extends BaseLayer<org.deeplearning4j.nn.conf.layers.Convolution1DNew> {
+
+  protected INDArray i2d;
+  protected ConvolutionHelper helper = null;
+  protected int helperCountFail = 0;
+  protected ConvolutionMode convolutionMode;
+  protected transient INDArray dummyBias; // Used only when: hasBias == false AND helpers are used
+  protected transient INDArray dummyBiasGrad; // As above
+
+
+  /**
+   * Creates the layer, sets up the backend helper and caches the convolution mode taken from the
+   * supplied configuration (left unset if the configuration is neither a {@code Convolution1DNew}
+   * nor a {@code ConvolutionLayer} configuration).
+   *
+   * @param conf layer configuration
+   * @param dataType data type passed to the superclass and used for the helper
+   */
+  public ConvolutionNewLayer(LayerConfiguration conf, DataType dataType) {
+    super(conf, dataType);
+    initializeHelper();
+    if (conf instanceof Convolution1DNew) {
+      final Convolution1DNew conv1dConf = (Convolution1DNew) conf;
+      this.convolutionMode = conv1dConf.getConvolutionMode();
+    } else if (conf instanceof org.deeplearning4j.nn.conf.layers.ConvolutionLayer) {
+      final org.deeplearning4j.nn.conf.layers.ConvolutionLayer conv2dConf =
+          (org.deeplearning4j.nn.conf.layers.ConvolutionLayer) conf;
+      this.convolutionMode = conv2dConf.getConvolutionMode();
+    }
+  }
+
+  /**
+   * Creates the backend-specific convolution helper, if any, based on the "backend" property of
+   * the executioner's environment information: a reflectively loaded cuDNN helper for "CUDA", an
+   * {@link MKLDNNConvHelper} for "CPU", otherwise none.
+   *
+   * <p>Whichever helper was created is discarded again (set to {@code null}) when it reports that
+   * it is not supported.
+   */
+  void initializeHelper() {
+    String backend = Nd4j.getExecutioner().getEnvironmentInformation().getProperty("backend");
+    if ("CUDA".equalsIgnoreCase(backend)) {
+      helper =
+          DL4JClassLoading.createNewInstance(
+              "org.deeplearning4j.cuda.convolution.CudnnConvolutionHelper",
+              ConvolutionHelper.class,
+              dataType);
+      log.debug("CudnnConvolutionHelper successfully initialized");
+    } else if ("CPU".equalsIgnoreCase(backend)) {
+      helper = new MKLDNNConvHelper(dataType);
+      log.trace("Created MKLDNNConvHelper, layer {}", getTypedLayerConfiguration().getName());
+    }
+
+    // Single, null-safe support check shared by both backends. The CUDA branch previously ran an
+    // additional unguarded and unlogged checkSupported() of its own, which was redundant with
+    // this one.
+    if (helper != null && !helper.checkSupported()) {
+      log.debug("Removed helper {} as not supported", helper.getClass());
+      helper = null;
+    }
+  }
+
+  /** Identifies this layer as a convolutional layer. */
+  @Override
+  public Type type() {
+    final Type layerType = Type.CONVOLUTIONAL;
+    return layerType;
+  }
+
+  /**
+   * Exposes the layer configuration with its concrete {@link Convolution1DNew} type.
+   *
+   * @return the typed layer configuration obtained from the superclass
+   */
+  @Override
+  public Convolution1DNew getTypedLayerConfiguration() {
+    final Convolution1DNew typedConf = super.getTypedLayerConfiguration();
+    return typedConf;
+  }
+
+  /**
+   * Computes the weight and bias gradients and the epsilon (activation gradient) for the layer
+   * below.
+   *
+   * <p>If a helper is configured and either has not failed yet or fallback is disallowed, the
+   * helper is tried first. If it throws and fallback is allowed, the failure is counted and the
+   * built-in im2col / gemm / col2im implementation below is used instead.
+   *
+   * @param epsilon gradient with respect to this layer's activations; cast to the layer data type
+   *     if it differs
+   * @param workspaceMgr workspace manager used for noise parameters and for allocating the
+   *     returned epsilon
+   * @return pair of (gradient holding the weight and optional bias gradient views, epsilon for the
+   *     layer below)
+   * @throws RuntimeException if the helper fails and fallback is not allowed
+   */
+  @Override
+  public Pair<Gradient, INDArray> backpropGradient(
+      INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
+    assertInputSet(true);
+    INDArray weights =
+        getParamWithNoise(ConvolutionParamInitializer.WEIGHT_KEY, true, workspaceMgr);
+    INDArray bias = getParamWithNoise(ConvolutionParamInitializer.BIAS_KEY, true, workspaceMgr);
+
+    INDArray input = this.input.castTo(dataType); // No op if correct type
+    if (epsilon.dataType() != dataType) epsilon = epsilon.castTo(dataType);
+
+    // The helper receives the input in its original layout; the built-in path works on NCHW.
+    INDArray origInput = input;
+    if (getTypedLayerConfiguration().getConvFormat() != CNN2DFormat.NCHW) {
+      input = input.permute(0, 3, 1, 2); // NHWC to NCHW
+      epsilon = epsilon.permute(0, 3, 1, 2); // NHWC to NCHW
+    }
+
+    long miniBatch = input.size(0);
+    int inH = (int) input.size(2);
+    int inW = (int) input.size(3);
+
+    long outDepth = weights.size(0);
+    long inDepth = weights.size(1);
+    int kH = (int) weights.size(2);
+    int kW = (int) weights.size(3);
+
+    int[] dilation = getTypedLayerConfiguration().getDilation();
+    int[] kernel = getTypedLayerConfiguration().getKernelSize();
+    int[] strides = getTypedLayerConfiguration().getStride();
+    int[] pad;
+    int[] outSize;
+    if (convolutionMode == ConvolutionMode.Same) {
+      outSize =
+          Convolution2DUtils.getOutputSize(
+              input,
+              kernel,
+              strides,
+              null,
+              convolutionMode,
+              dilation,
+              CNN2DFormat.NCHW); // Also performs validation
+      pad =
+          Convolution2DUtils.getSameModeTopLeftPadding(
+              outSize, new int[] {inH, inW}, kernel, strides, dilation);
+    } else {
+      pad = getTypedLayerConfiguration().getPadding();
+      outSize =
+          Convolution2DUtils.getOutputSize(
+              input,
+              kernel,
+              strides,
+              pad,
+              convolutionMode,
+              dilation,
+              CNN2DFormat.NCHW); // Also performs validation
+    }
+
+    int outH = outSize[0];
+    int outW = outSize[1];
+
+    INDArray biasGradView = gradientViews.get(ConvolutionParamInitializer.BIAS_KEY);
+    INDArray weightGradView =
+        gradientViews.get(
+            ConvolutionParamInitializer.WEIGHT_KEY); // 4d, c order. Shape: [outDepth,inDepth,kH,kW]
+    INDArray weightGradView2df =
+        Shape.newShapeNoCopy(weightGradView, new long[] {outDepth, inDepth * kH * kW}, false)
+            .transpose();
+
+    INDArray delta;
+    IActivation afn = getTypedLayerConfiguration().getActivationFn();
+
+    Pair<INDArray, INDArray> p = preOutput4d(true, true, workspaceMgr);
+    INDArray z = p.getFirst();
+    CNN2DFormat f = getTypedLayerConfiguration().getConvFormat();
+    if (f != CNN2DFormat.NCHW) {
+      z = z.permute(0, 3, 1, 2); // NHWC to NCHW
+    }
+    delta = afn.backprop(z, epsilon).getFirst(); // TODO handle activation function params
+
+    if (helper != null
+        && (helperCountFail == 0 || !getTypedLayerConfiguration().isCudnnAllowFallback())) {
+      INDArray helperDelta = delta;
+      if (getTypedLayerConfiguration().getConvFormat() == CNN2DFormat.NHWC)
+        helperDelta = delta.permute(0, 2, 3, 1); // NCHW to NHWC
+
+      if (!hasBias() && !(helper instanceof MKLDNNConvHelper)) {
+        // MKL-DNN supports no bias, CuDNN doesn't
+        if (dummyBiasGrad == null) {
+          try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) {
+            dummyBiasGrad = Nd4j.create(1, getTypedLayerConfiguration().getNOut());
+          }
+        }
+        biasGradView = dummyBiasGrad;
+      }
+
+      Pair<Gradient, INDArray> ret = null;
+      try {
+        ret =
+            helper.backpropGradient(
+                origInput,
+                weights,
+                bias,
+                helperDelta,
+                kernel,
+                strides,
+                pad,
+                biasGradView,
+                weightGradView,
+                afn,
+                getTypedLayerConfiguration().getCudnnAlgoMode(),
+                getTypedLayerConfiguration().getCudnnBwdFilterAlgo(),
+                getTypedLayerConfiguration().getCudnnBwdDataAlgo(),
+                convolutionMode,
+                dilation,
+                getTypedLayerConfiguration().getConvFormat(),
+                workspaceMgr);
+      } catch (ND4JOpProfilerException e) {
+        throw e; // NaN panic etc for debugging
+      } catch (Exception e) {
+        // getMessage() may be null; guard it so a message-less helper failure still reaches the
+        // fallback handling below instead of surfacing as a NullPointerException.
+        if (e.getMessage() != null && e.getMessage().contains("Failed to allocate")) {
+          // This is a memory exception - don't fallback to built-in implementation
+          throw e;
+        }
+
+        if (getTypedLayerConfiguration().isCudnnAllowFallback()) {
+          helperCountFail++;
+          if (helper instanceof MKLDNNConvHelper) {
+            log.warn("MKL-DNN execution failed - falling back on built-in implementation", e);
+          } else {
+            log.warn("CuDNN execution failed - falling back on built-in implementation", e);
+          }
+        } else {
+          throw new RuntimeException(
+              "Error during ConvolutionLayer MKL/CuDNN helper backprop - isCudnnAllowFallback() is set to false",
+              e);
+        }
+      }
+
+      if (ret != null) {
+        // Backprop dropout, if present
+        INDArray gradPostDropout = ret.getRight();
+        gradPostDropout = backpropDropOutIfPresent(gradPostDropout);
+        ret.setSecond(gradPostDropout);
+        return ret;
+      }
+    }
+
+    delta = delta.permute(1, 0, 2, 3); // To shape: [outDepth,miniBatch,outH,outW]
+
+    // Note: due to the permute in preOut, and the fact that we essentially do a
+    // preOut.muli(epsilon), this reshape
+    // should be zero-copy; only possible exception being sometimes with the "identity" activation
+    // case
+    INDArray delta2d =
+        delta.reshape('c', outDepth, miniBatch * outH * outW); // Shape.newShapeNoCopy(delta,new
+    // int[]{outDepth,miniBatch*outH*outW},false);
+
+    // Do im2col, but with order [miniB,outH,outW,depthIn,kH,kW]; but need to input
+    // [miniBatch,channels,kH,kW,outH,outW] given the current im2col implementation
+    // To get this: create an array of the order we want, permute it to the order required by im2col
+    // implementation, and then do im2col on that
+    // to get old order from required order: permute(0,3,4,5,1,2)
+    INDArray im2col2d =
+        p.getSecond(); // Re-use im2col2d array from forward pass if available; recalculate if not
+    if (im2col2d == null) {
+      INDArray col =
+          Nd4j.createUninitialized(
+              dataType, new long[] {miniBatch, outH, outW, inDepth, kH, kW}, 'c');
+      INDArray col2 = col.permute(0, 3, 4, 5, 1, 2);
+      Convolution.im2col(
+          input,
+          kH,
+          kW,
+          strides[0],
+          strides[1],
+          pad[0],
+          pad[1],
+          dilation[0],
+          dilation[1],
+          convolutionMode == ConvolutionMode.Same,
+          col2);
+      // Shape im2col to 2d. Due to the permuting above, this should be a zero-copy reshape
+      im2col2d = col.reshape('c', miniBatch * outH * outW, inDepth * kH * kW);
+    }
+
+    // Calculate weight gradients, using cc->c mmul.
+    // weightGradView2df is f order, but this is because it's transposed from c order
+    // Here, we are using the fact that AB = (B^T A^T)^T; output here (post transpose) is in c
+    // order, not usual f order
+    Nd4j.gemm(im2col2d, delta2d, weightGradView2df, true, true, 1.0, 0.0);
+
+    // Flatten 4d weights to 2d... this again is a zero-copy op (unless weights are not originally
+    // in c order for some reason)
+    INDArray wPermuted =
+        weights.permute(3, 2, 1, 0); // Start with c order weights, switch order to f order
+    INDArray w2d = wPermuted.reshape('f', inDepth * kH * kW, outDepth);
+
+    // Calculate epsilons for layer below, in 2d format (note: this is in 'image patch' format
+    // before col2im reduction)
+    // Note: cc -> f mmul here, then reshape to 6d in f order
+    INDArray epsNext2d =
+        w2d.mmul(delta2d); // TODO can we reuse im2col array instead of allocating new result array?
+    INDArray eps6d =
+        Shape.newShapeNoCopy(epsNext2d, new long[] {kW, kH, inDepth, outW, outH, miniBatch}, true);
+
+    // Calculate epsilonNext by doing im2col reduction.
+    // Current col2im implementation expects input with order: [miniBatch,channels,kH,kW,outH,outW]
+    // currently have [kH,kW,inDepth,outW,outH,miniBatch] -> permute first
+    eps6d = eps6d.permute(5, 2, 1, 0, 4, 3);
+    INDArray epsNextOrig =
+        workspaceMgr.createUninitialized(
+            ArrayType.ACTIVATION_GRAD,
+            eps6d.dataType(),
+            new long[] {inDepth, miniBatch, inH, inW},
+            'c');
+
+    // Note: we are execute col2im in a way that the output array should be used in a stride 1 muli
+    // in the layer below... (same strides as zs/activations)
+    INDArray epsNext = epsNextOrig.permute(1, 0, 2, 3);
+    Convolution.col2im(
+        eps6d, epsNext, strides[0], strides[1], pad[0], pad[1], inH, inW, dilation[0], dilation[1]);
+
+    Gradient retGradient = new DefaultGradient();
+    if (getTypedLayerConfiguration().hasBias()) {
+      delta2d.sum(biasGradView, 1); // biasGradView is initialized/zeroed first in sum op
+      retGradient.setGradientFor(ConvolutionParamInitializer.BIAS_KEY, biasGradView);
+    }
+    retGradient.setGradientFor(ConvolutionParamInitializer.WEIGHT_KEY, weightGradView, 'c');
+
+    weightNoiseParams.clear();
+
+    epsNext = backpropDropOutIfPresent(epsNext);
+
+    if (getTypedLayerConfiguration().getConvFormat() != CNN2DFormat.NCHW) {
+      epsNext = epsNext.permute(0, 2, 3, 1); // NCHW to NHWC
+    }
+
+    return new Pair<>(retGradient, epsNext);
+  }
+
+  /**
+   * Hook for subclasses (such as a 1D convolution) that keep a non-4d {@code preOutput} but must
+   * provide 4d activations for backprop, without changing the public API. This base
+   * implementation simply forwards all arguments to {@code preOutput}.
+   *
+   * @return whatever {@code preOutput} returns for the same arguments
+   */
+  protected Pair<INDArray, INDArray> preOutput4d(
+      boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) {
+    final Pair<INDArray, INDArray> forwarded = preOutput(training, forBackprop, workspaceMgr);
+    return forwarded;
+  }
+
+  /**
+   * Verifies that the current input is a rank 4 array.
+   *
+   * @throws DL4JInvalidInputException if the input rank is not 4; the message carries the layer
+   *     name, index and input shape, plus an extra hint when the rank is 2
+   */
+  protected void validateInputRank() {
+    final int rank = input.rank();
+    if (rank == 4) {
+      return;
+    }
+
+    String name = layerConfiguration.getName();
+    if (name == null) {
+      name = "(not named)";
+    }
+
+    final String shapeText = Arrays.toString(input.shape());
+    final String flatInputHint =
+        rank == 2
+            ? " (Wrong input type (see InputType.convolutionalFlat()) or wrong data type?)"
+            : "";
+    throw new DL4JInvalidInputException(
+        "Got rank "
+            + rank
+            + " array as input to ConvolutionLayer (layer name = "
+            + name
+            + ", layer index = "
+            + index
+            + ") with shape "
+            + shapeText
+            + ". "
+            + "Expected rank 4 array with shape [minibatchSize, layerInputDepth, inputHeight, inputWidth]."
+            + flatInputHint
+            + " "
+            + layerId());
+  }
+
+  /**
+   * Verifies that the channel dimension of the current input (dimension 3 for NHWC, otherwise
+   * dimension 1) has the expected size.
+   *
+   * @param inDepth expected number of input channels
+   * @throws DL4JInvalidInputException if the channel count differs; when the size of the
+   *     dimension that the other layout would use does match, a format-mismatch hint is appended
+   */
+  protected void validateInputDepth(long inDepth) {
+    final CNN2DFormat format = getTypedLayerConfiguration().getConvFormat();
+    final boolean channelsLast = format == CNN2DFormat.NHWC;
+    final int channelDim = channelsLast ? 3 : 1;
+    if (input.size(channelDim) == inDepth) {
+      return;
+    }
+
+    String name = layerConfiguration.getName();
+    if (name == null) {
+      name = "(not named)";
+    }
+
+    final StringBuilder msg = new StringBuilder();
+    msg.append("Cannot do forward pass in Convolution layer (layer name = ")
+        .append(name)
+        .append(", layer index = ")
+        .append(index)
+        .append("): input array channels does not match CNN layer configuration")
+        .append(" (data format = ")
+        .append(format)
+        .append(", data input channels = ")
+        .append(input.size(channelDim))
+        .append(", ")
+        .append(getTypedLayerConfiguration().getConvFormat().dimensionNames())
+        .append("=")
+        .append(Arrays.toString(input.shape()))
+        .append("; expected")
+        .append(" input channels = ")
+        .append(inDepth)
+        .append(") ")
+        .append(layerId());
+
+    // User might have passed NCHW data to a NHWC net, or vice versa?
+    final int channelDimIfOtherFormat = channelsLast ? 1 : 3;
+    if (input.size(channelDimIfOtherFormat) == inDepth) {
+      msg.append("\n").append(Convolution2DUtils.NCHW_NHWC_ERROR_MSG);
+    }
+
+    throw new DL4JInvalidInputException(msg.toString());
+  }
+
+  /**
+   * PreOutput method that also returns the im2col2d array (if being called for backprop), as this
+   * can be re-used instead of being calculated again.
+   *
+   * @param training Train or test time (impacts dropout)
+   * @param forBackprop If true: return the im2col2d array for re-use during backprop. False: return
+   *     null for second pair entry. Note that it may still be null in the case of CuDNN and the
+   *     like.
+   * @return Pair of arrays: preOutput (activations) and optionally the im2col2d array
+   */
+  protected Pair<INDArray, INDArray> preOutput(
+      boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) {
+    assertInputSet(false);
+    INDArray bias = getParamWithNoise(ConvolutionParamInitializer.BIAS_KEY, training, workspaceMgr);
+    INDArray weights =
+        getParamWithNoise(ConvolutionParamInitializer.WEIGHT_KEY, training, workspaceMgr);
+
+    validateInputRank();
+
+    INDArray input = this.input.castTo(dataType);
+    INDArray inputOrig = input;
+    if (getTypedLayerConfiguration().getConvFormat() == CNN2DFormat.NHWC) {
+      input = input.permute(0, 3, 1, 2).dup(); // NHWC to NCHW
+    }
+
+    long miniBatch = input.size(0);
+    long outDepth = weights.size(0);
+    long inDepth = weights.size(1);
+    validateInputDepth(inDepth);
+
+    long kH = weights.size(2);
+    long kW = weights.size(3);
+
+    int[] dilation = getTypedLayerConfiguration().getDilation();
+    int[] kernel = getTypedLayerConfiguration().getKernelSize();
+    int[] strides = getTypedLayerConfiguration().getStride();
+
+    int[] pad;
+    int[] outSize;
+    if (convolutionMode == ConvolutionMode.Same) {
+      outSize =
+          Convolution2DUtils.getOutputSize(
+              input,
+              kernel,
+              strides,
+              null,
+              convolutionMode,
+              dilation,
+              CNN2DFormat.NCHW); // Note: hardcoded to NCHW due to permute earlier in this method
+
+      if (input.size(2) > Integer.MAX_VALUE || input.size(3) > Integer.MAX_VALUE)
+        throw new ND4JArraySizeException();
+      int[] inWidthHeight;
+      //  if(layerConf().getCnn2dDataFormat() == CNN2DFormat.NCHW)
+      // TODO: Switch hardcoded state later. For now, convolution is implemented as
+      // switch to NCHW then permute back for NWHC
+      inWidthHeight = new int[] {(int) input.size(2), (int) input.size(3)};
+
+      /*     else if(layerConf().getCnn2dDataFormat() == CNN2DFormat.NHWC) {
+          inWidthHeight =  new int[] {(int) input.size(1), (int) input.size(2)};
+      }
+      else
+           throw new IllegalStateException("No data format configured!");*/
+      pad =
+          Convolution2DUtils.getSameModeTopLeftPadding(
+              outSize, inWidthHeight, kernel, strides, dilation);
+    } else {
+      pad = getTypedLayerConfiguration().getPadding();
+      outSize =
+          Convolution2DUtils.getOutputSize(
+              input,
+              kernel,
+              strides,
+              pad,
+              convolutionMode,
+              dilation,
+              CNN2DFormat.NCHW); // Note: hardcoded to NCHW due to permute earlier in this method
+    }
+
+    int outH = outSize[0];
+    int outW = outSize[1];
+
+    if (helper != null
+        && (helperCountFail == 0 || !getTypedLayerConfiguration().isCudnnAllowFallback())) {
+      if (preOutput != null && forBackprop) {
+        return new Pair<>(preOutput, null);
+      }
+
+      // For no-bias convolutional layers: use an empty (all 0s) value for biases
+      if (!hasBias()) {
+        if (dummyBias == null) {
+          try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) {
+            dummyBias = Nd4j.create(1, getTypedLayerConfiguration().getNOut());
+          }
+        }
+        bias = dummyBias;
+      }
+
+      INDArray ret = null;
+      try {
+        ret =
+            helper.preOutput(
+                inputOrig,
+                weights,
+                bias,
+                kernel,
+                strides,
+                pad,
+                getTypedLayerConfiguration().getCudnnAlgoMode(),
+                getTypedLayerConfiguration().getCudnnFwdAlgo(),
+                convolutionMode,
+                dilation,
+                getTypedLayerConfiguration().getConvFormat(),
+                workspaceMgr);
+      } catch (ND4JOpProfilerException e) {
+        throw e; // NaN panic etc for debugging
+      } catch (Exception e) {
+        if (e.getMessage() != null && e.getMessage().contains("Failed to allocate")) {
+          // This is a memory exception - don't fallback to built-in implementation
+          throw e;
+        }
+
+        if (getTypedLayerConfiguration().isCudnnAllowFallback()) {
+          helperCountFail++;
+          if (helper instanceof MKLDNNConvHelper) {
+            log.warn("MKL-DNN execution failed - falling back on built-in implementation", e);
+          } else {
+            log.warn("CuDNN execution failed - falling back on built-in implementation", e);
+          }
+        } else {
+          throw new RuntimeException(
+              "Error during ConvolutionLayer MKL/CuDNN helper forward pass - isCudnnAllowFallback() is set to false",
+              e);
+        }
+      }
+      if (ret != null) {
+        return new Pair<>(ret, null);
+      }
+    }
+
+    if (preOutput != null && i2d != null && forBackprop) {
+      return new Pair<>(preOutput, i2d);
+    }
+
+    // im2col in the required order: want [outW,outH,miniBatch,depthIn,kH,kW], but need to input
+    // [miniBatch,channels,kH,kW,outH,outW] given the current im2col implementation
+    // To get this: create an array of the order we want, permute it to the order required by im2col
+    // implementation, and then do im2col on that
+    // to get old order from required order: permute(0,3,4,5,1,2)
+    // Post reshaping: rows are such that minibatch varies slowest, outW fastest as we step through
+    // the rows post-reshape
+    INDArray col =
+        Nd4j.createUninitialized(
+            weights.dataType(), new long[] {miniBatch, outH, outW, inDepth, kH, kW}, 'c');
+    int[] permute = new int[] {0, 3, 4, 5, 1, 2};
+    INDArray col2 = col.permute(permute);
+    INDArray im2ColIn = input.castTo(col2.dataType()); // No op if already (for example) float
+    if (kH > Integer.MAX_VALUE || kW > Integer.MAX_VALUE) throw new ND4JArraySizeException();
+    Convolution.im2col(
+        im2ColIn,
+        (int) kH,
+        (int) kW,
+        strides[0],
+        strides[1],
+        pad[0],
+        pad[1],
+        dilation[0],
+        dilation[1],
+        convolutionMode == ConvolutionMode.Same,
+        col2);
+
+    INDArray im2col2d =
+        Shape.newShapeNoCopy(col, new long[] {miniBatch * outH * outW, inDepth * kH * kW}, false);
+
+    // Current order of weights: [depthOut,depthIn,kH,kW], c order
+    // Permute to give [kW,kH,depthIn,depthOut], f order
+    // Reshape to give [kW*kH*depthIn, depthOut]. This should always be zero-copy reshape, unless
+    // weights aren't in c order for some reason
+    INDArray permutedW = weights.permute(3, 2, 1, 0);
+    INDArray reshapedW = permutedW.reshape('f', kW * kH * inDepth, outDepth);
+
+    // Do the MMUL; c and f orders in, f order out. output shape: [miniBatch*outH*outW,depthOut]
+    INDArray z =
+        workspaceMgr.createUninitialized(
+            ArrayType.ACTIVATIONS,
+            weights.dataType(),
+            new long[] {im2col2d.size(0), reshapedW.size(1)},
+            'f');
+    im2col2d.mmuli(reshapedW, z);
+
+    // Add biases, before reshaping. Note that biases are [1,depthOut] and currently z is
+    // [miniBatch*outH*outW,depthOut] -> addiRowVector
+    if (getTypedLayerConfiguration().hasBias()) {
+      z.addiRowVector(bias);
+    }
+
+    // Now, reshape to [outW,outH,miniBatch,outDepth], and permute to have correct output order:
+    // [miniBatch,outDepth,outH,outW];
+    z = Shape.newShapeNoCopy(z, new long[] {outW, outH, miniBatch, outDepth}, true);
+    z = z.permute(2, 3, 1, 0);
+
+    if (training
+        && cacheMode != CacheMode.NONE
+        && workspaceMgr.hasConfiguration(ArrayType.FF_CACHE)
+        && workspaceMgr.isWorkspaceOpen(ArrayType.FF_CACHE)) {
+      try (MemoryWorkspace wsB = workspaceMgr.notifyScopeBorrowed(ArrayType.FF_CACHE)) {
+        i2d = im2col2d.unsafeDuplication();
+      }
+    }
+
+    if (getTypedLayerConfiguration().getConvFormat() == CNN2DFormat.NHWC) {
+      z = z.permute(0, 2, 3, 1); // NCHW to NHWC
+      z = workspaceMgr.dup(ArrayType.ACTIVATIONS, z);
+    }
+
+    return new Pair<>(z, forBackprop ? im2col2d : null);
+  }
+
+  /**
+   * Runs the forward pass of this layer: applies dropout if necessary, obtains the
+   * pre-activations from {@code preOutput(training, false, workspaceMgr)} and applies the
+   * configured activation function to them.
+   *
+   * <p>When training with a cache mode other than {@code NONE} and a configured, open
+   * {@code FF_CACHE} workspace, a duplicate of the pre-activations is stored in the
+   * {@code preOutput} field. If a helper is set and still eligible, the activation is delegated to
+   * it first; after a tolerated helper failure the built-in activation is used instead.
+   *
+   * @param training whether the layer is run in training mode
+   * @param workspaceMgr workspace manager used for dropout, pre-output and caching
+   * @return the activations of this layer
+   * @throws IllegalArgumentException if no input has been set
+   * @throws RuntimeException if the helper fails and fallback is not allowed
+   */
+  @Override
+  public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) {
+    if (input == null) {
+      throw new IllegalArgumentException(
+          "Cannot perform forward pass with null input " + layerId());
+    }
+    if (cacheMode == null) {
+      cacheMode = CacheMode.NONE;
+    }
+
+    applyDropOutIfNecessary(training, workspaceMgr);
+
+    final INDArray preActivations = preOutput(training, false, workspaceMgr).getFirst();
+
+    // Cache only if the cache workspace exists and is open; skip otherwise
+    final boolean cachePreOutput =
+        training
+            && cacheMode != CacheMode.NONE
+            && workspaceMgr.hasConfiguration(ArrayType.FF_CACHE)
+            && workspaceMgr.isWorkspaceOpen(ArrayType.FF_CACHE);
+    if (cachePreOutput) {
+      try (MemoryWorkspace borrowed = workspaceMgr.notifyScopeBorrowed(ArrayType.FF_CACHE)) {
+        preOutput = preActivations.unsafeDuplication();
+      }
+    }
+
+    final IActivation activationFn = getTypedLayerConfiguration().getActivationFn();
+
+    // The helper is skipped once it has failed, unless fallback is disallowed
+    final boolean helperEligible =
+        helper != null
+            && Shape.strideDescendingCAscendingF(preActivations)
+            && (helperCountFail == 0 || !getTypedLayerConfiguration().isCudnnAllowFallback());
+    if (helperEligible) {
+      INDArray helperResult = null;
+      try {
+        helperResult =
+            helper.activate(
+                preActivations, getTypedLayerConfiguration().getActivationFn(), training);
+      } catch (ND4JOpProfilerException profilerFailure) {
+        throw profilerFailure; // NaN panic etc for debugging
+      } catch (Exception failure) {
+        final String message = failure.getMessage();
+        if (message != null && message.contains("Failed to allocate")) {
+          // Memory exception - never fall back to the built-in implementation
+          throw failure;
+        }
+        if (!getTypedLayerConfiguration().isCudnnAllowFallback()) {
+          throw new RuntimeException(
+              "Error during ConvolutionLayer MKL/CuDNN helper forward pass - isCudnnAllowFallback() is set to false",
+              failure);
+        }
+        helperCountFail++;
+        final String warning =
+            (helper instanceof MKLDNNConvHelper)
+                ? "MKL-DNN execution failed - falling back on built-in implementation"
+                : "CuDNN execution failed - falling back on built-in implementation";
+        log.warn(warning, failure);
+      }
+
+      if (helperResult != null) {
+        return helperResult;
+      }
+    }
+
+    return activationFn.getActivation(preActivations, training);
+  }
+
+  /**
+   * Reports whether this layer uses a bias, as stated by its typed layer configuration.
+   *
+   * @return the value of {@code getTypedLayerConfiguration().hasBias()}
+   */
+  @Override
+  public boolean hasBias() {
+    return getTypedLayerConfiguration().hasBias();
+  }
+
+  /**
+   * Reports whether this layer is a pretrain layer.
+   *
+   * @return always {@code false}
+   */
+  @Override
+  public boolean isPretrainLayer() {
+    return false;
+  }
+
+  /**
+   * Returns the helper held by this layer.
+   *
+   * @return the {@code helper} field; other methods of this class check it against {@code null}
+   *     before use, so it may be {@code null}
+   */
+  @Override
+  public LayerHelper getHelper() {
+    return helper;
+  }
+
+  /**
+   * Not supported by this layer.
+   *
+   * @param input ignored
+   * @param workspaceMgr ignored
+   * @throws UnsupportedOperationException always
+   */
+  @Override
+  public void fit(INDArray input, LayerWorkspaceMgr workspaceMgr) {
+    throw new UnsupportedOperationException("Not supported");
+  }
+
+  /**
+   * Sets the parameters of this layer from the given array, using {@code 'c'} ordering.
+   *
+   * @param paramsTable parameters to set; passed on to {@code setParams(paramsTable, 'c')}
+   */
+  @Override
+  public void setParamsTable(INDArray paramsTable) {
+    // Override, as base layer does f order parameter flattening by default
+    setParams(paramsTable, 'c');
+  }
+
+  /**
+   * Propagates a mask array through this layer.
+   *
+   * <p>A {@code null} mask is passed through unchanged. Any other mask is reduced with
+   * {@code Convolution2DUtils.cnn2dMaskReduction} using this layer's kernel size, stride, padding,
+   * dilation and convolution mode. The mask state is returned as given in both cases.
+   *
+   * @param maskArray mask to propagate, may be {@code null}
+   * @param currentMaskState current mask state, returned unchanged
+   * @param minibatchSize minibatch size; not read by this implementation
+   * @return pair of the (possibly reduced) mask and {@code currentMaskState}
+   */
+  @Override
+  public Pair<INDArray, MaskState> feedForwardMaskArray(
+      INDArray maskArray, MaskState currentMaskState, int minibatchSize) {
+    if (maskArray != null) {
+      final INDArray reducedMask =
+          Convolution2DUtils.cnn2dMaskReduction(
+              maskArray,
+              getTypedLayerConfiguration().getKernelSize(),
+              getTypedLayerConfiguration().getStride(),
+              getTypedLayerConfiguration().getPadding(),
+              getTypedLayerConfiguration().getDilation(),
+              getTypedLayerConfiguration().getConvolutionMode());
+      return new Pair<>(reducedMask, currentMaskState);
+    }
+
+    // No mask present: there is nothing to reduce, hand the (null) mask and state back as is
+    return new Pair<>(maskArray, currentMaskState);
+  }
+}
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/dense/DenseLayer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/feedforward/dense/DenseLayer.java
@ -47,7 +47,7 @@ public class DenseLayer extends BaseLayer<org.deeplearning4j.nn.conf.layers.Dens

  @Override
  public boolean hasBias() {
-    return super.hasBias();
+    return getTypedLayerConfiguration().isHasBias();
  }

  @Override
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/RnnOutputLayer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/RnnOutputLayer.java
@ -37,195 +37,248 @@ import org.nd4j.linalg.api.buffer.DataType;
 import org.nd4j.linalg.api.ndarray.INDArray;
 import org.nd4j.linalg.lossfunctions.ILossFunction;

-public class RnnOutputLayer extends BaseOutputLayer<org.deeplearning4j.nn.conf.layers.RnnOutputLayer> {
+public class RnnOutputLayer
+    extends BaseOutputLayer<org.deeplearning4j.nn.conf.layers.RnnOutputLayer> {

-    public RnnOutputLayer(LayerConfiguration conf, DataType dataType) {
-        super(conf, dataType);
+  public RnnOutputLayer(LayerConfiguration conf, DataType dataType) {
+    super(conf, dataType);
+  }
+
+  @Override
+  public Pair<Gradient, INDArray> backpropGradient(
+      INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
+    assertInputSet(true);
+    if (input.rank() != 3) {
+      throw new UnsupportedOperationException(
+          "Input is not rank 3. RnnOutputLayer expects rank 3 input with shape [minibatch, layerInSize, sequenceLength]."
+              + " Got input with rank "
+              + input.rank()
+              + " and shape "
+              + Arrays.toString(input.shape())
+              + " - "
+              + layerId());
    }

-    @Override
-    public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
-        assertInputSet(true);
-        if (input.rank() != 3) {
-            throw new UnsupportedOperationException(
-                    "Input is not rank 3. RnnOutputLayer expects rank 3 input with shape [minibatch, layerInSize, sequenceLength]." +
-                            " Got input with rank " + input.rank() + " and shape " + Arrays.toString(input.shape()) + " - " + layerId());
-        }
+    // Index of the time (sequence length) dimension: 2 for NCW, 1 otherwise (NWC)
+    RNNFormat format = getTypedLayerConfiguration().getDataFormat();
+    int td = (format == RNNFormat.NCW) ? 2 : 1;
+    Preconditions.checkState(
+        labels.rank() == 3,
+        "Expected rank 3 labels array, got label array with shape %ndShape",
+        labels);
+    // The message has exactly two %ndShape placeholders, so exactly the two arrays are passed, in
+    // placeholder order. Any extra argument between them (e.g. a String) would be bound to the
+    // second placeholder in place of the labels array.
+    Preconditions.checkState(
+        input.size(td) == labels.size(td),
+        "Sequence lengths do not match for RnnOutputLayer input and labels:"
+            + "Arrays should be rank 3 with shape [minibatch, size, sequenceLength] - "
+            + "mismatch on dimension 2 (sequence length) - input=%ndShape vs. label=%ndShape.\n",
+        input,
+        labels);

-        RNNFormat format = getTypedLayerConfiguration().getDataFormat();
-        int td = (format == RNNFormat.NCW) ? 2 : 1;
-        Preconditions.checkState(labels.rank() == 3, "Expected rank 3 labels array, got label array with shape %ndShape", labels);
-        Preconditions.checkState(input.size(td) == labels.size(td), "Sequence lengths do not match for RnnOutputLayer input and labels:" +
-                "Arrays should be rank 3 with shape [minibatch, size, sequenceLength] - mismatch on dimension 2 (sequence length) - input=%ndShape vs. label=%ndShape", input, labels);
-
-
-        INDArray inputTemp = input;
-        if (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC){
-            this.input = input.permute(0, 2, 1);
-        }
-
-        this.input = TimeSeriesUtils.reshape3dTo2d(input, workspaceMgr, ArrayType.BP_WORKING_MEM);
-
-        applyDropOutIfNecessary(true, workspaceMgr);    //Edge case: we skip OutputLayer forward pass during training as this isn't required to calculate gradients
-
-        Pair<Gradient, INDArray> gradAndEpsilonNext = super.backpropGradient(epsilon, workspaceMgr);    //Also applies dropout
-        this.input = inputTemp;
-        INDArray epsilon2d = gradAndEpsilonNext.getSecond();
-
-        INDArray epsilon3d = TimeSeriesUtils.reshape2dTo3d(epsilon2d, input.size(0), workspaceMgr, ArrayType.ACTIVATION_GRAD);
-        if (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC){
-            epsilon3d = epsilon3d.permute(0, 2, 1);
-        }
-        weightNoiseParams.clear();
-
-        //epsilon3d = backpropDropOutIfPresent(epsilon3d);
-        return new Pair<>(gradAndEpsilonNext.getFirst(), epsilon3d);
+    INDArray inputTemp = input;
+    if (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC) {
+      this.input = input.permute(0, 2, 1);
    }

-    /**{@inheritDoc}
-     */
-    @Override
-    public double f1Score(INDArray examples, INDArray labels) {
-        if (examples.rank() == 3)
-            examples = TimeSeriesUtils.reshape3dTo2d(examples, LayerWorkspaceMgr.noWorkspaces(), ArrayType.ACTIVATIONS);
-        if (labels.rank() == 3)
-            labels = TimeSeriesUtils.reshape3dTo2d(labels, LayerWorkspaceMgr.noWorkspaces(), ArrayType.ACTIVATIONS);
-        return super.f1Score(examples, labels);
+    this.input = TimeSeriesUtils.reshape3dTo2d(input, workspaceMgr, ArrayType.BP_WORKING_MEM);
+
+    applyDropOutIfNecessary(
+        true,
+        workspaceMgr); // Edge case: we skip OutputLayer forward pass during training as this isn't
+    // required to calculate gradients
+
+    Pair<Gradient, INDArray> gradAndEpsilonNext =
+        super.backpropGradient(epsilon, workspaceMgr); // Also applies dropout
+    this.input = inputTemp;
+    INDArray epsilon2d = gradAndEpsilonNext.getSecond();
+
+    INDArray epsilon3d =
+        TimeSeriesUtils.reshape2dTo3d(
+            epsilon2d, input.size(0), workspaceMgr, ArrayType.ACTIVATION_GRAD);
+    if (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC) {
+      epsilon3d = epsilon3d.permute(0, 2, 1);
+    }
+    weightNoiseParams.clear();
+
+    // epsilon3d = backpropDropOutIfPresent(epsilon3d);
+    return new Pair<>(gradAndEpsilonNext.getFirst(), epsilon3d);
+  }
+
+  /**
+   * {@inheritDoc}
+   *
+   * <p>Rank 3 examples and rank 3 labels are each reshaped to 2d before the superclass
+   * implementation is called; arrays of any other rank are passed on unchanged.
+   */
+  @Override
+  public double f1Score(INDArray examples, INDArray labels) {
+    final INDArray examples2d =
+        (examples.rank() == 3)
+            ? TimeSeriesUtils.reshape3dTo2d(
+                examples, LayerWorkspaceMgr.noWorkspaces(), ArrayType.ACTIVATIONS)
+            : examples;
+    final INDArray labels2d =
+        (labels.rank() == 3)
+            ? TimeSeriesUtils.reshape3dTo2d(
+                labels, LayerWorkspaceMgr.noWorkspaces(), ArrayType.ACTIVATIONS)
+            : labels;
+    return super.f1Score(examples2d, labels2d);
+  }
+
+  /**
+   * Returns the input currently held by this layer.
+   *
+   * @return the {@code input} field
+   */
+  public INDArray getInput() {
+    return input;
+  }
+
+  /**
+   * Returns the type of this layer.
+   *
+   * @return always {@code Layer.Type.RECURRENT}
+   */
+  @Override
+  public Layer.Type type() {
+    return Layer.Type.RECURRENT;
+  }
+
+  /**
+   * Computes the pre-output for the current input via {@code super.preOutput}.
+   *
+   * <p>For rank 3 input, the {@code input} field is temporarily replaced by its 2d form (permuted
+   * with {@code (0, 2, 1)} first when the data format is NWC), the superclass pre-output is
+   * computed, and the original input is put back afterwards. Input of any other rank is handed to
+   * the superclass as is.
+   *
+   * @param training whether the layer is run in training mode
+   * @param workspaceMgr workspace manager, passed to the reshape and to the superclass
+   * @return result of {@code super.preOutput(training, workspaceMgr)}
+   */
+  @Override
+  protected INDArray preOutput2d(boolean training, LayerWorkspaceMgr workspaceMgr) {
+    assertInputSet(false);
+
+    if (input.rank() != 3) {
+      // Case when called from BaseOutputLayer
+      return super.preOutput(training, workspaceMgr);
+    }
+
+    // Case when called from RnnOutputLayer
+    final INDArray original3d = input;
+    if (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC) {
+      input = input.permute(0, 2, 1);
+    }
+    input = TimeSeriesUtils.reshape3dTo2d(input, workspaceMgr, ArrayType.FF_WORKING_MEM);
+    final INDArray preOut2d = super.preOutput(training, workspaceMgr);
+    this.input = original3d;
+    return preOut2d;
+  }
+
+  /**
+   * Returns the labels in 2d form.
+   *
+   * <p>Rank 3 labels are permuted with {@code (0, 2, 1)} when the data format is NWC and then
+   * reshaped to 2d; labels of any other rank are returned unchanged.
+   *
+   * @param workspaceMgr workspace manager passed to the reshape
+   * @param arrayType array type passed to the reshape
+   * @return the 2d labels, or the stored labels if they are not rank 3
+   */
+  @Override
+  protected INDArray getLabels2d(LayerWorkspaceMgr workspaceMgr, ArrayType arrayType) {
+    final INDArray current = this.labels;
+    if (current.rank() != 3) {
+      return current;
+    }
+
+    final INDArray ordered =
+        (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC)
+            ? current.permute(0, 2, 1)
+            : current;
+    return TimeSeriesUtils.reshape3dTo2d(ordered, workspaceMgr, arrayType);
+  }
+
+  @Override
+  public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) {
+    INDArray input = this.input;
+    if (input.rank() != 3)
+      throw new UnsupportedOperationException(
+          "Input must be rank 3. Got input with rank " + input.rank() + " " + layerId());
+    INDArray b = getParamWithNoise(DefaultParamInitializer.BIAS_KEY, training, workspaceMgr);
+    INDArray W = getParamWithNoise(DefaultParamInitializer.WEIGHT_KEY, training, workspaceMgr);
+
+    applyDropOutIfNecessary(training, workspaceMgr);
+    if (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC) {
+      input = input.permute(0, 2, 1);
+    }
+    INDArray input2d =
+        TimeSeriesUtils.reshape3dTo2d(
+            input.castTo(W.dataType()), workspaceMgr, ArrayType.FF_WORKING_MEM);
+
+    INDArray act2d =
+        getTypedLayerConfiguration()
+            .getActivationFn()
+            .getActivation(input2d.mmul(W).addiRowVector(b), training);
+    if (maskArray != null) {
+      if (!maskArray.isColumnVectorOrScalar() || Arrays.equals(maskArray.shape(), act2d.shape())) {
+        // Per output masking
+        act2d.muli(maskArray.castTo(act2d.dataType()));
+      } else {
+        // Per time step masking
+        act2d.muliColumnVector(maskArray.castTo(act2d.dataType()));
+      }
    }

-    public INDArray getInput() {
-        return input;
+    INDArray ret =
+        TimeSeriesUtils.reshape2dTo3d(act2d, input.size(0), workspaceMgr, ArrayType.ACTIVATIONS);
+    if (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC) {
+      ret = ret.permute(0, 2, 1);
+    }
+    return ret;
+  }
+
+  /**
+   * Stores the given mask array in reshaped form.
+   *
+   * <p>Two kinds of mask are accepted:
+   *
+   * <ul>
+   *   <li>per time step masking: a rank 2 mask, reshaped to a vector
+   *   <li>per output masking: a rank 3 mask, reshaped to 2d
+   * </ul>
+   *
+   * A {@code null} argument clears the stored mask.
+   *
+   * @param maskArray mask of rank 2 or 3, or {@code null}
+   * @throws UnsupportedOperationException if the mask has any other rank
+   */
+  @Override
+  public void setMaskArray(INDArray maskArray) {
+    if (maskArray == null) {
+      this.maskArray = null;
+      return;
+    }
+
+    final boolean perTimeStep = maskArray.rank() == 2;
+    final boolean perOutput = !perTimeStep && maskArray.rank() == 3;
+    if (!perTimeStep && !perOutput) {
+      throw new UnsupportedOperationException(
+          "Invalid mask array: must be rank 2 or 3 (got: rank "
+              + maskArray.rank()
+              + ", shape = "
+              + Arrays.toString(maskArray.shape())
+              + ") "
+              + layerId());
+    }
+
+    this.maskArray =
+        perTimeStep
+            ? TimeSeriesUtils.reshapeTimeSeriesMaskToVector(
+                maskArray, LayerWorkspaceMgr.noWorkspacesImmutable(), ArrayType.INPUT)
+            : TimeSeriesUtils.reshape3dTo2d(
+                maskArray, LayerWorkspaceMgr.noWorkspacesImmutable(), ArrayType.INPUT);
+  }
+
+  @Override
+  public Pair<INDArray, MaskState> feedForwardMaskArray(
+      INDArray maskArray, MaskState currentMaskState, int minibatchSize) {
+
+    // If the *input* mask array is present and active, we should use it to mask the output
+    if (maskArray != null && currentMaskState == MaskState.Active) {
+      this.inputMaskArray =
+          TimeSeriesUtils.reshapeTimeSeriesMaskToVector(
+              maskArray, LayerWorkspaceMgr.noWorkspacesImmutable(), ArrayType.INPUT);
+      this.inputMaskArrayState = currentMaskState;
+    } else {
+      this.inputMaskArray = null;
+      this.inputMaskArrayState = null;
    }

-    @Override
-    public Layer.Type type() {
-        return Layer.Type.RECURRENT;
+    return null; // Last layer in network
+  }
+
+  /**
+   * Compute the score for each example individually, after labels and input have been set.
+   *
+   * @param fullNetRegTerm Regularization score term for the entire network (or, 0.0 to not include
+   *     regularization)
+   * @return A column INDArray of shape [numExamples,1], where entry i is the score of the ith
+   *     example
+   */
+  @Override
+  public INDArray computeScoreForExamples(double fullNetRegTerm, LayerWorkspaceMgr workspaceMgr) {
+    // For RNN: need to sum up the score over each time step before returning.
+
+    if (input == null || labels == null)
+      throw new IllegalStateException(
+          "Cannot calculate score without input and labels " + layerId());
+    INDArray preOut = preOutput2d(false, workspaceMgr);
+
+    ILossFunction lossFunction = getTypedLayerConfiguration().getLossFunction();
+    INDArray scoreArray =
+        lossFunction.computeScoreArray(
+            getLabels2d(workspaceMgr, ArrayType.FF_WORKING_MEM),
+            preOut,
+            getTypedLayerConfiguration().getActivationFn(),
+            maskArray);
+    // scoreArray: shape [minibatch*timeSeriesLength, 1]
+    // Reshape it to [minibatch, timeSeriesLength] then sum over time step
+
+    INDArray scoreArrayTs =
+        TimeSeriesUtils.reshapeVectorToTimeSeriesMask(scoreArray, (int) input.size(0));
+    INDArray summedScores = scoreArrayTs.sum(true, 1);
+
+    if (fullNetRegTerm != 0.0) {
+      summedScores.addi(fullNetRegTerm);
    }

-    @Override
-    protected INDArray preOutput2d(boolean training, LayerWorkspaceMgr workspaceMgr) {
-        assertInputSet(false);
-        if (input.rank() == 3) {
-            //Case when called from RnnOutputLayer
-            INDArray inputTemp = input;
-            input = (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC) ? input.permute(0, 2, 1):input;
-            input = TimeSeriesUtils.reshape3dTo2d(input, workspaceMgr, ArrayType.FF_WORKING_MEM);
-            INDArray out = super.preOutput(training, workspaceMgr);
-            this.input = inputTemp;
-            return out;
-        } else {
-            //Case when called from BaseOutputLayer
-            INDArray out = super.preOutput(training, workspaceMgr);
-            return out;
-        }
-    }
-
-    @Override
-    protected INDArray getLabels2d(LayerWorkspaceMgr workspaceMgr, ArrayType arrayType) {
-        INDArray labels = this.labels;
-        if (labels.rank() == 3){
-            labels = (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC) ? labels.permute(0, 2, 1) : labels;
-            return TimeSeriesUtils.reshape3dTo2d(labels, workspaceMgr, arrayType);
-        }
-        return labels;
-    }
-
-    @Override
-    public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) {
-        INDArray input = this.input;
-        if (input.rank() != 3)
-            throw new UnsupportedOperationException(
-                            "Input must be rank 3. Got input with rank " + input.rank() + " " + layerId());
-        INDArray b = getParamWithNoise(DefaultParamInitializer.BIAS_KEY, training, workspaceMgr);
-        INDArray W = getParamWithNoise(DefaultParamInitializer.WEIGHT_KEY, training, workspaceMgr);
-
-        applyDropOutIfNecessary(training, workspaceMgr);
-        if (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC){
-            input = input.permute(0, 2, 1);
-        }
-        INDArray input2d = TimeSeriesUtils.reshape3dTo2d(input.castTo(W.dataType()), workspaceMgr, ArrayType.FF_WORKING_MEM);
-
-        INDArray act2d = getTypedLayerConfiguration().getActivationFn().getActivation(input2d.mmul(W).addiRowVector(b), training);
-        if (maskArray != null) {
-            if(!maskArray.isColumnVectorOrScalar() || Arrays.equals(maskArray.shape(), act2d.shape())){
-                //Per output masking
-                act2d.muli(maskArray.castTo(act2d.dataType()));
-            } else {
-                //Per time step masking
-                act2d.muliColumnVector(maskArray.castTo(act2d.dataType()));
-            }
-        }
-
-        INDArray ret = TimeSeriesUtils.reshape2dTo3d(act2d, input.size(0), workspaceMgr, ArrayType.ACTIVATIONS);
-        if (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC){
-            ret = ret.permute(0, 2, 1);
-        }
-        return ret;
-    }
-
-    @Override
-    public void setMaskArray(INDArray maskArray) {
-        if (maskArray != null) {
-            //Two possible cases:
-            //(a) per time step masking - rank 2 mask array -> reshape to rank 1 (column vector)
-            //(b) per output masking - rank 3 mask array  -> reshape to rank 2 (
-            if (maskArray.rank() == 2) {
-                this.maskArray = TimeSeriesUtils.reshapeTimeSeriesMaskToVector(maskArray, LayerWorkspaceMgr.noWorkspacesImmutable(), ArrayType.INPUT);
-            } else if (maskArray.rank() == 3) {
-                this.maskArray = TimeSeriesUtils.reshape3dTo2d(maskArray, LayerWorkspaceMgr.noWorkspacesImmutable(), ArrayType.INPUT);
-            } else {
-                throw new UnsupportedOperationException(
-                                "Invalid mask array: must be rank 2 or 3 (got: rank " + maskArray.rank() + ", shape = "
-                                                + Arrays.toString(maskArray.shape()) + ") " + layerId());
-            }
-        } else {
-            this.maskArray = null;
-        }
-    }
-
-    @Override
-    public Pair<INDArray, MaskState> feedForwardMaskArray(INDArray maskArray, MaskState currentMaskState,
-                    int minibatchSize) {
-
-        //If the *input* mask array is present and active, we should use it to mask the output
-        if (maskArray != null && currentMaskState == MaskState.Active) {
-            this.inputMaskArray = TimeSeriesUtils.reshapeTimeSeriesMaskToVector(maskArray, LayerWorkspaceMgr.noWorkspacesImmutable(), ArrayType.INPUT);
-            this.inputMaskArrayState = currentMaskState;
-        } else {
-            this.inputMaskArray = null;
-            this.inputMaskArrayState = null;
-        }
-
-        return null; //Last layer in network
-    }
-
-    /**Compute the score for each example individually, after labels and input have been set.
-     *
-     * @param fullNetRegTerm Regularization score term for the entire network (or, 0.0 to not include regularization)
-     * @return A column INDArray of shape [numExamples,1], where entry i is the score of the ith example
-     */
-    @Override
-    public INDArray computeScoreForExamples(double fullNetRegTerm, LayerWorkspaceMgr workspaceMgr) {
-        //For RNN: need to sum up the score over each time step before returning.
-
-        if (input == null || labels == null)
-            throw new IllegalStateException("Cannot calculate score without input and labels " + layerId());
-        INDArray preOut = preOutput2d(false, workspaceMgr);
-
-        ILossFunction lossFunction = getTypedLayerConfiguration().getLossFunction();
-        INDArray scoreArray =
-                        lossFunction.computeScoreArray(getLabels2d(workspaceMgr, ArrayType.FF_WORKING_MEM), preOut,
-                                getTypedLayerConfiguration().getActivationFn(), maskArray);
-        //scoreArray: shape [minibatch*timeSeriesLength, 1]
-        //Reshape it to [minibatch, timeSeriesLength] then sum over time step
-
-        INDArray scoreArrayTs = TimeSeriesUtils.reshapeVectorToTimeSeriesMask(scoreArray, (int)input.size(0));
-        INDArray summedScores = scoreArrayTs.sum(true, 1);
-
-        if (fullNetRegTerm != 0.0) {
-            summedScores.addi(fullNetRegTerm);
-        }
-
-        return summedScores;
-    }
+    return summedScores;
+  }
 }
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/wrapper/BaseWrapperLayer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/wrapper/BaseWrapperLayer.java
@ -47,7 +47,7 @@ import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
 import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator;

@Data
-public abstract class BaseWrapperLayer extends AbstractLayer {
+public abstract class BaseWrapperLayer<LayerConf_T extends LayerConfiguration> extends AbstractLayer {

  protected Layer underlying;

@ -57,8 +57,8 @@ public abstract class BaseWrapperLayer extends AbstractLayer {
  }

  @Override
-  public BaseLayerConfiguration getTypedLayerConfiguration() {
-    return (BaseLayerConfiguration) underlying.getLayerConfiguration();
+  public LayerConf_T getTypedLayerConfiguration() {
+    return (LayerConf_T) underlying.getLayerConfiguration();
  }

  /**
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java
@ -712,7 +712,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork
        if (layer_conf instanceof BaseLayerConfiguration)
          ((BaseLayerConfiguration) layer_conf).setDataType(netDtype);

-        nParamsPerLayer[i] = layer_conf.initializer().numParams(layer_conf);
+        nParamsPerLayer[i] = layer_conf.numParams();
        paramLength += nParamsPerLayer[i];
      }
      log.debug(
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/ConvolutionNewParamInitializer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/ConvolutionNewParamInitializer.java
@ -0,0 +1,183 @@
+/*
+ *  ******************************************************************************
+ *  *
+ *  *
+ *  * This program and the accompanying materials are made available under the
+ *  * terms of the Apache License, Version 2.0 which is available at
+ *  * https://www.apache.org/licenses/LICENSE-2.0.
+ *  *
+ *  *  See the NOTICE file distributed with this work for additional
+ *  *  information regarding copyright ownership.
+ *  * Unless required by applicable law or agreed to in writing, software
+ *  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ *  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ *  * License for the specific language governing permissions and limitations
+ *  * under the License.
+ *  *
+ *  * SPDX-License-Identifier: Apache-2.0
+ *  *****************************************************************************
+ */
+
+package org.deeplearning4j.nn.params;
+
+
+import java.util.*;
+import lombok.val;
+import org.deeplearning4j.nn.api.AbstractParamInitializer;
+import org.deeplearning4j.nn.conf.layers.Convolution1DNew;
+import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
+import org.deeplearning4j.nn.conf.layers.LayerConfiguration;
+import org.deeplearning4j.nn.weights.WeightInitUtil;
+import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.indexing.NDArrayIndex;
+
+public class ConvolutionNewParamInitializer extends AbstractParamInitializer {
+
+    private static final ConvolutionNewParamInitializer INSTANCE = new ConvolutionNewParamInitializer();
+
+    /**
+     * Returns the shared instance of this initializer.
+     *
+     * @return the instance held in the static {@code INSTANCE} field
+     */
+    public static ConvolutionNewParamInitializer getInstance() {
+        return INSTANCE;
+    }
+
+
+    public final static String WEIGHT_KEY = DefaultParamInitializer.WEIGHT_KEY;
+    public final static String BIAS_KEY = DefaultParamInitializer.BIAS_KEY;
+
+    /**
+     * Returns the number of parameters of the given layer configuration by asking the
+     * configuration itself.
+     *
+     * <p>NOTE(review): if {@code LayerConfiguration.numParams()} in turn delegates to its
+     * initializer, the two calls would recurse into each other - confirm against the
+     * configuration class.
+     *
+     * @param l layer configuration to query
+     * @return the value of {@code l.numParams()}
+     */
+    @Override
+    public long numParams(LayerConfiguration l) {
+        return l.numParams();
+    }
+
+    /**
+     * Lists the parameter keys of the given layer.
+     *
+     * @param layer layer configuration; cast to {@code ConvolutionLayer}
+     * @return weight key followed by bias key if the layer has a bias, otherwise the result of
+     *     {@code weightKeys(layer)}
+     */
+    @Override
+    public List<String> paramKeys(LayerConfiguration layer) {
+        final ConvolutionLayer convConf = (ConvolutionLayer) layer;
+        if (!convConf.hasBias()) {
+            return weightKeys(layer);
+        }
+        return Arrays.asList(WEIGHT_KEY, BIAS_KEY);
+    }
+
+    /**
+     * Lists the weight parameter keys.
+     *
+     * @param layer layer configuration; not read by this implementation
+     * @return a single-element list holding {@code WEIGHT_KEY}
+     */
+    @Override
+    public List<String> weightKeys(LayerConfiguration layer) {
+        return Collections.singletonList(WEIGHT_KEY);
+    }
+
+    /**
+     * Lists the bias parameter keys of the given layer.
+     *
+     * @param layer layer configuration; cast to {@code ConvolutionLayer}
+     * @return a single-element list holding {@code BIAS_KEY} if the layer has a bias, otherwise
+     *     an empty list
+     */
+    @Override
+    public List<String> biasKeys(LayerConfiguration layer) {
+        final ConvolutionLayer convConf = (ConvolutionLayer) layer;
+        return convConf.hasBias()
+                ? Collections.singletonList(BIAS_KEY)
+                : Collections.<String>emptyList();
+    }
+
+    /**
+     * Tells whether the given key names the weight parameter.
+     *
+     * @param layer layer configuration; not read by this implementation
+     * @param key parameter key to test, may be {@code null}
+     * @return {@code true} if {@code key} equals {@code WEIGHT_KEY}
+     */
+    @Override
+    public boolean isWeightParam(LayerConfiguration layer, String key) {
+        return WEIGHT_KEY.equals(key);
+    }
+
+    /**
+     * Tells whether the given key names the bias parameter.
+     *
+     * @param layer layer configuration; not read by this implementation
+     * @param key parameter key to test, may be {@code null}
+     * @return {@code true} if {@code key} equals {@code BIAS_KEY}
+     */
+    @Override
+    public boolean isBiasParam(LayerConfiguration layer, String key) {
+        return BIAS_KEY.equals(key);
+    }
+
+    /**
+     * Builds the parameter table of the layer from the flattened parameter view.
+     *
+     * <p>With a bias, columns {@code [0, nOut)} of {@code paramsView} are used for the bias and
+     * columns {@code [nOut, numParams(conf))} for the weights. Without a bias the whole view is
+     * used for the weights. Each key placed in the table is also registered as a net-wide
+     * variable on the net configuration.
+     *
+     * @param conf layer configuration; cast to {@code Convolution1DNew}
+     * @param paramsView flattened view holding this layer's parameters
+     * @param initializeParams passed on to {@code createBias} and {@code createWeightMatrix}
+     * @return synchronized, insertion-ordered map from parameter key to parameter array
+     * @throws IllegalArgumentException if the configured kernel size does not have length 2
+     */
+    @Override
+    public Map<String, INDArray> init(LayerConfiguration conf, INDArray paramsView, boolean initializeParams) {
+        final Convolution1DNew convConf = (Convolution1DNew) conf;
+        if (convConf.getKernelSize().length != 2) {
+            throw new IllegalArgumentException("Filter size must be == 2");
+        }
+
+        final Map<String, INDArray> paramTable =
+                Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());
+        val nOut = convConf.getNOut();
+
+        if (!convConf.hasBias()) {
+            // No bias: the complete view holds the weights
+            paramTable.put(WEIGHT_KEY, createWeightMatrix(conf, paramsView, initializeParams));
+            conf.getNetConfiguration().addNetWideVariable(WEIGHT_KEY);
+            return paramTable;
+        }
+
+        // Standard case: bias first, weights in the remainder of the view
+        final INDArray biasView =
+                paramsView.get(NDArrayIndex.interval(0, 0, true), NDArrayIndex.interval(0, nOut));
+        final INDArray weightView =
+                paramsView.get(NDArrayIndex.interval(0, 0, true), NDArrayIndex.interval(nOut, numParams(conf)));
+        paramTable.put(BIAS_KEY, createBias(conf, biasView, initializeParams));
+        paramTable.put(WEIGHT_KEY, createWeightMatrix(conf, weightView, initializeParams));
+        conf.getNetConfiguration().addNetWideVariable(WEIGHT_KEY);
+        conf.getNetConfiguration().addNetWideVariable(BIAS_KEY);
+        return paramTable;
+    }
+
+    /**
+     * Splits a flat gradient view into per-parameter gradient arrays, mirroring the layout used
+     * for the parameters: optional bias in columns {@code [0, nOut)}, weights in the rest.
+     *
+     * @param conf the layer configuration; it is cast to {@code Convolution1DNew}
+     * @param gradientView flat view backing all gradients of the layer
+     * @return insertion-ordered map holding the bias gradient (when the layer has a bias) followed
+     *     by the weight gradient reshaped in 'c' order to {@code [nOut, nIn, kernel[0], kernel[1]]}
+     */
+    @Override
+    public Map<String, INDArray> getGradientsFromFlattened(LayerConfiguration conf, INDArray gradientView) {
+        final Convolution1DNew layerConf = (Convolution1DNew) conf;
+
+        final int[] kernel = layerConf.getKernelSize();
+        val nIn = layerConf.getNIn();
+        val nOut = layerConf.getNOut();
+
+        final Map<String, INDArray> gradients = new LinkedHashMap<>();
+        // Without a bias the whole view is the weight gradient.
+        INDArray flatWeightGradient = gradientView;
+        if (layerConf.hasBias()) {
+            //Standard case
+            gradients.put(BIAS_KEY,
+                    gradientView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(0, nOut)));
+            flatWeightGradient =
+                    gradientView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(nOut, numParams(conf)));
+        }
+        gradients.put(WEIGHT_KEY, flatWeightGradient.reshape('c', nOut, nIn, kernel[0], kernel[1]));
+        return gradients;
+    }
+
+    /**
+     * Prepares the bias array: one bias per output feature map.
+     *
+     * @param conf the layer configuration; it is cast to {@code Convolution1DNew} on every call
+     * @param biasView view that backs the bias values
+     * @param initializeParams when true, every element of the view is set to the configured bias
+     *     init value; when false the view is left untouched
+     * @return {@code biasView} itself
+     */
+    protected INDArray createBias(LayerConfiguration conf, INDArray biasView, boolean initializeParams) {
+        //the bias is a 1D tensor -- one bias per output feature map
+        Convolution1DNew layerConf =
+                        (Convolution1DNew) conf;
+        if (initializeParams)
+            biasView.assign(layerConf.getBiasInit());
+        return biasView;
+    }
+
+
+    /**
+     * Produces the weight array with shape
+     * (number of kernels, num input channels, kernel[0], kernel[1]).
+     *
+     * <p>'c' order is used specifically for these weights. When {@code initializeParams} is true
+     * the configured weight initialiser fills the view, using fan-in and fan-out derived from the
+     * channel counts, kernel and stride; otherwise the view is only reshaped.
+     *
+     * @param conf the layer configuration; it is cast to {@code Convolution1DNew}
+     * @param weightView view that backs the weight values
+     * @param initializeParams whether to initialise the values or only impose the shape
+     * @return the weight array over {@code weightView}
+     */
+    protected INDArray createWeightMatrix(LayerConfiguration conf, INDArray weightView, boolean initializeParams) {
+        final Convolution1DNew layerConf = (Convolution1DNew) conf;
+        final int[] kernel = layerConf.getKernelSize();
+
+        if (!initializeParams) {
+            // Existing values are kept; only the shape is imposed on the view.
+            return WeightInitUtil.reshapeWeights(
+                            new long[] {layerConf.getNOut(), layerConf.getNIn(), kernel[0], kernel[1]}, weightView, 'c');
+        }
+
+        final int[] stride = layerConf.getStride();
+
+        val inputDepth = layerConf.getNIn();
+        val outputDepth = layerConf.getNOut();
+
+        double fanIn = inputDepth * kernel[0] * kernel[1];
+        double fanOut = outputDepth * kernel[0] * kernel[1] / ((double) stride[0] * stride[1]);
+
+        val weightsShape = new long[] {outputDepth, inputDepth, kernel[0], kernel[1]};
+
+        return layerConf.getWeightInit().init(fanIn, fanOut, weightsShape, 'c', weightView);
+    }
+}
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/Convolution1DUtils.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/util/Convolution1DUtils.java
@ -20,7 +20,6 @@

 package org.deeplearning4j.util;

-
 import java.util.Arrays;
 import org.deeplearning4j.exception.DL4JInvalidConfigException;
 import org.deeplearning4j.exception.DL4JInvalidInputException;
@ -35,281 +34,332 @@ import org.nd4j.linalg.exception.ND4JArraySizeException;

 public class Convolution1DUtils {

-    private static final int ONE = 1;
+  /** Dilation passed on by the {@code getOutputSize} overload that takes no dilation argument. */
+  private static final int ONE = 1;

+  /** Private constructor: the methods of this class are static, so no instance is needed. */
+  private Convolution1DUtils() {}

-    private Convolution1DUtils() {
+  /**
+   * Get the output size for the given input data and CNN1D configuration, using a dilation of
+   * {@code ONE}. Delegates to the overload that takes an explicit dilation.
+   *
+   * @param inputData Input data
+   * @param kernel Kernel size
+   * @param strides Stride
+   * @param padding Padding
+   * @param convolutionMode Convolution mode
+   * @return Output size as computed by the six-argument overload
+   */
+  public static int getOutputSize(
+      INDArray inputData, int kernel, int strides, int padding, ConvolutionMode convolutionMode) {
+    return getOutputSize(inputData, kernel, strides, padding, convolutionMode, ONE);
+  }
+
+  /**
+   * Returns true if the given layer has an {@link RNNFormat}. This is true for: {@link
+   * Convolution1D}, {@link Subsampling1DLayer}, {@link SimpleRnn}, {@link LSTM} and {@link
+   * EmbeddingSequenceLayer}.
+   *
+   * @param layer the layer to test
+   * @return true if the layer is one of the types listed above, false otherwise (including for
+   *     {@code null})
+   */
+  public static boolean hasRnnDataFormat(LayerConfiguration layer) {
+    // One instanceof test per supported type.
+    return layer instanceof Convolution1D
+        || layer instanceof Subsampling1DLayer
+        || layer instanceof SimpleRnn
+        || layer instanceof LSTM
+        || layer instanceof EmbeddingSequenceLayer;
+  }
+
+  /**
+   * Get the {@link RNNFormat} for the given layer.
+   *
+   * <p>The layer is first checked with {@link #hasRnnDataFormat(LayerConfiguration)}; the format is
+   * then read from the type-specific getter. For a {@link Subsampling1DLayer} the CNN 2D format is
+   * mapped: {@code NCHW} becomes {@code NCW}, anything else becomes {@code NWC}.
+   *
+   * @param layer the layer to get the format for
+   * @return the format for the layer
+   * @throws IllegalArgumentException if the layer passes the precondition but matches none of the
+   *     handled types
+   */
+  public static RNNFormat getRnnFormatFromLayer(LayerConfiguration layer) {
+    Preconditions.checkState(
+        hasRnnDataFormat(layer),
+        "ILayer of type "
+            + layer.getClass().getName()
+            + " and name "
+            + layer.getName()
+            + " does not have an RNNFormat");
+    if (layer instanceof SimpleRnn) {
+      SimpleRnn simpleRnn = (SimpleRnn) layer;
+      return simpleRnn.getDataFormat();
+    } else if (layer instanceof Convolution1D) {
+      Convolution1D convolution1D = (Convolution1D) layer;
+      return convolution1D.getRnnDataFormat();
+    } else if (layer instanceof Subsampling1DLayer) {
+      Subsampling1DLayer subsampling1DLayer = (Subsampling1DLayer) layer;
+      return subsampling1DLayer.getDataFormat() == CNN2DFormat.NCHW ? RNNFormat.NCW : RNNFormat.NWC;
+    } else if (layer instanceof LSTM) {
+      LSTM lstm = (LSTM) layer;
+      return lstm.getDataFormat();
+    } else if (layer instanceof EmbeddingSequenceLayer) {
+      EmbeddingSequenceLayer embeddingSequenceLayer = (EmbeddingSequenceLayer) layer;
+      return embeddingSequenceLayer.getOutputDataFormat();
+    } else {
+      throw new IllegalArgumentException(
+          "Illegal layer type " + layer.getClass().getName() + " and name " + layer.getName());
+    }
+  }
+
+  /**
+   * Reshapes the given weight array or weight gradient to work with the specified {@link RNNFormat}
+   *
+   * <p>For {@link RNNFormat#NWC} the array is reshaped to its first three dimensions and the axes
+   * are then reversed with {@code permute(2, 1, 0)}. For any other format it is reshaped to
+   * {@code [size(2), size(1), size(0)]} without a permute.
+   *
+   * @param w the weight array or gradient
+   * @param rnnFormat the {@link RNNFormat} to use
+   * @return the reshaped array.
+   */
+  public static INDArray reshapeWeightArrayOrGradientForFormat(INDArray w, RNNFormat rnnFormat) {
+
+    if (rnnFormat == RNNFormat.NWC)
+      w = w.reshape(w.ordering(), w.size(0), w.size(1), w.size(2))
+              .permute(2, 1, 0); // [oC, iC, k, 1] to [k, iC, oC]
+    else {
+      // NOTE(review): this branch only reshapes with the sizes reversed; unlike the NWC branch it
+      // does not permute the axes - confirm that this is intended.
+      w = w.reshape(w.ordering(), w.size(2), w.size(1), w.size(0));
     }

+    return w;
+  }

-    public static int getOutputSize(INDArray inputData, int kernel, int strides, int padding,
-                                    ConvolutionMode convolutionMode) {
-        return getOutputSize(inputData, kernel, strides, padding, convolutionMode, ONE);
+  /**
+   * Get the output size along the convolved dimension for the given input size and CNN1D
+   * configuration.
+   *
+   * <p>For {@code Same} and {@code Causal} modes the result is {@code ceil(inH / strides)}; for all
+   * other modes it is {@code (inH - effectiveKernel + 2 * padding) / strides + 1} using integer
+   * division.
+   *
+   * @param inH Input size along the convolved dimension
+   * @param kernel Kernel size
+   * @param strides Stride
+   * @param padding Padding
+   * @param convolutionMode Convolution mode (Same, Strict, Truncate)
+   * @param dilation Kernel dilation
+   * @return Output size along the convolved dimension
+   */
+  public static long getOutputSize(
+      long inH,
+      int kernel,
+      int strides,
+      int padding,
+      ConvolutionMode convolutionMode,
+      int dilation) {
+    long eKernel = effectiveKernelSize(kernel, dilation);
+    if (convolutionMode == ConvolutionMode.Same || convolutionMode == ConvolutionMode.Causal) {
+      // Narrow to long, not int: an int cast would clamp sizes above Integer.MAX_VALUE.
+      return (long) Math.ceil(inH / ((double) strides));
+    }
+    return (inH - eKernel + 2L * padding) / strides + 1;
+  }
+
+  /**
+   * Get the output size along the convolved dimension for the given input data and CNN1D
+   * configuration. The input length is read from dimension 2 of {@code inputData} and the
+   * configuration is validated before the size is computed.
+   *
+   * @param inputData Input data
+   * @param kernel Kernel size
+   * @param strides Stride
+   * @param padding Padding
+   * @param convolutionMode Convolution mode (Same, Strict, Truncate)
+   * @param dilation Kernel dilation
+   * @return Output size along the convolved dimension
+   * @throws ND4JArraySizeException if dimension 2 of the input exceeds {@code Integer.MAX_VALUE}
+   */
+  public static int getOutputSize(
+      INDArray inputData,
+      int kernel,
+      int strides,
+      int padding,
+      ConvolutionMode convolutionMode,
+      int dilation) {
+    if (inputData.size(2) > Integer.MAX_VALUE) throw new ND4JArraySizeException();
+    int inH = (int) inputData.size(2);
+    int eKernel = effectiveKernelSize(kernel, dilation);
+    // Dilation is in effect only when it actually changes the kernel size; the flag makes
+    // validateShapes label the kernel as "effective" in its error message.
+    boolean atrous = (eKernel != kernel);
+    validateShapes(inputData, eKernel, strides, padding, convolutionMode, dilation, inH, atrous);
+
+    if (convolutionMode == ConvolutionMode.Same || convolutionMode == ConvolutionMode.Causal) {
+      int outH = (int) Math.ceil(inH / ((double) strides));
+      return outH;
     }

-    /**
-     * Returns true if the given layer has an
-     * {@link RNNFormat}.
-     * This is true for:
-     * {@link Convolution1D},
-     * {@link Subsampling1DLayer}
-     * {@link SimpleRnn}
-     * {@link LSTM}
-     * {@link EmbeddingSequenceLayer}
-     * @param layer the layer to test
-     * @return true if the input layer has an rnn format
-     * false otherwise
-     */
-    public static boolean hasRnnDataFormat(LayerConfiguration layer) {
-        return layer instanceof Convolution1D ||
-                layer instanceof Convolution1D ||
-                layer instanceof Subsampling1DLayer ||
-                layer instanceof SimpleRnn ||
-                layer instanceof LSTM ||
-                layer instanceof EmbeddingSequenceLayer;
+    int outH = (inH - eKernel + 2 * padding) / strides + 1;
+    return outH;
+  }
+
+  /**
+   * Checks that kernel, stride and padding are compatible with the input length for the given
+   * convolution mode, and throws a descriptive exception when they are not.
+   *
+   * <p>In {@code Truncate} mode the kernel must satisfy
+   * {@code 0 < eKernel <= inShape + 2 * padding}. In {@code Strict} mode
+   * {@code inShape - eKernel + 2 * padding} must be divisible by {@code strides}. No check is made
+   * for other modes.
+   *
+   * @param inputData input array; only its shape is read, for the error message
+   * @param eKernel kernel size compared against the input length
+   * @param strides stride
+   * @param padding padding
+   * @param convolutionMode mode that selects which check applies
+   * @param dilation dilation; only forwarded to the error-message helper
+   * @param inShape input length along the convolved dimension
+   * @param atrous when true, the Truncate-mode message says "effective kernel height"
+   * @throws DL4JInvalidInputException if the Truncate-mode check fails
+   * @throws DL4JInvalidConfigException if the Strict-mode check fails
+   */
+  public static void validateShapes(
+      INDArray inputData,
+      int eKernel,
+      int strides,
+      int padding,
+      ConvolutionMode convolutionMode,
+      int dilation,
+      int inShape,
+      boolean atrous) {
+
+    int inH = inShape;
+    boolean t = convolutionMode == ConvolutionMode.Truncate;
+
+    // Truncate mode: the kernel must be positive and fit inside the padded input.
+    if (t && (eKernel <= 0 || eKernel > inH + 2 * padding)) {
+      StringBuilder sb = new StringBuilder();
+      sb.append("Invalid input data or configuration: ");
+      if (atrous) sb.append("effective ");
+      sb.append("kernel height and input height must satisfy 0 < ");
+      if (atrous) sb.append("effective ");
+      sb.append("kernel height <= input height + 2 * padding height. \nGot ");
+      if (atrous) sb.append("effective ");
+      sb.append("kernel height = ")
+          .append(eKernel)
+          .append(", input height = ")
+          .append(inH)
+          .append(" and padding height = ")
+          .append(padding)
+          .append(" which do not satisfy 0 < ")
+          .append(eKernel)
+          .append(" <= ")
+          .append(inH + 2 * padding)
+          // NOTE(review): the helper labels its kernel argument "inputKernel" and applies the
+          // dilation to it again, but eKernel is passed here - confirm the reported values.
+          .append(getCommonErrorMsg(inputData, eKernel, strides, padding, dilation));
+
+      throw new DL4JInvalidInputException(sb.toString());
     }

-    /**
-     * Get the {@link RNNFormat} for the given layer.
-     * Throws an {@link IllegalArgumentException}
-     * if a layer doesn't have an rnn format
-     * @param layer the layer to get the format for
-     * @return the format for the layer
-     */
-    public static RNNFormat getRnnFormatFromLayer(LayerConfiguration layer) {
-        Preconditions.checkState(hasRnnDataFormat(layer),"ILayer of type " + layer.getClass().getName() + " and name " + layer.getName() + " does not have an RNNFormat");
-        if(layer instanceof SimpleRnn) {
-            SimpleRnn simpleRnn = (SimpleRnn) layer;
-            return simpleRnn.getDataFormat();
-        } else if(layer instanceof Convolution1D) {
-            Convolution1D convolution1D = (Convolution1D) layer;
-            return convolution1D.getRnnDataFormat();
-        } else if(layer instanceof Convolution1D) {
-            Convolution1D convolution1D = (Convolution1D) layer;
-            return convolution1D.getRnnDataFormat();
-        } else if(layer instanceof Subsampling1DLayer) {
-            Subsampling1DLayer subsampling1DLayer = (Subsampling1DLayer) layer;
-            return subsampling1DLayer.getDataFormat() == CNN2DFormat.NCHW ? RNNFormat.NCW : RNNFormat.NWC;
-        } else if(layer instanceof LSTM) {
-            LSTM lstm = (LSTM) layer;
-            return lstm.getDataFormat();
-        } else if(layer instanceof EmbeddingSequenceLayer) {
-            EmbeddingSequenceLayer embeddingSequenceLayer = (EmbeddingSequenceLayer) layer;
-            return embeddingSequenceLayer.getOutputDataFormat();
-        }
-        else {
-            throw new IllegalArgumentException("Illegal layer type " + layer.getClass().getName() + " and name " + layer.getName());
-        }
+    // Strict mode: (input - kernel + 2*padding) must divide evenly by the stride, otherwise the
+    // output size formula would not yield an integer.
+    if (convolutionMode == ConvolutionMode.Strict) {
+      if ((inH - eKernel + 2 * padding) % strides != 0) {
+        // d is the fractional output size; it is reported together with the sizes that Truncate
+        // (floor) and Same (ceil of input/stride) would give instead.
+        double d = (inH - eKernel + 2 * padding) / ((double) strides) + 1.0;
+        String str = String.format("%.2f", d);
+        int truncated = (int) d;
+        int sameSize = (int) Math.ceil(inH / ((double) strides));
+
+        String sb =
+            "Invalid input data or configuration: Combination of kernel size, "
+                + "stride and padding are not "
+                + "valid for given input height, using ConvolutionMode.Strict\n"
+                + "ConvolutionMode.Strict requires: output height = (input height - kernelSize + "
+                + "2*padding)/stride + 1 to be an integer. Got: ("
+                + inH
+                + " - "
+                + eKernel
+                + " + 2*"
+                + padding
+                + ")/"
+                + strides
+                + " + 1 = "
+                + str
+                + "\n"
+                + "See \"Constraints on strides\" at http://cs231n.github."
+                + "io/convolutional-networks/ and ConvolutionType enumeration Javadoc.\n"
+                + "To truncate/crop the input, such that output height = floor("
+                + str
+                + ") = "
+                + truncated
+                + ", use ConvolutionType.Truncate.\n"
+                + "Alternatively use ConvolutionType.Same, which will use padding to give an "
+                + "output height of ceil("
+                + inH
+                + "/"
+                + strides
+                + ")="
+                + sameSize
+                + getCommonErrorMsg(inputData, eKernel, strides, padding, dilation);
+
+        throw new DL4JInvalidConfigException(sb);
+      }
     }
+  }

-    /**
-     * Reshapes the given weight
-     * array or weight gradient
-     * to work with the specified
-     * {@link RNNFormat}
-     * @param w the weight array or gradient
-     * @param rnnFormat the {@link RNNFormat} to use
-     * @return the reshaped array.
-     */
-    public static INDArray reshapeWeightArrayOrGradientForFormat(INDArray w, RNNFormat rnnFormat) {
-        if(rnnFormat == RNNFormat.NWC)
-            w = w.reshape(w.ordering(), w.size(0), w.size(1), w.size(2)).permute(2, 1, 0);   //[oC, iC, k, 1] to [k, iC, oC]
-        else {
-            w = w.reshape(w.ordering(),w.size(2),w.size(1),w.size(0));
-        }
-
-        return w;
+  /**
+   * Computes the effective kernel size once dilation is taken into account.
+   *
+   * @param kernel Kernel size
+   * @param dilation Kernel dilation; a value of 1 leaves the kernel size unchanged
+   * @return {@code kernel} when {@code dilation == 1}, otherwise
+   *     {@code kernel + (kernel - 1) * (dilation - 1)}
+   */
+  public static int effectiveKernelSize(int kernel, int dilation) {
+    // Determine the effective kernel size, accounting for dilation
+    // http://deeplearning.net/software/theano/tutorial/conv_arithmetic.html#dilated-convolutions
+    if (dilation == 1) {
+      return kernel;
+    } else {
+      return kernel + (kernel - 1) * (dilation - 1);
     }
+  }

-
-    /**
-     * Get the output size (height) for the given input data and CNN1D configuration
-     *
-     * @param inH             Input size (height, or channels).
-     * @param kernel          Kernel size
-     * @param strides         Stride
-     * @param padding         Padding
-     * @param convolutionMode Convolution mode (Same, Strict, Truncate)
-     * @param dilation        Kernel dilation
-     * @return Output size (width)
-     */
-    public static long getOutputSize(long inH, int kernel, int strides, int padding,
-                                    ConvolutionMode convolutionMode, int dilation) {
-        long eKernel = effectiveKernelSize(kernel, dilation);
-        if (convolutionMode == ConvolutionMode.Same || convolutionMode == ConvolutionMode.Causal) {
-            return (int) Math.ceil(inH / ((double) strides));
-        }
-        return (inH - eKernel + 2L * padding) / strides + 1;
+  /**
+   * Builds the trailing part shared by the validation error messages: input shape, kernel, stride,
+   * padding and dilation. The effective kernel size is added only when {@code dilation != 1}.
+   *
+   * @param inputData input array; only its shape is printed
+   * @param kernel kernel size, printed as {@code inputKernel} and used to derive the effective size
+   * @param strides stride
+   * @param padding padding
+   * @param dilation dilation
+   * @return the message fragment, starting with a newline
+   */
+  private static String getCommonErrorMsg(
+      INDArray inputData, int kernel, int strides, int padding, int dilation) {
+    // NOTE(review): the label lists four dimensions, while getOutputSize reads the length from
+    // dimension 2 of the input - confirm the label matches the arrays actually passed in.
+    String s =
+        "\nInput size: [numExamples,inputDepth,inputHeight,inputWidth]="
+            + Arrays.toString(inputData.shape())
+            + ", inputKernel="
+            + kernel;
+    if (dilation != 1) {
+      int effectiveKernel = effectiveKernelSize(kernel, dilation);
+      s += ", effectiveKernelGivenDilation=" + effectiveKernel;
     }
+    return s + ", stride=" + strides + ", padding=" + padding + ", dilation=" + dilation;
+  }

-    /**
-     * Get the output size (height) for the given input data and CNN1D configuration
-     *
-     * @param inputData       Input data
-     * @param kernel          Kernel size
-     * @param strides         Stride
-     * @param padding         Padding
-     * @param convolutionMode Convolution mode (Same, Strict, Truncate)
-     * @param dilation        Kernel dilation
-     * @return Output size (width)
-     */
-    public static int getOutputSize(INDArray inputData, int kernel, int strides, int padding,
-                                    ConvolutionMode convolutionMode, int dilation) {
-        if (inputData.size(2) > Integer.MAX_VALUE)
-            throw new ND4JArraySizeException();
-        int inH = (int) inputData.size(2);
-        int eKernel = effectiveKernelSize(kernel, dilation);
-        boolean atrous = (eKernel == kernel);
-        validateShapes(inputData, eKernel, strides, padding, convolutionMode, dilation, inH, atrous);
-
-        if (convolutionMode == ConvolutionMode.Same || convolutionMode == ConvolutionMode.Causal) {
-            int outH = (int) Math.ceil(inH / ((double) strides));
-            return outH;
-        }
-
-        int outH = (inH - eKernel + 2 * padding) / strides + 1;
-        return outH;
+  /**
+   * Check that the convolution mode is consistent with the padding specification. Only
+   * {@code Same} mode is checked: it requires a padding of 0. Every other mode is accepted as is.
+   *
+   * @param mode convolution mode to check
+   * @param padding padding to check
+   * @throws IllegalArgumentException if {@code mode} is {@code Same} and {@code padding} is not 0
+   */
+  public static void validateConvolutionModePadding(ConvolutionMode mode, int padding) {
+    if (mode == ConvolutionMode.Same) {
+      boolean nullPadding = padding == 0;
+      if (!nullPadding)
+        throw new IllegalArgumentException(
+            "Padding cannot be used when using the `same' convolution mode");
     }
+  }

-    public static void validateShapes(INDArray inputData, int eKernel, int strides, int padding,
-                                      ConvolutionMode convolutionMode, int dilation, int inShape,
-                                      boolean atrous) {
+  /**
+   * Get the leading (top/left) padding for same mode, along a single dimension.
+   *
+   * <p>The total padding is {@code (outSize - 1) * strides + effectiveKernel - inSize}; this method
+   * returns half of it using integer division.
+   *
+   * @param outSize Output size
+   * @param inSize Input size
+   * @param kernel Kernel size
+   * @param strides Stride
+   * @param dilation Dilation
+   * @return Leading padding; a state check rejects negative results
+   */
+  public static int getSameModeTopLeftPadding(
+      int outSize, int inSize, int kernel, int strides, int dilation) {
+    final int effectiveKernel = effectiveKernelSize(kernel, dilation);
+    final int totalPad = (outSize - 1) * strides + effectiveKernel - inSize;
+    // Integer division: when the total is odd, the extra unit goes to the bottom/right side.
+    final int leadingPad = totalPad / 2;
+    Preconditions.checkState(
+        leadingPad >= 0,
+        "Invalid padding values calculated: %s - "
+            + "layer configuration is invalid? Input size %s, output size %s, kernel %s, "
+            + "strides %s, dilation %s",
+        leadingPad,
+        inSize,
+        outSize,
+        kernel,
+        strides,
+        dilation);
+    return leadingPad;
+  }

-        int inH = inShape;
-        boolean t = convolutionMode == ConvolutionMode.Truncate;
+  /**
+   * Get the trailing (bottom/right) padding for same mode, along a single dimension.
+   *
+   * <p>This is the total padding {@code (outSize - 1) * strides + effectiveKernel - inSize} minus
+   * the leading half (integer division), so it receives the extra unit when the total is odd.
+   *
+   * @param outSize Output size
+   * @param inSize Input size
+   * @param kernel Kernel size
+   * @param strides Stride
+   * @param dilation Dilation
+   * @return Trailing padding; a state check rejects negative results
+   */
+  public static int getSameModeBottomRightPadding(
+      int outSize, int inSize, int kernel, int strides, int dilation) {
+    final int effectiveKernel = effectiveKernelSize(kernel, dilation);
+    final int totalPad = (outSize - 1) * strides + effectiveKernel - inSize;
+    final int trailingPad = totalPad - totalPad / 2;
+    Preconditions.checkState(
+        trailingPad >= 0,
+        "Invalid padding values (right) calculated: %s - "
+            + "layer configuration is invalid? Input size %s, output size %s, kernel %s, "
+            + "strides %s, dilation %s",
+        trailingPad,
+        inSize,
+        outSize,
+        kernel,
+        strides,
+        dilation);
+    return trailingPad;
+  }

-        if (t && (eKernel <= 0 || eKernel > inH + 2 * padding)) {
-            StringBuilder sb = new StringBuilder();
-            sb.append("Invalid input data or configuration: ");
-            if (atrous) sb.append("effective ");
-            sb.append("kernel height and input height must satisfy 0 < ");
-            if (atrous) sb.append("effective ");
-            sb.append("kernel height <= input height + 2 * padding height. \nGot ");
-            if (atrous) sb.append("effective ");
-            sb.append("kernel height = ").append(eKernel).append(", input height = ").append(inH)
-                    .append(" and padding height = ").append(padding).append(" which do not satisfy 0 < ")
-                    .append(eKernel).append(" <= ").append(inH + 2 * padding)
-                    .append(getCommonErrorMsg(inputData, eKernel, strides, padding, dilation));
-
-            throw new DL4JInvalidInputException(sb.toString());
-        }
-
-
-        if (convolutionMode == ConvolutionMode.Strict) {
-            if ((inH - eKernel + 2 * padding) % strides != 0) {
-                double d = (inH - eKernel + 2 * padding) / ((double) strides) + 1.0;
-                String str = String.format("%.2f", d);
-                int truncated = (int) d;
-                int sameSize = (int) Math.ceil(inH / ((double) strides));
-
-                String sb = "Invalid input data or configuration: Combination of kernel size, " +
-                        "stride and padding are not " +
-                        "valid for given input height, using ConvolutionMode.Strict\n" +
-                        "ConvolutionMode.Strict requires: output height = (input height - kernelSize + " +
-                        "2*padding)/stride + 1 to be an integer. Got: (" +
-                        inH + " - " + eKernel + " + 2*" + padding + ")/" +
-                        strides + " + 1 = " +
-                        str + "\n" + "See \"Constraints on strides\" at http://cs231n.github." +
-                        "io/convolutional-networks/ and ConvolutionType enumeration Javadoc.\n" +
-                        "To truncate/crop the input, such that output height = floor(" +
-                        str + ") = " +
-                        truncated + ", use ConvolutionType.Truncate.\n" +
-                        "Alternatively use ConvolutionType.Same, which will use padding to give an " +
-                        "output height of ceil(" +
-                        inH + "/" + strides + ")=" + sameSize +
-                        getCommonErrorMsg(inputData, eKernel, strides, padding, dilation);
-
-                throw new DL4JInvalidConfigException(sb);
-            }
-        }
+  /**
+   * Perform validation on the CNN layer kernel/stride/padding. Expect int, with values > 0 for
+   * kernel size and stride, and values >= 0 for padding.
+   *
+   * @param kernel Kernel size to check
+   * @param stride Stride to check
+   * @param padding Padding to check
+   * @throws IllegalStateException if kernel or stride is not positive, or padding is negative; the
+   *     message names the offending setting and its value
+   */
+  public static void validateCnn1DKernelStridePadding(int kernel, int stride, int padding) {

+    if (kernel <= 0) {
+      throw new IllegalStateException(
+          "Invalid kernel size: value must be positive (> 0). Got: " + kernel);
     }
-
-    public static int effectiveKernelSize(int kernel, int dilation) {
-        //Determine the effective kernel size, accounting for dilation
-        //http://deeplearning.net/software/theano/tutorial/conv_arithmetic.html#dilated-convolutions
-        if (dilation == 1) {
-            return kernel;
-        } else {
-            return kernel + (kernel - 1) * (dilation - 1);
-        }
+    if (stride <= 0) {
+      throw new IllegalStateException(
+          "Invalid stride: value must be positive (> 0). Got: " + stride);
     }
-
-    private static String getCommonErrorMsg(INDArray inputData, int kernel, int strides, int padding, int dilation) {
-        String s = "\nInput size: [numExamples,inputDepth,inputHeight,inputWidth]=" + Arrays.toString(inputData.shape())
-                + ", inputKernel=" + kernel;
-        if (dilation != 1) {
-            int effectiveKernel = effectiveKernelSize(kernel, dilation);
-            s += ", effectiveKernelGivenDilation=" + effectiveKernel;
-        }
-        return s + ", stride=" + strides + ", padding=" + padding + ", dilation=" + dilation;
+    if (padding < 0) {
+      // Zero padding is valid, so the message states the actual bound (>= 0).
+      throw new IllegalStateException(
+          "Invalid padding: value must be non-negative (>= 0). Got: " + padding);
     }
-
-
-    /**
-     * Check that the convolution mode is consistent with the padding specification
-     */
-    public static void validateConvolutionModePadding(ConvolutionMode mode, int padding) {
-        if (mode == ConvolutionMode.Same) {
-            boolean nullPadding = padding == 0;
-            if (!nullPadding)
-                throw new IllegalArgumentException("Padding cannot be used when using the `same' convolution mode");
-
-        }
-    }
-
-    /**
-     * Get top padding for same mode only.
-     *
-     * @param outSize  Output size (length 2 array, height dimension first)
-     * @param inSize   Input size (length 2 array, height dimension first)
-     * @param kernel   Kernel size (length 2 array, height dimension first)
-     * @param strides  Strides  (length 2 array, height dimension first)
-     * @param dilation Dilation (length 2 array, height dimension first)
-     * @return Top left padding (length 2 array, height dimension first)
-     */
-    public static int getSameModeTopLeftPadding(int outSize, int inSize, int kernel, int strides, int dilation) {
-        int eKernel = effectiveKernelSize(kernel, dilation);
-        //Note that padBottom is 1 bigger than this if bracketed term is not divisible by 2
-        int outPad = ((outSize - 1) * strides + eKernel - inSize) / 2;
-        Preconditions.checkState(outPad >= 0, "Invalid padding values calculated: %s - " +
-                        "layer configuration is invalid? Input size %s, output size %s, kernel %s, " +
-                        "strides %s, dilation %s", outPad, inSize, outSize, kernel, strides, dilation);
-        return outPad;
-    }
-
-    public static int getSameModeBottomRightPadding(int outSize, int inSize, int kernel, int strides, int dilation) {
-        int eKernel = effectiveKernelSize(kernel, dilation);
-        int totalPad = ((outSize - 1) * strides + eKernel - inSize);
-        int tlPad = totalPad / 2;
-        int brPad = totalPad - tlPad;
-        Preconditions.checkState(brPad >= 0, "Invalid padding values (right) calculated: %s - " +
-                "layer configuration is invalid? Input size %s, output size %s, kernel %s, " +
-                "strides %s, dilation %s", brPad, inSize, outSize, kernel, strides, dilation);
-        return brPad;
-    }
-
-    /**
-     * Perform validation on the CNN layer kernel/stride/padding. Expect int, with values > 0 for kernel size and
-     * stride, and values >= 0 for padding.
-     *
-     * @param kernel  Kernel size  to check
-     * @param stride  Stride to check
-     * @param padding Padding to check
-     */
-    public static void validateCnn1DKernelStridePadding(int kernel, int stride, int padding) {
-
-        if (kernel <= 0) {
-            throw new IllegalStateException("Invalid kernel size: value must be positive (> 0). Got: " + kernel);
-        }
-        if (stride <= 0) {
-            throw new IllegalStateException("Invalid kernel size: value must be positive (> 0). Got: " + stride);
-
-        }
-        if (padding < 0) {
-            throw new IllegalStateException("Invalid kernel size: value must be positive (> 0). Got: " + padding);
-        }
-    }
-
-
+  }
 }