Fixing tests

Signed-off-by: brian <brian@brutex.de>
enhance-build-infrastructure
Brian Rosenberger 2023-07-25 10:59:46 +02:00
parent 997143b9dd
commit 4dc5a116b6
41 changed files with 4285 additions and 1309 deletions

View File

@ -0,0 +1,167 @@
/*
*
* ******************************************************************************
* *
* * This program and the accompanying materials are made available under the
* * terms of the Apache License, Version 2.0 which is available at
* * https://www.apache.org/licenses/LICENSE-2.0.
* *
* * See the NOTICE file distributed with this work for additional
* * information regarding copyright ownership.
* * Unless required by applicable law or agreed to in writing, software
* * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* * License for the specific language governing permissions and limitations
* * under the License.
* *
* * SPDX-License-Identifier: Apache-2.0
* *****************************************************************************
*
*/
package net.brutex.ai.nd4j.tests;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.datasets.iterator.INDArrayDataSetIterator;
import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator;
import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.junit.jupiter.api.Test;
import org.nd4j.common.primitives.Pair;
import org.nd4j.evaluation.classification.Evaluation;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.lossfunctions.LossFunctions;
@Slf4j
public class ExploreParamsTest {
@Test
public void testParam() {
NeuralNetConfiguration conf =
NeuralNetConfiguration.builder()
.seed(12345)
.dataType(DataType.DOUBLE)
.layer(
DenseLayer.builder().nIn(4).nOut(30).name("1. Dense").activation(Activation.TANH))
.layer(DenseLayer.builder().nIn(30).nOut(10).name("2. Dense"))
// .layer(FrozenLayer.builder(DenseLayer.builder().nOut(6).build()).build())
.layer(
OutputLayer.builder()
.nOut(3)
.lossFunction(LossFunctions.LossFunction.MSE)
.activation(Activation.SOFTMAX))
.build();
MultiLayerNetwork nn = new MultiLayerNetwork(conf);
nn.init();
log.info(nn.summary());
// INDArray input = Nd4j.rand(10,4);
INDArray labels = Nd4j.zeros(9, 3);
INDArray input =
Nd4j.create(
new double[][] {
{5.15, 3.5, 1.4, 0.21}, // setosa
{4.9, 3.2, 1.4, 0.2}, // setosa
{4.7, 3.2, 1.23, 0.2}, // setosa
{7, 3.25, 4.7, 1.41}, // versicolor
{6.4, 3.2, 4.54, 1.5}, // versicolor
{6.9, 3.1, 4.92, 1.5}, // versicolor
{7.7, 3, 6.1, 2.3}, // virginica
{6.3, 3.4, 5.6, 2.45}, // virginica
{6.4, 3.12, 5.5, 1.8} // virginica
});
labels.putScalar(0, 1);
labels.putScalar(3, 1);
labels.putScalar(6, 1);
labels.putScalar(10, 1);
labels.putScalar(13, 1);
labels.putScalar(16, 1);
labels.putScalar(20, 1);
labels.putScalar(23, 1);
labels.putScalar(26, 1);
IrisDataSetIterator iter = new IrisDataSetIterator();
//Iterable<Pair<INDArray, INDArray>> it = List.of(new Pair<INDArray, INDArray>(input, labels));
List<Pair<INDArray, INDArray>> l = new ArrayList<>();
for (int i = 0; i < input.rows(); i++) {
l.add(new Pair<>(input.getRow(i), labels.getRow(i)));
}
Iterable<Pair<INDArray, INDArray>> it = l;
INDArrayDataSetIterator diter = new INDArrayDataSetIterator(it, 1);
for (int i = 0; i < 100; i++) {
// nn.fit(input, labels);
// nn.fit( input, labels);
nn.fit(diter);
// nn.feedForward(input);
if(i%20==0) log.info("Score: {}", nn.getScore());
}
Evaluation eval = nn.evaluate(iter, List.of("setosa", "versicolor", "virginica"));
log.info("\n{}", eval.stats());
}
@Test
public void testParam2() throws IOException {
NeuralNetConfiguration conf =
NeuralNetConfiguration.builder()
.seed(12345)
.layer(
DenseLayer.builder().nIn(784).nOut(20).name("1. Dense"))
.layer(DenseLayer.builder().nIn(20).nOut(10).name("2. Dense"))
.layer(
OutputLayer.builder()
.nOut(10)
.lossFunction(LossFunctions.LossFunction.MSE)
.activation(Activation.SOFTMAX))
.build();
MultiLayerNetwork nn = new MultiLayerNetwork(conf);
nn.init();
log.info(nn.summary());
NeuralNetConfiguration conf2 =
NeuralNetConfiguration.builder()
.seed(12345)
.layer(
DenseLayer.builder().nIn(784).nOut(20).name("1. Dense").dropOut(0.7))
.layer(DenseLayer.builder().nIn(20).nOut(10).name("2. Dense"))
.layer(
OutputLayer.builder()
.nOut(10)
.lossFunction(LossFunctions.LossFunction.MSE)
.activation(Activation.SOFTMAX))
.build();
MultiLayerNetwork nn2 = new MultiLayerNetwork(conf2);
nn2.init();
log.info(nn2.summary());
MnistDataSetIterator iter = new MnistDataSetIterator(10, 500);
MnistDataSetIterator iter2 = new MnistDataSetIterator(10, 50);
for (int i = 0; i < 200; i++) {
nn.fit(iter);
nn2.fit(iter);
if(i%20==0) log.info("Score: {} vs. {}", nn.getScore(), nn2.getScore());
}
Evaluation eval = nn.evaluate(iter2);
Evaluation eval2 = nn2.evaluate(iter2);
log.info("\n{} \n{}", eval.stats(), eval2.stats());
}
}
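Note on the one-hot labels built in testParam() above: the single-argument putScalar calls rely on row-major linear indexing into the 9x3 label matrix (flat index 0 is row 0 / column 0, flat index 10 is row 3 / column 1, and so on). A minimal equivalent sketch using explicit (row, column) coordinates, assuming the same nine samples in class order:

// rows 0-2 = setosa, rows 3-5 = versicolor, rows 6-8 = virginica
INDArray labels = Nd4j.zeros(9, 3);
for (int row = 0; row < 9; row++) {
  int clazz = row / 3; // class index derived from the row block
  labels.putScalar(new int[] {row, clazz}, 1.0);
}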

View File

@ -45,6 +45,7 @@ import org.datavec.image.transform.PipelineImageTransform;
import org.datavec.image.transform.ResizeImageTransform; import org.datavec.image.transform.ResizeImageTransform;
import org.datavec.image.transform.ShowImageTransform; import org.datavec.image.transform.ShowImageTransform;
import org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator; import org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator;
import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator;
import org.deeplearning4j.nn.conf.GradientNormalization; import org.deeplearning4j.nn.conf.GradientNormalization;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.distribution.Distribution; import org.deeplearning4j.nn.conf.distribution.Distribution;
@ -65,6 +66,7 @@ import org.deeplearning4j.optimize.listeners.ScoreToChartListener;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.activations.impl.ActivationLReLU; import org.nd4j.linalg.activations.impl.ActivationLReLU;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet; import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
@ -80,11 +82,11 @@ public class App {
private static final int X_DIM = 20 ; private static final int X_DIM = 20 ;
private static final int Y_DIM = 20; private static final int Y_DIM = 20;
private static final int CHANNELS = 1; private static final int CHANNELS = 3;
private static final int batchSize = 10; private static final int batchSize = 50;
private static final int INPUT = 128; private static final int INPUT = 128;
private static final int OUTPUT_PER_PANEL = 4; private static final int OUTPUT_PER_PANEL = 16;
private static final int ARRAY_SIZE_PER_SAMPLE = X_DIM*Y_DIM*CHANNELS; private static final int ARRAY_SIZE_PER_SAMPLE = X_DIM*Y_DIM*CHANNELS;
private static final IUpdater UPDATER = Adam.builder().learningRate(LEARNING_RATE).beta1(0.5).build(); private static final IUpdater UPDATER = Adam.builder().learningRate(LEARNING_RATE).beta1(0.5).build();
@ -146,7 +148,7 @@ public class App {
ActivationLayer.builder(new ActivationLReLU(0.2)).build(), ActivationLayer.builder(new ActivationLReLU(0.2)).build(),
DropoutLayer.builder(1 - 0.5).build(), DropoutLayer.builder(1 - 0.5).build(),
OutputLayer.builder().name("dis-output").lossFunction(LossFunction.XENT).nIn(X_DIM*Y_DIM).nOut(1).activation(Activation.SIGMOID).build() OutputLayer.builder().name("dis-output").lossFunction(LossFunction.MCXENT).nIn(X_DIM*Y_DIM).nOut(1).activation(Activation.SIGMOID).build()
}; };
} }
@ -196,6 +198,7 @@ public class App {
.activation( Activation.IDENTITY ) .activation( Activation.IDENTITY )
.layersFromArray( layers ) .layersFromArray( layers )
.inputType( InputType.convolutional(X_DIM, Y_DIM, CHANNELS)) .inputType( InputType.convolutional(X_DIM, Y_DIM, CHANNELS))
.dataType(DataType.FLOAT)
.build(); .build();
((NeuralNetConfiguration) conf).init(); ((NeuralNetConfiguration) conf).init();
return conf; return conf;
@ -212,9 +215,9 @@ public class App {
log.info("\u001B[32m Some \u001B[1m green \u001B[22m text \u001B[0m \u001B[7m Inverted\u001B[0m "); log.info("\u001B[32m Some \u001B[1m green \u001B[22m text \u001B[0m \u001B[7m Inverted\u001B[0m ");
Nd4j.getMemoryManager().setAutoGcWindow(500); Nd4j.getMemoryManager().setAutoGcWindow(500);
// MnistDataSetIterator trainData = new MnistDataSetIterator(128, true, 45); //MnistDataSetIterator trainData = new MnistDataSetIterator(128, true, 45);
// FileSplit fileSplit = new FileSplit(new File("c:/users/brian/downloads/flowers"), NativeImageLoader.getALLOWED_FORMATS()); //FileSplit fileSplit = new FileSplit(new File("c:/users/brian/downloads/flowers"), NativeImageLoader.getALLOWED_FORMATS());
FileSplit fileSplit = new FileSplit(new File("c:/users/brian/downloads/humans"), NativeImageLoader.getALLOWED_FORMATS()); FileSplit fileSplit = new FileSplit(new File("c:/users/brian/downloads/humans"), NativeImageLoader.getALLOWED_FORMATS());
ImageTransform transform = new ColorConversionTransform(new Random(42), 7 ); ImageTransform transform = new ColorConversionTransform(new Random(42), 7 );
@ -223,7 +226,7 @@ public class App {
ImageTransform transform3 = new ResizeImageTransform(X_DIM, Y_DIM); ImageTransform transform3 = new ResizeImageTransform(X_DIM, Y_DIM);
ImageTransform tr = new PipelineImageTransform.Builder() ImageTransform tr = new PipelineImageTransform.Builder()
.addImageTransform(transform) //convert to GREY SCALE //.addImageTransform(transform) //convert to GREY SCALE
.addImageTransform(transform3) .addImageTransform(transform3)
//.addImageTransform(transform2) //.addImageTransform(transform2)
.build(); .build();
@ -270,10 +273,10 @@ public class App {
break; break;
} }
if(i%20 == 0) { //if(i%20 == 0) {
// frame2 = visualize(new INDArray[]{real}, batchSize, frame2 = visualize(new INDArray[]{real}, batchSize,
// frame2 == null ? new JFrame() : frame2, true); //real has batchsize number of images frame2 == null ? new JFrame() : frame2, true); //real has batchsize number of images
} //}
real.divi(255f); real.divi(255f);
// int batchSize = (int) real.shape()[0]; // int batchSize = (int) real.shape()[0];
@ -290,7 +293,7 @@ public class App {
DataSet data = DataSet.merge(Arrays.asList(realSet, fakeSet)); DataSet data = DataSet.merge(Arrays.asList(realSet, fakeSet));
dis.fit(data); dis.fit(data);
dis.fit(data); //dis.fit(data);
// Update the discriminator in the GAN network // Update the discriminator in the GAN network
updateGan(gen, dis, gan); updateGan(gen, dis, gan);
@ -298,7 +301,7 @@ public class App {
//gan.fit(new DataSet(Nd4j.rand(batchSize, INPUT), Nd4j.zeros(batchSize, 1))); //gan.fit(new DataSet(Nd4j.rand(batchSize, INPUT), Nd4j.zeros(batchSize, 1)));
gan.fit(new DataSet(Nd4j.rand(batchSize, CHANNELS, X_DIM, Y_DIM), Nd4j.zeros(batchSize, 1))); gan.fit(new DataSet(Nd4j.rand(batchSize, CHANNELS, X_DIM, Y_DIM), Nd4j.zeros(batchSize, 1)));
//Visualize and reporting
if (j % 10 == 1) { if (j % 10 == 1) {
System.out.println("Iteration " + j + " Visualizing..."); System.out.println("Iteration " + j + " Visualizing...");
INDArray[] samples = batchSize > OUTPUT_PER_PANEL ? new INDArray[OUTPUT_PER_PANEL] : new INDArray[batchSize]; INDArray[] samples = batchSize > OUTPUT_PER_PANEL ? new INDArray[OUTPUT_PER_PANEL] : new INDArray[batchSize];
@ -320,11 +323,16 @@ public class App {
frame = visualize(samples, 1, frame == null ? new JFrame() : frame, false); //each samples only has 1 image, thus batchElements=1 frame = visualize(samples, 1, frame == null ? new JFrame() : frame, false); //each samples only has 1 image, thus batchElements=1
} }
} }
trainData.reset(); if (trainData.resetSupported()) {
trainData.reset();
} else {
log.error("Trainingdata {} does not support reset.", trainData.toString());
}
// Copy the GANs generator to gen.
updateGen(gen, gan);
} }
// Copy the GANs generator to gen.
updateGen(gen, gan);
gen.save(new File("mnist-mlp-generator.dlj")); gen.save(new File("mnist-mlp-generator.dlj"));
} }
@ -383,7 +391,12 @@ public class App {
} }
private static JLabel getImage(INDArray tensor, int batchElement, boolean isOrig) { private static JLabel getImage(INDArray tensor, int batchElement, boolean isOrig) {
final BufferedImage bi = new BufferedImage(X_DIM, Y_DIM, BufferedImage.TYPE_BYTE_GRAY); final BufferedImage bi;
if(CHANNELS>1) {
bi = new BufferedImage(X_DIM, Y_DIM, BufferedImage.TYPE_INT_RGB); //need to change here based on channels
} else {
bi = new BufferedImage(X_DIM, Y_DIM, BufferedImage.TYPE_BYTE_GRAY); //need to change here based on channels
}
final int imageSize = X_DIM * Y_DIM; final int imageSize = X_DIM * Y_DIM;
final int offset = batchElement * imageSize; final int offset = batchElement * imageSize;
int pxl = offset * CHANNELS; //where to start in the INDArray int pxl = offset * CHANNELS; //where to start in the INDArray

View File

@ -24,12 +24,14 @@ package net.brutex.gan;
import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator; import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator;
import org.deeplearning4j.nn.conf.GradientNormalization; import org.deeplearning4j.nn.conf.GradientNormalization;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration; import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.ActivationLayer; import org.deeplearning4j.nn.conf.layers.ActivationLayer;
import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.DropoutLayer; import org.deeplearning4j.nn.conf.layers.DropoutLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer; import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.weights.WeightInit; import org.deeplearning4j.nn.weights.WeightInit;
import org.junit.jupiter.api.Test;
import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.activations.impl.ActivationLReLU; import org.nd4j.linalg.activations.impl.ActivationLReLU;
import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ndarray.INDArray;
@ -98,7 +100,10 @@ public class MnistSimpleGAN {
return new MultiLayerNetwork(discConf); return new MultiLayerNetwork(discConf);
} }
@Test
public void runTest() throws Exception {
main(null);
}
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
GAN gan = new GAN.Builder() GAN gan = new GAN.Builder()
.generator(MnistSimpleGAN::getGenerator) .generator(MnistSimpleGAN::getGenerator)
@ -108,6 +113,7 @@ public class MnistSimpleGAN {
.updater(UPDATER) .updater(UPDATER)
.gradientNormalization(GradientNormalization.RenormalizeL2PerLayer) .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer)
.gradientNormalizationThreshold(100) .gradientNormalizationThreshold(100)
.build(); .build();
Nd4j.getMemoryManager().setAutoGcWindow(15 * 1000); Nd4j.getMemoryManager().setAutoGcWindow(15 * 1000);

View File

@ -2385,11 +2385,15 @@ public interface INDArray extends Serializable, AutoCloseable {
*/ */
long[] stride(); long[] stride();
/** /**
* Return the ordering (fortran or c 'f' and 'c' respectively) of this ndarray * Return the ordering (fortran or c 'f' and 'c' respectively) of this ndarray <br/><br/>
* @return the ordering of this ndarray * C Is Contiguous layout. Mathematically speaking, row major.<br/>
*/ * F Is Fortran contiguous layout. Mathematically speaking, column major.<br/>
char ordering(); * {@see https://en.wikipedia.org/wiki/Row-_and_column-major_order}<br/>
*
* @return the ordering of this ndarray
*/
char ordering();
/** /**
* Returns the size along a specified dimension * Returns the size along a specified dimension

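A quick sketch of the two orderings documented above, using only INDArray.dup(char) and ordering(); variable names are illustrative and the usual Nd4j imports are assumed:

INDArray c = Nd4j.zeros(2, 3); // default layout is 'c' (C contiguous, row major)
INDArray f = c.dup('f');       // copy into 'f' (Fortran contiguous, column major) layout
System.out.println(c.ordering()); // prints 'c'
System.out.println(f.ordering()); // prints 'f'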
View File

@ -5121,7 +5121,7 @@ public class Nd4j {
Nd4j.backend = backend; Nd4j.backend = backend;
updateNd4jContext(); updateNd4jContext();
props = Nd4jContext.getInstance().getConf(); props = Nd4jContext.getInstance().getConf();
logger.info("Properties for Nd4jContext " + props); log.debug("Properties for Nd4jContext {}", props);
PropertyParser pp = new PropertyParser(props); PropertyParser pp = new PropertyParser(props);
String otherDtype = pp.toString(ND4JSystemProperties.DTYPE); String otherDtype = pp.toString(ND4JSystemProperties.DTYPE);

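The change above replaces eager string concatenation at INFO level with a parameterized DEBUG call; with the {} placeholder, SLF4J only formats the message when the level is actually enabled. A minimal sketch of the difference (logger names as in the diff, usual SLF4J imports assumed):

// builds the full message string even when the level is disabled
logger.info("Properties for Nd4jContext " + props);
// defers formatting until DEBUG is known to be enabled
log.debug("Properties for Nd4jContext {}", props);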
View File

@ -122,7 +122,7 @@ public class BNGradientCheckTest extends BaseDL4JTest {
.dataType(DataType.DOUBLE) .dataType(DataType.DOUBLE)
.updater(new NoOp()).seed(12345L) .updater(new NoOp()).seed(12345L)
.dist(new NormalDistribution(0, 2)).list() .dist(new NormalDistribution(0, 2)).list()
.layer(0, ConvolutionLayer.builder().kernelSize(2, 2).stride(1, 1).nIn(depth).nOut(2) .layer(0, Convolution2D.builder().kernelSize(2, 2).stride(1, 1).nIn(depth).nOut(2)
.activation(Activation.IDENTITY).build()) .activation(Activation.IDENTITY).build())
.layer(1,BatchNormalization.builder().useLogStd(useLogStd).build()) .layer(1,BatchNormalization.builder().useLogStd(useLogStd).build())
.layer(2, ActivationLayer.builder().activation(Activation.TANH).build()) .layer(2, ActivationLayer.builder().activation(Activation.TANH).build())

View File

@ -91,7 +91,6 @@ public class CNN1DGradientCheckTest extends BaseDL4JTest {
.updater(new NoOp()) .updater(new NoOp())
.dist(new NormalDistribution(0, 1)) .dist(new NormalDistribution(0, 1))
.convolutionMode(ConvolutionMode.Same) .convolutionMode(ConvolutionMode.Same)
.list()
.layer( .layer(
Convolution1D.builder() Convolution1D.builder()
.activation(afn) .activation(afn)
@ -435,7 +434,6 @@ public class CNN1DGradientCheckTest extends BaseDL4JTest {
.updater(new NoOp()) .updater(new NoOp())
.dist(new NormalDistribution(0, 1)) .dist(new NormalDistribution(0, 1))
.convolutionMode(ConvolutionMode.Same) .convolutionMode(ConvolutionMode.Same)
.list()
.layer( .layer(
0, 0,
Convolution1D.builder() Convolution1D.builder()
@ -461,6 +459,7 @@ public class CNN1DGradientCheckTest extends BaseDL4JTest {
.stride(stride) .stride(stride)
.padding(padding) .padding(padding)
.pnorm(pnorm) .pnorm(pnorm)
.name("SubsamplingLayer")
.build()) .build())
.layer( .layer(
3, 3,

View File

@ -0,0 +1,811 @@
/*
* ******************************************************************************
* *
* *
* * This program and the accompanying materials are made available under the
* * terms of the Apache License, Version 2.0 which is available at
* * https://www.apache.org/licenses/LICENSE-2.0.
* *
* * See the NOTICE file distributed with this work for additional
* * information regarding copyright ownership.
* * Unless required by applicable law or agreed to in writing, software
* * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* * License for the specific language governing permissions and limitations
* * under the License.
* *
* * SPDX-License-Identifier: Apache-2.0
* *****************************************************************************
*/
package org.deeplearning4j.gradientcheck;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.BaseDL4JTest;
import org.deeplearning4j.TestUtils;
import org.deeplearning4j.datasets.iterator.INDArrayDataSetIterator;
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.RNNFormat;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.*;
import org.deeplearning4j.nn.conf.layers.convolutional.Cropping1D;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.util.Convolution1DUtils;
import org.junit.jupiter.api.Test;
import org.nd4j.common.primitives.Pair;
import org.nd4j.evaluation.classification.Evaluation;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.NDArrayIndex;
import org.nd4j.linalg.learning.config.NoOp;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
@Slf4j
public class CNN1DNewGradientCheckTest extends BaseDL4JTest {
private static final boolean PRINT_RESULTS = true;
private static final boolean RETURN_ON_FIRST_FAILURE = false;
private static final double DEFAULT_EPS = 1e-6;
private static final double DEFAULT_MAX_REL_ERROR = 1e-3;
private static final double DEFAULT_MIN_ABS_ERROR = 1e-8;
static {
Nd4j.setDataType(DataType.DOUBLE);
}
@Test
public void testCnn1D() {
int minibatchSize = 4;
int[] dataChannels = {4, 10}; //the input
int[] kernels = {2,4,5,8};
int stride = 2;
int padding = 3;
int seriesLength = 300;
for (int kernel : kernels) {
for (int dChannels : dataChannels) {
int numLabels = ((seriesLength + (2 * padding) - kernel) / stride) + 1;
final NeuralNetConfiguration conf =
NeuralNetConfiguration.builder()
.dataType(DataType.DOUBLE)
.updater(new NoOp())
.dist(new NormalDistribution(0, 1))
.convolutionMode(ConvolutionMode.Same)
.layer(
Convolution1DNew.builder()
.activation(Activation.RELU)
.kernelSize(kernel)
.stride(stride)
.padding(padding)
.nIn(dChannels) // channels
.nOut(3)
.rnnDataFormat(RNNFormat.NCW)
.build())
.layer(
RnnOutputLayer.builder()
.lossFunction(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX)
.nOut(4)
.build())
.inputType(InputType.recurrent(dChannels, seriesLength))
.build();
INDArray input = Nd4j.rand(minibatchSize, dChannels, seriesLength);
INDArray labels = Nd4j.zeros(minibatchSize, 4, numLabels);
for (int i = 0; i < minibatchSize; i++) {
for (int j = 0; j < numLabels; j++) {
labels.putScalar(new int[] {i, i % 4, j}, 1.0);
}
}
final MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
String msg =
"Minibatch="
+ minibatchSize
+ ", activationFn="
+ Activation.RELU
+ ", kernel = "
+ kernel;
System.out.println(msg);
for (int j = 0; j < net.getnLayers(); j++)
System.out.println("ILayer " + j + " # params: " + net.getLayer(j).numParams());
/**
List<Pair<INDArray, INDArray>> iter = new java.util.ArrayList<>(Collections.emptyList());
iter.add(new Pair<>(input, labels));
for(int x=0;x<100; x++) net.fit(input, labels);
Evaluation eval = net.evaluate(new INDArrayDataSetIterator(iter,2), Arrays.asList(new String[]{"One", "Two", "Three", "Four"}));
// net.fit(input, labels);
eval.eval(labels, net.output(input));
**/
boolean gradOK =
GradientCheckUtil.checkGradients(
net,
DEFAULT_EPS,
DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR,
PRINT_RESULTS,
RETURN_ON_FIRST_FAILURE,
input,
labels);
assertTrue(gradOK, msg);
TestUtils.testModelSerialization(net);
}
}
}
@Test
public void testCnn1DWithLocallyConnected1D() {
Nd4j.getRandom().setSeed(1337);
int[] minibatchSizes = {2, 3};
int length = 25;
int convNIn = 18;
int convNOut1 = 3;
int convNOut2 = 4;
int finalNOut = 4;
int[] kernels = {1,2,4};
int stride = 1;
int padding = 0;
Activation[] activations = {Activation.SIGMOID};
for (Activation afn : activations) {
for (int minibatchSize : minibatchSizes) {
for (int kernel : kernels) {
INDArray input = Nd4j.rand(minibatchSize, convNIn, length);
INDArray labels = Nd4j.zeros(minibatchSize, finalNOut, length);
for (int i = 0; i < minibatchSize; i++) {
for (int j = 0; j < length; j++) {
labels.putScalar(new int[] {i, i % finalNOut, j}, 1.0);
}
}
NeuralNetConfiguration conf =
NeuralNetConfiguration.builder()
.dataType(DataType.DOUBLE)
.updater(new NoOp())
.dist(new NormalDistribution(0, 1))
.convolutionMode(ConvolutionMode.Same)
.layer(
Convolution1DNew.builder()
.activation(afn)
.kernelSize(kernel)
.stride(stride)
.padding(padding)
.nIn(convNIn)
.nOut(convNOut1)
.rnnDataFormat(RNNFormat.NCW)
.build())
.layer(
LocallyConnected1D.builder()
.activation(afn)
.kernelSize(kernel)
.stride(stride)
.padding(padding)
.nIn(convNOut1)
.nOut(convNOut2)
.hasBias(false)
.build())
.layer(
RnnOutputLayer.builder()
.lossFunction(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX)
.nOut(finalNOut)
.build())
.inputType(InputType.recurrent(convNIn, length))
.build();
String json = conf.toJson();
NeuralNetConfiguration c2 = NeuralNetConfiguration.fromJson(json);
assertEquals(conf, c2);
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
String msg =
"Minibatch=" + minibatchSize + ", activationFn=" + afn + ", kernel = " + kernel;
if (PRINT_RESULTS) {
System.out.println(msg);
// for (int j = 0; j < net.getnLayers(); j++)
// System.out.println("ILayer " + j + " # params: " +
// net.getLayer(j).numParams());
}
boolean gradOK =
GradientCheckUtil.checkGradients(
net,
DEFAULT_EPS,
DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR,
PRINT_RESULTS,
RETURN_ON_FIRST_FAILURE,
input,
labels);
assertTrue(gradOK, msg);
TestUtils.testModelSerialization(net);
}
}
}
}
@Test
public void testCnn1DWithCropping1D() {
Nd4j.getRandom().setSeed(1337);
int[] minibatchSizes = {1, 3};
int length = 7;
int convNIn = 2;
int convNOut1 = 3;
int convNOut2 = 4;
int finalNOut = 4;
int[] kernels = {1, 2, 4};
int stride = 1;
int padding = 0;
int cropping = 1;
int croppedLength = length - 2 * cropping;
Activation[] activations = {Activation.SIGMOID};
SubsamplingLayer.PoolingType[] poolingTypes =
new SubsamplingLayer.PoolingType[] {
SubsamplingLayer.PoolingType.MAX,
SubsamplingLayer.PoolingType.AVG,
SubsamplingLayer.PoolingType.PNORM
};
for (Activation afn : activations) {
for (SubsamplingLayer.PoolingType poolingType : poolingTypes) {
for (int minibatchSize : minibatchSizes) {
for (int kernel : kernels) {
INDArray input = Nd4j.rand(minibatchSize, convNIn, length);
INDArray labels = Nd4j.zeros(minibatchSize, finalNOut, croppedLength);
for (int i = 0; i < minibatchSize; i++) {
for (int j = 0; j < croppedLength; j++) {
labels.putScalar(new int[] {i, i % finalNOut, j}, 1.0);
}
}
NeuralNetConfiguration conf =
NeuralNetConfiguration.builder()
.dataType(DataType.DOUBLE)
.updater(new NoOp())
.dist(new NormalDistribution(0, 1))
.convolutionMode(ConvolutionMode.Same)
.layer(
Convolution1DNew.builder()
.activation(afn)
.kernelSize(kernel)
.stride(stride)
.padding(padding)
.nOut(convNOut1)
.build())
.layer(Cropping1D.builder(cropping).build())
.layer(
Convolution1DNew.builder()
.activation(afn)
.kernelSize(kernel)
.stride(stride)
.padding(padding)
.nOut(convNOut2)
.build())
.layer(
RnnOutputLayer.builder()
.lossFunction(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX)
.nOut(finalNOut)
.build())
.inputType(InputType.recurrent(convNIn, length, RNNFormat.NCW))
.build();
String json = conf.toJson();
NeuralNetConfiguration c2 = NeuralNetConfiguration.fromJson(json);
assertEquals(conf, c2);
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
String msg =
"PoolingType="
+ poolingType
+ ", minibatch="
+ minibatchSize
+ ", activationFn="
+ afn
+ ", kernel = "
+ kernel;
if (PRINT_RESULTS) {
System.out.println(msg);
// for (int j = 0; j < net.getnLayers(); j++)
// System.out.println("ILayer " + j + " # params: " +
// net.getLayer(j).numParams());
}
boolean gradOK =
GradientCheckUtil.checkGradients(
net,
DEFAULT_EPS,
DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR,
PRINT_RESULTS,
RETURN_ON_FIRST_FAILURE,
input,
labels);
assertTrue(gradOK, msg);
TestUtils.testModelSerialization(net);
}
}
}
}
}
@Test
public void testCnn1DWithZeroPadding1D() {
Nd4j.getRandom().setSeed(1337);
int[] minibatchSizes = {1, 3};
int length = 7;
int convNIn = 2;
int convNOut1 = 3;
int convNOut2 = 4;
int finalNOut = 4;
int[] kernels = {1, 2, 4};
int stride = 1;
int pnorm = 2;
int padding = 0;
int zeroPadding = 2;
int paddedLength = length + 2 * zeroPadding;
Activation[] activations = {Activation.SIGMOID};
SubsamplingLayer.PoolingType[] poolingTypes =
new SubsamplingLayer.PoolingType[] {
SubsamplingLayer.PoolingType.MAX,
SubsamplingLayer.PoolingType.AVG,
SubsamplingLayer.PoolingType.PNORM
};
for (Activation afn : activations) {
for (SubsamplingLayer.PoolingType poolingType : poolingTypes) {
for (int minibatchSize : minibatchSizes) {
for (int kernel : kernels) {
INDArray input = Nd4j.rand(minibatchSize, convNIn, length);
INDArray labels = Nd4j.zeros(minibatchSize, finalNOut, paddedLength);
for (int i = 0; i < minibatchSize; i++) {
for (int j = 0; j < paddedLength; j++) {
labels.putScalar(new int[] {i, i % finalNOut, j}, 1.0);
}
}
NeuralNetConfiguration conf =
NeuralNetConfiguration.builder()
.dataType(DataType.DOUBLE)
.updater(new NoOp())
.dist(new NormalDistribution(0, 1))
.convolutionMode(ConvolutionMode.Same)
.layer(
Convolution1DNew.builder()
.activation(afn)
.kernelSize(kernel)
.stride(stride)
.padding(padding)
.nOut(convNOut1)
.build())
.layer(ZeroPadding1DLayer.builder(zeroPadding).build())
.layer(
Convolution1DNew.builder()
.activation(afn)
.kernelSize(kernel)
.stride(stride)
.padding(padding)
.nOut(convNOut2)
.build())
.layer(ZeroPadding1DLayer.builder(0).build())
.layer(
Subsampling1DLayer.builder(poolingType)
.kernelSize(kernel)
.stride(stride)
.padding(padding)
.pnorm(pnorm)
.build())
.layer(
RnnOutputLayer.builder()
.lossFunction(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX)
.nOut(finalNOut)
.build())
.inputType(InputType.recurrent(convNIn, length, RNNFormat.NCW))
.build();
String json = conf.toJson();
NeuralNetConfiguration c2 = NeuralNetConfiguration.fromJson(json);
assertEquals(conf, c2);
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
String msg =
"PoolingType="
+ poolingType
+ ", minibatch="
+ minibatchSize
+ ", activationFn="
+ afn
+ ", kernel = "
+ kernel;
if (PRINT_RESULTS) {
System.out.println(msg);
// for (int j = 0; j < net.getnLayers(); j++)
// System.out.println("ILayer " + j + " # params: " +
// net.getLayer(j).numParams());
}
boolean gradOK =
GradientCheckUtil.checkGradients(
net,
DEFAULT_EPS,
DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR,
PRINT_RESULTS,
RETURN_ON_FIRST_FAILURE,
input,
labels);
assertTrue(gradOK, msg);
TestUtils.testModelSerialization(net);
}
}
}
}
}
@Test
public void testCnn1DWithSubsampling1D() {
Nd4j.getRandom().setSeed(12345);
int[] minibatchSizes = {1, 3};
int length = 7;
int convNIn = 2;
int convNOut1 = 3;
int convNOut2 = 4;
int finalNOut = 4;
int[] kernels = {1, 2, 4};
int stride = 1;
int padding = 0;
int pnorm = 2;
Activation[] activations = {Activation.SIGMOID, Activation.TANH};
SubsamplingLayer.PoolingType[] poolingTypes =
new SubsamplingLayer.PoolingType[] {
SubsamplingLayer.PoolingType.MAX,
SubsamplingLayer.PoolingType.AVG,
SubsamplingLayer.PoolingType.PNORM
};
for (Activation afn : activations) {
for (SubsamplingLayer.PoolingType poolingType : poolingTypes) {
for (int minibatchSize : minibatchSizes) {
for (int kernel : kernels) {
INDArray input = Nd4j.rand(minibatchSize, convNIn, length);
INDArray labels = Nd4j.zeros(minibatchSize, finalNOut, length);
for (int i = 0; i < minibatchSize; i++) {
for (int j = 0; j < length; j++) {
labels.putScalar(new int[] {i, i % finalNOut, j}, 1.0);
}
}
NeuralNetConfiguration conf =
NeuralNetConfiguration.builder()
.dataType(DataType.DOUBLE)
.updater(new NoOp())
.dist(new NormalDistribution(0, 1))
.convolutionMode(ConvolutionMode.Same)
.layer(
0,
Convolution1DNew.builder()
.activation(afn)
.kernelSize(kernel)
.stride(stride)
.padding(padding)
.nOut(convNOut1)
.build())
.layer(
1,
Convolution1DNew.builder()
.activation(afn)
.kernelSize(kernel)
.stride(stride)
.padding(padding)
.nOut(convNOut2)
.build())
.layer(
2,
Subsampling1DLayer.builder(poolingType)
.kernelSize(kernel)
.stride(stride)
.padding(padding)
.pnorm(pnorm)
.name("SubsamplingLayer")
.build())
.layer(
3,
RnnOutputLayer.builder()
.lossFunction(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX)
.nOut(finalNOut)
.build())
.inputType(InputType.recurrent(convNIn, length, RNNFormat.NCW))
.build();
String json = conf.toJson();
NeuralNetConfiguration c2 = NeuralNetConfiguration.fromJson(json);
assertEquals(conf, c2);
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
String msg =
"PoolingType="
+ poolingType
+ ", minibatch="
+ minibatchSize
+ ", activationFn="
+ afn
+ ", kernel = "
+ kernel;
if (PRINT_RESULTS) {
System.out.println(msg);
// for (int j = 0; j < net.getnLayers(); j++)
// System.out.println("ILayer " + j + " # params: " +
// net.getLayer(j).numParams());
}
boolean gradOK =
GradientCheckUtil.checkGradients(
net,
DEFAULT_EPS,
DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR,
PRINT_RESULTS,
RETURN_ON_FIRST_FAILURE,
input,
labels);
assertTrue(gradOK, msg);
TestUtils.testModelSerialization(net);
}
}
}
}
}
@Test
public void testCnn1dWithMasking() {
int length = 12;
int convNIn = 2;
int convNOut1 = 3;
int convNOut2 = 4;
int finalNOut = 3;
int pnorm = 2;
SubsamplingLayer.PoolingType[] poolingTypes =
new SubsamplingLayer.PoolingType[] {
SubsamplingLayer.PoolingType.MAX, SubsamplingLayer.PoolingType.AVG
};
for (SubsamplingLayer.PoolingType poolingType : poolingTypes) {
for (ConvolutionMode cm :
new ConvolutionMode[] {ConvolutionMode.Same, ConvolutionMode.Truncate}) {
for (int stride : new int[] {1, 2}) {
String s = cm + ", stride=" + stride + ", pooling=" + poolingType;
log.info("Starting test: " + s);
Nd4j.getRandom().setSeed(12345);
NeuralNetConfiguration conf =
NeuralNetConfiguration.builder()
.dataType(DataType.DOUBLE)
.updater(new NoOp())
.activation(Activation.TANH)
.dist(new NormalDistribution(0, 1))
.convolutionMode(cm)
.seed(12345)
.layer(
Convolution1DNew.builder()
.kernelSize(2)
.rnnDataFormat(RNNFormat.NCW)
.stride(stride)
.nIn(convNIn)
.nOut(convNOut1)
.build())
.layer(
Subsampling1DLayer.builder(poolingType)
.kernelSize(2)
.stride(stride)
.pnorm(pnorm)
.build())
.layer(
Convolution1DNew.builder()
.kernelSize(2)
.rnnDataFormat(RNNFormat.NCW)
.stride(stride)
.nIn(convNOut1)
.nOut(convNOut2)
.build())
.layer(GlobalPoolingLayer.builder().poolingType(PoolingType.AVG).build())
.layer(
OutputLayer.builder()
.lossFunction(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX)
.nOut(finalNOut)
.build())
.inputType(InputType.recurrent(convNIn, length))
.build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
INDArray f = Nd4j.rand(2, convNIn, length);
INDArray fm = Nd4j.create(2, length);
fm.get(NDArrayIndex.point(0), NDArrayIndex.all()).assign(1);
fm.get(NDArrayIndex.point(1), NDArrayIndex.interval(0, 6)).assign(1);
INDArray label = TestUtils.randomOneHot(2, finalNOut);
boolean gradOK =
GradientCheckUtil.checkGradients(
new GradientCheckUtil.MLNConfig().net(net).input(f).labels(label).inputMask(fm));
assertTrue(gradOK, s);
TestUtils.testModelSerialization(net);
// TODO also check that masked step values don't impact forward pass, score or gradients
DataSet ds = new DataSet(f, label, fm, null);
double scoreBefore = net.score(ds);
net.setInput(f);
net.setLabels(label);
net.setLayerMaskArrays(fm, null);
net.computeGradientAndScore();
INDArray gradBefore = net.getFlattenedGradients().dup();
f.putScalar(1, 0, 10, 10.0);
f.putScalar(1, 1, 11, 20.0);
double scoreAfter = net.score(ds);
net.setInput(f);
net.setLabels(label);
net.setLayerMaskArrays(fm, null);
net.computeGradientAndScore();
INDArray gradAfter = net.getFlattenedGradients().dup();
assertEquals(scoreBefore, scoreAfter, 1e-6);
assertEquals(gradBefore, gradAfter);
}
}
}
}
@Test
public void testCnn1Causal() throws Exception {
int convNIn = 2;
int convNOut1 = 3;
int convNOut2 = 4;
int finalNOut = 3;
int[] lengths = {11, 12, 13, 9, 10, 11};
int[] kernels = {2, 3, 2, 4, 2, 3};
int[] dilations = {1, 1, 2, 1, 2, 1};
int[] strides = {1, 2, 1, 2, 1, 1};
boolean[] masks = {false, true, false, true, false, true};
boolean[] hasB = {true, false, true, false, true, true};
for (int i = 0; i < lengths.length; i++) {
int length = lengths[i];
int k = kernels[i];
int d = dilations[i];
int st = strides[i];
boolean mask = masks[i];
boolean hasBias = hasB[i];
// TODO has bias
String s = "k=" + k + ", s=" + st + " d=" + d + ", seqLen=" + length;
log.info("Starting test: " + s);
Nd4j.getRandom().setSeed(12345);
NeuralNetConfiguration conf =
NeuralNetConfiguration.builder()
.dataType(DataType.DOUBLE)
.updater(new NoOp())
.activation(Activation.TANH)
.weightInit(new NormalDistribution(0, 1))
.seed(12345)
.layer(
Convolution1DNew.builder()
.kernelSize(k)
.dilation(d)
.hasBias(hasBias)
.convolutionMode(ConvolutionMode.Causal)
.stride(st)
.nOut(convNOut1)
.build())
.layer(
Convolution1DNew.builder()
.kernelSize(k)
.dilation(d)
.convolutionMode(ConvolutionMode.Causal)
.stride(st)
.nOut(convNOut2)
.build())
.layer(
RnnOutputLayer.builder()
.lossFunction(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX)
.nOut(finalNOut)
.build())
.inputType(InputType.recurrent(convNIn, length, RNNFormat.NCW))
.build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
INDArray f = Nd4j.rand(DataType.DOUBLE, 2, convNIn, length);
INDArray fm = null;
if (mask) {
fm = Nd4j.create(2, length);
fm.get(NDArrayIndex.point(0), NDArrayIndex.all()).assign(1);
fm.get(NDArrayIndex.point(1), NDArrayIndex.interval(0, length - 2)).assign(1);
}
long outSize1 = Convolution1DUtils.getOutputSize(length, k, st, 0, ConvolutionMode.Causal, d);
long outSize2 =
Convolution1DUtils.getOutputSize(outSize1, k, st, 0, ConvolutionMode.Causal, d);
INDArray label = TestUtils.randomOneHotTimeSeries(2, finalNOut, (int) outSize2);
String msg =
"Minibatch="
+ 2
+ ", activationFn="
+ Activation.TANH
+ ", kernel = "
+ k;
System.out.println(msg);
for (int j = 0; j < net.getnLayers(); j++)
System.out.println("ILayer " + j + " # params: " + net.getLayer(j).numParams());
boolean gradOK =
GradientCheckUtil.checkGradients(
new GradientCheckUtil.MLNConfig().net(net).input(f).labels(label).inputMask(fm));
assertTrue(gradOK, s);
TestUtils.testModelSerialization(net);
}
}
}
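For reference, the expected output sequence lengths in testCnn1Causal() above come from Convolution1DUtils.getOutputSize(); a minimal sketch of that derivation for the first parameter combination (length 11, kernel 2, stride 1, dilation 1), assuming the same imports as the test:

long outSize1 = Convolution1DUtils.getOutputSize(11, 2, 1, 0, ConvolutionMode.Causal, 1);
long outSize2 = Convolution1DUtils.getOutputSize(outSize1, 2, 1, 0, ConvolutionMode.Causal, 1);
// labels for the RnnOutputLayer must then be shaped [minibatch, finalNOut, outSize2]
INDArray label = TestUtils.randomOneHotTimeSeries(2, 3, (int) outSize2);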

View File

@ -108,8 +108,8 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
.updater(new NoOp()) .updater(new NoOp())
.weightInit(WeightInit.XAVIER) .weightInit(WeightInit.XAVIER)
.seed(12345L) .seed(12345L)
.list()
.layer(0, ConvolutionLayer.builder(1, 1).nOut(6).activation(afn).build()) .layer(0, Convolution2D.builder().kernelSize(1).stride(1).nOut(6).activation(afn).build())
.layer(1, OutputLayer.builder(lf).activation(outputActivation).nOut(3).build()) .layer(1, OutputLayer.builder(lf).activation(outputActivation).nOut(3).build())
.inputType(InputType.convolutionalFlat(1, 4, 1)); .inputType(InputType.convolutionalFlat(1, 4, 1));

View File

@ -24,6 +24,7 @@ import lombok.val;
import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.api.layers.LayerConstraint;
import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.CNN2DFormat;
import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.Convolution2D;
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException;
@ -85,7 +86,7 @@ public class KerasAtrousConvolution2D extends KerasConvolution {
IWeightInit init = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_INIT(), IWeightInit init = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_INIT(),
enforceTrainingConfig, conf, kerasMajorVersion); enforceTrainingConfig, conf, kerasMajorVersion);
val builder = ConvolutionLayer.builder().name(this.name) val builder = Convolution2D.builder().name(this.name)
.nOut(getNOutFromConfig(layerConfig, conf)).dropOut(this.dropout) .nOut(getNOutFromConfig(layerConfig, conf)).dropOut(this.dropout)
.activation(getIActivationFromConfig(layerConfig, conf)) .activation(getIActivationFromConfig(layerConfig, conf))
.weightInit(init) .weightInit(init)

View File

@ -28,6 +28,7 @@ import org.deeplearning4j.nn.api.layers.LayerConstraint;
import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.CNN2DFormat;
import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.Convolution2D;
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException;
@ -95,7 +96,7 @@ public class KerasConvolution2D extends KerasConvolution {
LayerConstraint weightConstraint = KerasConstraintUtils.getConstraintsFromConfig( LayerConstraint weightConstraint = KerasConstraintUtils.getConstraintsFromConfig(
layerConfig, conf.getLAYER_FIELD_W_CONSTRAINT(), conf, kerasMajorVersion); layerConfig, conf.getLAYER_FIELD_W_CONSTRAINT(), conf, kerasMajorVersion);
final var builder = ConvolutionLayer.builder().name(this.name) final var builder = Convolution2D.builder().name(this.name)
.nOut(getNOutFromConfig(layerConfig, conf)).dropOut(this.dropout) .nOut(getNOutFromConfig(layerConfig, conf)).dropOut(this.dropout)
.activation(getIActivationFromConfig(layerConfig, conf)) .activation(getIActivationFromConfig(layerConfig, conf))
.weightInit(init) .weightInit(init)

View File

@ -222,6 +222,14 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration {
// TODO do not put inside self to avoid serialization issues // TODO do not put inside self to avoid serialization issues
// innerConfigurations.add(0, this); //put this configuration at first place // innerConfigurations.add(0, this); //put this configuration at first place
getLayerConfigurations().stream()
.forEach(
lconf ->
lconf.setNetConfiguration(
this)); // set this as net config for all layers (defined here, not stacked)
/** /**
* Inherit network wide configuration setting to those layer configurations that do not have an * Inherit network wide configuration setting to those layer configurations that do not have an
* individual setting (nor a default) * individual setting (nor a default)
@ -230,11 +238,6 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration {
lconf.runInheritance(); lconf.runInheritance();
} }
getLayerConfigurations().stream()
.forEach(
lconf ->
lconf.setNetConfiguration(
this)); // set this as net config for all layers (defined in here, not stacked
// Validate BackpropType setting // Validate BackpropType setting
if ((tbpttBackLength != DEFAULT_TBPTT_LENGTH || tbpttFwdLength != DEFAULT_TBPTT_LENGTH) if ((tbpttBackLength != DEFAULT_TBPTT_LENGTH || tbpttFwdLength != DEFAULT_TBPTT_LENGTH)
@ -326,7 +329,7 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration {
LayerConfiguration layer = getFlattenedLayerConfigurations().get(i - 1); LayerConfiguration layer = getFlattenedLayerConfigurations().get(i - 1);
// convolution 1d is an edge case where it has rnn input type but the filters // convolution 1d is an edge case where it has rnn input type but the filters
// should be the output // should be the output
if (layer instanceof Convolution1D) { if (layer instanceof Convolution1D || layer instanceof Convolution1DNew) {
if (l instanceof DenseLayer && getInputType() instanceof InputType.InputTypeRecurrent) { if (l instanceof DenseLayer && getInputType() instanceof InputType.InputTypeRecurrent) {
FeedForwardLayer feedForwardLayer = (FeedForwardLayer) l; FeedForwardLayer feedForwardLayer = (FeedForwardLayer) l;
if (getInputType() instanceof InputType.InputTypeRecurrent) { if (getInputType() instanceof InputType.InputTypeRecurrent) {

View File

@ -21,7 +21,13 @@
package org.deeplearning4j.nn.conf; package org.deeplearning4j.nn.conf;
/**
* N is the batch size<br/>
* C is the number of feature maps (that is, the number of channels)<br/>
* H is the image height (not used for 1D convolutions, as this is an RNN format)<br/>
* W is the image width<br/>
*/
public enum RNNFormat implements DataFormat { public enum RNNFormat implements DataFormat {
NCW, /** n=batch size; c=channels/ features; w=width **/ NCW,
NWC /** n=batch size; w=width; c=channels/ features **/ NWC
} }

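In terms of activation shapes, the two formats above differ only in axis order. A minimal sketch with illustrative sizes (10 examples, 2 features, 25 time steps), mirroring the Nd4j.rand calls used in the tests:

INDArray ncw = Nd4j.rand(10, 2, 25); // NCW: [minibatch, channels/features, width (time steps)]
INDArray nwc = Nd4j.rand(10, 25, 2); // NWC: [minibatch, width (time steps), channels/features]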
View File

@ -0,0 +1,142 @@
/*
*
* ******************************************************************************
* *
* * This program and the accompanying materials are made available under the
* * terms of the Apache License, Version 2.0 which is available at
* * https://www.apache.org/licenses/LICENSE-2.0.
* *
* * See the NOTICE file distributed with this work for additional
* * information regarding copyright ownership.
* * Unless required by applicable law or agreed to in writing, software
* * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* * License for the specific language governing permissions and limitations
* * under the License.
* *
* * SPDX-License-Identifier: Apache-2.0
* *****************************************************************************
*
*/
package org.deeplearning4j.nn.conf.layers;
import java.util.Arrays;
import com.fasterxml.jackson.annotation.JsonIgnore;
import lombok.*;
import lombok.experimental.Accessors;
import lombok.experimental.SuperBuilder;
import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.nn.conf.CNN2DFormat;
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.util.ValidationUtils;
/**
* For a ConvolutionLayer, nIn in the input layer is the number of input channels and nOut is the
* number of filters to be used in the net (in other words, the output channels). The builder
* specifies the filter/kernel size, the stride and the padding. The pooling layer takes the kernel
* size.
*
* <p>Supports multiple dimensions: In 1D CNN, kernel moves in 1 direction. Input and output data of
* 1D CNN is 2 dimensional. Mostly used on Time-Series data.
*
* <p>In 2D CNN, kernel moves in 2 directions. Input and output data of 2D CNN is 3 dimensional.
* Mostly used on Image data.
*
* <p>In 3D CNN, kernel moves in 3 directions. Input and output data of 3D CNN is 4 dimensional.
* Mostly used on 3D Image data (MRI, CT Scans, Video).
*/
@ToString(callSuper = true)
@NoArgsConstructor
@EqualsAndHashCode(callSuper = true)
@Slf4j
@SuperBuilder
public abstract class AbstractConvolutionLayer extends FeedForwardLayer {
/** The kernel of this convolution with size in each n-dimensions */
@Getter private int[] kernelSize;
/** The stride */
@Getter private int[] stride;
/** The padding */
@Getter private int[] padding;
/** The dilation */
@Getter private int[] dilation;
/** If true (default): include bias parameters in the model. False: no bias. */
@Builder.Default
@Getter
@Accessors(fluent = true)
@Setter
private boolean hasBias = true;
/**
* Set the convolution mode for the Convolution layer. See {@link ConvolutionMode} for more
* details. Default is {@link ConvolutionMode#Truncate}.
*/
@Builder.Default @Getter @Setter
private ConvolutionMode convolutionMode = ConvolutionMode.Truncate;
/**
* When using CuDNN and an error is encountered, should a fallback to the built-in (non-CuDNN)
* implementation be allowed? If set to false, an exception from CuDNN will be propagated back to
* the user. If true, the built-in (non-CuDNN) implementation for ConvolutionLayer will be used.
*/
@Getter @Setter @Builder.Default private boolean cudnnAllowFallback = true;
/** Defaults to "PREFER_FASTEST", but "NO_WORKSPACE" uses less memory. */
@Getter @Setter @Builder.Default private ConvolutionLayer.AlgoMode cudnnAlgoMode = ConvolutionLayer.AlgoMode.PREFER_FASTEST;
@Getter @Setter private ConvolutionLayer.FwdAlgo cudnnFwdAlgo;
@Getter @Setter private ConvolutionLayer.BwdFilterAlgo cudnnBwdFilterAlgo;
@Getter @Setter private ConvolutionLayer.BwdDataAlgo cudnnBwdDataAlgo;
/**
* Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last).
* See {@link CNN2DFormat} for more details.<br>
* Default: NCHW
*
* @param format Format for activations (in and out)
*/
@Builder.Default @Getter @Setter
private CNN2DFormat convFormat =
CNN2DFormat.NCHW; // default value for legacy serialization reasons
/**
* Number of parameters this layer has as a result of its configuration.
*
* @return number of parameters
*/
@Override
public long numParams() {
var kern = 1;
for (int i : getKernelSize()) {
kern = kern * i;
}
return nIn * nOut * kern + (hasBias() ? nOut : 0);
}
public abstract static class AbstractConvolutionLayerBuilder<
C extends AbstractConvolutionLayer, B extends AbstractConvolutionLayerBuilder<C, B>>
extends FeedForwardLayerBuilder<C, B> {
public B kernelSize(int @NonNull ... kernelSize) {
if (this.kernelSize != null) {
log.warn("You are setting the kernel more than once, last call with override prior calls.");
}
this.kernelSize = kernelSize;
return self();
}
public B stride(int @NonNull ... stride) {
this.stride = stride;
return self();
}
public B padding(int @NonNull ... padding) {
this.padding = padding;
return self();
}
public B dilation(int @NonNull ... dilation) {
this.dilation = dilation;
return self();
}
}
}

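As a worked example of numParams() above, a sketch with assumed sizes (not values from this commit): a 1D convolution with nIn = 2 input channels, nOut = 4 filters and kernel size 3 has 2 * 4 * 3 = 24 weights plus 4 bias parameters, 28 in total.

// sketch: parameter count for an assumed Convolution1DNew configuration
AbstractConvolutionLayer conv = Convolution1DNew.builder()
    .nIn(2).nOut(4)
    .kernelSize(3).stride(1).padding(0).dilation(1)
    .build();
long params = conv.numParams(); // 2 * 4 * 3 + 4 = 28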
View File

@ -52,6 +52,16 @@ import org.nd4j.linalg.learning.regularization.WeightDecay;
@SuperBuilder @SuperBuilder
public abstract class BaseLayerConfiguration extends LayerConfiguration public abstract class BaseLayerConfiguration extends LayerConfiguration
implements ITraininableLayerConfiguration, Serializable, Cloneable { implements ITraininableLayerConfiguration, Serializable, Cloneable {
/**
* Number of parameters this layer has as a result of its configuration. This default implementation
* calls {@link #initializer()}.numParams( this ).
*
* @return number of parameters
*/
@Override
public long numParams() {
return initializer().numParams(this);
}
/** /**
* Set constraints to be applied to all layers. Default: no constraints.<br> * Set constraints to be applied to all layers. Default: no constraints.<br>

View File

@ -45,6 +45,7 @@ import org.nd4j.linalg.factory.Nd4j;
@NoArgsConstructor @NoArgsConstructor
public class CapsuleLayer extends SameDiffLayer { public class CapsuleLayer extends SameDiffLayer {
private static final String WEIGHT_PARAM = "weight"; private static final String WEIGHT_PARAM = "weight";
private static final String BIAS_PARAM = "bias"; private static final String BIAS_PARAM = "bias";
/** /**

View File

@ -36,22 +36,17 @@ import org.deeplearning4j.util.ValidationUtils;
import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ndarray.INDArray;
/*
//TODO: We will eventually want to NOT subclass off of ConvolutionLayer. //TODO: We will eventually want to NOT subclass off of ConvolutionLayer.
//Currently, we just subclass off the ConvolutionLayer and hard code the "width" dimension to 1 //Currently, we just subclass off the ConvolutionLayer and hard code the "width" dimension to 1
* This approach treats a multivariate time series with L timesteps and /**
* This approach treats a multivariate time series with L time steps and
* P variables as an L x 1 x P image (L rows high, 1 column wide, P * P variables as an L x 1 x P image (L rows high, 1 column wide, P
* channels deep). The kernel should be H<L pixels high and W=1 pixels * channels deep). The kernel should be H<L pixels high and W=1 pixels
* wide. * wide.
*
In 1D CNN, kernel moves in 1 direction. * In 1D CNN, kernel moves in 1 direction. The kernel has 2-dimensions.
Input and output data of 1D CNN is 2 dimensional. Mostly used on Time-Series data. * Input and output data of 1D CNN is 2-dimensional. Mostly used on Time-Series data.
In 2D CNN, kernel moves in 2 directions.
Input and output data of 2D CNN is 3 dimensional. Mostly used on Image data.
In 3D CNN, kernel moves in 3 directions.
Input and output data of 3D CNN is 4 dimensional. Mostly used on 3D Image data (MRI, CT Scans, Video).
*/ */
@Data @Data
@ToString(callSuper = true) @ToString(callSuper = true)
@ -223,7 +218,7 @@ public class Convolution1D extends ConvolutionLayer {
} }
public abstract static class Convolution1DBuilder< public abstract static class Convolution1DBuilder<
C extends ConvolutionLayer, B extends Convolution1DBuilder<C, B>> C extends Convolution1D, B extends Convolution1DBuilder<C, B>>
extends ConvolutionLayerBuilder<C, B> { extends ConvolutionLayerBuilder<C, B> {
@Override @Override

View File

@ -0,0 +1,250 @@
/*
* ******************************************************************************
* *
* *
* * This program and the accompanying materials are made available under the
* * terms of the Apache License, Version 2.0 which is available at
* * https://www.apache.org/licenses/LICENSE-2.0.
* *
* * See the NOTICE file distributed with this work for additional
* * information regarding copyright ownership.
* * Unless required by applicable law or agreed to in writing, software
* * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* * License for the specific language governing permissions and limitations
* * under the License.
* *
* * SPDX-License-Identifier: Apache-2.0
* *****************************************************************************
*/
package org.deeplearning4j.nn.conf.layers;
import java.util.Collection;
import java.util.Map;
import lombok.*;
import lombok.experimental.SuperBuilder;
import lombok.extern.jackson.Jacksonized;
import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.nn.api.ParamInitializer;
import org.deeplearning4j.nn.conf.CNN2DFormat;
import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.RNNFormat;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.memory.LayerMemoryReport;
import org.deeplearning4j.nn.params.ConvolutionNewParamInitializer;
import org.deeplearning4j.optimize.api.TrainingListener;
import org.deeplearning4j.util.Convolution1DUtils;
import org.deeplearning4j.util.ValidationUtils;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
// TODO: We will eventually want to NOT subclass off of ConvolutionLayer.
// Currently, we just subclass off the ConvolutionLayer and hard code the "width" dimension to 1
/**
* This approach treats a multivariate time series with L time steps and P variables as an L x 1 x P
* image (L rows high, 1 column wide, P channels deep). The kernel should be H<L pixels high and W=1
* pixels wide.
*
* <p>In 1D CNN, kernel moves in 1 direction. The kernel has 2-dimensions. Input and output data of
* 1D CNN is 2-dimensional. Mostly used on Time-Series data.
*/
@Data
@Slf4j
@ToString(callSuper = true)
@EqualsAndHashCode(callSuper = true)
@Jacksonized
@SuperBuilder
public class Convolution1DNew extends AbstractConvolutionLayer {
/**
* Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last).
* See {@link CNN2DFormat} for more details.<br>
* Default: NCHW
*
* @param format Format for activations (in and out)
*/
@Builder.Default
protected CNN2DFormat dataFormat =
CNN2DFormat.NCHW; // default value for legacy serialization reasons
@Builder.Default private RNNFormat rnnDataFormat = RNNFormat.NCW;
@Override
public ParamInitializer initializer() {
return ConvolutionNewParamInitializer.getInstance();
}
@Override
public org.deeplearning4j.nn.api.Layer instantiate(
NeuralNetConfiguration conf,
Collection<TrainingListener> trainingListeners,
int layerIndex,
INDArray layerParamsView,
boolean initializeParams,
DataType networkDataType) {
setNetConfiguration(conf);
LayerValidation.assertNInNOutSet("Convolution1D", getName(), layerIndex, getNIn(), getNOut());
LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
/*
Object ret;
try {
ret = lconf.getCanConfigure()
.getConstructor(LayerConfiguration.class, DataType.class)
.newInstance(new Object[] { lconf, networkDataType });
} catch (Exception e) {
throw new RuntimeException(e);
*/
org.deeplearning4j.nn.layers.convolution.Convolution1DNewLayer ret =
new org.deeplearning4j.nn.layers.convolution.Convolution1DNewLayer(lconf, networkDataType);
ret.addTrainingListeners(trainingListeners);
ret.setIndex(layerIndex);
ret.setParamsViewArray(layerParamsView);
Map<String, INDArray> paramTable = initializer().init(this, layerParamsView, initializeParams);
ret.setParamTable(paramTable);
ret.setLayerConfiguration(this);
return ret;
}
@Override
public InputType getOutputType(int layerIndex, InputType inputType) {
if (inputType == null || inputType.getType() != InputType.Type.RNN) {
throw new IllegalStateException(
"Invalid input for 1D CNN layer (layer index = "
+ layerIndex
+ ", layer name = \""
+ getName()
+ "\"): expect RNN input type with size > 0. Got: "
+ inputType);
}
InputType.InputTypeRecurrent it = (InputType.InputTypeRecurrent) inputType;
long inputTsLength = it.getTimeSeriesLength();
long outLength;
if (inputTsLength < 0) {
// Probably: user did InputType.recurrent(x) without specifying sequence length
outLength = -1;
} else {
outLength =
Convolution1DUtils.getOutputSize(
inputTsLength,
getKernelSize()[0],
getStride()[0],
getPadding()[0],
getConvolutionMode(),
getDilation()[0]);
}
return InputType.recurrent(nOut, outLength, rnnDataFormat);
}
@Override
public void setNIn(InputType inputType, boolean override) {
if (inputType == null || inputType.getType() != InputType.Type.RNN) {
throw new IllegalStateException(
"Invalid input for 1D CNN layer (layer name = \""
+ getName()
+ "\"): expect RNN input type with size > 0. Got: "
+ inputType);
}
InputType.InputTypeRecurrent r = (InputType.InputTypeRecurrent) inputType;
if (nIn <= 0 || override) {
this.nIn = r.getSize();
}
if (this.rnnDataFormat == null || override) this.rnnDataFormat = r.getFormat();
if (this.dataFormat == null || override)
this.dataFormat = rnnDataFormat == RNNFormat.NCW ? CNN2DFormat.NCHW : CNN2DFormat.NHWC;
}
@Override
public InputPreProcessor getPreProcessorForInputType(InputType inputType) {
if (inputType == null) {
throw new IllegalStateException(
"Invalid input for Convolution1D layer (layer name=\""
+ getName()
+ "\"): input is null");
}
return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType, rnnDataFormat, getName());
}
/**
* This is a report of the estimated memory consumption for the given layer
*
* @param inputType Input type to the layer. Memory consumption is often a function of the input
* type
* @return Memory report for the layer
*/
@Override
public LayerMemoryReport getMemoryReport(InputType inputType) {
return null;
}
protected boolean allowCausal() {
return true;
}
private static final class Convolution1DNewBuilderImpl
extends Convolution1DNewBuilder<Convolution1DNew, Convolution1DNewBuilderImpl> {
public Convolution1DNew build() {
Convolution1DNew l = new Convolution1DNew(this);
if (l.getDilation() == null) {
dilation(1, 1);
}
if (l.getPadding() == null) {
padding(0);
}
l = new Convolution1DNew(this);
Convolution1DUtils.validateConvolutionModePadding(l.getConvolutionMode(), l.getPadding()[0]);
Convolution1DUtils.validateCnn1DKernelStridePadding(
l.getKernelSize()[0], l.getStride()[0], l.getPadding()[0]);
l.initializeConstraints();
return l;
}
}
public abstract static class Convolution1DNewBuilder<
C extends Convolution1DNew, B extends Convolution1DNewBuilder<C, B>>
extends AbstractConvolutionLayerBuilder<C, B> {
private int dimensions(Class arrayType) {
return arrayType.isArray() ? 1 + dimensions(arrayType.getComponentType()) : 0;
}
@Override
public B kernelSize(int @NonNull ... kernel) {
// Todo, we always provide arrays, but only first element is really used
if (dimensions(kernel.getClass()) > 1)
log.warn(
"Kernel size has '{}' dimensions, only using first dimensions for 1D convolution layer.",
dimensions(kernel.getClass()));
super.kernelSize(
ValidationUtils.validate1NonNegative(new int[] {kernel[0]}, "kernelSize")[0], 1);
return self();
}
public B padding(int @NonNull ... padding) {
// Todo, we always provide arrays, but only first element is really used
super.padding(ValidationUtils.validate1NonNegative(new int[] {padding[0]}, "padding"));
return self();
}
public B dilation(int @NonNull ... dilation) {
// Todo, we always provide arrays, but only first element is really used
super.dilation(ValidationUtils.validate1NonNegative(new int[] {dilation[0]}, "dilation"));
return self();
}
public B stride(int @NonNull ... stride) {
// Todo, we always provide arrays, but only first element is really used
super.stride(ValidationUtils.validate1NonNegative(new int[] {stride[0]}, "stride")[0], 1);
return self();
}
}
}
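For orientation, a minimal usage sketch (not part of this commit) of how the new Convolution1DNew layer could be dropped into a configuration. The builder setters used below (nIn, nOut, kernelSize, stride, activation, name) are assumed to be generated by Lombok's @SuperBuilder in the same way as for the other layers in this change set.

// Hypothetical example: a 1D CNN over a multivariate time series with P = 3 variables.
NeuralNetConfiguration conf =
    NeuralNetConfiguration.builder()
        .layer(
            Convolution1DNew.builder()
                .nIn(3)                       // P input channels (variables)
                .nOut(16)                     // number of filters
                .kernelSize(5)                // kernel height H; the width is fixed to 1 by the builder
                .stride(1)
                .activation(Activation.RELU)
                .name("conv1d-new")
                .build())
        .layer(
            OutputLayer.builder()
                .nOut(2)
                .lossFunction(LossFunctions.LossFunction.MCXENT)
                .activation(Activation.SOFTMAX)
                .build())
        .build();
// Input is expected as RNN-type data, shaped [minibatch, channels, length] (NCW) by default.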

View File

@ -61,6 +61,23 @@ import org.nd4j.linalg.api.ndarray.INDArray;
@EqualsAndHashCode(callSuper = true)
@SuperBuilder(builderMethodName = "innerBuilder")
public class ConvolutionLayer extends FeedForwardLayer {
public static ConvolutionLayerBuilder<?, ?> builder() {
return innerBuilder();
}
public static ConvolutionLayerBuilder<?, ?> builder(int... kernelSize) {
return innerBuilder().kernelSize(kernelSize);
}
public static ConvolutionLayerBuilder<?, ?> builder(int[] kernelSize, int[] stride) {
return innerBuilder().kernelSize(kernelSize).stride(stride);
}
public static ConvolutionLayerBuilder<?, ?> builder(
int[] kernelSize, int[] stride, int[] padding) {
return innerBuilder().kernelSize(kernelSize).stride(stride).padding(padding);
}
/**
* Size of the convolution rows/columns
*
@ -122,23 +139,6 @@ public class ConvolutionLayer extends FeedForwardLayer {
@Builder.Default @JsonIgnore @EqualsAndHashCode.Exclude @Getter @Setter
private boolean defaultValueOverriden = false;
public static ConvolutionLayerBuilder<?, ?> builder() {
return innerBuilder();
}
public static ConvolutionLayerBuilder<?, ?> builder(int... kernelSize) {
return innerBuilder().kernelSize(kernelSize);
}
public static ConvolutionLayerBuilder<?, ?> builder(int[] kernelSize, int[] stride) {
return innerBuilder().kernelSize(kernelSize).stride(stride);
}
public static ConvolutionLayerBuilder<?, ?> builder(
int[] kernelSize, int[] stride, int[] padding) {
return innerBuilder().kernelSize(kernelSize).stride(stride).padding(padding);
}
public boolean hasBias() {
return hasBias;
}
@ -429,6 +429,7 @@ public class ConvolutionLayer extends FeedForwardLayer {
}
}
/*
private static final class ConvolutionLayerBuilderImpl
extends ConvolutionLayerBuilder<ConvolutionLayer, ConvolutionLayerBuilderImpl> {
public ConvolutionLayer build() {
@ -473,6 +474,6 @@ public class ConvolutionLayer extends FeedForwardLayer {
return l;
}
}
*/
}
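Since the hand-written ConvolutionLayerBuilderImpl is commented out above, building is assumed to go through the Lombok-generated builder; the relocated static factories keep existing call sites compiling. A quick sketch with hypothetical values (nIn/nOut setters inherited from FeedForwardLayer):

// Equivalent entry points into the ConvolutionLayer builder:
ConvolutionLayer.ConvolutionLayerBuilder<?, ?> b1 = ConvolutionLayer.builder(3, 3);
ConvolutionLayer.ConvolutionLayerBuilder<?, ?> b2 =
    ConvolutionLayer.builder(new int[] {3, 3}, new int[] {1, 1});
ConvolutionLayer conv =
    ConvolutionLayer.builder(new int[] {3, 3}, new int[] {1, 1}, new int[] {0, 0})
        .nIn(1)      // input channels
        .nOut(20)    // number of filters
        .build();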

View File

@ -38,110 +38,131 @@ import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
/**
* Deconvolution2D layer nIn in the input layer is the number of channels nOut is the number of
* filters to be used in the net or in other words the channels The builder specifies the
* filter/kernel size, the stride and padding The pooling layer takes the kernel size
*/
@Data
@ToString(callSuper = true)
@EqualsAndHashCode(callSuper = true)
@Jacksonized
@SuperBuilder
public class Deconvolution2D extends ConvolutionLayer {
@Builder.Default private CNN2DFormat format = CNN2DFormat.NCHW;
protected boolean allowCausal() {
// Causal convolution - allowed for 1D only
return false;
}
public boolean hasBias() {
return isHasBias();
}
@Override
public Deconvolution2D clone() {
Deconvolution2D clone = (Deconvolution2D) super.clone();
if (clone.getKernelSize() != null) {
clone.setKernelSize(clone.getKernelSize().clone());
}
if (clone.getStride() != null) {
clone.setStride(clone.getStride().clone());
}
if (clone.getPadding() != null) {
clone.setPadding(clone.getPadding().clone());
}
return clone;
}
@Override
public Layer instantiate(
NeuralNetConfiguration conf,
Collection<TrainingListener> trainingListeners,
int layerIndex,
INDArray layerParamsView,
boolean initializeParams,
DataType networkDataType) {
setNetConfiguration(conf);
LayerValidation.assertNInNOutSet("Deconvolution2D", getName(), layerIndex, getNIn(), getNOut());
LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
runInheritance();
org.deeplearning4j.nn.layers.convolution.Deconvolution2DLayer ret =
new org.deeplearning4j.nn.layers.convolution.Deconvolution2DLayer(lconf, networkDataType);
ret.addTrainingListeners(trainingListeners);
ret.setIndex(layerIndex);
ret.setParamsViewArray(layerParamsView);
Map<String, INDArray> paramTable = initializer().init(this, layerParamsView, initializeParams);
ret.setParamTable(paramTable);
ret.setLayerConfiguration(lconf);
return ret;
}
@Override
public ParamInitializer initializer() {
return DeconvolutionParamInitializer.getInstance();
}
@Override
public InputType getOutputType(int layerIndex, InputType inputType) {
if (inputType == null || inputType.getType() != InputType.Type.CNN) {
throw new IllegalStateException(
"Invalid input for Convolution layer (layer name=\""
+ getName()
+ "\"): Expected CNN input, got "
+ inputType);
}
return InputTypeUtil.getOutputTypeDeconvLayer(
inputType,
getKernelSize(),
getStride(),
getPadding(),
getDilation(),
getConvolutionMode(),
nOut,
layerIndex,
getName(),
Deconvolution2DLayer.class);
}
private static final class Deconvolution2DBuilderImpl
extends Deconvolution2DBuilder<Deconvolution2D, Deconvolution2DBuilderImpl> {
public Deconvolution2D build() {
Deconvolution2D l = new Deconvolution2D(this);
l.initializeConstraints();
return l;
}
}
public abstract static class Deconvolution2DBuilder<
C extends Deconvolution2D, B extends Deconvolution2DBuilder<C, B>>
extends ConvolutionLayerBuilder<C, B> {
@Override
public B kernelSize(int... kernelSize) {
super.kernelSize(ValidationUtils.validate2NonNegative(kernelSize, false, "kernelSize"));
return self();
}
@Override
public B stride(int... stride) {
super.stride(ValidationUtils.validate2NonNegative(stride, false, "stride"));
return self();
}
@Override
public B padding(int... padding) {
super.padding(ValidationUtils.validate2NonNegative(padding, false, "padding"));
return self();
}
@Override
public B dilation(int... dilation) {
super.dilation(ValidationUtils.validate2NonNegative(dilation, false, "dilation"));
return self();
}
}
}
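A short, hypothetical configuration of the reformatted Deconvolution2D layer; because of the validate2NonNegative overrides above, kernelSize, stride and padding each expect exactly two non-negative values. The convolutionMode setter is assumed to be inherited from ConvolutionLayer via the Lombok builder.

Deconvolution2D deconv =
    Deconvolution2D.builder()
        .nIn(16)                                     // input channels
        .nOut(8)                                     // output channels (filters)
        .kernelSize(2, 2)
        .stride(2, 2)
        .padding(0, 0)
        .convolutionMode(ConvolutionMode.Truncate)   // assumed inherited setter
        .build();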

View File

@ -63,6 +63,7 @@ public class DenseLayer extends FeedForwardLayer {
LayerValidation.assertNInNOutSet(
"DenseLayerConfiguration", getName(), layerIndex, getNIn(), getNOut());
LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
lconf.setNetConfiguration(conf);
runInheritance();
org.deeplearning4j.nn.layers.feedforward.dense.DenseLayer ret =

View File

@ -31,6 +31,7 @@ import lombok.experimental.SuperBuilder;
import lombok.extern.slf4j.Slf4j;
import net.brutex.ai.dnn.api.ILayerConfiguration;
import net.brutex.ai.dnn.api.LayerType;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.ParamInitializer;
import org.deeplearning4j.nn.api.layers.LayerConstraint;
import org.deeplearning4j.nn.conf.InputPreProcessor;
@ -56,7 +57,7 @@ import org.nd4j.linalg.learning.regularization.Regularization;
@NoArgsConstructor
// @JsonIdentityInfo(generator= ObjectIdGenerators.IntSequenceGenerator.class, property="@id")
@Slf4j
@SuperBuilder(toBuilder = true)
public abstract class LayerConfiguration
implements ILayerConfiguration, Serializable, Cloneable { // ITrainableLayerConfiguration
@ -66,10 +67,20 @@ public abstract class LayerConfiguration
@Getter @Setter protected List<LayerConstraint> biasConstraints;
@Getter @Setter protected List<LayerConstraint> constraints;
@Getter @Setter protected IWeightNoise weightNoise;
@Builder.Default private @Getter @Setter @NonNull LinkedHashSet<String> variables = new LinkedHashSet<>();
@Getter @Setter private IDropout dropOut;
/** The type of the layer, basically defines the base class and its properties */
@Builder.Default @Getter @Setter @NonNull private LayerType type = LayerType.UNKNOWN;
/**
* Number of parameters this layer has as a result of its configuration.
*
* @return number of parameters
*/
public long numParams() {
return initializer().numParams(this);
}
/**
* A reference to the neural net configuration. This field is excluded from json serialization as
* well as from the equals check, to avoid circular references.
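The new numParams() helper simply delegates to the layer's ParamInitializer, so a configured layer can report its parameter count before it is attached to a network. A rough illustration with hypothetical values, assuming the default initializer counts weights plus biases:

DenseLayer dense = DenseLayer.builder().nIn(4).nOut(10).build();
long n = dense.numParams();   // expected: 4 * 10 weights + 10 biases = 50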

View File

@ -22,6 +22,8 @@ package org.deeplearning4j.nn.conf.layers;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import java.util.*;
import com.fasterxml.jackson.annotation.JsonProperty;
import lombok.*;
import lombok.experimental.SuperBuilder;
import lombok.extern.jackson.Jacksonized;
@ -59,10 +61,12 @@ public class LocallyConnected1D extends SameDiffLayer {
/**
* @param nIn Number of inputs to the layer (input size)
*/
@JsonProperty("nIn")
private long nIn;
/**
* @param nOut Number of outputs (output size)
*/
@JsonProperty("nOut")
private long nOut;
/**
* @param activation Activation function for the layer

View File

@ -34,6 +34,16 @@ import org.nd4j.linalg.learning.regularization.Regularization;
@SuperBuilder
public abstract class NoParamLayer extends LayerConfiguration {
/**
* Number of parameters in this layer. This will always return 0.
*
* @return 0
*/
@Override
public long numParams() {
return 0;
}
@Override
public ParamInitializer initializer() {
return EmptyParamInitializer.getInstance();
@ -58,6 +68,7 @@ public abstract class NoParamLayer extends LayerConfiguration {
/**
* Will always return no-Op updater.
*
* @return
*/
@Override
@ -65,7 +76,7 @@ public abstract class NoParamLayer extends LayerConfiguration {
return Updater.NONE.getIUpdaterWithDefaultConfig();
}
public abstract static class NoParamLayerBuilder<
C extends NoParamLayer, B extends NoParamLayerBuilder<C, B>>
extends LayerConfigurationBuilder<C, B> {}
}

View File

@ -23,6 +23,7 @@ package org.deeplearning4j.nn.conf.layers;
import java.util.Collection;
import java.util.Map;
import lombok.EqualsAndHashCode;
import lombok.NonNull;
import lombok.ToString;
import lombok.experimental.SuperBuilder;
import lombok.extern.jackson.Jacksonized;
@ -35,6 +36,7 @@ import org.deeplearning4j.optimize.api.TrainingListener;
import org.deeplearning4j.util.Convolution1DUtils;
import org.deeplearning4j.util.Convolution2DUtils;
import org.deeplearning4j.util.ValidationUtils;
import org.jetbrains.annotations.NotNull;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
@ -50,9 +52,91 @@ import org.nd4j.linalg.api.ndarray.INDArray;
@ToString(callSuper = true)
@EqualsAndHashCode(callSuper = true)
@Jacksonized
@SuperBuilder(builderMethodName = "innerBuilder")
public class Subsampling1DLayer extends SubsamplingLayer {
public static Subsampling1DLayerBuilder<?, ?> builder() {
return innerBuilder();
}
public static Subsampling1DLayerBuilder<?, ?> builder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType) {
return innerBuilder()
.poolingType(poolingType);
}
public static Subsampling1DLayerBuilder<?, ?> builder(PoolingType poolingType) {
return innerBuilder()
.poolingType(poolingType.toPoolingType());
}
public static Subsampling1DLayerBuilder<?, ?> builder(int... kernelSize) {
return innerBuilder()
.kernelSize(kernelSize);
}
public static Subsampling1DLayerBuilder<?, ?> builder(int[] kernelSize, int[] stride) {
return innerBuilder()
.kernelSize(kernelSize)
.stride(stride);
}
public static Subsampling1DLayerBuilder<?, ?> builder(int[] kernelSize, int[] stride, int[] padding) {
return innerBuilder()
.kernelSize(kernelSize)
.stride(stride)
.padding(padding);
}
public static Subsampling1DLayerBuilder<?, ?> builder(PoolingType poolingType, int[] kernelSize, int[] stride, int[] padding) {
return innerBuilder()
.poolingType(poolingType.toPoolingType())
.kernelSize(kernelSize)
.stride(stride)
.padding(padding)
;
}
public static Subsampling1DLayerBuilder<?, ?> builder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType, int[] kernelSize, int[] stride, int[] padding) {
return innerBuilder()
.poolingType(poolingType)
.kernelSize(kernelSize)
.stride(stride)
.padding(padding)
;
}
public static Subsampling1DLayerBuilder<?, ?> builder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType, int[] kernelSize) {
return innerBuilder()
.poolingType(poolingType)
.kernelSize(kernelSize)
;
}
public static Subsampling1DLayerBuilder<?, ?> builder(PoolingType poolingType, int[] kernelSize) {
return innerBuilder()
.poolingType(poolingType.toPoolingType())
.kernelSize(kernelSize)
;
}
public static Subsampling1DLayerBuilder<?, ?> builder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType, int[] kernelSize, int[] stride) {
return innerBuilder()
.poolingType(poolingType)
.kernelSize(kernelSize)
.stride(stride)
;
}
public static Subsampling1DLayerBuilder<?, ?> builder(PoolingType poolingType, int[] kernelSize, int[] stride) {
return innerBuilder()
.poolingType(poolingType.toPoolingType())
.kernelSize(kernelSize)
.stride(stride)
;
}
@Override
public org.deeplearning4j.nn.api.Layer instantiate(
NeuralNetConfiguration conf,
@ -176,20 +260,20 @@ public class Subsampling1DLayer extends SubsamplingLayer {
* @return
*/
@Override
public B kernelSize(int @NonNull ... kernelSize) {
super.kernelSize(ValidationUtils.validate1NonNegative(new int[]{kernelSize[0]}, "kernelSize")[0]); //fix width = 1
return self();
}
@Override
public B stride(@NotNull int... stride) {
super.stride( ValidationUtils.validate1NonNegative(new int[]{stride[0]}, "stride")[0]);
return self();
}
@Override
public B padding(@NotNull int... padding) {
super.padding( ValidationUtils.validate1NonNegative(new int[]{padding[0]}, "padding"));
return self();
}
}
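With the widened set of static factories above, the 1D subsampling layer can be started from either PoolingType enum or from plain kernel/stride arrays; a brief sketch with hypothetical values (the 1D builder reduces kernel, stride and padding to their first element):

// Max pooling over a sequence, kernel 2, stride 2:
Subsampling1DLayer pool =
    Subsampling1DLayer.builder(PoolingType.MAX, new int[] {2}, new int[] {2}).build();

// Equivalent, starting from the plain builder:
Subsampling1DLayer pool2 =
    Subsampling1DLayer.builder().kernelSize(2).stride(2).build();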

View File

@ -27,10 +27,7 @@ import lombok.*;
import lombok.experimental.SuperBuilder;
import lombok.extern.jackson.Jacksonized;
import org.deeplearning4j.nn.api.ParamInitializer;
import org.deeplearning4j.nn.conf.*;
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.memory.LayerMemoryReport;
import org.deeplearning4j.nn.conf.memory.MemoryReport;
@ -84,7 +81,8 @@ public class SubsamplingLayer extends NoParamLayer {
* @param padding padding in the height and width dimensions
*/
@Builder.Default protected int[] padding = new int[] {0, 0};
protected int pnorm;
protected int pnorm;
@Builder.Default protected double eps = 1e-8;
/**
* When using CuDNN or MKLDNN and an error is encountered, should fallback to the non-helper
@ -104,6 +102,7 @@ public class SubsamplingLayer extends NoParamLayer {
*/
protected @Builder.Default CNN2DFormat dataFormat =
CNN2DFormat.NCHW; // default value for legacy reasons
protected @Builder.Default RNNFormat rnnFormat = RNNFormat.NCW;
/**
* When doing average pooling, should the padding values be included in the divisor or not?<br>
* Not applicable for max and p-norm pooling.<br>
@ -127,6 +126,7 @@ public class SubsamplingLayer extends NoParamLayer {
* average pooling
*/
@Builder.Default protected boolean avgPoolIncludePadInDivisor = true;
/**
* Kernel dilation. Default: {1, 1}, which is standard convolutions. Used for implementing dilated
* convolutions, which are also known as atrous convolutions.<br>
@ -301,7 +301,7 @@ public class SubsamplingLayer extends NoParamLayer {
public void setNIn(InputType inputType, boolean override) {
// No op: subsampling layer doesn't have nIn value
if (!defaultValueOverridden || override) {
this.rnnFormat = ((InputType.InputTypeRecurrent) inputType).getFormat();
defaultValueOverridden = true;
}
}
@ -355,14 +355,6 @@ public class SubsamplingLayer extends NoParamLayer {
.build();
}
public int getPnorm() {
return pnorm;
}
public double getEps() {
return eps;
}
public enum PoolingType {
MAX,
AVG,
@ -394,33 +386,33 @@ public class SubsamplingLayer extends NoParamLayer {
return self();
}
public B eps(double eps) {
ValidationUtils.validateNonNegative(eps, "eps");
this.eps$value = eps;
this.eps$set = true;
return self();
}
public B kernelSize(int @NonNull... kernelSize) {
this.kernelSize$value = ValidationUtils.validate2NonNegative(kernelSize, true, "kernelSize");
this.kernelSize$set = true;
return self();
}
public B stride(int @NonNull ... stride) {
this.stride$value = ValidationUtils.validate2NonNegative(stride, true, "stride");
this.stride$set = true;
return self();
}
public B padding(int @NonNull ... padding) {
this.padding$value = ValidationUtils.validate2NonNegative(padding, true, "padding");
this.padding$set = true;
return self();
}
public B dilation(int @NonNull ... dilation) {
this.dilation$value = ValidationUtils.validate2NonNegative(dilation, true, "dilation");
this.dilation$set = true;
return self();
}

View File

@ -74,6 +74,7 @@ public class FrozenLayer extends LayerConfiguration {
boolean initializeParams,
DataType networkDataType) {
innerConfiguration.setNetConfiguration(conf);
// Need to be able to instantiate a layer, from a config - for JSON -> net type situations
org.deeplearning4j.nn.api.Layer underlying =
innerConfiguration.instantiate(

View File

@ -20,6 +20,7 @@
package org.deeplearning4j.nn.conf.layers.samediff;
import com.fasterxml.jackson.annotation.JsonIgnore;
import java.util.Collection;
import java.util.List;
import java.util.Map;
@ -52,7 +53,8 @@ import org.nd4j.linalg.learning.regularization.WeightDecay;
@EqualsAndHashCode(callSuper = true, doNotUseGetters = true)
@NoArgsConstructor
@SuperBuilder
public abstract class AbstractSameDiffLayer extends LayerConfiguration
implements org.deeplearning4j.nn.api.ITraininableLayerConfiguration {
/**
* The regularization for the parameters (excluding biases) - for example {@link WeightDecay}
@ -63,16 +65,14 @@ public abstract class AbstractSameDiffLayer extends LayerConfiguration {
* @param regularization Regularization to apply for the network parameters/weights (excluding
* biases)
*/
@Getter protected List<Regularization> regularization;
/**
* The regularization for the biases only - for example {@link WeightDecay} -- SETTER -- Set the
* regularization for the biases only - for example {@link WeightDecay}
*
* @param regularizationBias Regularization to apply for the network biases only
*/
@Getter protected List<Regularization> regularizationBias;
/**
* Gradient updater. For example, {@link org.nd4j.linalg.learning.config.Adam} or {@link
* org.nd4j.linalg.learning.config.Nesterovs}
@ -87,21 +87,23 @@ public abstract class AbstractSameDiffLayer extends LayerConfiguration {
* @param biasUpdater Updater to use for bias parameters
*/
protected @Getter @Setter IUpdater biasUpdater;
@Getter @Setter protected GradientNormalization gradientNormalization;
@Getter @Setter protected double gradientNormalizationThreshold = Double.NaN;
@Getter @Setter private SDLayerParams layerParams;
@Getter @Setter private DataType dataType;
@Override
public void runInheritance(@NotNull NeuralNetConfiguration conf) {
super.runInheritance(conf);
if (this.biasUpdater == null) this.biasUpdater = conf.getBiasUpdater();
if (this.updater == null) this.updater = conf.getUpdater();
if (this.regularizationBias == null || regularizationBias.isEmpty())
this.regularizationBias = conf.getRegularizationBias();
if (this.regularization == null || regularization.isEmpty())
this.regularization = conf.getRegularization();
// if( this.weightInit == null) this.weightInit = conf.getWeightInit();
if (this.gradientNormalization == null)
this.gradientNormalization = conf.getGradientNormalization();
// if(this.weightInit == null) this.weightInit = conf.getWeightInit();
@ -109,6 +111,7 @@ public abstract class AbstractSameDiffLayer extends LayerConfiguration {
this.gradientNormalizationThreshold = conf.getGradientNormalizationThreshold();
}
}
@Override
public List<Regularization> getRegularizationByParam(String paramName) {
if (layerParams.isWeightParam(paramName)) {
@ -119,6 +122,7 @@ public abstract class AbstractSameDiffLayer extends LayerConfiguration {
return null;
}
@JsonIgnore
public SDLayerParams getLayerParams() {
if (layerParams == null) {
layerParams = new SDLayerParams();
@ -138,7 +142,6 @@ public abstract class AbstractSameDiffLayer extends LayerConfiguration {
return null;
}
/**
* Define the parameters for the network. Use {@link SDLayerParams#addWeightParam(String,
* long...)} and {@link SDLayerParams#addBiasParam(String, long...)}
@ -207,7 +210,6 @@ public abstract class AbstractSameDiffLayer extends LayerConfiguration {
fanIn, fanOut, array.shape(), weightInit, null, paramReshapeOrder(null), array);
}
/**
* This method generates an "all ones" mask array for use in the SameDiff model when none is
* provided.

View File

@ -52,8 +52,8 @@ import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator;
/** A layer with input and output, no parameters or gradients */
@NoArgsConstructor(force = true)
@Slf4j
// @JsonIdentityInfo(generator = ObjectIdGenerators.IntSequenceGenerator.class, property = "@id")
// @JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, property = "__class")
public abstract class AbstractLayer<LayerConf_T extends LayerConfiguration> implements Layer {
private final @Getter List<String> variables = new ArrayList<>();
@ -80,10 +80,8 @@ public abstract class AbstractLayer<LayerConf_T extends LayerConfiguration> impl
protected DataType dataType;
protected @Getter @Setter int iterationCount;
protected @Getter @Setter int epochCount;
@JsonIgnore private @Getter @Setter IModel net;
@JsonIgnore @Getter @Setter @NonNull private NeuralNetConfiguration netConfiguration;
public AbstractLayer(@NonNull LayerConfiguration layerConf, @NonNull DataType dataType) {
//noinspection unchecked
@ -95,19 +93,18 @@ public abstract class AbstractLayer<LayerConf_T extends LayerConfiguration> impl
}
this.dataType = dataType;
if (layerConfiguration.getNetConfiguration() == null) {
throw new RuntimeException(
"You cannot create a layer from a layer configuration, that is not part of any neural network configuration.");
}
this.net = layerConfiguration.getNetConfiguration().getNet();
}
public void addTrainingListeners(TrainingListener... listeners) {
if (listeners != null) trainingListeners.addAll(List.of(listeners));
}
public void addTrainingListeners(Collection<TrainingListener> listeners) {
if (listeners != null) trainingListeners.addAll(listeners);
}
@Override @Override
@ -471,7 +468,7 @@ public abstract class AbstractLayer<LayerConf_T extends LayerConfiguration> impl
@Override
public int getInputMiniBatchSize() {
if (input == null) return 0;
return (int) input.size(0);
}
@ -565,8 +562,9 @@ public abstract class AbstractLayer<LayerConf_T extends LayerConfiguration> impl
*/
@Override
public void setParamTable(Map<String, INDArray> paramTable) {
log.warn(
"Using setParamTable on this layer {} has no effect.", getLayerConfiguration().getName());
// throw new RuntimeException("Not implemented");
}
/** /**
@ -578,7 +576,7 @@ public abstract class AbstractLayer<LayerConf_T extends LayerConfiguration> impl
*/
@Override
public Map<String, INDArray> getParamTable(boolean isBackprop) {
// throw new RuntimeException("Not implemented");
return null;
}
@ -590,7 +588,7 @@ public abstract class AbstractLayer<LayerConf_T extends LayerConfiguration> impl
*/
@Override
public INDArray getParams() {
// throw new RuntimeException("Not implemented");
return null;
}

View File

@ -662,6 +662,7 @@ public abstract class BaseLayer<LayerConfT extends BaseLayerConfiguration>
*/
public boolean hasBias() {
// Overridden by layers supporting no bias mode: dense, output, convolutional, embedding
//return true;
return true;
}

View File

@ -24,6 +24,7 @@ import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.nn.api.ITraininableLayerConfiguration;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.conf.CacheMode;
import org.deeplearning4j.nn.conf.layers.BaseLayerConfiguration;
import org.deeplearning4j.nn.conf.misc.DummyConfig;
import org.deeplearning4j.nn.gradient.DefaultGradient;
import org.deeplearning4j.nn.gradient.Gradient;
@ -88,6 +89,8 @@ public class FrozenLayer extends BaseWrapperLayer {
return underlying.activate(input, false, workspaceMgr);
}
@Override
public void fit() {
if (!logFit) {

View File

@ -47,163 +47,188 @@ import org.nd4j.linalg.factory.Broadcast;
import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.factory.Nd4j;
public class Convolution1DLayer extends ConvolutionLayer {
public Convolution1DLayer(LayerConfiguration conf, DataType dataType) {
super(conf, dataType);
}
@Override
public Pair<Gradient, INDArray> backpropGradient(
INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
assertInputSet(true);
if (epsilon.rank() != 3)
throw new DL4JInvalidInputException(
"Got rank "
+ epsilon.rank()
+ " array as epsilon for Convolution1D backprop with shape "
+ Arrays.toString(epsilon.shape())
+ ". Expected rank 3 array with shape [minibatchSize, features, length]. "
+ layerId());
Pair<INDArray, INDArray> fwd = preOutput(false, true, workspaceMgr);
IActivation afn = getTypedLayerConfiguration().getActivationFn();
INDArray delta =
afn.backprop(fwd.getFirst(), epsilon).getFirst(); // TODO handle activation function params
org.deeplearning4j.nn.conf.layers.ConvolutionLayer c = getTypedLayerConfiguration();
Conv1DConfig conf =
Conv1DConfig.builder()
.k(c.getKernelSize()[0])
.s(c.getStride()[0])
.d(c.getDilation()[0])
.p(c.getPadding()[0])
.dataFormat(Conv1DConfig.NCW)
.paddingMode(Convolution2DUtils.paddingModeForConvolutionMode(convolutionMode))
.build();
INDArray w =
Convolution1DUtils.reshapeWeightArrayOrGradientForFormat(
getParam(ConvolutionParamInitializer.WEIGHT_KEY), RNNFormat.NCW);
INDArray[] inputArrs;
INDArray[] outputArrs;
INDArray wg =
Convolution1DUtils.reshapeWeightArrayOrGradientForFormat(
gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY), getRnnDataFormat());
INDArray epsOut =
workspaceMgr.createUninitialized(
ArrayType.ACTIVATION_GRAD, input.dataType(), input.shape());
INDArray input = this.input.castTo(dataType);
if (getTypedLayerConfiguration().getRnnDataFormat() == RNNFormat.NWC) {
input = input.permute(0, 2, 1); // NHWC to NCHW
} }
if (getTypedLayerConfiguration().hasBias()) {
@Override INDArray b = getParam(ConvolutionParamInitializer.BIAS_KEY);
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { b = b.reshape(b.length());
assertInputSet(true); inputArrs = new INDArray[] {input, w, b, delta};
if (epsilon.rank() != 3) INDArray bg = gradientViews.get(ConvolutionParamInitializer.BIAS_KEY);
throw new DL4JInvalidInputException("Got rank " + epsilon.rank() bg = bg.reshape(bg.length());
+ " array as epsilon for Convolution1D backprop with shape " outputArrs = new INDArray[] {epsOut, wg, bg};
+ Arrays.toString(epsilon.shape()) } else {
+ ". Expected rank 3 array with shape [minibatchSize, features, length]. " + layerId()); inputArrs = new INDArray[] {input, w, delta};
Pair<INDArray,INDArray> fwd = preOutput(false,true,workspaceMgr); outputArrs = new INDArray[] {epsOut, wg};
IActivation afn = getTypedLayerConfiguration().getActivationFn();
INDArray delta = afn.backprop(fwd.getFirst(), epsilon).getFirst(); //TODO handle activation function params
Convolution1D c = getTypedLayerConfiguration();
Conv1DConfig conf = Conv1DConfig.builder()
.k(c.getKernelSize()[0])
.s(c.getStride()[0])
.d(c.getDilation()[0])
.p(c.getPadding()[0])
.dataFormat(Conv1DConfig.NCW)
.paddingMode(Convolution2DUtils.paddingModeForConvolutionMode(convolutionMode))
.build();
INDArray w = Convolution1DUtils.reshapeWeightArrayOrGradientForFormat(
getParam(ConvolutionParamInitializer.WEIGHT_KEY),
RNNFormat.NCW);
INDArray[] inputArrs;
INDArray[] outputArrs;
INDArray wg = Convolution1DUtils.reshapeWeightArrayOrGradientForFormat(
gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY),
getRnnDataFormat());
INDArray epsOut = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, input.dataType(), input.shape());
INDArray input = this.input.castTo(dataType);
if(getTypedLayerConfiguration().getRnnDataFormat() == RNNFormat.NWC) {
input = input.permute(0,2,1); //NHWC to NCHW
}
if(getTypedLayerConfiguration().hasBias()) {
INDArray b = getParam(ConvolutionParamInitializer.BIAS_KEY);
b = b.reshape(b.length());
inputArrs = new INDArray[]{input, w, b, delta};
INDArray bg = gradientViews.get(ConvolutionParamInitializer.BIAS_KEY);
bg = bg.reshape(bg.length());
outputArrs = new INDArray[]{epsOut, wg, bg};
} else {
inputArrs = new INDArray[]{input, w, delta};
outputArrs = new INDArray[]{epsOut, wg};
}
Conv1DDerivative op = new Conv1DDerivative(inputArrs, outputArrs, conf);
Nd4j.exec(op);
Gradient retGradient = new DefaultGradient();
if(getTypedLayerConfiguration().hasBias()) {
retGradient.setGradientFor(ConvolutionParamInitializer.BIAS_KEY, gradientViews.get(ConvolutionParamInitializer.BIAS_KEY));
}
retGradient.setGradientFor(ConvolutionParamInitializer.WEIGHT_KEY, gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY), 'c');
if (getRnnDataFormat() == RNNFormat.NWC) {
epsOut = epsOut.permute(0, 2, 1);
}
return new Pair<>(retGradient, epsOut);
} }
@Override Conv1DDerivative op = new Conv1DDerivative(inputArrs, outputArrs, conf);
protected Pair<INDArray, INDArray> preOutput4d(boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) { Nd4j.exec(op);
Pair<INDArray,INDArray> preOutput = super.preOutput(true, forBackprop, workspaceMgr);
INDArray p3d = preOutput.getFirst(); Gradient retGradient = new DefaultGradient();
INDArray p = preOutput.getFirst().reshape(p3d.size(0), p3d.size(1), p3d.size(2), 1); if (getTypedLayerConfiguration().hasBias()) {
preOutput.setFirst(p); retGradient.setGradientFor(
return preOutput; ConvolutionParamInitializer.BIAS_KEY,
gradientViews.get(ConvolutionParamInitializer.BIAS_KEY));
}
retGradient.setGradientFor(
ConvolutionParamInitializer.WEIGHT_KEY,
gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY),
'c');
if (getRnnDataFormat() == RNNFormat.NWC) {
epsOut = epsOut.permute(0, 2, 1);
}
return new Pair<>(retGradient, epsOut);
}
@Override
protected Pair<INDArray, INDArray> preOutput4d(
boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) {
Pair<INDArray, INDArray> preOutput = super.preOutput(true, forBackprop, workspaceMgr);
INDArray p3d = preOutput.getFirst();
INDArray p = preOutput.getFirst().reshape(p3d.size(0), p3d.size(1), p3d.size(2), 1);
preOutput.setFirst(p);
return preOutput;
}
@Override
protected Pair<INDArray, INDArray> preOutput(
boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) {
assertInputSet(false);
INDArray input = this.input.castTo(dataType);
if (getTypedLayerConfiguration().getRnnDataFormat() == RNNFormat.NWC) {
input = input.permute(0, 2, 1); // NHWC to NCHW
} }
@Override org.deeplearning4j.nn.conf.layers.ConvolutionLayer c = getTypedLayerConfiguration();
protected Pair<INDArray,INDArray> preOutput(boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) { Conv1DConfig conf =
assertInputSet(false); Conv1DConfig.builder()
.k(c.getKernelSize()[0])
.s(c.getStride()[0])
.d(c.getDilation()[0])
.p(c.getPadding()[0])
.dataFormat(Conv1DConfig.NCW)
.paddingMode(Convolution2DUtils.paddingModeForConvolutionMode(convolutionMode))
.build();
INDArray input = this.input.castTo(dataType); INDArray w =
if(getTypedLayerConfiguration().getRnnDataFormat() == RNNFormat.NWC) { Convolution1DUtils.reshapeWeightArrayOrGradientForFormat(
input = input.permute(0,2,1); //NHWC to NCHW getParam(ConvolutionParamInitializer.WEIGHT_KEY), RNNFormat.NCW);
}
Convolution1D c = getTypedLayerConfiguration(); INDArray[] inputs;
Conv1DConfig conf = Conv1DConfig.builder() if (getTypedLayerConfiguration().hasBias()) {
.k(c.getKernelSize()[0]) INDArray b = getParam(ConvolutionParamInitializer.BIAS_KEY);
.s(c.getStride()[0]) b = b.reshape(b.length());
.d(c.getDilation()[0]) inputs = new INDArray[] {input, w, b};
.p(c.getPadding()[0]) } else {
.dataFormat(Conv1DConfig.NCW) inputs = new INDArray[] {input, w};
.paddingMode(Convolution2DUtils.paddingModeForConvolutionMode(convolutionMode))
.build();
INDArray w = Convolution1DUtils.reshapeWeightArrayOrGradientForFormat(
getParam(ConvolutionParamInitializer.WEIGHT_KEY)
,RNNFormat.NCW);
INDArray[] inputs;
if(getTypedLayerConfiguration().hasBias()) {
INDArray b = getParam(ConvolutionParamInitializer.BIAS_KEY);
b = b.reshape(b.length());
inputs = new INDArray[]{input, w, b};
} else {
inputs = new INDArray[]{input, w};
}
Conv1D op = new Conv1D(inputs, null, conf);
List<LongShapeDescriptor> outShape = op.calculateOutputShape();
op.setOutputArgument(0, Nd4j.create(outShape.get(0), false));
Nd4j.exec(op);
INDArray output = op.getOutputArgument(0);
if(getRnnDataFormat() == RNNFormat.NWC) {
output = output.permute(0,2,1);
}
return new Pair<>(output, null);
} }
Conv1D op = new Conv1D(inputs, null, conf);
List<LongShapeDescriptor> outShape = op.calculateOutputShape();
op.setOutputArgument(0, Nd4j.create(outShape.get(0), false));
Nd4j.exec(op);
INDArray output = op.getOutputArgument(0);
@Override if (getRnnDataFormat() == RNNFormat.NWC) {
public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) { output = output.permute(0, 2, 1);
INDArray act4d = super.activate(training, workspaceMgr);
INDArray act3d = act4d.rank() > 3 ?
act4d.reshape(act4d.size(0), act4d.size(1), act4d.size(2)) : act4d;
if(maskArray != null) {
INDArray maskOut = feedForwardMaskArray(maskArray, MaskState.Active, (int)act3d.size(0)).getFirst();
Preconditions.checkState(act3d.size(0) == maskOut.size(0) && act3d.size(2) == maskOut.size(1),
"Activations dimensions (0,2) and mask dimensions (0,1) don't match: Activations %s, Mask %s",
act3d.shape(), maskOut.shape());
Broadcast.mul(act3d, maskOut, act3d, 0, 2);
}
return workspaceMgr.leverageTo(ArrayType.ACTIVATIONS, act3d); //Should be zero copy most of the time
} }
@Override return new Pair<>(output, null);
public Pair<INDArray, MaskState> feedForwardMaskArray(INDArray maskArray, MaskState currentMaskState, }
int minibatchSize) {
INDArray reduced = Convolution2DUtils.cnn1dMaskReduction(maskArray, getTypedLayerConfiguration().getKernelSize()[0], @Override
getTypedLayerConfiguration().getStride()[0], getTypedLayerConfiguration().getPadding()[0], getTypedLayerConfiguration().getDilation()[0], public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) {
getTypedLayerConfiguration().getConvolutionMode()); INDArray act4d = super.activate(training, workspaceMgr);
return new Pair<>(reduced, currentMaskState); INDArray act3d =
act4d.rank() > 3 ? act4d.reshape(act4d.size(0), act4d.size(1), act4d.size(2)) : act4d;
if (maskArray != null) {
INDArray maskOut =
feedForwardMaskArray(maskArray, MaskState.Active, (int) act3d.size(0)).getFirst();
Preconditions.checkState(
act3d.size(0) == maskOut.size(0) && act3d.size(2) == maskOut.size(1),
"Activations dimensions (0,2) and mask dimensions (0,1) don't match: Activations %s, Mask %s",
act3d.shape(),
maskOut.shape());
Broadcast.mul(act3d, maskOut, act3d, 0, 2);
} }
@Override return workspaceMgr.leverageTo(
public Convolution1D getTypedLayerConfiguration() { ArrayType.ACTIVATIONS, act3d); // Should be zero copy most of the time
return (Convolution1D)layerConfiguration; }
}
private RNNFormat getRnnDataFormat(){ @Override
return getTypedLayerConfiguration().getRnnDataFormat(); public Pair<INDArray, MaskState> feedForwardMaskArray(
} INDArray maskArray, MaskState currentMaskState, int minibatchSize) {
INDArray reduced =
Convolution2DUtils.cnn1dMaskReduction(
maskArray,
getTypedLayerConfiguration().getKernelSize()[0],
getTypedLayerConfiguration().getStride()[0],
getTypedLayerConfiguration().getPadding()[0],
getTypedLayerConfiguration().getDilation()[0],
getTypedLayerConfiguration().getConvolutionMode());
return new Pair<>(reduced, currentMaskState);
}
private RNNFormat getRnnDataFormat() {
return getTypedLayerConfiguration().getRnnDataFormat();
}
/**
*
* @return
*/
@Override
public Convolution1D getTypedLayerConfiguration() {
return (Convolution1D) super.getTypedLayerConfiguration();
}
} }
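The repeated permute(0, 2, 1) calls in this layer convert between the NWC and NCW sequence layouts before handing data to the Conv1D op. As a standalone ND4J illustration of that shape handling (not tied to this class):

INDArray nwc = Nd4j.zeros(DataType.FLOAT, 8, 100, 3);   // [minibatch, length, channels]
INDArray ncw = nwc.permute(0, 2, 1);                    // [minibatch, channels, length]
System.out.println(Arrays.toString(ncw.shape()));       // prints [8, 3, 100]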

View File

@ -0,0 +1,226 @@
/*
* ******************************************************************************
* *
* *
* * This program and the accompanying materials are made available under the
* * terms of the Apache License, Version 2.0 which is available at
* * https://www.apache.org/licenses/LICENSE-2.0.
* *
* * See the NOTICE file distributed with this work for additional
* * information regarding copyright ownership.
* * Unless required by applicable law or agreed to in writing, software
* * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* * License for the specific language governing permissions and limitations
* * under the License.
* *
* * SPDX-License-Identifier: Apache-2.0
* *****************************************************************************
*/
package org.deeplearning4j.nn.layers.convolution;
import java.util.Arrays;
import java.util.List;
import org.deeplearning4j.exception.DL4JInvalidInputException;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.conf.RNNFormat;
import org.deeplearning4j.nn.conf.layers.Convolution1DNew;
import org.deeplearning4j.nn.conf.layers.LayerConfiguration;
import org.deeplearning4j.nn.gradient.DefaultGradient;
import org.deeplearning4j.nn.gradient.Gradient;
import org.deeplearning4j.nn.params.ConvolutionParamInitializer;
import org.deeplearning4j.nn.workspace.ArrayType;
import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr;
import org.deeplearning4j.util.Convolution1DUtils;
import org.deeplearning4j.util.Convolution2DUtils;
import org.nd4j.common.base.Preconditions;
import org.nd4j.common.primitives.Pair;
import org.nd4j.linalg.activations.IActivation;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.ops.impl.layers.convolution.Conv1D;
import org.nd4j.linalg.api.ops.impl.layers.convolution.Conv1DDerivative;
import org.nd4j.linalg.api.ops.impl.layers.convolution.config.Conv1DConfig;
import org.nd4j.linalg.api.shape.LongShapeDescriptor;
import org.nd4j.linalg.factory.Broadcast;
import org.nd4j.linalg.factory.Nd4j;
public class Convolution1DNewLayer<Layer_ConfT extends Convolution1DNew>
extends ConvolutionNewLayer<Layer_ConfT> {
public Convolution1DNewLayer(LayerConfiguration conf, DataType dataType) {
super(conf, dataType);
}
@Override
public Pair<Gradient, INDArray> backpropGradient(
INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
assertInputSet(true);
if (epsilon.rank() != 3)
throw new DL4JInvalidInputException(
"Got rank "
+ epsilon.rank()
+ " array as epsilon for Convolution1D backprop with shape "
+ Arrays.toString(epsilon.shape())
+ ". Expected rank 3 array with shape [minibatchSize, features, length]. "
+ layerId());
Pair<INDArray, INDArray> fwd = preOutput(false, true, workspaceMgr);
IActivation afn = getTypedLayerConfiguration().getActivationFn();
INDArray delta =
afn.backprop(fwd.getFirst(), epsilon).getFirst(); // TODO handle activation function params
Convolution1DNew c = getTypedLayerConfiguration();
Conv1DConfig conf =
Conv1DConfig.builder()
.k(c.getKernelSize()[0])
.s(c.getStride()[0])
.d(c.getDilation()[0])
.p(c.getPadding()[0])
.dataFormat(Conv1DConfig.NCW)
.paddingMode(Convolution2DUtils.paddingModeForConvolutionMode(convolutionMode))
.build();
INDArray w =
Convolution1DUtils.reshapeWeightArrayOrGradientForFormat(
getParam(ConvolutionParamInitializer.WEIGHT_KEY), RNNFormat.NCW);
INDArray[] inputArrs;
INDArray[] outputArrs;
INDArray wg =
Convolution1DUtils.reshapeWeightArrayOrGradientForFormat(
gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY), getRnnDataFormat());
INDArray epsOut =
workspaceMgr.createUninitialized(
ArrayType.ACTIVATION_GRAD, input.dataType(), input.shape());
INDArray input = this.input.castTo(dataType);
if (getTypedLayerConfiguration().getRnnDataFormat() == RNNFormat.NWC) {
input = input.permute(0, 2, 1); // NHWC to NCHW
}
if (getTypedLayerConfiguration().hasBias()) {
INDArray b = getParam(ConvolutionParamInitializer.BIAS_KEY);
b = b.reshape(b.length());
inputArrs = new INDArray[] {input, w, b, delta};
INDArray bg = gradientViews.get(ConvolutionParamInitializer.BIAS_KEY);
bg = bg.reshape(bg.length());
outputArrs = new INDArray[] {epsOut, wg, bg};
} else {
inputArrs = new INDArray[] {input, w, delta};
outputArrs = new INDArray[] {epsOut, wg};
}
Conv1DDerivative op = new Conv1DDerivative(inputArrs, outputArrs, conf);
Nd4j.exec(op);
Gradient retGradient = new DefaultGradient();
if (getTypedLayerConfiguration().hasBias()) {
retGradient.setGradientFor(
ConvolutionParamInitializer.BIAS_KEY,
gradientViews.get(ConvolutionParamInitializer.BIAS_KEY));
}
retGradient.setGradientFor(
ConvolutionParamInitializer.WEIGHT_KEY,
gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY),
'c');
if (getRnnDataFormat() == RNNFormat.NWC) {
epsOut = epsOut.permute(0, 2, 1);
}
return new Pair<>(retGradient, epsOut);
}
@Override
protected Pair<INDArray, INDArray> preOutput4d(
boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) {
Pair<INDArray, INDArray> preOutput = super.preOutput(true, forBackprop, workspaceMgr);
INDArray p3d = preOutput.getFirst();
INDArray p = preOutput.getFirst().reshape(p3d.size(0), p3d.size(1), p3d.size(2), 1);
preOutput.setFirst(p);
return preOutput;
}
@Override
protected Pair<INDArray, INDArray> preOutput(
boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) {
assertInputSet(false);
INDArray input = this.input.castTo(dataType);
if (getTypedLayerConfiguration().getRnnDataFormat() == RNNFormat.NWC) {
input = input.permute(0, 2, 1); // NHWC to NCHW
}
Convolution1DNew c = getTypedLayerConfiguration();
Conv1DConfig conf =
Conv1DConfig.builder()
.k(c.getKernelSize()[0])
.s(c.getStride()[0])
.d(c.getDilation()[0])
.p(c.getPadding()[0])
.dataFormat(Conv1DConfig.NCW)
.paddingMode(Convolution2DUtils.paddingModeForConvolutionMode(convolutionMode))
.build();
INDArray w =
Convolution1DUtils.reshapeWeightArrayOrGradientForFormat(
getParam(ConvolutionParamInitializer.WEIGHT_KEY), RNNFormat.NCW);
INDArray[] inputs;
if (getTypedLayerConfiguration().hasBias()) {
INDArray b = getParam(ConvolutionParamInitializer.BIAS_KEY);
b = b.reshape(b.length());
inputs = new INDArray[] {input, w, b};
} else {
inputs = new INDArray[] {input, w};
}
Conv1D op = new Conv1D(inputs, null, conf);
List<LongShapeDescriptor> outShape = op.calculateOutputShape();
op.setOutputArgument(0, Nd4j.create(outShape.get(0), false));
Nd4j.exec(op);
INDArray output = op.getOutputArgument(0);
if (getRnnDataFormat() == RNNFormat.NWC) {
output = output.permute(0, 2, 1);
}
return new Pair<>(output, null);
}
@Override
public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) {
INDArray act4d = super.activate(training, workspaceMgr);
INDArray act3d =
act4d.rank() > 3 ? act4d.reshape(act4d.size(0), act4d.size(1), act4d.size(2)) : act4d;
if (maskArray != null) {
INDArray maskOut =
feedForwardMaskArray(maskArray, MaskState.Active, (int) act3d.size(0)).getFirst();
Preconditions.checkState(
act3d.size(0) == maskOut.size(0) && act3d.size(2) == maskOut.size(1),
"Activations dimensions (0,2) and mask dimensions (0,1) don't match: Activations %s, Mask %s",
act3d.shape(),
maskOut.shape());
Broadcast.mul(act3d, maskOut, act3d, 0, 2);
}
return workspaceMgr.leverageTo(
ArrayType.ACTIVATIONS, act3d); // Should be zero copy most of the time
}
@Override
public Pair<INDArray, MaskState> feedForwardMaskArray(
INDArray maskArray, MaskState currentMaskState, int minibatchSize) {
INDArray reduced =
Convolution2DUtils.cnn1dMaskReduction(
maskArray,
getTypedLayerConfiguration().getKernelSize()[0],
getTypedLayerConfiguration().getStride()[0],
getTypedLayerConfiguration().getPadding()[0],
getTypedLayerConfiguration().getDilation()[0],
getTypedLayerConfiguration().getConvolutionMode());
return new Pair<>(reduced, currentMaskState);
}
private RNNFormat getRnnDataFormat() {
return getTypedLayerConfiguration().getRnnDataFormat();
}
}
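
For orientation, here is a minimal standalone sketch (not part of this commit; the kernel, stride, padding and dilation values are made-up examples) of how the sequence length produced by the Conv1D op in preOutput above can be checked against the usual 1D convolution arithmetic, assuming an NCW input of shape [minibatch, channels, length]. It mirrors the getOutputSize logic shown later in this diff.

// Hypothetical shape check, not part of the patch: the hyper-parameters below are assumptions.
public class Conv1DShapeSketch {
  public static void main(String[] args) {
    long inLength = 20;                                   // W dimension of an NCW input
    int kernel = 3, stride = 2, padding = 1, dilation = 1;
    long effectiveKernel = dilation * (kernel - 1) + 1;   // dilated ("effective") kernel size
    // Truncate/Strict mode: outLength = (inLength - effectiveKernel + 2*padding) / stride + 1
    long outLength = (inLength - effectiveKernel + 2L * padding) / stride + 1;
    // Same/Causal mode: outLength = ceil(inLength / stride)
    long outLengthSame = (long) Math.ceil(inLength / (double) stride);
    System.out.println("Truncate: " + outLength + ", Same: " + outLengthSame); // Truncate: 10, Same: 10
  }
}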

View File

@ -0,0 +1,753 @@
/*
* ******************************************************************************
* *
* *
* * This program and the accompanying materials are made available under the
* * terms of the Apache License, Version 2.0 which is available at
* * https://www.apache.org/licenses/LICENSE-2.0.
* *
* * See the NOTICE file distributed with this work for additional
* * information regarding copyright ownership.
* * Unless required by applicable law or agreed to in writing, software
* * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* * License for the specific language governing permissions and limitations
* * under the License.
* *
* * SPDX-License-Identifier: Apache-2.0
* *****************************************************************************
*/
package org.deeplearning4j.nn.layers.convolution;
import java.util.Arrays;
import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.common.config.DL4JClassLoading;
import org.deeplearning4j.exception.DL4JInvalidInputException;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.conf.CNN2DFormat;
import org.deeplearning4j.nn.conf.CacheMode;
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.layers.Convolution1DNew;
import org.deeplearning4j.nn.conf.layers.LayerConfiguration;
import org.deeplearning4j.nn.gradient.DefaultGradient;
import org.deeplearning4j.nn.gradient.Gradient;
import org.deeplearning4j.nn.layers.BaseLayer;
import org.deeplearning4j.nn.layers.LayerHelper;
import org.deeplearning4j.nn.layers.mkldnn.MKLDNNConvHelper;
import org.deeplearning4j.nn.params.ConvolutionParamInitializer;
import org.deeplearning4j.nn.workspace.ArrayType;
import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr;
import org.deeplearning4j.util.Convolution2DUtils;
import org.nd4j.common.primitives.Pair;
import org.nd4j.linalg.activations.IActivation;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.memory.MemoryWorkspace;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.shape.Shape;
import org.nd4j.linalg.convolution.Convolution;
import org.nd4j.linalg.exception.ND4JArraySizeException;
import org.nd4j.linalg.exception.ND4JOpProfilerException;
import org.nd4j.linalg.factory.Nd4j;
@Slf4j
public class ConvolutionNewLayer<
LayerConf_T extends org.deeplearning4j.nn.conf.layers.Convolution1DNew>
extends BaseLayer<org.deeplearning4j.nn.conf.layers.Convolution1DNew> {
protected INDArray i2d;
protected ConvolutionHelper helper = null;
protected int helperCountFail = 0;
protected ConvolutionMode convolutionMode;
protected transient INDArray dummyBias; // Used only when: hasBias == false AND helpers are used
protected transient INDArray dummyBiasGrad; // As above
public ConvolutionNewLayer(LayerConfiguration conf, DataType dataType) {
super(conf, dataType);
initializeHelper();
if (conf instanceof Convolution1DNew) {
convolutionMode = ((Convolution1DNew) conf).getConvolutionMode();
} else if (conf instanceof org.deeplearning4j.nn.conf.layers.ConvolutionLayer) {
convolutionMode =
((org.deeplearning4j.nn.conf.layers.ConvolutionLayer) conf).getConvolutionMode();
}
}
void initializeHelper() {
String backend = Nd4j.getExecutioner().getEnvironmentInformation().getProperty("backend");
if ("CUDA".equalsIgnoreCase(backend)) {
helper =
DL4JClassLoading.createNewInstance(
"org.deeplearning4j.cuda.convolution.CudnnConvolutionHelper",
ConvolutionHelper.class,
dataType);
log.debug("CudnnConvolutionHelper successfully initialized");
if (!helper.checkSupported()) {
helper = null;
}
} else if ("CPU".equalsIgnoreCase(backend)) {
helper = new MKLDNNConvHelper(dataType);
log.trace("Created MKLDNNConvHelper, layer {}", getTypedLayerConfiguration().getName());
}
if (helper != null && !helper.checkSupported()) {
log.debug("Removed helper {} as not supported", helper.getClass());
helper = null;
}
}
@Override
public Type type() {
return Type.CONVOLUTIONAL;
}
/**
* @return the layer configuration, typed as {@link Convolution1DNew}
*/
@Override
public Convolution1DNew getTypedLayerConfiguration() {
return super.getTypedLayerConfiguration();
}
@Override
public Pair<Gradient, INDArray> backpropGradient(
INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
assertInputSet(true);
INDArray weights =
getParamWithNoise(ConvolutionParamInitializer.WEIGHT_KEY, true, workspaceMgr);
INDArray bias = getParamWithNoise(ConvolutionParamInitializer.BIAS_KEY, true, workspaceMgr);
INDArray input = this.input.castTo(dataType); // No op if correct type
if (epsilon.dataType() != dataType) epsilon = epsilon.castTo(dataType);
INDArray origInput = input;
INDArray origEps = epsilon;
if (getTypedLayerConfiguration().getConvFormat() != CNN2DFormat.NCHW) {
input = input.permute(0, 3, 1, 2); // NHWC to NCHW
epsilon = epsilon.permute(0, 3, 1, 2); // NHWC to NCHW
}
long miniBatch = input.size(0);
int inH = (int) input.size(2);
int inW = (int) input.size(3);
long outDepth = weights.size(0);
long inDepth = weights.size(1);
int kH = (int) weights.size(2);
int kW = (int) weights.size(3);
int[] dilation = getTypedLayerConfiguration().getDilation();
int[] kernel = getTypedLayerConfiguration().getKernelSize();
int[] strides = getTypedLayerConfiguration().getStride();
int[] pad;
int[] outSize;
if (convolutionMode == ConvolutionMode.Same) {
outSize =
Convolution2DUtils.getOutputSize(
input,
kernel,
strides,
null,
convolutionMode,
dilation,
CNN2DFormat.NCHW); // Also performs validation
pad =
Convolution2DUtils.getSameModeTopLeftPadding(
outSize, new int[] {inH, inW}, kernel, strides, dilation);
} else {
pad = getTypedLayerConfiguration().getPadding();
outSize =
Convolution2DUtils.getOutputSize(
input,
kernel,
strides,
pad,
convolutionMode,
dilation,
CNN2DFormat.NCHW); // Also performs validation
}
int outH = outSize[0];
int outW = outSize[1];
INDArray biasGradView = gradientViews.get(ConvolutionParamInitializer.BIAS_KEY);
INDArray weightGradView =
gradientViews.get(
ConvolutionParamInitializer.WEIGHT_KEY); // 4d, c order. Shape: [outDepth,inDepth,kH,kW]
INDArray weightGradView2df =
Shape.newShapeNoCopy(weightGradView, new long[] {outDepth, inDepth * kH * kW}, false)
.transpose();
INDArray delta;
IActivation afn = getTypedLayerConfiguration().getActivationFn();
Pair<INDArray, INDArray> p = preOutput4d(true, true, workspaceMgr);
INDArray z = p.getFirst();
CNN2DFormat f = getTypedLayerConfiguration().getConvFormat();
if (f != CNN2DFormat.NCHW) {
z = z.permute(0, 3, 1, 2); // NHWC to NCHW
}
delta = afn.backprop(z, epsilon).getFirst(); // TODO handle activation function params
if (helper != null
&& (helperCountFail == 0 || !getTypedLayerConfiguration().isCudnnAllowFallback())) {
INDArray helperDelta = delta;
if (getTypedLayerConfiguration().getConvFormat() == CNN2DFormat.NHWC)
helperDelta = delta.permute(0, 2, 3, 1); // NCHW to NHWC
if (!hasBias() && !(helper instanceof MKLDNNConvHelper)) {
// MKL-DNN supports no bias, CuDNN doesn't
if (dummyBiasGrad == null) {
try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) {
dummyBiasGrad = Nd4j.create(1, getTypedLayerConfiguration().getNOut());
}
}
biasGradView = dummyBiasGrad;
}
Pair<Gradient, INDArray> ret = null;
try {
ret =
helper.backpropGradient(
origInput,
weights,
bias,
helperDelta,
kernel,
strides,
pad,
biasGradView,
weightGradView,
afn,
getTypedLayerConfiguration().getCudnnAlgoMode(),
getTypedLayerConfiguration().getCudnnBwdFilterAlgo(),
getTypedLayerConfiguration().getCudnnBwdDataAlgo(),
convolutionMode,
dilation,
getTypedLayerConfiguration().getConvFormat(),
workspaceMgr);
} catch (ND4JOpProfilerException e) {
throw e; // NaN panic etc for debugging
} catch (Exception e) {
if (e.getMessage().contains("Failed to allocate")) {
// This is a memory exception - don't fallback to built-in implementation
throw e;
}
if (getTypedLayerConfiguration().isCudnnAllowFallback()) {
helperCountFail++;
if (helper instanceof MKLDNNConvHelper) {
log.warn("MKL-DNN execution failed - falling back on built-in implementation", e);
} else {
log.warn("CuDNN execution failed - falling back on built-in implementation", e);
}
} else {
throw new RuntimeException(
"Error during ConvolutionLayer MKL/CuDNN helper backprop - isCudnnAllowFallback() is set to false",
e);
}
}
if (ret != null) {
// Backprop dropout, if present
INDArray gradPostDropout = ret.getRight();
gradPostDropout = backpropDropOutIfPresent(gradPostDropout);
ret.setSecond(gradPostDropout);
return ret;
}
}
delta = delta.permute(1, 0, 2, 3); // To shape: [outDepth,miniBatch,outH,outW]
// Note: due to the permute in preOut, and the fact that we essentially do a
// preOut.muli(epsilon), this reshape
// should be zero-copy; only possible exception being sometimes with the "identity" activation
// case
INDArray delta2d =
delta.reshape('c', outDepth, miniBatch * outH * outW); // Shape.newShapeNoCopy(delta,new
// int[]{outDepth,miniBatch*outH*outW},false);
// Do im2col, but with order [miniB,outH,outW,depthIn,kH,kW]; but need to input
// [miniBatch,channels,kH,kW,outH,outW] given the current im2col implementation
// To get this: create an array of the order we want, permute it to the order required by im2col
// implementation, and then do im2col on that
// to get old order from required order: permute(0,3,4,5,1,2)
INDArray im2col2d =
p.getSecond(); // Re-use im2col2d array from forward pass if available; recalculate if not
if (im2col2d == null) {
INDArray col =
Nd4j.createUninitialized(
dataType, new long[] {miniBatch, outH, outW, inDepth, kH, kW}, 'c');
INDArray col2 = col.permute(0, 3, 4, 5, 1, 2);
Convolution.im2col(
input,
kH,
kW,
strides[0],
strides[1],
pad[0],
pad[1],
dilation[0],
dilation[1],
convolutionMode == ConvolutionMode.Same,
col2);
// Shape im2col to 2d. Due to the permuting above, this should be a zero-copy reshape
im2col2d = col.reshape('c', miniBatch * outH * outW, inDepth * kH * kW);
}
// Calculate weight gradients, using cc->c mmul.
// weightGradView2df is f order, but this is because it's transposed from c order
// Here, we are using the fact that AB = (B^T A^T)^T; output here (post transpose) is in c
// order, not usual f order
Nd4j.gemm(im2col2d, delta2d, weightGradView2df, true, true, 1.0, 0.0);
// Flatten 4d weights to 2d... this again is a zero-copy op (unless weights are not originally
// in c order for some reason)
INDArray wPermuted =
weights.permute(3, 2, 1, 0); // Start with c order weights, switch order to f order
INDArray w2d = wPermuted.reshape('f', inDepth * kH * kW, outDepth);
// Calculate epsilons for layer below, in 2d format (note: this is in 'image patch' format
// before col2im reduction)
// Note: cc -> f mmul here, then reshape to 6d in f order
INDArray epsNext2d =
w2d.mmul(delta2d); // TODO can we reuse im2col array instead of allocating new result array?
INDArray eps6d =
Shape.newShapeNoCopy(epsNext2d, new long[] {kW, kH, inDepth, outW, outH, miniBatch}, true);
// Calculate epsilonNext by doing im2col reduction.
// Current col2im implementation expects input with order: [miniBatch,channels,kH,kW,outH,outW]
// currently have [kH,kW,inDepth,outW,outH,miniBatch] -> permute first
eps6d = eps6d.permute(5, 2, 1, 0, 4, 3);
INDArray epsNextOrig =
workspaceMgr.createUninitialized(
ArrayType.ACTIVATION_GRAD,
eps6d.dataType(),
new long[] {inDepth, miniBatch, inH, inW},
'c');
// Note: we are execute col2im in a way that the output array should be used in a stride 1 muli
// in the layer below... (same strides as zs/activations)
INDArray epsNext = epsNextOrig.permute(1, 0, 2, 3);
Convolution.col2im(
eps6d, epsNext, strides[0], strides[1], pad[0], pad[1], inH, inW, dilation[0], dilation[1]);
Gradient retGradient = new DefaultGradient();
if (getTypedLayerConfiguration().hasBias()) {
delta2d.sum(biasGradView, 1); // biasGradView is initialized/zeroed first in sum op
retGradient.setGradientFor(ConvolutionParamInitializer.BIAS_KEY, biasGradView);
}
retGradient.setGradientFor(ConvolutionParamInitializer.WEIGHT_KEY, weightGradView, 'c');
weightNoiseParams.clear();
epsNext = backpropDropOutIfPresent(epsNext);
if (getTypedLayerConfiguration().getConvFormat() != CNN2DFormat.NCHW) {
epsNext = epsNext.permute(0, 2, 3, 1); // NCHW to NHWC
}
return new Pair<>(retGradient, epsNext);
}
/**
* preOutput4d: Used so that ConvolutionLayer subclasses (such as Convolution1D) can maintain
* their standard non-4d preOutput method, while overriding this to return 4d activations (for use
* in backprop) without modifying the public API
*/
protected Pair<INDArray, INDArray> preOutput4d(
boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) {
return preOutput(training, forBackprop, workspaceMgr);
}
protected void validateInputRank() {
// Input validation: expect rank 4 matrix
if (input.rank() != 4) {
String layerName = layerConfiguration.getName();
if (layerName == null) layerName = "(not named)";
throw new DL4JInvalidInputException(
"Got rank "
+ input.rank()
+ " array as input to ConvolutionLayer (layer name = "
+ layerName
+ ", layer index = "
+ index
+ ") with shape "
+ Arrays.toString(input.shape())
+ ". "
+ "Expected rank 4 array with shape [minibatchSize, layerInputDepth, inputHeight, inputWidth]."
+ (input.rank() == 2
? " (Wrong input type (see InputType.convolutionalFlat()) or wrong data type?)"
: "")
+ " "
+ layerId());
}
}
protected void validateInputDepth(long inDepth) {
CNN2DFormat format = getTypedLayerConfiguration().getConvFormat();
int dim = format == CNN2DFormat.NHWC ? 3 : 1;
if (input.size(dim) != inDepth) {
String layerName = layerConfiguration.getName();
if (layerName == null) layerName = "(not named)";
String s =
"Cannot do forward pass in Convolution layer (layer name = "
+ layerName
+ ", layer index = "
+ index
+ "): input array channels does not match CNN layer configuration"
+ " (data format = "
+ format
+ ", data input channels = "
+ input.size(dim)
+ ", "
+ getTypedLayerConfiguration().getConvFormat().dimensionNames()
+ "="
+ Arrays.toString(input.shape())
+ "; expected"
+ " input channels = "
+ inDepth
+ ") "
+ layerId();
int dimIfWrongFormat = format == CNN2DFormat.NHWC ? 1 : 3;
if (input.size(dimIfWrongFormat) == inDepth) {
// User might have passed NCHW data to a NHWC net, or vice versa?
s += "\n" + Convolution2DUtils.NCHW_NHWC_ERROR_MSG;
}
throw new DL4JInvalidInputException(s);
}
}
/**
* PreOutput method that also returns the im2col2d array (if being called for backprop), as this
* can be re-used instead of being calculated again.
*
* @param training Train or test time (impacts dropout)
* @param forBackprop If true: return the im2col2d array for re-use during backprop. False: return
* null for second pair entry. Note that it may still be null in the case of CuDNN and the
* like.
* @return Pair of arrays: preOutput (activations) and optionally the im2col2d array
*/
protected Pair<INDArray, INDArray> preOutput(
boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) {
assertInputSet(false);
INDArray bias = getParamWithNoise(ConvolutionParamInitializer.BIAS_KEY, training, workspaceMgr);
INDArray weights =
getParamWithNoise(ConvolutionParamInitializer.WEIGHT_KEY, training, workspaceMgr);
validateInputRank();
INDArray input = this.input.castTo(dataType);
INDArray inputOrig = input;
if (getTypedLayerConfiguration().getConvFormat() == CNN2DFormat.NHWC) {
input = input.permute(0, 3, 1, 2).dup(); // NHWC to NCHW
}
long miniBatch = input.size(0);
long outDepth = weights.size(0);
long inDepth = weights.size(1);
validateInputDepth(inDepth);
long kH = weights.size(2);
long kW = weights.size(3);
int[] dilation = getTypedLayerConfiguration().getDilation();
int[] kernel = getTypedLayerConfiguration().getKernelSize();
int[] strides = getTypedLayerConfiguration().getStride();
int[] pad;
int[] outSize;
if (convolutionMode == ConvolutionMode.Same) {
outSize =
Convolution2DUtils.getOutputSize(
input,
kernel,
strides,
null,
convolutionMode,
dilation,
CNN2DFormat.NCHW); // Note: hardcoded to NCHW due to permute earlier in this method
if (input.size(2) > Integer.MAX_VALUE || input.size(3) > Integer.MAX_VALUE)
throw new ND4JArraySizeException();
int[] inWidthHeight;
// if(layerConf().getCnn2dDataFormat() == CNN2DFormat.NCHW)
// TODO: Switch hardcoded state later. For now, convolution is implemented as
// switch to NCHW then permute back for NWHC
inWidthHeight = new int[] {(int) input.size(2), (int) input.size(3)};
/* else if(layerConf().getCnn2dDataFormat() == CNN2DFormat.NHWC) {
inWidthHeight = new int[] {(int) input.size(1), (int) input.size(2)};
}
else
throw new IllegalStateException("No data format configured!");*/
pad =
Convolution2DUtils.getSameModeTopLeftPadding(
outSize, inWidthHeight, kernel, strides, dilation);
} else {
pad = getTypedLayerConfiguration().getPadding();
outSize =
Convolution2DUtils.getOutputSize(
input,
kernel,
strides,
pad,
convolutionMode,
dilation,
CNN2DFormat.NCHW); // Note: hardcoded to NCHW due to permute earlier in this method
}
int outH = outSize[0];
int outW = outSize[1];
if (helper != null
&& (helperCountFail == 0 || !getTypedLayerConfiguration().isCudnnAllowFallback())) {
if (preOutput != null && forBackprop) {
return new Pair<>(preOutput, null);
}
// For no-bias convolutional layers: use an empty (all 0s) value for biases
if (!hasBias()) {
if (dummyBias == null) {
try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) {
dummyBias = Nd4j.create(1, getTypedLayerConfiguration().getNOut());
}
}
bias = dummyBias;
}
INDArray ret = null;
try {
ret =
helper.preOutput(
inputOrig,
weights,
bias,
kernel,
strides,
pad,
getTypedLayerConfiguration().getCudnnAlgoMode(),
getTypedLayerConfiguration().getCudnnFwdAlgo(),
convolutionMode,
dilation,
getTypedLayerConfiguration().getConvFormat(),
workspaceMgr);
} catch (ND4JOpProfilerException e) {
throw e; // NaN panic etc for debugging
} catch (Exception e) {
if (e.getMessage() != null && e.getMessage().contains("Failed to allocate")) {
// This is a memory exception - don't fallback to built-in implementation
throw e;
}
if (getTypedLayerConfiguration().isCudnnAllowFallback()) {
helperCountFail++;
if (helper instanceof MKLDNNConvHelper) {
log.warn("MKL-DNN execution failed - falling back on built-in implementation", e);
} else {
log.warn("CuDNN execution failed - falling back on built-in implementation", e);
}
} else {
throw new RuntimeException(
"Error during ConvolutionLayer MKL/CuDNN helper forward pass - isCudnnAllowFallback() is set to false",
e);
}
}
if (ret != null) {
return new Pair<>(ret, null);
}
}
if (preOutput != null && i2d != null && forBackprop) {
return new Pair<>(preOutput, i2d);
}
// im2col in the required order: want [outW,outH,miniBatch,depthIn,kH,kW], but need to input
// [miniBatch,channels,kH,kW,outH,outW] given the current im2col implementation
// To get this: create an array of the order we want, permute it to the order required by im2col
// implementation, and then do im2col on that
// to get old order from required order: permute(0,3,4,5,1,2)
// Post reshaping: rows are such that minibatch varies slowest, outW fastest as we step through
// the rows post-reshape
INDArray col =
Nd4j.createUninitialized(
weights.dataType(), new long[] {miniBatch, outH, outW, inDepth, kH, kW}, 'c');
int[] permute = new int[] {0, 3, 4, 5, 1, 2};
INDArray col2 = col.permute(permute);
INDArray im2ColIn = input.castTo(col2.dataType()); // No op if already (for example) float
if (kH > Integer.MAX_VALUE || kW > Integer.MAX_VALUE) throw new ND4JArraySizeException();
Convolution.im2col(
im2ColIn,
(int) kH,
(int) kW,
strides[0],
strides[1],
pad[0],
pad[1],
dilation[0],
dilation[1],
convolutionMode == ConvolutionMode.Same,
col2);
INDArray im2col2d =
Shape.newShapeNoCopy(col, new long[] {miniBatch * outH * outW, inDepth * kH * kW}, false);
// Current order of weights: [depthOut,depthIn,kH,kW], c order
// Permute to give [kW,kH,depthIn,depthOut], f order
// Reshape to give [kW*kH*depthIn, depthOut]. This should always be zero-copy reshape, unless
// weights aren't in c order for some reason
INDArray permutedW = weights.permute(3, 2, 1, 0);
INDArray reshapedW = permutedW.reshape('f', kW * kH * inDepth, outDepth);
// Do the MMUL; c and f orders in, f order out. output shape: [miniBatch*outH*outW,depthOut]
INDArray z =
workspaceMgr.createUninitialized(
ArrayType.ACTIVATIONS,
weights.dataType(),
new long[] {im2col2d.size(0), reshapedW.size(1)},
'f');
im2col2d.mmuli(reshapedW, z);
// Add biases, before reshaping. Note that biases are [1,depthOut] and currently z is
// [miniBatch*outH*outW,depthOut] -> addiRowVector
if (getTypedLayerConfiguration().hasBias()) {
z.addiRowVector(bias);
}
// Now, reshape to [outW,outH,miniBatch,outDepth], and permute to have correct output order:
// [miniBatch,outDepth,outH,outW];
z = Shape.newShapeNoCopy(z, new long[] {outW, outH, miniBatch, outDepth}, true);
z = z.permute(2, 3, 1, 0);
if (training
&& cacheMode != CacheMode.NONE
&& workspaceMgr.hasConfiguration(ArrayType.FF_CACHE)
&& workspaceMgr.isWorkspaceOpen(ArrayType.FF_CACHE)) {
try (MemoryWorkspace wsB = workspaceMgr.notifyScopeBorrowed(ArrayType.FF_CACHE)) {
i2d = im2col2d.unsafeDuplication();
}
}
if (getTypedLayerConfiguration().getConvFormat() == CNN2DFormat.NHWC) {
z = z.permute(0, 2, 3, 1); // NCHW to NHWC
z = workspaceMgr.dup(ArrayType.ACTIVATIONS, z);
}
return new Pair<>(z, forBackprop ? im2col2d : null);
}
@Override
public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) {
if (input == null) {
throw new IllegalArgumentException(
"Cannot perform forward pass with null input " + layerId());
}
if (cacheMode == null) cacheMode = CacheMode.NONE;
applyDropOutIfNecessary(training, workspaceMgr);
INDArray z = preOutput(training, false, workspaceMgr).getFirst();
// we do cache only if cache workspace exists. Skip otherwise
if (training
&& cacheMode != CacheMode.NONE
&& workspaceMgr.hasConfiguration(ArrayType.FF_CACHE)
&& workspaceMgr.isWorkspaceOpen(ArrayType.FF_CACHE)) {
try (MemoryWorkspace wsB = workspaceMgr.notifyScopeBorrowed(ArrayType.FF_CACHE)) {
preOutput = z.unsafeDuplication();
}
}
// String afn = conf.getLayer().getActivationFunction();
IActivation afn = getTypedLayerConfiguration().getActivationFn();
if (helper != null
&& Shape.strideDescendingCAscendingF(z)
&& (helperCountFail == 0 || !getTypedLayerConfiguration().isCudnnAllowFallback())) {
INDArray ret = null;
try {
ret = helper.activate(z, getTypedLayerConfiguration().getActivationFn(), training);
} catch (ND4JOpProfilerException e) {
throw e; // NaN panic etc for debugging
} catch (Exception e) {
if (e.getMessage() != null && e.getMessage().contains("Failed to allocate")) {
// This is a memory exception - don't fallback to built-in implementation
throw e;
}
if (getTypedLayerConfiguration().isCudnnAllowFallback()) {
helperCountFail++;
if (helper instanceof MKLDNNConvHelper) {
log.warn("MKL-DNN execution failed - falling back on built-in implementation", e);
} else {
log.warn("CuDNN execution failed - falling back on built-in implementation", e);
}
} else {
throw new RuntimeException(
"Error during ConvolutionLayer MKL/CuDNN helper forward pass - isCudnnAllowFallback() is set to false",
e);
}
}
if (ret != null) {
return ret;
}
}
INDArray activation = afn.getActivation(z, training);
return activation;
}
@Override
public boolean hasBias() {
return getTypedLayerConfiguration().hasBias();
}
@Override
public boolean isPretrainLayer() {
return false;
}
@Override
public LayerHelper getHelper() {
return helper;
}
@Override
public void fit(INDArray input, LayerWorkspaceMgr workspaceMgr) {
throw new UnsupportedOperationException("Not supported");
}
@Override
public void setParamsTable(INDArray paramsTable) {
// Override, as base layer does f order parameter flattening by default
setParams(paramsTable, 'c');
}
@Override
public Pair<INDArray, MaskState> feedForwardMaskArray(
INDArray maskArray, MaskState currentMaskState, int minibatchSize) {
if (maskArray == null) {
// For same mode (with stride 1): output activations size is always same size as input
// activations size -> mask array is same size
return new Pair<>(maskArray, currentMaskState);
}
INDArray outMask =
Convolution2DUtils.cnn2dMaskReduction(
maskArray,
getTypedLayerConfiguration().getKernelSize(),
getTypedLayerConfiguration().getStride(),
getTypedLayerConfiguration().getPadding(),
getTypedLayerConfiguration().getDilation(),
getTypedLayerConfiguration().getConvolutionMode());
return new Pair<>(outMask, currentMaskState);
}
}
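
As a rough aid to the im2col / GEMM bookkeeping in preOutput and backpropGradient above, the following standalone sketch (example sizes only, not part of the commit) traces the intermediate 2d shapes that the comments in the code refer to.

// Hypothetical shape trace for the im2col + GEMM path; all sizes are made-up examples.
public class Im2colShapeSketch {
  public static void main(String[] args) {
    long miniBatch = 8, inDepth = 3, outDepth = 16;
    long kH = 3, kW = 3, outH = 30, outW = 30;
    // im2col buffer, as created in preOutput: [miniBatch, outH, outW, inDepth, kH, kW],
    // reshaped (zero-copy) to 2d:             [miniBatch*outH*outW, inDepth*kH*kW]
    long[] im2col2d = {miniBatch * outH * outW, inDepth * kH * kW};
    // Weights [outDepth, inDepth, kH, kW], permuted and reshaped to [kW*kH*inDepth, outDepth]
    long[] reshapedW = {kW * kH * inDepth, outDepth};
    // GEMM result z = im2col2d.mmul(reshapedW): [miniBatch*outH*outW, outDepth], later reshaped
    // to [outW, outH, miniBatch, outDepth] and permuted to [miniBatch, outDepth, outH, outW]
    long[] z = {im2col2d[0], reshapedW[1]};
    System.out.println(java.util.Arrays.toString(z)); // [7200, 16]
  }
}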

View File

@ -47,7 +47,7 @@ public class DenseLayer extends BaseLayer<org.deeplearning4j.nn.conf.layers.Dens
@Override
public boolean hasBias() {
-    return super.hasBias();
+    return getTypedLayerConfiguration().isHasBias();
}
@Override

View File

@ -37,195 +37,248 @@ import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.lossfunctions.ILossFunction;
public class RnnOutputLayer
extends BaseOutputLayer<org.deeplearning4j.nn.conf.layers.RnnOutputLayer> {
public RnnOutputLayer(LayerConfiguration conf, DataType dataType) {
super(conf, dataType);
}
@Override
public Pair<Gradient, INDArray> backpropGradient(
INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
assertInputSet(true);
if (input.rank() != 3) {
throw new UnsupportedOperationException(
"Input is not rank 3. RnnOutputLayer expects rank 3 input with shape [minibatch, layerInSize, sequenceLength]."
+ " Got input with rank "
+ input.rank()
+ " and shape "
+ Arrays.toString(input.shape())
+ " - "
+ layerId());
}
RNNFormat format = getTypedLayerConfiguration().getDataFormat();
int td = (format == RNNFormat.NCW) ? 2 : 1; // either NCW or NWC
Preconditions.checkState(
labels.rank() == 3,
"Expected rank 3 labels array, got label array with shape %ndShape",
labels);
Preconditions.checkState(
input.size(td) == labels.size(td),
"Sequence lengths do not match for RnnOutputLayer input and labels:"
+ " Arrays should be rank 3 with shape [minibatch, size, sequenceLength] - "
+ "mismatch on dimension 2 (sequence length) - input=%ndShape vs. label=%ndShape",
input,
labels);
INDArray inputTemp = input;
if (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC) {
this.input = input.permute(0, 2, 1);
}
this.input = TimeSeriesUtils.reshape3dTo2d(input, workspaceMgr, ArrayType.BP_WORKING_MEM);
applyDropOutIfNecessary(
true,
workspaceMgr); // Edge case: we skip OutputLayer forward pass during training as this isn't
// required to calculate gradients
Pair<Gradient, INDArray> gradAndEpsilonNext =
super.backpropGradient(epsilon, workspaceMgr); // Also applies dropout
this.input = inputTemp;
INDArray epsilon2d = gradAndEpsilonNext.getSecond();
INDArray epsilon3d =
TimeSeriesUtils.reshape2dTo3d(
epsilon2d, input.size(0), workspaceMgr, ArrayType.ACTIVATION_GRAD);
if (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC) {
epsilon3d = epsilon3d.permute(0, 2, 1);
}
weightNoiseParams.clear();
// epsilon3d = backpropDropOutIfPresent(epsilon3d);
return new Pair<>(gradAndEpsilonNext.getFirst(), epsilon3d);
}
/** {@inheritDoc} */
@Override
public double f1Score(INDArray examples, INDArray labels) {
if (examples.rank() == 3)
examples =
TimeSeriesUtils.reshape3dTo2d(
examples, LayerWorkspaceMgr.noWorkspaces(), ArrayType.ACTIVATIONS);
if (labels.rank() == 3)
labels =
TimeSeriesUtils.reshape3dTo2d(
labels, LayerWorkspaceMgr.noWorkspaces(), ArrayType.ACTIVATIONS);
return super.f1Score(examples, labels);
}
public INDArray getInput() {
return input;
}
@Override
public Layer.Type type() {
return Layer.Type.RECURRENT;
}
@Override
protected INDArray preOutput2d(boolean training, LayerWorkspaceMgr workspaceMgr) {
assertInputSet(false);
if (input.rank() == 3) {
// Case when called from RnnOutputLayer
INDArray inputTemp = input;
input =
(getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC)
? input.permute(0, 2, 1)
: input;
input = TimeSeriesUtils.reshape3dTo2d(input, workspaceMgr, ArrayType.FF_WORKING_MEM);
INDArray out = super.preOutput(training, workspaceMgr);
this.input = inputTemp;
return out;
} else {
// Case when called from BaseOutputLayer
INDArray out = super.preOutput(training, workspaceMgr);
return out;
}
}
@Override
protected INDArray getLabels2d(LayerWorkspaceMgr workspaceMgr, ArrayType arrayType) {
INDArray labels = this.labels;
if (labels.rank() == 3) {
labels =
(getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC)
? labels.permute(0, 2, 1)
: labels;
return TimeSeriesUtils.reshape3dTo2d(labels, workspaceMgr, arrayType);
}
return labels;
}
@Override
public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) {
INDArray input = this.input;
if (input.rank() != 3)
throw new UnsupportedOperationException(
"Input must be rank 3. Got input with rank " + input.rank() + " " + layerId());
INDArray b = getParamWithNoise(DefaultParamInitializer.BIAS_KEY, training, workspaceMgr);
INDArray W = getParamWithNoise(DefaultParamInitializer.WEIGHT_KEY, training, workspaceMgr);
applyDropOutIfNecessary(training, workspaceMgr);
if (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC) {
input = input.permute(0, 2, 1);
}
INDArray input2d =
TimeSeriesUtils.reshape3dTo2d(
input.castTo(W.dataType()), workspaceMgr, ArrayType.FF_WORKING_MEM);
INDArray act2d =
getTypedLayerConfiguration()
.getActivationFn()
.getActivation(input2d.mmul(W).addiRowVector(b), training);
if (maskArray != null) {
if (!maskArray.isColumnVectorOrScalar() || Arrays.equals(maskArray.shape(), act2d.shape())) {
// Per output masking
act2d.muli(maskArray.castTo(act2d.dataType()));
} else {
// Per time step masking
act2d.muliColumnVector(maskArray.castTo(act2d.dataType()));
}
}
INDArray ret =
TimeSeriesUtils.reshape2dTo3d(act2d, input.size(0), workspaceMgr, ArrayType.ACTIVATIONS);
if (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC) {
ret = ret.permute(0, 2, 1);
}
return ret;
}
@Override
public void setMaskArray(INDArray maskArray) {
if (maskArray != null) {
// Two possible cases:
// (a) per time step masking - rank 2 mask array -> reshape to rank 1 (column vector)
// (b) per output masking - rank 3 mask array -> reshape to rank 2
if (maskArray.rank() == 2) {
this.maskArray =
TimeSeriesUtils.reshapeTimeSeriesMaskToVector(
maskArray, LayerWorkspaceMgr.noWorkspacesImmutable(), ArrayType.INPUT);
} else if (maskArray.rank() == 3) {
this.maskArray =
TimeSeriesUtils.reshape3dTo2d(
maskArray, LayerWorkspaceMgr.noWorkspacesImmutable(), ArrayType.INPUT);
} else {
throw new UnsupportedOperationException(
"Invalid mask array: must be rank 2 or 3 (got: rank "
+ maskArray.rank()
+ ", shape = "
+ Arrays.toString(maskArray.shape())
+ ") "
+ layerId());
}
} else {
this.maskArray = null;
}
}
@Override
public Pair<INDArray, MaskState> feedForwardMaskArray(
INDArray maskArray, MaskState currentMaskState, int minibatchSize) {
// If the *input* mask array is present and active, we should use it to mask the output
if (maskArray != null && currentMaskState == MaskState.Active) {
this.inputMaskArray =
TimeSeriesUtils.reshapeTimeSeriesMaskToVector(
maskArray, LayerWorkspaceMgr.noWorkspacesImmutable(), ArrayType.INPUT);
this.inputMaskArrayState = currentMaskState;
} else {
this.inputMaskArray = null;
this.inputMaskArrayState = null;
}
return null; // Last layer in network
}
/**
* Compute the score for each example individually, after labels and input have been set.
*
* @param fullNetRegTerm Regularization score term for the entire network (or, 0.0 to not include
* regularization)
* @return A column INDArray of shape [numExamples,1], where entry i is the score of the ith
* example
*/
@Override
public INDArray computeScoreForExamples(double fullNetRegTerm, LayerWorkspaceMgr workspaceMgr) {
// For RNN: need to sum up the score over each time step before returning.
if (input == null || labels == null)
throw new IllegalStateException(
"Cannot calculate score without input and labels " + layerId());
INDArray preOut = preOutput2d(false, workspaceMgr);
ILossFunction lossFunction = getTypedLayerConfiguration().getLossFunction();
INDArray scoreArray =
lossFunction.computeScoreArray(
getLabels2d(workspaceMgr, ArrayType.FF_WORKING_MEM),
preOut,
getTypedLayerConfiguration().getActivationFn(),
maskArray);
// scoreArray: shape [minibatch*timeSeriesLength, 1]
// Reshape it to [minibatch, timeSeriesLength] then sum over time step
INDArray scoreArrayTs =
TimeSeriesUtils.reshapeVectorToTimeSeriesMask(scoreArray, (int) input.size(0));
INDArray summedScores = scoreArrayTs.sum(true, 1);
if (fullNetRegTerm != 0.0) {
summedScores.addi(fullNetRegTerm);
}
return summedScores;
}
}
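
The methods above repeatedly flatten a rank-3 time series [minibatch, size, sequenceLength] to a 2d array before applying the dense output-layer math, and reshape back afterwards. A minimal standalone sketch of that flattening convention (assumed NCW input; this is an analogy to, not the exact implementation of, TimeSeriesUtils.reshape3dTo2d, and it is not part of this commit):

// Hypothetical reshape walk-through; sizes are made-up examples.
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class RnnReshapeSketch {
  public static void main(String[] args) {
    long mb = 2, size = 5, tsLength = 7;
    INDArray in3d = Nd4j.create(DataType.FLOAT, mb, size, tsLength); // NCW: [mb, size, tsLength]
    // Move time next to the minibatch dimension, then flatten both into the row dimension
    INDArray in2d = in3d.permute(0, 2, 1).dup('c').reshape(mb * tsLength, size);
    System.out.println(java.util.Arrays.toString(in2d.shape())); // [14, 5]
    // After the dense output-layer math, the 2d activations are mapped back to 3d in reverse.
  }
}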

View File

@ -47,7 +47,7 @@ import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator;
@Data
- public abstract class BaseWrapperLayer extends AbstractLayer {
+ public abstract class BaseWrapperLayer<LayerConf_T extends LayerConfiguration> extends AbstractLayer {
protected Layer underlying;
@ -57,8 +57,8 @@ public abstract class BaseWrapperLayer extends AbstractLayer {
}
@Override
- public BaseLayerConfiguration getTypedLayerConfiguration() {
-     return (BaseLayerConfiguration) underlying.getLayerConfiguration();
+ public LayerConf_T getTypedLayerConfiguration() {
+     return (LayerConf_T) underlying.getLayerConfiguration();
}
/**

View File

@ -712,7 +712,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork
if (layer_conf instanceof BaseLayerConfiguration)
((BaseLayerConfiguration) layer_conf).setDataType(netDtype);
- nParamsPerLayer[i] = layer_conf.initializer().numParams(layer_conf);
+ nParamsPerLayer[i] = layer_conf.numParams();
paramLength += nParamsPerLayer[i];
}
log.debug(

View File

@ -0,0 +1,183 @@
/*
* ******************************************************************************
* *
* *
* * This program and the accompanying materials are made available under the
* * terms of the Apache License, Version 2.0 which is available at
* * https://www.apache.org/licenses/LICENSE-2.0.
* *
* * See the NOTICE file distributed with this work for additional
* * information regarding copyright ownership.
* * Unless required by applicable law or agreed to in writing, software
* * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* * License for the specific language governing permissions and limitations
* * under the License.
* *
* * SPDX-License-Identifier: Apache-2.0
* *****************************************************************************
*/
package org.deeplearning4j.nn.params;
import java.util.*;
import lombok.val;
import org.deeplearning4j.nn.api.AbstractParamInitializer;
import org.deeplearning4j.nn.conf.layers.Convolution1DNew;
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
import org.deeplearning4j.nn.conf.layers.LayerConfiguration;
import org.deeplearning4j.nn.weights.WeightInitUtil;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.indexing.NDArrayIndex;
public class ConvolutionNewParamInitializer extends AbstractParamInitializer {
private static final ConvolutionNewParamInitializer INSTANCE = new ConvolutionNewParamInitializer();
public static ConvolutionNewParamInitializer getInstance() {
return INSTANCE;
}
public final static String WEIGHT_KEY = DefaultParamInitializer.WEIGHT_KEY;
public final static String BIAS_KEY = DefaultParamInitializer.BIAS_KEY;
@Override
public long numParams(LayerConfiguration l) {
return l.numParams();
}
@Override
public List<String> paramKeys(LayerConfiguration layer) {
ConvolutionLayer layerConf =
(ConvolutionLayer) layer;
if(layerConf.hasBias()){
return Arrays.asList(WEIGHT_KEY, BIAS_KEY);
} else {
return weightKeys(layer);
}
}
@Override
public List<String> weightKeys(LayerConfiguration layer) {
return Collections.singletonList(WEIGHT_KEY);
}
@Override
public List<String> biasKeys(LayerConfiguration layer) {
ConvolutionLayer layerConf =
(ConvolutionLayer) layer;
if(layerConf.hasBias()){
return Collections.singletonList(BIAS_KEY);
} else {
return Collections.emptyList();
}
}
@Override
public boolean isWeightParam(LayerConfiguration layer, String key) {
return WEIGHT_KEY.equals(key);
}
@Override
public boolean isBiasParam(LayerConfiguration layer, String key) {
return BIAS_KEY.equals(key);
}
@Override
public Map<String, INDArray> init(LayerConfiguration conf, INDArray paramsView, boolean initializeParams) {
Convolution1DNew layer = (Convolution1DNew) conf;
if (layer.getKernelSize().length != 2) throw new IllegalArgumentException("Filter size must be == 2");
Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());
Convolution1DNew layerConf =
(Convolution1DNew) conf;
val nOut = layerConf.getNOut();
if(layer.hasBias()){
//Standard case
INDArray biasView = paramsView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(0, nOut));
INDArray weightView = paramsView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(nOut, numParams(conf)));
params.put(BIAS_KEY, createBias(conf, biasView, initializeParams));
params.put(WEIGHT_KEY, createWeightMatrix(conf, weightView, initializeParams));
conf.getNetConfiguration().addNetWideVariable(WEIGHT_KEY);
conf.getNetConfiguration().addNetWideVariable(BIAS_KEY);
} else {
INDArray weightView = paramsView;
params.put(WEIGHT_KEY, createWeightMatrix(conf, weightView, initializeParams));
conf.getNetConfiguration().addNetWideVariable(WEIGHT_KEY);
}
return params;
}
@Override
public Map<String, INDArray> getGradientsFromFlattened(LayerConfiguration conf, INDArray gradientView) {
Convolution1DNew layerConf =
(Convolution1DNew) conf;
int[] kernel = layerConf.getKernelSize();
val nIn = layerConf.getNIn();
val nOut = layerConf.getNOut();
Map<String, INDArray> out = new LinkedHashMap<>();
if(layerConf.hasBias()){
//Standard case
INDArray biasGradientView = gradientView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(0, nOut));
INDArray weightGradientView =
gradientView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(nOut, numParams(conf)))
.reshape('c', nOut, nIn, kernel[0], kernel[1]);
out.put(BIAS_KEY, biasGradientView);
out.put(WEIGHT_KEY, weightGradientView);
} else {
INDArray weightGradientView = gradientView.reshape('c', nOut, nIn, kernel[0], kernel[1]);
out.put(WEIGHT_KEY, weightGradientView);
}
return out;
}
//1 bias per feature map
protected INDArray createBias(LayerConfiguration conf, INDArray biasView, boolean initializeParams) {
//the bias is a 1D tensor -- one bias per output feature map
Convolution1DNew layerConf =
(Convolution1DNew) conf;
if (initializeParams)
biasView.assign(layerConf.getBiasInit());
return biasView;
}
protected INDArray createWeightMatrix(LayerConfiguration conf, INDArray weightView, boolean initializeParams) {
/*
Create a 4d weight matrix of:
(number of kernels, num input channels, kernel height, kernel width)
Note c order is used specifically for the CNN weights, as opposed to f order elsewhere
Inputs to the convolution layer are:
(batch size, num input feature maps, image height, image width)
*/
Convolution1DNew layerConf =
(Convolution1DNew) conf;
if (initializeParams) {
int[] kernel = layerConf.getKernelSize();
int[] stride = layerConf.getStride();
val inputDepth = layerConf.getNIn();
val outputDepth = layerConf.getNOut();
double fanIn = inputDepth * kernel[0] * kernel[1];
double fanOut = outputDepth * kernel[0] * kernel[1] / ((double) stride[0] * stride[1]);
val weightsShape = new long[] {outputDepth, inputDepth, kernel[0], kernel[1]};
return layerConf.getWeightInit().init(fanIn, fanOut, weightsShape, 'c', weightView);
} else {
int[] kernel = layerConf.getKernelSize();
return WeightInitUtil.reshapeWeights(
new long[] {layerConf.getNOut(), layerConf.getNIn(), kernel[0], kernel[1]}, weightView, 'c');
}
}
}
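
To make the flattened-view slicing in init() and getGradientsFromFlattened() easier to follow, here is a small standalone sketch (example numbers only, not part of the commit) of how many parameters the initializer expects and how fanIn/fanOut are derived from the configuration:

// Hypothetical parameter-count walk-through; nIn/nOut/kernel/stride are made-up example values.
public class ConvParamCountSketch {
  public static void main(String[] args) {
    long nIn = 3, nOut = 16;
    int[] kernel = {3, 1};          // [kH, kW]; for the 1D case kW is 1
    int[] stride = {1, 1};
    boolean hasBias = true;
    long weightParams = nOut * nIn * kernel[0] * kernel[1]; // weight view, shape [nOut, nIn, kH, kW]
    long biasParams = hasBias ? nOut : 0;                   // one bias per output feature map
    long total = weightParams + biasParams;                 // length of the flattened paramsView
    double fanIn = nIn * kernel[0] * kernel[1];
    double fanOut = nOut * kernel[0] * kernel[1] / ((double) stride[0] * stride[1]);
    System.out.println(total + " params, fanIn=" + fanIn + ", fanOut=" + fanOut);
    // Prints: 160 params, fanIn=9.0, fanOut=48.0
  }
}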

View File

@ -20,7 +20,6 @@
package org.deeplearning4j.util; package org.deeplearning4j.util;
import java.util.Arrays; import java.util.Arrays;
import org.deeplearning4j.exception.DL4JInvalidConfigException; import org.deeplearning4j.exception.DL4JInvalidConfigException;
import org.deeplearning4j.exception.DL4JInvalidInputException; import org.deeplearning4j.exception.DL4JInvalidInputException;
@ -35,281 +34,332 @@ import org.nd4j.linalg.exception.ND4JArraySizeException;
public class Convolution1DUtils { public class Convolution1DUtils {
private static final int ONE = 1; private static final int ONE = 1;
  private Convolution1DUtils() {}

  public static int getOutputSize(
      INDArray inputData, int kernel, int strides, int padding, ConvolutionMode convolutionMode) {
    return getOutputSize(inputData, kernel, strides, padding, convolutionMode, ONE);
  }

  /**
   * Returns true if the given layer has an {@link RNNFormat}. This is true for: {@link
   * Convolution1D}, {@link Subsampling1DLayer}, {@link SimpleRnn}, {@link LSTM} and {@link
   * EmbeddingSequenceLayer}.
   *
   * @param layer the layer to test
   * @return true if the layer has an RNN format, false otherwise
   */
  public static boolean hasRnnDataFormat(LayerConfiguration layer) {
    return layer instanceof Convolution1D
        || layer instanceof Subsampling1DLayer
        || layer instanceof SimpleRnn
        || layer instanceof LSTM
        || layer instanceof EmbeddingSequenceLayer;
  }

  /**
   * Get the {@link RNNFormat} for the given layer. Throws an {@link IllegalArgumentException} if a
   * layer doesn't have an RNN format.
   *
   * @param layer the layer to get the format for
   * @return the format for the layer
   */
  public static RNNFormat getRnnFormatFromLayer(LayerConfiguration layer) {
    Preconditions.checkState(
        hasRnnDataFormat(layer),
        "ILayer of type "
            + layer.getClass().getName()
            + " and name "
            + layer.getName()
            + " does not have an RNNFormat");
    if (layer instanceof SimpleRnn) {
      SimpleRnn simpleRnn = (SimpleRnn) layer;
      return simpleRnn.getDataFormat();
    } else if (layer instanceof Convolution1D) {
      Convolution1D convolution1D = (Convolution1D) layer;
      return convolution1D.getRnnDataFormat();
    } else if (layer instanceof Subsampling1DLayer) {
      Subsampling1DLayer subsampling1DLayer = (Subsampling1DLayer) layer;
      return subsampling1DLayer.getDataFormat() == CNN2DFormat.NCHW ? RNNFormat.NCW : RNNFormat.NWC;
    } else if (layer instanceof LSTM) {
      LSTM lstm = (LSTM) layer;
      return lstm.getDataFormat();
    } else if (layer instanceof EmbeddingSequenceLayer) {
      EmbeddingSequenceLayer embeddingSequenceLayer = (EmbeddingSequenceLayer) layer;
      return embeddingSequenceLayer.getOutputDataFormat();
    } else {
      throw new IllegalArgumentException(
          "Illegal layer type " + layer.getClass().getName() + " and name " + layer.getName());
    }
  }
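  // Usage sketch (added for illustration, not part of the original file): getRnnFormatFromLayer(...)
  // throws for layers without an RNN format, so callers are expected to gate it behind
  // hasRnnDataFormat(...), for example:
  private static RNNFormat rnnFormatOrNullSketch(LayerConfiguration layer) {
    return hasRnnDataFormat(layer) ? getRnnFormatFromLayer(layer) : null;
  }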
  /**
   * Reshapes the given weight array or weight gradient to work with the specified {@link
   * RNNFormat}.
   *
   * @param w the weight array or gradient
   * @param rnnFormat the {@link RNNFormat} to use
   * @return the reshaped array
   */
  public static INDArray reshapeWeightArrayOrGradientForFormat(INDArray w, RNNFormat rnnFormat) {
    if (rnnFormat == RNNFormat.NWC)
      w = w.reshape(w.ordering(), w.size(0), w.size(1), w.size(2))
              .permute(2, 1, 0); // [oC, iC, k, 1] to [k, iC, oC]
    else {
      w = w.reshape(w.ordering(), w.size(2), w.size(1), w.size(0));
    }
    return w;
  }
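  // Shape sketch (added for illustration, not part of the original file): for NWC a conv1d weight
  // view of shape [oC, iC, k, 1], e.g. [16, 8, 3, 1], comes back with shape [k, iC, oC] = [3, 8, 16].
  private static INDArray reshapeWeightExampleSketch(INDArray w /* [16, 8, 3, 1] */) {
    return reshapeWeightArrayOrGradientForFormat(w, RNNFormat.NWC); // shape [3, 8, 16]
  }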
  /**
   * Get the output size (height) for the given input data and CNN1D configuration
   *
   * @param inH Input size (height, or channels).
   * @param kernel Kernel size
   * @param strides Stride
   * @param padding Padding
   * @param convolutionMode Convolution mode (Same, Strict, Truncate)
   * @param dilation Kernel dilation
   * @return Output size (width)
   */
  public static long getOutputSize(
      long inH,
      int kernel,
      int strides,
      int padding,
      ConvolutionMode convolutionMode,
      int dilation) {
    long eKernel = effectiveKernelSize(kernel, dilation);
    if (convolutionMode == ConvolutionMode.Same || convolutionMode == ConvolutionMode.Causal) {
      return (int) Math.ceil(inH / ((double) strides));
    }
    return (inH - eKernel + 2L * padding) / strides + 1;
  }

  /**
   * Get the output size (height) for the given input data and CNN1D configuration
   *
   * @param inputData Input data
   * @param kernel Kernel size
   * @param strides Stride
   * @param padding Padding
   * @param convolutionMode Convolution mode (Same, Strict, Truncate)
   * @param dilation Kernel dilation
   * @return Output size (width)
   */
  public static int getOutputSize(
      INDArray inputData,
      int kernel,
      int strides,
      int padding,
      ConvolutionMode convolutionMode,
      int dilation) {
    if (inputData.size(2) > Integer.MAX_VALUE) throw new ND4JArraySizeException();
    int inH = (int) inputData.size(2);
    int eKernel = effectiveKernelSize(kernel, dilation);
    boolean atrous = (eKernel == kernel);
    validateShapes(inputData, eKernel, strides, padding, convolutionMode, dilation, inH, atrous);
    if (convolutionMode == ConvolutionMode.Same || convolutionMode == ConvolutionMode.Causal) {
      int outH = (int) Math.ceil(inH / ((double) strides));
      return outH;
    }
    int outH = (inH - eKernel + 2 * padding) / strides + 1;
    return outH;
  }

  public static void validateShapes(
      INDArray inputData,
      int eKernel,
      int strides,
      int padding,
      ConvolutionMode convolutionMode,
      int dilation,
      int inShape,
      boolean atrous) {

    int inH = inShape;
    boolean t = convolutionMode == ConvolutionMode.Truncate;

    if (t && (eKernel <= 0 || eKernel > inH + 2 * padding)) {
      StringBuilder sb = new StringBuilder();
      sb.append("Invalid input data or configuration: ");
      if (atrous) sb.append("effective ");
      sb.append("kernel height and input height must satisfy 0 < ");
      if (atrous) sb.append("effective ");
      sb.append("kernel height <= input height + 2 * padding height. \nGot ");
      if (atrous) sb.append("effective ");
      sb.append("kernel height = ")
          .append(eKernel)
          .append(", input height = ")
          .append(inH)
          .append(" and padding height = ")
          .append(padding)
          .append(" which do not satisfy 0 < ")
          .append(eKernel)
          .append(" <= ")
          .append(inH + 2 * padding)
          .append(getCommonErrorMsg(inputData, eKernel, strides, padding, dilation));
      throw new DL4JInvalidInputException(sb.toString());
    }

    if (convolutionMode == ConvolutionMode.Strict) {
      if ((inH - eKernel + 2 * padding) % strides != 0) {
        double d = (inH - eKernel + 2 * padding) / ((double) strides) + 1.0;
        String str = String.format("%.2f", d);
        int truncated = (int) d;
        int sameSize = (int) Math.ceil(inH / ((double) strides));

        String sb =
            "Invalid input data or configuration: Combination of kernel size, "
                + "stride and padding are not "
                + "valid for given input height, using ConvolutionMode.Strict\n"
                + "ConvolutionMode.Strict requires: output height = (input height - kernelSize + "
                + "2*padding)/stride + 1 to be an integer. Got: ("
                + inH
                + " - "
                + eKernel
                + " + 2*"
                + padding
                + ")/"
                + strides
                + " + 1 = "
                + str
                + "\n"
                + "See \"Constraints on strides\" at http://cs231n.github."
                + "io/convolutional-networks/ and ConvolutionType enumeration Javadoc.\n"
                + "To truncate/crop the input, such that output height = floor("
                + str
                + ") = "
                + truncated
                + ", use ConvolutionType.Truncate.\n"
                + "Alternatively use ConvolutionType.Same, which will use padding to give an "
                + "output height of ceil("
                + inH
                + "/"
                + strides
                + ")="
                + sameSize
                + getCommonErrorMsg(inputData, eKernel, strides, padding, dilation);
        throw new DL4JInvalidConfigException(sb);
      }
    }
  }

  public static int effectiveKernelSize(int kernel, int dilation) {
    // Determine the effective kernel size, accounting for dilation
    // http://deeplearning.net/software/theano/tutorial/conv_arithmetic.html#dilated-convolutions
    if (dilation == 1) {
      return kernel;
    } else {
      return kernel + (kernel - 1) * (dilation - 1);
    }
  }

  private static String getCommonErrorMsg(
      INDArray inputData, int kernel, int strides, int padding, int dilation) {
    String s =
        "\nInput size: [numExamples,inputDepth,inputHeight,inputWidth]="
            + Arrays.toString(inputData.shape())
            + ", inputKernel="
            + kernel;
    if (dilation != 1) {
      int effectiveKernel = effectiveKernelSize(kernel, dilation);
      s += ", effectiveKernelGivenDilation=" + effectiveKernel;
    }
    return s + ", stride=" + strides + ", padding=" + padding + ", dilation=" + dilation;
  }

  /** Check that the convolution mode is consistent with the padding specification */
  public static void validateConvolutionModePadding(ConvolutionMode mode, int padding) {
    if (mode == ConvolutionMode.Same) {
      boolean nullPadding = padding == 0;
      if (!nullPadding)
        throw new IllegalArgumentException(
            "Padding cannot be used when using the `same' convolution mode");
    }
  }
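  // Worked example (added for illustration, not part of the original file): with kernel 3 and
  // dilation 2 the effective kernel is 3 + (3 - 1) * (2 - 1) = 5, so an input of length 28 with
  // stride 1 and no padding gives (28 - 5 + 0) / 1 + 1 = 24 under Truncate/Strict, while Same mode
  // only depends on the stride: ceil(28 / 2) = 14 for stride 2.
  private static long[] outputSizeExampleSketch() {
    long truncateOut = getOutputSize(28L, 3, 1, 0, ConvolutionMode.Truncate, 2); // 24
    long sameOut = getOutputSize(28L, 3, 2, 0, ConvolutionMode.Same, 1); // 14
    return new long[] {truncateOut, sameOut};
  }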
  /**
   * Get top (left) padding for same mode only.
   *
   * @param outSize Output size
   * @param inSize Input size
   * @param kernel Kernel size
   * @param strides Stride
   * @param dilation Dilation
   * @return Top (left) padding
   */
  public static int getSameModeTopLeftPadding(
      int outSize, int inSize, int kernel, int strides, int dilation) {
    int eKernel = effectiveKernelSize(kernel, dilation);
    // Note that padBottom is 1 bigger than this if bracketed term is not divisible by 2
    int outPad = ((outSize - 1) * strides + eKernel - inSize) / 2;
    Preconditions.checkState(
        outPad >= 0,
        "Invalid padding values calculated: %s - "
            + "layer configuration is invalid? Input size %s, output size %s, kernel %s, "
            + "strides %s, dilation %s",
        outPad,
        inSize,
        outSize,
        kernel,
        strides,
        dilation);
    return outPad;
  }

  public static int getSameModeBottomRightPadding(
      int outSize, int inSize, int kernel, int strides, int dilation) {
    int eKernel = effectiveKernelSize(kernel, dilation);
    int totalPad = ((outSize - 1) * strides + eKernel - inSize);
    int tlPad = totalPad / 2;
    int brPad = totalPad - tlPad;
    Preconditions.checkState(
        brPad >= 0,
        "Invalid padding values (right) calculated: %s - "
            + "layer configuration is invalid? Input size %s, output size %s, kernel %s, "
            + "strides %s, dilation %s",
        brPad,
        inSize,
        outSize,
        kernel,
        strides,
        dilation);
    return brPad;
  }

  /**
   * Perform validation on the CNN layer kernel/stride/padding. Expect int, with values > 0 for
   * kernel size and stride, and values >= 0 for padding.
   *
   * @param kernel Kernel size to check
   * @param stride Stride to check
   * @param padding Padding to check
   */
  public static void validateCnn1DKernelStridePadding(int kernel, int stride, int padding) {
    if (kernel <= 0) {
      throw new IllegalStateException(
          "Invalid kernel size: value must be positive (> 0). Got: " + kernel);
    }
    if (stride <= 0) {
      throw new IllegalStateException(
          "Invalid stride: value must be positive (> 0). Got: " + stride);
    }
    if (padding < 0) {
      throw new IllegalStateException(
          "Invalid padding: value must be non-negative (>= 0). Got: " + padding);
    }
  }
}
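For reference, a minimal standalone sketch of the Same-mode padding arithmetic implemented by the two helpers above; the class name is hypothetical and the numbers are illustrative only:

import org.deeplearning4j.util.Convolution1DUtils;

public class SameModePaddingSketch {
  public static void main(String[] args) {
    // Input length 10, kernel 4, stride 1, dilation 1: total padding = (10 - 1) * 1 + 4 - 10 = 3,
    // split asymmetrically between the two ends of the sequence.
    int left = Convolution1DUtils.getSameModeTopLeftPadding(10, 10, 4, 1, 1); // 3 / 2 = 1
    int right = Convolution1DUtils.getSameModeBottomRightPadding(10, 10, 4, 1, 1); // 3 - 1 = 2
    System.out.println("Same-mode padding: left=" + left + ", right=" + right);
  }
}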