Fixing tests

Signed-off-by: brian <brian@brutex.de>
enhance-build-infrastructure
Brian Rosenberger 2023-07-25 10:59:46 +02:00
parent 997143b9dd
commit 4dc5a116b6
41 changed files with 4285 additions and 1309 deletions

View File

@@ -0,0 +1,167 @@
/*
*
* ******************************************************************************
* *
* * This program and the accompanying materials are made available under the
* * terms of the Apache License, Version 2.0 which is available at
* * https://www.apache.org/licenses/LICENSE-2.0.
* *
* * See the NOTICE file distributed with this work for additional
* * information regarding copyright ownership.
* * Unless required by applicable law or agreed to in writing, software
* * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* * License for the specific language governing permissions and limitations
* * under the License.
* *
* * SPDX-License-Identifier: Apache-2.0
* *****************************************************************************
*
*/
package net.brutex.ai.nd4j.tests;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.datasets.iterator.INDArrayDataSetIterator;
import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator;
import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.junit.jupiter.api.Test;
import org.nd4j.common.primitives.Pair;
import org.nd4j.evaluation.classification.Evaluation;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.lossfunctions.LossFunctions;
@Slf4j
public class ExploreParamsTest {
@Test
public void testParam() {
NeuralNetConfiguration conf =
NeuralNetConfiguration.builder()
.seed(12345)
.dataType(DataType.DOUBLE)
.layer(
DenseLayer.builder().nIn(4).nOut(30).name("1. Dense").activation(Activation.TANH))
.layer(DenseLayer.builder().nIn(30).nOut(10).name("2. Dense"))
// .layer(FrozenLayer.builder(DenseLayer.builder().nOut(6).build()).build())
.layer(
OutputLayer.builder()
.nOut(3)
.lossFunction(LossFunctions.LossFunction.MSE)
.activation(Activation.SOFTMAX))
.build();
MultiLayerNetwork nn = new MultiLayerNetwork(conf);
nn.init();
log.info(nn.summary());
// INDArray input = Nd4j.rand(10,4);
INDArray labels = Nd4j.zeros(9, 3);
INDArray input =
Nd4j.create(
new double[][] {
{5.15, 3.5, 1.4, 0.21}, // setosa
{4.9, 3.2, 1.4, 0.2}, // setosa
{4.7, 3.2, 1.23, 0.2}, // setosa
{7, 3.25, 4.7, 1.41}, // versicolor
{6.4, 3.2, 4.54, 1.5}, // versicolor
{6.9, 3.1, 4.92, 1.5}, // versicolor
{7.7, 3, 6.1, 2.3}, // virginica
{6.3, 3.4, 5.6, 2.45}, // virginica
{6.4, 3.12, 5.5, 1.8} // virginica
});
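// One-hot encode the labels: putScalar with a single index uses flat row-major addressing into the
// 9x3 array (index = row * 3 + column), so rows 0-2 get class 0 (setosa), rows 3-5 class 1
// (versicolor) and rows 6-8 class 2 (virginica).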
labels.putScalar(0, 1);
labels.putScalar(3, 1);
labels.putScalar(6, 1);
labels.putScalar(10, 1);
labels.putScalar(13, 1);
labels.putScalar(16, 1);
labels.putScalar(20, 1);
labels.putScalar(23, 1);
labels.putScalar(26, 1);
IrisDataSetIterator iter = new IrisDataSetIterator();
//Iterable<Pair<INDArray, INDArray>> it = List.of(new Pair<INDArray, INDArray>(input, labels));
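// Wrap each (features, labels) row in a Pair so INDArrayDataSetIterator can serve them as minibatches of size 1.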
List<Pair<INDArray, INDArray>> l = new ArrayList<>();
for (int i = 0; i < input.rows(); i++) {
l.add(new Pair<>(input.getRow(i), labels.getRow(i)));
}
Iterable<Pair<INDArray, INDArray>> it = l;
INDArrayDataSetIterator diter = new INDArrayDataSetIterator(it, 1);
for (int i = 0; i < 100; i++) {
// nn.fit(input, labels);
// nn.fit( input, labels);
nn.fit(diter);
// nn.feedForward(input);
if(i%20==0) log.info("Score: {}", nn.getScore());
}
Evaluation eval = nn.evaluate(iter, List.of("setosa", "versicolor", "virginica"));
log.info("\n{}", eval.stats());
}
@Test
public void testParam2() throws IOException {
NeuralNetConfiguration conf =
NeuralNetConfiguration.builder()
.seed(12345)
.layer(
DenseLayer.builder().nIn(784).nOut(20).name("1. Dense"))
.layer(DenseLayer.builder().nIn(20).nOut(10).name("2. Dense"))
.layer(
OutputLayer.builder()
.nOut(10)
.lossFunction(LossFunctions.LossFunction.MSE)
.activation(Activation.SOFTMAX))
.build();
MultiLayerNetwork nn = new MultiLayerNetwork(conf);
nn.init();
log.info(nn.summary());
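// Second configuration: identical to conf, except for dropout on the first dense layer
// (in DL4J, the dropOut value is the probability of retaining an activation).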
NeuralNetConfiguration conf2 =
NeuralNetConfiguration.builder()
.seed(12345)
.layer(
DenseLayer.builder().nIn(784).nOut(20).name("1. Dense").dropOut(0.7))
.layer(DenseLayer.builder().nIn(20).nOut(10).name("2. Dense"))
.layer(
OutputLayer.builder()
.nOut(10)
.lossFunction(LossFunctions.LossFunction.MSE)
.activation(Activation.SOFTMAX))
.build();
MultiLayerNetwork nn2 = new MultiLayerNetwork(conf2);
nn2.init();
log.info(nn2.summary());
MnistDataSetIterator iter = new MnistDataSetIterator(10, 500);
MnistDataSetIterator iter2 = new MnistDataSetIterator(10, 50);
for (int i = 0; i < 200; i++) {
nn.fit(iter);
nn2.fit(iter);
if(i%20==0) log.info("Score: {} vs. {}", nn.getScore(), nn2.getScore());
}
Evaluation eval = nn.evaluate(iter2);
Evaluation eval2 = nn2.evaluate(iter2);
log.info("\n{} \n{}", eval.stats(), eval2.stats());
}
}

View File

@@ -45,6 +45,7 @@ import org.datavec.image.transform.PipelineImageTransform;
import org.datavec.image.transform.ResizeImageTransform;
import org.datavec.image.transform.ShowImageTransform;
import org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator;
+import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator;
import org.deeplearning4j.nn.conf.GradientNormalization;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.distribution.Distribution;
@@ -65,6 +66,7 @@ import org.deeplearning4j.optimize.listeners.ScoreToChartListener;
import org.junit.jupiter.api.Test;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.activations.impl.ActivationLReLU;
+import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
@@ -80,11 +82,11 @@ public class App {
private static final int X_DIM = 20 ;
private static final int Y_DIM = 20;
-private static final int CHANNELS = 1;
+private static final int CHANNELS = 3;
-private static final int batchSize = 10;
+private static final int batchSize = 50;
private static final int INPUT = 128;
-private static final int OUTPUT_PER_PANEL = 4;
+private static final int OUTPUT_PER_PANEL = 16;
private static final int ARRAY_SIZE_PER_SAMPLE = X_DIM*Y_DIM*CHANNELS;
private static final IUpdater UPDATER = Adam.builder().learningRate(LEARNING_RATE).beta1(0.5).build();
@@ -146,7 +148,7 @@ public class App {
ActivationLayer.builder(new ActivationLReLU(0.2)).build(),
DropoutLayer.builder(1 - 0.5).build(),
-OutputLayer.builder().name("dis-output").lossFunction(LossFunction.XENT).nIn(X_DIM*Y_DIM).nOut(1).activation(Activation.SIGMOID).build()
+OutputLayer.builder().name("dis-output").lossFunction(LossFunction.MCXENT).nIn(X_DIM*Y_DIM).nOut(1).activation(Activation.SIGMOID).build()
};
}
@@ -196,6 +198,7 @@ public class App {
.activation( Activation.IDENTITY )
.layersFromArray( layers )
.inputType( InputType.convolutional(X_DIM, Y_DIM, CHANNELS))
+.dataType(DataType.FLOAT)
.build();
((NeuralNetConfiguration) conf).init();
return conf;
@@ -223,7 +226,7 @@ public class App {
ImageTransform transform3 = new ResizeImageTransform(X_DIM, Y_DIM);
ImageTransform tr = new PipelineImageTransform.Builder()
-.addImageTransform(transform) //convert to GREY SCALE
+//.addImageTransform(transform) //convert to GREY SCALE
.addImageTransform(transform3)
//.addImageTransform(transform2)
.build();
@@ -270,10 +273,10 @@ public class App {
break;
}
-if(i%20 == 0) {
+//if(i%20 == 0) {
-// frame2 = visualize(new INDArray[]{real}, batchSize,
+frame2 = visualize(new INDArray[]{real}, batchSize,
-// frame2 == null ? new JFrame() : frame2, true); //real has batchsize number of images
+frame2 == null ? new JFrame() : frame2, true); //real has batchsize number of images
-}
+//}
real.divi(255f);
// int batchSize = (int) real.shape()[0];
@@ -290,7 +293,7 @@ public class App {
DataSet data = DataSet.merge(Arrays.asList(realSet, fakeSet));
dis.fit(data);
-dis.fit(data);
+//dis.fit(data);
// Update the discriminator in the GAN network
updateGan(gen, dis, gan);
@@ -298,7 +301,7 @@ public class App {
//gan.fit(new DataSet(Nd4j.rand(batchSize, INPUT), Nd4j.zeros(batchSize, 1)));
gan.fit(new DataSet(Nd4j.rand(batchSize, CHANNELS, X_DIM, Y_DIM), Nd4j.zeros(batchSize, 1)));
+//Visualize and reporting
if (j % 10 == 1) {
System.out.println("Iteration " + j + " Visualizing...");
INDArray[] samples = batchSize > OUTPUT_PER_PANEL ? new INDArray[OUTPUT_PER_PANEL] : new INDArray[batchSize];
@@ -320,11 +323,16 @@ public class App {
frame = visualize(samples, 1, frame == null ? new JFrame() : frame, false); //each samples only has 1 image, thus batchElements=1
}
}
+if (trainData.resetSupported()) {
trainData.reset();
+} else {
+log.error("Trainingdata {} does not support reset.", trainData.toString());
+}
// Copy the GANs generator to gen.
updateGen(gen, gan);
+}
gen.save(new File("mnist-mlp-generator.dlj"));
}
@@ -383,7 +391,12 @@ public class App {
}
private static JLabel getImage(INDArray tensor, int batchElement, boolean isOrig) {
-final BufferedImage bi = new BufferedImage(X_DIM, Y_DIM, BufferedImage.TYPE_BYTE_GRAY);
+final BufferedImage bi;
+if(CHANNELS>1) {
+bi = new BufferedImage(X_DIM, Y_DIM, BufferedImage.TYPE_INT_RGB); //need to change here based on channels
+} else {
+bi = new BufferedImage(X_DIM, Y_DIM, BufferedImage.TYPE_BYTE_GRAY); //need to change here based on channels
+}
final int imageSize = X_DIM * Y_DIM;
final int offset = batchElement * imageSize;
int pxl = offset * CHANNELS; //where to start in the INDArray

View File

@@ -24,12 +24,14 @@ package net.brutex.gan;
import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator;
import org.deeplearning4j.nn.conf.GradientNormalization;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
+import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.ActivationLayer;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.DropoutLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.weights.WeightInit;
+import org.junit.jupiter.api.Test;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.activations.impl.ActivationLReLU;
import org.nd4j.linalg.api.ndarray.INDArray;
@@ -98,7 +100,10 @@ public class MnistSimpleGAN {
return new MultiLayerNetwork(discConf);
}
+@Test
+public void runTest() throws Exception {
+main(null);
+}
public static void main(String[] args) throws Exception {
GAN gan = new GAN.Builder()
.generator(MnistSimpleGAN::getGenerator)
@@ -108,6 +113,7 @@ public class MnistSimpleGAN {
.updater(UPDATER)
.gradientNormalization(GradientNormalization.RenormalizeL2PerLayer)
.gradientNormalizationThreshold(100)
.build();
Nd4j.getMemoryManager().setAutoGcWindow(15 * 1000);

View File

@@ -2386,7 +2386,11 @@ public interface INDArray extends Serializable, AutoCloseable {
long[] stride();
/**
-* Return the ordering (fortran or c 'f' and 'c' respectively) of this ndarray
+* Return the ordering (fortran or c 'f' and 'c' respectively) of this ndarray <br/><br/>
+* C Is Contiguous layout. Mathematically speaking, row major.<br/>
+* F Is Fortran contiguous layout. Mathematically speaking, column major.<br/>
+* {@see https://en.wikipedia.org/wiki/Row-_and_column-major_order}<br/>
+*
* @return the ordering of this ndarray
*/
char ordering();
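For illustration only (not part of this commit; it assumes the standard Nd4j factory methods), the ordering flag reflects the layout requested when an array is created:

INDArray cOrder = Nd4j.create(new int[] {2, 3}, 'c'); // row-major (C / contiguous) layout
INDArray fOrder = Nd4j.create(new int[] {2, 3}, 'f'); // column-major (Fortran) layout
char c = cOrder.ordering(); // 'c'
char f = fOrder.ordering(); // 'f'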

View File

@@ -5121,7 +5121,7 @@ public class Nd4j {
Nd4j.backend = backend;
updateNd4jContext();
props = Nd4jContext.getInstance().getConf();
-logger.info("Properties for Nd4jContext " + props);
+log.debug("Properties for Nd4jContext {}", props);
PropertyParser pp = new PropertyParser(props);
String otherDtype = pp.toString(ND4JSystemProperties.DTYPE);

View File

@@ -122,7 +122,7 @@ public class BNGradientCheckTest extends BaseDL4JTest {
.dataType(DataType.DOUBLE)
.updater(new NoOp()).seed(12345L)
.dist(new NormalDistribution(0, 2)).list()
-.layer(0, ConvolutionLayer.builder().kernelSize(2, 2).stride(1, 1).nIn(depth).nOut(2)
+.layer(0, Convolution2D.builder().kernelSize(2, 2).stride(1, 1).nIn(depth).nOut(2)
.activation(Activation.IDENTITY).build())
.layer(1,BatchNormalization.builder().useLogStd(useLogStd).build())
.layer(2, ActivationLayer.builder().activation(Activation.TANH).build())

View File

@@ -91,7 +91,6 @@ public class CNN1DGradientCheckTest extends BaseDL4JTest {
.updater(new NoOp())
.dist(new NormalDistribution(0, 1))
.convolutionMode(ConvolutionMode.Same)
-.list()
.layer(
Convolution1D.builder()
.activation(afn)
@@ -435,7 +434,6 @@ public class CNN1DGradientCheckTest extends BaseDL4JTest {
.updater(new NoOp())
.dist(new NormalDistribution(0, 1))
.convolutionMode(ConvolutionMode.Same)
-.list()
.layer(
0,
Convolution1D.builder()
@@ -461,6 +459,7 @@ public class CNN1DGradientCheckTest extends BaseDL4JTest {
.stride(stride)
.padding(padding)
.pnorm(pnorm)
+.name("SubsamplingLayer")
.build())
.layer(
3,

View File

@@ -0,0 +1,811 @@
/*
* ******************************************************************************
* *
* *
* * This program and the accompanying materials are made available under the
* * terms of the Apache License, Version 2.0 which is available at
* * https://www.apache.org/licenses/LICENSE-2.0.
* *
* * See the NOTICE file distributed with this work for additional
* * information regarding copyright ownership.
* * Unless required by applicable law or agreed to in writing, software
* * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* * License for the specific language governing permissions and limitations
* * under the License.
* *
* * SPDX-License-Identifier: Apache-2.0
* *****************************************************************************
*/
package org.deeplearning4j.gradientcheck;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.BaseDL4JTest;
import org.deeplearning4j.TestUtils;
import org.deeplearning4j.datasets.iterator.INDArrayDataSetIterator;
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.RNNFormat;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.*;
import org.deeplearning4j.nn.conf.layers.convolutional.Cropping1D;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.util.Convolution1DUtils;
import org.junit.jupiter.api.Test;
import org.nd4j.common.primitives.Pair;
import org.nd4j.evaluation.classification.Evaluation;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.NDArrayIndex;
import org.nd4j.linalg.learning.config.NoOp;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
@Slf4j
public class CNN1DNewGradientCheckTest extends BaseDL4JTest {
private static final boolean PRINT_RESULTS = true;
private static final boolean RETURN_ON_FIRST_FAILURE = false;
private static final double DEFAULT_EPS = 1e-6;
private static final double DEFAULT_MAX_REL_ERROR = 1e-3;
private static final double DEFAULT_MIN_ABS_ERROR = 1e-8;
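// GradientCheckUtil perturbs each parameter by +/- DEFAULT_EPS, compares the centered numerical
// derivative against the analytic gradient, and reports a failure when the relative error exceeds
// DEFAULT_MAX_REL_ERROR (unless the absolute error is below DEFAULT_MIN_ABS_ERROR).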
static {
Nd4j.setDataType(DataType.DOUBLE);
}
@Test
public void testCnn1D() {
int minibatchSize = 4;
int[] dataChannels = {4, 10}; //the input
int[] kernels = {2,4,5,8};
int stride = 2;
int padding = 3;
int seriesLength = 300;
for (int kernel : kernels) {
for (int dChannels : dataChannels) {
int numLabels = ((seriesLength + (2 * padding) - kernel) / stride) + 1;
final NeuralNetConfiguration conf =
NeuralNetConfiguration.builder()
.dataType(DataType.DOUBLE)
.updater(new NoOp())
.dist(new NormalDistribution(0, 1))
.convolutionMode(ConvolutionMode.Same)
.layer(
Convolution1DNew.builder()
.activation(Activation.RELU)
.kernelSize(kernel)
.stride(stride)
.padding(padding)
.nIn(dChannels) // channels
.nOut(3)
.rnnDataFormat(RNNFormat.NCW)
.build())
.layer(
RnnOutputLayer.builder()
.lossFunction(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX)
.nOut(4)
.build())
.inputType(InputType.recurrent(dChannels, seriesLength))
.build();
INDArray input = Nd4j.rand(minibatchSize, dChannels, seriesLength);
INDArray labels = Nd4j.zeros(minibatchSize, 4, numLabels);
for (int i = 0; i < minibatchSize; i++) {
for (int j = 0; j < numLabels; j++) {
labels.putScalar(new int[] {i, i % 4, j}, 1.0);
}
}
final MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
String msg =
"Minibatch="
+ minibatchSize
+ ", activationFn="
+ Activation.RELU
+ ", kernel = "
+ kernel;
System.out.println(msg);
for (int j = 0; j < net.getnLayers(); j++)
System.out.println("ILayer " + j + " # params: " + net.getLayer(j).numParams());
/**
List<Pair<INDArray, INDArray>> iter = new java.util.ArrayList<>(Collections.emptyList());
iter.add(new Pair<>(input, labels));
for(int x=0;x<100; x++) net.fit(input, labels);
Evaluation eval = net.evaluate(new INDArrayDataSetIterator(iter,2), Arrays.asList(new String[]{"One", "Two", "Three", "Four"}));
// net.fit(input, labels);
eval.eval(labels, net.output(input));
**/
boolean gradOK =
GradientCheckUtil.checkGradients(
net,
DEFAULT_EPS,
DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR,
PRINT_RESULTS,
RETURN_ON_FIRST_FAILURE,
input,
labels);
assertTrue(gradOK, msg);
TestUtils.testModelSerialization(net);
}
}
}
@Test
public void testCnn1DWithLocallyConnected1D() {
Nd4j.getRandom().setSeed(1337);
int[] minibatchSizes = {2, 3};
int length = 25;
int convNIn = 18;
int convNOut1 = 3;
int convNOut2 = 4;
int finalNOut = 4;
int[] kernels = {1,2,4};
int stride = 1;
int padding = 0;
Activation[] activations = {Activation.SIGMOID};
for (Activation afn : activations) {
for (int minibatchSize : minibatchSizes) {
for (int kernel : kernels) {
INDArray input = Nd4j.rand(minibatchSize, convNIn, length);
INDArray labels = Nd4j.zeros(minibatchSize, finalNOut, length);
for (int i = 0; i < minibatchSize; i++) {
for (int j = 0; j < length; j++) {
labels.putScalar(new int[] {i, i % finalNOut, j}, 1.0);
}
}
NeuralNetConfiguration conf =
NeuralNetConfiguration.builder()
.dataType(DataType.DOUBLE)
.updater(new NoOp())
.dist(new NormalDistribution(0, 1))
.convolutionMode(ConvolutionMode.Same)
.layer(
Convolution1DNew.builder()
.activation(afn)
.kernelSize(kernel)
.stride(stride)
.padding(padding)
.nIn(convNIn)
.nOut(convNOut1)
.rnnDataFormat(RNNFormat.NCW)
.build())
.layer(
LocallyConnected1D.builder()
.activation(afn)
.kernelSize(kernel)
.stride(stride)
.padding(padding)
.nIn(convNOut1)
.nOut(convNOut2)
.hasBias(false)
.build())
.layer(
RnnOutputLayer.builder()
.lossFunction(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX)
.nOut(finalNOut)
.build())
.inputType(InputType.recurrent(convNIn, length))
.build();
String json = conf.toJson();
NeuralNetConfiguration c2 = NeuralNetConfiguration.fromJson(json);
assertEquals(conf, c2);
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
String msg =
"Minibatch=" + minibatchSize + ", activationFn=" + afn + ", kernel = " + kernel;
if (PRINT_RESULTS) {
System.out.println(msg);
// for (int j = 0; j < net.getnLayers(); j++)
// System.out.println("ILayer " + j + " # params: " +
// net.getLayer(j).numParams());
}
boolean gradOK =
GradientCheckUtil.checkGradients(
net,
DEFAULT_EPS,
DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR,
PRINT_RESULTS,
RETURN_ON_FIRST_FAILURE,
input,
labels);
assertTrue(gradOK, msg);
TestUtils.testModelSerialization(net);
}
}
}
}
@Test
public void testCnn1DWithCropping1D() {
Nd4j.getRandom().setSeed(1337);
int[] minibatchSizes = {1, 3};
int length = 7;
int convNIn = 2;
int convNOut1 = 3;
int convNOut2 = 4;
int finalNOut = 4;
int[] kernels = {1, 2, 4};
int stride = 1;
int padding = 0;
int cropping = 1;
int croppedLength = length - 2 * cropping;
Activation[] activations = {Activation.SIGMOID};
SubsamplingLayer.PoolingType[] poolingTypes =
new SubsamplingLayer.PoolingType[] {
SubsamplingLayer.PoolingType.MAX,
SubsamplingLayer.PoolingType.AVG,
SubsamplingLayer.PoolingType.PNORM
};
for (Activation afn : activations) {
for (SubsamplingLayer.PoolingType poolingType : poolingTypes) {
for (int minibatchSize : minibatchSizes) {
for (int kernel : kernels) {
INDArray input = Nd4j.rand(minibatchSize, convNIn, length);
INDArray labels = Nd4j.zeros(minibatchSize, finalNOut, croppedLength);
for (int i = 0; i < minibatchSize; i++) {
for (int j = 0; j < croppedLength; j++) {
labels.putScalar(new int[] {i, i % finalNOut, j}, 1.0);
}
}
NeuralNetConfiguration conf =
NeuralNetConfiguration.builder()
.dataType(DataType.DOUBLE)
.updater(new NoOp())
.dist(new NormalDistribution(0, 1))
.convolutionMode(ConvolutionMode.Same)
.layer(
Convolution1DNew.builder()
.activation(afn)
.kernelSize(kernel)
.stride(stride)
.padding(padding)
.nOut(convNOut1)
.build())
.layer(Cropping1D.builder(cropping).build())
.layer(
Convolution1DNew.builder()
.activation(afn)
.kernelSize(kernel)
.stride(stride)
.padding(padding)
.nOut(convNOut2)
.build())
.layer(
RnnOutputLayer.builder()
.lossFunction(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX)
.nOut(finalNOut)
.build())
.inputType(InputType.recurrent(convNIn, length, RNNFormat.NCW))
.build();
String json = conf.toJson();
NeuralNetConfiguration c2 = NeuralNetConfiguration.fromJson(json);
assertEquals(conf, c2);
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
String msg =
"PoolingType="
+ poolingType
+ ", minibatch="
+ minibatchSize
+ ", activationFn="
+ afn
+ ", kernel = "
+ kernel;
if (PRINT_RESULTS) {
System.out.println(msg);
// for (int j = 0; j < net.getnLayers(); j++)
// System.out.println("ILayer " + j + " # params: " +
// net.getLayer(j).numParams());
}
boolean gradOK =
GradientCheckUtil.checkGradients(
net,
DEFAULT_EPS,
DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR,
PRINT_RESULTS,
RETURN_ON_FIRST_FAILURE,
input,
labels);
assertTrue(gradOK, msg);
TestUtils.testModelSerialization(net);
}
}
}
}
}
@Test
public void testCnn1DWithZeroPadding1D() {
Nd4j.getRandom().setSeed(1337);
int[] minibatchSizes = {1, 3};
int length = 7;
int convNIn = 2;
int convNOut1 = 3;
int convNOut2 = 4;
int finalNOut = 4;
int[] kernels = {1, 2, 4};
int stride = 1;
int pnorm = 2;
int padding = 0;
int zeroPadding = 2;
int paddedLength = length + 2 * zeroPadding;
Activation[] activations = {Activation.SIGMOID};
SubsamplingLayer.PoolingType[] poolingTypes =
new SubsamplingLayer.PoolingType[] {
SubsamplingLayer.PoolingType.MAX,
SubsamplingLayer.PoolingType.AVG,
SubsamplingLayer.PoolingType.PNORM
};
for (Activation afn : activations) {
for (SubsamplingLayer.PoolingType poolingType : poolingTypes) {
for (int minibatchSize : minibatchSizes) {
for (int kernel : kernels) {
INDArray input = Nd4j.rand(minibatchSize, convNIn, length);
INDArray labels = Nd4j.zeros(minibatchSize, finalNOut, paddedLength);
for (int i = 0; i < minibatchSize; i++) {
for (int j = 0; j < paddedLength; j++) {
labels.putScalar(new int[] {i, i % finalNOut, j}, 1.0);
}
}
NeuralNetConfiguration conf =
NeuralNetConfiguration.builder()
.dataType(DataType.DOUBLE)
.updater(new NoOp())
.dist(new NormalDistribution(0, 1))
.convolutionMode(ConvolutionMode.Same)
.layer(
Convolution1DNew.builder()
.activation(afn)
.kernelSize(2, kernel)
.stride(stride)
.padding(padding)
.nOut(convNOut1)
.build())
.layer(ZeroPadding1DLayer.builder(zeroPadding).build())
.layer(
Convolution1DNew.builder()
.activation(afn)
.kernelSize(kernel)
.stride(stride)
.padding(padding)
.nOut(convNOut2)
.build())
.layer(ZeroPadding1DLayer.builder(0).build())
.layer(
Subsampling1DLayer.builder(poolingType)
.kernelSize(kernel)
.stride(stride)
.padding(padding)
.pnorm(pnorm)
.build())
.layer(
RnnOutputLayer.builder()
.lossFunction(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX)
.nOut(finalNOut)
.build())
.inputType(InputType.recurrent(convNIn, length, RNNFormat.NCW))
.build();
String json = conf.toJson();
NeuralNetConfiguration c2 = NeuralNetConfiguration.fromJson(json);
assertEquals(conf, c2);
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
String msg =
"PoolingType="
+ poolingType
+ ", minibatch="
+ minibatchSize
+ ", activationFn="
+ afn
+ ", kernel = "
+ kernel;
if (PRINT_RESULTS) {
System.out.println(msg);
// for (int j = 0; j < net.getnLayers(); j++)
// System.out.println("ILayer " + j + " # params: " +
// net.getLayer(j).numParams());
}
boolean gradOK =
GradientCheckUtil.checkGradients(
net,
DEFAULT_EPS,
DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR,
PRINT_RESULTS,
RETURN_ON_FIRST_FAILURE,
input,
labels);
assertTrue(gradOK, msg);
TestUtils.testModelSerialization(net);
}
}
}
}
}
@Test
public void testCnn1DWithSubsampling1D() {
Nd4j.getRandom().setSeed(12345);
int[] minibatchSizes = {1, 3};
int length = 7;
int convNIn = 2;
int convNOut1 = 3;
int convNOut2 = 4;
int finalNOut = 4;
int[] kernels = {1, 2, 4};
int stride = 1;
int padding = 0;
int pnorm = 2;
Activation[] activations = {Activation.SIGMOID, Activation.TANH};
SubsamplingLayer.PoolingType[] poolingTypes =
new SubsamplingLayer.PoolingType[] {
SubsamplingLayer.PoolingType.MAX,
SubsamplingLayer.PoolingType.AVG,
SubsamplingLayer.PoolingType.PNORM
};
for (Activation afn : activations) {
for (SubsamplingLayer.PoolingType poolingType : poolingTypes) {
for (int minibatchSize : minibatchSizes) {
for (int kernel : kernels) {
INDArray input = Nd4j.rand(minibatchSize, convNIn, length);
INDArray labels = Nd4j.zeros(minibatchSize, finalNOut, length);
for (int i = 0; i < minibatchSize; i++) {
for (int j = 0; j < length; j++) {
labels.putScalar(new int[] {i, i % finalNOut, j}, 1.0);
}
}
NeuralNetConfiguration conf =
NeuralNetConfiguration.builder()
.dataType(DataType.DOUBLE)
.updater(new NoOp())
.dist(new NormalDistribution(0, 1))
.convolutionMode(ConvolutionMode.Same)
.layer(
0,
Convolution1DNew.builder()
.activation(afn)
.kernelSize(kernel)
.stride(stride)
.padding(padding)
.nOut(convNOut1)
.build())
.layer(
1,
Convolution1DNew.builder()
.activation(afn)
.kernelSize(kernel)
.stride(stride)
.padding(padding)
.nOut(convNOut2)
.build())
.layer(
2,
Subsampling1DLayer.builder(poolingType)
.kernelSize(kernel)
.stride(stride)
.padding(padding)
.pnorm(pnorm)
.name("SubsamplingLayer")
.build())
.layer(
3,
RnnOutputLayer.builder()
.lossFunction(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX)
.nOut(finalNOut)
.build())
.inputType(InputType.recurrent(convNIn, length, RNNFormat.NCW))
.build();
String json = conf.toJson();
NeuralNetConfiguration c2 = NeuralNetConfiguration.fromJson(json);
assertEquals(conf, c2);
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
String msg =
"PoolingType="
+ poolingType
+ ", minibatch="
+ minibatchSize
+ ", activationFn="
+ afn
+ ", kernel = "
+ kernel;
if (PRINT_RESULTS) {
System.out.println(msg);
// for (int j = 0; j < net.getnLayers(); j++)
// System.out.println("ILayer " + j + " # params: " +
// net.getLayer(j).numParams());
}
boolean gradOK =
GradientCheckUtil.checkGradients(
net,
DEFAULT_EPS,
DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR,
PRINT_RESULTS,
RETURN_ON_FIRST_FAILURE,
input,
labels);
assertTrue(gradOK, msg);
TestUtils.testModelSerialization(net);
}
}
}
}
}
@Test
public void testCnn1dWithMasking() {
int length = 12;
int convNIn = 2;
int convNOut1 = 3;
int convNOut2 = 4;
int finalNOut = 3;
int pnorm = 2;
SubsamplingLayer.PoolingType[] poolingTypes =
new SubsamplingLayer.PoolingType[] {
SubsamplingLayer.PoolingType.MAX, SubsamplingLayer.PoolingType.AVG
};
for (SubsamplingLayer.PoolingType poolingType : poolingTypes) {
for (ConvolutionMode cm :
new ConvolutionMode[] {ConvolutionMode.Same, ConvolutionMode.Truncate}) {
for (int stride : new int[] {1, 2}) {
String s = cm + ", stride=" + stride + ", pooling=" + poolingType;
log.info("Starting test: " + s);
Nd4j.getRandom().setSeed(12345);
NeuralNetConfiguration conf =
NeuralNetConfiguration.builder()
.dataType(DataType.DOUBLE)
.updater(new NoOp())
.activation(Activation.TANH)
.dist(new NormalDistribution(0, 1))
.convolutionMode(cm)
.seed(12345)
.layer(
Convolution1DNew.builder()
.kernelSize(2)
.rnnDataFormat(RNNFormat.NCW)
.stride(stride)
.nIn(convNIn)
.nOut(convNOut1)
.build())
.layer(
Subsampling1DLayer.builder(poolingType)
.kernelSize(2)
.stride(stride)
.pnorm(pnorm)
.build())
.layer(
Convolution1DNew.builder()
.kernelSize(2)
.rnnDataFormat(RNNFormat.NCW)
.stride(stride)
.nIn(convNOut1)
.nOut(convNOut2)
.build())
.layer(GlobalPoolingLayer.builder().poolingType(PoolingType.AVG).build())
.layer(
OutputLayer.builder()
.lossFunction(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX)
.nOut(finalNOut)
.build())
.inputType(InputType.recurrent(convNIn, length))
.build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
INDArray f = Nd4j.rand(2, convNIn, length);
INDArray fm = Nd4j.create(2, length);
fm.get(NDArrayIndex.point(0), NDArrayIndex.all()).assign(1);
fm.get(NDArrayIndex.point(1), NDArrayIndex.interval(0, 6)).assign(1);
INDArray label = TestUtils.randomOneHot(2, finalNOut);
boolean gradOK =
GradientCheckUtil.checkGradients(
new GradientCheckUtil.MLNConfig().net(net).input(f).labels(label).inputMask(fm));
assertTrue(gradOK, s);
TestUtils.testModelSerialization(net);
// TODO also check that masked step values don't impact forward pass, score or gradients
DataSet ds = new DataSet(f, label, fm, null);
double scoreBefore = net.score(ds);
net.setInput(f);
net.setLabels(label);
net.setLayerMaskArrays(fm, null);
net.computeGradientAndScore();
INDArray gradBefore = net.getFlattenedGradients().dup();
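// Time steps 10 and 11 of example 1 lie outside its mask (only steps 0..5 are unmasked), so
// modifying them must leave the score and gradients unchanged.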
f.putScalar(1, 0, 10, 10.0);
f.putScalar(1, 1, 11, 20.0);
double scoreAfter = net.score(ds);
net.setInput(f);
net.setLabels(label);
net.setLayerMaskArrays(fm, null);
net.computeGradientAndScore();
INDArray gradAfter = net.getFlattenedGradients().dup();
assertEquals(scoreBefore, scoreAfter, 1e-6);
assertEquals(gradBefore, gradAfter);
}
}
}
}
@Test
public void testCnn1Causal() throws Exception {
int convNIn = 2;
int convNOut1 = 3;
int convNOut2 = 4;
int finalNOut = 3;
int[] lengths = {11, 12, 13, 9, 10, 11};
int[] kernels = {2, 3, 2, 4, 2, 3};
int[] dilations = {1, 1, 2, 1, 2, 1};
int[] strides = {1, 2, 1, 2, 1, 1};
boolean[] masks = {false, true, false, true, false, true};
boolean[] hasB = {true, false, true, false, true, true};
for (int i = 0; i < lengths.length; i++) {
int length = lengths[i];
int k = kernels[i];
int d = dilations[i];
int st = strides[i];
boolean mask = masks[i];
boolean hasBias = hasB[i];
// TODO has bias
String s = "k=" + k + ", s=" + st + " d=" + d + ", seqLen=" + length;
log.info("Starting test: " + s);
Nd4j.getRandom().setSeed(12345);
NeuralNetConfiguration conf =
NeuralNetConfiguration.builder()
.dataType(DataType.DOUBLE)
.updater(new NoOp())
.activation(Activation.TANH)
.weightInit(new NormalDistribution(0, 1))
.seed(12345)
.layer(
Convolution1DNew.builder()
.kernelSize(k)
.dilation(d)
.hasBias(hasBias)
.convolutionMode(ConvolutionMode.Causal)
.stride(st)
.nOut(convNOut1)
.build())
.layer(
Convolution1DNew.builder()
.kernelSize(k)
.dilation(d)
.convolutionMode(ConvolutionMode.Causal)
.stride(st)
.nOut(convNOut2)
.build())
.layer(
RnnOutputLayer.builder()
.lossFunction(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX)
.nOut(finalNOut)
.build())
.inputType(InputType.recurrent(convNIn, length, RNNFormat.NCW))
.build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
INDArray f = Nd4j.rand(DataType.DOUBLE, 2, convNIn, length);
INDArray fm = null;
if (mask) {
fm = Nd4j.create(2, length);
fm.get(NDArrayIndex.point(0), NDArrayIndex.all()).assign(1);
fm.get(NDArrayIndex.point(1), NDArrayIndex.interval(0, length - 2)).assign(1);
}
long outSize1 = Convolution1DUtils.getOutputSize(length, k, st, 0, ConvolutionMode.Causal, d);
long outSize2 =
Convolution1DUtils.getOutputSize(outSize1, k, st, 0, ConvolutionMode.Causal, d);
INDArray label = TestUtils.randomOneHotTimeSeries(2, finalNOut, (int) outSize2);
String msg =
"Minibatch="
+ 1
+ ", activationFn="
+ Activation.RELU
+ ", kernel = "
+ k;
System.out.println(msg);
for (int j = 0; j < net.getnLayers(); j++)
System.out.println("ILayer " + j + " # params: " + net.getLayer(j).numParams());
boolean gradOK =
GradientCheckUtil.checkGradients(
new GradientCheckUtil.MLNConfig().net(net).input(f).labels(label).inputMask(fm));
assertTrue(gradOK, s);
TestUtils.testModelSerialization(net);
}
}
}

View File

@@ -108,8 +108,8 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
.updater(new NoOp())
.weightInit(WeightInit.XAVIER)
.seed(12345L)
-.list()
-.layer(0, ConvolutionLayer.builder(1, 1).nOut(6).activation(afn).build())
+.layer(0, Convolution2D.builder().kernelSize(1).stride(1).nOut(6).activation(afn).build())
.layer(1, OutputLayer.builder(lf).activation(outputActivation).nOut(3).build())
.inputType(InputType.convolutionalFlat(1, 4, 1));

View File

@@ -24,6 +24,7 @@ import lombok.val;
import org.deeplearning4j.nn.api.layers.LayerConstraint;
import org.deeplearning4j.nn.conf.CNN2DFormat;
import org.deeplearning4j.nn.conf.inputs.InputType;
+import org.deeplearning4j.nn.conf.layers.Convolution2D;
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException;
@@ -85,7 +86,7 @@ public class KerasAtrousConvolution2D extends KerasConvolution {
IWeightInit init = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_INIT(),
enforceTrainingConfig, conf, kerasMajorVersion);
-val builder = ConvolutionLayer.builder().name(this.name)
+val builder = Convolution2D.builder().name(this.name)
.nOut(getNOutFromConfig(layerConfig, conf)).dropOut(this.dropout)
.activation(getIActivationFromConfig(layerConfig, conf))
.weightInit(init)

View File

@@ -28,6 +28,7 @@ import org.deeplearning4j.nn.api.layers.LayerConstraint;
import org.deeplearning4j.nn.conf.CNN2DFormat;
import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.deeplearning4j.nn.conf.inputs.InputType;
+import org.deeplearning4j.nn.conf.layers.Convolution2D;
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException;
@@ -95,7 +96,7 @@ public class KerasConvolution2D extends KerasConvolution {
LayerConstraint weightConstraint = KerasConstraintUtils.getConstraintsFromConfig(
layerConfig, conf.getLAYER_FIELD_W_CONSTRAINT(), conf, kerasMajorVersion);
-final var builder = ConvolutionLayer.builder().name(this.name)
+final var builder = Convolution2D.builder().name(this.name)
.nOut(getNOutFromConfig(layerConfig, conf)).dropOut(this.dropout)
.activation(getIActivationFromConfig(layerConfig, conf))
.weightInit(init)

View File

@@ -222,6 +222,14 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration {
// TODO do not put inside self to avoid serialization issues
// innerConfigurations.add(0, this); //put this configuration at first place
+getLayerConfigurations().stream()
+.forEach(
+lconf ->
+lconf.setNetConfiguration(
+this)); // set this as net config for all layers (defined in here, not stacked
/**
* Inherit network wide configuration setting to those layer configurations that do not have an
* individual setting (nor a default)
@@ -230,11 +238,6 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration {
lconf.runInheritance();
}
-getLayerConfigurations().stream()
-.forEach(
-lconf ->
-lconf.setNetConfiguration(
-this)); // set this as net config for all layers (defined in here, not stacked
// Validate BackpropType setting
if ((tbpttBackLength != DEFAULT_TBPTT_LENGTH || tbpttFwdLength != DEFAULT_TBPTT_LENGTH)
@@ -326,7 +329,7 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration {
LayerConfiguration layer = getFlattenedLayerConfigurations().get(i - 1);
// convolution 1d is an edge case where it has rnn input type but the filters
// should be the output
-if (layer instanceof Convolution1D) {
+if (layer instanceof Convolution1D || layer instanceof Convolution1DNew) {
if (l instanceof DenseLayer && getInputType() instanceof InputType.InputTypeRecurrent) {
FeedForwardLayer feedForwardLayer = (FeedForwardLayer) l;
if (getInputType() instanceof InputType.InputTypeRecurrent) {

View File

@@ -21,7 +21,13 @@
package org.deeplearning4j.nn.conf;
+/**
+* N is the batch size<br/>
+* C is the number of feature maps (that is, the number of channels)<br/>
+* H is the image height (not used for 1D conv, as this is an RNN format)<br/>
+* W is the image width<br/>
+**/
public enum RNNFormat implements DataFormat {
-NCW, /** n=batch size; c=channels/ features; w=width **/
+NCW,
-NWC /** n=batch size; w=width; c=channels/ features **/
+NWC
}
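As a brief illustration only (not part of this commit; shapes assume standard Nd4j arrays), a recurrent minibatch of 32 sequences with 8 features and 100 time steps would be laid out as:

INDArray ncw = Nd4j.zeros(32, 8, 100); // RNNFormat.NCW: [batch, channels/features, width/time steps]
INDArray nwc = Nd4j.zeros(32, 100, 8); // RNNFormat.NWC: [batch, width/time steps, channels/features]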

View File

@@ -0,0 +1,142 @@
/*
*
* ******************************************************************************
* *
* * This program and the accompanying materials are made available under the
* * terms of the Apache License, Version 2.0 which is available at
* * https://www.apache.org/licenses/LICENSE-2.0.
* *
* * See the NOTICE file distributed with this work for additional
* * information regarding copyright ownership.
* * Unless required by applicable law or agreed to in writing, software
* * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* * License for the specific language governing permissions and limitations
* * under the License.
* *
* * SPDX-License-Identifier: Apache-2.0
* *****************************************************************************
*
*/
package org.deeplearning4j.nn.conf.layers;
import java.util.Arrays;
import com.fasterxml.jackson.annotation.JsonIgnore;
import lombok.*;
import lombok.experimental.Accessors;
import lombok.experimental.SuperBuilder;
import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.nn.conf.CNN2DFormat;
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.util.ValidationUtils;
/**
* For a convolution layer, nIn is the number of input channels and nOut is the number of filters
* (that is, the number of output channels). The builder specifies the filter/kernel size, the
* stride and the padding; a pooling layer takes only the kernel size.
*
* <p>Supports multiple dimensions: In 1D CNN, kernel moves in 1 direction. Input and output data of
* 1D CNN is 2 dimensional. Mostly used on Time-Series data.
*
* <p>In 2D CNN, kernel moves in 2 directions. Input and output data of 2D CNN is 3 dimensional.
* Mostly used on Image data.
*
* <p>In 3D CNN, kernel moves in 3 directions. Input and output data of 3D CNN is 4 dimensional.
* Mostly used on 3D Image data (MRI, CT Scans, Video).
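*
* <p>Per-example shapes (excluding the minibatch dimension), assuming channels-first layout: 1D
* data is [channels, length], 2D data is [channels, height, width], 3D data is [channels, depth,
* height, width].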
*/
@ToString(callSuper = true)
@NoArgsConstructor
@EqualsAndHashCode(callSuper = true)
@Slf4j
@SuperBuilder
public abstract class AbstractConvolutionLayer extends FeedForwardLayer {
/** The kernel of this convolution with size in each n-dimensions */
@Getter private int[] kernelSize;
/** The stride */
@Getter private int[] stride;
/** The padding */
@Getter private int[] padding;
/** The dilation */
@Getter private int[] dilation;
/** If true (default): include bias parameters in the model. False: no bias. */
@Builder.Default
@Getter
@Accessors(fluent = true)
@Setter
private boolean hasBias = true;
/**
* Set the convolution mode for the Convolution layer. See {@link ConvolutionMode} for more
* details Default is {@link ConvolutionMode}.Truncate.
*/
@Builder.Default @Getter @Setter
private ConvolutionMode convolutionMode = ConvolutionMode.Truncate;
/**
* When using CuDNN and an error is encountered, should fallback to the non-CuDNN implementation
* be allowed? If set to false, an exception in CuDNN will be propagated back to the user. If
* true, the built-in (non-CuDNN) implementation for ConvolutionLayer will be used
*/
@Getter @Setter @Builder.Default private boolean cudnnAllowFallback = true;
/** Defaults to "PREFER_FASTEST", but "NO_WORKSPACE" uses less memory. */
@Getter @Setter @Builder.Default private ConvolutionLayer.AlgoMode cudnnAlgoMode = ConvolutionLayer.AlgoMode.PREFER_FASTEST;
@Getter @Setter private ConvolutionLayer.FwdAlgo cudnnFwdAlgo;
@Getter @Setter private ConvolutionLayer.BwdFilterAlgo cudnnBwdFilterAlgo;
@Getter @Setter private ConvolutionLayer.BwdDataAlgo cudnnBwdDataAlgo;
/**
* Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last).
* See {@link CNN2DFormat} for more details.<br>
* Default: NCHW
*
* @param format Format for activations (in and out)
*/
@Builder.Default @Getter @Setter
private CNN2DFormat convFormat =
CNN2DFormat.NCHW; // default value for legacy serialization reasons
/**
* Number of parameters this layer has as a result of its configuration.
*
* @return number of parameters
*/
@Override
public long numParams() {
var kern = 1;
for (int i : getKernelSize()) {
kern = kern * i;
}
return nIn * nOut * kern + (hasBias() ? nOut : 0);
}
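// Worked example (hypothetical values): a 1D convolution with nIn = 2 input channels, nOut = 3
// filters, kernelSize = {4, 1} and a bias term has kern = 4 * 1 = 4, giving
// numParams = 2 * 3 * 4 + 3 = 27.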
public abstract static class AbstractConvolutionLayerBuilder<
C extends AbstractConvolutionLayer, B extends AbstractConvolutionLayerBuilder<C, B>>
extends FeedForwardLayerBuilder<C, B> {
public B kernelSize(int @NonNull ... kernelSize) {
if (this.kernelSize != null) {
log.warn("You are setting the kernel more than once, last call with override prior calls.");
}
this.kernelSize = kernelSize;
return self();
}
public B stride(int @NonNull ... stride) {
this.stride = stride;
return self();
}
public B padding(int @NonNull ... padding) {
this.padding = padding;
return self();
}
public B dilation(int @NonNull ... dilation) {
this.dilation = dilation;
return self();
}
}
}

View File

@@ -52,6 +52,16 @@ import org.nd4j.linalg.learning.regularization.WeightDecay;
@SuperBuilder
public abstract class BaseLayerConfiguration extends LayerConfiguration
implements ITraininableLayerConfiguration, Serializable, Cloneable {
+/**
+* Number of parameters this layer has as a result of its configuration. This default implementation
+* calls {@link #initializer()}.numParams( this ).
+*
+* @return number of parameters
+*/
+@Override
+public long numParams() {
+return initializer().numParams(this);
+}
/**
* Set constraints to be applied to all layers. Default: no constraints.<br>

View File

@@ -45,6 +45,7 @@ import org.nd4j.linalg.factory.Nd4j;
@NoArgsConstructor
public class CapsuleLayer extends SameDiffLayer {
private static final String WEIGHT_PARAM = "weight";
private static final String BIAS_PARAM = "bias";
/**

View File

@@ -36,22 +36,17 @@ import org.deeplearning4j.util.ValidationUtils;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
+/*
//TODO: We will eventually want to NOT subclass off of ConvolutionLayer.
//Currently, we just subclass off the ConvolutionLayer and hard code the "width" dimension to 1
-/**
* This approach treats a multivariate time series with L time steps and
* P variables as an L x 1 x P image (L rows high, 1 column wide, P
* channels deep). The kernel should be H<L pixels high and W=1 pixels
* wide.
-*
-In 1D CNN, kernel moves in 1 direction.
+* In 1D CNN, kernel moves in 1 direction. The kernel has 2-dimensions.
-Input and output data of 1D CNN is 2 dimensional. Mostly used on Time-Series data.
+* Input and output data of 1D CNN is 2-dimensional. Mostly used on Time-Series data.
-In 2D CNN, kernel moves in 2 directions.
-Input and output data of 2D CNN is 3 dimensional. Mostly used on Image data.
-In 3D CNN, kernel moves in 3 directions.
-Input and output data of 3D CNN is 4 dimensional. Mostly used on 3D Image data (MRI, CT Scans, Video).
*/
@Data
@ToString(callSuper = true)
@@ -223,7 +218,7 @@ public class Convolution1D extends ConvolutionLayer {
}
public abstract static class Convolution1DBuilder<
-C extends ConvolutionLayer, B extends Convolution1DBuilder<C, B>>
+C extends Convolution1D, B extends Convolution1DBuilder<C, B>>
extends ConvolutionLayerBuilder<C, B> {
@Override

View File

@@ -0,0 +1,250 @@
/*
* ******************************************************************************
* *
* *
* * This program and the accompanying materials are made available under the
* * terms of the Apache License, Version 2.0 which is available at
* * https://www.apache.org/licenses/LICENSE-2.0.
* *
* * See the NOTICE file distributed with this work for additional
* * information regarding copyright ownership.
* * Unless required by applicable law or agreed to in writing, software
* * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* * License for the specific language governing permissions and limitations
* * under the License.
* *
* * SPDX-License-Identifier: Apache-2.0
* *****************************************************************************
*/
package org.deeplearning4j.nn.conf.layers;
import java.util.Collection;
import java.util.Map;
import lombok.*;
import lombok.experimental.SuperBuilder;
import lombok.extern.jackson.Jacksonized;
import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.nn.api.ParamInitializer;
import org.deeplearning4j.nn.conf.CNN2DFormat;
import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.RNNFormat;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.memory.LayerMemoryReport;
import org.deeplearning4j.nn.params.ConvolutionNewParamInitializer;
import org.deeplearning4j.optimize.api.TrainingListener;
import org.deeplearning4j.util.Convolution1DUtils;
import org.deeplearning4j.util.ValidationUtils;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
// TODO: We will eventually want to NOT subclass off of ConvolutionLayer.
// Currently, we just subclass off the ConvolutionLayer and hard code the "width" dimension to 1
/**
* This approach treats a multivariate time series with L time steps and P variables as an L x 1 x P
* image (L rows high, 1 column wide, P channels deep). The kernel should be H<L pixels high and W=1
* pixels wide.
*
* <p>In 1D CNN, kernel moves in 1 direction. The kernel has 2-dimensions. Input and output data of
* 1D CNN is 2-dimensional. Mostly used on Time-Series data.
*/
@Data
@Slf4j
@ToString(callSuper = true)
@EqualsAndHashCode(callSuper = true)
@Jacksonized
@SuperBuilder
public class Convolution1DNew extends AbstractConvolutionLayer {
/**
* Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last).
* See {@link CNN2DFormat} for more details.<br>
* Default: NCHW
*
* @param format Format for activations (in and out)
*/
@Builder.Default
protected CNN2DFormat dataFormat =
CNN2DFormat.NCHW; // default value for legacy serialization reasons
@Builder.Default private RNNFormat rnnDataFormat = RNNFormat.NCW;
@Override
public ParamInitializer initializer() {
return ConvolutionNewParamInitializer.getInstance();
}
@Override
public org.deeplearning4j.nn.api.Layer instantiate(
NeuralNetConfiguration conf,
Collection<TrainingListener> trainingListeners,
int layerIndex,
INDArray layerParamsView,
boolean initializeParams,
DataType networkDataType) {
setNetConfiguration(conf);
LayerValidation.assertNInNOutSet("Convolution1D", getName(), layerIndex, getNIn(), getNOut());
LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
/*
Object ret;
try {
ret = lconf.getCanConfigure()
.getConstructor(LayerConfiguration.class, DataType.class)
.newInstance(new Object[] { lconf, networkDataType });
} catch (Exception e) {
throw new RuntimeException(e);
*/
org.deeplearning4j.nn.layers.convolution.Convolution1DNewLayer ret =
new org.deeplearning4j.nn.layers.convolution.Convolution1DNewLayer(lconf, networkDataType);
ret.addTrainingListeners(trainingListeners);
ret.setIndex(layerIndex);
ret.setParamsViewArray(layerParamsView);
Map<String, INDArray> paramTable = initializer().init(this, layerParamsView, initializeParams);
ret.setParamTable(paramTable);
ret.setLayerConfiguration(this);
return ret;
}
@Override
public InputType getOutputType(int layerIndex, InputType inputType) {
if (inputType == null || inputType.getType() != InputType.Type.RNN) {
throw new IllegalStateException(
"Invalid input for 1D CNN layer (layer index = "
+ layerIndex
+ ", layer name = \""
+ getName()
+ "\"): expect RNN input type with size > 0. Got: "
+ inputType);
}
InputType.InputTypeRecurrent it = (InputType.InputTypeRecurrent) inputType;
long inputTsLength = it.getTimeSeriesLength();
long outLength;
if (inputTsLength < 0) {
// Probably: user did InputType.recurrent(x) without specifying sequence length
outLength = -1;
} else {
outLength =
Convolution1DUtils.getOutputSize(
inputTsLength,
getKernelSize()[0],
getStride()[0],
getPadding()[0],
getConvolutionMode(),
getDilation()[0]);
}
return InputType.recurrent(nOut, outLength, rnnDataFormat);
}
@Override
public void setNIn(InputType inputType, boolean override) {
if (inputType == null || inputType.getType() != InputType.Type.RNN) {
throw new IllegalStateException(
"Invalid input for 1D CNN layer (layer name = \""
+ getName()
+ "\"): expect RNN input type with size > 0. Got: "
+ inputType);
}
InputType.InputTypeRecurrent r = (InputType.InputTypeRecurrent) inputType;
if (nIn <= 0 || override) {
this.nIn = r.getSize();
}
if (this.rnnDataFormat == null || override) this.rnnDataFormat = r.getFormat();
if (this.dataFormat == null || override)
this.dataFormat = rnnDataFormat == RNNFormat.NCW ? CNN2DFormat.NCHW : CNN2DFormat.NHWC;
}
@Override
public InputPreProcessor getPreProcessorForInputType(InputType inputType) {
if (inputType == null) {
throw new IllegalStateException(
"Invalid input for Convolution1D layer (layer name=\""
+ getName()
+ "\"): input is null");
}
return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType, rnnDataFormat, getName());
}
/**
* This is a report of the estimated memory consumption for the given layer
*
* @param inputType Input type to the layer. Memory consumption is often a function of the input
* type
* @return Memory report for the layer
*/
@Override
public LayerMemoryReport getMemoryReport(InputType inputType) {
    // Memory report is not currently implemented for this layer
    return null;
}
  protected boolean allowCausal() {
    // Causal convolution is allowed for 1D convolution layers
    return true;
  }
  private static final class Convolution1DNewBuilderImpl
      extends Convolution1DNewBuilder<Convolution1DNew, Convolution1DNewBuilderImpl> {
    public Convolution1DNew build() {
      // Build once to see which optional values were left unset, then apply builder defaults
      Convolution1DNew l = new Convolution1DNew(this);
      if (l.getDilation() == null) {
        dilation(1, 1);
      }
      if (l.getPadding() == null) {
        padding(0);
      }
      // Rebuild with the defaults applied, then validate the final configuration
      l = new Convolution1DNew(this);
      Convolution1DUtils.validateConvolutionModePadding(l.getConvolutionMode(), l.getPadding()[0]);
      Convolution1DUtils.validateCnn1DKernelStridePadding(
          l.getKernelSize()[0], l.getStride()[0], l.getPadding()[0]);
      l.initializeConstraints();
      return l;
    }
  }
}
public abstract static class Convolution1DNewBuilder<
C extends Convolution1DNew, B extends Convolution1DNewBuilder<C, B>>
extends AbstractConvolutionLayerBuilder<C, B> {
    private int dimensions(Class<?> arrayType) {
      return arrayType.isArray() ? 1 + dimensions(arrayType.getComponentType()) : 0;
    }
    @Override
    public B kernelSize(int @NonNull ... kernel) {
      // TODO: callers may pass multi-element arrays, but only the first element is used for 1D convolution
      if (dimensions(kernel.getClass()) > 1)
        log.warn(
            "Kernel size has '{}' dimensions; only the first dimension is used for a 1D convolution layer.",
            dimensions(kernel.getClass()));
      super.kernelSize(
          ValidationUtils.validate1NonNegative(new int[] {kernel[0]}, "kernelSize")[0], 1);
      return self();
    }
    public B padding(int @NonNull ... padding) {
      // TODO: callers may pass multi-element arrays, but only the first element is used
      super.padding(ValidationUtils.validate1NonNegative(new int[] {padding[0]}, "padding"));
      return self();
    }
    public B dilation(int @NonNull ... dilation) {
      // TODO: callers may pass multi-element arrays, but only the first element is used
      super.dilation(ValidationUtils.validate1NonNegative(new int[] {dilation[0]}, "dilation"));
      return self();
    }
    public B stride(int @NonNull ... stride) {
      // TODO: callers may pass multi-element arrays, but only the first element is used
      super.stride(ValidationUtils.validate1NonNegative(new int[] {stride[0]}, "stride")[0], 1);
      return self();
    }
}
}
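For context, a minimal usage sketch of the Convolution1DNew builder declared above (a sketch under assumptions, not part of the committed sources): the layer name, nIn/nOut values and kernel/stride/padding choices are illustrative, and the nIn/nOut/name setters are assumed to be inherited from the parent configuration builders, as elsewhere in this commit.

// Hedged sketch: configuring a Convolution1DNew layer for a multivariate time series
// with 8 input channels (P) and 16 filters. All values are illustrative only.
Convolution1DNew conv1d =
    Convolution1DNew.builder()
        .name("conv1d-example")
        .nIn(8)            // P variables / channels
        .nOut(16)          // number of filters
        .kernelSize(5)     // only the first element is used; kernel width is fixed to 1
        .stride(1)
        .padding(0)
        .dilation(1)
        .build();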

View File

@ -61,6 +61,23 @@ import org.nd4j.linalg.api.ndarray.INDArray;
@EqualsAndHashCode(callSuper = true) @EqualsAndHashCode(callSuper = true)
@SuperBuilder(builderMethodName = "innerBuilder") @SuperBuilder(builderMethodName = "innerBuilder")
public class ConvolutionLayer extends FeedForwardLayer { public class ConvolutionLayer extends FeedForwardLayer {
public static ConvolutionLayerBuilder<?, ?> builder() {
return innerBuilder();
}
public static ConvolutionLayerBuilder<?, ?> builder(int... kernelSize) {
return innerBuilder().kernelSize(kernelSize);
}
public static ConvolutionLayerBuilder<?, ?> builder(int[] kernelSize, int[] stride) {
return innerBuilder().kernelSize(kernelSize).stride(stride);
}
public static ConvolutionLayerBuilder<?, ?> builder(
int[] kernelSize, int[] stride, int[] padding) {
return innerBuilder().kernelSize(kernelSize).stride(stride).padding(padding);
}
/** /**
* Size of the convolution rows/columns * Size of the convolution rows/columns
* *
@ -122,23 +139,6 @@ public class ConvolutionLayer extends FeedForwardLayer {
@Builder.Default @JsonIgnore @EqualsAndHashCode.Exclude @Getter @Setter @Builder.Default @JsonIgnore @EqualsAndHashCode.Exclude @Getter @Setter
private boolean defaultValueOverriden = false; private boolean defaultValueOverriden = false;
public static ConvolutionLayerBuilder<?, ?> builder() {
return innerBuilder();
}
public static ConvolutionLayerBuilder<?, ?> builder(int... kernelSize) {
return innerBuilder().kernelSize(kernelSize);
}
public static ConvolutionLayerBuilder<?, ?> builder(int[] kernelSize, int[] stride) {
return innerBuilder().kernelSize(kernelSize).stride(stride);
}
public static ConvolutionLayerBuilder<?, ?> builder(
int[] kernelSize, int[] stride, int[] padding) {
return innerBuilder().kernelSize(kernelSize).stride(stride).padding(padding);
}
public boolean hasBias() { public boolean hasBias() {
return hasBias; return hasBias;
} }
@ -429,6 +429,7 @@ public class ConvolutionLayer extends FeedForwardLayer {
} }
} }
/*
private static final class ConvolutionLayerBuilderImpl private static final class ConvolutionLayerBuilderImpl
extends ConvolutionLayerBuilder<ConvolutionLayer, ConvolutionLayerBuilderImpl> { extends ConvolutionLayerBuilder<ConvolutionLayer, ConvolutionLayerBuilderImpl> {
public ConvolutionLayer build() { public ConvolutionLayer build() {
@ -473,6 +474,6 @@ public class ConvolutionLayer extends FeedForwardLayer {
return l; return l;
} }
} }
*/
} }

View File

@ -38,56 +38,24 @@ import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ndarray.INDArray;
/**
 * Deconvolution2D layer. nIn in the input layer is the number of channels; nOut is the number of
 * filters to be used in the net, or in other words the channels. The builder specifies the
 * filter/kernel size, the stride and padding. The pooling layer takes the kernel size.
 */
@Data @Data
@ToString(callSuper = true) @ToString(callSuper = true)
@EqualsAndHashCode(callSuper = true) @EqualsAndHashCode(callSuper = true)
@Jacksonized @SuperBuilder(builderMethodName = "innerBuilder") @Jacksonized
@SuperBuilder
public class Deconvolution2D extends ConvolutionLayer { public class Deconvolution2D extends ConvolutionLayer {
@Builder.Default private CNN2DFormat format = CNN2DFormat.NCHW;
@Builder.Default
private CNN2DFormat format = CNN2DFormat.NCHW;
protected boolean allowCausal() { protected boolean allowCausal() {
// Causal convolution - allowed for 1D only // Causal convolution - allowed for 1D only
return false; return false;
} }
private static final class Deconvolution2DBuilderImpl extends Deconvolution2DBuilder<Deconvolution2D, Deconvolution2DBuilderImpl> {
public Deconvolution2D build() {
Deconvolution2D l = new Deconvolution2D(this);
l.initializeConstraints();
return l;
}
}
public static abstract class Deconvolution2DBuilder<C extends Deconvolution2D, B extends Deconvolution2DBuilder<C, B>> extends ConvolutionLayerBuilder<C, B> {
@Override
public B kernelSize(int... kernelSize) {
super.kernelSize(ValidationUtils.validate2NonNegative(kernelSize, false, "kernelSize"));
return self();
}
@Override
public B stride(int... stride) {
super.stride(ValidationUtils.validate2NonNegative(stride, false, "stride"));
return self();
}
@Override
public B padding(int... padding) {
super.padding(ValidationUtils.validate2NonNegative(padding, false, "padding"));
return self();
}
@Override
public B dilation(int... dilation) {
super.dilation(ValidationUtils.validate2NonNegative(dilation, false, "dilation"));
return self();
}
}
public boolean hasBias() { public boolean hasBias() {
return isHasBias(); return isHasBias();
} }
@ -108,8 +76,13 @@ private CNN2DFormat format = CNN2DFormat.NCHW;
} }
@Override @Override
public Layer instantiate(NeuralNetConfiguration conf, Collection<TrainingListener> trainingListeners, public Layer instantiate(
int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { NeuralNetConfiguration conf,
Collection<TrainingListener> trainingListeners,
int layerIndex,
INDArray layerParamsView,
boolean initializeParams,
DataType networkDataType) {
setNetConfiguration(conf); setNetConfiguration(conf);
LayerValidation.assertNInNOutSet("Deconvolution2D", getName(), layerIndex, getNIn(), getNOut()); LayerValidation.assertNInNOutSet("Deconvolution2D", getName(), layerIndex, getNIn(), getNOut());
LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
@ -135,13 +108,61 @@ private CNN2DFormat format = CNN2DFormat.NCHW;
@Override @Override
public InputType getOutputType(int layerIndex, InputType inputType) { public InputType getOutputType(int layerIndex, InputType inputType) {
if (inputType == null || inputType.getType() != InputType.Type.CNN) { if (inputType == null || inputType.getType() != InputType.Type.CNN) {
throw new IllegalStateException("Invalid input for Convolution layer (layer name=\"" + getName() throw new IllegalStateException(
+ "\"): Expected CNN input, got " + inputType); "Invalid input for Convolution layer (layer name=\""
+ getName()
+ "\"): Expected CNN input, got "
+ inputType);
} }
return InputTypeUtil.getOutputTypeDeconvLayer(inputType, getKernelSize(), getStride(), getPadding(), getDilation(), getConvolutionMode(), return InputTypeUtil.getOutputTypeDeconvLayer(
nOut, layerIndex, getName(), Deconvolution2DLayer.class); inputType,
getKernelSize(),
getStride(),
getPadding(),
getDilation(),
getConvolutionMode(),
nOut,
layerIndex,
getName(),
Deconvolution2DLayer.class);
} }
private static final class Deconvolution2DBuilderImpl
extends Deconvolution2DBuilder<Deconvolution2D, Deconvolution2DBuilderImpl> {
public Deconvolution2D build() {
Deconvolution2D l = new Deconvolution2D(this);
l.initializeConstraints();
return l;
}
}
public abstract static class Deconvolution2DBuilder<
C extends Deconvolution2D, B extends Deconvolution2DBuilder<C, B>>
extends ConvolutionLayerBuilder<C, B> {
@Override
public B kernelSize(int... kernelSize) {
super.kernelSize(ValidationUtils.validate2NonNegative(kernelSize, false, "kernelSize"));
return self();
}
@Override
public B stride(int... stride) {
super.stride(ValidationUtils.validate2NonNegative(stride, false, "stride"));
return self();
}
@Override
public B padding(int... padding) {
super.padding(ValidationUtils.validate2NonNegative(padding, false, "padding"));
return self();
}
@Override
public B dilation(int... dilation) {
super.dilation(ValidationUtils.validate2NonNegative(dilation, false, "dilation"));
return self();
}
}
} }

View File

@ -63,6 +63,7 @@ public class DenseLayer extends FeedForwardLayer {
LayerValidation.assertNInNOutSet( LayerValidation.assertNInNOutSet(
"DenseLayerConfiguration", getName(), layerIndex, getNIn(), getNOut()); "DenseLayerConfiguration", getName(), layerIndex, getNIn(), getNOut());
LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
lconf.setNetConfiguration(conf);
runInheritance(); runInheritance();
org.deeplearning4j.nn.layers.feedforward.dense.DenseLayer ret = org.deeplearning4j.nn.layers.feedforward.dense.DenseLayer ret =

View File

@ -31,6 +31,7 @@ import lombok.experimental.SuperBuilder;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import net.brutex.ai.dnn.api.ILayerConfiguration; import net.brutex.ai.dnn.api.ILayerConfiguration;
import net.brutex.ai.dnn.api.LayerType; import net.brutex.ai.dnn.api.LayerType;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.api.ParamInitializer;
import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.api.layers.LayerConstraint;
import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.InputPreProcessor;
@ -56,7 +57,7 @@ import org.nd4j.linalg.learning.regularization.Regularization;
@NoArgsConstructor @NoArgsConstructor
// @JsonIdentityInfo(generator= ObjectIdGenerators.IntSequenceGenerator.class, property="@id") // @JsonIdentityInfo(generator= ObjectIdGenerators.IntSequenceGenerator.class, property="@id")
@Slf4j @Slf4j
@SuperBuilder @SuperBuilder(toBuilder = true)
public abstract class LayerConfiguration public abstract class LayerConfiguration
implements ILayerConfiguration, Serializable, Cloneable { // ITrainableLayerConfiguration implements ILayerConfiguration, Serializable, Cloneable { // ITrainableLayerConfiguration
@ -66,10 +67,20 @@ public abstract class LayerConfiguration
@Getter @Setter protected List<LayerConstraint> biasConstraints; @Getter @Setter protected List<LayerConstraint> biasConstraints;
@Getter @Setter protected List<LayerConstraint> constraints; @Getter @Setter protected List<LayerConstraint> constraints;
@Getter @Setter protected IWeightNoise weightNoise; @Getter @Setter protected IWeightNoise weightNoise;
@Builder.Default private @Getter @Setter LinkedHashSet<String> variables = new LinkedHashSet<>(); @Builder.Default private @Getter @Setter @NonNull LinkedHashSet<String> variables = new LinkedHashSet<>();
@Getter @Setter private IDropout dropOut; @Getter @Setter private IDropout dropOut;
/** The type of the layer, basically defines the base class and its properties */ /** The type of the layer, basically defines the base class and its properties */
@Builder.Default @Getter @Setter @NonNull private LayerType type = LayerType.UNKNOWN; @Builder.Default @Getter @Setter @NonNull private LayerType type = LayerType.UNKNOWN;
/**
* Number of parameters this layer has as a result of its configuration.
* @return number of parameters
*/
public long numParams() {
return initializer().numParams(this);
}
/** /**
* A reference to the neural net configuration. This field is excluded from json serialization as * A reference to the neural net configuration. This field is excluded from json serialization as
* well as from equals check to avoid circular referenced. * well as from equals check to avoid circular referenced.

View File

@ -22,6 +22,8 @@ package org.deeplearning4j.nn.conf.layers;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import java.util.*; import java.util.*;
import com.fasterxml.jackson.annotation.JsonProperty;
import lombok.*; import lombok.*;
import lombok.experimental.SuperBuilder; import lombok.experimental.SuperBuilder;
import lombok.extern.jackson.Jacksonized; import lombok.extern.jackson.Jacksonized;
@ -59,10 +61,12 @@ public class LocallyConnected1D extends SameDiffLayer {
/** /**
* @param nIn Number of inputs to the layer (input size) * @param nIn Number of inputs to the layer (input size)
*/ */
@JsonProperty("nIn")
private long nIn; private long nIn;
/** /**
* @param nOut Number of outputs (output size) * @param nOut Number of outputs (output size)
*/ */
@JsonProperty("nOut")
private long nOut; private long nOut;
/** /**
* @param activation Activation function for the layer * @param activation Activation function for the layer

View File

@ -34,6 +34,16 @@ import org.nd4j.linalg.learning.regularization.Regularization;
@SuperBuilder @SuperBuilder
public abstract class NoParamLayer extends LayerConfiguration { public abstract class NoParamLayer extends LayerConfiguration {
/**
* Number of parameters in this layer. This will always return 0.
*
* @return 0
*/
@Override
public long numParams() {
return 0;
}
@Override @Override
public ParamInitializer initializer() { public ParamInitializer initializer() {
return EmptyParamInitializer.getInstance(); return EmptyParamInitializer.getInstance();
@ -58,6 +68,7 @@ public abstract class NoParamLayer extends LayerConfiguration {
/** /**
* Will always return no-Op updater. * Will always return no-Op updater.
*
* @return * @return
*/ */
@Override @Override
@ -65,7 +76,7 @@ public abstract class NoParamLayer extends LayerConfiguration {
return Updater.NONE.getIUpdaterWithDefaultConfig(); return Updater.NONE.getIUpdaterWithDefaultConfig();
} }
public static abstract class NoParamLayerBuilder<C extends NoParamLayer, B extends NoParamLayerBuilder<C,B>> public abstract static class NoParamLayerBuilder<
extends LayerConfigurationBuilder<C,B> C extends NoParamLayer, B extends NoParamLayerBuilder<C, B>>
{} extends LayerConfigurationBuilder<C, B> {}
} }

View File

@ -23,6 +23,7 @@ package org.deeplearning4j.nn.conf.layers;
import java.util.Collection; import java.util.Collection;
import java.util.Map; import java.util.Map;
import lombok.EqualsAndHashCode; import lombok.EqualsAndHashCode;
import lombok.NonNull;
import lombok.ToString; import lombok.ToString;
import lombok.experimental.SuperBuilder; import lombok.experimental.SuperBuilder;
import lombok.extern.jackson.Jacksonized; import lombok.extern.jackson.Jacksonized;
@ -35,6 +36,7 @@ import org.deeplearning4j.optimize.api.TrainingListener;
import org.deeplearning4j.util.Convolution1DUtils; import org.deeplearning4j.util.Convolution1DUtils;
import org.deeplearning4j.util.Convolution2DUtils; import org.deeplearning4j.util.Convolution2DUtils;
import org.deeplearning4j.util.ValidationUtils; import org.deeplearning4j.util.ValidationUtils;
import org.jetbrains.annotations.NotNull;
import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ndarray.INDArray;
@ -50,9 +52,91 @@ import org.nd4j.linalg.api.ndarray.INDArray;
@ToString(callSuper = true) @ToString(callSuper = true)
@EqualsAndHashCode(callSuper = true) @EqualsAndHashCode(callSuper = true)
@Jacksonized @Jacksonized
@SuperBuilder @SuperBuilder(builderMethodName = "innerBuilder")
public class Subsampling1DLayer extends SubsamplingLayer { public class Subsampling1DLayer extends SubsamplingLayer {
public static Subsampling1DLayerBuilder<?, ?> builder() {
return innerBuilder();
}
public static Subsampling1DLayerBuilder<?, ?> builder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType) {
return innerBuilder()
.poolingType(poolingType);
}
public static Subsampling1DLayerBuilder<?, ?> builder(PoolingType poolingType) {
return innerBuilder()
.poolingType(poolingType.toPoolingType());
}
public static Subsampling1DLayerBuilder<?, ?> builder(int... kernelSize) {
return innerBuilder()
.kernelSize(kernelSize);
}
public static Subsampling1DLayerBuilder<?, ?> builder(int[] kernelSize, int[] stride) {
return innerBuilder()
.kernelSize(kernelSize)
.stride(stride);
}
public static Subsampling1DLayerBuilder<?, ?> builder(int[] kernelSize, int[] stride, int[] padding) {
return innerBuilder()
.kernelSize(kernelSize)
.stride(stride)
.padding(padding);
}
public static Subsampling1DLayerBuilder<?, ?> builder(PoolingType poolingType, int[] kernelSize, int[] stride, int[] padding) {
return innerBuilder()
.poolingType(poolingType.toPoolingType())
.kernelSize(kernelSize)
.stride(stride)
.padding(padding)
;
}
public static Subsampling1DLayerBuilder<?, ?> builder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType, int[] kernelSize, int[] stride, int[] padding) {
return innerBuilder()
.poolingType(poolingType)
.kernelSize(kernelSize)
.stride(stride)
.padding(padding)
;
}
public static Subsampling1DLayerBuilder<?, ?> builder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType, int[] kernelSize) {
return innerBuilder()
.poolingType(poolingType)
.kernelSize(kernelSize)
;
}
public static Subsampling1DLayerBuilder<?, ?> builder(PoolingType poolingType, int[] kernelSize) {
return innerBuilder()
.poolingType(poolingType.toPoolingType())
.kernelSize(kernelSize)
;
}
public static Subsampling1DLayerBuilder<?, ?> builder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType, int[] kernelSize, int[] stride) {
return innerBuilder()
.poolingType(poolingType)
.kernelSize(kernelSize)
.stride(stride)
;
}
public static Subsampling1DLayerBuilder<?, ?> builder(PoolingType poolingType, int[] kernelSize, int[] stride) {
return innerBuilder()
.poolingType(poolingType.toPoolingType())
.kernelSize(kernelSize)
.stride(stride)
;
}
@Override @Override
public org.deeplearning4j.nn.api.Layer instantiate( public org.deeplearning4j.nn.api.Layer instantiate(
NeuralNetConfiguration conf, NeuralNetConfiguration conf,
@ -176,20 +260,20 @@ public class Subsampling1DLayer extends SubsamplingLayer {
* @return * @return
*/ */
@Override @Override
public B kernelSize(int... kernelSize) { public B kernelSize(int @NonNull ... kernelSize) {
super.kernelSize( ValidationUtils.validate1NonNegative(kernelSize, "kernelSize")[0]); super.kernelSize(ValidationUtils.validate1NonNegative(new int[]{kernelSize[0]}, "kernelSize")[0]); //fix width = 1
return self(); return self();
} }
@Override @Override
public B stride(int... stride) { public B stride(@NotNull int... stride) {
super.stride( ValidationUtils.validate1NonNegative(stride, "stride")[0]); super.stride( ValidationUtils.validate1NonNegative(new int[]{stride[0]}, "stride")[0]);
return self(); return self();
} }
@Override @Override
public B padding(int... padding) { public B padding(@NotNull int... padding) {
super.padding( ValidationUtils.validate1NonNegative(padding, "padding")[0]); super.padding( ValidationUtils.validate1NonNegative(new int[]{padding[0]}, "padding"));
return self(); return self();
} }
} }

View File

@ -27,10 +27,7 @@ import lombok.*;
import lombok.experimental.SuperBuilder; import lombok.experimental.SuperBuilder;
import lombok.extern.jackson.Jacksonized; import lombok.extern.jackson.Jacksonized;
import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.api.ParamInitializer;
import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.*;
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.conf.memory.LayerMemoryReport;
import org.deeplearning4j.nn.conf.memory.MemoryReport; import org.deeplearning4j.nn.conf.memory.MemoryReport;
@ -84,6 +81,7 @@ public class SubsamplingLayer extends NoParamLayer {
* @param padding padding in the height and width dimensions * @param padding padding in the height and width dimensions
*/ */
@Builder.Default protected int[] padding = new int[] {0, 0}; @Builder.Default protected int[] padding = new int[] {0, 0};
protected int pnorm; protected int pnorm;
@Builder.Default protected double eps = 1e-8; @Builder.Default protected double eps = 1e-8;
/** /**
@ -104,6 +102,7 @@ public class SubsamplingLayer extends NoParamLayer {
*/ */
protected @Builder.Default CNN2DFormat dataFormat = protected @Builder.Default CNN2DFormat dataFormat =
CNN2DFormat.NCHW; // default value for legacy reasons CNN2DFormat.NCHW; // default value for legacy reasons
protected @Builder.Default RNNFormat rnnFormat = RNNFormat.NCW;
/** /**
* When doing average pooling, should the padding values be included in the divisor or not?<br> * When doing average pooling, should the padding values be included in the divisor or not?<br>
* Not applicable for max and p-norm pooling.<br> * Not applicable for max and p-norm pooling.<br>
@ -127,6 +126,7 @@ public class SubsamplingLayer extends NoParamLayer {
* average pooling * average pooling
*/ */
@Builder.Default protected boolean avgPoolIncludePadInDivisor = true; @Builder.Default protected boolean avgPoolIncludePadInDivisor = true;
/** /**
* Kernel dilation. Default: {1, 1}, which is standard convolutions. Used for implementing dilated * Kernel dilation. Default: {1, 1}, which is standard convolutions. Used for implementing dilated
* convolutions, which are also known as atrous convolutions.<br> * convolutions, which are also known as atrous convolutions.<br>
@ -301,7 +301,7 @@ public class SubsamplingLayer extends NoParamLayer {
public void setNIn(InputType inputType, boolean override) { public void setNIn(InputType inputType, boolean override) {
// No op: subsampling layer doesn't have nIn value // No op: subsampling layer doesn't have nIn value
if (!defaultValueOverridden || override) { if (!defaultValueOverridden || override) {
this.dataFormat = ((InputType.InputTypeConvolutional) inputType).getFormat(); this.rnnFormat = ((InputType.InputTypeRecurrent) inputType).getFormat();
defaultValueOverridden = true; defaultValueOverridden = true;
} }
} }
@ -355,14 +355,6 @@ public class SubsamplingLayer extends NoParamLayer {
.build(); .build();
} }
public int getPnorm() {
return pnorm;
}
public double getEps() {
return eps;
}
public enum PoolingType { public enum PoolingType {
MAX, MAX,
AVG, AVG,
@ -394,33 +386,33 @@ public class SubsamplingLayer extends NoParamLayer {
return self(); return self();
} }
public B eps(int eps) { public B eps(double eps) {
ValidationUtils.validateNonNegative(eps, "eps"); ValidationUtils.validateNonNegative(eps, "eps");
this.eps$value = eps; this.eps$value = eps;
this.eps$set = true; this.eps$set = true;
return self(); return self();
} }
public B kernelSize(int... kernelSize) { public B kernelSize(int @NonNull... kernelSize) {
this.kernelSize$value = ValidationUtils.validate2NonNegative(kernelSize, false, "kernelSize"); this.kernelSize$value = ValidationUtils.validate2NonNegative(kernelSize, true, "kernelSize");
this.kernelSize$set = true; this.kernelSize$set = true;
return self(); return self();
} }
public B stride(int... stride) { public B stride(int @NonNull ... stride) {
this.stride$value = ValidationUtils.validate2NonNegative(stride, false, "stride"); this.stride$value = ValidationUtils.validate2NonNegative(stride, true, "stride");
this.stride$set = true; this.stride$set = true;
return self(); return self();
} }
public B padding(int... padding) { public B padding(int @NonNull ... padding) {
this.padding$value = ValidationUtils.validate2NonNegative(padding, false, "padding"); this.padding$value = ValidationUtils.validate2NonNegative(padding, true, "padding");
this.padding$set = true; this.padding$set = true;
return self(); return self();
} }
public B dilation(int... dilation) { public B dilation(int @NonNull ... dilation) {
this.dilation$value = ValidationUtils.validate2NonNegative(dilation, false, "dilation"); this.dilation$value = ValidationUtils.validate2NonNegative(dilation, true, "dilation");
this.dilation$set = true; this.dilation$set = true;
return self(); return self();
} }

View File

@ -74,6 +74,7 @@ public class FrozenLayer extends LayerConfiguration {
boolean initializeParams, boolean initializeParams,
DataType networkDataType) { DataType networkDataType) {
innerConfiguration.setNetConfiguration(conf);
// Need to be able to instantiate a layer, from a config - for JSON -> net type situations // Need to be able to instantiate a layer, from a config - for JSON -> net type situations
org.deeplearning4j.nn.api.Layer underlying = org.deeplearning4j.nn.api.Layer underlying =
innerConfiguration.instantiate( innerConfiguration.instantiate(

View File

@ -20,6 +20,7 @@
package org.deeplearning4j.nn.conf.layers.samediff; package org.deeplearning4j.nn.conf.layers.samediff;
import com.fasterxml.jackson.annotation.JsonIgnore;
import java.util.Collection; import java.util.Collection;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
@ -52,7 +53,8 @@ import org.nd4j.linalg.learning.regularization.WeightDecay;
@EqualsAndHashCode(callSuper = true, doNotUseGetters = true) @EqualsAndHashCode(callSuper = true, doNotUseGetters = true)
@NoArgsConstructor @NoArgsConstructor
@SuperBuilder @SuperBuilder
public abstract class AbstractSameDiffLayer extends LayerConfiguration { public abstract class AbstractSameDiffLayer extends LayerConfiguration
implements org.deeplearning4j.nn.api.ITraininableLayerConfiguration {
/** /**
* The regularization for the parameters (excluding biases) - for example {@link WeightDecay} * The regularization for the parameters (excluding biases) - for example {@link WeightDecay}
@ -63,16 +65,14 @@ public abstract class AbstractSameDiffLayer extends LayerConfiguration {
* @param regularization Regularization to apply for the network parameters/weights (excluding * @param regularization Regularization to apply for the network parameters/weights (excluding
* biases) * biases)
*/ */
@Getter @Getter protected List<Regularization> regularization;
protected List<Regularization> regularization;
/** /**
* The regularization for the biases only - for example {@link WeightDecay} -- SETTER -- Set the * The regularization for the biases only - for example {@link WeightDecay} -- SETTER -- Set the
* regularization for the biases only - for example {@link WeightDecay} * regularization for the biases only - for example {@link WeightDecay}
* *
* @param regularizationBias Regularization to apply for the network biases only * @param regularizationBias Regularization to apply for the network biases only
*/ */
@Getter @Getter protected List<Regularization> regularizationBias;
protected List<Regularization> regularizationBias;
/** /**
* Gradient updater. For example, {@link org.nd4j.linalg.learning.config.Adam} or {@link * Gradient updater. For example, {@link org.nd4j.linalg.learning.config.Adam} or {@link
* org.nd4j.linalg.learning.config.Nesterovs} * org.nd4j.linalg.learning.config.Nesterovs}
@ -87,20 +87,22 @@ public abstract class AbstractSameDiffLayer extends LayerConfiguration {
* @param biasUpdater Updater to use for bias parameters * @param biasUpdater Updater to use for bias parameters
*/ */
protected @Getter @Setter IUpdater biasUpdater; protected @Getter @Setter IUpdater biasUpdater;
@Getter @Setter
protected GradientNormalization gradientNormalization; @Getter @Setter protected GradientNormalization gradientNormalization;
@Getter @Setter @Getter @Setter protected double gradientNormalizationThreshold = Double.NaN;
protected double gradientNormalizationThreshold = Double.NaN; @Getter @Setter private SDLayerParams layerParams;
@Getter @Setter
private SDLayerParams layerParams; @Getter @Setter private DataType dataType;
@Override @Override
public void runInheritance(@NotNull NeuralNetConfiguration conf) { public void runInheritance(@NotNull NeuralNetConfiguration conf) {
super.runInheritance(conf); super.runInheritance(conf);
if (this.biasUpdater == null) this.biasUpdater = conf.getBiasUpdater(); if (this.biasUpdater == null) this.biasUpdater = conf.getBiasUpdater();
if (this.updater == null) this.updater = conf.getUpdater(); if (this.updater == null) this.updater = conf.getUpdater();
if (this.regularizationBias == null || regularizationBias.isEmpty()) this.regularizationBias = conf.getRegularizationBias(); if (this.regularizationBias == null || regularizationBias.isEmpty())
if (this.regularization == null || regularization.isEmpty()) this.regularization = conf.getRegularization(); this.regularizationBias = conf.getRegularizationBias();
if (this.regularization == null || regularization.isEmpty())
this.regularization = conf.getRegularization();
// if( this.weightInit == null) this.weightInit = conf.getWeightInit(); // if( this.weightInit == null) this.weightInit = conf.getWeightInit();
if (this.gradientNormalization == null) if (this.gradientNormalization == null)
this.gradientNormalization = conf.getGradientNormalization(); this.gradientNormalization = conf.getGradientNormalization();
@ -109,6 +111,7 @@ public abstract class AbstractSameDiffLayer extends LayerConfiguration {
this.gradientNormalizationThreshold = conf.getGradientNormalizationThreshold(); this.gradientNormalizationThreshold = conf.getGradientNormalizationThreshold();
} }
} }
@Override @Override
public List<Regularization> getRegularizationByParam(String paramName) { public List<Regularization> getRegularizationByParam(String paramName) {
if (layerParams.isWeightParam(paramName)) { if (layerParams.isWeightParam(paramName)) {
@ -119,6 +122,7 @@ public abstract class AbstractSameDiffLayer extends LayerConfiguration {
return null; return null;
} }
@JsonIgnore
public SDLayerParams getLayerParams() { public SDLayerParams getLayerParams() {
if (layerParams == null) { if (layerParams == null) {
layerParams = new SDLayerParams(); layerParams = new SDLayerParams();
@ -138,7 +142,6 @@ public abstract class AbstractSameDiffLayer extends LayerConfiguration {
return null; return null;
} }
/** /**
* Define the parameters for the network. Use {@link SDLayerParams#addWeightParam(String, * Define the parameters for the network. Use {@link SDLayerParams#addWeightParam(String,
* long...)} and {@link SDLayerParams#addBiasParam(String, long...)} * long...)} and {@link SDLayerParams#addBiasParam(String, long...)}
@ -207,7 +210,6 @@ public abstract class AbstractSameDiffLayer extends LayerConfiguration {
fanIn, fanOut, array.shape(), weightInit, null, paramReshapeOrder(null), array); fanIn, fanOut, array.shape(), weightInit, null, paramReshapeOrder(null), array);
} }
/** /**
* This method generates an "all ones" mask array for use in the SameDiff model when none is * This method generates an "all ones" mask array for use in the SameDiff model when none is
* provided. * provided.

View File

@ -80,10 +80,8 @@ public abstract class AbstractLayer<LayerConf_T extends LayerConfiguration> impl
protected DataType dataType; protected DataType dataType;
protected @Getter @Setter int iterationCount; protected @Getter @Setter int iterationCount;
protected @Getter @Setter int epochCount; protected @Getter @Setter int epochCount;
@JsonIgnore @JsonIgnore private @Getter @Setter IModel net;
private @Getter @Setter IModel net; @JsonIgnore @Getter @Setter @NonNull private NeuralNetConfiguration netConfiguration;
@JsonIgnore
@Getter @Setter @NonNull private NeuralNetConfiguration netConfiguration;
public AbstractLayer(@NonNull LayerConfiguration layerConf, @NonNull DataType dataType) { public AbstractLayer(@NonNull LayerConfiguration layerConf, @NonNull DataType dataType) {
//noinspection unchecked //noinspection unchecked
@ -95,19 +93,18 @@ public abstract class AbstractLayer<LayerConf_T extends LayerConfiguration> impl
} }
this.dataType = dataType; this.dataType = dataType;
if (layerConfiguration.getNetConfiguration() == null) { if (layerConfiguration.getNetConfiguration() == null) {
throw new RuntimeException("You cannot create a layer from a layer configuration, that is not part of any neural network configuration."); throw new RuntimeException(
"You cannot create a layer from a layer configuration, that is not part of any neural network configuration.");
} }
this.net = layerConfiguration.getNetConfiguration().getNet(); this.net = layerConfiguration.getNetConfiguration().getNet();
} }
public void addTrainingListeners(TrainingListener... listeners) { public void addTrainingListeners(TrainingListener... listeners) {
if(listeners != null) if (listeners != null) trainingListeners.addAll(List.of(listeners));
trainingListeners.addAll(List.of(listeners));
} }
public void addTrainingListeners(Collection<TrainingListener> listeners) { public void addTrainingListeners(Collection<TrainingListener> listeners) {
if(listeners != null) if (listeners != null) trainingListeners.addAll(listeners);
trainingListeners.addAll(listeners);
} }
@Override @Override
@ -565,7 +562,8 @@ public abstract class AbstractLayer<LayerConf_T extends LayerConfiguration> impl
*/ */
@Override @Override
public void setParamTable(Map<String, INDArray> paramTable) { public void setParamTable(Map<String, INDArray> paramTable) {
log.warn("Using setParamTable on this layer {} has no effect.", getLayerConfiguration().getName()); log.warn(
"Using setParamTable on this layer {} has no effect.", getLayerConfiguration().getName());
// throw new RuntimeException("Not implemented"); // throw new RuntimeException("Not implemented");
} }

View File

@ -662,6 +662,7 @@ public abstract class BaseLayer<LayerConfT extends BaseLayerConfiguration>
*/ */
public boolean hasBias() { public boolean hasBias() {
// Overridden by layers supporting no bias mode: dense, output, convolutional, embedding // Overridden by layers supporting no bias mode: dense, output, convolutional, embedding
return true; return true;
} }

View File

@ -24,6 +24,7 @@ import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.nn.api.ITraininableLayerConfiguration; import org.deeplearning4j.nn.api.ITraininableLayerConfiguration;
import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.conf.CacheMode; import org.deeplearning4j.nn.conf.CacheMode;
import org.deeplearning4j.nn.conf.layers.BaseLayerConfiguration;
import org.deeplearning4j.nn.conf.misc.DummyConfig; import org.deeplearning4j.nn.conf.misc.DummyConfig;
import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.DefaultGradient;
import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.gradient.Gradient;
@ -88,6 +89,8 @@ public class FrozenLayer extends BaseWrapperLayer {
return underlying.activate(input, false, workspaceMgr); return underlying.activate(input, false, workspaceMgr);
} }
@Override @Override
public void fit() { public void fit() {
if (!logFit) { if (!logFit) {

View File

@ -51,21 +51,26 @@ public class Convolution1DLayer extends ConvolutionLayer {
super(conf, dataType); super(conf, dataType);
} }
@Override @Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { public Pair<Gradient, INDArray> backpropGradient(
INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
assertInputSet(true); assertInputSet(true);
if (epsilon.rank() != 3) if (epsilon.rank() != 3)
throw new DL4JInvalidInputException("Got rank " + epsilon.rank() throw new DL4JInvalidInputException(
"Got rank "
+ epsilon.rank()
+ " array as epsilon for Convolution1D backprop with shape " + " array as epsilon for Convolution1D backprop with shape "
+ Arrays.toString(epsilon.shape()) + Arrays.toString(epsilon.shape())
+ ". Expected rank 3 array with shape [minibatchSize, features, length]. " + layerId()); + ". Expected rank 3 array with shape [minibatchSize, features, length]. "
+ layerId());
Pair<INDArray, INDArray> fwd = preOutput(false, true, workspaceMgr); Pair<INDArray, INDArray> fwd = preOutput(false, true, workspaceMgr);
IActivation afn = getTypedLayerConfiguration().getActivationFn(); IActivation afn = getTypedLayerConfiguration().getActivationFn();
INDArray delta = afn.backprop(fwd.getFirst(), epsilon).getFirst(); //TODO handle activation function params INDArray delta =
afn.backprop(fwd.getFirst(), epsilon).getFirst(); // TODO handle activation function params
Convolution1D c = getTypedLayerConfiguration(); org.deeplearning4j.nn.conf.layers.ConvolutionLayer c = getTypedLayerConfiguration();
Conv1DConfig conf = Conv1DConfig.builder() Conv1DConfig conf =
Conv1DConfig.builder()
.k(c.getKernelSize()[0]) .k(c.getKernelSize()[0])
.s(c.getStride()[0]) .s(c.getStride()[0])
.d(c.getDilation()[0]) .d(c.getDilation()[0])
@ -74,16 +79,18 @@ public class Convolution1DLayer extends ConvolutionLayer {
.paddingMode(Convolution2DUtils.paddingModeForConvolutionMode(convolutionMode)) .paddingMode(Convolution2DUtils.paddingModeForConvolutionMode(convolutionMode))
.build(); .build();
INDArray w = Convolution1DUtils.reshapeWeightArrayOrGradientForFormat( INDArray w =
getParam(ConvolutionParamInitializer.WEIGHT_KEY), Convolution1DUtils.reshapeWeightArrayOrGradientForFormat(
RNNFormat.NCW); getParam(ConvolutionParamInitializer.WEIGHT_KEY), RNNFormat.NCW);
INDArray[] inputArrs; INDArray[] inputArrs;
INDArray[] outputArrs; INDArray[] outputArrs;
INDArray wg = Convolution1DUtils.reshapeWeightArrayOrGradientForFormat( INDArray wg =
gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY), Convolution1DUtils.reshapeWeightArrayOrGradientForFormat(
getRnnDataFormat()); gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY), getRnnDataFormat());
INDArray epsOut = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, input.dataType(), input.shape()); INDArray epsOut =
workspaceMgr.createUninitialized(
ArrayType.ACTIVATION_GRAD, input.dataType(), input.shape());
INDArray input = this.input.castTo(dataType); INDArray input = this.input.castTo(dataType);
if (getTypedLayerConfiguration().getRnnDataFormat() == RNNFormat.NWC) { if (getTypedLayerConfiguration().getRnnDataFormat() == RNNFormat.NWC) {
input = input.permute(0, 2, 1); // NHWC to NCHW input = input.permute(0, 2, 1); // NHWC to NCHW
@ -106,9 +113,14 @@ public class Convolution1DLayer extends ConvolutionLayer {
Gradient retGradient = new DefaultGradient(); Gradient retGradient = new DefaultGradient();
if (getTypedLayerConfiguration().hasBias()) { if (getTypedLayerConfiguration().hasBias()) {
retGradient.setGradientFor(ConvolutionParamInitializer.BIAS_KEY, gradientViews.get(ConvolutionParamInitializer.BIAS_KEY)); retGradient.setGradientFor(
ConvolutionParamInitializer.BIAS_KEY,
gradientViews.get(ConvolutionParamInitializer.BIAS_KEY));
} }
retGradient.setGradientFor(ConvolutionParamInitializer.WEIGHT_KEY, gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY), 'c'); retGradient.setGradientFor(
ConvolutionParamInitializer.WEIGHT_KEY,
gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY),
'c');
if (getRnnDataFormat() == RNNFormat.NWC) { if (getRnnDataFormat() == RNNFormat.NWC) {
epsOut = epsOut.permute(0, 2, 1); epsOut = epsOut.permute(0, 2, 1);
} }
@ -116,7 +128,8 @@ public class Convolution1DLayer extends ConvolutionLayer {
} }
@Override @Override
protected Pair<INDArray, INDArray> preOutput4d(boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) { protected Pair<INDArray, INDArray> preOutput4d(
boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) {
Pair<INDArray, INDArray> preOutput = super.preOutput(true, forBackprop, workspaceMgr); Pair<INDArray, INDArray> preOutput = super.preOutput(true, forBackprop, workspaceMgr);
INDArray p3d = preOutput.getFirst(); INDArray p3d = preOutput.getFirst();
INDArray p = preOutput.getFirst().reshape(p3d.size(0), p3d.size(1), p3d.size(2), 1); INDArray p = preOutput.getFirst().reshape(p3d.size(0), p3d.size(1), p3d.size(2), 1);
@ -125,7 +138,8 @@ public class Convolution1DLayer extends ConvolutionLayer {
} }
@Override @Override
protected Pair<INDArray,INDArray> preOutput(boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) { protected Pair<INDArray, INDArray> preOutput(
boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) {
assertInputSet(false); assertInputSet(false);
INDArray input = this.input.castTo(dataType); INDArray input = this.input.castTo(dataType);
@ -133,8 +147,9 @@ public class Convolution1DLayer extends ConvolutionLayer {
input = input.permute(0, 2, 1); // NHWC to NCHW input = input.permute(0, 2, 1); // NHWC to NCHW
} }
Convolution1D c = getTypedLayerConfiguration(); org.deeplearning4j.nn.conf.layers.ConvolutionLayer c = getTypedLayerConfiguration();
Conv1DConfig conf = Conv1DConfig.builder() Conv1DConfig conf =
Conv1DConfig.builder()
.k(c.getKernelSize()[0]) .k(c.getKernelSize()[0])
.s(c.getStride()[0]) .s(c.getStride()[0])
.d(c.getDilation()[0]) .d(c.getDilation()[0])
@ -143,11 +158,9 @@ public class Convolution1DLayer extends ConvolutionLayer {
.paddingMode(Convolution2DUtils.paddingModeForConvolutionMode(convolutionMode)) .paddingMode(Convolution2DUtils.paddingModeForConvolutionMode(convolutionMode))
.build(); .build();
INDArray w =
INDArray w = Convolution1DUtils.reshapeWeightArrayOrGradientForFormat( Convolution1DUtils.reshapeWeightArrayOrGradientForFormat(
getParam(ConvolutionParamInitializer.WEIGHT_KEY) getParam(ConvolutionParamInitializer.WEIGHT_KEY), RNNFormat.NCW);
,RNNFormat.NCW);
INDArray[] inputs; INDArray[] inputs;
if (getTypedLayerConfiguration().hasBias()) { if (getTypedLayerConfiguration().hasBias()) {
@ -171,39 +184,51 @@ public class Convolution1DLayer extends ConvolutionLayer {
return new Pair<>(output, null); return new Pair<>(output, null);
} }
@Override @Override
public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) { public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) {
INDArray act4d = super.activate(training, workspaceMgr); INDArray act4d = super.activate(training, workspaceMgr);
INDArray act3d = act4d.rank() > 3 ? INDArray act3d =
act4d.reshape(act4d.size(0), act4d.size(1), act4d.size(2)) : act4d; act4d.rank() > 3 ? act4d.reshape(act4d.size(0), act4d.size(1), act4d.size(2)) : act4d;
if (maskArray != null) { if (maskArray != null) {
INDArray maskOut = feedForwardMaskArray(maskArray, MaskState.Active, (int)act3d.size(0)).getFirst(); INDArray maskOut =
Preconditions.checkState(act3d.size(0) == maskOut.size(0) && act3d.size(2) == maskOut.size(1), feedForwardMaskArray(maskArray, MaskState.Active, (int) act3d.size(0)).getFirst();
Preconditions.checkState(
act3d.size(0) == maskOut.size(0) && act3d.size(2) == maskOut.size(1),
"Activations dimensions (0,2) and mask dimensions (0,1) don't match: Activations %s, Mask %s", "Activations dimensions (0,2) and mask dimensions (0,1) don't match: Activations %s, Mask %s",
act3d.shape(), maskOut.shape()); act3d.shape(),
maskOut.shape());
Broadcast.mul(act3d, maskOut, act3d, 0, 2); Broadcast.mul(act3d, maskOut, act3d, 0, 2);
} }
return workspaceMgr.leverageTo(ArrayType.ACTIVATIONS, act3d); //Should be zero copy most of the time return workspaceMgr.leverageTo(
ArrayType.ACTIVATIONS, act3d); // Should be zero copy most of the time
} }
@Override @Override
public Pair<INDArray, MaskState> feedForwardMaskArray(INDArray maskArray, MaskState currentMaskState, public Pair<INDArray, MaskState> feedForwardMaskArray(
int minibatchSize) { INDArray maskArray, MaskState currentMaskState, int minibatchSize) {
INDArray reduced = Convolution2DUtils.cnn1dMaskReduction(maskArray, getTypedLayerConfiguration().getKernelSize()[0], INDArray reduced =
getTypedLayerConfiguration().getStride()[0], getTypedLayerConfiguration().getPadding()[0], getTypedLayerConfiguration().getDilation()[0], Convolution2DUtils.cnn1dMaskReduction(
maskArray,
getTypedLayerConfiguration().getKernelSize()[0],
getTypedLayerConfiguration().getStride()[0],
getTypedLayerConfiguration().getPadding()[0],
getTypedLayerConfiguration().getDilation()[0],
getTypedLayerConfiguration().getConvolutionMode()); getTypedLayerConfiguration().getConvolutionMode());
return new Pair<>(reduced, currentMaskState); return new Pair<>(reduced, currentMaskState);
} }
@Override
public Convolution1D getTypedLayerConfiguration() {
return (Convolution1D)layerConfiguration;
}
private RNNFormat getRnnDataFormat() { private RNNFormat getRnnDataFormat() {
return getTypedLayerConfiguration().getRnnDataFormat(); return getTypedLayerConfiguration().getRnnDataFormat();
} }
  /**
   * @return the typed {@link Convolution1D} configuration for this layer
   */
@Override
public Convolution1D getTypedLayerConfiguration() {
return (Convolution1D) super.getTypedLayerConfiguration();
}
} }

View File

@ -0,0 +1,226 @@
/*
* ******************************************************************************
* *
* *
* * This program and the accompanying materials are made available under the
* * terms of the Apache License, Version 2.0 which is available at
* * https://www.apache.org/licenses/LICENSE-2.0.
* *
* * See the NOTICE file distributed with this work for additional
* * information regarding copyright ownership.
* * Unless required by applicable law or agreed to in writing, software
* * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* * License for the specific language governing permissions and limitations
* * under the License.
* *
* * SPDX-License-Identifier: Apache-2.0
* *****************************************************************************
*/
package org.deeplearning4j.nn.layers.convolution;
import java.util.Arrays;
import java.util.List;
import org.deeplearning4j.exception.DL4JInvalidInputException;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.conf.RNNFormat;
import org.deeplearning4j.nn.conf.layers.Convolution1DNew;
import org.deeplearning4j.nn.conf.layers.LayerConfiguration;
import org.deeplearning4j.nn.gradient.DefaultGradient;
import org.deeplearning4j.nn.gradient.Gradient;
import org.deeplearning4j.nn.params.ConvolutionParamInitializer;
import org.deeplearning4j.nn.workspace.ArrayType;
import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr;
import org.deeplearning4j.util.Convolution1DUtils;
import org.deeplearning4j.util.Convolution2DUtils;
import org.nd4j.common.base.Preconditions;
import org.nd4j.common.primitives.Pair;
import org.nd4j.linalg.activations.IActivation;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.ops.impl.layers.convolution.Conv1D;
import org.nd4j.linalg.api.ops.impl.layers.convolution.Conv1DDerivative;
import org.nd4j.linalg.api.ops.impl.layers.convolution.config.Conv1DConfig;
import org.nd4j.linalg.api.shape.LongShapeDescriptor;
import org.nd4j.linalg.factory.Broadcast;
import org.nd4j.linalg.factory.Nd4j;
public class Convolution1DNewLayer<Layer_ConfT extends Convolution1DNew>
extends ConvolutionNewLayer<Layer_ConfT> {
public Convolution1DNewLayer(LayerConfiguration conf, DataType dataType) {
super(conf, dataType);
}
@Override
public Pair<Gradient, INDArray> backpropGradient(
INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
assertInputSet(true);
if (epsilon.rank() != 3)
throw new DL4JInvalidInputException(
"Got rank "
+ epsilon.rank()
+ " array as epsilon for Convolution1D backprop with shape "
+ Arrays.toString(epsilon.shape())
+ ". Expected rank 3 array with shape [minibatchSize, features, length]. "
+ layerId());
Pair<INDArray, INDArray> fwd = preOutput(false, true, workspaceMgr);
IActivation afn = getTypedLayerConfiguration().getActivationFn();
INDArray delta =
afn.backprop(fwd.getFirst(), epsilon).getFirst(); // TODO handle activation function params
Convolution1DNew c = getTypedLayerConfiguration();
Conv1DConfig conf =
Conv1DConfig.builder()
.k(c.getKernelSize()[0])
.s(c.getStride()[0])
.d(c.getDilation()[0])
.p(c.getPadding()[0])
.dataFormat(Conv1DConfig.NCW)
.paddingMode(Convolution2DUtils.paddingModeForConvolutionMode(convolutionMode))
.build();
INDArray w =
Convolution1DUtils.reshapeWeightArrayOrGradientForFormat(
getParam(ConvolutionParamInitializer.WEIGHT_KEY), RNNFormat.NCW);
INDArray[] inputArrs;
INDArray[] outputArrs;
INDArray wg =
Convolution1DUtils.reshapeWeightArrayOrGradientForFormat(
gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY), getRnnDataFormat());
INDArray epsOut =
workspaceMgr.createUninitialized(
ArrayType.ACTIVATION_GRAD, input.dataType(), input.shape());
INDArray input = this.input.castTo(dataType);
if (getTypedLayerConfiguration().getRnnDataFormat() == RNNFormat.NWC) {
input = input.permute(0, 2, 1); // NHWC to NCHW
}
if (getTypedLayerConfiguration().hasBias()) {
INDArray b = getParam(ConvolutionParamInitializer.BIAS_KEY);
b = b.reshape(b.length());
inputArrs = new INDArray[] {input, w, b, delta};
INDArray bg = gradientViews.get(ConvolutionParamInitializer.BIAS_KEY);
bg = bg.reshape(bg.length());
outputArrs = new INDArray[] {epsOut, wg, bg};
} else {
inputArrs = new INDArray[] {input, w, delta};
outputArrs = new INDArray[] {epsOut, wg};
}
Conv1DDerivative op = new Conv1DDerivative(inputArrs, outputArrs, conf);
Nd4j.exec(op);
Gradient retGradient = new DefaultGradient();
if (getTypedLayerConfiguration().hasBias()) {
retGradient.setGradientFor(
ConvolutionParamInitializer.BIAS_KEY,
gradientViews.get(ConvolutionParamInitializer.BIAS_KEY));
}
retGradient.setGradientFor(
ConvolutionParamInitializer.WEIGHT_KEY,
gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY),
'c');
if (getRnnDataFormat() == RNNFormat.NWC) {
epsOut = epsOut.permute(0, 2, 1);
}
return new Pair<>(retGradient, epsOut);
}
@Override
protected Pair<INDArray, INDArray> preOutput4d(
boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) {
Pair<INDArray, INDArray> preOutput = super.preOutput(true, forBackprop, workspaceMgr);
INDArray p3d = preOutput.getFirst();
INDArray p = preOutput.getFirst().reshape(p3d.size(0), p3d.size(1), p3d.size(2), 1);
preOutput.setFirst(p);
return preOutput;
}
@Override
protected Pair<INDArray, INDArray> preOutput(
boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) {
assertInputSet(false);
INDArray input = this.input.castTo(dataType);
if (getTypedLayerConfiguration().getRnnDataFormat() == RNNFormat.NWC) {
input = input.permute(0, 2, 1); // NHWC to NCHW
}
Convolution1DNew c = getTypedLayerConfiguration();
Conv1DConfig conf =
Conv1DConfig.builder()
.k(c.getKernelSize()[0])
.s(c.getStride()[0])
.d(c.getDilation()[0])
.p(c.getPadding()[0])
.dataFormat(Conv1DConfig.NCW)
.paddingMode(Convolution2DUtils.paddingModeForConvolutionMode(convolutionMode))
.build();
INDArray w =
Convolution1DUtils.reshapeWeightArrayOrGradientForFormat(
getParam(ConvolutionParamInitializer.WEIGHT_KEY), RNNFormat.NCW);
INDArray[] inputs;
if (getTypedLayerConfiguration().hasBias()) {
INDArray b = getParam(ConvolutionParamInitializer.BIAS_KEY);
b = b.reshape(b.length());
inputs = new INDArray[] {input, w, b};
} else {
inputs = new INDArray[] {input, w};
}
Conv1D op = new Conv1D(inputs, null, conf);
List<LongShapeDescriptor> outShape = op.calculateOutputShape();
op.setOutputArgument(0, Nd4j.create(outShape.get(0), false));
Nd4j.exec(op);
INDArray output = op.getOutputArgument(0);
if (getRnnDataFormat() == RNNFormat.NWC) {
output = output.permute(0, 2, 1);
}
return new Pair<>(output, null);
}
@Override
public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) {
INDArray act4d = super.activate(training, workspaceMgr);
INDArray act3d =
act4d.rank() > 3 ? act4d.reshape(act4d.size(0), act4d.size(1), act4d.size(2)) : act4d;
if (maskArray != null) {
INDArray maskOut =
feedForwardMaskArray(maskArray, MaskState.Active, (int) act3d.size(0)).getFirst();
Preconditions.checkState(
act3d.size(0) == maskOut.size(0) && act3d.size(2) == maskOut.size(1),
"Activations dimensions (0,2) and mask dimensions (0,1) don't match: Activations %s, Mask %s",
act3d.shape(),
maskOut.shape());
Broadcast.mul(act3d, maskOut, act3d, 0, 2);
}
return workspaceMgr.leverageTo(
ArrayType.ACTIVATIONS, act3d); // Should be zero copy most of the time
}
@Override
public Pair<INDArray, MaskState> feedForwardMaskArray(
INDArray maskArray, MaskState currentMaskState, int minibatchSize) {
INDArray reduced =
Convolution2DUtils.cnn1dMaskReduction(
maskArray,
getTypedLayerConfiguration().getKernelSize()[0],
getTypedLayerConfiguration().getStride()[0],
getTypedLayerConfiguration().getPadding()[0],
getTypedLayerConfiguration().getDilation()[0],
getTypedLayerConfiguration().getConvolutionMode());
return new Pair<>(reduced, currentMaskState);
}
private RNNFormat getRnnDataFormat() {
return getTypedLayerConfiguration().getRnnDataFormat();
}
}
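For reference, the output sequence length produced by the Conv1D op configured in preOutput() above, and reported by Convolution1DNew.getOutputType() via Convolution1DUtils.getOutputSize, follows the standard 1D convolution arithmetic. A standalone, hedged sketch of that arithmetic is shown below; it assumes a truncate-style convolution mode (ConvolutionMode.Same would instead give ceil(inputLength / stride)) and is illustrative only.

// Illustrative only; the authoritative computation lives in Convolution1DUtils.getOutputSize.
public final class Conv1DOutputLengthSketch {
  static long outputLength(long inputLength, int kernel, int stride, int padding, int dilation) {
    long effectiveKernel = (long) dilation * (kernel - 1) + 1; // extent of the dilated kernel
    return (inputLength + 2L * padding - effectiveKernel) / stride + 1;
  }

  public static void main(String[] args) {
    // 100 time steps, kernel 5, stride 1, no padding, no dilation -> 96
    System.out.println(outputLength(100, 5, 1, 0, 1));
  }
}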

View File

@ -20,7 +20,6 @@
package org.deeplearning4j.nn.layers.convolution;
import java.util.Arrays;
import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.common.config.DL4JClassLoading;
@ -29,6 +28,7 @@ import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.conf.CNN2DFormat;
import org.deeplearning4j.nn.conf.CacheMode;
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.layers.Convolution1DNew;
import org.deeplearning4j.nn.conf.layers.LayerConfiguration;
import org.deeplearning4j.nn.gradient.DefaultGradient;
import org.deeplearning4j.nn.gradient.Gradient;
@ -51,7 +51,8 @@ import org.nd4j.linalg.exception.ND4JOpProfilerException;
import org.nd4j.linalg.factory.Nd4j;
@Slf4j
public class ConvolutionLayer
extends BaseLayer<org.deeplearning4j.nn.conf.layers.ConvolutionLayer> {
protected INDArray i2d;
protected ConvolutionHelper helper = null;
@ -63,13 +64,22 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
public ConvolutionLayer(LayerConfiguration conf, DataType dataType) {
super(conf, dataType);
initializeHelper();
if (conf instanceof Convolution1DNew) {
convolutionMode =
((Convolution1DNew) conf).getConvolutionMode();
} else if (conf instanceof org.deeplearning4j.nn.conf.layers.ConvolutionLayer) {
convolutionMode =
((org.deeplearning4j.nn.conf.layers.ConvolutionLayer) conf).getConvolutionMode();
}
}
void initializeHelper() {
String backend = Nd4j.getExecutioner().getEnvironmentInformation().getProperty("backend");
if ("CUDA".equalsIgnoreCase(backend)) {
helper =
DL4JClassLoading.createNewInstance(
"org.deeplearning4j.cuda.convolution.CudnnConvolutionHelper",
ConvolutionHelper.class,
dataType);
@ -94,14 +104,15 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
}
@Override
public Pair<Gradient, INDArray> backpropGradient(
INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
assertInputSet(true);
INDArray weights =
getParamWithNoise(ConvolutionParamInitializer.WEIGHT_KEY, true, workspaceMgr);
INDArray bias = getParamWithNoise(ConvolutionParamInitializer.BIAS_KEY, true, workspaceMgr);
INDArray input = this.input.castTo(dataType); // No op if correct type
if (epsilon.dataType() != dataType) epsilon = epsilon.castTo(dataType);
INDArray origInput = input;
INDArray origEps = epsilon;
@ -110,7 +121,6 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
epsilon = epsilon.permute(0, 3, 1, 2); // NHWC to NCHW
}
long miniBatch = input.size(0);
int inH = (int) input.size(2);
int inW = (int) input.size(3);
@ -126,23 +136,41 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
int[] pad;
int[] outSize;
if (convolutionMode == ConvolutionMode.Same) {
outSize =
Convolution2DUtils.getOutputSize(
input,
kernel,
strides,
null,
convolutionMode,
dilation,
CNN2DFormat.NCHW); // Also performs validation
pad =
Convolution2DUtils.getSameModeTopLeftPadding(
outSize, new int[] {inH, inW}, kernel, strides, dilation);
} else {
pad = getTypedLayerConfiguration().getPadding();
outSize =
Convolution2DUtils.getOutputSize(
input,
kernel,
strides,
pad,
convolutionMode,
dilation,
CNN2DFormat.NCHW); // Also performs validation
}
int outH = outSize[0];
int outW = outSize[1];
INDArray biasGradView = gradientViews.get(ConvolutionParamInitializer.BIAS_KEY);
INDArray weightGradView =
gradientViews.get(
ConvolutionParamInitializer.WEIGHT_KEY); // 4d, c order. Shape: [outDepth,inDepth,kH,kW]
INDArray weightGradView2df =
Shape.newShapeNoCopy(weightGradView, new long[] {outDepth, inDepth * kH * kW}, false)
.transpose();
INDArray delta;
IActivation afn = getTypedLayerConfiguration().getActivationFn();
@ -155,7 +183,8 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
}
delta = afn.backprop(z, epsilon).getFirst(); // TODO handle activation function params
if (helper != null
&& (helperCountFail == 0 || !getTypedLayerConfiguration().isCudnnAllowFallback())) {
INDArray helperDelta = delta;
if (getTypedLayerConfiguration().getConvFormat() == CNN2DFormat.NHWC)
helperDelta = delta.permute(0, 2, 3, 1); // NCHW to NHWC
@ -172,10 +201,25 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
Pair<Gradient, INDArray> ret = null;
try {
ret =
helper.backpropGradient(
origInput,
weights,
bias,
helperDelta,
kernel,
strides,
pad,
biasGradView,
weightGradView,
afn,
getTypedLayerConfiguration().getCudnnAlgoMode(),
getTypedLayerConfiguration().getCudnnBwdFilterAlgo(),
getTypedLayerConfiguration().getCudnnBwdDataAlgo(),
convolutionMode,
dilation,
getTypedLayerConfiguration().getConvFormat(),
workspaceMgr);
} catch (ND4JOpProfilerException e) {
throw e; // NaN panic etc for debugging
} catch (Exception e) {
@ -192,7 +236,9 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
log.warn("CuDNN execution failed - falling back on built-in implementation", e);
}
} else {
throw new RuntimeException(
"Error during ConvolutionLayer MKL/CuDNN helper backprop - isCudnnAllowFallback() is set to false",
e);
}
}
@ -207,46 +253,82 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
delta = delta.permute(1, 0, 2, 3); // To shape: [outDepth,miniBatch,outH,outW]
// Note: due to the permute in preOut, and the fact that we essentially do a
// preOut.muli(epsilon), this reshape
// should be zero-copy; only possible exception being sometimes with the "identity" activation
// case
INDArray delta2d =
delta.reshape('c', outDepth, miniBatch * outH * outW); // Shape.newShapeNoCopy(delta,new
// int[]{outDepth,miniBatch*outH*outW},false);
// Do im2col, but with order [miniB,outH,outW,depthIn,kH,kW]; but need to input
// [miniBatch,channels,kH,kW,outH,outW] given the current im2col implementation
// To get this: create an array of the order we want, permute it to the order required by im2col
// implementation, and then do im2col on that
// to get old order from required order: permute(0,3,4,5,1,2)
INDArray im2col2d =
p.getSecond(); // Re-use im2col2d array from forward pass if available; recalculate if not
if (im2col2d == null) {
INDArray col =
Nd4j.createUninitialized(
dataType, new long[] {miniBatch, outH, outW, inDepth, kH, kW}, 'c');
INDArray col2 = col.permute(0, 3, 4, 5, 1, 2);
Convolution.im2col(
input,
kH,
kW,
strides[0],
strides[1],
pad[0],
pad[1],
dilation[0],
dilation[1],
convolutionMode == ConvolutionMode.Same,
col2);
// Shape im2col to 2d. Due to the permuting above, this should be a zero-copy reshape
im2col2d = col.reshape('c', miniBatch * outH * outW, inDepth * kH * kW);
}
// Calculate weight gradients, using cc->c mmul.
// weightGradView2df is f order, but this is because it's transposed from c order
// Here, we are using the fact that AB = (B^T A^T)^T; output here (post transpose) is in c
// order, not usual f order
Nd4j.gemm(im2col2d, delta2d, weightGradView2df, true, true, 1.0, 0.0);
// Flatten 4d weights to 2d... this again is a zero-copy op (unless weights are not originally
// in c order for some reason)
INDArray wPermuted =
weights.permute(3, 2, 1, 0); // Start with c order weights, switch order to f order
INDArray w2d = wPermuted.reshape('f', inDepth * kH * kW, outDepth);
// Calculate epsilons for layer below, in 2d format (note: this is in 'image patch' format
// before col2im reduction)
// Note: cc -> f mmul here, then reshape to 6d in f order
INDArray epsNext2d =
w2d.mmul(delta2d); // TODO can we reuse im2col array instead of allocating new result array?
INDArray eps6d =
Shape.newShapeNoCopy(epsNext2d, new long[] {kW, kH, inDepth, outW, outH, miniBatch}, true);
// Calculate epsilonNext by doing im2col reduction.
// Current col2im implementation expects input with order: [miniBatch,channels,kH,kW,outH,outW]
// currently have [kH,kW,inDepth,outW,outH,miniBatch] -> permute first
eps6d = eps6d.permute(5, 2, 1, 0, 4, 3);
INDArray epsNextOrig =
workspaceMgr.createUninitialized(
ArrayType.ACTIVATION_GRAD,
eps6d.dataType(),
new long[] {inDepth, miniBatch, inH, inW},
'c');
// Note: we are execute col2im in a way that the output array should be used in a stride 1 muli
// in the layer below... (same strides as zs/activations)
INDArray epsNext = epsNextOrig.permute(1, 0, 2, 3);
Convolution.col2im(
eps6d, epsNext, strides[0], strides[1], pad[0], pad[1], inH, inW, dilation[0], dilation[1]);
Gradient retGradient = new DefaultGradient();
if (getTypedLayerConfiguration().hasBias()) {
@ -267,11 +349,12 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
}
/**
* preOutput4d: Used so that ConvolutionLayer subclasses (such as Convolution1D) can maintain
* their standard non-4d preOutput method, while overriding this to return 4d activations (for use
* in backprop) without modifying the public API
*/
protected Pair<INDArray, INDArray> preOutput4d(
boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) {
return preOutput(training, forBackprop, workspaceMgr);
}
@ -279,16 +362,23 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
// Input validation: expect rank 4 matrix
if (input.rank() != 4) {
String layerName = layerConfiguration.getName();
if (layerName == null) layerName = "(not named)";
throw new DL4JInvalidInputException(
"Got rank "
+ input.rank()
+ " array as input to ConvolutionLayer (layer name = "
+ layerName
+ ", layer index = "
+ index
+ ") with shape "
+ Arrays.toString(input.shape())
+ ". "
+ "Expected rank 4 array with shape [minibatchSize, layerInputDepth, inputHeight, inputWidth]."
+ (input.rank() == 2
? " (Wrong input type (see InputType.convolutionalFlat()) or wrong data type?)"
: "")
+ " "
+ layerId());
}
}
@ -297,13 +387,26 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
int dim = format == CNN2DFormat.NHWC ? 3 : 1;
if (input.size(dim) != inDepth) {
String layerName = layerConfiguration.getName();
if (layerName == null) layerName = "(not named)";
String s =
"Cannot do forward pass in Convolution layer (layer name = "
+ layerName
+ ", layer index = "
+ index
+ "): input array channels does not match CNN layer configuration"
+ " (data format = "
+ format
+ ", data input channels = "
+ input.size(dim)
+ ", "
+ getTypedLayerConfiguration().getConvFormat().dimensionNames()
+ "="
+ Arrays.toString(input.shape())
+ "; expected"
+ " input channels = "
+ inDepth
+ ") "
+ layerId();
int dimIfWrongFormat = format == CNN2DFormat.NHWC ? 1 : 3;
@ -312,24 +415,26 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
s += "\n" + Convolution2DUtils.NCHW_NHWC_ERROR_MSG;
}
throw new DL4JInvalidInputException(s);
}
}
/**
* PreOutput method that also returns the im2col2d array (if being called for backprop), as this
* can be re-used instead of being calculated again.
*
* @param training Train or test time (impacts dropout)
* @param forBackprop If true: return the im2col2d array for re-use during backprop. False: return
* null for second pair entry. Note that it may still be null in the case of CuDNN and the
* like.
* @return Pair of arrays: preOutput (activations) and optionally the im2col2d array
*/
protected Pair<INDArray, INDArray> preOutput(
boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) {
assertInputSet(false);
INDArray bias = getParamWithNoise(ConvolutionParamInitializer.BIAS_KEY, training, workspaceMgr);
INDArray weights =
getParamWithNoise(ConvolutionParamInitializer.WEIGHT_KEY, training, workspaceMgr);
validateInputRank();
@ -347,17 +452,15 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
long kH = weights.size(2);
long kW = weights.size(3);
int[] dilation = getTypedLayerConfiguration().getDilation();
int[] kernel = getTypedLayerConfiguration().getKernelSize();
int[] strides = getTypedLayerConfiguration().getStride();
int[] pad;
int[] outSize;
if (convolutionMode == ConvolutionMode.Same) {
outSize =
Convolution2DUtils.getOutputSize(
input,
kernel,
strides,
@ -379,15 +482,13 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
}
else
throw new IllegalStateException("No data format configured!");*/
pad =
Convolution2DUtils.getSameModeTopLeftPadding(
outSize, inWidthHeight, kernel, strides, dilation);
} else {
pad = getTypedLayerConfiguration().getPadding();
outSize =
Convolution2DUtils.getOutputSize(
input,
kernel,
strides,
@ -400,8 +501,8 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
int outH = outSize[0];
int outW = outSize[1];
if (helper != null
&& (helperCountFail == 0 || !getTypedLayerConfiguration().isCudnnAllowFallback())) {
if (preOutput != null && forBackprop) {
return new Pair<>(preOutput, null);
}
@ -418,8 +519,20 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
INDArray ret = null;
try {
ret =
helper.preOutput(
inputOrig,
weights,
bias,
kernel,
strides,
pad,
getTypedLayerConfiguration().getCudnnAlgoMode(),
getTypedLayerConfiguration().getCudnnFwdAlgo(),
convolutionMode,
dilation,
getTypedLayerConfiguration().getConvFormat(),
workspaceMgr);
} catch (ND4JOpProfilerException e) {
throw e; // NaN panic etc for debugging
} catch (Exception e) {
@ -436,7 +549,9 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
log.warn("CuDNN execution failed - falling back on built-in implementation", e);
}
} else {
throw new RuntimeException(
"Error during ConvolutionLayer MKL/CuDNN helper forward pass - isCudnnAllowFallback() is set to false",
e);
}
}
if (ret != null) {
@ -448,49 +563,67 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
return new Pair<>(preOutput, i2d);
}
// im2col in the required order: want [outW,outH,miniBatch,depthIn,kH,kW], but need to input
// [miniBatch,channels,kH,kW,outH,outW] given the current im2col implementation
// To get this: create an array of the order we want, permute it to the order required by im2col
// implementation, and then do im2col on that
// to get old order from required order: permute(0,3,4,5,1,2)
// Post reshaping: rows are such that minibatch varies slowest, outW fastest as we step through
// the rows post-reshape
INDArray col =
Nd4j.createUninitialized(
weights.dataType(), new long[] {miniBatch, outH, outW, inDepth, kH, kW}, 'c');
int[] permute = new int[] {0, 3, 4, 5, 1, 2};
INDArray col2 = col.permute(permute);
INDArray im2ColIn = input.castTo(col2.dataType()); // No op if already (for example) float
if (kH > Integer.MAX_VALUE || kW > Integer.MAX_VALUE) throw new ND4JArraySizeException();
Convolution.im2col(
im2ColIn,
(int) kH,
(int) kW,
strides[0],
strides[1],
pad[0],
pad[1],
dilation[0],
dilation[1],
convolutionMode == ConvolutionMode.Same,
col2);
INDArray im2col2d =
Shape.newShapeNoCopy(col, new long[] {miniBatch * outH * outW, inDepth * kH * kW}, false);
// Current order of weights: [depthOut,depthIn,kH,kW], c order
// Permute to give [kW,kH,depthIn,depthOut], f order
// Reshape to give [kW*kH*depthIn, depthOut]. This should always be zero-copy reshape, unless
// weights aren't in c order for some reason
INDArray permutedW = weights.permute(3, 2, 1, 0);
INDArray reshapedW = permutedW.reshape('f', kW * kH * inDepth, outDepth);
// Do the MMUL; c and f orders in, f order out. output shape: [miniBatch*outH*outW,depthOut]
INDArray z =
workspaceMgr.createUninitialized(
ArrayType.ACTIVATIONS,
weights.dataType(),
new long[] {im2col2d.size(0), reshapedW.size(1)},
'f');
im2col2d.mmuli(reshapedW, z);
// Add biases, before reshaping. Note that biases are [1,depthOut] and currently z is
// [miniBatch*outH*outW,depthOut] -> addiRowVector
if (getTypedLayerConfiguration().hasBias()) {
z.addiRowVector(bias);
}
// Now, reshape to [outW,outH,miniBatch,outDepth], and permute to have correct output order:
// [miniBatch,outDepth,outH,outW];
z = Shape.newShapeNoCopy(z, new long[] {outW, outH, miniBatch, outDepth}, true);
z = z.permute(2, 3, 1, 0);
if (training
&& cacheMode != CacheMode.NONE
&& workspaceMgr.hasConfiguration(ArrayType.FF_CACHE)
&& workspaceMgr.isWorkspaceOpen(ArrayType.FF_CACHE)) {
try (MemoryWorkspace wsB = workspaceMgr.notifyScopeBorrowed(ArrayType.FF_CACHE)) {
i2d = im2col2d.unsafeDuplication();
}
@ -507,18 +640,21 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
@Override
public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) {
if (input == null) {
throw new IllegalArgumentException(
"Cannot perform forward pass with null input " + layerId());
}
if (cacheMode == null) cacheMode = CacheMode.NONE;
applyDropOutIfNecessary(training, workspaceMgr);
INDArray z = preOutput(training, false, workspaceMgr).getFirst();
// we do cache only if cache workspace exists. Skip otherwise
if (training
&& cacheMode != CacheMode.NONE
&& workspaceMgr.hasConfiguration(ArrayType.FF_CACHE)
&& workspaceMgr.isWorkspaceOpen(ArrayType.FF_CACHE)) {
try (MemoryWorkspace wsB = workspaceMgr.notifyScopeBorrowed(ArrayType.FF_CACHE)) {
preOutput = z.unsafeDuplication();
}
@ -527,7 +663,9 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
// String afn = conf.getLayer().getActivationFunction();
IActivation afn = getTypedLayerConfiguration().getActivationFn();
if (helper != null
&& Shape.strideDescendingCAscendingF(z)
&& (helperCountFail == 0 || !getTypedLayerConfiguration().isCudnnAllowFallback())) {
INDArray ret = null;
try {
ret = helper.activate(z, getTypedLayerConfiguration().getActivationFn(), training);
@ -547,7 +685,9 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
log.warn("CuDNN execution failed - falling back on built-in implementation", e);
}
} else {
throw new RuntimeException(
"Error during ConvolutionLayer MKL/CuDNN helper forward pass - isCudnnAllowFallback() is set to false",
e);
}
}
@ -587,15 +727,22 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
}
@Override
public Pair<INDArray, MaskState> feedForwardMaskArray(
INDArray maskArray, MaskState currentMaskState, int minibatchSize) {
if (maskArray == null) {
// For same mode (with stride 1): output activations size is always same size as input
// activations size -> mask array is same size
return new Pair<>(maskArray, currentMaskState);
}
INDArray outMask =
Convolution2DUtils.cnn2dMaskReduction(
maskArray,
getTypedLayerConfiguration().getKernelSize(),
getTypedLayerConfiguration().getStride(),
getTypedLayerConfiguration().getPadding(),
getTypedLayerConfiguration().getDilation(),
getTypedLayerConfiguration().getConvolutionMode());
return new Pair<>(outMask, currentMaskState);
}
}
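The ConvolutionMode.Same branches above delegate output-size and padding arithmetic to Convolution2DUtils. As a rough self-contained sketch of that arithmetic (standard SAME-convolution formulas; the SamePaddingSketch class is hypothetical and the real utility additionally validates shapes and formats):
public final class SamePaddingSketch {
  /** Output size for one spatial dimension in ConvolutionMode.Same. */
  static int outSize(int in, int stride) {
    return (int) Math.ceil(in / (double) stride);
  }
  /** Top/left padding so that the SAME output size above is reached. */
  static int topLeftPad(int in, int kernel, int stride, int dilation) {
    int effKernel = kernel + (kernel - 1) * (dilation - 1); // dilated kernel extent
    int out = outSize(in, stride);
    int totalPad = Math.max((out - 1) * stride + effKernel - in, 0);
    return totalPad / 2; // any odd remainder goes to the bottom/right side
  }
  public static void main(String[] args) {
    // e.g. inH=28, kernel=5, stride=2, dilation=1 -> outH=14, top pad=1
    System.out.println(outSize(28, 2) + " " + topLeftPad(28, 5, 2, 1));
  }
}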

View File

@ -0,0 +1,753 @@
/*
* ******************************************************************************
* *
* *
* * This program and the accompanying materials are made available under the
* * terms of the Apache License, Version 2.0 which is available at
* * https://www.apache.org/licenses/LICENSE-2.0.
* *
* * See the NOTICE file distributed with this work for additional
* * information regarding copyright ownership.
* * Unless required by applicable law or agreed to in writing, software
* * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* * License for the specific language governing permissions and limitations
* * under the License.
* *
* * SPDX-License-Identifier: Apache-2.0
* *****************************************************************************
*/
package org.deeplearning4j.nn.layers.convolution;
import java.util.Arrays;
import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.common.config.DL4JClassLoading;
import org.deeplearning4j.exception.DL4JInvalidInputException;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.conf.CNN2DFormat;
import org.deeplearning4j.nn.conf.CacheMode;
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.layers.Convolution1DNew;
import org.deeplearning4j.nn.conf.layers.LayerConfiguration;
import org.deeplearning4j.nn.gradient.DefaultGradient;
import org.deeplearning4j.nn.gradient.Gradient;
import org.deeplearning4j.nn.layers.BaseLayer;
import org.deeplearning4j.nn.layers.LayerHelper;
import org.deeplearning4j.nn.layers.mkldnn.MKLDNNConvHelper;
import org.deeplearning4j.nn.params.ConvolutionParamInitializer;
import org.deeplearning4j.nn.workspace.ArrayType;
import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr;
import org.deeplearning4j.util.Convolution2DUtils;
import org.nd4j.common.primitives.Pair;
import org.nd4j.linalg.activations.IActivation;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.memory.MemoryWorkspace;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.shape.Shape;
import org.nd4j.linalg.convolution.Convolution;
import org.nd4j.linalg.exception.ND4JArraySizeException;
import org.nd4j.linalg.exception.ND4JOpProfilerException;
import org.nd4j.linalg.factory.Nd4j;
@Slf4j
public class ConvolutionNewLayer<
LayerConf_T extends org.deeplearning4j.nn.conf.layers.Convolution1DNew>
extends BaseLayer<org.deeplearning4j.nn.conf.layers.Convolution1DNew> {
protected INDArray i2d;
protected ConvolutionHelper helper = null;
protected int helperCountFail = 0;
protected ConvolutionMode convolutionMode;
protected transient INDArray dummyBias; // Used only when: hasBias == false AND helpers are used
protected transient INDArray dummyBiasGrad; // As above
public ConvolutionNewLayer(LayerConfiguration conf, DataType dataType) {
super(conf, dataType);
initializeHelper();
if (conf instanceof Convolution1DNew) {
convolutionMode = ((Convolution1DNew) conf).getConvolutionMode();
} else if (conf instanceof org.deeplearning4j.nn.conf.layers.ConvolutionLayer) {
convolutionMode =
((org.deeplearning4j.nn.conf.layers.ConvolutionLayer) conf).getConvolutionMode();
}
}
void initializeHelper() {
String backend = Nd4j.getExecutioner().getEnvironmentInformation().getProperty("backend");
if ("CUDA".equalsIgnoreCase(backend)) {
helper =
DL4JClassLoading.createNewInstance(
"org.deeplearning4j.cuda.convolution.CudnnConvolutionHelper",
ConvolutionHelper.class,
dataType);
log.debug("CudnnConvolutionHelper successfully initialized");
if (!helper.checkSupported()) {
helper = null;
}
} else if ("CPU".equalsIgnoreCase(backend)) {
helper = new MKLDNNConvHelper(dataType);
log.trace("Created MKLDNNConvHelper, layer {}", getTypedLayerConfiguration().getName());
}
if (helper != null && !helper.checkSupported()) {
log.debug("Removed helper {} as not supported", helper.getClass());
helper = null;
}
}
@Override
public Type type() {
return Type.CONVOLUTIONAL;
}
/**
* @return the {@link Convolution1DNew} configuration backing this layer
*/
@Override
public Convolution1DNew getTypedLayerConfiguration() {
return super.getTypedLayerConfiguration();
}
@Override
public Pair<Gradient, INDArray> backpropGradient(
INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
assertInputSet(true);
INDArray weights =
getParamWithNoise(ConvolutionParamInitializer.WEIGHT_KEY, true, workspaceMgr);
INDArray bias = getParamWithNoise(ConvolutionParamInitializer.BIAS_KEY, true, workspaceMgr);
INDArray input = this.input.castTo(dataType); // No op if correct type
if (epsilon.dataType() != dataType) epsilon = epsilon.castTo(dataType);
INDArray origInput = input;
INDArray origEps = epsilon;
if (getTypedLayerConfiguration().getConvFormat() != CNN2DFormat.NCHW) {
input = input.permute(0, 3, 1, 2); // NHWC to NCHW
epsilon = epsilon.permute(0, 3, 1, 2); // NHWC to NCHW
}
long miniBatch = input.size(0);
int inH = (int) input.size(2);
int inW = (int) input.size(3);
long outDepth = weights.size(0);
long inDepth = weights.size(1);
int kH = (int) weights.size(2);
int kW = (int) weights.size(3);
int[] dilation = getTypedLayerConfiguration().getDilation();
int[] kernel = getTypedLayerConfiguration().getKernelSize();
int[] strides = getTypedLayerConfiguration().getStride();
int[] pad;
int[] outSize;
if (convolutionMode == ConvolutionMode.Same) {
outSize =
Convolution2DUtils.getOutputSize(
input,
kernel,
strides,
null,
convolutionMode,
dilation,
CNN2DFormat.NCHW); // Also performs validation
pad =
Convolution2DUtils.getSameModeTopLeftPadding(
outSize, new int[] {inH, inW}, kernel, strides, dilation);
} else {
pad = getTypedLayerConfiguration().getPadding();
outSize =
Convolution2DUtils.getOutputSize(
input,
kernel,
strides,
pad,
convolutionMode,
dilation,
CNN2DFormat.NCHW); // Also performs validation
}
int outH = outSize[0];
int outW = outSize[1];
INDArray biasGradView = gradientViews.get(ConvolutionParamInitializer.BIAS_KEY);
INDArray weightGradView =
gradientViews.get(
ConvolutionParamInitializer.WEIGHT_KEY); // 4d, c order. Shape: [outDepth,inDepth,kH,kW]
INDArray weightGradView2df =
Shape.newShapeNoCopy(weightGradView, new long[] {outDepth, inDepth * kH * kW}, false)
.transpose();
INDArray delta;
IActivation afn = getTypedLayerConfiguration().getActivationFn();
Pair<INDArray, INDArray> p = preOutput4d(true, true, workspaceMgr);
INDArray z = p.getFirst();
CNN2DFormat f = getTypedLayerConfiguration().getConvFormat();
if (f != CNN2DFormat.NCHW) {
z = z.permute(0, 3, 1, 2); // NHWC to NCHW
}
delta = afn.backprop(z, epsilon).getFirst(); // TODO handle activation function params
if (helper != null
&& (helperCountFail == 0 || !getTypedLayerConfiguration().isCudnnAllowFallback())) {
INDArray helperDelta = delta;
if (getTypedLayerConfiguration().getConvFormat() == CNN2DFormat.NHWC)
helperDelta = delta.permute(0, 2, 3, 1); // NCHW to NHWC
if (!hasBias() && !(helper instanceof MKLDNNConvHelper)) {
// MKL-DNN supports no bias, CuDNN doesn't
if (dummyBiasGrad == null) {
try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) {
dummyBiasGrad = Nd4j.create(1, getTypedLayerConfiguration().getNOut());
}
}
biasGradView = dummyBiasGrad;
}
Pair<Gradient, INDArray> ret = null;
try {
ret =
helper.backpropGradient(
origInput,
weights,
bias,
helperDelta,
kernel,
strides,
pad,
biasGradView,
weightGradView,
afn,
getTypedLayerConfiguration().getCudnnAlgoMode(),
getTypedLayerConfiguration().getCudnnBwdFilterAlgo(),
getTypedLayerConfiguration().getCudnnBwdDataAlgo(),
convolutionMode,
dilation,
getTypedLayerConfiguration().getConvFormat(),
workspaceMgr);
} catch (ND4JOpProfilerException e) {
throw e; // NaN panic etc for debugging
} catch (Exception e) {
if (e.getMessage().contains("Failed to allocate")) {
// This is a memory exception - don't fallback to built-in implementation
throw e;
}
if (getTypedLayerConfiguration().isCudnnAllowFallback()) {
helperCountFail++;
if (helper instanceof MKLDNNConvHelper) {
log.warn("MKL-DNN execution failed - falling back on built-in implementation", e);
} else {
log.warn("CuDNN execution failed - falling back on built-in implementation", e);
}
} else {
throw new RuntimeException(
"Error during ConvolutionLayer MKL/CuDNN helper backprop - isCudnnAllowFallback() is set to false",
e);
}
}
if (ret != null) {
// Backprop dropout, if present
INDArray gradPostDropout = ret.getRight();
gradPostDropout = backpropDropOutIfPresent(gradPostDropout);
ret.setSecond(gradPostDropout);
return ret;
}
}
delta = delta.permute(1, 0, 2, 3); // To shape: [outDepth,miniBatch,outH,outW]
// Note: due to the permute in preOut, and the fact that we essentially do a
// preOut.muli(epsilon), this reshape
// should be zero-copy; only possible exception being sometimes with the "identity" activation
// case
INDArray delta2d =
delta.reshape('c', outDepth, miniBatch * outH * outW); // Shape.newShapeNoCopy(delta,new
// int[]{outDepth,miniBatch*outH*outW},false);
// Do im2col, but with order [miniB,outH,outW,depthIn,kH,kW]; but need to input
// [miniBatch,channels,kH,kW,outH,outW] given the current im2col implementation
// To get this: create an array of the order we want, permute it to the order required by im2col
// implementation, and then do im2col on that
// to get old order from required order: permute(0,3,4,5,1,2)
INDArray im2col2d =
p.getSecond(); // Re-use im2col2d array from forward pass if available; recalculate if not
if (im2col2d == null) {
INDArray col =
Nd4j.createUninitialized(
dataType, new long[] {miniBatch, outH, outW, inDepth, kH, kW}, 'c');
INDArray col2 = col.permute(0, 3, 4, 5, 1, 2);
Convolution.im2col(
input,
kH,
kW,
strides[0],
strides[1],
pad[0],
pad[1],
dilation[0],
dilation[1],
convolutionMode == ConvolutionMode.Same,
col2);
// Shape im2col to 2d. Due to the permuting above, this should be a zero-copy reshape
im2col2d = col.reshape('c', miniBatch * outH * outW, inDepth * kH * kW);
}
// Calculate weight gradients, using cc->c mmul.
// weightGradView2df is f order, but this is because it's transposed from c order
// Here, we are using the fact that AB = (B^T A^T)^T; output here (post transpose) is in c
// order, not usual f order
Nd4j.gemm(im2col2d, delta2d, weightGradView2df, true, true, 1.0, 0.0);
// Flatten 4d weights to 2d... this again is a zero-copy op (unless weights are not originally
// in c order for some reason)
INDArray wPermuted =
weights.permute(3, 2, 1, 0); // Start with c order weights, switch order to f order
INDArray w2d = wPermuted.reshape('f', inDepth * kH * kW, outDepth);
// Calculate epsilons for layer below, in 2d format (note: this is in 'image patch' format
// before col2im reduction)
// Note: cc -> f mmul here, then reshape to 6d in f order
INDArray epsNext2d =
w2d.mmul(delta2d); // TODO can we reuse im2col array instead of allocating new result array?
INDArray eps6d =
Shape.newShapeNoCopy(epsNext2d, new long[] {kW, kH, inDepth, outW, outH, miniBatch}, true);
// Calculate epsilonNext by doing im2col reduction.
// Current col2im implementation expects input with order: [miniBatch,channels,kH,kW,outH,outW]
// currently have [kH,kW,inDepth,outW,outH,miniBatch] -> permute first
eps6d = eps6d.permute(5, 2, 1, 0, 4, 3);
INDArray epsNextOrig =
workspaceMgr.createUninitialized(
ArrayType.ACTIVATION_GRAD,
eps6d.dataType(),
new long[] {inDepth, miniBatch, inH, inW},
'c');
// Note: we execute col2im in a way that lets the output array be used in a stride 1 muli
// in the layer below... (same strides as zs/activations)
INDArray epsNext = epsNextOrig.permute(1, 0, 2, 3);
Convolution.col2im(
eps6d, epsNext, strides[0], strides[1], pad[0], pad[1], inH, inW, dilation[0], dilation[1]);
Gradient retGradient = new DefaultGradient();
if (getTypedLayerConfiguration().hasBias()) {
delta2d.sum(biasGradView, 1); // biasGradView is initialized/zeroed first in sum op
retGradient.setGradientFor(ConvolutionParamInitializer.BIAS_KEY, biasGradView);
}
retGradient.setGradientFor(ConvolutionParamInitializer.WEIGHT_KEY, weightGradView, 'c');
weightNoiseParams.clear();
epsNext = backpropDropOutIfPresent(epsNext);
if (getTypedLayerConfiguration().getConvFormat() != CNN2DFormat.NCHW) {
epsNext = epsNext.permute(0, 2, 3, 1); // NCHW to NHWC
}
return new Pair<>(retGradient, epsNext);
}
/**
* preOutput4d: Used so that ConvolutionLayer subclasses (such as Convolution1D) can maintain
* their standard non-4d preOutput method, while overriding this to return 4d activations (for use
* in backprop) without modifying the public API
*/
protected Pair<INDArray, INDArray> preOutput4d(
boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) {
return preOutput(training, forBackprop, workspaceMgr);
}
protected void validateInputRank() {
// Input validation: expect rank 4 matrix
if (input.rank() != 4) {
String layerName = layerConfiguration.getName();
if (layerName == null) layerName = "(not named)";
throw new DL4JInvalidInputException(
"Got rank "
+ input.rank()
+ " array as input to ConvolutionLayer (layer name = "
+ layerName
+ ", layer index = "
+ index
+ ") with shape "
+ Arrays.toString(input.shape())
+ ". "
+ "Expected rank 4 array with shape [minibatchSize, layerInputDepth, inputHeight, inputWidth]."
+ (input.rank() == 2
? " (Wrong input type (see InputType.convolutionalFlat()) or wrong data type?)"
: "")
+ " "
+ layerId());
}
}
protected void validateInputDepth(long inDepth) {
CNN2DFormat format = getTypedLayerConfiguration().getConvFormat();
int dim = format == CNN2DFormat.NHWC ? 3 : 1;
if (input.size(dim) != inDepth) {
String layerName = layerConfiguration.getName();
if (layerName == null) layerName = "(not named)";
String s =
"Cannot do forward pass in Convolution layer (layer name = "
+ layerName
+ ", layer index = "
+ index
+ "): input array channels does not match CNN layer configuration"
+ " (data format = "
+ format
+ ", data input channels = "
+ input.size(dim)
+ ", "
+ getTypedLayerConfiguration().getConvFormat().dimensionNames()
+ "="
+ Arrays.toString(input.shape())
+ "; expected"
+ " input channels = "
+ inDepth
+ ") "
+ layerId();
int dimIfWrongFormat = format == CNN2DFormat.NHWC ? 1 : 3;
if (input.size(dimIfWrongFormat) == inDepth) {
// User might have passed NCHW data to a NHWC net, or vice versa?
s += "\n" + Convolution2DUtils.NCHW_NHWC_ERROR_MSG;
}
throw new DL4JInvalidInputException(s);
}
}
/**
* PreOutput method that also returns the im2col2d array (if being called for backprop), as this
* can be re-used instead of being calculated again.
*
* @param training Train or test time (impacts dropout)
* @param forBackprop If true: return the im2col2d array for re-use during backprop. False: return
* null for second pair entry. Note that it may still be null in the case of CuDNN and the
* like.
* @return Pair of arrays: preOutput (activations) and optionally the im2col2d array
*/
protected Pair<INDArray, INDArray> preOutput(
boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) {
assertInputSet(false);
INDArray bias = getParamWithNoise(ConvolutionParamInitializer.BIAS_KEY, training, workspaceMgr);
INDArray weights =
getParamWithNoise(ConvolutionParamInitializer.WEIGHT_KEY, training, workspaceMgr);
validateInputRank();
INDArray input = this.input.castTo(dataType);
INDArray inputOrig = input;
if (getTypedLayerConfiguration().getConvFormat() == CNN2DFormat.NHWC) {
input = input.permute(0, 3, 1, 2).dup(); // NHWC to NCHW
}
long miniBatch = input.size(0);
long outDepth = weights.size(0);
long inDepth = weights.size(1);
validateInputDepth(inDepth);
long kH = weights.size(2);
long kW = weights.size(3);
int[] dilation = getTypedLayerConfiguration().getDilation();
int[] kernel = getTypedLayerConfiguration().getKernelSize();
int[] strides = getTypedLayerConfiguration().getStride();
int[] pad;
int[] outSize;
if (convolutionMode == ConvolutionMode.Same) {
outSize =
Convolution2DUtils.getOutputSize(
input,
kernel,
strides,
null,
convolutionMode,
dilation,
CNN2DFormat.NCHW); // Note: hardcoded to NCHW due to permute earlier in this method
if (input.size(2) > Integer.MAX_VALUE || input.size(3) > Integer.MAX_VALUE)
throw new ND4JArraySizeException();
int[] inWidthHeight;
// if(layerConf().getCnn2dDataFormat() == CNN2DFormat.NCHW)
// TODO: Switch hardcoded state later. For now, convolution is implemented as
// switch to NCHW then permute back for NHWC
inWidthHeight = new int[] {(int) input.size(2), (int) input.size(3)};
/* else if(layerConf().getCnn2dDataFormat() == CNN2DFormat.NHWC) {
inWidthHeight = new int[] {(int) input.size(1), (int) input.size(2)};
}
else
throw new IllegalStateException("No data format configured!");*/
pad =
Convolution2DUtils.getSameModeTopLeftPadding(
outSize, inWidthHeight, kernel, strides, dilation);
} else {
pad = getTypedLayerConfiguration().getPadding();
outSize =
Convolution2DUtils.getOutputSize(
input,
kernel,
strides,
pad,
convolutionMode,
dilation,
CNN2DFormat.NCHW); // Note: hardcoded to NCHW due to permute earlier in this method
}
int outH = outSize[0];
int outW = outSize[1];
if (helper != null
&& (helperCountFail == 0 || !getTypedLayerConfiguration().isCudnnAllowFallback())) {
if (preOutput != null && forBackprop) {
return new Pair<>(preOutput, null);
}
// For no-bias convolutional layers: use an empty (all 0s) value for biases
if (!hasBias()) {
if (dummyBias == null) {
try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) {
dummyBias = Nd4j.create(1, getTypedLayerConfiguration().getNOut());
}
}
bias = dummyBias;
}
INDArray ret = null;
try {
ret =
helper.preOutput(
inputOrig,
weights,
bias,
kernel,
strides,
pad,
getTypedLayerConfiguration().getCudnnAlgoMode(),
getTypedLayerConfiguration().getCudnnFwdAlgo(),
convolutionMode,
dilation,
getTypedLayerConfiguration().getConvFormat(),
workspaceMgr);
} catch (ND4JOpProfilerException e) {
throw e; // NaN panic etc for debugging
} catch (Exception e) {
if (e.getMessage() != null && e.getMessage().contains("Failed to allocate")) {
// This is a memory exception - don't fallback to built-in implementation
throw e;
}
if (getTypedLayerConfiguration().isCudnnAllowFallback()) {
helperCountFail++;
if (helper instanceof MKLDNNConvHelper) {
log.warn("MKL-DNN execution failed - falling back on built-in implementation", e);
} else {
log.warn("CuDNN execution failed - falling back on built-in implementation", e);
}
} else {
throw new RuntimeException(
"Error during ConvolutionLayer MKL/CuDNN helper forward pass - isCudnnAllowFallback() is set to false",
e);
}
}
if (ret != null) {
return new Pair<>(ret, null);
}
}
if (preOutput != null && i2d != null && forBackprop) {
return new Pair<>(preOutput, i2d);
}
// im2col in the required order: want [outW,outH,miniBatch,depthIn,kH,kW], but need to input
// [miniBatch,channels,kH,kW,outH,outW] given the current im2col implementation
// To get this: create an array of the order we want, permute it to the order required by im2col
// implementation, and then do im2col on that
// to get old order from required order: permute(0,3,4,5,1,2)
// Post reshaping: rows are such that minibatch varies slowest, outW fastest as we step through
// the rows post-reshape
INDArray col =
Nd4j.createUninitialized(
weights.dataType(), new long[] {miniBatch, outH, outW, inDepth, kH, kW}, 'c');
int[] permute = new int[] {0, 3, 4, 5, 1, 2};
INDArray col2 = col.permute(permute);
INDArray im2ColIn = input.castTo(col2.dataType()); // No op if already (for example) float
if (kH > Integer.MAX_VALUE || kW > Integer.MAX_VALUE) throw new ND4JArraySizeException();
Convolution.im2col(
im2ColIn,
(int) kH,
(int) kW,
strides[0],
strides[1],
pad[0],
pad[1],
dilation[0],
dilation[1],
convolutionMode == ConvolutionMode.Same,
col2);
INDArray im2col2d =
Shape.newShapeNoCopy(col, new long[] {miniBatch * outH * outW, inDepth * kH * kW}, false);
// Current order of weights: [depthOut,depthIn,kH,kW], c order
// Permute to give [kW,kH,depthIn,depthOut], f order
// Reshape to give [kW*kH*depthIn, depthOut]. This should always be zero-copy reshape, unless
// weights aren't in c order for some reason
INDArray permutedW = weights.permute(3, 2, 1, 0);
INDArray reshapedW = permutedW.reshape('f', kW * kH * inDepth, outDepth);
// Do the MMUL; c and f orders in, f order out. output shape: [miniBatch*outH*outW,depthOut]
INDArray z =
workspaceMgr.createUninitialized(
ArrayType.ACTIVATIONS,
weights.dataType(),
new long[] {im2col2d.size(0), reshapedW.size(1)},
'f');
im2col2d.mmuli(reshapedW, z);
// Add biases, before reshaping. Note that biases are [1,depthOut] and currently z is
// [miniBatch*outH*outW,depthOut] -> addiRowVector
if (getTypedLayerConfiguration().hasBias()) {
z.addiRowVector(bias);
}
// Now, reshape to [outW,outH,miniBatch,outDepth], and permute to have correct output order:
// [miniBatch,outDepth,outH,outW];
z = Shape.newShapeNoCopy(z, new long[] {outW, outH, miniBatch, outDepth}, true);
z = z.permute(2, 3, 1, 0);
if (training
&& cacheMode != CacheMode.NONE
&& workspaceMgr.hasConfiguration(ArrayType.FF_CACHE)
&& workspaceMgr.isWorkspaceOpen(ArrayType.FF_CACHE)) {
try (MemoryWorkspace wsB = workspaceMgr.notifyScopeBorrowed(ArrayType.FF_CACHE)) {
i2d = im2col2d.unsafeDuplication();
}
}
if (getTypedLayerConfiguration().getConvFormat() == CNN2DFormat.NHWC) {
z = z.permute(0, 2, 3, 1); // NCHW to NHWC
z = workspaceMgr.dup(ArrayType.ACTIVATIONS, z);
}
return new Pair<>(z, forBackprop ? im2col2d : null);
}
@Override
public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) {
if (input == null) {
throw new IllegalArgumentException(
"Cannot perform forward pass with null input " + layerId());
}
if (cacheMode == null) cacheMode = CacheMode.NONE;
applyDropOutIfNecessary(training, workspaceMgr);
INDArray z = preOutput(training, false, workspaceMgr).getFirst();
// we do cache only if cache workspace exists. Skip otherwise
if (training
&& cacheMode != CacheMode.NONE
&& workspaceMgr.hasConfiguration(ArrayType.FF_CACHE)
&& workspaceMgr.isWorkspaceOpen(ArrayType.FF_CACHE)) {
try (MemoryWorkspace wsB = workspaceMgr.notifyScopeBorrowed(ArrayType.FF_CACHE)) {
preOutput = z.unsafeDuplication();
}
}
// String afn = conf.getLayer().getActivationFunction();
IActivation afn = getTypedLayerConfiguration().getActivationFn();
if (helper != null
&& Shape.strideDescendingCAscendingF(z)
&& (helperCountFail == 0 || !getTypedLayerConfiguration().isCudnnAllowFallback())) {
INDArray ret = null;
try {
ret = helper.activate(z, getTypedLayerConfiguration().getActivationFn(), training);
} catch (ND4JOpProfilerException e) {
throw e; // NaN panic etc for debugging
} catch (Exception e) {
if (e.getMessage() != null && e.getMessage().contains("Failed to allocate")) {
// This is a memory exception - don't fallback to built-in implementation
throw e;
}
if (getTypedLayerConfiguration().isCudnnAllowFallback()) {
helperCountFail++;
if (helper instanceof MKLDNNConvHelper) {
log.warn("MKL-DNN execution failed - falling back on built-in implementation", e);
} else {
log.warn("CuDNN execution failed - falling back on built-in implementation", e);
}
} else {
throw new RuntimeException(
"Error during ConvolutionLayer MKL/CuDNN helper forward pass - isCudnnAllowFallback() is set to false",
e);
}
}
if (ret != null) {
return ret;
}
}
return afn.getActivation(z, training);
}
@Override
public boolean hasBias() {
return getTypedLayerConfiguration().hasBias();
}
@Override
public boolean isPretrainLayer() {
return false;
}
@Override
public LayerHelper getHelper() {
return helper;
}
@Override
public void fit(INDArray input, LayerWorkspaceMgr workspaceMgr) {
throw new UnsupportedOperationException("Not supported");
}
@Override
public void setParamsTable(INDArray paramsTable) {
// Override, as base layer does f order parameter flattening by default
setParams(paramsTable, 'c');
}
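  // Illustrative note (not part of the original code): flattening order matters here. Flattening
  // the 2x3 matrix [[1,2,3],[4,5,6]] in 'c' (row-major) order gives [1,2,3,4,5,6], while 'f'
  // (column-major) order gives [1,4,2,5,3,6]. CNN weight views are created in 'c' order, which is
  // why setParams() is called with 'c' above. Plain-Java sketch of the two traversals:
  private static double[] flattenOrderExample(double[][] m, char order) {
    int rows = m.length, cols = m[0].length;
    double[] flat = new double[rows * cols];
    int idx = 0;
    if (order == 'c') { // row-major: walk each row left to right
      for (int r = 0; r < rows; r++) for (int c = 0; c < cols; c++) flat[idx++] = m[r][c];
    } else {            // 'f', column-major: walk each column top to bottom
      for (int c = 0; c < cols; c++) for (int r = 0; r < rows; r++) flat[idx++] = m[r][c];
    }
    return flat;
  }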
@Override
public Pair<INDArray, MaskState> feedForwardMaskArray(
INDArray maskArray, MaskState currentMaskState, int minibatchSize) {
if (maskArray == null) {
// For same mode (with stride 1): output activations size is always same size as input
// activations size -> mask array is same size
return new Pair<>(maskArray, currentMaskState);
}
INDArray outMask =
Convolution2DUtils.cnn2dMaskReduction(
maskArray,
getTypedLayerConfiguration().getKernelSize(),
getTypedLayerConfiguration().getStride(),
getTypedLayerConfiguration().getPadding(),
getTypedLayerConfiguration().getDilation(),
getTypedLayerConfiguration().getConvolutionMode());
return new Pair<>(outMask, currentMaskState);
}
}

View File

@ -47,7 +47,7 @@ public class DenseLayer extends BaseLayer<org.deeplearning4j.nn.conf.layers.Dens
   @Override
   public boolean hasBias() {
-    return super.hasBias();
+    return getTypedLayerConfiguration().isHasBias();
   }

   @Override

View File

@ -37,27 +37,41 @@ import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.lossfunctions.ILossFunction;

public class RnnOutputLayer
    extends BaseOutputLayer<org.deeplearning4j.nn.conf.layers.RnnOutputLayer> {

  public RnnOutputLayer(LayerConfiguration conf, DataType dataType) {
    super(conf, dataType);
  }

  @Override
  public Pair<Gradient, INDArray> backpropGradient(
      INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    if (input.rank() != 3) {
      throw new UnsupportedOperationException(
          "Input is not rank 3. RnnOutputLayer expects rank 3 input with shape [minibatch, layerInSize, sequenceLength]."
              + " Got input with rank "
              + input.rank()
              + " and shape "
              + Arrays.toString(input.shape())
              + " - "
              + layerId());
    }

    RNNFormat format = getTypedLayerConfiguration().getDataFormat();
    int td = (format == RNNFormat.NCW) ? 2 : 1; // either NCW or NWC
    Preconditions.checkState(
        labels.rank() == 3,
        "Expected rank 3 labels array, got label array with shape %ndShape",
        labels);
    Preconditions.checkState(
        input.size(td) == labels.size(td),
        "Sequence lengths do not match for RnnOutputLayer input and labels:"
            + "Arrays should be rank 3 with shape [minibatch, size, sequenceLength] - "
            + "mismatch on dimension 2 (sequence length) - input=%ndShape vs. label=%ndShape.\n",
        input, "\n\n",
        labels);

    INDArray inputTemp = input;
    if (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC) {
@ -66,13 +80,19 @@ public class RnnOutputLayer extends BaseOutputLayer<org.deeplearning4j.nn.conf.l
    this.input = TimeSeriesUtils.reshape3dTo2d(input, workspaceMgr, ArrayType.BP_WORKING_MEM);

    applyDropOutIfNecessary(
        true,
        workspaceMgr); // Edge case: we skip OutputLayer forward pass during training as this isn't
    // required to calculate gradients

    Pair<Gradient, INDArray> gradAndEpsilonNext =
        super.backpropGradient(epsilon, workspaceMgr); // Also applies dropout
    this.input = inputTemp;

    INDArray epsilon2d = gradAndEpsilonNext.getSecond();

    INDArray epsilon3d =
        TimeSeriesUtils.reshape2dTo3d(
            epsilon2d, input.size(0), workspaceMgr, ArrayType.ACTIVATION_GRAD);
    if (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC) {
      epsilon3d = epsilon3d.permute(0, 2, 1);
    }
@ -82,14 +102,17 @@ public class RnnOutputLayer extends BaseOutputLayer<org.deeplearning4j.nn.conf.l
    return new Pair<>(gradAndEpsilonNext.getFirst(), epsilon3d);
  }

  /** {@inheritDoc} */
  @Override
  public double f1Score(INDArray examples, INDArray labels) {
    if (examples.rank() == 3)
      examples =
          TimeSeriesUtils.reshape3dTo2d(
              examples, LayerWorkspaceMgr.noWorkspaces(), ArrayType.ACTIVATIONS);
    if (labels.rank() == 3)
      labels =
          TimeSeriesUtils.reshape3dTo2d(
              labels, LayerWorkspaceMgr.noWorkspaces(), ArrayType.ACTIVATIONS);
    return super.f1Score(examples, labels);
  }
@ -108,7 +131,10 @@ public class RnnOutputLayer extends BaseOutputLayer<org.deeplearning4j.nn.conf.l
    if (input.rank() == 3) {
      // Case when called from RnnOutputLayer
      INDArray inputTemp = input;
      input =
          (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC)
              ? input.permute(0, 2, 1)
              : input;
      input = TimeSeriesUtils.reshape3dTo2d(input, workspaceMgr, ArrayType.FF_WORKING_MEM);
      INDArray out = super.preOutput(training, workspaceMgr);
      this.input = inputTemp;
@ -124,7 +150,10 @@ public class RnnOutputLayer extends BaseOutputLayer<org.deeplearning4j.nn.conf.l
  protected INDArray getLabels2d(LayerWorkspaceMgr workspaceMgr, ArrayType arrayType) {
    INDArray labels = this.labels;
    if (labels.rank() == 3) {
      labels =
          (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC)
              ? labels.permute(0, 2, 1)
              : labels;
      return TimeSeriesUtils.reshape3dTo2d(labels, workspaceMgr, arrayType);
    }
    return labels;
@ -143,9 +172,14 @@ public class RnnOutputLayer extends BaseOutputLayer<org.deeplearning4j.nn.conf.l
    if (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC) {
      input = input.permute(0, 2, 1);
    }
    INDArray input2d =
        TimeSeriesUtils.reshape3dTo2d(
            input.castTo(W.dataType()), workspaceMgr, ArrayType.FF_WORKING_MEM);

    INDArray act2d =
        getTypedLayerConfiguration()
            .getActivationFn()
            .getActivation(input2d.mmul(W).addiRowVector(b), training);
    if (maskArray != null) {
      if (!maskArray.isColumnVectorOrScalar() || Arrays.equals(maskArray.shape(), act2d.shape())) {
        // Per output masking
@ -156,7 +190,8 @@ public class RnnOutputLayer extends BaseOutputLayer<org.deeplearning4j.nn.conf.l
      }
    }

    INDArray ret =
        TimeSeriesUtils.reshape2dTo3d(act2d, input.size(0), workspaceMgr, ArrayType.ACTIVATIONS);
    if (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC) {
      ret = ret.permute(0, 2, 1);
    }
@ -170,13 +205,21 @@ public class RnnOutputLayer extends BaseOutputLayer<org.deeplearning4j.nn.conf.l
      // (a) per time step masking - rank 2 mask array -> reshape to rank 1 (column vector)
      // (b) per output masking - rank 3 mask array -> reshape to rank 2 (
      if (maskArray.rank() == 2) {
        this.maskArray =
            TimeSeriesUtils.reshapeTimeSeriesMaskToVector(
                maskArray, LayerWorkspaceMgr.noWorkspacesImmutable(), ArrayType.INPUT);
      } else if (maskArray.rank() == 3) {
        this.maskArray =
            TimeSeriesUtils.reshape3dTo2d(
                maskArray, LayerWorkspaceMgr.noWorkspacesImmutable(), ArrayType.INPUT);
      } else {
        throw new UnsupportedOperationException(
            "Invalid mask array: must be rank 2 or 3 (got: rank "
                + maskArray.rank()
                + ", shape = "
                + Arrays.toString(maskArray.shape())
                + ") "
                + layerId());
      }
    } else {
      this.maskArray = null;
@ -184,12 +227,14 @@ public class RnnOutputLayer extends BaseOutputLayer<org.deeplearning4j.nn.conf.l
  }

  @Override
  public Pair<INDArray, MaskState> feedForwardMaskArray(
      INDArray maskArray, MaskState currentMaskState, int minibatchSize) {
    // If the *input* mask array is present and active, we should use it to mask the output
    if (maskArray != null && currentMaskState == MaskState.Active) {
      this.inputMaskArray =
          TimeSeriesUtils.reshapeTimeSeriesMaskToVector(
              maskArray, LayerWorkspaceMgr.noWorkspacesImmutable(), ArrayType.INPUT);
      this.inputMaskArrayState = currentMaskState;
    } else {
      this.inputMaskArray = null;
@ -199,27 +244,35 @@ public class RnnOutputLayer extends BaseOutputLayer<org.deeplearning4j.nn.conf.l
    return null; // Last layer in network
  }

  /**
   * Compute the score for each example individually, after labels and input have been set.
   *
   * @param fullNetRegTerm Regularization score term for the entire network (or, 0.0 to not include
   *     regularization)
   * @return A column INDArray of shape [numExamples,1], where entry i is the score of the ith
   *     example
   */
  @Override
  public INDArray computeScoreForExamples(double fullNetRegTerm, LayerWorkspaceMgr workspaceMgr) {
    // For RNN: need to sum up the score over each time step before returning.
    if (input == null || labels == null)
      throw new IllegalStateException(
          "Cannot calculate score without input and labels " + layerId());
    INDArray preOut = preOutput2d(false, workspaceMgr);

    ILossFunction lossFunction = getTypedLayerConfiguration().getLossFunction();
    INDArray scoreArray =
        lossFunction.computeScoreArray(
            getLabels2d(workspaceMgr, ArrayType.FF_WORKING_MEM),
            preOut,
            getTypedLayerConfiguration().getActivationFn(),
            maskArray);
    // scoreArray: shape [minibatch*timeSeriesLength, 1]
    // Reshape it to [minibatch, timeSeriesLength] then sum over time step
    INDArray scoreArrayTs =
        TimeSeriesUtils.reshapeVectorToTimeSeriesMask(scoreArray, (int) input.size(0));
    INDArray summedScores = scoreArrayTs.sum(true, 1);
    if (fullNetRegTerm != 0.0) {

View File

@ -47,7 +47,7 @@ import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
 import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator;

 @Data
-public abstract class BaseWrapperLayer extends AbstractLayer {
+public abstract class BaseWrapperLayer<LayerConf_T extends LayerConfiguration> extends AbstractLayer {

   protected Layer underlying;
@ -57,8 +57,8 @@ public abstract class BaseWrapperLayer extends AbstractLayer {
   }

   @Override
-  public BaseLayerConfiguration getTypedLayerConfiguration() {
-    return (BaseLayerConfiguration) underlying.getLayerConfiguration();
+  public LayerConf_T getTypedLayerConfiguration() {
+    return (LayerConf_T) underlying.getLayerConfiguration();
   }

   /**

View File

@ -712,7 +712,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork
       if (layer_conf instanceof BaseLayerConfiguration)
         ((BaseLayerConfiguration) layer_conf).setDataType(netDtype);

-      nParamsPerLayer[i] = layer_conf.initializer().numParams(layer_conf);
+      nParamsPerLayer[i] = layer_conf.numParams();
       paramLength += nParamsPerLayer[i];
     }

     log.debug(

View File

@ -0,0 +1,183 @@
/*
* ******************************************************************************
* *
* *
* * This program and the accompanying materials are made available under the
* * terms of the Apache License, Version 2.0 which is available at
* * https://www.apache.org/licenses/LICENSE-2.0.
* *
* * See the NOTICE file distributed with this work for additional
* * information regarding copyright ownership.
* * Unless required by applicable law or agreed to in writing, software
* * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* * License for the specific language governing permissions and limitations
* * under the License.
* *
* * SPDX-License-Identifier: Apache-2.0
* *****************************************************************************
*/
package org.deeplearning4j.nn.params;
import java.util.*;
import lombok.val;
import org.deeplearning4j.nn.api.AbstractParamInitializer;
import org.deeplearning4j.nn.conf.layers.Convolution1DNew;
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
import org.deeplearning4j.nn.conf.layers.LayerConfiguration;
import org.deeplearning4j.nn.weights.WeightInitUtil;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.indexing.NDArrayIndex;
public class ConvolutionNewParamInitializer extends AbstractParamInitializer {
private static final ConvolutionNewParamInitializer INSTANCE = new ConvolutionNewParamInitializer();
public static ConvolutionNewParamInitializer getInstance() {
return INSTANCE;
}
public final static String WEIGHT_KEY = DefaultParamInitializer.WEIGHT_KEY;
public final static String BIAS_KEY = DefaultParamInitializer.BIAS_KEY;
@Override
public long numParams(LayerConfiguration l) {
return l.numParams();
}
@Override
public List<String> paramKeys(LayerConfiguration layer) {
ConvolutionLayer layerConf =
(ConvolutionLayer) layer;
if(layerConf.hasBias()){
return Arrays.asList(WEIGHT_KEY, BIAS_KEY);
} else {
return weightKeys(layer);
}
}
@Override
public List<String> weightKeys(LayerConfiguration layer) {
return Collections.singletonList(WEIGHT_KEY);
}
@Override
public List<String> biasKeys(LayerConfiguration layer) {
ConvolutionLayer layerConf =
(ConvolutionLayer) layer;
if(layerConf.hasBias()){
return Collections.singletonList(BIAS_KEY);
} else {
return Collections.emptyList();
}
}
@Override
public boolean isWeightParam(LayerConfiguration layer, String key) {
return WEIGHT_KEY.equals(key);
}
@Override
public boolean isBiasParam(LayerConfiguration layer, String key) {
return BIAS_KEY.equals(key);
}
@Override
public Map<String, INDArray> init(LayerConfiguration conf, INDArray paramsView, boolean initializeParams) {
Convolution1DNew layer = (Convolution1DNew) conf;
if (layer.getKernelSize().length != 2) throw new IllegalArgumentException("Filter size must be == 2");
Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());
Convolution1DNew layerConf =
(Convolution1DNew) conf;
val nOut = layerConf.getNOut();
if(layer.hasBias()){
//Standard case
INDArray biasView = paramsView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(0, nOut));
INDArray weightView = paramsView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(nOut, numParams(conf)));
params.put(BIAS_KEY, createBias(conf, biasView, initializeParams));
params.put(WEIGHT_KEY, createWeightMatrix(conf, weightView, initializeParams));
conf.getNetConfiguration().addNetWideVariable(WEIGHT_KEY);
conf.getNetConfiguration().addNetWideVariable(BIAS_KEY);
} else {
INDArray weightView = paramsView;
params.put(WEIGHT_KEY, createWeightMatrix(conf, weightView, initializeParams));
conf.getNetConfiguration().addNetWideVariable(WEIGHT_KEY);
}
return params;
}
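  // Illustrative sketch only (not part of the original initializer): for a layer with bias, the
  // flat paramsView handed to init() is laid out as [ bias | weights ], i.e. the first nOut
  // entries are the bias and the remaining nOut*nIn*kH*kW entries are the c-order weights.
  // Hypothetical helper that just prints those index ranges:
  private static void describeFlatLayout(long nOut, long nIn, long kH, long kW) {
    long numWeights = nOut * nIn * kH * kW;
    System.out.println("bias view   : [0, " + nOut + ")");
    System.out.println("weight view : [" + nOut + ", " + (nOut + numWeights) + ")");
  }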
@Override
public Map<String, INDArray> getGradientsFromFlattened(LayerConfiguration conf, INDArray gradientView) {
Convolution1DNew layerConf =
(Convolution1DNew) conf;
int[] kernel = layerConf.getKernelSize();
val nIn = layerConf.getNIn();
val nOut = layerConf.getNOut();
Map<String, INDArray> out = new LinkedHashMap<>();
if(layerConf.hasBias()){
//Standard case
INDArray biasGradientView = gradientView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(0, nOut));
INDArray weightGradientView =
gradientView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(nOut, numParams(conf)))
.reshape('c', nOut, nIn, kernel[0], kernel[1]);
out.put(BIAS_KEY, biasGradientView);
out.put(WEIGHT_KEY, weightGradientView);
} else {
INDArray weightGradientView = gradientView.reshape('c', nOut, nIn, kernel[0], kernel[1]);
out.put(WEIGHT_KEY, weightGradientView);
}
return out;
}
//1 bias per feature map
protected INDArray createBias(LayerConfiguration conf, INDArray biasView, boolean initializeParams) {
//the bias is a 1D tensor -- one bias per output feature map
Convolution1DNew layerConf =
(Convolution1DNew) conf;
if (initializeParams)
biasView.assign(layerConf.getBiasInit());
return biasView;
}
protected INDArray createWeightMatrix(LayerConfiguration conf, INDArray weightView, boolean initializeParams) {
/*
Create a 4d weight matrix of:
(number of kernels, num input channels, kernel height, kernel width)
Note c order is used specifically for the CNN weights, as opposed to f order elsewhere
Inputs to the convolution layer are:
(batch size, num input feature maps, image height, image width)
*/
Convolution1DNew layerConf =
(Convolution1DNew) conf;
if (initializeParams) {
int[] kernel = layerConf.getKernelSize();
int[] stride = layerConf.getStride();
val inputDepth = layerConf.getNIn();
val outputDepth = layerConf.getNOut();
double fanIn = inputDepth * kernel[0] * kernel[1];
double fanOut = outputDepth * kernel[0] * kernel[1] / ((double) stride[0] * stride[1]);
val weightsShape = new long[] {outputDepth, inputDepth, kernel[0], kernel[1]};
return layerConf.getWeightInit().init(fanIn, fanOut, weightsShape, 'c', weightView);
} else {
int[] kernel = layerConf.getKernelSize();
return WeightInitUtil.reshapeWeights(
new long[] {layerConf.getNOut(), layerConf.getNIn(), kernel[0], kernel[1]}, weightView, 'c');
}
}
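  // Worked example (illustrative assumption, not part of the original code): with nIn=3, nOut=8,
  // kernel={3,1} and stride={1,1}, the fan values used for weight init above are
  // fanIn = 3*3*1 = 9 and fanOut = 8*3*1/(1*1) = 24.
  private static double[] fanInFanOutExample(long nIn, long nOut, int[] kernel, int[] stride) {
    double fanIn = nIn * kernel[0] * kernel[1];
    double fanOut = nOut * kernel[0] * kernel[1] / ((double) stride[0] * stride[1]);
    return new double[] {fanIn, fanOut};
  }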
}

View File

@ -20,7 +20,6 @@
package org.deeplearning4j.util;

import java.util.Arrays;
import org.deeplearning4j.exception.DL4JInvalidConfigException;
import org.deeplearning4j.exception.DL4JInvalidInputException;
@ -37,47 +36,45 @@ public class Convolution1DUtils {

  private static final int ONE = 1;

  private Convolution1DUtils() {}

  public static int getOutputSize(
      INDArray inputData, int kernel, int strides, int padding, ConvolutionMode convolutionMode) {
    return getOutputSize(inputData, kernel, strides, padding, convolutionMode, ONE);
  }

  /**
   * Returns true if the given layer has an {@link RNNFormat}. This is true for: {@link
   * Convolution1D}, {@link Subsampling1DLayer} {@link SimpleRnn} {@link LSTM} {@link
   * EmbeddingSequenceLayer}
   *
   * @param layer the layer to test
   * @return true if the input layer has an rnn format false otherwise
   */
  public static boolean hasRnnDataFormat(LayerConfiguration layer) {
    return layer instanceof Convolution1D
        || layer instanceof Convolution1D
        || layer instanceof Subsampling1DLayer
        || layer instanceof SimpleRnn
        || layer instanceof LSTM
        || layer instanceof EmbeddingSequenceLayer;
  }

  /**
   * Get the {@link RNNFormat} for the given layer. Throws an {@link IllegalArgumentException} if a
   * layer doesn't have an rnn format
   *
   * @param layer the layer to get the format for
   * @return the format for the layer
   */
  public static RNNFormat getRnnFormatFromLayer(LayerConfiguration layer) {
    Preconditions.checkState(
        hasRnnDataFormat(layer),
        "ILayer of type "
            + layer.getClass().getName()
            + " and name "
            + layer.getName()
            + " does not have an RNNFormat");
    if (layer instanceof SimpleRnn) {
      SimpleRnn simpleRnn = (SimpleRnn) layer;
      return simpleRnn.getDataFormat();
@ -96,24 +93,24 @@ public class Convolution1DUtils {
    } else if (layer instanceof EmbeddingSequenceLayer) {
      EmbeddingSequenceLayer embeddingSequenceLayer = (EmbeddingSequenceLayer) layer;
      return embeddingSequenceLayer.getOutputDataFormat();
    } else {
      throw new IllegalArgumentException(
          "Illegal layer type " + layer.getClass().getName() + " and name " + layer.getName());
    }
  }

  /**
   * Reshapes the given weight array or weight gradient to work with the specified {@link RNNFormat}
   *
   * @param w the weight array or gradient
   * @param rnnFormat the {@link RNNFormat} to use
   * @return the reshaped array.
   */
  public static INDArray reshapeWeightArrayOrGradientForFormat(INDArray w, RNNFormat rnnFormat) {
    if (rnnFormat == RNNFormat.NWC)
      w = w.reshape(w.ordering(), w.size(0), w.size(1), w.size(2))
              .permute(2, 1, 0); // [oC, iC, k, 1] to [k, iC, oC]
    else {
      w = w.reshape(w.ordering(), w.size(2), w.size(1), w.size(0));
    }
@ -121,7 +118,6 @@ public class Convolution1DUtils {
    return w;
  }

  /**
   * Get the output size (height) for the given input data and CNN1D configuration
   *
@ -133,8 +129,13 @@ public class Convolution1DUtils {
   * @param dilation Kernel dilation
   * @return Output size (width)
   */
  public static long getOutputSize(
      long inH,
      int kernel,
      int strides,
      int padding,
      ConvolutionMode convolutionMode,
      int dilation) {
    long eKernel = effectiveKernelSize(kernel, dilation);
    if (convolutionMode == ConvolutionMode.Same || convolutionMode == ConvolutionMode.Causal) {
      return (int) Math.ceil(inH / ((double) strides));
@ -153,10 +154,14 @@ public class Convolution1DUtils {
   * @param dilation Kernel dilation
   * @return Output size (width)
   */
  public static int getOutputSize(
      INDArray inputData,
      int kernel,
      int strides,
      int padding,
      ConvolutionMode convolutionMode,
      int dilation) {
    if (inputData.size(2) > Integer.MAX_VALUE) throw new ND4JArraySizeException();
    int inH = (int) inputData.size(2);
    int eKernel = effectiveKernelSize(kernel, dilation);
    boolean atrous = (eKernel == kernel);
@ -171,8 +176,14 @@ public class Convolution1DUtils {
    return outH;
  }

  public static void validateShapes(
      INDArray inputData,
      int eKernel,
      int strides,
      int padding,
      ConvolutionMode convolutionMode,
      int dilation,
      int inShape,
      boolean atrous) {

    int inH = inShape;
@ -186,15 +197,21 @@ public class Convolution1DUtils {
      if (atrous) sb.append("effective ");
      sb.append("kernel height <= input height + 2 * padding height. \nGot ");
      if (atrous) sb.append("effective ");
      sb.append("kernel height = ")
          .append(eKernel)
          .append(", input height = ")
          .append(inH)
          .append(" and padding height = ")
          .append(padding)
          .append(" which do not satisfy 0 < ")
          .append(eKernel)
          .append(" <= ")
          .append(inH + 2 * padding)
          .append(getCommonErrorMsg(inputData, eKernel, strides, padding, dilation));
      throw new DL4JInvalidInputException(sb.toString());
    }

    if (convolutionMode == ConvolutionMode.Strict) {
      if ((inH - eKernel + 2 * padding) % strides != 0) {
        double d = (inH - eKernel + 2 * padding) / ((double) strides) + 1.0;
@ -202,27 +219,41 @@ public class Convolution1DUtils {
        int truncated = (int) d;
        int sameSize = (int) Math.ceil(inH / ((double) strides));

        String sb =
            "Invalid input data or configuration: Combination of kernel size, "
                + "stride and padding are not "
                + "valid for given input height, using ConvolutionMode.Strict\n"
                + "ConvolutionMode.Strict requires: output height = (input height - kernelSize + "
                + "2*padding)/stride + 1 to be an integer. Got: ("
                + inH
                + " - "
                + eKernel
                + " + 2*"
                + padding
                + ")/"
                + strides
                + " + 1 = "
                + str
                + "\n"
                + "See \"Constraints on strides\" at http://cs231n.github."
                + "io/convolutional-networks/ and ConvolutionType enumeration Javadoc.\n"
                + "To truncate/crop the input, such that output height = floor("
                + str
                + ") = "
                + truncated
                + ", use ConvolutionType.Truncate.\n"
                + "Alternatively use ConvolutionType.Same, which will use padding to give an "
                + "output height of ceil("
                + inH
                + "/"
                + strides
                + ")="
                + sameSize
                + getCommonErrorMsg(inputData, eKernel, strides, padding, dilation);
        throw new DL4JInvalidConfigException(sb);
      }
    }
  }

  public static int effectiveKernelSize(int kernel, int dilation) {
@ -235,9 +266,13 @@ public class Convolution1DUtils {
    }
  }

  private static String getCommonErrorMsg(
      INDArray inputData, int kernel, int strides, int padding, int dilation) {
    String s =
        "\nInput size: [numExamples,inputDepth,inputHeight,inputWidth]="
            + Arrays.toString(inputData.shape())
            + ", inputKernel="
            + kernel;
    if (dilation != 1) {
      int effectiveKernel = effectiveKernelSize(kernel, dilation);
      s += ", effectiveKernelGivenDilation=" + effectiveKernel;
@ -245,16 +280,13 @@ public class Convolution1DUtils {
    return s + ", stride=" + strides + ", padding=" + padding + ", dilation=" + dilation;
  }

  /** Check that the convolution mode is consistent with the padding specification */
  public static void validateConvolutionModePadding(ConvolutionMode mode, int padding) {
    if (mode == ConvolutionMode.Same) {
      boolean nullPadding = padding == 0;
      if (!nullPadding)
        throw new IllegalArgumentException(
            "Padding cannot be used when using the `same' convolution mode");
    }
  }
@ -268,30 +300,48 @@ public class Convolution1DUtils {
   * @param dilation Dilation (length 2 array, height dimension first)
   * @return Top left padding (length 2 array, height dimension first)
   */
  public static int getSameModeTopLeftPadding(
      int outSize, int inSize, int kernel, int strides, int dilation) {
    int eKernel = effectiveKernelSize(kernel, dilation);
    // Note that padBottom is 1 bigger than this if bracketed term is not divisible by 2
    int outPad = ((outSize - 1) * strides + eKernel - inSize) / 2;
    Preconditions.checkState(
        outPad >= 0,
        "Invalid padding values calculated: %s - "
            + "layer configuration is invalid? Input size %s, output size %s, kernel %s, "
            + "strides %s, dilation %s",
        outPad,
        inSize,
        outSize,
        kernel,
        strides,
        dilation);
    return outPad;
  }

  public static int getSameModeBottomRightPadding(
      int outSize, int inSize, int kernel, int strides, int dilation) {
    int eKernel = effectiveKernelSize(kernel, dilation);
    int totalPad = ((outSize - 1) * strides + eKernel - inSize);
    int tlPad = totalPad / 2;
    int brPad = totalPad - tlPad;
    Preconditions.checkState(
        brPad >= 0,
        "Invalid padding values (right) calculated: %s - "
            + "layer configuration is invalid? Input size %s, output size %s, kernel %s, "
            + "strides %s, dilation %s",
        brPad,
        inSize,
        outSize,
        kernel,
        strides,
        dilation);
    return brPad;
  }

  /**
   * Perform validation on the CNN layer kernel/stride/padding. Expect int, with values > 0 for
   * kernel size and stride, and values >= 0 for padding.
   *
   * @param kernel Kernel size to check
   * @param stride Stride to check
@ -300,16 +350,16 @@ public class Convolution1DUtils {
  public static void validateCnn1DKernelStridePadding(int kernel, int stride, int padding) {

    if (kernel <= 0) {
      throw new IllegalStateException(
          "Invalid kernel size: value must be positive (> 0). Got: " + kernel);
    }

    if (stride <= 0) {
      throw new IllegalStateException(
          "Invalid stride: value must be positive (> 0). Got: " + stride);
    }

    if (padding < 0) {
      throw new IllegalStateException(
          "Invalid padding: value must be non-negative (>= 0). Got: " + padding);
    }
  }
}
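// Worked example (illustrative sketch, not part of the commit): how Convolution1DUtils.getOutputSize
// behaves for the common modes, assuming inH=10, kernel=3, stride=2, padding=0, dilation=1.
//   Same/Causal : outH = ceil(10 / 2) = 5
//   Truncate    : outH = (10 - 3 + 0) / 2 + 1 = 4 (integer division truncates 4.5)
//   Strict      : (10 - 3 + 0) % 2 != 0, so validateShapes() throws DL4JInvalidConfigException.
class Convolution1DOutputSizeExample {

  // "Same"/"Causal" mode: output length depends only on input length and stride.
  static long sameModeOut(long inH, int stride) {
    return (long) Math.ceil(inH / (double) stride); // 10, 2 -> 5
  }

  // "Truncate" mode: standard convolution formula with integer (floor) division.
  static long truncateModeOut(long inH, int kernel, int padding, int stride) {
    return (inH - kernel + 2L * padding) / stride + 1; // 10, 3, 0, 2 -> 4
  }
}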