Merge pull request #8892 from KonduitAI/master

Merge latest development work
master
Alex Black 2020-04-28 20:38:56 +10:00 committed by GitHub
commit 1930d99908
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
475 changed files with 10073 additions and 2958 deletions

View File

@ -98,7 +98,7 @@ public class TestGraphLocalExecution extends BaseDL4JTest {
@Override
public long getTimeoutMilliseconds() {
return 90000L;
return 120_000L;
}
@Test
@ -156,7 +156,7 @@ public class TestGraphLocalExecution extends BaseDL4JTest {
.dataSource(ds, dsP)
.modelSaver(new FileModelSaver(modelSave))
.scoreFunction(new TestSetLossScoreFunction())
.terminationConditions(new MaxTimeCondition(5, TimeUnit.SECONDS),
.terminationConditions(new MaxTimeCondition(20, TimeUnit.SECONDS),
new MaxCandidatesCondition(3))
.build();

View File

@ -87,7 +87,7 @@ public class TestGraphLocalExecutionGenetic extends BaseDL4JTest {
@Override
public long getTimeoutMilliseconds() {
return 45000L;
return 120_000L;
}
@Test
@ -154,8 +154,8 @@ public class TestGraphLocalExecutionGenetic extends BaseDL4JTest {
.dataSource(ds, dsP)
.modelSaver(new FileModelSaver(modelSave))
.scoreFunction(new TestSetLossScoreFunction())
.terminationConditions(new MaxTimeCondition(5, TimeUnit.SECONDS),
new MaxCandidatesCondition(10))
.terminationConditions(new MaxTimeCondition(20, TimeUnit.SECONDS),
new MaxCandidatesCondition(3))
.build();
IOptimizationRunner runner = new LocalOptimizationRunner(configuration, new ComputationGraphTaskCreator(new ClassificationEvaluator()));

View File

@ -18,6 +18,7 @@ package org.datavec.api.records.reader.impl;
import lombok.Getter;
import lombok.Setter;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.LineIterator;
import org.datavec.api.conf.Configuration;
@ -43,6 +44,7 @@ import java.util.*;
*
* @author Adam Gibson
*/
@Slf4j
public class LineRecordReader extends BaseRecordReader {
@ -58,6 +60,13 @@ public class LineRecordReader extends BaseRecordReader {
@Override
public void initialize(InputSplit split) throws IOException, InterruptedException {
super.initialize(split);
if(!(inputSplit instanceof StringSplit || inputSplit instanceof InputStreamInputSplit)){
final ArrayList<URI> uris = new ArrayList<>();
final Iterator<URI> uriIterator = inputSplit.locationsIterator();
while(uriIterator.hasNext()) uris.add(uriIterator.next());
this.locations = uris.toArray(new URI[0]);
}
this.iter = getIterator(0);
this.initialized = true;
}
@ -66,7 +75,6 @@ public class LineRecordReader extends BaseRecordReader {
public void initialize(Configuration conf, InputSplit split) throws IOException, InterruptedException {
this.conf = conf;
initialize(split);
this.initialized = true;
}
@Override
@ -89,7 +97,7 @@ public class LineRecordReader extends BaseRecordReader {
iter = getIterator(splitIndex);
onLocationOpen(locations[splitIndex]);
} catch (IOException e) {
e.printStackTrace();
log.error("",e);
}
if (iter.hasNext()) {
@ -120,7 +128,7 @@ public class LineRecordReader extends BaseRecordReader {
iter = getIterator(splitIndex);
onLocationOpen(locations[splitIndex]);
} catch (IOException e) {
e.printStackTrace();
log.error("",e);
}
return iter.hasNext();
@ -205,11 +213,6 @@ public class LineRecordReader extends BaseRecordReader {
}
}
} else {
final ArrayList<URI> uris = new ArrayList<>();
final Iterator<URI> uriIterator = inputSplit.locationsIterator();
while(uriIterator.hasNext()) uris.add(uriIterator.next());
this.locations = uris.toArray(new URI[uris.size()]);
if (locations.length > 0) {
InputStream inputStream = streamCreatorFn.apply(locations[location]);
try {

View File

@ -16,6 +16,7 @@
package org.datavec.api.split;
import lombok.extern.slf4j.Slf4j;
import org.datavec.api.util.files.UriFromPathIterator;
import org.datavec.api.writable.WritableType;
@ -34,6 +35,7 @@ import java.util.regex.Pattern;
* NumberedFileInputSplit utilizes String.format(), hence the requirement for "%d" to represent
* the integer index.
*/
@Slf4j
public class NumberedFileInputSplit implements InputSplit {
private final String baseString;
private final int minIdx;
@ -93,7 +95,7 @@ public class NumberedFileInputSplit implements InputSplit {
try {
writeFile.createNewFile();
} catch (IOException e) {
e.printStackTrace();
log.error("",e);
}

View File

@ -23,6 +23,7 @@ import java.net.URI;
import java.net.URISyntaxException;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.regex.Pattern;
/**
* A simple utility method to convert a {@code Iterator<String>} to an {@code Iterator<URI>}, where each
@ -32,6 +33,7 @@ import java.util.NoSuchElementException;
*/
@AllArgsConstructor
public class UriFromPathIterator implements Iterator<URI> {
final Pattern schemaPattern = Pattern.compile("^.*?:/.*");
private final Iterator<String> paths;
@ -42,16 +44,17 @@ public class UriFromPathIterator implements Iterator<URI> {
@Override
public URI next() {
if (!hasNext()) {
throw new NoSuchElementException("No next element");
}
try {
String s = paths.next();
if(!s.matches(".*:/.*")){
if(schemaPattern.matcher(s).matches()){
return new URI(s);
} else {
//No scheme - assume file for backward compatibility
return new File(s).toURI();
} else {
return new URI(s);
}
} catch (URISyntaxException e) {

View File

@ -162,7 +162,6 @@ public class Text extends BinaryComparable implements WritableComparable<BinaryC
return -1; // not found
} catch (CharacterCodingException e) {
// can't get here
e.printStackTrace();
return -1;
}
}

View File

@ -17,6 +17,7 @@
package org.datavec.arrow.recordreader;
import lombok.Getter;
import lombok.extern.slf4j.Slf4j;
import lombok.val;
import org.datavec.api.conf.Configuration;
import org.datavec.api.records.Record;
@ -50,6 +51,7 @@ import static org.datavec.arrow.ArrowConverter.readFromBytes;
* @author Adam Gibson
*
*/
@Slf4j
public class ArrowRecordReader implements RecordReader {
private InputSplit split;
@ -132,7 +134,7 @@ public class ArrowRecordReader implements RecordReader {
currIdx++;
this.currentPath = url;
}catch(Exception e) {
e.printStackTrace();
log.error("",e);
}
}
@ -242,7 +244,7 @@ public class ArrowRecordReader implements RecordReader {
try {
currentBatch.close();
} catch (IOException e) {
e.printStackTrace();
log.error("",e);
}
}
}

View File

@ -16,6 +16,7 @@
package org.datavec.arrow;
import lombok.extern.slf4j.Slf4j;
import lombok.val;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
@ -56,7 +57,7 @@ import static junit.framework.TestCase.assertTrue;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
@Slf4j
public class ArrowConverterTest extends BaseND4JTest {
private static BufferAllocator bufferAllocator = new RootAllocator(Long.MAX_VALUE);
@ -343,7 +344,7 @@ public class ArrowConverterTest extends BaseND4JTest {
try(ArrowFileWriter arrowFileWriter = new ArrowFileWriter(schemaRoot1,null,newChannel(byteArrayOutputStream))) {
arrowFileWriter.writeBatch();
} catch (IOException e) {
e.printStackTrace();
log.error("",e);
}
byte[] arr = byteArrayOutputStream.toByteArray();

View File

@ -61,7 +61,7 @@ public class Wave implements Serializable {
initWaveWithInputStream(inputStream);
inputStream.close();
} catch (IOException e) {
e.printStackTrace();
System.out.println(e.toString());
}
}
@ -96,7 +96,7 @@ public class Wave implements Serializable {
data = new byte[inputStream.available()];
inputStream.read(data);
} catch (IOException e) {
e.printStackTrace();
System.err.println(e.toString());
}
// end load data
} else {

View File

@ -16,10 +16,12 @@
package org.datavec.audio;
import lombok.extern.slf4j.Slf4j;
import java.io.FileOutputStream;
import java.io.IOException;
@Slf4j
public class WaveFileManager {
private Wave wave;
@ -78,7 +80,7 @@ public class WaveFileManager {
fos.write(wave.getBytes());
fos.close();
} catch (IOException e) {
e.printStackTrace();
log.error("",e);
}
}

View File

@ -16,6 +16,8 @@
package org.datavec.audio;
import lombok.extern.slf4j.Slf4j;
import java.io.IOException;
import java.io.InputStream;
@ -25,6 +27,7 @@ import java.io.InputStream;
*
* @author Jacquet Wong
*/
@Slf4j
public class WaveHeader {
public static final String RIFF_HEADER = "RIFF";
@ -109,7 +112,7 @@ public class WaveHeader {
// dis.close();
} catch (IOException e) {
e.printStackTrace();
log.error("",e);
return false;
}

View File

@ -17,6 +17,7 @@
package org.datavec.audio.fingerprint;
import lombok.extern.slf4j.Slf4j;
import org.datavec.audio.Wave;
import org.datavec.audio.WaveHeader;
import org.datavec.audio.dsp.Resampler;
@ -38,6 +39,7 @@ import java.util.List;
* @author jacquet
*
*/
@Slf4j
public class FingerprintManager {
private FingerprintProperties fingerprintProperties = FingerprintProperties.getInstance();
@ -153,7 +155,7 @@ public class FingerprintManager {
fingerprint = getFingerprintFromInputStream(fis);
fis.close();
} catch (IOException e) {
e.printStackTrace();
log.error("",e);
}
return fingerprint;
}
@ -170,7 +172,7 @@ public class FingerprintManager {
fingerprint = new byte[inputStream.available()];
inputStream.read(fingerprint);
} catch (IOException e) {
e.printStackTrace();
log.error("",e);
}
return fingerprint;
}
@ -190,7 +192,7 @@ public class FingerprintManager {
fileOutputStream.write(fingerprint);
fileOutputStream.close();
} catch (IOException e) {
e.printStackTrace();
log.error("",e);
}
}

View File

@ -81,7 +81,7 @@ public abstract class BaseImageLoader implements Serializable {
String fileName = file.toString();
if (fileName.endsWith(".tgz") || fileName.endsWith(".tar.gz") || fileName.endsWith(".gz")
|| fileName.endsWith(".zip"))
ArchiveUtils.unzipFileTo(file.getAbsolutePath(), fullDir.getAbsolutePath());
ArchiveUtils.unzipFileTo(file.getAbsolutePath(), fullDir.getAbsolutePath(), false);
} catch (IOException e) {
throw new IllegalStateException("Unable to fetch images", e);
}

View File

@ -186,7 +186,7 @@ public class CifarLoader extends NativeImageLoader implements Serializable {
labels.add(line);
}
} catch (IOException e) {
e.printStackTrace();
log.error("",e);
}
}
@ -300,7 +300,7 @@ public class CifarLoader extends NativeImageLoader implements Serializable {
try {
FileUtils.write(meanVarPath, uMean + "," + uStd + "," + vMean + "," + vStd);
} catch (IOException e) {
e.printStackTrace();
log.error("",e);
}
meanStdStored = true;
} else if (uMean == 0 && meanStdStored) {
@ -312,7 +312,7 @@ public class CifarLoader extends NativeImageLoader implements Serializable {
vStd = Double.parseDouble(values[3]);
} catch (IOException e) {
e.printStackTrace();
log.error("",e);
}
}
for (int i = 0; i < result.numExamples(); i++) {
@ -356,12 +356,12 @@ public class CifarLoader extends NativeImageLoader implements Serializable {
dataSets.add(new DataSet(asMatrix(matConversion.getSecond()), matConversion.getFirst()));
batchNumCount++;
} catch (Exception e) {
e.printStackTrace();
log.error("",e);
break;
}
}
} catch (IOException e) {
e.printStackTrace();
log.error("",e);
}
if(dataSets.size() == 0){

View File

@ -235,7 +235,7 @@ public class LFWLoader extends BaseImageLoader implements Serializable {
InputSplit data = train ? inputSplit[0] : inputSplit[1];
recordReader.initialize(data);
} catch (IOException | InterruptedException e) {
e.printStackTrace();
log.error("",e);
}
return recordReader;
}
@ -250,7 +250,7 @@ public class LFWLoader extends BaseImageLoader implements Serializable {
InputSplit data = train ? inputSplit[0] : inputSplit[1];
recordReader.initialize(data);
} catch (IOException | InterruptedException e) {
e.printStackTrace();
log.error("",e);
}
return recordReader;
}

View File

@ -225,7 +225,7 @@ public abstract class BaseImageRecordReader extends BaseRecordReader {
finishedInputStreamSplit = true;
return Arrays.<Writable>asList(ndArrayWritable);
} catch (IOException e) {
e.printStackTrace();
log.error("",e);
}
}
if (iter != null) {

View File

@ -16,6 +16,7 @@
package org.datavec.image.loader;
import lombok.extern.slf4j.Slf4j;
import lombok.val;
import org.apache.commons.io.IOUtils;
import org.bytedeco.javacpp.Loader;
@ -53,6 +54,7 @@ import static org.junit.Assert.fail;
*
* @author saudet
*/
@Slf4j
public class TestNativeImageLoader {
static final long seed = 10;
static final Random rng = new Random(seed);
@ -123,7 +125,7 @@ public class TestNativeImageLoader {
try {
array6 = loader5.asMatrix(new ClassPathResource(path2MitosisFile).getFile().getAbsolutePath());
} catch (IOException e) {
e.printStackTrace();
log.error("",e);
fail();
}
assertEquals(5, array6.rank());
@ -156,7 +158,7 @@ public class TestNativeImageLoader {
try {
array8 = loader7.asMatrix(new ClassPathResource(path2MitosisFile).getFile().getAbsolutePath());
} catch (IOException e) {
e.printStackTrace();
log.error("",e);
}
assertEquals(5, array8.rank());
assertEquals(pages2, array8.size(0));
@ -172,7 +174,7 @@ public class TestNativeImageLoader {
try {
array9 = loader8.asMatrix(new ClassPathResource(braintiff).getFile().getAbsolutePath());
} catch (IOException e) {
e.printStackTrace();
log.error("",e);
fail();
}
assertEquals(5, array9.rank());

View File

@ -66,7 +66,7 @@ public class UimaTokenizer implements Tokenizer {
} catch (Exception e) {
e.printStackTrace();
log.error("",e);
throw new RuntimeException(e);
}

View File

@ -17,6 +17,7 @@
package org.datavec.local.transforms.functions;
import lombok.extern.slf4j.Slf4j;
import org.datavec.api.records.reader.SequenceRecordReader;
import org.datavec.api.writable.Writable;
import org.nd4j.linalg.function.Function;
@ -32,6 +33,7 @@ import java.util.List;
* sequence data into a {@code List<List<Writable>>}
* @author Alex Black
*/
@Slf4j
public class SequenceRecordReaderFunction
implements Function<Pair<String, InputStream>, List<List<Writable>>> {
protected SequenceRecordReader sequenceRecordReader;
@ -46,7 +48,7 @@ public class SequenceRecordReaderFunction
try (DataInputStream dis = (DataInputStream) value.getRight()) {
return sequenceRecordReader.sequenceRecord(uri, dis);
} catch (IOException e) {
e.printStackTrace();
log.error("",e);
}
throw new IllegalStateException("Something went wrong");

View File

@ -20,6 +20,7 @@ package org.datavec.python;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.IOUtils;
import org.bytedeco.cpython.global.python;
import org.bytedeco.numpy.global.numpy;
import org.nd4j.linalg.api.concurrency.AffinityManager;
import org.nd4j.linalg.api.ndarray.INDArray;
@ -343,6 +344,19 @@ public class PythonExecutioner {
if (path == null) {
log.info("Setting python default path");
File[] packages = numpy.cachePackages();
//// TODO: fix in javacpp
File sitePackagesWindows = new File(python.cachePackage(), "site-packages");
File[] packages2 = new File[packages.length + 1];
for (int i = 0;i < packages.length; i++){
//System.out.println(packages[i].getAbsolutePath());
packages2[i] = packages[i];
}
packages2[packages.length] = sitePackagesWindows;
//System.out.println(sitePackagesWindows.getAbsolutePath());
packages = packages2;
//////////
Py_SetPath(packages);
} else {
log.info("Setting python path " + path);

View File

@ -0,0 +1,132 @@
/*******************************************************************************
* Copyright (c) 2020 Konduit K.K.
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* SPDX-License-Identifier: Apache-2.0
******************************************************************************/
package org.datavec.python;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.IOUtils;
import org.bytedeco.javacpp.Loader;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
@Slf4j
public class PythonProcess {
private static String pythonExecutable = Loader.load(org.bytedeco.cpython.python.class);
public static String runAndReturn(String... arguments)throws IOException, InterruptedException{
String[] allArgs = new String[arguments.length + 1];
for (int i = 0; i < arguments.length; i++){
allArgs[i + 1] = arguments[i];
}
allArgs[0] = pythonExecutable;
log.info("Executing command: " + Arrays.toString(allArgs));
ProcessBuilder pb = new ProcessBuilder(allArgs);
Process process = pb.start();
String out = IOUtils.toString(process.getInputStream(), StandardCharsets.UTF_8);
process.waitFor();
return out;
}
public static void run(String... arguments)throws IOException, InterruptedException{
String[] allArgs = new String[arguments.length + 1];
for (int i = 0; i < arguments.length; i++){
allArgs[i + 1] = arguments[i];
}
allArgs[0] = pythonExecutable;
log.info("Executing command: " + Arrays.toString(allArgs));
ProcessBuilder pb = new ProcessBuilder(allArgs);
pb.inheritIO().start().waitFor();
}
public static void pipInstall(String packageName) throws PythonException{
try{
run("-m", "pip", "install", packageName);
}catch(Exception e){
throw new PythonException("Error installing package " + packageName, e);
}
}
public static void pipInstall(String packageName, String version) throws PythonException{
pipInstall(packageName + "==" + version);
}
public static void pipUninstall(String packageName) throws PythonException{
try{
run("-m", "pip", "uninstall", packageName);
}catch(Exception e){
throw new PythonException("Error uninstalling package " + packageName, e);
}
}
public static void pipInstallFromGit(String gitRepoUrl) throws PythonException{
if (!gitRepoUrl.contains("://")){
gitRepoUrl = "git://" + gitRepoUrl;
}
try{
run("-m", "pip", "install", "git+", gitRepoUrl);
}catch(Exception e){
throw new PythonException("Error installing package from " + gitRepoUrl, e);
}
}
public static String getPackageVersion(String packageName) throws PythonException{
String out;
try{
out = runAndReturn("-m", "pip", "show", packageName);
} catch (Exception e){
throw new PythonException("Error finding version for package " + packageName, e);
}
if (!out.contains("Version: ")){
throw new PythonException("Can't find package " + packageName);
}
String pkgVersion = out.split("Version: ")[1].split(System.lineSeparator())[0];
return pkgVersion;
}
public static boolean isPackageInstalled(String packageName)throws PythonException{
try{
String out = runAndReturn("-m", "pip", "show", packageName);
return !out.isEmpty();
}catch (Exception e){
throw new PythonException("Error checking if package is installed: " +packageName, e);
}
}
public static void pipInstallFromRequirementsTxt(String path) throws PythonException{
try{
run("-m", "pip", "install","-r", path);
}catch (Exception e){
throw new PythonException("Error installing packages from " + path, e);
}
}
public static void pipInstallFromSetupScript(String path, boolean inplace) throws PythonException{
try{
run(path, inplace?"develop":"install");
}catch (Exception e){
throw new PythonException("Error installing package from " + path, e);
}
}
}

View File

@ -0,0 +1,144 @@
package org.datavec.python.keras;
import org.datavec.python.Python;
import org.datavec.python.PythonException;
import org.datavec.python.PythonObject;
import org.datavec.python.PythonProcess;
import org.nd4j.linalg.api.ndarray.INDArray;
public class Model {

    // Handle to the underlying Keras model object living in the Python interpreter.
    private PythonObject pyModel;

    /**
     * Ensures TensorFlow is installed (installing it via pip if necessary)
     * and imports it into the embedded interpreter.
     */
    private static PythonObject installAndImportTF() throws PythonException {
        if (!PythonProcess.isPackageInstalled("tensorflow")) {
            PythonProcess.pipInstall("tensorflow");
        }
        return Python.importModule("tensorflow");
    }

    /** Returns the {@code tf.keras} module, releasing the parent tf handle. */
    private static PythonObject getKerasModule() throws PythonException {
        PythonObject tf = installAndImportTF();
        PythonObject keras = tf.attr("keras");
        tf.del();
        return keras;
    }

    /** Loads a Keras model from the given path via {@code keras.models.load_model}. */
    private static PythonObject loadModel(String s) throws PythonException {
        PythonObject models = getKerasModule().attr("models");
        PythonObject loadModelF = models.attr("load_model");
        PythonObject model = loadModelF.call(s);
        models.del();
        loadModelF.del();
        return model;
    }

    /**
     * Loads a Keras model from a saved-model/HDF5 path.
     *
     * @param path path to the saved model
     * @throws PythonException if TensorFlow cannot be imported or the model fails to load
     */
    public Model(String path) throws PythonException {
        pyModel = loadModel(path);
    }

    /**
     * Runs {@code model.predict} on the given inputs.
     *
     * @param inputs one INDArray per model input
     * @return one INDArray per model output
     * @throws PythonException if the Python call fails
     */
    public INDArray[] predict(INDArray... inputs) throws PythonException {
        PythonObject predictF = pyModel.attr("predict");
        PythonObject inputList = new PythonObject(inputs);
        PythonObject pyOut = predictF.call(inputList);
        INDArray[] out;
        // Multi-output models return a Python list; single-output models
        // return a bare array.
        if (Python.isinstance(pyOut, Python.listType())) {
            out = new INDArray[Python.len(pyOut).toInt()];
            for (int i = 0; i < out.length; i++) {
                out[i] = pyOut.get(i).toNumpy().getNd4jArray();
            }
        } else {
            out = new INDArray[] {pyOut.toNumpy().getNd4jArray()};
        }
        predictF.del();
        inputList.del();
        pyOut.del();
        return out;
    }

    /** @return the number of model inputs */
    public int numInputs() {
        return tensorCount("inputs");
    }

    /** @return the number of model outputs */
    public int numOutputs() {
        return tensorCount("outputs");
    }

    /** Counts the tensors in the model attribute named {@code attrName}. */
    private int tensorCount(String attrName) {
        PythonObject tensors = pyModel.attr(attrName);
        PythonObject pyCount = Python.len(tensors);
        int ret = pyCount.toInt();
        tensors.del();
        pyCount.del();
        return ret;
    }

    /** @return the shape of every model input; unknown dimensions are -1 */
    public long[][] inputShapes() {
        long[][] ret = new long[numInputs()][];
        for (int i = 0; i < ret.length; i++) {
            ret[i] = inputShapeAt(i);
        }
        return ret;
    }

    /** @return the shape of every model output; unknown dimensions are -1 */
    public long[][] outputShapes() {
        long[][] ret = new long[numOutputs()][];
        for (int i = 0; i < ret.length; i++) {
            ret[i] = outputShapeAt(i);
        }
        return ret;
    }

    /**
     * Returns the shape of the input tensor at the given index.
     * Unknown/dynamic dimensions (e.g. batch) are reported as -1.
     */
    public long[] inputShapeAt(int input) {
        return shapeAt("inputs", input);
    }

    /**
     * Returns the shape of the output tensor at the given index.
     * Unknown/dynamic dimensions (e.g. batch) are reported as -1.
     */
    public long[] outputShapeAt(int output) {
        return shapeAt("outputs", output);
    }

    /**
     * Shared implementation for {@link #inputShapeAt(int)} and
     * {@link #outputShapeAt(int)}: reads the shape of tensor
     * {@code index} from the model attribute {@code attrName}.
     */
    private long[] shapeAt(String attrName, int index) {
        PythonObject tensors = pyModel.attr(attrName);
        PythonObject tensor = tensors.get(index);
        PythonObject tensorShape = tensor.attr("shape");
        PythonObject shapeList = Python.list(tensorShape);
        PythonObject pyNdim = Python.len(shapeList);
        int ndim = pyNdim.toInt();
        long[] shape = new long[ndim];
        for (int i = 0; i < shape.length; i++) {
            PythonObject pyDim = shapeList.get(i);
            // Dynamic dimensions come back as None (or a non-int object);
            // represent them as -1, matching ND4J convention.
            if (pyDim == null || !Python.isinstance(pyDim, Python.intType())) {
                shape[i] = -1;
            } else {
                shape[i] = pyDim.toLong();
            }
        }
        pyNdim.del();
        shapeList.del();
        tensorShape.del();
        tensor.del();
        tensors.del();
        return shape;
    }
}

View File

@ -74,7 +74,6 @@ public class DataVecTransformClient implements DataVecTransformService {
} catch (UnirestException e) {
log.error("Error in setCSVTransformProcess()", e);
e.printStackTrace();
}
}
@ -94,7 +93,6 @@ public class DataVecTransformClient implements DataVecTransformService {
return TransformProcess.fromJson(s);
} catch (UnirestException e) {
log.error("Error in getCSVTransformProcess()",e);
e.printStackTrace();
}
return null;
@ -119,7 +117,6 @@ public class DataVecTransformClient implements DataVecTransformService {
return singleCsvRecord;
} catch (UnirestException e) {
log.error("Error in transformIncremental(SingleCSVRecord)",e);
e.printStackTrace();
}
return null;
}
@ -140,8 +137,7 @@ public class DataVecTransformClient implements DataVecTransformService {
.getBody();
return batchCSVRecord1;
} catch (UnirestException e) {
log.error("Error in transform(BatchCSVRecord)", e);
e.printStackTrace();
log.error("",e);
}
return null;
@ -162,7 +158,6 @@ public class DataVecTransformClient implements DataVecTransformService {
return batchCSVRecord1;
} catch (UnirestException e) {
log.error("Error in transform(BatchCSVRecord)", e);
e.printStackTrace();
}
return null;
@ -181,7 +176,6 @@ public class DataVecTransformClient implements DataVecTransformService {
return batchArray1;
} catch (UnirestException e) {
log.error("Error in transformArray(BatchCSVRecord)",e);
e.printStackTrace();
}
return null;
@ -200,7 +194,6 @@ public class DataVecTransformClient implements DataVecTransformService {
return array;
} catch (UnirestException e) {
log.error("Error in transformArrayIncremental(SingleCSVRecord)",e);
e.printStackTrace();
}
return null;
@ -231,7 +224,6 @@ public class DataVecTransformClient implements DataVecTransformService {
return array;
} catch (UnirestException e) {
log.error("Error in transformSequenceArrayIncremental",e);
e.printStackTrace();
}
return null;
@ -252,7 +244,6 @@ public class DataVecTransformClient implements DataVecTransformService {
return batchArray1;
} catch (UnirestException e) {
log.error("Error in transformSequenceArray",e);
e.printStackTrace();
}
return null;
@ -274,7 +265,6 @@ public class DataVecTransformClient implements DataVecTransformService {
return batchCSVRecord1;
} catch (UnirestException e) {
log.error("Error in transformSequence");
e.printStackTrace();
}
return null;
@ -295,7 +285,6 @@ public class DataVecTransformClient implements DataVecTransformService {
return singleCsvRecord;
} catch (UnirestException e) {
log.error("Error in transformSequenceIncremental");
e.printStackTrace();
}
return null;
}

View File

@ -18,6 +18,7 @@ package org.datavec.spark.transform;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.extern.slf4j.Slf4j;
import lombok.val;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
@ -53,7 +54,7 @@ import static org.datavec.local.transforms.LocalTransformExecutor.executeToSeque
* @author Adan Gibson
*/
@AllArgsConstructor
@Slf4j
public class CSVSparkTransform {
@Getter
private TransformProcess transformProcess;
@ -252,7 +253,7 @@ public class CSVSparkTransform {
try {
return new Base64NDArrayBody(Nd4jBase64.base64String(arr));
} catch (IOException e) {
e.printStackTrace();
log.error("",e);
}
return null;

View File

@ -88,7 +88,7 @@ public class ImageSparkTransformServer extends SparkTransformServer {
log.info("Transform process initialized");
return ok(objectMapper.writeValueAsString(transform.getImageTransformProcess())).as(contentType);
} catch (Exception e) {
e.printStackTrace();
log.error("",e);
return internalServerError();
}
});
@ -100,7 +100,7 @@ public class ImageSparkTransformServer extends SparkTransformServer {
log.info("Transform process initialized");
return ok(objectMapper.writeValueAsString(transformProcess)).as(contentType);
} catch (Exception e) {
e.printStackTrace();
log.error("",e);
return internalServerError();
}
});
@ -112,7 +112,7 @@ public class ImageSparkTransformServer extends SparkTransformServer {
return badRequest();
return ok(objectMapper.writeValueAsString(transformIncrementalArray(record))).as(contentType);
} catch (Exception e) {
e.printStackTrace();
log.error("",e);
return internalServerError();
}
});
@ -130,7 +130,7 @@ public class ImageSparkTransformServer extends SparkTransformServer {
return ok(objectMapper.writeValueAsString(transformIncrementalArray(record))).as(contentType);
} catch (Exception e) {
e.printStackTrace();
log.error("",e);
return internalServerError();
}
});
@ -142,7 +142,7 @@ public class ImageSparkTransformServer extends SparkTransformServer {
return badRequest();
return ok(objectMapper.writeValueAsString(transformArray(batch))).as(contentType);
} catch (Exception e) {
e.printStackTrace();
log.error("",e);
return internalServerError();
}
});
@ -169,7 +169,7 @@ public class ImageSparkTransformServer extends SparkTransformServer {
return ok(objectMapper.writeValueAsString(transformArray(batch))).as(contentType);
} catch (Exception e) {
e.printStackTrace();
log.error("",e);
return internalServerError();
}
});

View File

@ -16,6 +16,7 @@
package org.datavec.spark;
import lombok.extern.slf4j.Slf4j;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.junit.After;
@ -23,6 +24,7 @@ import org.junit.Before;
import java.io.Serializable;
@Slf4j
public abstract class BaseSparkTest implements Serializable {
protected static JavaSparkContext sc;
@ -40,7 +42,7 @@ public abstract class BaseSparkTest implements Serializable {
try {
Thread.sleep(100L);
} catch (InterruptedException e) {
e.printStackTrace();
log.error("",e);
}
} else {
break;

View File

@ -68,7 +68,7 @@ public abstract class BaseDL4JTest {
* Override this method to set the default timeout for methods in the test class
*/
public long getTimeoutMilliseconds(){
return 30000;
return 60_000;
}
/**

View File

@ -43,7 +43,8 @@ import java.util.concurrent.atomic.AtomicLong;
*/
@Slf4j
public class RemoteUIStatsStorageRouter implements StatsStorageRouter, Serializable, Closeable {
private static final String ROUTE_IS_DOWN = "Info posted to RemoteUIStatsStorageRouter but router is shut down.";
private static final String MAX_WARNINGS_REACHED = "RemoteUIStatsStorageRouter: Reached max shutdown warnings. No further warnings will be produced.";
/**
* Default path for posting data to the UI - i.e., http://localhost:9000/remoteReceive or similar
*/
@ -163,10 +164,10 @@ public class RemoteUIStatsStorageRouter implements StatsStorageRouter, Serializa
if (shutdown.get()) {
long count = shutdownWarnCount.getAndIncrement();
if (count <= MAX_SHUTDOWN_WARN_COUNT) {
log.warn("Info posted to RemoteUIStatsStorageRouter but router is shut down.");
log.warn(ROUTE_IS_DOWN);
}
if (count == MAX_SHUTDOWN_WARN_COUNT) {
log.warn("RemoteUIStatsStorageRouter: Reached max shutdown warnings. No further warnings will be produced.");
log.warn(MAX_WARNINGS_REACHED);
}
} else {
for (StorageMetaData m : storageMetaData) {
@ -186,10 +187,10 @@ public class RemoteUIStatsStorageRouter implements StatsStorageRouter, Serializa
if (shutdown.get()) {
long count = shutdownWarnCount.getAndIncrement();
if (count <= MAX_SHUTDOWN_WARN_COUNT) {
log.warn("Info posted to RemoteUIStatsStorageRouter but router is shut down.");
log.warn(ROUTE_IS_DOWN);
}
if (count == MAX_SHUTDOWN_WARN_COUNT) {
log.warn("RemoteUIStatsStorageRouter: Reached max shutdown warnings. No further warnings will be produced.");
log.warn(MAX_WARNINGS_REACHED);
}
} else {
for (Persistable p : staticInfo) {
@ -209,10 +210,10 @@ public class RemoteUIStatsStorageRouter implements StatsStorageRouter, Serializa
if (shutdown.get()) {
long count = shutdownWarnCount.getAndIncrement();
if (count <= MAX_SHUTDOWN_WARN_COUNT) {
log.warn("Info posted to RemoteUIStatsStorageRouter but router is shut down.");
log.warn(ROUTE_IS_DOWN);
}
if (count == MAX_SHUTDOWN_WARN_COUNT) {
log.warn("RemoteUIStatsStorageRouter: Reached max shutdown warnings. No further warnings will be produced.");
log.warn(MAX_WARNINGS_REACHED);
}
} else {
for (Persistable p : updates) {

View File

@ -67,10 +67,7 @@ public class AsyncIterator<T extends Object> implements Iterator<T> {
nextElement = buffer.take();
// same on this run
if (nextElement == terminator)
return false;
return true;
return (nextElement != terminator);
} catch (Exception e) {
log.error("Premature end of loop!");
return false;

View File

@ -43,7 +43,7 @@ public class SystemInfoPrintListener implements TrainingListener {
private boolean printOnBackwardPass;
private boolean printOnGradientCalculation;
private static final String SYSTEM_INFO = "System info on epoch end: ";
@Override
public void iterationDone(Model model, int iteration, int epoch) {
@ -65,7 +65,7 @@ public class SystemInfoPrintListener implements TrainingListener {
return;
SystemInfo systemInfo = new SystemInfo();
log.info("System info on epoch end: ");
log.info(SYSTEM_INFO);
log.info(systemInfo.toPrettyJSON());
}
@ -75,7 +75,7 @@ public class SystemInfoPrintListener implements TrainingListener {
return;
SystemInfo systemInfo = new SystemInfo();
log.info("System info on epoch end: ");
log.info(SYSTEM_INFO);
log.info(systemInfo.toPrettyJSON());
}
@ -85,7 +85,7 @@ public class SystemInfoPrintListener implements TrainingListener {
return;
SystemInfo systemInfo = new SystemInfo();
log.info("System info on epoch end: ");
log.info(SYSTEM_INFO);
log.info(systemInfo.toPrettyJSON());
}
@ -95,7 +95,7 @@ public class SystemInfoPrintListener implements TrainingListener {
return;
SystemInfo systemInfo = new SystemInfo();
log.info("System info on epoch end: ");
log.info(SYSTEM_INFO);
log.info(systemInfo.toPrettyJSON());
}
@ -104,7 +104,7 @@ public class SystemInfoPrintListener implements TrainingListener {
if(!printOnBackwardPass)
return;
SystemInfo systemInfo = new SystemInfo();
log.info("System info on epoch end: ");
log.info(SYSTEM_INFO);
log.info(systemInfo.toPrettyJSON());
}
}

View File

@ -16,6 +16,7 @@
package org.deeplearning4j.perf.listener;
import lombok.extern.slf4j.Slf4j;
import org.nd4j.shade.jackson.databind.ObjectMapper;
import org.nd4j.shade.jackson.dataformat.yaml.YAMLFactory;
import oshi.json.SystemInfo;
@ -36,6 +37,7 @@ import java.util.concurrent.TimeUnit;
*
* @author Adam Gibson
*/
@Slf4j
public class SystemPolling {
private ScheduledExecutorService scheduledExecutorService;
@ -66,7 +68,7 @@ public class SystemPolling {
try {
objectMapper.writeValue(hardwareFile,hardwareMetric);
} catch (IOException e) {
e.printStackTrace();
log.error("",e);
}
}
},0,pollEveryMillis, TimeUnit.MILLISECONDS);

View File

@ -27,7 +27,6 @@ import org.deeplearning4j.nn.modelimport.keras.KerasModelImport;
import org.nd4j.linalg.dataset.api.preprocessor.Normalizer;
import java.io.*;
import java.nio.file.Files;
import java.util.UUID;
/**

View File

@ -20,6 +20,7 @@ import org.apache.commons.compress.utils.IOUtils;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.RNNFormat;
import org.deeplearning4j.nn.conf.layers.BaseLayer;
import org.deeplearning4j.nn.conf.layers.samediff.AbstractSameDiffLayer;
import org.deeplearning4j.nn.graph.ComputationGraph;
@ -153,11 +154,22 @@ public class TestUtils {
return randomOneHotTimeSeries(minibatch, outSize, tsLength, new Random(rngSeed));
}
public static INDArray randomOneHotTimeSeries(int minibatch, int outSize, int tsLength, Random rng){
INDArray out = Nd4j.create(new int[]{minibatch, outSize, tsLength}, 'f');
public static INDArray randomOneHotTimeSeries(int minibatch, int outSize, int tsLength, Random rng) {
return randomOneHotTimeSeries(RNNFormat.NCW, minibatch, outSize, tsLength, rng);
}
public static INDArray randomOneHotTimeSeries(RNNFormat format, int minibatch, int outSize, int tsLength, Random rng){
boolean ncw = format == RNNFormat.NCW;
long[] shape = ncw ? new long[]{minibatch, outSize, tsLength} : new long[]{minibatch, tsLength, outSize};
char order = ncw ? 'f' : 'c';
INDArray out = Nd4j.create(DataType.FLOAT, shape, order);
for( int i=0; i<minibatch; i++ ){
for( int j=0; j<tsLength; j++ ){
out.putScalar(i, rng.nextInt(outSize), j, 1.0);
if(ncw){
out.putScalar(i, rng.nextInt(outSize), j, 1.0);
} else {
out.putScalar(i, j, rng.nextInt(outSize), 1.0);
}
}
}
return out;

View File

@ -493,7 +493,7 @@ public class RecordReaderDataSetiteratorTest extends BaseDL4JTest {
out.println();
}
} catch (IOException e) {
e.printStackTrace();
log.error("",e);
}
return new Pair<>(dArr,temp);

View File

@ -78,10 +78,10 @@ public class EarlyTerminationDataSetIteratorTest extends BaseDL4JTest {
EarlyTerminationDataSetIterator earlyEndIter = new EarlyTerminationDataSetIterator(iter, terminateAfter);
earlyEndIter.next(10);
assertEquals(earlyEndIter.hasNext(), false);
assertEquals(false, earlyEndIter.hasNext());
earlyEndIter.reset();
assertEquals(earlyEndIter.hasNext(), true);
assertEquals(true, earlyEndIter.hasNext());
}

View File

@ -98,7 +98,7 @@ public class MultipleEpochsIteratorTest extends BaseDL4JTest {
while (multiIter.hasNext()) {
DataSet path = multiIter.next(10);
assertNotNull(path);
assertEquals(path.numExamples(), 10, 0.0);
assertEquals(10, path.numExamples(), 0.0);
}
assertEquals(epochs, multiIter.epochs);

View File

@ -33,7 +33,7 @@ public class SamplingTest extends BaseDL4JTest {
DataSetIterator iter = new MnistDataSetIterator(10, 10);
//batch size and total
DataSetIterator sampling = new SamplingDataSetIterator(iter.next(), 10, 10);
assertEquals(sampling.next().numExamples(), 10);
assertEquals(10, sampling.next().numExamples());
}
}

View File

@ -16,6 +16,7 @@
package org.deeplearning4j.exceptions;
import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.BaseDL4JTest;
import org.deeplearning4j.exception.DL4JException;
import org.deeplearning4j.nn.conf.ConvolutionMode;
@ -34,6 +35,7 @@ import static org.junit.Assert.fail;
/**
* A set of tests to ensure that useful exceptions are thrown on invalid network configurations
*/
@Slf4j
public class TestInvalidConfigurations extends BaseDL4JTest {
public static MultiLayerNetwork getDensePlusOutput(int nIn, int nOut) {
@ -78,7 +80,7 @@ public class TestInvalidConfigurations extends BaseDL4JTest {
} catch (DL4JException e) {
System.out.println("testDenseNin0(): " + e.getMessage());
} catch (Exception e) {
e.printStackTrace();
log.error("",e);
fail();
}
}
@ -96,7 +98,7 @@ public class TestInvalidConfigurations extends BaseDL4JTest {
} catch (DL4JException e) {
System.out.println("testDenseNout0(): " + e.getMessage());
} catch (Exception e) {
e.printStackTrace();
log.error("",e);
fail();
}
}
@ -109,7 +111,7 @@ public class TestInvalidConfigurations extends BaseDL4JTest {
} catch (DL4JException e) {
System.out.println("testOutputLayerNin0(): " + e.getMessage());
} catch (Exception e) {
e.printStackTrace();
log.error("",e);
fail();
}
}
@ -122,7 +124,7 @@ public class TestInvalidConfigurations extends BaseDL4JTest {
} catch (DL4JException e) {
System.out.println("testRnnOutputLayerNin0(): " + e.getMessage());
} catch (Exception e) {
e.printStackTrace();
log.error("",e);
fail();
}
}
@ -135,7 +137,7 @@ public class TestInvalidConfigurations extends BaseDL4JTest {
} catch (DL4JException e) {
System.out.println("testLSTMNIn0(): " + e.getMessage());
} catch (Exception e) {
e.printStackTrace();
log.error("",e);
fail();
}
}
@ -153,7 +155,7 @@ public class TestInvalidConfigurations extends BaseDL4JTest {
} catch (DL4JException e) {
System.out.println("testLSTMNOut0(): " + e.getMessage());
} catch (Exception e) {
e.printStackTrace();
log.error("",e);
fail();
}
}
@ -166,7 +168,7 @@ public class TestInvalidConfigurations extends BaseDL4JTest {
} catch (DL4JException e) {
System.out.println("testConvolutionalNin0(): " + e.getMessage());
} catch (Exception e) {
e.printStackTrace();
log.error("",e);
fail();
}
}
@ -185,7 +187,7 @@ public class TestInvalidConfigurations extends BaseDL4JTest {
} catch (DL4JException e) {
System.out.println("testConvolutionalNOut0(): " + e.getMessage());
} catch (Exception e) {
e.printStackTrace();
log.error("",e);
fail();
}
}
@ -216,7 +218,7 @@ public class TestInvalidConfigurations extends BaseDL4JTest {
} catch (DL4JException e) {
System.out.println("testCnnInvalidConfigPaddingStridesHeight(): " + e.getMessage());
} catch (Exception e) {
e.printStackTrace();
log.error("",e);
fail();
}
}
@ -245,7 +247,7 @@ public class TestInvalidConfigurations extends BaseDL4JTest {
} catch (DL4JException e) {
System.out.println("testCnnInvalidConfigOrInput_SmallerDataThanKernel(): " + e.getMessage());
} catch (Exception e) {
e.printStackTrace();
log.error("",e);
fail();
}
}
@ -277,7 +279,7 @@ public class TestInvalidConfigurations extends BaseDL4JTest {
} catch (DL4JException e) {
System.out.println("testCnnInvalidConfigOrInput_BadStrides(): " + e.getMessage());
} catch (Exception e) {
e.printStackTrace();
log.error("",e);
fail();
}
}
@ -318,7 +320,7 @@ public class TestInvalidConfigurations extends BaseDL4JTest {
} catch (DL4JException e) {
System.out.println("testCnnInvalidConfigPaddingStridesWidth(): " + e.getMessage());
} catch (Exception e) {
e.printStackTrace();
log.error("",e);
fail();
}
}
@ -347,7 +349,7 @@ public class TestInvalidConfigurations extends BaseDL4JTest {
} catch (DL4JException e) {
System.out.println("testCnnInvalidConfigPaddingStridesWidthSubsampling(): " + e.getMessage());
} catch (Exception e) {
e.printStackTrace();
log.error("",e);
fail();
}
}

View File

@ -16,6 +16,7 @@
package org.deeplearning4j.exceptions;
import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.BaseDL4JTest;
import org.deeplearning4j.exception.DL4JException;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
@ -36,6 +37,7 @@ import static org.junit.Assert.*;
/**
* A set of tests to ensure that useful exceptions are thrown on invalid input
*/
@Slf4j
public class TestInvalidInput extends BaseDL4JTest {
@Test
@ -53,7 +55,7 @@ public class TestInvalidInput extends BaseDL4JTest {
} catch (DL4JException e) {
System.out.println("testInputNinMismatchDense(): " + e.getMessage());
} catch (Exception e) {
e.printStackTrace();
log.error("",e);
fail("Expected DL4JException");
}
}
@ -73,7 +75,7 @@ public class TestInvalidInput extends BaseDL4JTest {
} catch (DL4JException e) {
System.out.println("testInputNinMismatchOutputLayer(): " + e.getMessage());
} catch (Exception e) {
e.printStackTrace();
log.error("",e);
fail("Expected DL4JException");
}
}
@ -94,7 +96,7 @@ public class TestInvalidInput extends BaseDL4JTest {
//From loss function
System.out.println("testLabelsNOutMismatchOutputLayer(): " + e.getMessage());
} catch (Exception e) {
e.printStackTrace();
log.error("",e);
fail("Expected DL4JException");
}
}
@ -115,7 +117,7 @@ public class TestInvalidInput extends BaseDL4JTest {
//From loss function
System.out.println("testLabelsNOutMismatchRnnOutputLayer(): " + e.getMessage());
} catch (Exception e) {
e.printStackTrace();
log.error("",e);
fail("Expected DL4JException");
}
}
@ -142,7 +144,7 @@ public class TestInvalidInput extends BaseDL4JTest {
} catch (DL4JException e) {
System.out.println("testInputNinMismatchConvolutional(): " + e.getMessage());
} catch (Exception e) {
e.printStackTrace();
log.error("",e);
fail("Expected DL4JException");
}
}
@ -169,7 +171,7 @@ public class TestInvalidInput extends BaseDL4JTest {
} catch (DL4JException e) {
System.out.println("testInputNinRank2Convolutional(): " + e.getMessage());
} catch (Exception e) {
e.printStackTrace();
log.error("",e);
fail("Expected DL4JException");
}
}
@ -195,7 +197,7 @@ public class TestInvalidInput extends BaseDL4JTest {
} catch (DL4JException e) {
System.out.println("testInputNinRank2Subsampling(): " + e.getMessage());
} catch (Exception e) {
e.printStackTrace();
log.error("",e);
fail("Expected DL4JException");
}
}
@ -217,7 +219,7 @@ public class TestInvalidInput extends BaseDL4JTest {
} catch (DL4JException e) {
System.out.println("testInputNinMismatchLSTM(): " + e.getMessage());
} catch (Exception e) {
e.printStackTrace();
log.error("",e);
fail("Expected DL4JException");
}
}
@ -238,7 +240,7 @@ public class TestInvalidInput extends BaseDL4JTest {
} catch (DL4JException e) {
System.out.println("testInputNinMismatchBidirectionalLSTM(): " + e.getMessage());
} catch (Exception e) {
e.printStackTrace();
log.error("",e);
fail("Expected DL4JException");
}
@ -260,7 +262,7 @@ public class TestInvalidInput extends BaseDL4JTest {
} catch (DL4JException e) {
System.out.println("testInputNinMismatchEmbeddingLayer(): " + e.getMessage());
} catch (Exception e) {
e.printStackTrace();
log.error("",e);
fail("Expected DL4JException");
}
}
@ -305,7 +307,7 @@ public class TestInvalidInput extends BaseDL4JTest {
net.rnnTimeStep(Nd4j.create(5, 5, 10));
fail("Expected Exception - " + layerType);
} catch (Exception e) {
// e.printStackTrace();
log.error("",e);
String msg = e.getMessage();
assertTrue(msg, msg != null && msg.contains("rnn") && msg.contains("batch"));
}

View File

@ -21,6 +21,7 @@ import org.deeplearning4j.BaseDL4JTest;
import org.deeplearning4j.TestUtils;
import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator;
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.CNN2DFormat;
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
@ -34,6 +35,8 @@ import org.deeplearning4j.nn.weights.WeightInit;
import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr;
import org.junit.Ignore;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
@ -51,6 +54,7 @@ import static org.junit.Assert.*;
/**
* Created by nyghtowl on 9/1/15.
*/
@RunWith(Parameterized.class)
public class CNNGradientCheckTest extends BaseDL4JTest {
private static final boolean PRINT_RESULTS = true;
private static final boolean RETURN_ON_FIRST_FAILURE = false;
@ -62,6 +66,17 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
Nd4j.setDataType(DataType.DOUBLE);
}
private CNN2DFormat format;
public CNNGradientCheckTest(CNN2DFormat format){
this.format = format;
}
@Parameterized.Parameters(name = "{0}")
public static Object[] params(){
return CNN2DFormat.values();
}
@Override
public long getTimeoutMilliseconds() {
return 90000L;
@ -69,6 +84,9 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
@Test
public void testGradientCNNMLN() {
if(this.format != CNN2DFormat.NCHW) //Only test NCHW due to flat input format...
return;
//Parameterized test, testing combinations of:
// (a) activation function
// (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
@ -144,6 +162,9 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
@Test
public void testGradientCNNL1L2MLN() {
if(this.format != CNN2DFormat.NCHW) //Only test NCHW due to flat input format...
return;
//Parameterized test, testing combinations of:
// (a) activation function
// (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
@ -311,10 +332,12 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
new SubsamplingLayer.PoolingType[]{SubsamplingLayer.PoolingType.MAX,
SubsamplingLayer.PoolingType.AVG, SubsamplingLayer.PoolingType.PNORM};
boolean nchw = format == CNN2DFormat.NCHW;
for (String afn : activations) {
for (SubsamplingLayer.PoolingType poolingType : poolingTypes) {
for (int minibatchSize : minibatchSizes) {
INDArray input = Nd4j.rand(minibatchSize, width * height * inputDepth);
long[] inShape = nchw ? new long[]{minibatchSize, inputDepth, height, width} : new long[]{minibatchSize, height, width, inputDepth};
INDArray input = Nd4j.rand(DataType.DOUBLE, inShape);
INDArray labels = Nd4j.zeros(4 * minibatchSize, nOut);
for (int i = 0; i < 4 * minibatchSize; i++) {
labels.putScalar(new int[]{i, i % nOut}, 1.0);
@ -330,13 +353,13 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
.layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX)
.nOut(nOut).build())
.setInputType(InputType.convolutionalFlat(height, width, inputDepth))
.setInputType(InputType.convolutional(height, width, inputDepth, format))
.build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
String msg = "PoolingType=" + poolingType + ", minibatch=" + minibatchSize + ", activationFn="
String msg = format + " - poolingType=" + poolingType + ", minibatch=" + minibatchSize + ", activationFn="
+ afn;
if (PRINT_RESULTS) {
@ -377,8 +400,11 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
int[] padding = {0, 0};
int size = 2;
boolean nchw = format == CNN2DFormat.NCHW;
for (int minibatchSize : minibatchSizes) {
INDArray input = Nd4j.rand(minibatchSize, width * height * inputDepth);
long[] inShape = nchw ? new long[]{minibatchSize, inputDepth, height, width} : new long[]{minibatchSize, height, width, inputDepth};
INDArray input = Nd4j.rand(DataType.DOUBLE, inShape);
INDArray labels = TestUtils.randomOneHot(minibatchSize, nOut);
MultiLayerConfiguration conf =
@ -393,8 +419,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
.layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nIn(8 * 8 * 3)
.nOut(4).build())
.setInputType(InputType.convolutionalFlat(height, width,
inputDepth))
.setInputType(InputType.convolutional(height, width, inputDepth, format))
.build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
@ -438,10 +463,13 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
new SubsamplingLayer.PoolingType[]{SubsamplingLayer.PoolingType.MAX,
SubsamplingLayer.PoolingType.AVG, SubsamplingLayer.PoolingType.PNORM};
boolean nchw = format == CNN2DFormat.NCHW;
for (Activation afn : activations) {
for (SubsamplingLayer.PoolingType poolingType : poolingTypes) {
for (int minibatchSize : minibatchSizes) {
INDArray input = Nd4j.rand(minibatchSize, width * height * inputDepth);
long[] inShape = nchw ? new long[]{minibatchSize, inputDepth, height, width} : new long[]{minibatchSize, height, width, inputDepth};
INDArray input = Nd4j.rand(DataType.DOUBLE, inShape);
INDArray labels = Nd4j.zeros(minibatchSize, nOut);
for (int i = 0; i < minibatchSize; i++) {
labels.putScalar(new int[]{i, i % nOut}, 1.0);
@ -461,14 +489,13 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
.layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nIn(3 * 3 * 3)
.nOut(4).build())
.setInputType(InputType.convolutionalFlat(height, width,
inputDepth))
.setInputType(InputType.convolutional(height, width, inputDepth, format))
.build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
String msg = "PoolingType=" + poolingType + ", minibatch=" + minibatchSize + ", activationFn="
String msg = format + " - poolingType=" + poolingType + ", minibatch=" + minibatchSize + ", activationFn="
+ afn;
if (PRINT_RESULTS) {
@ -508,10 +535,13 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
new SubsamplingLayer.PoolingType[]{SubsamplingLayer.PoolingType.MAX,
SubsamplingLayer.PoolingType.AVG, SubsamplingLayer.PoolingType.PNORM};
boolean nchw = format == CNN2DFormat.NCHW;
for (Activation afn : activations) {
for (SubsamplingLayer.PoolingType poolingType : poolingTypes) {
for (int minibatchSize : minibatchSizes) {
INDArray input = Nd4j.rand(minibatchSize, width * height * inputDepth);
long[] inShape = nchw ? new long[]{minibatchSize, inputDepth, height, width} : new long[]{minibatchSize, height, width, inputDepth};
INDArray input = Nd4j.rand(DataType.DOUBLE, inShape);
INDArray labels = Nd4j.zeros(minibatchSize, nOut);
for (int i = 0; i < minibatchSize; i++) {
labels.putScalar(new int[]{i, i % nOut}, 1.0);
@ -533,8 +563,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
.layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nIn(2 * 2 * 2)
.nOut(4).build())
.setInputType(InputType.convolutionalFlat(height, width,
inputDepth))
.setInputType(InputType.convolutional(height, width, inputDepth, format))
.build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
@ -558,8 +587,6 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
@Test
public void testCnnLocallyConnected2D() {
int nOut = 3;
int[] minibatchSizes = {2};
int width = 5;
int height = 5;
@ -569,11 +596,15 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
Activation[] activations = {Activation.SIGMOID, Activation.TANH, Activation.SOFTPLUS};
int[] minibatch = {2, 1, 3};
boolean nchw = format == CNN2DFormat.NCHW;
for( int i=0; i<inputDepths.length; i++ ){
int inputDepth = inputDepths[i];
Activation afn = activations[i];
int minibatchSize = minibatch[i];
INDArray input = Nd4j.rand(minibatchSize, width * height * inputDepth);
long[] inShape = nchw ? new long[]{minibatchSize, inputDepth, height, width} : new long[]{minibatchSize, height, width, inputDepth};
INDArray input = Nd4j.rand(DataType.DOUBLE, inShape);
INDArray labels = TestUtils.randomOneHot(minibatchSize, nOut);
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).updater(new NoOp())
@ -590,7 +621,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
.layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nIn(2 * 2 * 2).nOut(nOut)
.build())
.setInputType(InputType.convolutionalFlat(height, width, inputDepth)).build();
.setInputType(InputType.convolutional(height, width, inputDepth, format)).build();
assertEquals(ConvolutionMode.Truncate,
((ConvolutionLayer) conf.getConf(0).getLayer()).getConvolutionMode());
@ -626,11 +657,15 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
Nd4j.getRandom().setSeed(12345);
boolean nchw = format == CNN2DFormat.NCHW;
for (int inputDepth : inputDepths) {
for (Activation afn : activations) {
for (SubsamplingLayer.PoolingType poolingType : poolingTypes) {
for (int minibatchSize : minibatchSizes) {
INDArray input = Nd4j.rand(minibatchSize, width * height * inputDepth);
long[] inShape = nchw ? new long[]{minibatchSize, inputDepth, height, width} : new long[]{minibatchSize, height, width, inputDepth};
INDArray input = Nd4j.rand(DataType.DOUBLE, inShape);
INDArray labels = Nd4j.zeros(minibatchSize, nOut);
for (int i = 0; i < minibatchSize; i++) {
labels.putScalar(new int[]{i, i % nOut}, 1.0);
@ -649,7 +684,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
.layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nIn(2 * 2 * 2).nOut(nOut)
.build())
.setInputType(InputType.convolutionalFlat(height, width, inputDepth)).build();
.setInputType(InputType.convolutional(height, width, inputDepth, format)).build();
assertEquals(ConvolutionMode.Truncate,
((ConvolutionLayer) conf.getConf(0).getLayer()).getConvolutionMode());
@ -691,13 +726,17 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
Nd4j.getRandom().setSeed(12345);
boolean nchw = format == CNN2DFormat.NCHW;
for( int i=0; i<minibatchSizes.length; i++ ){
int inputDepth = inputDepths[i];
int minibatchSize = minibatchSizes[i];
int height = heights[i];
int k = kernelSizes[i];
INDArray input = Nd4j.rand(minibatchSize, width * height * inputDepth);
long[] inShape = nchw ? new long[]{minibatchSize, inputDepth, height, width} : new long[]{minibatchSize, height, width, inputDepth};
INDArray input = Nd4j.rand(DataType.DOUBLE, inShape);
INDArray labels = TestUtils.randomOneHot(minibatchSize, nOut);
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
@ -713,7 +752,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
.stride(1, 1).padding(0, 0).build())
.layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nOut(nOut).build())
.setInputType(InputType.convolutionalFlat(height, width, inputDepth)).build();
.setInputType(InputType.convolutional(height, width, inputDepth, format)).build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
@ -748,13 +787,16 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
Nd4j.getRandom().setSeed(12345);
boolean nchw = format == CNN2DFormat.NCHW;
for (int inputDepth : inputDepths) {
for (int minibatchSize : minibatchSizes) {
for (int stride : strides) {
for (int k : kernelSizes) {
for (boolean convFirst : new boolean[]{true, false}) {
long[] inShape = nchw ? new long[]{minibatchSize, inputDepth, height, width} : new long[]{minibatchSize, height, width, inputDepth};
INDArray input = Nd4j.rand(DataType.DOUBLE, inShape);
INDArray input = Nd4j.rand(minibatchSize, width * height * inputDepth);
INDArray labels = Nd4j.zeros(minibatchSize, nOut);
for (int i = 0; i < minibatchSize; i++) {
labels.putScalar(new int[]{i, i % nOut}, 1.0);
@ -775,7 +817,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
.layer(1, convFirst ? poolLayer : convLayer)
.layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nOut(nOut).build())
.setInputType(InputType.convolutionalFlat(height, width, inputDepth))
.setInputType(InputType.convolutional(height, width, inputDepth, format))
.build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
@ -822,11 +864,15 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
int[] inputDepths = {1, 3, 2};
int[][] zeroPadLayer = new int[][]{{0, 0, 0, 0}, {1, 1, 0, 0}, {2, 2, 2, 2}};
boolean nchw = format == CNN2DFormat.NCHW;
for( int i=0; i<minibatchSizes.length; i++ ){
int minibatchSize = minibatchSizes[i];
int inputDepth = inputDepths[i];
int[] zeroPad = zeroPadLayer[i];
INDArray input = Nd4j.rand(DataType.DOUBLE, new int[]{minibatchSize, inputDepth, height, width});
long[] inShape = nchw ? new long[]{minibatchSize, inputDepth, height, width} : new long[]{minibatchSize, height, width, inputDepth};
INDArray input = Nd4j.rand(DataType.DOUBLE, inShape);
INDArray labels = TestUtils.randomOneHot(minibatchSize, nOut);
MultiLayerConfiguration conf =
@ -840,7 +886,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
padding).nIn(3).nOut(3).build())//output: (6-2+0)/1+1 = 5
.layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nOut(4).build())
.setInputType(InputType.convolutional(height, width, inputDepth))
.setInputType(InputType.convolutional(height, width, inputDepth, format))
.build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
@ -849,8 +895,14 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
//Check zero padding activation shape
org.deeplearning4j.nn.layers.convolution.ZeroPaddingLayer zpl =
(org.deeplearning4j.nn.layers.convolution.ZeroPaddingLayer) net.getLayer(1);
val expShape = new long[]{minibatchSize, inputDepth, height + zeroPad[0] + zeroPad[1],
width + zeroPad[2] + zeroPad[3]};
long[] expShape;
if(nchw){
expShape = new long[]{minibatchSize, inputDepth, height + zeroPad[0] + zeroPad[1],
width + zeroPad[2] + zeroPad[3]};
} else {
expShape = new long[]{minibatchSize, height + zeroPad[0] + zeroPad[1],
width + zeroPad[2] + zeroPad[3], inputDepth};
}
INDArray out = zpl.activate(input, false, LayerWorkspaceMgr.noWorkspaces());
assertArrayEquals(expShape, out.shape());
@ -888,6 +940,8 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
Nd4j.getRandom().setSeed(12345);
boolean nchw = format == CNN2DFormat.NCHW;
for (int i = 0; i < minibatchSizes.length; i++) {
int minibatchSize = minibatchSizes[i];
int k = kernelSizes[i];
@ -900,7 +954,10 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
int w = d * width;
int h = d * height;
INDArray input = Nd4j.rand(minibatchSize, w * h * inputDepth);
long[] inShape = nchw ? new long[]{minibatchSize, inputDepth, h, w} : new long[]{minibatchSize, h, w, inputDepth};
INDArray input = Nd4j.rand(DataType.DOUBLE, inShape);
INDArray labels = Nd4j.zeros(minibatchSize, nOut);
for (int j = 0; j < minibatchSize; j++) {
labels.putScalar(new int[]{j, j % nOut}, 1.0);
@ -920,7 +977,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
MultiLayerConfiguration conf = b.layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nOut(nOut).build())
.setInputType(InputType.convolutionalFlat(h, w, inputDepth)).build();
.setInputType(InputType.convolutional(h, w, inputDepth, format)).build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
@ -945,8 +1002,6 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
@Test
public void testSeparableConv2D() {
int nOut = 2;
int[] minibatchSizes = new int[]{1, 3};
int width = 6;
int height = 6;
int inputDepth = 3;
@ -959,6 +1014,8 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
ConvolutionMode[] cms = new ConvolutionMode[]{Truncate, Truncate, Truncate, Truncate, Truncate};
int[] mb = new int[]{1, 1, 1, 3, 3};
boolean nchw = format == CNN2DFormat.NCHW;
for (int t = 0; t < ks.length; t++) {
int k = ks[t];
@ -971,7 +1028,8 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
int w = d * width;
int h = d * height;
INDArray input = Nd4j.rand(minibatchSize, w * h * inputDepth);
long[] inShape = nchw ? new long[]{minibatchSize, inputDepth, h, w} : new long[]{minibatchSize, h, w, inputDepth};
INDArray input = Nd4j.rand(DataType.DOUBLE, inShape);
INDArray labels = Nd4j.zeros(minibatchSize, nOut);
for (int i = 0; i < minibatchSize; i++) {
labels.putScalar(new int[]{i, i % nOut}, 1.0);
@ -992,7 +1050,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
MultiLayerConfiguration conf = b.layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nOut(nOut).build())
.setInputType(InputType.convolutionalFlat(h, w, inputDepth)).build();
.setInputType(InputType.convolutional(h, w, inputDepth, format)).build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
@ -1017,7 +1075,6 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
@Test
public void testCnnDilated() {
int nOut = 2;
int minibatchSize = 2;
int width = 8;
int height = 8;
@ -1031,9 +1088,9 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
int[] ds = new int[]{2, 2, 3, 3, 2};
ConvolutionMode[] cms = new ConvolutionMode[]{Same, Truncate, Truncate, Same, Truncate};
boolean nchw = format == CNN2DFormat.NCHW;
for (int t = 0; t < sub.length; t++) {
boolean subsampling = sub[t];
int s = stride[t];
int k = kernel[t];
@ -1044,7 +1101,8 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
int w = d * width;
int h = d * height;
INDArray input = Nd4j.rand(minibatchSize, w * h * inputDepth);
long[] inShape = nchw ? new long[]{minibatchSize, inputDepth, h, w} : new long[]{minibatchSize, h, w, inputDepth};
INDArray input = Nd4j.rand(DataType.DOUBLE, inShape);
INDArray labels = Nd4j.zeros(minibatchSize, nOut);
for (int i = 0; i < minibatchSize; i++) {
labels.putScalar(new int[]{i, i % nOut}, 1.0);
@ -1076,7 +1134,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
MultiLayerConfiguration conf = b.layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nOut(nOut).build())
.setInputType(InputType.convolutionalFlat(h, w, inputDepth)).build();
.setInputType(InputType.convolutional(h, w, inputDepth, format)).build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
@ -1114,11 +1172,14 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
int[] inputDepths = {1, 2, 3, 2};
int[] minibatchSizes = {2, 1, 3, 2};
boolean nchw = format == CNN2DFormat.NCHW;
for (int i = 0; i < cropTestCases.length; i++) {
int inputDepth = inputDepths[i];
int minibatchSize = minibatchSizes[i];
int[] crop = cropTestCases[i];
INDArray input = Nd4j.rand(new int[]{minibatchSize, inputDepth, height, width});
long[] inShape = nchw ? new long[]{minibatchSize, inputDepth, height, width} : new long[]{minibatchSize, height, width, inputDepth};
INDArray input = Nd4j.rand(DataType.DOUBLE, inShape);
INDArray labels = TestUtils.randomOneHot(minibatchSize, nOut);
MultiLayerConfiguration conf =
@ -1134,7 +1195,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
.layer(new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.AVG).kernelSize(3, 3).stride(3, 3).build())
.layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nOut(nOut).build())
.setInputType(InputType.convolutional(height, width, inputDepth))
.setInputType(InputType.convolutional(height, width, inputDepth, format))
.build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
@ -1143,12 +1204,18 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
//Check cropping activation shape
org.deeplearning4j.nn.layers.convolution.Cropping2DLayer cl =
(org.deeplearning4j.nn.layers.convolution.Cropping2DLayer) net.getLayer(1);
val expShape = new long[]{minibatchSize, inputDepth, height - crop[0] - crop[1],
width - crop[2] - crop[3]};
long[] expShape;
if(nchw){
expShape = new long[]{minibatchSize, inputDepth, height - crop[0] - crop[1],
width - crop[2] - crop[3]};
} else {
expShape = new long[]{minibatchSize, height - crop[0] - crop[1],
width - crop[2] - crop[3], inputDepth};
}
INDArray out = cl.activate(input, false, LayerWorkspaceMgr.noWorkspaces());
assertArrayEquals(expShape, out.shape());
String msg = "minibatch=" + minibatchSize + ", channels=" + inputDepth + ", zeroPad = "
String msg = format + " - minibatch=" + minibatchSize + ", channels=" + inputDepth + ", zeroPad = "
+ Arrays.toString(crop);
if (PRINT_RESULTS) {
@ -1181,6 +1248,8 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
Truncate, Truncate, Truncate, Truncate, Truncate};
int[] mb = new int[]{1,1,1,3,3};
boolean nchw = format == CNN2DFormat.NCHW;
for( int t=0; t<ks.length; t++ ){
int k = ks[t];
@ -1188,8 +1257,8 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
ConvolutionMode cm = cms[t];
int minibatchSize = mb[t];
INDArray input = Nd4j.rand(minibatchSize, width * height * nIn);
long[] inShape = nchw ? new long[]{minibatchSize, nIn, height, width} : new long[]{minibatchSize, height, width, nIn};
INDArray input = Nd4j.rand(DataType.DOUBLE, inShape);
INDArray labels = Nd4j.zeros(minibatchSize, nOut);
for (int i = 0; i < minibatchSize; i++) {
labels.putScalar(new int[]{i, i % nOut}, 1.0);
@ -1211,7 +1280,7 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
MultiLayerConfiguration conf = b.layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nOut(nOut).build())
.setInputType(InputType.convolutionalFlat(height, width, nIn)).build();
.setInputType(InputType.convolutional(height, width, nIn, format)).build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();

View File

@ -18,6 +18,7 @@ package org.deeplearning4j.gradientcheck;
import org.deeplearning4j.BaseDL4JTest;
import org.deeplearning4j.TestUtils;
import org.deeplearning4j.nn.conf.CNN2DFormat;
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
@ -115,55 +116,57 @@ public class GlobalPoolingGradientCheckTests extends BaseDL4JTest {
//Basic test of global pooling w/ CNN
Nd4j.getRandom().setSeed(12345L);
int inputDepth = 3;
int inputH = 5;
int inputW = 4;
int layerDepth = 4;
int nOut = 2;
for(boolean nchw : new boolean[]{true, false}) {
int[] minibatchSizes = new int[] {1, 3};
PoolingType[] poolingTypes =
new PoolingType[] {PoolingType.AVG, PoolingType.SUM, PoolingType.MAX, PoolingType.PNORM};
int inputDepth = 3;
int inputH = 5;
int inputW = 4;
int layerDepth = 4;
int nOut = 2;
for (int miniBatchSize : minibatchSizes) {
for (PoolingType pt : poolingTypes) {
int[] minibatchSizes = new int[]{1, 3};
PoolingType[] poolingTypes =
new PoolingType[]{PoolingType.AVG, PoolingType.SUM, PoolingType.MAX, PoolingType.PNORM};
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.dataType(DataType.DOUBLE)
.updater(new NoOp())
.dist(new NormalDistribution(0, 1.0)).seed(12345L).list()
.layer(0, new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).nOut(layerDepth)
.build())
.layer(1, new GlobalPoolingLayer.Builder().poolingType(pt).build())
.layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nOut(nOut).build())
for (int miniBatchSize : minibatchSizes) {
for (PoolingType pt : poolingTypes) {
.setInputType(InputType.convolutional(inputH, inputW, inputDepth)).build();
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.dataType(DataType.DOUBLE)
.updater(new NoOp())
.dist(new NormalDistribution(0, 1.0)).seed(12345L).list()
.layer(0, new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).nOut(layerDepth)
.build())
.layer(1, new GlobalPoolingLayer.Builder().poolingType(pt).build())
.layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nOut(nOut).build())
.setInputType(InputType.convolutional(inputH, inputW, inputDepth, nchw ? CNN2DFormat.NCHW : CNN2DFormat.NHWC)).build();
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
mln.init();
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
mln.init();
Random r = new Random(12345L);
INDArray input = Nd4j.rand(new int[] {miniBatchSize, inputDepth, inputH, inputW}).subi(0.5);
Random r = new Random(12345L);
long[] inShape = nchw ? new long[]{miniBatchSize, inputDepth, inputH, inputW} : new long[]{miniBatchSize, inputH, inputW, inputDepth};
INDArray input = Nd4j.rand(DataType.DOUBLE, inShape).subi(0.5);
INDArray labels = Nd4j.zeros(miniBatchSize, nOut);
for (int i = 0; i < miniBatchSize; i++) {
int idx = r.nextInt(nOut);
labels.putScalar(i, idx, 1.0);
}
INDArray labels = Nd4j.zeros(miniBatchSize, nOut);
for (int i = 0; i < miniBatchSize; i++) {
int idx = r.nextInt(nOut);
labels.putScalar(i, idx, 1.0);
}
if (PRINT_RESULTS) {
System.out.println(
"testCnnGlobalPoolingBasicMultiLayer() - " + pt + ", minibatch = " + miniBatchSize);
if (PRINT_RESULTS) {
System.out.println("testCnnGlobalPoolingBasicMultiLayer() - " + pt + ", minibatch = " + miniBatchSize + " - " + (nchw ? "NCHW" : "NHWC"));
// for (int j = 0; j < mln.getnLayers(); j++)
// System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
}
boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
assertTrue(gradOK);
TestUtils.testModelSerialization(mln);
}
boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
assertTrue(gradOK);
TestUtils.testModelSerialization(mln);
}
}
}

View File

@ -24,6 +24,7 @@ import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.RNNFormat;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.conf.distribution.UniformDistribution;
import org.deeplearning4j.nn.conf.layers.*;
@ -560,75 +561,81 @@ public class GradientCheckTests extends BaseDL4JTest {
public void testEmbeddingSequenceLayer(){
Nd4j.getRandom().setSeed(12345);
for(boolean maskArray : new boolean[]{false, true}){
for(int inputRank : new int[]{2,3}) {
for(RNNFormat seqOutputFormat : RNNFormat.values()) {
for (boolean maskArray : new boolean[]{false, true}) {
for (int inputRank : new int[]{2, 3}) {
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.dataType(DataType.DOUBLE)
.seed(12345)
.updater(new NoOp())
.weightInit(new NormalDistribution(0, 1))
.list()
.layer(new EmbeddingSequenceLayer.Builder()
.nIn(8)
.nOut(4)
.build())
.layer(new RnnOutputLayer.Builder().nIn(4).nOut(3).activation(Activation.TANH)
.lossFunction(LossFunction.MSE).build())
.build();
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.dataType(DataType.DOUBLE)
.seed(12345)
.updater(new NoOp())
.weightInit(new NormalDistribution(0, 1))
.list()
.layer(new EmbeddingSequenceLayer.Builder()
.nIn(8)
.nOut(4)
.outputDataFormat(seqOutputFormat)
.build())
.layer(new RnnOutputLayer.Builder().nIn(4).nOut(3).activation(Activation.TANH)
.dataFormat(seqOutputFormat)
.lossFunction(LossFunction.MSE).build())
.build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
INDArray in = Transforms.floor(Nd4j.rand(3, 6).muli(8)); //Integers 0 to 7 inclusive
INDArray label = Nd4j.rand(new int[]{3, 3, 6});
boolean ncw = seqOutputFormat == RNNFormat.NCW;
if(inputRank == 3){
//Reshape from [3,6] to [3,1,6]
in = in.reshape('c', 3, 1, 6);
}
INDArray in = Transforms.floor(Nd4j.rand(3, 6).muli(8)); //Integers 0 to 7 inclusive
INDArray label = Nd4j.rand(DataType.FLOAT, ncw ? new int[]{3, 3, 6} : new int[]{3,6,3});
INDArray fMask = null;
if (maskArray) {
fMask = Nd4j.create(new double[][]{{1, 1, 1, 1, 1, 1},
{1, 1, 0, 0, 0, 0},
{1, 0, 0, 0, 0, 0}});
}
String msg = "mask=" + maskArray + ", inputRank=" + inputRank;
boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.MLNConfig().net(net).input(in)
.labels(label).inputMask(fMask));
assertTrue(msg, gradOK);
TestUtils.testModelSerialization(net);
//Also: if mask is present, double check that the masked steps don't impact score
if (maskArray) {
DataSet ds = new DataSet(in, label, fMask, null);
double score = net.score(ds);
if(inputRank == 2){
in.putScalar(1, 2, 0);
in.putScalar(2, 1, 0);
in.putScalar(2, 2, 0);
} else {
in.putScalar(1, 0, 2, 0);
in.putScalar(2, 0, 1, 0);
in.putScalar(2, 0, 2, 0);
if (inputRank == 3) {
//Reshape from [3,6] to [3,1,6]
in = in.reshape('c', 3, 1, 6);
}
double score2 = net.score(ds);
assertEquals(score, score2, 1e-6);
if(inputRank == 2){
in.putScalar(1, 2, 1);
in.putScalar(2, 1, 1);
in.putScalar(2, 2, 1);
} else {
in.putScalar(1, 0, 2, 1);
in.putScalar(2, 0, 1, 1);
in.putScalar(2, 0, 2, 1);
INDArray fMask = null;
if (maskArray) {
fMask = Nd4j.create(new double[][]{{1, 1, 1, 1, 1, 1},
{1, 1, 0, 0, 0, 0},
{1, 0, 0, 0, 0, 0}});
}
String msg = "mask=" + maskArray + ", inputRank=" + inputRank;
boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.MLNConfig().net(net).input(in)
.labels(label).inputMask(fMask));
assertTrue(msg, gradOK);
TestUtils.testModelSerialization(net);
//Also: if mask is present, double check that the masked steps don't impact score
if (maskArray) {
DataSet ds = new DataSet(in, label, fMask, null);
double score = net.score(ds);
if (inputRank == 2) {
in.putScalar(1, 2, 0);
in.putScalar(2, 1, 0);
in.putScalar(2, 2, 0);
} else {
in.putScalar(1, 0, 2, 0);
in.putScalar(2, 0, 1, 0);
in.putScalar(2, 0, 2, 0);
}
double score2 = net.score(ds);
assertEquals(score, score2, 1e-6);
if (inputRank == 2) {
in.putScalar(1, 2, 1);
in.putScalar(2, 1, 1);
in.putScalar(2, 2, 1);
} else {
in.putScalar(1, 0, 2, 1);
in.putScalar(2, 0, 1, 1);
in.putScalar(2, 0, 2, 1);
}
double score3 = net.score(ds);
assertEquals(score, score3, 1e-6);
}
double score3 = net.score(ds);
assertEquals(score, score3, 1e-6);
}
}
}

View File

@ -21,9 +21,7 @@ import org.deeplearning4j.BaseDL4JTest;
import org.deeplearning4j.TestUtils;
import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator;
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.*;
import org.deeplearning4j.nn.conf.distribution.GaussianDistribution;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.conf.distribution.UniformDistribution;
@ -341,104 +339,112 @@ public class GradientCheckTestsComputationGraph extends BaseDL4JTest {
@Test
public void testCnnDepthMerge() {
Nd4j.getRandom().setSeed(12345);
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
.dataType(DataType.DOUBLE)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.dist(new NormalDistribution(0, 0.1))
.updater(new NoOp()).graphBuilder().addInputs("input")
.addLayer("l1", new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).padding(0, 0)
.nIn(2).nOut(2).activation(Activation.TANH).build(), "input")
.addLayer("l2", new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).padding(0, 0)
.nIn(2).nOut(2).activation(Activation.TANH).build(), "input")
.addVertex("merge", new MergeVertex(), "l1", "l2")
.addLayer("outputLayer",
new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nIn(5 * 5 * (2 + 2)).nOut(3)
.build(),
"merge")
.setOutputs("outputLayer")
.inputPreProcessor("outputLayer", new CnnToFeedForwardPreProcessor(5, 5, 4))
.build();
for(CNN2DFormat format : CNN2DFormat.values()) {
ComputationGraph graph = new ComputationGraph(conf);
graph.init();
String msg = "testCnnDepthMerge - " + format;
Random r = new Random(12345);
INDArray input = Nd4j.rand(new int[] {5, 2, 6, 6}); //Order: examples, channels, height, width
INDArray labels = Nd4j.zeros(5, 3);
for (int i = 0; i < 5; i++)
labels.putScalar(new int[] {i, r.nextInt(3)}, 1.0);
Nd4j.getRandom().setSeed(12345);
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
.dataType(DataType.DOUBLE)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.dist(new NormalDistribution(0, 0.1))
.updater(new NoOp()).graphBuilder().addInputs("input")
.addLayer("l1", new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).padding(0, 0)
.nIn(2).nOut(2).activation(Activation.TANH).build(), "input")
.addLayer("l2", new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).padding(0, 0)
.nIn(2).nOut(2).activation(Activation.TANH).build(), "input")
.addVertex("merge", new MergeVertex(), "l1", "l2")
.addLayer("outputLayer",
new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX).nIn(5 * 5 * (2 + 2)).nOut(3)
.build(),
"merge")
.setOutputs("outputLayer")
.setInputTypes(InputType.convolutional(6, 6, 2, format))
.build();
if (PRINT_RESULTS) {
System.out.println("testCnnDepthMerge()");
ComputationGraph graph = new ComputationGraph(conf);
graph.init();
Random r = new Random(12345);
INDArray input = Nd4j.rand(DataType.DOUBLE, format == CNN2DFormat.NCHW ? new long[]{5,2,6,6} : new long[]{5,6,6,2});
INDArray labels = Nd4j.zeros(5, 3);
for (int i = 0; i < 5; i++)
labels.putScalar(new int[]{i, r.nextInt(3)}, 1.0);
if (PRINT_RESULTS) {
System.out.println(msg);
// for (int j = 0; j < graph.getNumLayers(); j++)
// System.out.println("Layer " + j + " # params: " + graph.getLayer(j).numParams());
}
boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.GraphConfig().net(graph).inputs(new INDArray[]{input})
.labels(new INDArray[]{labels}));
assertTrue(msg, gradOK);
TestUtils.testModelSerialization(graph);
}
boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.GraphConfig().net(graph).inputs(new INDArray[]{input})
.labels(new INDArray[]{labels}));
String msg = "testCnnDepthMerge()";
assertTrue(msg, gradOK);
TestUtils.testModelSerialization(graph);
}
@Test
public void testRNNWithMerging() {
Nd4j.getRandom().setSeed(12345);
ComputationGraphConfiguration conf =
new NeuralNetConfiguration.Builder().seed(12345)
.dataType(DataType.DOUBLE)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.dist(new UniformDistribution(0.2, 0.6))
.updater(new NoOp()).graphBuilder().addInputs("input")
.setOutputs("out")
.addLayer("lstm1",
new SimpleRnn.Builder().nIn(3).nOut(3)
.activation(Activation.TANH).build(),
"input")
.addLayer("lstm2",
new SimpleRnn.Builder().nIn(3).nOut(3)
.activation(Activation.TANH).build(),
"lstm1")
.addLayer("dense1",
new DenseLayer.Builder().nIn(3).nOut(3)
.activation(Activation.SIGMOID).build(),
"lstm1")
.addLayer("lstm3",
new SimpleRnn.Builder().nIn(3).nOut(3)
.activation(Activation.TANH).build(),
"dense1")
.addVertex("merge", new MergeVertex(), "lstm2", "lstm3")
.addLayer("out", new RnnOutputLayer.Builder().nIn(6).nOut(3)
.activation(Activation.SOFTMAX)
.lossFunction(LossFunctions.LossFunction.MCXENT).build(),
"merge")
.inputPreProcessor("dense1", new RnnToFeedForwardPreProcessor())
.inputPreProcessor("lstm3", new FeedForwardToRnnPreProcessor())
.build();
for(RNNFormat format : RNNFormat.values()) {
ComputationGraph graph = new ComputationGraph(conf);
graph.init();
String msg = "testLSTMWithMerging - " + format;
Random r = new Random(12345);
INDArray input = Nd4j.rand(new int[] {2, 3, 4});
INDArray labels = TestUtils.randomOneHotTimeSeries(2, 3, 4);
Nd4j.getRandom().setSeed(12345);
ComputationGraphConfiguration conf =
new NeuralNetConfiguration.Builder().seed(12345)
.dataType(DataType.DOUBLE)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.dist(new UniformDistribution(0.2, 0.6))
.updater(new NoOp()).graphBuilder().addInputs("input")
.setOutputs("out")
.addLayer("lstm1",
new SimpleRnn.Builder().nIn(3).nOut(3)
.activation(Activation.TANH).build(),
"input")
.addLayer("lstm2",
new SimpleRnn.Builder().nIn(3).nOut(3)
.activation(Activation.TANH).build(),
"lstm1")
.addLayer("dense1",
new DenseLayer.Builder().nIn(3).nOut(3)
.activation(Activation.SIGMOID).build(),
"lstm1")
.addLayer("lstm3",
new SimpleRnn.Builder().nIn(3).nOut(3)
.activation(Activation.TANH).build(),
"dense1")
.addVertex("merge", new MergeVertex(), "lstm2", "lstm3")
.addLayer("out", new RnnOutputLayer.Builder().nIn(6).nOut(3)
.activation(Activation.SOFTMAX)
.lossFunction(LossFunctions.LossFunction.MCXENT).build(),
"merge")
.setInputTypes(InputType.recurrent(4, format))
.build();
if (PRINT_RESULTS) {
System.out.println("testLSTMWithMerging()");
ComputationGraph graph = new ComputationGraph(conf);
graph.init();
Random r = new Random(12345);
INDArray input = Nd4j.rand(DataType.DOUBLE, format == RNNFormat.NCW ? new long[]{2, 3, 4} : new long[]{2,4,3});
INDArray labels = TestUtils.randomOneHotTimeSeries(format, 2, 3, 4, new Random(12345));
if (PRINT_RESULTS) {
System.out.println(msg);
// for (int j = 0; j < graph.getNumLayers(); j++)
// System.out.println("Layer " + j + " # params: " + graph.getLayer(j).numParams());
}
boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.GraphConfig().net(graph).inputs(new INDArray[]{input})
.labels(new INDArray[]{labels}));
assertTrue(msg, gradOK);
TestUtils.testModelSerialization(graph);
}
boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.GraphConfig().net(graph).inputs(new INDArray[]{input})
.labels(new INDArray[]{labels}));
String msg = "testLSTMWithMerging()";
assertTrue(msg, gradOK);
TestUtils.testModelSerialization(graph);
}
@Test

View File

@ -216,7 +216,7 @@ public class LossFunctionGradientCheck extends BaseDL4JTest {
gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
} catch (Exception e) {
e.printStackTrace();
log.error("",e);
failed.add(testName + "\t" + "EXCEPTION");
continue;
}
@ -383,7 +383,7 @@ public class LossFunctionGradientCheck extends BaseDL4JTest {
gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
} catch (Exception e) {
e.printStackTrace();
log.error("",e);
failed.add(testName + "\t" + "EXCEPTION");
continue;
}
@ -693,7 +693,7 @@ public class LossFunctionGradientCheck extends BaseDL4JTest {
gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
} catch (Exception e) {
e.printStackTrace();
log.error("",e);
failed.add(testName + "\t" + "EXCEPTION");
continue;
}

View File

@ -338,7 +338,7 @@ public class RnnGradientChecks extends BaseDL4JTest {
.weightInit(WeightInit.XAVIER)
.list()
.layer(new LSTM.Builder().nOut(layerSize).build())
.layer(new TimeDistributed(new DenseLayer.Builder().nOut(layerSize).activation(Activation.SOFTMAX).build(), 2))
.layer(new TimeDistributed(new DenseLayer.Builder().nOut(layerSize).activation(Activation.SOFTMAX).build()))
.layer(new RnnOutputLayer.Builder().nOut(nOut).activation(Activation.SOFTMAX)
.lossFunction(LossFunctions.LossFunction.MCXENT).build())
.setInputType(InputType.recurrent(nIn))

View File

@ -21,6 +21,7 @@ import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.NoArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.BaseDL4JTest;
import org.deeplearning4j.exception.DL4JInvalidConfigException;
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
@ -47,6 +48,7 @@ import org.nd4j.linalg.lossfunctions.LossFunctions;
import static org.junit.Assert.*;
@Slf4j
public class ComputationGraphConfigurationTest extends BaseDL4JTest {
@Test
@ -150,7 +152,7 @@ public class ComputationGraphConfigurationTest extends BaseDL4JTest {
fail("No exception thrown for invalid configuration");
} catch (IllegalStateException e) {
//OK - exception is good
//e.printStackTrace();
log.info(e.toString());
}
// Use appendLayer on first layer
@ -162,7 +164,7 @@ public class ComputationGraphConfigurationTest extends BaseDL4JTest {
fail("No exception thrown for invalid configuration");
} catch (IllegalStateException e) {
//OK - exception is good
//e.printStackTrace();
log.info(e.toString());
}
//Test no network inputs
@ -174,7 +176,7 @@ public class ComputationGraphConfigurationTest extends BaseDL4JTest {
fail("No exception thrown for invalid configuration");
} catch (IllegalStateException e) {
//OK - exception is good
//e.printStackTrace();
log.info(e.toString());
}
//Test no network outputs
@ -185,7 +187,7 @@ public class ComputationGraphConfigurationTest extends BaseDL4JTest {
fail("No exception thrown for invalid configuration");
} catch (IllegalStateException e) {
//OK - exception is good
//e.printStackTrace();
log.info(e.toString());
}
//Test: invalid input
@ -197,7 +199,7 @@ public class ComputationGraphConfigurationTest extends BaseDL4JTest {
fail("No exception thrown for invalid configuration");
} catch (IllegalStateException e) {
//OK - exception is good
//e.printStackTrace();
log.info(e.toString());
}
//Test: graph with cycles
@ -215,7 +217,7 @@ public class ComputationGraphConfigurationTest extends BaseDL4JTest {
fail("No exception thrown for invalid configuration");
} catch (IllegalStateException e) {
//OK - exception is good
//e.printStackTrace();
log.info(e.toString());
}
//Test: input != inputType count mismatch
@ -241,7 +243,7 @@ public class ComputationGraphConfigurationTest extends BaseDL4JTest {
fail("No exception thrown for invalid configuration");
} catch (IllegalArgumentException e) {
//OK - exception is good
//e.printStackTrace();
log.info(e.toString());
}
}

View File

@ -16,6 +16,7 @@
package org.deeplearning4j.nn.conf;
import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.BaseDL4JTest;
import org.deeplearning4j.exception.DL4JInvalidConfigException;
import org.deeplearning4j.nn.api.Layer;
@ -46,6 +47,7 @@ import static org.junit.Assert.*;
/**
* Created by agibsonccc on 11/27/14.
*/
@Slf4j
public class MultiLayerNeuralNetConfigurationTest extends BaseDL4JTest {
@Rule
@ -272,9 +274,9 @@ public class MultiLayerNeuralNetConfigurationTest extends BaseDL4JTest {
fail("No exception thrown for invalid configuration");
} catch (IllegalStateException e) {
//OK
e.printStackTrace();
log.error("",e);
} catch (Throwable e) {
e.printStackTrace();
log.error("",e);
fail("Unexpected exception thrown for invalid config");
}
@ -288,9 +290,9 @@ public class MultiLayerNeuralNetConfigurationTest extends BaseDL4JTest {
fail("No exception thrown for invalid configuration");
} catch (IllegalStateException e) {
//OK
e.printStackTrace();
log.info(e.toString());
} catch (Throwable e) {
e.printStackTrace();
log.error("",e);
fail("Unexpected exception thrown for invalid config");
}
@ -304,9 +306,9 @@ public class MultiLayerNeuralNetConfigurationTest extends BaseDL4JTest {
fail("No exception thrown for invalid configuration");
} catch (IllegalStateException e) {
//OK
e.printStackTrace();
log.info(e.toString());
} catch (Throwable e) {
e.printStackTrace();
log.error("",e);
fail("Unexpected exception thrown for invalid config");
}
}

View File

@ -96,8 +96,8 @@ public class TestConstraints extends BaseDL4JTest {
} else if (lc instanceof NonNegativeConstraint) {
assertTrue(RW0.minNumber().doubleValue() >= 0.0);
} else if (lc instanceof UnitNormConstraint) {
assertEquals(RW0.norm2(1).minNumber().doubleValue(), 1.0, 1e-6);
assertEquals(RW0.norm2(1).maxNumber().doubleValue(), 1.0, 1e-6);
assertEquals(1.0, RW0.norm2(1).minNumber().doubleValue(), 1e-6);
assertEquals(1.0, RW0.norm2(1).maxNumber().doubleValue(), 1e-6);
}
TestUtils.testModelSerialization(net);
@ -149,8 +149,8 @@ public class TestConstraints extends BaseDL4JTest {
} else if (lc instanceof NonNegativeConstraint) {
assertTrue(b0.minNumber().doubleValue() >= 0.0);
} else if (lc instanceof UnitNormConstraint) {
assertEquals(b0.norm2(1).minNumber().doubleValue(), 1.0, 1e-6);
assertEquals(b0.norm2(1).maxNumber().doubleValue(), 1.0, 1e-6);
assertEquals(1.0, b0.norm2(1).minNumber().doubleValue(), 1e-6);
assertEquals(1.0, b0.norm2(1).maxNumber().doubleValue(), 1e-6);
}
TestUtils.testModelSerialization(net);
@ -201,8 +201,8 @@ public class TestConstraints extends BaseDL4JTest {
} else if (lc instanceof NonNegativeConstraint) {
assertTrue(w0.minNumber().doubleValue() >= 0.0);
} else if (lc instanceof UnitNormConstraint) {
assertEquals(w0.norm2(1).minNumber().doubleValue(), 1.0, 1e-6);
assertEquals(w0.norm2(1).maxNumber().doubleValue(), 1.0, 1e-6);
assertEquals(1.0, w0.norm2(1).minNumber().doubleValue(), 1e-6);
assertEquals(1.0, w0.norm2(1).maxNumber().doubleValue(), 1e-6);
}
TestUtils.testModelSerialization(net);
@ -259,10 +259,10 @@ public class TestConstraints extends BaseDL4JTest {
assertTrue(w0.minNumber().doubleValue() >= 0.0);
assertTrue(b0.minNumber().doubleValue() >= 0.0);
} else if (lc instanceof UnitNormConstraint) {
assertEquals(w0.norm2(1).minNumber().doubleValue(), 1.0, 1e-6);
assertEquals(w0.norm2(1).maxNumber().doubleValue(), 1.0, 1e-6);
assertEquals(b0.norm2(1).minNumber().doubleValue(), 1.0, 1e-6);
assertEquals(b0.norm2(1).maxNumber().doubleValue(), 1.0, 1e-6);
assertEquals(1.0, w0.norm2(1).minNumber().doubleValue(), 1e-6);
assertEquals(1.0, w0.norm2(1).maxNumber().doubleValue(), 1e-6);
assertEquals(1.0, b0.norm2(1).minNumber().doubleValue(), 1e-6);
assertEquals(1.0, b0.norm2(1).maxNumber().doubleValue(), 1e-6);
}
TestUtils.testModelSerialization(net);
@ -320,10 +320,10 @@ public class TestConstraints extends BaseDL4JTest {
assertTrue(w0.minNumber().doubleValue() >= 0.0);
assertTrue(b0.minNumber().doubleValue() >= 0.0);
} else if (lc instanceof UnitNormConstraint) {
assertEquals(w0.norm2(1).minNumber().doubleValue(), 1.0, 1e-6);
assertEquals(w0.norm2(1).maxNumber().doubleValue(), 1.0, 1e-6);
assertEquals(b0.norm2(1).minNumber().doubleValue(), 1.0, 1e-6);
assertEquals(b0.norm2(1).maxNumber().doubleValue(), 1.0, 1e-6);
assertEquals(1.0, w0.norm2(1).minNumber().doubleValue(), 1e-6);
assertEquals(1.0, w0.norm2(1).maxNumber().doubleValue(), 1e-6);
assertEquals(1.0, b0.norm2(1).minNumber().doubleValue(), 1e-6);
assertEquals(1.0, b0.norm2(1).maxNumber().doubleValue(), 1e-6);
}
TestUtils.testModelSerialization(net);
@ -378,10 +378,10 @@ public class TestConstraints extends BaseDL4JTest {
} else if(lc instanceof NonNegativeConstraint ){
assertTrue(w0.minNumber().doubleValue() >= 0.0 );
} else if(lc instanceof UnitNormConstraint ){
assertEquals(w0.norm2(1).minNumber().doubleValue(), 1.0, 1e-6 );
assertEquals(w0.norm2(1).maxNumber().doubleValue(), 1.0, 1e-6 );
assertEquals(w1.norm2(1).minNumber().doubleValue(), 1.0, 1e-6 );
assertEquals(w1.norm2(1).maxNumber().doubleValue(), 1.0, 1e-6 );
assertEquals(1.0, w0.norm2(1).minNumber().doubleValue(), 1e-6 );
assertEquals(1.0, w0.norm2(1).maxNumber().doubleValue(), 1e-6 );
assertEquals(1.0, w1.norm2(1).minNumber().doubleValue(), 1e-6 );
assertEquals(1.0, w1.norm2(1).maxNumber().doubleValue(), 1e-6 );
}
TestUtils.testModelSerialization(net);

View File

@ -156,7 +156,7 @@ public class LayerBuilderTest extends BaseDL4JTest {
checkSerialization(glstm);
assertEquals(glstm.getForgetGateBiasInit(), 1.5, 0.0);
assertEquals(1.5, glstm.getForgetGateBiasInit(), 0.0);
assertEquals(glstm.nIn, numIn);
assertEquals(glstm.nOut, numOut);
assertTrue(glstm.getActivationFn() instanceof ActivationTanH);

View File

@ -17,7 +17,9 @@
package org.deeplearning4j.nn.dtypes;
import org.deeplearning4j.nn.conf.layers.recurrent.TimeDistributed;
import org.deeplearning4j.nn.conf.preprocessor.*;
import org.deeplearning4j.nn.modelimport.keras.layers.TFOpLayer;
import org.deeplearning4j.nn.modelimport.keras.preprocessors.TensorFlowCnnToFeedForwardPreProcessor;
import org.nd4j.shade.guava.collect.ImmutableSet;
import org.nd4j.shade.guava.reflect.ClassPath;
import lombok.extern.slf4j.Slf4j;
@ -51,16 +53,11 @@ import org.deeplearning4j.nn.conf.layers.util.MaskZeroLayer;
import org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder;
import org.deeplearning4j.nn.conf.layers.wrapper.BaseWrapperLayer;
import org.deeplearning4j.nn.conf.ocnn.OCNNOutputLayer;
import org.deeplearning4j.nn.conf.preprocessor.CnnToRnnPreProcessor;
import org.deeplearning4j.nn.conf.preprocessor.ComposableInputPreProcessor;
import org.deeplearning4j.nn.conf.preprocessor.FeedForwardToCnn3DPreProcessor;
import org.deeplearning4j.nn.conf.preprocessor.RnnToCnnPreProcessor;
import org.deeplearning4j.nn.graph.ComputationGraph;
import org.deeplearning4j.nn.layers.util.IdentityLayer;
import org.deeplearning4j.nn.modelimport.keras.preprocessors.KerasFlattenRnnPreprocessor;
import org.deeplearning4j.nn.modelimport.keras.preprocessors.PermutePreprocessor;
import org.deeplearning4j.nn.modelimport.keras.preprocessors.ReshapePreprocessor;
import org.deeplearning4j.nn.modelimport.keras.preprocessors.TensorFlowCnnToFeedForwardPreProcessor;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.weights.WeightInit;
import org.deeplearning4j.nn.weights.WeightInitDistribution;
@ -97,7 +94,8 @@ public class DTypeTests extends BaseDL4JTest {
Pooling2D.class, //Alias for SubsamplingLayer
Convolution2D.class, //Alias for ConvolutionLayer
Pooling1D.class, //Alias for Subsampling1D
Convolution1D.class //Alias for Convolution1DLayer
Convolution1D.class, //Alias for Convolution1DLayer
TensorFlowCnnToFeedForwardPreProcessor.class //Deprecated
));
@Override
@ -819,7 +817,7 @@ public class DTypeTests extends BaseDL4JTest {
.layer(new DenseLayer.Builder().nOut(5).build())
.layer(new GravesBidirectionalLSTM.Builder().nIn(5).nOut(5).activation(Activation.TANH).build())
.layer(new Bidirectional(new LSTM.Builder().nIn(5).nOut(5).activation(Activation.TANH).build()))
.layer(new TimeDistributed(new DenseLayer.Builder().nIn(10).nOut(5).activation(Activation.TANH).build(), 2))
.layer(new TimeDistributed(new DenseLayer.Builder().nIn(10).nOut(5).activation(Activation.TANH).build()))
.layer(new SimpleRnn.Builder().nIn(5).nOut(5).build())
.layer(new MaskZeroLayer.Builder().underlying(new SimpleRnn.Builder().nIn(5).nOut(5).build()).maskValue(0.0).build())
.layer(secondLast)
@ -1078,7 +1076,7 @@ public class DTypeTests extends BaseDL4JTest {
.addLayer("l", new DenseLayer.Builder().nOut(16).build(), "in")
.addVertex("preproc", new PreprocessorVertex(new FeedForwardToCnn3DPreProcessor(2, 2, 2, 2, true)), "l")
.addVertex("preproc2", new PreprocessorVertex(new PermutePreprocessor(0, 2, 3, 4, 1)), "preproc")
.addVertex("preproc3", new PreprocessorVertex(new ReshapePreprocessor(new long[]{2, 2, 2, 2}, new long[]{16})), "preproc2")
.addVertex("preproc3", new PreprocessorVertex(new ReshapePreprocessor(new long[]{2, 2, 2, 2}, new long[]{16}, false)), "preproc2")
.addLayer("out", new OutputLayer.Builder().nIn(16).nOut(10).build(), "preproc3")
.setInputTypes(InputType.feedForward(5))
.setOutputs("out");
@ -1150,7 +1148,7 @@ public class DTypeTests extends BaseDL4JTest {
case 7:
b.addInputs("in")
.addLayer("1", new ConvolutionLayer.Builder().kernelSize(2, 2).nOut(5).convolutionMode(ConvolutionMode.Same).build(), "in")
.addVertex("2", new PreprocessorVertex(new TensorFlowCnnToFeedForwardPreProcessor(28, 28, 5)), "1")
.addVertex("2", new PreprocessorVertex(new CnnToFeedForwardPreProcessor(28, 28, 5)), "1")
.addLayer("out", new OutputLayer.Builder().nOut(10).build(), "2")
.setOutputs("out")
.setInputTypes(InputType.convolutional(28, 28, 1));

View File

@ -16,6 +16,7 @@
package org.deeplearning4j.nn.graph;
import lombok.extern.slf4j.Slf4j;
import lombok.val;
import org.deeplearning4j.BaseDL4JTest;
import org.deeplearning4j.datasets.iterator.IteratorDataSetIterator;
@ -54,6 +55,7 @@ import java.util.Map;
import static org.junit.Assert.*;
@Slf4j
public class ComputationGraphTestRNN extends BaseDL4JTest {
@Test
@ -618,7 +620,7 @@ public class ComputationGraphTestRNN extends BaseDL4JTest {
.build();
fail("Exception expected");
} catch (IllegalStateException e){
// e.printStackTrace();
log.error("",e);
assertTrue(e.getMessage().contains("TBPTT") && e.getMessage().contains("validateTbpttConfig"));
}
}

View File

@ -1394,7 +1394,7 @@ public class TestComputationGraphNetwork extends BaseDL4JTest {
} catch (Exception e) {
//e.printStackTrace();
log.error("",e);
if(allowDisconnected){
fail("No exception expected");
} else {

View File

@ -416,8 +416,8 @@ public class TestVariableLengthTSCG extends BaseDL4JTest {
INDArray outRow2 = out2.get(NDArrayIndex.point(i), NDArrayIndex.all(),
NDArrayIndex.point(j));
for (int k = 0; k < nOut; k++) {
assertEquals(outRow.getDouble(k), 0.0, 0.0);
assertEquals(outRow2.getDouble(k), 0.0, 0.0);
assertEquals(0.0, outRow.getDouble(k), 0.0);
assertEquals(0.0, outRow2.getDouble(k), 0.0);
}
}
}

View File

@ -60,7 +60,7 @@ public class TestGraphNodes extends BaseDL4JTest {
@Test
public void testMergeNode() {
Nd4j.getRandom().setSeed(12345);
GraphVertex mergeNode = new MergeVertex(null, "", -1, Nd4j.dataType());
GraphVertex mergeNode = new MergeVertex(null, "", -1, Nd4j.dataType(), 1);
INDArray first = Nd4j.linspace(0, 11, 12, Nd4j.dataType()).reshape(3, 4);
INDArray second = Nd4j.linspace(0, 17, 18, Nd4j.dataType()).reshape(3, 6).addi(100);
@ -82,7 +82,7 @@ public class TestGraphNodes extends BaseDL4JTest {
public void testMergeNodeRNN() {
Nd4j.getRandom().setSeed(12345);
GraphVertex mergeNode = new MergeVertex(null, "", -1, Nd4j.dataType());
GraphVertex mergeNode = new MergeVertex(null, "", -1, Nd4j.dataType(), 1);
INDArray first = Nd4j.linspace(0, 59, 60, Nd4j.dataType()).reshape(3, 4, 5);
INDArray second = Nd4j.linspace(0, 89, 90, Nd4j.dataType()).reshape(3, 6, 5).addi(100);
@ -103,7 +103,7 @@ public class TestGraphNodes extends BaseDL4JTest {
@Test
public void testCnnDepthMerge() {
Nd4j.getRandom().setSeed(12345);
GraphVertex mergeNode = new MergeVertex(null, "", -1, Nd4j.dataType());
GraphVertex mergeNode = new MergeVertex(null, "", -1, Nd4j.dataType(), 1);
INDArray first = Nd4j.linspace(0, 3, 4, Nd4j.dataType()).reshape(1, 1, 2, 2);
INDArray second = Nd4j.linspace(0, 3, 4, Nd4j.dataType()).reshape(1, 1, 2, 2).addi(10);

View File

@ -0,0 +1,974 @@
/* ******************************************************************************
* Copyright (c) 2015-2018 Skymind, Inc.
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* SPDX-License-Identifier: Apache-2.0
******************************************************************************/
package org.deeplearning4j.nn.layers.convolution;
import lombok.*;
import org.deeplearning4j.BaseDL4JTest;
import org.deeplearning4j.TestUtils;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.conf.CNN2DFormat;
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.*;
import org.deeplearning4j.nn.conf.layers.CnnLossLayer;
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
import org.deeplearning4j.nn.conf.layers.ZeroPaddingLayer;
import org.deeplearning4j.nn.conf.layers.convolutional.Cropping2D;
import org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor;
import org.deeplearning4j.nn.conf.preprocessor.ComposableInputPreProcessor;
import org.deeplearning4j.nn.gradient.Gradient;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.workspace.ArrayType;
import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.primitives.Pair;
import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import static org.junit.Assert.assertEquals;
@RunWith(Parameterized.class)
public class ConvDataFormatTests extends BaseDL4JTest {
private final DataType dataType;
public ConvDataFormatTests(DataType dataType){
this.dataType = dataType;
}
@Parameterized.Parameters(name = "{0}")
public static Object[] params(){
return new DataType[]{DataType.FLOAT, DataType.DOUBLE};
}
@Test
public void testConv2d() {
    try {
        boolean[] helperModes = {false, true};
        ConvolutionMode[] convModes = {ConvolutionMode.Truncate, ConvolutionMode.Same};
        for (boolean useHelpers : helperModes) {
            for (ConvolutionMode mode : convModes) {
                Nd4j.getRandom().setSeed(12345);
                Nd4j.getEnvironment().allowHelpers(useHelpers);
                String msg = (useHelpers ? "With helpers (" : "No helpers (") + mode + ")";
                System.out.println(" --- " + msg + " ---");

                //Same NCHW input/labels drive all four nets; testHelper permutes to NHWC as needed
                INDArray input = Nd4j.rand(this.dataType, 2, 3, 12, 12);
                INDArray labels = TestUtils.randomOneHot(2, 10);

                //net1/net2: NCHW with/without explicit layer format; net3/net4: NHWC likewise
                TestCase tc = TestCase.builder()
                        .msg(msg)
                        .net1(getConv2dNet(CNN2DFormat.NCHW, true, mode))
                        .net2(getConv2dNet(CNN2DFormat.NCHW, false, mode))
                        .net3(getConv2dNet(CNN2DFormat.NHWC, true, mode))
                        .net4(getConv2dNet(CNN2DFormat.NHWC, false, mode))
                        .inNCHW(input)
                        .labelsNCHW(labels)
                        .labelsNHWC(labels)
                        .testLayerIdx(1)
                        .build();

                testHelper(tc);
            }
        }
    } finally {
        Nd4j.getEnvironment().allowHelpers(true);
    }
}
@Test
public void testSubsampling2d() {
try {
for (boolean helpers : new boolean[]{false, true}) {
for (ConvolutionMode cm : new ConvolutionMode[]{ConvolutionMode.Truncate, ConvolutionMode.Same}) {
Nd4j.getRandom().setSeed(12345);
Nd4j.getEnvironment().allowHelpers(helpers);
String msg = helpers ? "With helpers (" + cm + ")" : "No helpers (" + cm + ")";
System.out.println(" --- " + msg + " ---");
INDArray inNCHW = Nd4j.rand(this.dataType, 2, 3, 12, 12);
INDArray labels = TestUtils.randomOneHot(2, 10);
TestCase tc = TestCase.builder()
.msg(msg)
.net1(getSubsampling2dNet(CNN2DFormat.NCHW, true, cm))
.net2(getSubsampling2dNet(CNN2DFormat.NCHW, false, cm))
.net3(getSubsampling2dNet(CNN2DFormat.NHWC, true, cm))
.net4(getSubsampling2dNet(CNN2DFormat.NHWC, false, cm))
.inNCHW(inNCHW)
.labelsNCHW(labels)
.labelsNHWC(labels)
.testLayerIdx(1)
.build();
testHelper(tc);
}
}
} finally {
Nd4j.getEnvironment().allowHelpers(true);
}
}
@Test
public void testDepthwiseConv2d() {
try {
for (boolean helpers : new boolean[]{false, true}) {
for (ConvolutionMode cm : new ConvolutionMode[]{ConvolutionMode.Truncate, ConvolutionMode.Same}) {
Nd4j.getRandom().setSeed(12345);
Nd4j.getEnvironment().allowHelpers(helpers);
String msg = helpers ? "With helpers (" + cm + ")" : "No helpers (" + cm + ")";
System.out.println(" --- " + msg + " ---");
INDArray inNCHW = Nd4j.rand(this.dataType, 2, 3, 12, 12);
INDArray labels = TestUtils.randomOneHot(2, 10);
TestCase tc = TestCase.builder()
.msg(msg)
.net1(getDepthwiseConv2dNet(CNN2DFormat.NCHW, true, cm))
.net2(getDepthwiseConv2dNet(CNN2DFormat.NCHW, false, cm))
.net3(getDepthwiseConv2dNet(CNN2DFormat.NHWC, true, cm))
.net4(getDepthwiseConv2dNet(CNN2DFormat.NHWC, false, cm))
.inNCHW(inNCHW)
.labelsNCHW(labels)
.labelsNHWC(labels)
.testLayerIdx(1)
.build();
testHelper(tc);
}
}
} finally {
Nd4j.getEnvironment().allowHelpers(true);
}
}
@Test
public void testSeparableConv2d() {
try {
for (boolean helpers : new boolean[]{false, true}) {
for (ConvolutionMode cm : new ConvolutionMode[]{ConvolutionMode.Truncate, ConvolutionMode.Same}) {
Nd4j.getRandom().setSeed(12345);
Nd4j.getEnvironment().allowHelpers(helpers);
String msg = helpers ? "With helpers (" + cm + ")" : "No helpers (" + cm + ")";
System.out.println(" --- " + msg + " ---");
INDArray inNCHW = Nd4j.rand(this.dataType, 2, 3, 12, 12);
INDArray labels = TestUtils.randomOneHot(2, 10);
TestCase tc = TestCase.builder()
.msg(msg)
.net1(getSeparableConv2dNet(CNN2DFormat.NCHW, true, cm))
.net2(getSeparableConv2dNet(CNN2DFormat.NCHW, false, cm))
.net3(getSeparableConv2dNet(CNN2DFormat.NHWC, true, cm))
.net4(getSeparableConv2dNet(CNN2DFormat.NHWC, false, cm))
.inNCHW(inNCHW)
.labelsNCHW(labels)
.labelsNHWC(labels)
.testLayerIdx(1)
.build();
testHelper(tc);
}
}
} finally {
Nd4j.getEnvironment().allowHelpers(true);
}
}
@Test
public void testDeconv2d() {
try {
for (boolean helpers : new boolean[]{false, true}) {
for (ConvolutionMode cm : new ConvolutionMode[]{ConvolutionMode.Truncate, ConvolutionMode.Same}) {
Nd4j.getRandom().setSeed(12345);
Nd4j.getEnvironment().allowHelpers(helpers);
String msg = helpers ? "With helpers (" + cm + ")" : "No helpers (" + cm + ")";
System.out.println(" --- " + msg + " ---");
INDArray inNCHW = Nd4j.rand(this.dataType, 2, 3, 12, 12);
INDArray labels = TestUtils.randomOneHot(2, 10);
TestCase tc = TestCase.builder()
.msg(msg)
.net1(getDeconv2DNet2dNet(CNN2DFormat.NCHW, true, cm))
.net2(getDeconv2DNet2dNet(CNN2DFormat.NCHW, false, cm))
.net3(getDeconv2DNet2dNet(CNN2DFormat.NHWC, true, cm))
.net4(getDeconv2DNet2dNet(CNN2DFormat.NHWC, false, cm))
.inNCHW(inNCHW)
.labelsNCHW(labels)
.labelsNHWC(labels)
.testLayerIdx(1)
.build();
testHelper(tc);
}
}
} finally {
Nd4j.getEnvironment().allowHelpers(true);
}
}
@Test
public void testLRN() {
try {
for (boolean helpers : new boolean[]{false, true}) {
for (ConvolutionMode cm : new ConvolutionMode[]{ConvolutionMode.Truncate, ConvolutionMode.Same}) {
Nd4j.getRandom().setSeed(12345);
Nd4j.getEnvironment().allowHelpers(helpers);
String msg = helpers ? "With helpers (" + cm + ")" : "No helpers (" + cm + ")";
System.out.println(" --- " + msg + " ---");
INDArray inNCHW = Nd4j.rand(this.dataType, 2, 3, 12, 12);
INDArray labels = TestUtils.randomOneHot(2, 10);
TestCase tc = TestCase.builder()
.msg(msg)
.net1(getLrnLayer(CNN2DFormat.NCHW, true, cm))
.net2(getLrnLayer(CNN2DFormat.NCHW, false, cm))
.net3(getLrnLayer(CNN2DFormat.NHWC, true, cm))
.net4(getLrnLayer(CNN2DFormat.NHWC, false, cm))
.inNCHW(inNCHW)
.labelsNCHW(labels)
.labelsNHWC(labels)
.testLayerIdx(1)
.build();
testHelper(tc);
}
}
} finally {
Nd4j.getEnvironment().allowHelpers(true);
}
}
@Test
public void testZeroPaddingLayer(){
try {
for (boolean helpers : new boolean[]{false, true}) {
Nd4j.getRandom().setSeed(12345);
Nd4j.getEnvironment().allowHelpers(helpers);
String msg = helpers ? "With helpers" : "No helpers";
System.out.println(" --- " + msg + " ---");
INDArray inNCHW = Nd4j.rand(this.dataType, 2, 3, 12, 12);
INDArray labels = TestUtils.randomOneHot(2, 10);
TestCase tc = TestCase.builder()
.msg(msg)
.net1(getZeroPaddingNet(CNN2DFormat.NCHW, true))
.net2(getZeroPaddingNet(CNN2DFormat.NCHW, false))
.net3(getZeroPaddingNet(CNN2DFormat.NHWC, true))
.net4(getZeroPaddingNet(CNN2DFormat.NHWC, false))
.inNCHW(inNCHW)
.labelsNCHW(labels)
.labelsNHWC(labels)
.testLayerIdx(1)
.build();
testHelper(tc);
}
} finally {
Nd4j.getEnvironment().allowHelpers(true);
}
}
@Test
public void testCropping2DLayer(){
try {
for (boolean helpers : new boolean[]{false, true}) {
Nd4j.getRandom().setSeed(12345);
Nd4j.getEnvironment().allowHelpers(helpers);
String msg = helpers ? "With helpers" : "No helpers";
System.out.println(" --- " + msg + " ---");
INDArray inNCHW = Nd4j.rand(this.dataType, 2, 3, 12, 12);
INDArray labels = TestUtils.randomOneHot(2, 10);
TestCase tc = TestCase.builder()
.msg(msg)
.net1(getCropping2dNet(CNN2DFormat.NCHW, true))
.net2(getCropping2dNet(CNN2DFormat.NCHW, false))
.net3(getCropping2dNet(CNN2DFormat.NHWC, true))
.net4(getCropping2dNet(CNN2DFormat.NHWC, false))
.inNCHW(inNCHW)
.labelsNCHW(labels)
.labelsNHWC(labels)
.testLayerIdx(1)
.build();
testHelper(tc);
}
} finally {
Nd4j.getEnvironment().allowHelpers(true);
}
}
@Test
public void testUpsampling2d(){
try {
for (boolean helpers : new boolean[]{false, true}) {
Nd4j.getRandom().setSeed(12345);
Nd4j.getEnvironment().allowHelpers(helpers);
String msg = helpers ? "With helpers" : "No helpers";
System.out.println(" --- " + msg + " ---");
INDArray inNCHW = Nd4j.rand(this.dataType, 2, 3, 12, 12);
INDArray labels = TestUtils.randomOneHot(2, 10);
TestCase tc = TestCase.builder()
.msg(msg)
.net1(getUpsamplingNet(CNN2DFormat.NCHW, true))
.net2(getUpsamplingNet(CNN2DFormat.NCHW, false))
.net3(getUpsamplingNet(CNN2DFormat.NHWC, true))
.net4(getUpsamplingNet(CNN2DFormat.NHWC, false))
.inNCHW(inNCHW)
.labelsNCHW(labels)
.labelsNHWC(labels)
.testLayerIdx(1)
.build();
testHelper(tc);
}
} finally {
Nd4j.getEnvironment().allowHelpers(true);
}
}
@Test
public void testBatchNormNet(){
try {
for(boolean useLogStd : new boolean[]{true, false}) {
for (boolean helpers : new boolean[]{false, true}) {
Nd4j.getRandom().setSeed(12345);
Nd4j.getEnvironment().allowHelpers(helpers);
String msg = (helpers ? "With helpers" : "No helpers") + " - " + (useLogStd ? "logstd" : "std");
System.out.println(" --- " + msg + " ---");
INDArray inNCHW = Nd4j.rand(this.dataType, 2, 3, 12, 12);
INDArray labels = TestUtils.randomOneHot(2, 10);
TestCase tc = TestCase.builder()
.msg(msg)
.net1(getBatchNormNet(useLogStd, CNN2DFormat.NCHW, true))
.net2(getBatchNormNet(useLogStd, CNN2DFormat.NCHW, false))
.net3(getBatchNormNet(useLogStd, CNN2DFormat.NHWC, true))
.net4(getBatchNormNet(useLogStd, CNN2DFormat.NHWC, false))
.inNCHW(inNCHW)
.labelsNCHW(labels)
.labelsNHWC(labels)
.testLayerIdx(1)
.build();
testHelper(tc);
}
}
} finally {
Nd4j.getEnvironment().allowHelpers(true);
}
}
@Test
public void testCnnLossLayer() {
    try {
        for (boolean helpers : new boolean[]{false, true}) {
            Nd4j.getRandom().setSeed(12345);
            Nd4j.getEnvironment().allowHelpers(helpers);
            String msg = helpers ? "With helpers" : "No helpers";
            System.out.println(" --- " + msg + " ---");

            INDArray inNCHW = Nd4j.rand(this.dataType, 2, 3, 12, 12);
            //CnnLossLayer emits per-pixel output: build one-hot labels per spatial position
            //(12x12 input through a stride-2 3x3 conv gives 6x6 activations), then derive the
            //NCHW labels by permuting the NHWC ones so both layouts encode identical targets
            INDArray labelsNHWC = TestUtils.randomOneHot(this.dataType,2*6*6, 3);
            labelsNHWC = labelsNHWC.reshape(2,6,6,3);
            INDArray labelsNCHW = labelsNHWC.permute(0,3,1,2).dup();

            TestCase tc = TestCase.builder()
                    .msg(msg)
                    .net1(getCnnLossNet(CNN2DFormat.NCHW, true, ConvolutionMode.Same))
                    .net2(getCnnLossNet(CNN2DFormat.NCHW, false, ConvolutionMode.Same))
                    .net3(getCnnLossNet(CNN2DFormat.NHWC, true, ConvolutionMode.Same))
                    .net4(getCnnLossNet(CNN2DFormat.NHWC, false, ConvolutionMode.Same))
                    .inNCHW(inNCHW)
                    .labelsNCHW(labelsNCHW)
                    .labelsNHWC(labelsNHWC)
                    .testLayerIdx(1)
                    .nhwcOutput(true)   //NHWC nets produce NHWC output here (no flattening layer), so testHelper must permute before comparing
                    .build();

            testHelper(tc);
        }
    } finally {
        Nd4j.getEnvironment().allowHelpers(true);
    }
}
@Test
public void testSpaceToDepthNet(){
try {
for (boolean helpers : new boolean[]{false, true}) {
Nd4j.getRandom().setSeed(12345);
Nd4j.getEnvironment().allowHelpers(helpers);
String msg = helpers ? "With helpers" : "No helpers";
System.out.println(" --- " + msg + " ---");
INDArray inNCHW = Nd4j.rand(this.dataType, 2, 3, 12, 12);
INDArray labels = TestUtils.randomOneHot(2, 10);
TestCase tc = TestCase.builder()
.msg(msg)
.net1(getSpaceToDepthNet(CNN2DFormat.NCHW, true))
.net2(getSpaceToDepthNet(CNN2DFormat.NCHW, false))
.net3(getSpaceToDepthNet(CNN2DFormat.NHWC, true))
.net4(getSpaceToDepthNet(CNN2DFormat.NHWC, false))
.inNCHW(inNCHW)
.labelsNCHW(labels)
.labelsNHWC(labels)
.testLayerIdx(1)
.build();
testHelper(tc);
}
} finally {
Nd4j.getEnvironment().allowHelpers(true);
}
}
@Test
public void testSpaceToBatchNet(){
try {
for (boolean helpers : new boolean[]{false, true}) {
Nd4j.getRandom().setSeed(12345);
Nd4j.getEnvironment().allowHelpers(helpers);
String msg = helpers ? "With helpers" : "No helpers";
System.out.println(" --- " + msg + " ---");
INDArray inNCHW = Nd4j.rand(this.dataType, 2, 3, 16, 16);
INDArray labels = TestUtils.randomOneHot(8, 10);
TestCase tc = TestCase.builder()
.msg(msg)
.net1(getSpaceToBatchNet(CNN2DFormat.NCHW, true))
.net2(getSpaceToBatchNet(CNN2DFormat.NCHW, false))
.net3(getSpaceToBatchNet(CNN2DFormat.NHWC, true))
.net4(getSpaceToBatchNet(CNN2DFormat.NHWC, false))
.inNCHW(inNCHW)
.labelsNCHW(labels)
.labelsNHWC(labels)
.testLayerIdx(1)
.build();
testHelper(tc);
}
} finally {
Nd4j.getEnvironment().allowHelpers(true);
}
}
@Test
public void testLocallyConnected() {
try {
for (boolean helpers : new boolean[]{false, true}) {
for (ConvolutionMode cm : new ConvolutionMode[]{ConvolutionMode.Truncate, ConvolutionMode.Same}) {
Nd4j.getRandom().setSeed(12345);
Nd4j.getEnvironment().allowHelpers(helpers);
String msg = helpers ? "With helpers (" + cm + ")" : "No helpers (" + cm + ")";
System.out.println(" --- " + msg + " ---");
INDArray inNCHW = Nd4j.rand(this.dataType, 2, 3, 12, 12);
INDArray labels = TestUtils.randomOneHot(2, 10);
TestCase tc = TestCase.builder()
.msg(msg)
.net1(getLocallyConnectedNet(CNN2DFormat.NCHW, true, cm))
.net2(getLocallyConnectedNet(CNN2DFormat.NCHW, false, cm))
.net3(getLocallyConnectedNet(CNN2DFormat.NHWC, true, cm))
.net4(getLocallyConnectedNet(CNN2DFormat.NHWC, false, cm))
.inNCHW(inNCHW)
.labelsNCHW(labels)
.labelsNHWC(labels)
.testLayerIdx(1)
.build();
testHelper(tc);
}
}
} finally {
Nd4j.getEnvironment().allowHelpers(true);
}
}
@Test
public void testGlobalPooling() {
try {
for (boolean helpers : new boolean[]{false, true}) {
for (PoolingType pt : PoolingType.values()) {
Nd4j.getRandom().setSeed(12345);
Nd4j.getEnvironment().allowHelpers(helpers);
String msg = helpers ? "With helpers (" + pt + ")" : "No helpers (" + pt + ")";
System.out.println(" --- " + msg + " ---");
INDArray inNCHW = Nd4j.rand(this.dataType, 2, 3, 12, 12);
INDArray labels = TestUtils.randomOneHot(2, 10);
TestCase tc = TestCase.builder()
.msg(msg)
.net1(getGlobalPoolingNet(CNN2DFormat.NCHW, pt, true))
.net2(getGlobalPoolingNet(CNN2DFormat.NCHW, pt, false))
.net3(getGlobalPoolingNet(CNN2DFormat.NHWC, pt, true))
.net4(getGlobalPoolingNet(CNN2DFormat.NHWC, pt, false))
.inNCHW(inNCHW)
.labelsNCHW(labels)
.labelsNHWC(labels)
.testLayerIdx(1)
.build();
testHelper(tc);
}
}
} finally {
Nd4j.getEnvironment().allowHelpers(true);
}
}
//Builds a test net around a ConvolutionLayer; only the explicit dataFormat(...) call differs between the two variants
private MultiLayerNetwork getConv2dNet(CNN2DFormat format, boolean setOnLayerAlso, ConvolutionMode cm) {
    ConvolutionLayer.Builder layerBuilder = new ConvolutionLayer.Builder()
            .kernelSize(3, 3)
            .stride(2, 2)
            .activation(Activation.TANH)
            .nOut(3)
            .helperAllowFallback(false);
    if (setOnLayerAlso) {
        layerBuilder.dataFormat(format);
    }
    return getNetWithLayer(layerBuilder.build(), format, cm, null);
}
//Builds a test net around a SubsamplingLayer, optionally setting the data format on the layer itself
private MultiLayerNetwork getSubsampling2dNet(CNN2DFormat format, boolean setOnLayerAlso, ConvolutionMode cm) {
    SubsamplingLayer.Builder layerBuilder = new SubsamplingLayer.Builder()
            .kernelSize(2, 2)
            .stride(1, 1)
            .helperAllowFallback(false);
    if (setOnLayerAlso) {
        layerBuilder.dataFormat(format);
    }
    return getNetWithLayer(layerBuilder.build(), format, cm, null);
}
//Builds a test net around a SeparableConvolution2D layer, optionally setting the data format on the layer itself
private MultiLayerNetwork getSeparableConv2dNet(CNN2DFormat format, boolean setOnLayerAlso, ConvolutionMode cm) {
    SeparableConvolution2D.Builder layerBuilder = new SeparableConvolution2D.Builder()
            .kernelSize(3, 3)
            .stride(2, 2)
            .activation(Activation.TANH)
            .nOut(3)
            .helperAllowFallback(false);
    if (setOnLayerAlso) {
        layerBuilder.dataFormat(format);
    }
    return getNetWithLayer(layerBuilder.build(), format, cm, null);
}
//Builds a test net around a DepthwiseConvolution2D layer, optionally setting the data format on the layer itself
private MultiLayerNetwork getDepthwiseConv2dNet(CNN2DFormat format, boolean setOnLayerAlso, ConvolutionMode cm) {
    DepthwiseConvolution2D.Builder layerBuilder = new DepthwiseConvolution2D.Builder()
            .depthMultiplier(2)
            .kernelSize(3, 3)
            .stride(2, 2)
            .activation(Activation.TANH)
            .nOut(3)
            .helperAllowFallback(false);
    if (setOnLayerAlso) {
        layerBuilder.dataFormat(format);
    }
    return getNetWithLayer(layerBuilder.build(), format, cm, null);
}
//Builds a test net around a LocalResponseNormalization layer, optionally setting the data format on the layer itself
private MultiLayerNetwork getLrnLayer(CNN2DFormat format, boolean setOnLayerAlso, ConvolutionMode cm) {
    LocalResponseNormalization.Builder layerBuilder = new LocalResponseNormalization.Builder()
            .helperAllowFallback(false);
    if (setOnLayerAlso) {
        layerBuilder.dataFormat(format);
    }
    return getNetWithLayer(layerBuilder.build(), format, cm, null);
}
//Builds a test net around a ZeroPaddingLayer, optionally setting the data format on the layer itself
private MultiLayerNetwork getZeroPaddingNet(CNN2DFormat format, boolean setOnLayerAlso) {
    ZeroPaddingLayer.Builder layerBuilder = new ZeroPaddingLayer.Builder(2, 2);
    if (setOnLayerAlso) {
        layerBuilder.dataFormat(format);
    }
    return getNetWithLayer(layerBuilder.build(), format, ConvolutionMode.Same, null);
}
//Builds a test net around a Cropping2D layer, optionally setting the data format on the layer itself
private MultiLayerNetwork getCropping2dNet(CNN2DFormat format, boolean setOnLayerAlso) {
    Cropping2D.Builder layerBuilder = new Cropping2D.Builder(2, 2);
    if (setOnLayerAlso) {
        layerBuilder.dataFormat(format);
    }
    return getNetWithLayer(layerBuilder.build(), format, ConvolutionMode.Same, null);
}
//Builds a test net around an Upsampling2D layer, optionally setting the data format on the layer itself
private MultiLayerNetwork getUpsamplingNet(CNN2DFormat format, boolean setOnLayerAlso) {
    Upsampling2D.Builder layerBuilder = new Upsampling2D.Builder(2);
    if (setOnLayerAlso) {
        layerBuilder.dataFormat(format);
    }
    return getNetWithLayer(layerBuilder.build(), format, ConvolutionMode.Same, null);
}
//Builds a test net around a Deconvolution2D layer.
//NOTE(review): both branches below are byte-identical - unlike every other get*Net helper in this
//class, the setOnLayerAlso==true branch never calls .dataFormat(format) on the layer builder, so
//the parameter has no effect here. If Deconvolution2D supports an explicit per-layer data format,
//the first branch should set it; otherwise the if/else should collapse. TODO confirm against the
//Deconvolution2D builder API at this version.
private MultiLayerNetwork getDeconv2DNet2dNet(CNN2DFormat format, boolean setOnLayerAlso, ConvolutionMode cm) {
    if (setOnLayerAlso) {
        return getNetWithLayer(new Deconvolution2D.Builder().nOut(2)
                .activation(Activation.TANH)
                .kernelSize(2,2)
                .stride(2,2)
                .build(), format, cm, null);
    } else {
        return getNetWithLayer(new Deconvolution2D.Builder().nOut(2)
                .activation(Activation.TANH)
                .kernelSize(2,2)
                .stride(2,2)
                .build(), format, cm, null);
    }
}
//Builds a test net around a BatchNormalization layer (with or without log-stdev parameterization),
//optionally setting the data format on the layer itself
private MultiLayerNetwork getBatchNormNet(boolean logStdev, CNN2DFormat format, boolean setOnLayerAlso) {
    BatchNormalization.Builder layerBuilder = new BatchNormalization.Builder()
            .useLogStd(logStdev)
            .helperAllowFallback(false)
            .nOut(3);
    if (setOnLayerAlso) {
        layerBuilder.dataFormat(format);
    }
    return getNetWithLayer(layerBuilder.build(), format, ConvolutionMode.Same, null);
}
//Builds a test net around a SpaceToDepthLayer, optionally setting the data format on the layer itself
private MultiLayerNetwork getSpaceToDepthNet(CNN2DFormat format, boolean setOnLayerAlso) {
    SpaceToDepthLayer.Builder layerBuilder = new SpaceToDepthLayer.Builder()
            .blocks(2);
    if (setOnLayerAlso) {
        layerBuilder.dataFormat(format);
    }
    return getNetWithLayer(layerBuilder.build(), format, ConvolutionMode.Same, null);
}
//Builds a test net around a SpaceToBatchLayer, optionally setting the data format on the layer itself.
//Uses a 16x16 input type: space-to-batch with 2x2 blocks multiplies the minibatch size by 4
private MultiLayerNetwork getSpaceToBatchNet(CNN2DFormat format, boolean setOnLayerAlso) {
    SpaceToBatchLayer.Builder layerBuilder = new SpaceToBatchLayer.Builder()
            .blocks(2, 2);
    if (setOnLayerAlso) {
        layerBuilder.dataFormat(format);
    }
    return getNetWithLayer(layerBuilder.build(), format, ConvolutionMode.Same,
            InputType.convolutional(16, 16, 3, format));
}
//Builds a test net around a LocallyConnected2D layer, optionally setting the data format on the layer itself
private MultiLayerNetwork getLocallyConnectedNet(CNN2DFormat format, boolean setOnLayerAlso, ConvolutionMode cm) {
    LocallyConnected2D.Builder layerBuilder = new LocallyConnected2D.Builder()
            .kernelSize(3, 3)
            .stride(2, 2)
            .activation(Activation.TANH)
            .nOut(3);
    if (setOnLayerAlso) {
        layerBuilder.dataFormat(format);
    }
    return getNetWithLayer(layerBuilder.build(), format, cm, null);
}
/**
 * Core network factory used by all get*Net helpers: wraps the layer under test between a fixed
 * convolution layer (index 0) and an output layer (index 2), so the layer under test sits at
 * index 1 (matching TestCase.testLayerIdx).
 *
 * @param layer     the layer under test (placed at index 1)
 * @param format    data format used for the surrounding layers and the input type
 * @param cm        convolution mode applied network-wide
 * @param inputType input type override, or null for the default 12x12x3 convolutional input
 */
private MultiLayerNetwork getNetWithLayer(Layer layer, CNN2DFormat format, ConvolutionMode cm, InputType inputType) {
    NeuralNetConfiguration.ListBuilder builder = new NeuralNetConfiguration.Builder()
            .dataType(this.dataType)
            .seed(12345)
            .convolutionMode(cm)
            .list()
            .layer(new ConvolutionLayer.Builder()
                    .kernelSize(3, 3)
                    .stride(2, 2)
                    .activation(Activation.TANH)
                    .dataFormat(format)
                    .nOut(3)
                    .helperAllowFallback(false)
                    .build())
            .layer(layer)
            .layer(new OutputLayer.Builder().activation(Activation.SOFTMAX).nOut(10).build())
            .setInputType(inputType != null ? inputType : InputType.convolutional(12, 12, 3, format));

    if(format == CNN2DFormat.NHWC && !(layer instanceof GlobalPoolingLayer)){
        //Add a preprocessor due to the differences in how NHWC and NCHW activations are flattened
        //DL4J's flattening behaviour matches Keras (hence TF) for import compatibility
        //(NHWC is first converted back to NCHW so the flattened vector matches the NCHW nets;
        //GlobalPoolingLayer is excluded because its output is already rank 2)
        builder.inputPreProcessor(2, new ComposableInputPreProcessor(new NHWCToNCHWPreprocessor(), new CnnToFeedForwardPreProcessor()));
    }

    MultiLayerNetwork net = new MultiLayerNetwork(builder.build());
    net.init();
    return net;
}
//Builds a test net around a GlobalPoolingLayer; when setOnLayerAlso is true the pooling
//dimensions are set explicitly to the spatial axes of the given format (H/W are dims 2,3
//for NCHW and dims 1,2 for NHWC)
private MultiLayerNetwork getGlobalPoolingNet(CNN2DFormat format, PoolingType pt, boolean setOnLayerAlso) {
    GlobalPoolingLayer.Builder layerBuilder = new GlobalPoolingLayer.Builder(pt);
    if (setOnLayerAlso) {
        int[] spatialDims = format == CNN2DFormat.NCHW ? new int[]{2, 3} : new int[]{1, 2};
        layerBuilder.poolingDimensions(spatialDims);
    }
    return getNetWithLayer(layerBuilder.build(), format, ConvolutionMode.Same, null);
}
/**
 * Builds a test net ending in a CnnLossLayer (per-pixel softmax output), optionally setting the
 * data format on the loss layer itself.
 *
 * Fix: now sets {@code dataType(this.dataType)} like {@link #getNetWithLayer}. Previously this
 * helper omitted it, so these nets were always built with the default data type and the
 * DataType.DOUBLE parameterization of this test class never actually exercised DOUBLE here.
 */
private MultiLayerNetwork getCnnLossNet(CNN2DFormat format, boolean setOnLayerAlso, ConvolutionMode cm){
    NeuralNetConfiguration.ListBuilder builder = new NeuralNetConfiguration.Builder()
            .dataType(this.dataType)    //Consistent with getNetWithLayer; labels in testCnnLossLayer are created with this.dataType
            .seed(12345)
            .convolutionMode(cm)
            .list()
            .layer(new ConvolutionLayer.Builder()
                    .kernelSize(3, 3)
                    .stride(2, 2)
                    .activation(Activation.TANH)
                    .dataFormat(format)
                    .nOut(3)
                    .helperAllowFallback(false)
                    .build());
    if(setOnLayerAlso){
        builder.layer(new CnnLossLayer.Builder().format(format).activation(Activation.SOFTMAX).build());
    } else {
        builder.layer(new CnnLossLayer.Builder().activation(Activation.SOFTMAX).build());
    }
    builder.setInputType(InputType.convolutional(12, 12, 3, format));

    MultiLayerNetwork net = new MultiLayerNetwork(builder.build());
    net.init();
    return net;
}
//Bundle of four equivalent networks plus matching inputs/labels, compared by testHelper.
//Lombok generates the constructor, accessors and builder used by the test methods.
@AllArgsConstructor
@Data
@NoArgsConstructor
@Builder
private static class TestCase {
    private String msg;                 //Context string included in every assertion message
    private MultiLayerNetwork net1;     //NCHW net, format set explicitly on the layer under test
    private MultiLayerNetwork net2;     //NCHW net, format inherited from the network input type
    private MultiLayerNetwork net3;     //NHWC net, format set explicitly on the layer under test
    private MultiLayerNetwork net4;     //NHWC net, format inherited from the network input type
    private INDArray inNCHW;            //Input in NCHW layout; testHelper derives the NHWC input by permutation
    private INDArray labelsNCHW;
    private INDArray labelsNHWC;
    private int testLayerIdx;           //Index of the layer under test (its activations are compared)
    private boolean nhwcOutput;         //True when the NHWC nets produce NHWC-shaped output that must be permuted before comparison
}
/**
 * Shared comparison harness: checks that all four networks in the TestCase (NCHW/NHWC, with/without
 * explicit layer-level format) produce identical activations, outputs, gradients, fitted parameters
 * and serialized behaviour, up to the expected NCHW<->NHWC permutation of rank-4 arrays.
 */
public static void testHelper(TestCase tc) {
    //All four nets share identical parameters so any divergence is due to format handling
    tc.net2.params().assign(tc.net1.params());
    tc.net3.params().assign(tc.net1.params());
    tc.net4.params().assign(tc.net1.params());

    //Test forward pass:
    INDArray inNCHW = tc.inNCHW;
    INDArray inNHWC = tc.inNCHW.permute(0, 2, 3, 1).dup();

    //feedForward list is offset by 1: element 0 is the input, so layer i's output is at i+1
    INDArray l0_1 = tc.net1.feedForward(inNCHW).get(tc.testLayerIdx + 1);
    INDArray l0_2 = tc.net2.feedForward(inNCHW).get(tc.testLayerIdx + 1);
    INDArray l0_3 = tc.net3.feedForward(inNHWC).get(tc.testLayerIdx + 1);
    INDArray l0_4 = tc.net4.feedForward(inNHWC).get(tc.testLayerIdx + 1);

    assertEquals(tc.msg, l0_1, l0_2);
    //Rank-4 activations from the NHWC nets are NHWC-shaped; permute back to NCHW before comparing
    if(l0_1.rank() == 4) {
        assertEquals(tc.msg, l0_1, l0_3.permute(0, 3, 1, 2));
        assertEquals(tc.msg, l0_1, l0_4.permute(0, 3, 1, 2));
    } else {
        assertEquals(tc.msg, l0_1, l0_3);
        assertEquals(tc.msg, l0_1, l0_4);
    }

    INDArray out1 = tc.net1.output(inNCHW);
    INDArray out2 = tc.net2.output(inNCHW);
    INDArray out3 = tc.net3.output(inNHWC);
    INDArray out4 = tc.net4.output(inNHWC);

    assertEquals(tc.msg, out1, out2);
    if(!tc.nhwcOutput) {
        assertEquals(tc.msg, out1, out3);
        assertEquals(tc.msg, out1, out4);
    } else {
        assertEquals(tc.msg, out1, out3.permute(0,3,1,2));        //NHWC to NCHW
        assertEquals(tc.msg, out1, out4.permute(0,3,1,2));
    }

    //Test backprop
    Pair<Gradient, INDArray> p1 = tc.net1.calculateGradients(inNCHW, tc.labelsNCHW, null, null);
    Pair<Gradient, INDArray> p2 = tc.net2.calculateGradients(inNCHW, tc.labelsNCHW, null, null);
    Pair<Gradient, INDArray> p3 = tc.net3.calculateGradients(inNHWC, tc.labelsNHWC, null, null);
    Pair<Gradient, INDArray> p4 = tc.net4.calculateGradients(inNHWC, tc.labelsNHWC, null, null);

    //Input gradients
    assertEquals(tc.msg, p1.getSecond(), p2.getSecond());
    assertEquals(tc.msg, p1.getSecond(), p3.getSecond().permute(0,3,1,2));   //Input gradients for NHWC input are also in NHWC format
    assertEquals(tc.msg, p1.getSecond(), p4.getSecond().permute(0,3,1,2));

    //Per-parameter diff lists first, so a failure message names exactly which gradients differ
    List<String> diff12 = differentGrads(p1.getFirst(), p2.getFirst());
    List<String> diff13 = differentGrads(p1.getFirst(), p3.getFirst());
    List<String> diff14 = differentGrads(p1.getFirst(), p4.getFirst());
    assertEquals(tc.msg + " " + diff12, 0, diff12.size());
    assertEquals(tc.msg + " " + diff13, 0, diff13.size());
    assertEquals(tc.msg + " " + diff14, 0, diff14.size());

    assertEquals(tc.msg, p1.getFirst().gradientForVariable(), p2.getFirst().gradientForVariable());
    assertEquals(tc.msg, p1.getFirst().gradientForVariable(), p3.getFirst().gradientForVariable());
    assertEquals(tc.msg, p1.getFirst().gradientForVariable(), p4.getFirst().gradientForVariable());

    //After one identical fit step the parameters must still agree across all four nets
    tc.net1.fit(inNCHW, tc.labelsNCHW);
    tc.net2.fit(inNCHW, tc.labelsNCHW);
    tc.net3.fit(inNHWC, tc.labelsNHWC);
    tc.net4.fit(inNHWC, tc.labelsNHWC);

    assertEquals(tc.msg, tc.net1.params(), tc.net2.params());
    assertEquals(tc.msg, tc.net1.params(), tc.net3.params());
    assertEquals(tc.msg, tc.net1.params(), tc.net4.params());

    //Test serialization
    MultiLayerNetwork net1a = TestUtils.testModelSerialization(tc.net1);
    MultiLayerNetwork net2a = TestUtils.testModelSerialization(tc.net2);
    MultiLayerNetwork net3a = TestUtils.testModelSerialization(tc.net3);
    MultiLayerNetwork net4a = TestUtils.testModelSerialization(tc.net4);

    out1 = tc.net1.output(inNCHW);
    assertEquals(tc.msg, out1, net1a.output(inNCHW));
    assertEquals(tc.msg, out1, net2a.output(inNCHW));

    if(!tc.nhwcOutput) {
        assertEquals(tc.msg, out1, net3a.output(inNHWC));
        assertEquals(tc.msg, out1, net4a.output(inNHWC));
    } else {
        assertEquals(tc.msg, out1, net3a.output(inNHWC).permute(0,3,1,2));   //NHWC to NCHW
        assertEquals(tc.msg, out1, net4a.output(inNHWC).permute(0,3,1,2));
    }
}
/**
 * Returns the names of all gradient variables that differ between the two gradients.
 *
 * Fix: the original iterated only {@code g1}'s key set, so a parameter present in {@code g2}
 * but absent from {@code g1} was silently ignored. Both key sets are now covered; a variable
 * missing from either side is reported as a difference.
 *
 * @param g1 reference gradient
 * @param g2 gradient to compare against the reference
 * @return names of variables whose gradient arrays are missing or unequal (empty if identical)
 */
private static List<String> differentGrads(Gradient g1, Gradient g2){
    List<String> differs = new ArrayList<>();
    Map<String,INDArray> m1 = g1.gradientForVariable();
    Map<String,INDArray> m2 = g2.gradientForVariable();
    //Union of keys, preserving g1's iteration order first for stable failure messages
    Set<String> allKeys = new LinkedHashSet<>(m1.keySet());
    allKeys.addAll(m2.keySet());
    for(String s : allKeys){
        INDArray a1 = m1.get(s);
        INDArray a2 = m2.get(s);
        //a1 == null: variable only in g2. a1.equals(a2) is false when a2 is null or unequal.
        if(a1 == null || !a1.equals(a2)){
            differs.add(s);
        }
    }
    return differs;
}
//Converts NHWC to NCHW activations on the forward pass (and NCHW gradients back to NHWC on
//backprop). Used ahead of CnnToFeedForwardPreProcessor so NHWC nets flatten identically to NCHW
//nets. Stateless, so clone() can return this; masking is not supported (feedForwardMaskArray
//returns null).
@EqualsAndHashCode
private static class NHWCToNCHWPreprocessor implements InputPreProcessor {

    @Override
    public INDArray preProcess(INDArray input, int miniBatchSize, LayerWorkspaceMgr workspaceMgr) {
        //NHWC -> NCHW; leverage keeps the permuted view valid in the activations workspace
        return workspaceMgr.leverageTo(ArrayType.ACTIVATIONS, input.permute(0,3,1,2));
    }

    @Override
    public INDArray backprop(INDArray output, int miniBatchSize, LayerWorkspaceMgr workspaceMgr) {
        //Inverse permutation: NCHW gradient -> NHWC
        return workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, output.permute(0,2,3,1));
    }

    @Override
    public InputPreProcessor clone() {
        return this;
    }

    @Override
    public InputType getOutputType(InputType inputType) {
        //Same spatial dimensions and channel count; only the declared format changes
        InputType.InputTypeConvolutional c = (InputType.InputTypeConvolutional) inputType;
        return InputType.convolutional(c.getHeight(), c.getWidth(), c.getChannels(), CNN2DFormat.NCHW);
    }

    @Override
    public Pair<INDArray, MaskState> feedForwardMaskArray(INDArray maskArray, MaskState currentMaskState, int minibatchSize) {
        return null;
    }
}
}

View File

@ -16,6 +16,7 @@
package org.deeplearning4j.nn.layers.convolution;
import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.BaseDL4JTest;
import org.deeplearning4j.exception.DL4JException;
import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
@ -45,6 +46,7 @@ import static org.junit.Assert.*;
/**
* Created by Alex on 15/11/2016.
*/
@Slf4j
public class TestConvolutionModes extends BaseDL4JTest {
@Test
@ -106,12 +108,12 @@ public class TestConvolutionModes extends BaseDL4JTest {
}
} catch (DL4JException e) {
if (inSize == 9 || cm != ConvolutionMode.Strict) {
e.printStackTrace();
log.error("",e);
fail("Unexpected exception");
}
continue; //Expected exception
} catch (Exception e) {
e.printStackTrace();
log.error("",e);
fail("Unexpected exception");
}
@ -184,12 +186,12 @@ public class TestConvolutionModes extends BaseDL4JTest {
}
} catch (DL4JException e) {
if (inSize == 9 || cm != ConvolutionMode.Strict) {
e.printStackTrace();
log.error("",e);
fail("Unexpected exception");
}
continue; //Expected exception
} catch (Exception e) {
e.printStackTrace();
log.error("",e);
fail("Unexpected exception");
}

View File

@ -24,10 +24,7 @@ import org.deeplearning4j.earlystopping.saver.InMemoryModelSaver;
import org.deeplearning4j.earlystopping.scorecalc.DataSetLossCalculator;
import org.deeplearning4j.earlystopping.termination.MaxEpochsTerminationCondition;
import org.deeplearning4j.earlystopping.trainer.EarlyStoppingGraphTrainer;
import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.WorkspaceMode;
import org.deeplearning4j.nn.conf.*;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM;
import org.deeplearning4j.nn.conf.layers.GravesLSTM;
@ -45,6 +42,8 @@ import org.deeplearning4j.nn.weights.WeightInit;
import org.deeplearning4j.util.ModelSerializer;
import org.deeplearning4j.util.TimeSeriesUtils;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
@ -61,12 +60,22 @@ import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import static org.deeplearning4j.nn.conf.RNNFormat.NCW;
import static org.junit.Assert.assertEquals;
@Slf4j
@RunWith(Parameterized.class)
public class BidirectionalTest extends BaseDL4JTest {
private RNNFormat rnnDataFormat;
public BidirectionalTest(RNNFormat rnnDataFormat){
this.rnnDataFormat = rnnDataFormat;
}
@Parameterized.Parameters
public static Object[] params(){
return RNNFormat.values();
}
@Test
public void compareImplementations(){
for(WorkspaceMode wsm : WorkspaceMode.values()) {
@ -82,9 +91,9 @@ public class BidirectionalTest extends BaseDL4JTest {
.inferenceWorkspaceMode(wsm)
.updater(new Adam())
.list()
.layer(new Bidirectional(Bidirectional.Mode.ADD, new GravesLSTM.Builder().nIn(10).nOut(10).build()))
.layer(new Bidirectional(Bidirectional.Mode.ADD, new GravesLSTM.Builder().nIn(10).nOut(10).build()))
.layer(new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE)
.layer(new Bidirectional(Bidirectional.Mode.ADD, new GravesLSTM.Builder().nIn(10).nOut(10).dataFormat(rnnDataFormat).build()))
.layer(new Bidirectional(Bidirectional.Mode.ADD, new GravesLSTM.Builder().nIn(10).nOut(10).dataFormat(rnnDataFormat).build()))
.layer(new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).dataFormat(rnnDataFormat)
.nIn(10).nOut(10).build())
.build();
@ -95,9 +104,9 @@ public class BidirectionalTest extends BaseDL4JTest {
.inferenceWorkspaceMode(wsm)
.updater(new Adam())
.list()
.layer(new GravesBidirectionalLSTM.Builder().nIn(10).nOut(10).build())
.layer(new GravesBidirectionalLSTM.Builder().nIn(10).nOut(10).build())
.layer(new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE)
.layer(new GravesBidirectionalLSTM.Builder().nIn(10).nOut(10).dataFormat(rnnDataFormat).build())
.layer(new GravesBidirectionalLSTM.Builder().nIn(10).nOut(10).dataFormat(rnnDataFormat).build())
.layer(new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).dataFormat(rnnDataFormat)
.nIn(10).nOut(10).build())
.build();
@ -116,15 +125,24 @@ public class BidirectionalTest extends BaseDL4JTest {
net2.setParams(net1.params()); //Assuming exact same layout here...
INDArray in = Nd4j.rand(new int[]{3, 10, 5});
INDArray in;
if (rnnDataFormat == NCW){
in = Nd4j.rand(new int[]{3, 10, 5});
}else{
in = Nd4j.rand(new int[]{3, 5, 10});
}
INDArray out1 = net1.output(in);
INDArray out2 = net2.output(in);
assertEquals(out1, out2);
INDArray labels = Nd4j.rand(new int[]{3, 10, 5});
INDArray labels;
if (rnnDataFormat == NCW){
labels = Nd4j.rand(new int[]{3, 10, 5});
}else{
labels = Nd4j.rand(new int[]{3, 5, 10});
}
net1.setInput(in);
net1.setLabels(labels);
@ -276,17 +294,22 @@ public class BidirectionalTest extends BaseDL4JTest {
.inferenceWorkspaceMode(wsm)
.updater(new Adam())
.list()
.layer(new Bidirectional(Bidirectional.Mode.ADD, new GravesLSTM.Builder().nIn(10).nOut(10).build()))
.layer(new Bidirectional(Bidirectional.Mode.ADD, new GravesLSTM.Builder().nIn(10).nOut(10).build()))
.layer(new Bidirectional(Bidirectional.Mode.ADD, new GravesLSTM.Builder().nIn(10).nOut(10).dataFormat(rnnDataFormat).build()))
.layer(new Bidirectional(Bidirectional.Mode.ADD, new GravesLSTM.Builder().nIn(10).nOut(10).dataFormat(rnnDataFormat).build()))
.layer(new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE)
.nIn(10).nOut(10).build())
.nIn(10).nOut(10).dataFormat(rnnDataFormat).build())
.build();
MultiLayerNetwork net1 = new MultiLayerNetwork(conf1);
net1.init();
INDArray in = Nd4j.rand(new int[]{3, 10, 5});
INDArray labels = Nd4j.rand(new int[]{3, 10, 5});
INDArray in;
INDArray labels;
long[] inshape = rnnDataFormat == NCW ? new long[]{3, 10, 5} : new long[]{3, 5, 10};
in = Nd4j.rand(inshape);
labels = Nd4j.rand(inshape);
net1.fit(in, labels);
@ -300,8 +323,8 @@ public class BidirectionalTest extends BaseDL4JTest {
MultiLayerNetwork net2 = ModelSerializer.restoreMultiLayerNetwork(new ByteArrayInputStream(bytes), true);
in = Nd4j.rand(new int[]{3, 10, 5});
labels = Nd4j.rand(new int[]{3, 10, 5});
in = Nd4j.rand(inshape);
labels = Nd4j.rand(inshape);
INDArray out1 = net1.output(in);
INDArray out2 = net2.output(in);
@ -338,18 +361,18 @@ public class BidirectionalTest extends BaseDL4JTest {
.updater(new Adam())
.graphBuilder()
.addInputs("in")
.layer("0", new Bidirectional(Bidirectional.Mode.ADD, new GravesLSTM.Builder().nIn(10).nOut(10).build()), "in")
.layer("1", new Bidirectional(Bidirectional.Mode.ADD, new GravesLSTM.Builder().nIn(10).nOut(10).build()), "0")
.layer("2", new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE)
.layer("0", new Bidirectional(Bidirectional.Mode.ADD, new GravesLSTM.Builder().nIn(10).nOut(10).dataFormat(rnnDataFormat).build()), "in")
.layer("1", new Bidirectional(Bidirectional.Mode.ADD, new GravesLSTM.Builder().nIn(10).nOut(10).dataFormat(rnnDataFormat).build()), "0")
.layer("2", new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).dataFormat(rnnDataFormat)
.nIn(10).nOut(10).build(), "1")
.setOutputs("2")
.build();
ComputationGraph net1 = new ComputationGraph(conf1);
net1.init();
INDArray in = Nd4j.rand(new int[]{3, 10, 5});
INDArray labels = Nd4j.rand(new int[]{3, 10, 5});
long[] inshape = (rnnDataFormat == NCW)? new long[]{3, 10, 5}: new long[]{3, 5, 10};
INDArray in = Nd4j.rand(inshape);
INDArray labels = Nd4j.rand(inshape);
net1.fit(new DataSet(in, labels));
@ -363,8 +386,8 @@ public class BidirectionalTest extends BaseDL4JTest {
ComputationGraph net2 = ModelSerializer.restoreComputationGraph(new ByteArrayInputStream(bytes), true);
in = Nd4j.rand(new int[]{3, 10, 5});
labels = Nd4j.rand(new int[]{3, 10, 5});
in = Nd4j.rand(inshape);
labels = Nd4j.rand(inshape);
INDArray out1 = net1.outputSingle(in);
INDArray out2 = net2.outputSingle(in);
@ -394,8 +417,8 @@ public class BidirectionalTest extends BaseDL4JTest {
Bidirectional.Mode[] modes = new Bidirectional.Mode[]{Bidirectional.Mode.CONCAT, Bidirectional.Mode.ADD,
Bidirectional.Mode.AVERAGE, Bidirectional.Mode.MUL};
INDArray in = Nd4j.rand(new int[]{3, 10, 6});
long[] inshape = rnnDataFormat == NCW ? new long[]{3, 10, 6} : new long[]{3, 6, 10};
INDArray in = Nd4j.rand(inshape);
for (Bidirectional.Mode m : modes) {
MultiLayerConfiguration conf1 = new NeuralNetConfiguration.Builder()
@ -406,7 +429,7 @@ public class BidirectionalTest extends BaseDL4JTest {
.inferenceWorkspaceMode(wsm)
.updater(new Adam())
.list()
.layer(new Bidirectional(m, new SimpleRnn.Builder().nIn(10).nOut(10).build()))
.layer(new Bidirectional(m, new SimpleRnn.Builder().nIn(10).nOut(10).dataFormat(rnnDataFormat).build()))
.build();
MultiLayerNetwork net1 = new MultiLayerNetwork(conf1);
@ -418,7 +441,7 @@ public class BidirectionalTest extends BaseDL4JTest {
.weightInit(WeightInit.XAVIER)
.updater(new Adam())
.list()
.layer(new SimpleRnn.Builder().nIn(10).nOut(10).build())
.layer(new SimpleRnn.Builder().nIn(10).nOut(10).dataFormat(rnnDataFormat).build())
.build();
MultiLayerNetwork net2 = new MultiLayerNetwork(conf2.clone());
@ -434,11 +457,10 @@ public class BidirectionalTest extends BaseDL4JTest {
net3.setParam("0_RW", net1.getParam("0_bRW"));
net3.setParam("0_b", net1.getParam("0_bb"));
INDArray inReverse = TimeSeriesUtils.reverseTimeSeries(in, LayerWorkspaceMgr.noWorkspaces(), ArrayType.INPUT);
INDArray inReverse = TimeSeriesUtils.reverseTimeSeries(in, LayerWorkspaceMgr.noWorkspaces(), ArrayType.INPUT, rnnDataFormat);
INDArray out1 = net1.output(in);
INDArray out2 = net2.output(in);
INDArray out3 = TimeSeriesUtils.reverseTimeSeries(net3.output(inReverse), LayerWorkspaceMgr.noWorkspaces(), ArrayType.INPUT);
INDArray out3 = TimeSeriesUtils.reverseTimeSeries(net3.output(inReverse), LayerWorkspaceMgr.noWorkspaces(), ArrayType.INPUT, rnnDataFormat);
INDArray outExp;
switch (m) {
@ -452,7 +474,7 @@ public class BidirectionalTest extends BaseDL4JTest {
outExp = out2.add(out3).muli(0.5);
break;
case CONCAT:
outExp = Nd4j.concat(1, out2, out3);
outExp = Nd4j.concat((rnnDataFormat == NCW)?1:2, out2, out3);
break;
default:
throw new RuntimeException();
@ -464,25 +486,25 @@ public class BidirectionalTest extends BaseDL4JTest {
//Check gradients:
if (m == Bidirectional.Mode.ADD || m == Bidirectional.Mode.CONCAT) {
INDArray eps = Nd4j.rand(new int[]{3, 10, 6});
INDArray eps = Nd4j.rand(inshape);
INDArray eps1;
if (m == Bidirectional.Mode.CONCAT) {
eps1 = Nd4j.concat(1, eps, eps);
eps1 = Nd4j.concat((rnnDataFormat == NCW)?1:2, eps, eps);
} else {
eps1 = eps;
}
net1.setInput(in);
net2.setInput(in);
net3.setInput(TimeSeriesUtils.reverseTimeSeries(in, LayerWorkspaceMgr.noWorkspaces(), ArrayType.INPUT));
net3.setInput(TimeSeriesUtils.reverseTimeSeries(in, LayerWorkspaceMgr.noWorkspaces(), ArrayType.INPUT, rnnDataFormat));
net1.feedForward(true, false);
net2.feedForward(true, false);
net3.feedForward(true, false);
Pair<Gradient, INDArray> p1 = net1.backpropGradient(eps1, LayerWorkspaceMgr.noWorkspaces());
Pair<Gradient, INDArray> p2 = net2.backpropGradient(eps, LayerWorkspaceMgr.noWorkspaces());
Pair<Gradient, INDArray> p3 = net3.backpropGradient(TimeSeriesUtils.reverseTimeSeries(eps, LayerWorkspaceMgr.noWorkspaces(), ArrayType.INPUT), LayerWorkspaceMgr.noWorkspaces());
Pair<Gradient, INDArray> p3 = net3.backpropGradient(TimeSeriesUtils.reverseTimeSeries(eps, LayerWorkspaceMgr.noWorkspaces(), ArrayType.INPUT, rnnDataFormat), LayerWorkspaceMgr.noWorkspaces());
Gradient g1 = p1.getFirst();
Gradient g2 = p2.getFirst();
Gradient g3 = p3.getFirst();
@ -520,7 +542,9 @@ public class BidirectionalTest extends BaseDL4JTest {
Bidirectional.Mode.AVERAGE, Bidirectional.Mode.MUL};
INDArray in = Nd4j.rand(new int[]{3, 10, 6});
long[] inshape = rnnDataFormat == NCW ? new long[]{3, 10, 6} : new long[]{3, 6, 10};
INDArray in = Nd4j.rand(inshape);
for (Bidirectional.Mode m : modes) {
ComputationGraphConfiguration conf1 = new NeuralNetConfiguration.Builder()
@ -532,7 +556,7 @@ public class BidirectionalTest extends BaseDL4JTest {
.updater(new Adam())
.graphBuilder()
.addInputs("in")
.layer("0", new Bidirectional(m, new SimpleRnn.Builder().nIn(10).nOut(10).build()), "in")
.layer("0", new Bidirectional(m, new SimpleRnn.Builder().nIn(10).nOut(10).dataFormat(rnnDataFormat).build()), "in")
.setOutputs("0")
.build();
@ -546,7 +570,7 @@ public class BidirectionalTest extends BaseDL4JTest {
.updater(new Adam())
.graphBuilder()
.addInputs("in")
.layer("0", new SimpleRnn.Builder().nIn(10).nOut(10).build(), "in")
.layer("0", new SimpleRnn.Builder().nIn(10).nOut(10).dataFormat(rnnDataFormat).build(), "in")
.setOutputs("0")
.build();
@ -566,9 +590,20 @@ public class BidirectionalTest extends BaseDL4JTest {
INDArray out1 = net1.outputSingle(in);
INDArray out2 = net2.outputSingle(in);
INDArray out3 = TimeSeriesUtils.reverseTimeSeries(net3.outputSingle(
TimeSeriesUtils.reverseTimeSeries(in, LayerWorkspaceMgr.noWorkspaces(), ArrayType.INPUT)),
LayerWorkspaceMgr.noWorkspaces(), ArrayType.INPUT);
INDArray out3;
INDArray inReverse;
if (rnnDataFormat == RNNFormat.NWC){
inReverse = TimeSeriesUtils.reverseTimeSeries(in.permute(0, 2, 1), LayerWorkspaceMgr.noWorkspaces(), ArrayType.INPUT).permute(0, 2, 1);
out3 = net3.outputSingle(inReverse);
out3 = TimeSeriesUtils.reverseTimeSeries(out3.permute(0, 2, 1), LayerWorkspaceMgr.noWorkspaces(), ArrayType.INPUT).permute(0, 2, 1);
}
else{
inReverse = TimeSeriesUtils.reverseTimeSeries(in, LayerWorkspaceMgr.noWorkspaces(), ArrayType.INPUT);
out3 = net3.outputSingle(inReverse);
out3 = TimeSeriesUtils.reverseTimeSeries(out3, LayerWorkspaceMgr.noWorkspaces(), ArrayType.INPUT);
}
INDArray outExp;
switch (m) {
@ -582,7 +617,9 @@ public class BidirectionalTest extends BaseDL4JTest {
outExp = out2.add(out3).muli(0.5);
break;
case CONCAT:
outExp = Nd4j.concat(1, out2, out3);
System.out.println(out2.shapeInfoToString());
System.out.println(out3.shapeInfoToString());
outExp = Nd4j.concat((rnnDataFormat == NCW)?1:2, out2, out3);
break;
default:
throw new RuntimeException();
@ -594,22 +631,26 @@ public class BidirectionalTest extends BaseDL4JTest {
//Check gradients:
if (m == Bidirectional.Mode.ADD || m == Bidirectional.Mode.CONCAT) {
INDArray eps = Nd4j.rand(new int[]{3, 10, 6});
INDArray eps = Nd4j.rand(inshape);
INDArray eps1;
if (m == Bidirectional.Mode.CONCAT) {
eps1 = Nd4j.concat(1, eps, eps);
eps1 = Nd4j.concat((rnnDataFormat == NCW)?1:2, eps, eps);
} else {
eps1 = eps;
}
INDArray epsReversed = (rnnDataFormat == NCW)?
TimeSeriesUtils.reverseTimeSeries(eps, LayerWorkspaceMgr.noWorkspaces(), ArrayType.INPUT):
TimeSeriesUtils.reverseTimeSeries(eps.permute(0, 2, 1), LayerWorkspaceMgr.noWorkspaces(), ArrayType.INPUT)
.permute(0, 2, 1);
net1.outputSingle(true, false, in);
net2.outputSingle(true, false, in);
net3.outputSingle(true, false, TimeSeriesUtils.reverseTimeSeries(in, LayerWorkspaceMgr.noWorkspaces(), ArrayType.INPUT));
net3.outputSingle(true, false, inReverse);
Gradient g1 = net1.backpropGradient(eps1);
Gradient g2 = net2.backpropGradient(eps);
Gradient g3 = net3.backpropGradient(TimeSeriesUtils.reverseTimeSeries(eps, LayerWorkspaceMgr.noWorkspaces(), ArrayType.INPUT));
Gradient g3 = net3.backpropGradient(epsReversed);
for (boolean updates : new boolean[]{false, true}) {
if (updates) {

View File

@ -23,6 +23,7 @@ import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.CacheMode;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.RNNFormat;
import org.deeplearning4j.nn.conf.distribution.UniformDistribution;
import org.deeplearning4j.nn.gradient.Gradient;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
@ -31,6 +32,8 @@ import org.deeplearning4j.nn.params.GravesLSTMParamInitializer;
import org.deeplearning4j.nn.weights.WeightInit;
import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.activations.impl.ActivationSigmoid;
import org.nd4j.linalg.api.ndarray.INDArray;
@ -42,10 +45,18 @@ import org.nd4j.linalg.primitives.Pair;
import static org.junit.Assert.*;
@RunWith(Parameterized.class)
public class GravesBidirectionalLSTMTest extends BaseDL4JTest {
private double score = 0.0;
private RNNFormat rnnDataFormat;
public GravesBidirectionalLSTMTest(RNNFormat rnnDataFormat){
this.rnnDataFormat = rnnDataFormat;
}
@Parameterized.Parameters
public static Object[] params(){
return RNNFormat.values();
}
@Test
public void testBidirectionalLSTMGravesForwardBasic() {
//Very basic test of forward prop. of LSTM layer with a time series.
@ -55,7 +66,7 @@ public class GravesBidirectionalLSTMTest extends BaseDL4JTest {
final NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
.layer(new org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM.Builder().nIn(nIn)
.nOut(nHiddenUnits).activation(Activation.TANH).build())
.nOut(nHiddenUnits).dataFormat(rnnDataFormat).activation(Activation.TANH).build())
.build();
val numParams = conf.getLayer().initializer().numParams(conf);
@ -65,22 +76,41 @@ public class GravesBidirectionalLSTMTest extends BaseDL4JTest {
//Data: has shape [miniBatchSize,nIn,timeSeriesLength];
//Output/activations has shape [miniBatchsize,nHiddenUnits,timeSeriesLength];
if (rnnDataFormat == RNNFormat.NCW){
final INDArray dataSingleExampleTimeLength1 = Nd4j.ones(1, nIn, 1);
final INDArray activations1 = layer.activate(dataSingleExampleTimeLength1, false, LayerWorkspaceMgr.noWorkspaces());
assertArrayEquals(activations1.shape(), new long[] {1, nHiddenUnits, 1});
final INDArray dataSingleExampleTimeLength1 = Nd4j.ones(1, nIn, 1);
final INDArray activations1 = layer.activate(dataSingleExampleTimeLength1, false, LayerWorkspaceMgr.noWorkspaces());
assertArrayEquals(activations1.shape(), new long[] {1, nHiddenUnits, 1});
final INDArray dataMultiExampleLength1 = Nd4j.ones(10, nIn, 1);
final INDArray activations2 = layer.activate(dataMultiExampleLength1, false, LayerWorkspaceMgr.noWorkspaces());
assertArrayEquals(activations2.shape(), new long[] {10, nHiddenUnits, 1});
final INDArray dataMultiExampleLength1 = Nd4j.ones(10, nIn, 1);
final INDArray activations2 = layer.activate(dataMultiExampleLength1, false, LayerWorkspaceMgr.noWorkspaces());
assertArrayEquals(activations2.shape(), new long[] {10, nHiddenUnits, 1});
final INDArray dataSingleExampleLength12 = Nd4j.ones(1, nIn, 12);
final INDArray activations3 = layer.activate(dataSingleExampleLength12, false, LayerWorkspaceMgr.noWorkspaces());
assertArrayEquals(activations3.shape(), new long[] {1, nHiddenUnits, 12});
final INDArray dataSingleExampleLength12 = Nd4j.ones(1, nIn, 12);
final INDArray activations3 = layer.activate(dataSingleExampleLength12, false, LayerWorkspaceMgr.noWorkspaces());
assertArrayEquals(activations3.shape(), new long[] {1, nHiddenUnits, 12});
final INDArray dataMultiExampleLength15 = Nd4j.ones(10, nIn, 15);
final INDArray activations4 = layer.activate(dataMultiExampleLength15, false, LayerWorkspaceMgr.noWorkspaces());
assertArrayEquals(activations4.shape(), new long[] {10, nHiddenUnits, 15});
}
else{
final INDArray dataSingleExampleTimeLength1 = Nd4j.ones(1, 1, nIn);
final INDArray activations1 = layer.activate(dataSingleExampleTimeLength1, false, LayerWorkspaceMgr.noWorkspaces());
assertArrayEquals(activations1.shape(), new long[] {1, 1, nHiddenUnits});
final INDArray dataMultiExampleLength1 = Nd4j.ones(10, 1, nIn);
final INDArray activations2 = layer.activate(dataMultiExampleLength1, false, LayerWorkspaceMgr.noWorkspaces());
assertArrayEquals(activations2.shape(), new long[] {10, 1, nHiddenUnits});
final INDArray dataSingleExampleLength12 = Nd4j.ones(1, 12, nIn);
final INDArray activations3 = layer.activate(dataSingleExampleLength12, false, LayerWorkspaceMgr.noWorkspaces());
assertArrayEquals(activations3.shape(), new long[] {1, 12, nHiddenUnits});
final INDArray dataMultiExampleLength15 = Nd4j.ones(10, 15, nIn);
final INDArray activations4 = layer.activate(dataMultiExampleLength15, false, LayerWorkspaceMgr.noWorkspaces());
assertArrayEquals(activations4.shape(), new long[] {10, 15, nHiddenUnits});
}
final INDArray dataMultiExampleLength15 = Nd4j.ones(10, nIn, 15);
final INDArray activations4 = layer.activate(dataMultiExampleLength15, false, LayerWorkspaceMgr.noWorkspaces());
assertArrayEquals(activations4.shape(), new long[] {10, nHiddenUnits, 15});
}
@Test
@ -94,14 +124,15 @@ public class GravesBidirectionalLSTMTest extends BaseDL4JTest {
testGravesBackwardBasicHelper(13, 3, 17, 1, 1); //Edge case: both miniBatchSize = 1 and timeSeriesLength = 1
}
private static void testGravesBackwardBasicHelper(int nIn, int nOut, int lstmNHiddenUnits, int miniBatchSize,
private void testGravesBackwardBasicHelper(int nIn, int nOut, int lstmNHiddenUnits, int miniBatchSize,
int timeSeriesLength) {
INDArray inputData = Nd4j.ones(miniBatchSize, nIn, timeSeriesLength);
INDArray inputData = (rnnDataFormat == RNNFormat.NCW)?Nd4j.ones(miniBatchSize, nIn, timeSeriesLength):
Nd4j.ones(miniBatchSize, timeSeriesLength, nIn);
NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
.layer(new org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM.Builder().nIn(nIn)
.nOut(lstmNHiddenUnits)
.nOut(lstmNHiddenUnits).dataFormat(rnnDataFormat)
.dist(new UniformDistribution(0, 1)).activation(Activation.TANH).build())
.build();
@ -114,7 +145,8 @@ public class GravesBidirectionalLSTMTest extends BaseDL4JTest {
lstm.activate(inputData, false, LayerWorkspaceMgr.noWorkspaces());
assertNotNull(lstm.input());
INDArray epsilon = Nd4j.ones(miniBatchSize, lstmNHiddenUnits, timeSeriesLength);
INDArray epsilon =(rnnDataFormat == RNNFormat.NCW)? Nd4j.ones(miniBatchSize, lstmNHiddenUnits, timeSeriesLength):
Nd4j.ones(miniBatchSize, timeSeriesLength, lstmNHiddenUnits);
Pair<Gradient, INDArray> out = lstm.backpropGradient(epsilon, LayerWorkspaceMgr.noWorkspaces());
Gradient outGradient = out.getFirst();
@ -147,7 +179,11 @@ public class GravesBidirectionalLSTMTest extends BaseDL4JTest {
assertArrayEquals(recurrentWeightGradientB.shape(), new long[] {lstmNHiddenUnits, 4 * lstmNHiddenUnits + 3});
assertNotNull(nextEpsilon);
assertArrayEquals(nextEpsilon.shape(), new long[] {miniBatchSize, nIn, timeSeriesLength});
if (rnnDataFormat == RNNFormat.NCW) {
assertArrayEquals(nextEpsilon.shape(), new long[]{miniBatchSize, nIn, timeSeriesLength});
}else{
assertArrayEquals(nextEpsilon.shape(), new long[]{miniBatchSize, timeSeriesLength, nIn });
}
//Check update:
for (String s : outGradient.gradientForVariable().keySet()) {
@ -226,7 +262,7 @@ public class GravesBidirectionalLSTMTest extends BaseDL4JTest {
final NeuralNetConfiguration confBidirectional = new NeuralNetConfiguration.Builder()
.layer(new org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM.Builder().nIn(nIn)
.nOut(layerSize)
.nOut(layerSize).dataFormat(rnnDataFormat)
.dist(new UniformDistribution(-0.1, 0.1)).activation(Activation.TANH).build())
.build();
@ -237,7 +273,8 @@ public class GravesBidirectionalLSTMTest extends BaseDL4JTest {
.instantiate(confBidirectional, null, 0, params, true, params.dataType());
final INDArray sig = Nd4j.rand(new int[] {miniBatchSize, nIn, timeSeriesLength});
final INDArray sig = (rnnDataFormat == RNNFormat.NCW)?Nd4j.rand(new int[] {miniBatchSize, nIn, timeSeriesLength}):
Nd4j.rand(new int[] {miniBatchSize, timeSeriesLength, nIn});
final INDArray act1 = bidirectionalLSTM.activate(sig, false, LayerWorkspaceMgr.noWorkspaces());
@ -265,13 +302,13 @@ public class GravesBidirectionalLSTMTest extends BaseDL4JTest {
final NeuralNetConfiguration confBidirectional =
new NeuralNetConfiguration.Builder()
.layer(new org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM.Builder()
.nIn(nIn).nOut(layerSize)
.nIn(nIn).nOut(layerSize).dataFormat(rnnDataFormat)
.dist(new UniformDistribution(-0.1, 0.1))
.activation(Activation.TANH).updater(new NoOp()).build())
.build();
final NeuralNetConfiguration confForwards = new NeuralNetConfiguration.Builder()
.layer(new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(nIn).nOut(layerSize)
.layer(new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(nIn).nOut(layerSize).dataFormat(rnnDataFormat)
.weightInit(WeightInit.ZERO).activation(Activation.TANH).build())
.build();
@ -290,9 +327,16 @@ public class GravesBidirectionalLSTMTest extends BaseDL4JTest {
Nd4j.create(1, confForwards.getLayer().initializer().numParams(confForwards)));
final INDArray sig = Nd4j.rand(new int[] {miniBatchSize, nIn, timeSeriesLength});
final INDArray sig = (rnnDataFormat == RNNFormat.NCW)?Nd4j.rand(new int[] {miniBatchSize, nIn, timeSeriesLength}):
Nd4j.rand(new int[] {miniBatchSize, timeSeriesLength, nIn});
final INDArray sigb = sig.dup();
reverseColumnsInPlace(sigb.slice(0));
if (rnnDataFormat == RNNFormat.NCW) {
reverseColumnsInPlace(sigb.slice(0));
}
else{
reverseColumnsInPlace(sigb.slice(0).permute(1, 0));
}
final INDArray recurrentWeightsF = bidirectionalLSTM
.getParam(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_FORWARDS);
@ -345,10 +389,14 @@ public class GravesBidirectionalLSTMTest extends BaseDL4JTest {
assertArrayEquals(activation1.data().asFloat(), activation2.data().asFloat(), 1e-5f);
final INDArray randSig = Nd4j.rand(new int[] {1, layerSize, timeSeriesLength});
final INDArray randSigBackwards = randSig.dup();
reverseColumnsInPlace(randSigBackwards.slice(0));
final INDArray randSig = (rnnDataFormat == RNNFormat.NCW)?Nd4j.rand(new int[] {1, layerSize, timeSeriesLength}):
Nd4j.rand(new int[] {1, timeSeriesLength, layerSize});
INDArray randSigBackwards = randSig.dup();
if (rnnDataFormat == RNNFormat.NCW){
reverseColumnsInPlace(randSigBackwards.slice(0));
}else{
reverseColumnsInPlace(randSigBackwards.slice(0).permute(1, 0));
}
final Pair<Gradient, INDArray> backprop1 = forwardsLSTM.backpropGradient(randSig, LayerWorkspaceMgr.noWorkspaces());
final Pair<Gradient, INDArray> backprop2 = bidirectionalLSTM.backpropGradient(randSig, LayerWorkspaceMgr.noWorkspaces());
@ -399,10 +447,16 @@ public class GravesBidirectionalLSTMTest extends BaseDL4JTest {
final INDArray activation3 = bidirectionalLSTM.activate(sigb, false, LayerWorkspaceMgr.noWorkspaces()).slice(0);
final INDArray activation3Reverse = activation3.dup();
reverseColumnsInPlace(activation3Reverse);
if (rnnDataFormat == RNNFormat.NCW){
reverseColumnsInPlace(activation3Reverse);
}
else{
reverseColumnsInPlace(activation3Reverse.permute(1, 0));
}
assertEquals(activation3Reverse, activation1);
assertArrayEquals(activation3Reverse.shape(), activation1.shape());
assertEquals(activation3Reverse, activation1);
//test backprop now
final INDArray refBackGradientReccurrent =
@ -434,7 +488,12 @@ public class GravesBidirectionalLSTMTest extends BaseDL4JTest {
final INDArray refEpsilon = backprop1.getSecond().dup();
final INDArray backEpsilon = backprop3.getSecond().dup();
reverseColumnsInPlace(refEpsilon.slice(0));
if (rnnDataFormat == RNNFormat.NCW) {
reverseColumnsInPlace(refEpsilon.slice(0));
}
else{
reverseColumnsInPlace(refEpsilon.slice(0).permute(1, 0));
}
assertArrayEquals(backEpsilon.dup().data().asDouble(), refEpsilon.dup().data().asDouble(), 1e-6);
}
@ -477,10 +536,10 @@ public class GravesBidirectionalLSTMTest extends BaseDL4JTest {
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.seed(12345).list()
.layer(0, new org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM.Builder()
.gateActivationFunction(gateAfn).activation(Activation.TANH).nIn(2).nOut(2)
.gateActivationFunction(gateAfn).activation(Activation.TANH).nIn(2).nOut(2).dataFormat(rnnDataFormat)
.build())
.layer(1, new org.deeplearning4j.nn.conf.layers.RnnOutputLayer.Builder()
.lossFunction(LossFunctions.LossFunction.MSE).nIn(2).nOut(2)
.lossFunction(LossFunctions.LossFunction.MSE).nIn(2).nOut(2).dataFormat(rnnDataFormat)
.activation(Activation.TANH).build())
.build();
@ -492,7 +551,10 @@ public class GravesBidirectionalLSTMTest extends BaseDL4JTest {
INDArray in = Nd4j.rand(new int[] {3, 2, 5});
INDArray labels = Nd4j.rand(new int[] {3, 2, 5});
if (rnnDataFormat == RNNFormat.NWC){
in = in.permute(0, 2, 1);
labels = labels.permute(0, 2, 1);
}
net.fit(in, labels);
}
}

View File

@ -21,11 +21,14 @@ import org.deeplearning4j.TestUtils;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.RNNFormat;
import org.deeplearning4j.nn.conf.layers.LSTM;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr;
import org.deeplearning4j.optimize.api.TrainingListener;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
@ -36,9 +39,17 @@ import java.util.Collections;
import static org.junit.Assert.assertEquals;
@RunWith(Parameterized.class)
public class MaskZeroLayerTest extends BaseDL4JTest {
private RNNFormat rnnDataFormat;
public MaskZeroLayerTest(RNNFormat rnnDataFormat){
this.rnnDataFormat = rnnDataFormat;
}
@Parameterized.Parameters
public static Object[] params(){
return RNNFormat.values();
}
@Test
public void activate() {
@ -57,7 +68,7 @@ public class MaskZeroLayerTest extends BaseDL4JTest {
.activation(Activation.IDENTITY)
.gateActivationFunction(Activation.IDENTITY)
.nIn(2)
.nOut(1)
.nOut(1).dataFormat(rnnDataFormat)
.build();
NeuralNetConfiguration conf = new NeuralNetConfiguration();
conf.setLayer(underlying);
@ -72,20 +83,25 @@ public class MaskZeroLayerTest extends BaseDL4JTest {
MaskZeroLayer l = new MaskZeroLayer(lstm, maskingValue);
INDArray input = Nd4j.create(Arrays.asList(ex1, ex2), new int[]{2, 2, 3});
if (rnnDataFormat == RNNFormat.NWC){
input = input.permute(0, 2, 1);
}
//WHEN
INDArray out = l.activate(input, true, LayerWorkspaceMgr.noWorkspaces());
if (rnnDataFormat == RNNFormat.NWC){
out = out.permute(0, 2,1);
}
//THEN output should only be incremented for the non-zero timesteps
INDArray firstExampleOutput = out.get(NDArrayIndex.point(0), NDArrayIndex.all(), NDArrayIndex.all());
INDArray secondExampleOutput = out.get(NDArrayIndex.point(1), NDArrayIndex.all(), NDArrayIndex.all());
assertEquals(firstExampleOutput.getDouble(0), 0.0, 1e-6);
assertEquals(firstExampleOutput.getDouble(1), 1.0, 1e-6);
assertEquals(firstExampleOutput.getDouble(2), 2.0, 1e-6);
assertEquals(0.0, firstExampleOutput.getDouble(0), 1e-6);
assertEquals(1.0, firstExampleOutput.getDouble(1), 1e-6);
assertEquals(2.0, firstExampleOutput.getDouble(2), 1e-6);
assertEquals(secondExampleOutput.getDouble(0), 0.0, 1e-6);
assertEquals(secondExampleOutput.getDouble(1), 0.0, 1e-6);
assertEquals(secondExampleOutput.getDouble(2), 1.0, 1e-6);
assertEquals(0.0, secondExampleOutput.getDouble(0), 1e-6);
assertEquals(0.0, secondExampleOutput.getDouble(1), 1e-6);
assertEquals(1.0, secondExampleOutput.getDouble(2), 1e-6);
}
@ -94,7 +110,7 @@ public class MaskZeroLayerTest extends BaseDL4JTest {
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.list()
.layer(new org.deeplearning4j.nn.conf.layers.util.MaskZeroLayer.Builder()
.setMaskValue(0.0).setUnderlying(new LSTM.Builder().nIn(4).nOut(5).build()).build())
.setMaskValue(0.0).setUnderlying(new LSTM.Builder().nIn(4).nOut(5).dataFormat(rnnDataFormat).build()).build())
.build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();

View File

@ -0,0 +1,394 @@
/*******************************************************************************
* Copyright (c) 2020 Konduit K.K.
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* SPDX-License-Identifier: Apache-2.0
******************************************************************************/
package org.deeplearning4j.nn.layers.recurrent;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import org.deeplearning4j.BaseDL4JTest;
import org.deeplearning4j.TestUtils;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.RNNFormat;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.*;
import org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM;
import org.deeplearning4j.nn.conf.layers.GravesLSTM;
import org.deeplearning4j.nn.conf.layers.LSTM;
import org.deeplearning4j.nn.conf.layers.RnnOutputLayer;
import org.deeplearning4j.nn.conf.layers.recurrent.LastTimeStep;
import org.deeplearning4j.nn.conf.layers.recurrent.SimpleRnn;
import org.deeplearning4j.nn.conf.layers.util.MaskZeroLayer;
import org.deeplearning4j.nn.gradient.Gradient;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.primitives.Pair;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import static org.junit.Assert.assertEquals;
// Parameterized tests verifying that DL4J recurrent layers behave identically for both
// RNN data formats (NCW: [minibatch, size, seqLength] and NWC: [minibatch, seqLength, size]).
// For each layer type, four equivalent networks are built:
//   net1: NCW, format set explicitly on the layer   net2: NCW, format inherited from input type
//   net3: NWC, format set explicitly on the layer   net4: NWC, format inherited from input type
// TestCase.testHelper then checks that activations, outputs, input/parameter gradients,
// parameters after fitting, and serialized models all agree across the four networks
// (permuting NWC results back to NCW where the output is rank 3).
@RunWith(Parameterized.class)
@AllArgsConstructor
public class RnnDataFormatTests extends BaseDL4JTest {

    // Whether cuDNN/MKL-DNN helper implementations are allowed for this run.
    private boolean helpers;
    // If true, wrap the tested layer in LastTimeStep (rank-2 [mb, nOut] output
    // instead of the full rank-3 sequence output).
    private boolean lastTimeStep;
    // If true, wrap the tested layer in a MaskZeroLayer (mask value 0.0).
    private boolean maskZeros;

    // Cartesian product of the three boolean flags -> 8 configurations per layer type.
    @Parameterized.Parameters(name = "helpers={0},lastTimeStep={1},maskZero={2}")
    public static List params(){
        List<Object[]> ret = new ArrayList<>();
        for (boolean helpers: new boolean[]{true, false})
            for (boolean lastTimeStep: new boolean[]{true, false})
                for (boolean maskZero: new boolean[]{true, false})
                    ret.add(new Object[]{helpers, lastTimeStep, maskZero});
        return ret;
    }

    /**
     * NCW-vs-NWC equivalence test for {@link SimpleRnn}.
     */
    @Test
    public void testSimpleRnn() {
        try {
            Nd4j.getRandom().setSeed(12345);
            Nd4j.getEnvironment().allowHelpers(helpers);
            String msg = "Helpers: " + helpers + ", lastTimeStep: " + lastTimeStep + ", maskZeros: " + maskZeros;
            System.out.println(" --- " + msg + " ---");

            // Input is always created in NCW shape [2, 3, 12]; testHelper permutes it
            // to NWC for the NWC networks.
            INDArray inNCW = Nd4j.rand(DataType.FLOAT, 2, 3, 12);

            // Rank-2 labels [2, 10] when using LastTimeStep; otherwise rank-3 NWC labels [2, 12, 10].
            INDArray labelsNWC = (lastTimeStep) ?TestUtils.randomOneHot(2, 10): TestUtils.randomOneHot(2 * 12, 10).reshape(2, 12, 10);

            TestCase tc = TestCase.builder()
                    .msg(msg)
                    .net1(getSimpleRnnNet(RNNFormat.NCW, true, lastTimeStep, maskZeros))
                    .net2(getSimpleRnnNet(RNNFormat.NCW, false, lastTimeStep, maskZeros))
                    .net3(getSimpleRnnNet(RNNFormat.NWC, true, lastTimeStep, maskZeros))
                    .net4(getSimpleRnnNet(RNNFormat.NWC, false, lastTimeStep, maskZeros))
                    .inNCW(inNCW)
                    // Rank-2 labels are format-independent; rank-3 NWC labels are permuted to NCW.
                    .labelsNCW((lastTimeStep)? labelsNWC: labelsNWC.permute(0, 2, 1))
                    .labelsNWC(labelsNWC)
                    .testLayerIdx(1)
                    .build();

            TestCase.testHelper(tc);
        } finally {
            // Restore the global helper setting so later tests are unaffected.
            Nd4j.getEnvironment().allowHelpers(true);
        }
    }

    /**
     * NCW-vs-NWC equivalence test for {@link LSTM}. Same structure as {@link #testSimpleRnn()}.
     */
    @Test
    public void testLSTM() {
        try {
            Nd4j.getRandom().setSeed(12345);
            Nd4j.getEnvironment().allowHelpers(helpers);
            String msg = "Helpers: " + helpers + ", lastTimeStep: " + lastTimeStep + ", maskZeros: " + maskZeros;
            System.out.println(" --- " + msg + " ---");

            INDArray inNCW = Nd4j.rand(DataType.FLOAT, 2, 3, 12);

            INDArray labelsNWC = (lastTimeStep) ?TestUtils.randomOneHot(2, 10): TestUtils.randomOneHot(2 * 12, 10).reshape(2, 12, 10);

            TestCase tc = TestCase.builder()
                    .msg(msg)
                    .net1(getLstmNet(RNNFormat.NCW, true, lastTimeStep, maskZeros))
                    .net2(getLstmNet(RNNFormat.NCW, false, lastTimeStep, maskZeros))
                    .net3(getLstmNet(RNNFormat.NWC, true, lastTimeStep, maskZeros))
                    .net4(getLstmNet(RNNFormat.NWC, false, lastTimeStep, maskZeros))
                    .inNCW(inNCW)
                    .labelsNCW((lastTimeStep)? labelsNWC: labelsNWC.permute(0, 2, 1))
                    .labelsNWC(labelsNWC)
                    .testLayerIdx(1)
                    .build();

            TestCase.testHelper(tc);
        } finally {
            Nd4j.getEnvironment().allowHelpers(true);
        }
    }

    /**
     * NCW-vs-NWC equivalence test for {@link GravesLSTM}. Same structure as {@link #testSimpleRnn()}.
     */
    @Test
    public void testGraveLSTM() {
        try {
            Nd4j.getRandom().setSeed(12345);
            Nd4j.getEnvironment().allowHelpers(helpers);
            String msg = "Helpers: " + helpers + ", lastTimeStep: " + lastTimeStep + ", maskZeros: " + maskZeros;
            System.out.println(" --- " + msg + " ---");

            INDArray inNCW = Nd4j.rand(DataType.FLOAT, 2, 3, 12);

            INDArray labelsNWC = (lastTimeStep) ?TestUtils.randomOneHot(2, 10): TestUtils.randomOneHot(2 * 12, 10).reshape(2, 12, 10);

            TestCase tc = TestCase.builder()
                    .msg(msg)
                    .net1(getGravesLstmNet(RNNFormat.NCW, true, lastTimeStep, maskZeros))
                    .net2(getGravesLstmNet(RNNFormat.NCW, false, lastTimeStep, maskZeros))
                    .net3(getGravesLstmNet(RNNFormat.NWC, true, lastTimeStep, maskZeros))
                    .net4(getGravesLstmNet(RNNFormat.NWC, false, lastTimeStep, maskZeros))
                    .inNCW(inNCW)
                    .labelsNCW((lastTimeStep)? labelsNWC: labelsNWC.permute(0, 2, 1))
                    .labelsNWC(labelsNWC)
                    .testLayerIdx(1)
                    .build();

            TestCase.testHelper(tc);
        } finally {
            Nd4j.getEnvironment().allowHelpers(true);
        }
    }

    /**
     * NCW-vs-NWC equivalence test for {@link GravesBidirectionalLSTM}.
     * Same structure as {@link #testSimpleRnn()}.
     */
    @Test
    public void testGraveBiLSTM() {
        try {
            Nd4j.getRandom().setSeed(12345);
            Nd4j.getEnvironment().allowHelpers(helpers);
            String msg = "Helpers: " + helpers + ", lastTimeStep: " + lastTimeStep + ", maskZeros: " + maskZeros;
            System.out.println(" --- " + msg + " ---");

            INDArray inNCW = Nd4j.rand(DataType.FLOAT, 2, 3, 12);

            INDArray labelsNWC = (lastTimeStep) ?TestUtils.randomOneHot(2, 10): TestUtils.randomOneHot(2 * 12, 10).reshape(2, 12, 10);

            TestCase tc = TestCase.builder()
                    .msg(msg)
                    .net1(getGravesBidirectionalLstmNet(RNNFormat.NCW, true, lastTimeStep, maskZeros))
                    .net2(getGravesBidirectionalLstmNet(RNNFormat.NCW, false, lastTimeStep, maskZeros))
                    .net3(getGravesBidirectionalLstmNet(RNNFormat.NWC, true, lastTimeStep, maskZeros))
                    .net4(getGravesBidirectionalLstmNet(RNNFormat.NWC, false, lastTimeStep, maskZeros))
                    .inNCW(inNCW)
                    .labelsNCW((lastTimeStep)? labelsNWC: labelsNWC.permute(0, 2, 1))
                    .labelsNWC(labelsNWC)
                    .testLayerIdx(1)
                    .build();

            TestCase.testHelper(tc);
        } finally {
            Nd4j.getEnvironment().allowHelpers(true);
        }
    }

    // Builds a net around a GravesBidirectionalLSTM layer; setOnLayerAlso controls whether
    // the data format is set explicitly on the layer or inherited from the input type.
    private MultiLayerNetwork getGravesBidirectionalLstmNet(RNNFormat format, boolean setOnLayerAlso, boolean lastTimeStep, boolean maskZeros) {
        if (setOnLayerAlso) {
            return getNetWithLayer(new GravesBidirectionalLSTM.Builder().nOut(3)
                    .dataFormat(format).build(), format, lastTimeStep, maskZeros);
        } else {
            return getNetWithLayer(new GravesBidirectionalLSTM.Builder().nOut(3).build(), format, lastTimeStep, maskZeros);
        }
    }

    // Builds a net around a GravesLSTM layer; see getGravesBidirectionalLstmNet for semantics.
    private MultiLayerNetwork getGravesLstmNet(RNNFormat format, boolean setOnLayerAlso, boolean lastTimeStep, boolean maskZeros) {
        if (setOnLayerAlso) {
            return getNetWithLayer(new GravesLSTM.Builder().nOut(3)
                    .dataFormat(format).build(), format, lastTimeStep, maskZeros);
        } else {
            return getNetWithLayer(new GravesLSTM.Builder().nOut(3).build(), format, lastTimeStep, maskZeros);
        }
    }

    // Builds a net around an LSTM layer; see getGravesBidirectionalLstmNet for semantics.
    private MultiLayerNetwork getLstmNet(RNNFormat format, boolean setOnLayerAlso, boolean lastTimeStep, boolean maskZeros) {
        if (setOnLayerAlso) {
            return getNetWithLayer(new LSTM.Builder().nOut(3)
                    .dataFormat(format).build(), format, lastTimeStep, maskZeros);
        } else {
            return getNetWithLayer(new LSTM.Builder().nOut(3).build(), format, lastTimeStep, maskZeros);
        }
    }

    // Builds a net around a SimpleRnn layer; see getGravesBidirectionalLstmNet for semantics.
    private MultiLayerNetwork getSimpleRnnNet(RNNFormat format, boolean setOnLayerAlso, boolean lastTimeStep, boolean maskZeros) {
        if (setOnLayerAlso) {
            return getNetWithLayer(new SimpleRnn.Builder().nOut(3)
                    .dataFormat(format).build(), format, lastTimeStep, maskZeros);
        } else {
            return getNetWithLayer(new SimpleRnn.Builder().nOut(3).build(), format, lastTimeStep, maskZeros);
        }
    }

    /**
     * Wraps the given recurrent layer (optionally in MaskZeroLayer and/or LastTimeStep) and
     * builds a 3-layer network: LSTM(3->3) -> tested layer -> output layer. The output layer
     * is a plain OutputLayer (rank-2) when lastTimeStep is set, otherwise an RnnOutputLayer.
     *
     * @param layer        the recurrent layer under test (index 1 in the resulting net)
     * @param format       data format applied via the network's recurrent InputType
     * @param lastTimeStep whether to wrap the layer in LastTimeStep
     * @param maskZeros    whether to wrap the layer in MaskZeroLayer (mask value 0.0)
     * @return an initialized MultiLayerNetwork
     */
    private MultiLayerNetwork getNetWithLayer(Layer layer, RNNFormat format, boolean lastTimeStep, boolean maskZeros) {
        if (maskZeros){
            layer = new MaskZeroLayer.Builder().setMaskValue(0.).setUnderlying(layer).build();
        }
        if(lastTimeStep){
            layer = new LastTimeStep(layer);
        }
        NeuralNetConfiguration.ListBuilder builder = new NeuralNetConfiguration.Builder()
                .seed(12345)
                .list()
                .layer(new LSTM.Builder()
                        .nIn(3)
                        .activation(Activation.TANH)
                        .dataFormat(format)
                        .nOut(3)
                        // Fail loudly rather than silently falling back from a helper implementation.
                        .helperAllowFallback(false)
                        .build())
                .layer(layer)
                .layer(
                        (lastTimeStep)?new OutputLayer.Builder().activation(Activation.SOFTMAX).nOut(10).build():
                                new RnnOutputLayer.Builder().activation(Activation.SOFTMAX).nOut(10).dataFormat(format).build()
                )
                .setInputType(InputType.recurrent(3, 12, format));

        MultiLayerNetwork net = new MultiLayerNetwork(builder.build());
        net.init();
        return net;
    }

    /**
     * Bundle of the four networks under comparison plus inputs/labels in both formats.
     * net1/net2 consume NCW data; net3/net4 consume NWC data.
     */
    @AllArgsConstructor
    @Data
    @NoArgsConstructor
    @Builder
    private static class TestCase {
        // Description of the current parameter combination, prepended to every assertion.
        private String msg;
        private MultiLayerNetwork net1;
        private MultiLayerNetwork net2;
        private MultiLayerNetwork net3;
        private MultiLayerNetwork net4;
        // Input in NCW format; the NWC input is derived by permuting dims (0, 2, 1).
        private INDArray inNCW;
        private INDArray labelsNCW;
        private INDArray labelsNWC;
        // Index of the layer under test; feedForward activations are read at testLayerIdx + 1.
        private int testLayerIdx;
        // NOTE(review): appears unused in this class — confirm before removing.
        private boolean nwcOutput;

        /**
         * Asserts equivalence of the four networks: identical activations at the tested layer,
         * identical outputs, identical input and parameter gradients, identical parameters
         * after one fit, and identical outputs after a serialization round-trip. NWC results
         * are permuted back to NCW for comparison when the output is rank 3.
         */
        public static void testHelper(TestCase tc) {
            // All four networks start from the same parameters as net1.
            tc.net2.params().assign(tc.net1.params());
            tc.net3.params().assign(tc.net1.params());
            tc.net4.params().assign(tc.net1.params());

            INDArray inNCW = tc.inNCW;
            INDArray inNWC = tc.inNCW.permute(0, 2, 1).dup();

            // Activations of the layer AFTER the tested layer (feedForward list includes the input at index 0).
            INDArray l0_1 = tc.net1.feedForward(inNCW).get(tc.testLayerIdx + 1);
            INDArray l0_2 = tc.net2.feedForward(inNCW).get(tc.testLayerIdx + 1);
            INDArray l0_3 = tc.net3.feedForward(inNWC).get(tc.testLayerIdx + 1);
            INDArray l0_4 = tc.net4.feedForward(inNWC).get(tc.testLayerIdx + 1);

            // Rank-3 (sequence) outputs need the NWC->NCW permutation before comparing;
            // rank-2 (LastTimeStep) outputs are format-independent.
            boolean rank3Out = tc.labelsNCW.rank() == 3;
            assertEquals(tc.msg, l0_1, l0_2);
            if (rank3Out){
                assertEquals(tc.msg, l0_1, l0_3.permute(0, 2, 1));
                assertEquals(tc.msg, l0_1, l0_4.permute(0, 2, 1));
            }
            else{
                assertEquals(tc.msg, l0_1, l0_3);
                assertEquals(tc.msg, l0_1, l0_4);
            }

            INDArray out1 = tc.net1.output(inNCW);
            INDArray out2 = tc.net2.output(inNCW);
            INDArray out3 = tc.net3.output(inNWC);
            INDArray out4 = tc.net4.output(inNWC);

            assertEquals(tc.msg, out1, out2);
            if (rank3Out){
                assertEquals(tc.msg, out1, out3.permute(0, 2, 1)); //NWC to NCW
                assertEquals(tc.msg, out1, out4.permute(0, 2, 1));
            }
            else{
                assertEquals(tc.msg, out1, out3); //NWC to NCW
                assertEquals(tc.msg, out1, out4);
            }

            //Test backprop
            Pair<Gradient, INDArray> p1 = tc.net1.calculateGradients(inNCW, tc.labelsNCW, null, null);
            Pair<Gradient, INDArray> p2 = tc.net2.calculateGradients(inNCW, tc.labelsNCW, null, null);
            Pair<Gradient, INDArray> p3 = tc.net3.calculateGradients(inNWC, tc.labelsNWC, null, null);
            Pair<Gradient, INDArray> p4 = tc.net4.calculateGradients(inNWC, tc.labelsNWC, null, null);

            //Input gradients
            assertEquals(tc.msg, p1.getSecond(), p2.getSecond());
            assertEquals(tc.msg, p1.getSecond(), p3.getSecond().permute(0, 2, 1)); //Input gradients for NWC input are also in NWC format
            assertEquals(tc.msg, p1.getSecond(), p4.getSecond().permute(0, 2, 1));

            // Collect the names of differing parameter gradients first so that assertion
            // failures report WHICH variables differ, not just that something did.
            List<String> diff12 = differentGrads(p1.getFirst(), p2.getFirst());
            List<String> diff13 = differentGrads(p1.getFirst(), p3.getFirst());
            List<String> diff14 = differentGrads(p1.getFirst(), p4.getFirst());
            assertEquals(tc.msg + " " + diff12, 0, diff12.size());
            assertEquals(tc.msg + " " + diff13, 0, diff13.size());
            assertEquals(tc.msg + " " + diff14, 0, diff14.size());

            assertEquals(tc.msg, p1.getFirst().gradientForVariable(), p2.getFirst().gradientForVariable());
            assertEquals(tc.msg, p1.getFirst().gradientForVariable(), p3.getFirst().gradientForVariable());
            assertEquals(tc.msg, p1.getFirst().gradientForVariable(), p4.getFirst().gradientForVariable());

            // One fit step must leave all four networks with identical parameters.
            tc.net1.fit(inNCW, tc.labelsNCW);
            tc.net2.fit(inNCW, tc.labelsNCW);
            tc.net3.fit(inNWC, tc.labelsNWC);
            tc.net4.fit(inNWC, tc.labelsNWC);

            assertEquals(tc.msg, tc.net1.params(), tc.net2.params());
            assertEquals(tc.msg, tc.net1.params(), tc.net3.params());
            assertEquals(tc.msg, tc.net1.params(), tc.net4.params());

            //Test serialization
            MultiLayerNetwork net1a = TestUtils.testModelSerialization(tc.net1);
            MultiLayerNetwork net2a = TestUtils.testModelSerialization(tc.net2);
            MultiLayerNetwork net3a = TestUtils.testModelSerialization(tc.net3);
            MultiLayerNetwork net4a = TestUtils.testModelSerialization(tc.net4);

            // Deserialized networks must reproduce the (post-fit) outputs.
            out1 = tc.net1.output(inNCW);
            assertEquals(tc.msg, out1, net1a.output(inNCW));
            assertEquals(tc.msg, out1, net2a.output(inNCW));

            if (rank3Out) {
                assertEquals(tc.msg, out1, net3a.output(inNWC).permute(0, 2, 1)); //NWC to NCW
                assertEquals(tc.msg, out1, net4a.output(inNWC).permute(0, 2, 1));
            }
            else{
                assertEquals(tc.msg, out1, net3a.output(inNWC)); //NWC to NCW
                assertEquals(tc.msg, out1, net4a.output(inNWC));
            }
        }
    }

    /**
     * Returns the names of gradient variables present in g1 whose arrays differ from g2's.
     * Note: iterates g1's keys only, so variables present only in g2 are not reported.
     */
    private static List<String> differentGrads(Gradient g1, Gradient g2){
        List<String> differs = new ArrayList<>();
        Map<String,INDArray> m1 = g1.gradientForVariable();
        Map<String,INDArray> m2 = g2.gradientForVariable();
        for(String s : m1.keySet()){
            INDArray a1 = m1.get(s);
            INDArray a2 = m2.get(s);
            if(!a1.equals(a2)){
                differs.add(s);
            }
        }
        return differs;
    }
}

View File

@ -21,6 +21,7 @@ import org.deeplearning4j.TestUtils;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.RNNFormat;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.LSTM;
@ -29,6 +30,8 @@ import org.deeplearning4j.nn.conf.layers.recurrent.LastTimeStep;
import org.deeplearning4j.nn.conf.layers.recurrent.SimpleRnn;
import org.deeplearning4j.nn.graph.ComputationGraph;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.NDArrayIndex;
@ -42,14 +45,25 @@ import static org.nd4j.linalg.activations.Activation.IDENTITY;
import static org.nd4j.linalg.activations.Activation.TANH;
import static org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction.MSE;
@RunWith(Parameterized.class)
public class TestLastTimeStepLayer extends BaseDL4JTest {
private RNNFormat rnnDataFormat;
public TestLastTimeStepLayer(RNNFormat rnnDataFormat){
this.rnnDataFormat = rnnDataFormat;
}
@Parameterized.Parameters(name="{0}")
public static Object[] params(){
return RNNFormat.values();
}
@Test
public void testLastTimeStepVertex() {
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().graphBuilder().addInputs("in")
.addLayer("lastTS", new LastTimeStep(new SimpleRnn.Builder()
.nIn(5).nOut(6).build()), "in")
.nIn(5).nOut(6).dataFormat(rnnDataFormat).build()), "in")
.setOutputs("lastTS")
.build();
@ -59,9 +73,22 @@ public class TestLastTimeStepLayer extends BaseDL4JTest {
//First: test without input mask array
Nd4j.getRandom().setSeed(12345);
Layer l = graph.getLayer("lastTS");
INDArray in = Nd4j.rand(new int[]{3, 5, 6});
INDArray in;
if (rnnDataFormat == RNNFormat.NCW){
in = Nd4j.rand(3, 5, 6);
}
else{
in = Nd4j.rand(3, 6, 5);
}
INDArray outUnderlying = ((LastTimeStepLayer)l).getUnderlying().activate(in, false, LayerWorkspaceMgr.noWorkspaces());
INDArray expOut = outUnderlying.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.point(5));
INDArray expOut;
if (rnnDataFormat == RNNFormat.NCW){
expOut = outUnderlying.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.point(5));
}
else{
expOut = outUnderlying.get(NDArrayIndex.all(), NDArrayIndex.point(5), NDArrayIndex.all());
}
//Forward pass:
@ -76,9 +103,17 @@ public class TestLastTimeStepLayer extends BaseDL4JTest {
graph.setLayerMaskArrays(new INDArray[]{inMask}, null);
expOut = Nd4j.zeros(3, 6);
expOut.putRow(0, outUnderlying.get(NDArrayIndex.point(0), NDArrayIndex.all(), NDArrayIndex.point(2)));
expOut.putRow(1, outUnderlying.get(NDArrayIndex.point(1), NDArrayIndex.all(), NDArrayIndex.point(3)));
expOut.putRow(2, outUnderlying.get(NDArrayIndex.point(2), NDArrayIndex.all(), NDArrayIndex.point(4)));
if (rnnDataFormat == RNNFormat.NCW){
expOut.putRow(0, outUnderlying.get(NDArrayIndex.point(0), NDArrayIndex.all(), NDArrayIndex.point(2)));
expOut.putRow(1, outUnderlying.get(NDArrayIndex.point(1), NDArrayIndex.all(), NDArrayIndex.point(3)));
expOut.putRow(2, outUnderlying.get(NDArrayIndex.point(2), NDArrayIndex.all(), NDArrayIndex.point(4)));
}
else{
expOut.putRow(0, outUnderlying.get(NDArrayIndex.point(0), NDArrayIndex.point(2), NDArrayIndex.all()));
expOut.putRow(1, outUnderlying.get(NDArrayIndex.point(1), NDArrayIndex.point(3), NDArrayIndex.all()));
expOut.putRow(2, outUnderlying.get(NDArrayIndex.point(2), NDArrayIndex.point(4), NDArrayIndex.all()));
}
outFwd = l.activate(in, false, LayerWorkspaceMgr.noWorkspaces());
assertEquals(expOut, outFwd);
@ -97,9 +132,9 @@ public class TestLastTimeStepLayer extends BaseDL4JTest {
.seed(1234)
.graphBuilder()
.addInputs("in")
.setInputTypes(InputType.recurrent(1))
.setInputTypes(InputType.recurrent(1, rnnDataFormat))
.addLayer("RNN", new LastTimeStep(new LSTM.Builder()
.nOut(10)
.nOut(10).dataFormat(rnnDataFormat)
.build()), "in")
.addLayer("dense", new DenseLayer.Builder()
.nOut(10)
@ -120,7 +155,9 @@ public class TestLastTimeStepLayer extends BaseDL4JTest {
INDArray fm2 = Nd4j.zeros(1,24);
INDArray fm3 = Nd4j.zeros(1,24);
fm3.get(NDArrayIndex.point(0), NDArrayIndex.interval(0,5)).assign(1);
if (rnnDataFormat == RNNFormat.NWC){
f = f.permute(0, 2, 1);
}
INDArray[] out1 = cg.output(false, new INDArray[]{f}, new INDArray[]{fm1});
try {
cg.output(false, new INDArray[]{f}, new INDArray[]{fm2});

View File

@ -20,6 +20,7 @@ import org.deeplearning4j.BaseDL4JTest;
import org.deeplearning4j.TestUtils;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.RNNFormat;
import org.deeplearning4j.nn.conf.dropout.TestDropout;
import org.deeplearning4j.nn.conf.layers.GravesLSTM;
import org.deeplearning4j.nn.conf.layers.LSTM;
@ -31,6 +32,8 @@ import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.weights.WeightInit;
import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
@ -41,13 +44,24 @@ import org.nd4j.linalg.primitives.Pair;
import java.util.Arrays;
import java.util.List;
import java.util.Random;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertTrue;
@RunWith(Parameterized.class)
public class TestRnnLayers extends BaseDL4JTest {
private RNNFormat rnnDataFormat;
public TestRnnLayers(RNNFormat rnnDataFormat){
this.rnnDataFormat = rnnDataFormat;
}
@Parameterized.Parameters
public static Object[] params(){
return RNNFormat.values();
}
@Test
public void testTimeStepIs3Dimensional() {
@ -58,8 +72,8 @@ public class TestRnnLayers extends BaseDL4JTest {
.updater(new NoOp())
.weightInit(WeightInit.XAVIER)
.list()
.layer(new SimpleRnn.Builder().nIn(nIn).nOut(3).build())
.layer(new LSTM.Builder().nIn(3).nOut(5).build())
.layer(new SimpleRnn.Builder().nIn(nIn).nOut(3).dataFormat(rnnDataFormat).build())
.layer(new LSTM.Builder().nIn(3).nOut(5).dataFormat(rnnDataFormat).build())
.layer(new RnnOutputLayer.Builder().nOut(nOut).activation(Activation.SOFTMAX).build())
.build();
@ -70,9 +84,9 @@ public class TestRnnLayers extends BaseDL4JTest {
org.deeplearning4j.nn.layers.recurrent.SimpleRnn simpleRnn =
(org.deeplearning4j.nn.layers.recurrent.SimpleRnn) net.getLayer(0);
INDArray rnnInput3d = Nd4j.create(10, 12, 1);
INDArray rnnInput3d = (rnnDataFormat==RNNFormat.NCW)?Nd4j.create(10,12, 1):Nd4j.create(10, 1, 12);
INDArray simpleOut = simpleRnn.rnnTimeStep(rnnInput3d, LayerWorkspaceMgr.noWorkspaces());
assertTrue(Arrays.equals(simpleOut.shape(), new long[] {10, 3, 1}));
assertTrue(Arrays.equals(simpleOut.shape(), (rnnDataFormat==RNNFormat.NCW)?new long[] {10, 3, 1}:new long[]{10, 1, 3}));
INDArray rnnInput2d = Nd4j.create(10, 12);
try {
@ -84,9 +98,9 @@ public class TestRnnLayers extends BaseDL4JTest {
org.deeplearning4j.nn.layers.recurrent.LSTM lstm =
(org.deeplearning4j.nn.layers.recurrent.LSTM) net.getLayer(1);
INDArray lstmInput3d = Nd4j.create(10, 3, 1);
INDArray lstmInput3d = (rnnDataFormat==RNNFormat.NCW)?Nd4j.create(10, 3, 1):Nd4j.create(10, 1, 3);
INDArray lstmOut = lstm.rnnTimeStep(lstmInput3d, LayerWorkspaceMgr.noWorkspaces());
assertTrue(Arrays.equals(lstmOut.shape(), new long[] {10, 5, 1}));
assertTrue(Arrays.equals(lstmOut.shape(), (rnnDataFormat==RNNFormat.NCW)?new long[] {10, 5, 1}:new long[]{10, 1, 5}));
INDArray lstmInput2d = Nd4j.create(10, 3);
try {
@ -112,19 +126,19 @@ public class TestRnnLayers extends BaseDL4JTest {
TestDropout.CustomDropout cd = new TestDropout.CustomDropout();
switch (s){
case "graves":
layer = new GravesLSTM.Builder().activation(Activation.TANH).nIn(10).nOut(10).build();
layerD = new GravesLSTM.Builder().dropOut(0.5).activation(Activation.TANH).nIn(10).nOut(10).build();
layerD2 = new GravesLSTM.Builder().dropOut(cd).activation(Activation.TANH).nIn(10).nOut(10).build();
layer = new GravesLSTM.Builder().activation(Activation.TANH).nIn(10).nOut(10).dataFormat(rnnDataFormat).build();
layerD = new GravesLSTM.Builder().dropOut(0.5).activation(Activation.TANH).nIn(10).nOut(10).dataFormat(rnnDataFormat).build();
layerD2 = new GravesLSTM.Builder().dropOut(cd).activation(Activation.TANH).nIn(10).nOut(10).dataFormat(rnnDataFormat).build();
break;
case "lstm":
layer = new org.deeplearning4j.nn.conf.layers.LSTM.Builder().activation(Activation.TANH).nIn(10).nOut(10).build();
layerD = new org.deeplearning4j.nn.conf.layers.LSTM.Builder().dropOut(0.5).activation(Activation.TANH).nIn(10).nOut(10).build();
layerD2 = new org.deeplearning4j.nn.conf.layers.LSTM.Builder().dropOut(cd).activation(Activation.TANH).nIn(10).nOut(10).build();
layer = new org.deeplearning4j.nn.conf.layers.LSTM.Builder().activation(Activation.TANH).nIn(10).nOut(10).dataFormat(rnnDataFormat).build();
layerD = new org.deeplearning4j.nn.conf.layers.LSTM.Builder().dropOut(0.5).activation(Activation.TANH).nIn(10).nOut(10).dataFormat(rnnDataFormat).build();
layerD2 = new org.deeplearning4j.nn.conf.layers.LSTM.Builder().dropOut(cd).activation(Activation.TANH).nIn(10).nOut(10).dataFormat(rnnDataFormat).build();
break;
case "simple":
layer = new SimpleRnn.Builder().activation(Activation.TANH).nIn(10).nOut(10).build();
layerD = new SimpleRnn.Builder().dropOut(0.5).activation(Activation.TANH).nIn(10).nOut(10).build();
layerD2 = new SimpleRnn.Builder().dropOut(cd).activation(Activation.TANH).nIn(10).nOut(10).build();
layer = new SimpleRnn.Builder().activation(Activation.TANH).nIn(10).nOut(10).dataFormat(rnnDataFormat).build();
layerD = new SimpleRnn.Builder().dropOut(0.5).activation(Activation.TANH).nIn(10).nOut(10).dataFormat(rnnDataFormat).build();
layerD2 = new SimpleRnn.Builder().dropOut(cd).activation(Activation.TANH).nIn(10).nOut(10).dataFormat(rnnDataFormat).build();
break;
default:
throw new RuntimeException(s);
@ -134,21 +148,21 @@ public class TestRnnLayers extends BaseDL4JTest {
.seed(12345)
.list()
.layer(layer)
.layer(new RnnOutputLayer.Builder().activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(10).build())
.layer(new RnnOutputLayer.Builder().activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(10).dataFormat(rnnDataFormat).build())
.build();
MultiLayerConfiguration confD = new NeuralNetConfiguration.Builder()
.seed(12345)
.list()
.layer(layerD)
.layer(new RnnOutputLayer.Builder().activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(10).build())
.layer(new RnnOutputLayer.Builder().activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(10).dataFormat(rnnDataFormat).build())
.build();
MultiLayerConfiguration confD2 = new NeuralNetConfiguration.Builder()
.seed(12345)
.list()
.layer(layerD2)
.layer(new RnnOutputLayer.Builder().activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(10).build())
.layer(new RnnOutputLayer.Builder().activation(Activation.TANH).lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(10).dataFormat(rnnDataFormat).build())
.build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
@ -178,7 +192,6 @@ public class TestRnnLayers extends BaseDL4JTest {
assertNotEquals(s, out2, out2D);
INDArray l = TestUtils.randomOneHotTimeSeries(3, 10, 10, 12345);
net.fit(f.dup(), l);
netD.fit(f.dup(), l);
assertNotEquals(s, net.params(), netD.params());
@ -205,14 +218,14 @@ public class TestRnnLayers extends BaseDL4JTest {
NeuralNetConfiguration.ListBuilder lb = new NeuralNetConfiguration.Builder()
.list()
.layer(new SimpleRnn.Builder().nIn(5).nOut(5).build());
.layer(new SimpleRnn.Builder().nIn(5).nOut(5).dataFormat(rnnDataFormat).build());
switch (i){
case 0:
lb.layer(new RnnOutputLayer.Builder().activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).nIn(5).nOut(5).build());
lb.layer(new RnnOutputLayer.Builder().activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).nIn(5).nOut(5).dataFormat(rnnDataFormat).build());
break;
case 1:
lb.layer(new RnnLossLayer.Builder().activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build());
lb.layer(new RnnLossLayer.Builder().activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).dataFormat(rnnDataFormat).build());
break;
default:
throw new RuntimeException();
@ -223,14 +236,14 @@ public class TestRnnLayers extends BaseDL4JTest {
net.init();
INDArray in = Nd4j.rand(DataType.FLOAT, 3, 5, 5);
INDArray l = TestUtils.randomOneHotTimeSeries(3, 5, 10);
INDArray l = TestUtils.randomOneHotTimeSeries(rnnDataFormat, 3, 5, 10, new Random(12345));
try{
net.fit(in,l);
} catch (Throwable t){
String msg = t.getMessage();
if(msg == null)
t.printStackTrace();
System.out.println(i);
assertTrue(msg, msg != null && msg.contains("sequence length") && msg.contains("input") && msg.contains("label"));
}

View File

@ -20,10 +20,13 @@ import org.deeplearning4j.BaseDL4JTest;
import org.deeplearning4j.TestUtils;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.RNNFormat;
import org.deeplearning4j.nn.conf.layers.recurrent.SimpleRnn;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.weights.WeightInit;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
@ -36,8 +39,18 @@ import static org.nd4j.linalg.indexing.NDArrayIndex.all;
import static org.nd4j.linalg.indexing.NDArrayIndex.interval;
import static org.nd4j.linalg.indexing.NDArrayIndex.point;
@RunWith(Parameterized.class)
public class TestSimpleRnn extends BaseDL4JTest {
private RNNFormat rnnDataFormat;
public TestSimpleRnn(RNNFormat rnnDataFormat){
this.rnnDataFormat = rnnDataFormat;
}
@Parameterized.Parameters
public static Object[] params(){
return RNNFormat.values();
}
@Test
public void testSimpleRnn(){
Nd4j.getRandom().setSeed(12345);
@ -46,15 +59,21 @@ public class TestSimpleRnn extends BaseDL4JTest {
int nIn = 5;
int layerSize = 6;
int tsLength = 7;
INDArray in = Nd4j.rand(DataType.FLOAT, new int[]{m, nIn, tsLength});
// in.get(all(), all(), interval(1,tsLength)).assign(0);
INDArray in;
if (rnnDataFormat == RNNFormat.NCW){
in = Nd4j.rand(DataType.FLOAT, m, nIn, tsLength);
}
else{
in = Nd4j.rand(DataType.FLOAT, m, tsLength, nIn);
}
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.updater(new NoOp())
.weightInit(WeightInit.XAVIER)
.activation(Activation.TANH)
.list()
.layer(new SimpleRnn.Builder().nIn(nIn).nOut(layerSize).build())
.layer(new SimpleRnn.Builder().nIn(nIn).nOut(layerSize).dataFormat(rnnDataFormat).build())
.build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
@ -68,7 +87,13 @@ public class TestSimpleRnn extends BaseDL4JTest {
INDArray outLast = null;
for( int i=0; i<tsLength; i++ ){
INDArray inCurrent = in.get(all(), all(), point(i));
INDArray inCurrent;
if (rnnDataFormat == RNNFormat.NCW){
inCurrent = in.get(all(), all(), point(i));
}
else{
inCurrent = in.get(all(), point(i), all());
}
INDArray outExpCurrent = inCurrent.mmul(w);
if(outLast != null){
@ -79,7 +104,13 @@ public class TestSimpleRnn extends BaseDL4JTest {
Transforms.tanh(outExpCurrent, false);
INDArray outActCurrent = out.get(all(), all(), point(i));
INDArray outActCurrent;
if (rnnDataFormat == RNNFormat.NCW){
outActCurrent = out.get(all(), all(), point(i));
}
else{
outActCurrent = out.get(all(), point(i), all());
}
assertEquals(String.valueOf(i), outExpCurrent, outActCurrent);
outLast = outExpCurrent;
@ -100,7 +131,7 @@ public class TestSimpleRnn extends BaseDL4JTest {
.weightInit(WeightInit.XAVIER)
.activation(Activation.TANH)
.list()
.layer(new SimpleRnn.Builder().nIn(nIn).nOut(layerSize)
.layer(new SimpleRnn.Builder().nIn(nIn).nOut(layerSize).dataFormat(rnnDataFormat)
.biasInit(100)
.build())
.build();

View File

@ -4,14 +4,21 @@ import org.deeplearning4j.BaseDL4JTest;
import org.deeplearning4j.TestUtils;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.RNNFormat;
import org.deeplearning4j.nn.conf.WorkspaceMode;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.*;
import org.deeplearning4j.nn.conf.layers.BaseRecurrentLayer;
import org.deeplearning4j.nn.conf.layers.LSTM;
import org.deeplearning4j.nn.conf.layers.RnnOutputLayer;
import org.deeplearning4j.nn.conf.layers.recurrent.Bidirectional;
import org.deeplearning4j.nn.conf.layers.recurrent.SimpleRnn;
import org.deeplearning4j.nn.conf.layers.recurrent.TimeDistributed;
import org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
@ -22,8 +29,18 @@ import org.nd4j.linalg.lossfunctions.LossFunctions;
import static org.junit.Assert.assertEquals;
@RunWith(Parameterized.class)
public class TestTimeDistributed extends BaseDL4JTest {
private RNNFormat rnnDataFormat;
public TestTimeDistributed(RNNFormat rnnDataFormat){
this.rnnDataFormat = rnnDataFormat;
}
@Parameterized.Parameters
public static Object[] params(){
return RNNFormat.values();
}
@Test
public void testTimeDistributed(){
for(WorkspaceMode wsm : new WorkspaceMode[]{WorkspaceMode.ENABLED, WorkspaceMode.NONE}) {
@ -34,11 +51,11 @@ public class TestTimeDistributed extends BaseDL4JTest {
.seed(12345)
.updater(new Adam(0.1))
.list()
.layer(new LSTM.Builder().nIn(3).nOut(3).build())
.layer(new LSTM.Builder().nIn(3).nOut(3).dataFormat(rnnDataFormat).build())
.layer(new DenseLayer.Builder().nIn(3).nOut(3).activation(Activation.TANH).build())
.layer(new RnnOutputLayer.Builder().nIn(3).nOut(3).activation(Activation.SOFTMAX)
.layer(new RnnOutputLayer.Builder().nIn(3).nOut(3).activation(Activation.SOFTMAX).dataFormat(rnnDataFormat)
.lossFunction(LossFunctions.LossFunction.MCXENT).build())
.setInputType(InputType.recurrent(3))
.setInputType(InputType.recurrent(3, rnnDataFormat))
.build();
MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder()
@ -47,11 +64,11 @@ public class TestTimeDistributed extends BaseDL4JTest {
.seed(12345)
.updater(new Adam(0.1))
.list()
.layer(new LSTM.Builder().nIn(3).nOut(3).build())
.layer(new TimeDistributed(new DenseLayer.Builder().nIn(3).nOut(3).activation(Activation.TANH).build(), 2))
.layer(new RnnOutputLayer.Builder().nIn(3).nOut(3).activation(Activation.SOFTMAX)
.layer(new LSTM.Builder().nIn(3).nOut(3).dataFormat(rnnDataFormat).build())
.layer(new TimeDistributed(new DenseLayer.Builder().nIn(3).nOut(3).activation(Activation.TANH).build(), rnnDataFormat))
.layer(new RnnOutputLayer.Builder().nIn(3).nOut(3).activation(Activation.SOFTMAX).dataFormat(rnnDataFormat)
.lossFunction(LossFunctions.LossFunction.MCXENT).build())
.setInputType(InputType.recurrent(3))
.setInputType(InputType.recurrent(3, rnnDataFormat))
.build();
MultiLayerNetwork net1 = new MultiLayerNetwork(conf1);
@ -62,13 +79,21 @@ public class TestTimeDistributed extends BaseDL4JTest {
for( int mb : new int[]{1, 5}) {
for(char inLabelOrder : new char[]{'c', 'f'}) {
INDArray in = Nd4j.rand(DataType.FLOAT, mb, 3, 5).dup(inLabelOrder);
if (rnnDataFormat == RNNFormat.NWC){
in = in.permute(0, 2, 1);
}
INDArray out1 = net1.output(in);
INDArray out2 = net2.output(in);
assertEquals(out1, out2);
INDArray labels = TestUtils.randomOneHotTimeSeries(mb, 3, 5).dup(inLabelOrder);
INDArray labels ;
if (rnnDataFormat == RNNFormat.NCW) {
labels = TestUtils.randomOneHotTimeSeries(mb, 3, 5).dup(inLabelOrder);
}else{
labels = TestUtils.randomOneHotTimeSeries(mb, 5, 3).dup(inLabelOrder);
}
DataSet ds = new DataSet(in, labels);
net1.fit(ds);
@ -85,4 +110,73 @@ public class TestTimeDistributed extends BaseDL4JTest {
}
}
}
@Test
public void testTimeDistributedDense(){
// Exhaustively checks that the RNN data format (NCW/NWC, from the @Parameterized field
// rnnDataFormat) set via InputType.recurrent(...) propagates to recurrent layers when a
// feed-forward layer is sandwiched between them (implicit time-distribution).
// rnnType: 0=LSTM, 1=SimpleRnn, 2=Bidirectional(LSTM); ffType: 0=Dense, 1=VAE, 2=AutoEncoder.
for( int rnnType=0; rnnType<3; rnnType++ ) {
for( int ffType=0; ffType<3; ffType++ ) {
// l0/l2 are the recurrent layers surrounding the feed-forward layer l1.
// Note: no explicit dataFormat is set on them - the test verifies it is inferred.
Layer l0, l2;
switch (rnnType) {
case 0:
l0 = new LSTM.Builder().nOut(5).build();
l2 = new LSTM.Builder().nOut(5).build();
break;
case 1:
l0 = new SimpleRnn.Builder().nOut(5).build();
l2 = new SimpleRnn.Builder().nOut(5).build();
break;
case 2:
l0 = new Bidirectional(new LSTM.Builder().nOut(5).build());
l2 = new Bidirectional(new LSTM.Builder().nOut(5).build());
break;
default:
throw new RuntimeException("Not implemented: " + rnnType);
}
// The feed-forward layer applied per time step between the two recurrent layers.
Layer l1;
switch (ffType){
case 0:
l1 = new DenseLayer.Builder().nOut(5).build();
break;
case 1:
l1 = new VariationalAutoencoder.Builder().nOut(5).encoderLayerSizes(5).decoderLayerSizes(5).build();
break;
case 2:
l1 = new AutoEncoder.Builder().nOut(5).build();
break;
default:
throw new RuntimeException("Not implemented: " + ffType);
}
// InputType.recurrent(nIn=5, timeSeriesLength=9, format) is the single source of the
// data format; setInputType should push it down into each recurrent layer's config.
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.activation(Activation.TANH)
.list()
.layer(l0)
.layer(l1)
.layer(l2)
.setInputType(InputType.recurrent(5, 9, rnnDataFormat))
.build();
// Unwrap Bidirectional (rnnType==2) to reach the underlying BaseRecurrentLayer config,
// since Bidirectional itself is a wrapper, not a BaseRecurrentLayer.
BaseRecurrentLayer l0a;
BaseRecurrentLayer l2a;
if (rnnType < 2) {
l0a = (BaseRecurrentLayer) l0;
l2a = (BaseRecurrentLayer) l2;
} else {
l0a = (BaseRecurrentLayer) ((Bidirectional) l0).getFwd();
l2a = (BaseRecurrentLayer) ((Bidirectional) l2).getFwd();
}
// Core assertion: the format from setInputType reached both recurrent layers.
assertEquals(rnnDataFormat, l0a.getRnnDataFormat());
assertEquals(rnnDataFormat, l2a.getRnnDataFormat());
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
// Smoke test a forward pass with input shaped per format:
// NCW -> [minibatch=2, channels=5, width=9]; NWC -> [2, 9, 5].
INDArray in = Nd4j.rand(DataType.FLOAT, rnnDataFormat == RNNFormat.NCW ? new long[]{2, 5, 9} : new long[]{2, 9, 5} );
net.output(in);
}
}
}
}

View File

@ -771,7 +771,7 @@ public class MultiLayerTestRNN extends BaseDL4JTest {
.build();
fail("Exception expected");
} catch (IllegalStateException e){
// e.printStackTrace();
log.info(e.toString());
assertTrue(e.getMessage().contains("TBPTT") && e.getMessage().contains("validateTbpttConfig"));
}
}

View File

@ -408,8 +408,8 @@ public class TestVariableLengthTS extends BaseDL4JTest {
INDArray outRow2 = out2.get(NDArrayIndex.point(i), NDArrayIndex.all(),
NDArrayIndex.point(j));
for (int k = 0; k < nOut; k++) {
assertEquals(outRow.getDouble(k), 0.0, 0.0);
assertEquals(outRow2.getDouble(k), 0.0, 0.0);
assertEquals(0.0, outRow.getDouble(k), 0.0);
assertEquals(0.0, outRow2.getDouble(k), 0.0);
}
}
}

View File

@ -336,7 +336,7 @@ public class TestUpdaters extends BaseDL4JTest {
actualM[i] = Math.round(actualM[i] * 1e2) / 1e2;
}
assertEquals("Wrong weight gradient after first iteration's update", Arrays.equals(actualM, expectedM), true);
assertEquals("Wrong weight gradient after first iteration's update", Arrays.equals(expectedM, actualM), true);
}

View File

@ -159,14 +159,14 @@ public class RegressionTest050 extends BaseDL4JTest {
assertArrayEquals(new int[] {2, 2}, l0.getKernelSize());
assertArrayEquals(new int[] {1, 1}, l0.getStride());
assertArrayEquals(new int[] {0, 0}, l0.getPadding());
assertEquals(l0.getConvolutionMode(), ConvolutionMode.Truncate); //Pre-0.7.0: no ConvolutionMode. Want to default to truncate here if not set
assertEquals(ConvolutionMode.Truncate, l0.getConvolutionMode()); //Pre-0.7.0: no ConvolutionMode. Want to default to truncate here if not set
SubsamplingLayer l1 = (SubsamplingLayer) conf.getConf(1).getLayer();
assertArrayEquals(new int[] {2, 2}, l1.getKernelSize());
assertArrayEquals(new int[] {1, 1}, l1.getStride());
assertArrayEquals(new int[] {0, 0}, l1.getPadding());
assertEquals(PoolingType.MAX, l1.getPoolingType());
assertEquals(l1.getConvolutionMode(), ConvolutionMode.Truncate); //Pre-0.7.0: no ConvolutionMode. Want to default to truncate here if not set
assertEquals(ConvolutionMode.Truncate, l1.getConvolutionMode()); //Pre-0.7.0: no ConvolutionMode. Want to default to truncate here if not set
OutputLayer l2 = (OutputLayer) conf.getConf(2).getLayer();
assertEquals("sigmoid", l2.getActivationFn().toString());

View File

@ -162,14 +162,14 @@ public class RegressionTest060 extends BaseDL4JTest {
assertArrayEquals(new int[] {2, 2}, l0.getKernelSize());
assertArrayEquals(new int[] {1, 1}, l0.getStride());
assertArrayEquals(new int[] {0, 0}, l0.getPadding());
assertEquals(l0.getConvolutionMode(), ConvolutionMode.Truncate); //Pre-0.7.0: no ConvolutionMode. Want to default to truncate here if not set
assertEquals(ConvolutionMode.Truncate, l0.getConvolutionMode()); //Pre-0.7.0: no ConvolutionMode. Want to default to truncate here if not set
SubsamplingLayer l1 = (SubsamplingLayer) conf.getConf(1).getLayer();
assertArrayEquals(new int[] {2, 2}, l1.getKernelSize());
assertArrayEquals(new int[] {1, 1}, l1.getStride());
assertArrayEquals(new int[] {0, 0}, l1.getPadding());
assertEquals(PoolingType.MAX, l1.getPoolingType());
assertEquals(l1.getConvolutionMode(), ConvolutionMode.Truncate); //Pre-0.7.0: no ConvolutionMode. Want to default to truncate here if not set
assertEquals(ConvolutionMode.Truncate, l1.getConvolutionMode()); //Pre-0.7.0: no ConvolutionMode. Want to default to truncate here if not set
OutputLayer l2 = (OutputLayer) conf.getConf(2).getLayer();
assertEquals("sigmoid", l2.getActivationFn().toString());

View File

@ -162,7 +162,7 @@ public class RegressionTest071 extends BaseDL4JTest {
assertArrayEquals(new int[] {2, 2}, l0.getKernelSize());
assertArrayEquals(new int[] {1, 1}, l0.getStride());
assertArrayEquals(new int[] {0, 0}, l0.getPadding());
assertEquals(l0.getConvolutionMode(), ConvolutionMode.Same);
assertEquals(ConvolutionMode.Same, l0.getConvolutionMode());
SubsamplingLayer l1 = (SubsamplingLayer) conf.getConf(1).getLayer();
assertArrayEquals(new int[] {2, 2}, l1.getKernelSize());

View File

@ -176,14 +176,14 @@ public class RegressionTest080 extends BaseDL4JTest {
assertArrayEquals(new int[] {2, 2}, l0.getKernelSize());
assertArrayEquals(new int[] {1, 1}, l0.getStride());
assertArrayEquals(new int[] {0, 0}, l0.getPadding());
assertEquals(l0.getConvolutionMode(), ConvolutionMode.Same);
assertEquals(ConvolutionMode.Same, l0.getConvolutionMode());
SubsamplingLayer l1 = (SubsamplingLayer) conf.getConf(1).getLayer();
assertArrayEquals(new int[] {2, 2}, l1.getKernelSize());
assertArrayEquals(new int[] {1, 1}, l1.getStride());
assertArrayEquals(new int[] {0, 0}, l1.getPadding());
assertEquals(PoolingType.MAX, l1.getPoolingType());
assertEquals(l1.getConvolutionMode(), ConvolutionMode.Same);
assertEquals(ConvolutionMode.Same, l1.getConvolutionMode());
OutputLayer l2 = (OutputLayer) conf.getConf(2).getLayer();
assertTrue(l2.getActivationFn() instanceof ActivationSigmoid);

View File

@ -178,8 +178,6 @@ public abstract class BaseCudnnHelper {
}
}
protected static final int TENSOR_FORMAT = CUDNN_TENSOR_NCHW;
protected final DataType nd4jDataType;
protected final int dataType;
protected final int dataTypeSize;

View File

@ -1,5 +1,6 @@
/*******************************************************************************
* Copyright (c) 2015-2018 Skymind, Inc.
* Copyright (c) 2020 Konduit K.K.
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
@ -22,6 +23,7 @@ import lombok.extern.slf4j.Slf4j;
import lombok.val;
import com.jakewharton.byteunits.BinaryByteUnit;
import org.bytedeco.javacpp.Pointer;
import org.deeplearning4j.nn.conf.CNN2DFormat;
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer.AlgoMode;
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer.BwdDataAlgo;
@ -86,7 +88,7 @@ public class CudnnConvolutionHelper extends BaseCudnnHelper implements Convoluti
}
private cudnnTensorStruct srcTensorDesc = new cudnnTensorStruct(), dstTensorDesc = new cudnnTensorStruct(),
biasTensorDesc = new cudnnTensorStruct(), deltaTensorDesc = new cudnnTensorStruct();
biasTensorDesc = new cudnnTensorStruct(), deltaTensorDesc = new cudnnTensorStruct();
private cudnnFilterStruct filterDesc = new cudnnFilterStruct();
private cudnnConvolutionStruct convDesc = new cudnnConvolutionStruct();
private cudnnActivationStruct activationDesc = new cudnnActivationStruct();
@ -138,7 +140,21 @@ public class CudnnConvolutionHelper extends BaseCudnnHelper implements Convoluti
public Pair<Gradient, INDArray> backpropGradient(INDArray input, INDArray weights, INDArray bias, INDArray delta, int[] kernel,
int[] strides, int[] pad, INDArray biasGradView, INDArray weightGradView, IActivation afn,
AlgoMode mode, BwdFilterAlgo bwdFilterAlgo, BwdDataAlgo bwdDataAlgo,
ConvolutionMode convolutionMode, int[] dilation, LayerWorkspaceMgr workspaceMgr) {
ConvolutionMode convolutionMode, int[] dilation, CNN2DFormat format, LayerWorkspaceMgr workspaceMgr) {
//AB 2020/04/21 - cuDNN does have NHWC support (with limitations) however I have been unable to get it working
// correctly on NHWC data, even after updating all descriptors, tensor format, etc.
//Therefore: all computation here is done in NCHW format only
//As of a future (next?) release we'll likely switch to C++ for cuDNN support
boolean origNHWC = false;
if(format == CNN2DFormat.NHWC){
input = input.permute(0,3,1,2); //NHWC to NCHW
delta = delta.permute(0,3,1,2);
origNHWC = true;
}
int TENSOR_FORMAT = CUDNN_TENSOR_NCHW;
int code;
val miniBatch = input.size(0);
@ -147,7 +163,7 @@ public class CudnnConvolutionHelper extends BaseCudnnHelper implements Convoluti
val kH = weights.size(2);
val kW = weights.size(3);
CudnnForwardArgs args = getCudnnForwardArgs(input, kernel, strides, pad, dilation, convolutionMode, null);
CudnnForwardArgs args = getCudnnForwardArgs(input, kernel, strides, pad, dilation, convolutionMode, null, CNN2DFormat.NCHW); //Note hardcoded NCHW due to above
input = args.getInput();
val inH = input.size(2);
val inW = input.size(3);
@ -176,7 +192,7 @@ public class CudnnConvolutionHelper extends BaseCudnnHelper implements Convoluti
(int) deltaStride[0], (int) deltaStride[1], (int) deltaStride[2], (int) deltaStride[3]);
checkCudnn(false, "cudnnSetTensor4dDescriptorEx", code, input, weights, null, delta, kernel, strides, pad, mode, null, bwdFilterAlgo, bwdDataAlgo, convolutionMode, dilation);
code = cudnnSetConvolution2dDescriptor(cudnnContext.convDesc, pad[0], pad[1], strides[0], strides[1], dilation[0],
dilation[1], CUDNN_CROSS_CORRELATION, dataType);
dilation[1], CUDNN_CROSS_CORRELATION, dataType);
checkCudnn(false, "cudnnSetConvolution2dDescriptor", code, input, weights, null, delta, kernel, strides, pad, mode, null, bwdFilterAlgo, bwdDataAlgo, convolutionMode, dilation);
code = cudnnSetFilter4dDescriptor(cudnnContext.filterDesc, dataType, TENSOR_FORMAT, (int) outDepth, (int) inDepth, (int) kH, (int) kW);
checkCudnn(false, "cudnnSetFilter4dDescriptor", code, input, weights, null, delta, kernel, strides, pad, mode, null, bwdFilterAlgo, bwdDataAlgo, convolutionMode, dilation);
@ -238,16 +254,16 @@ public class CudnnConvolutionHelper extends BaseCudnnHelper implements Convoluti
}
} else {
code = cudnnGetConvolutionBackwardFilterAlgorithm(cudnnContext, cudnnContext.srcTensorDesc,
cudnnContext.deltaTensorDesc, cudnnContext.convDesc, cudnnContext.filterDesc,
mode == AlgoMode.NO_WORKSPACE ? CUDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE
: CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST,
0, algo1);
cudnnContext.deltaTensorDesc, cudnnContext.convDesc, cudnnContext.filterDesc,
mode == AlgoMode.NO_WORKSPACE ? CUDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE
: CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST,
0, algo1);
checkCudnn(false, "cudnnGetConvolutionBackwardFilterAlgorithm", code, input, weights, null, delta, kernel, strides, pad, mode, null, bwdFilterAlgo, bwdDataAlgo, convolutionMode, dilation);
code = cudnnGetConvolutionBackwardDataAlgorithm(cudnnContext, cudnnContext.filterDesc,
cudnnContext.deltaTensorDesc, cudnnContext.convDesc, cudnnContext.srcTensorDesc,
mode == AlgoMode.NO_WORKSPACE ? CUDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE
: CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST,
0, algo2);
cudnnContext.deltaTensorDesc, cudnnContext.convDesc, cudnnContext.srcTensorDesc,
mode == AlgoMode.NO_WORKSPACE ? CUDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE
: CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST,
0, algo2);
checkCudnn(false, "cudnnGetConvolutionBackwardDataAlgorithm", code, input, weights, null, delta, kernel, strides, pad, mode, null, bwdFilterAlgo, bwdDataAlgo, convolutionMode, dilation);
}
@ -263,7 +279,7 @@ public class CudnnConvolutionHelper extends BaseCudnnHelper implements Convoluti
Allocator allocator = AtomicAllocator.getInstance();
CudaContext context = allocator.getFlowController().prepareActionAllWrite(input, weights, weightGradView,
biasGradView, delta, epsNext);
biasGradView, delta, epsNext);
Pointer srcData = allocator.getPointer(input, context);
Pointer filterData = allocator.getPointer(weights, context);
Pointer filterGradData = allocator.getPointer(weightGradView, context);
@ -279,14 +295,14 @@ public class CudnnConvolutionHelper extends BaseCudnnHelper implements Convoluti
checkCudnn(false, "cudnnSetTensor4dDescriptorEx", code, input, weights, null, delta, kernel, strides, pad, mode, null, bwdFilterAlgo, bwdDataAlgo, convolutionMode, dilation);
code = cudnnGetConvolutionBackwardFilterWorkspaceSize(cudnnContext, cudnnContext.srcTensorDesc,
cudnnContext.deltaTensorDesc, cudnnContext.convDesc, cudnnContext.filterDesc, algo1[0],
sizeInBytes);
cudnnContext.deltaTensorDesc, cudnnContext.convDesc, cudnnContext.filterDesc, algo1[0],
sizeInBytes);
checkCudnn(false, "cudnnGetConvolutionBackwardFilterWorkspaceSize", code, input, weights, null, delta, kernel, strides, pad, mode, null, bwdFilterAlgo, bwdDataAlgo, convolutionMode, dilation);
long sizeInBytes1 = sizeInBytes.get(0);
code = cudnnGetConvolutionBackwardDataWorkspaceSize(cudnnContext, cudnnContext.filterDesc,
cudnnContext.deltaTensorDesc, cudnnContext.convDesc, cudnnContext.dstTensorDesc, algo2[0],
sizeInBytes);
cudnnContext.deltaTensorDesc, cudnnContext.convDesc, cudnnContext.dstTensorDesc, algo2[0],
sizeInBytes);
checkCudnn(false, "cudnnGetConvolutionBackwardDataWorkspaceSize", code, input, weights, null, delta, kernel, strides, pad, mode, null, bwdFilterAlgo, bwdDataAlgo, convolutionMode, dilation);
DataCache workSpace = workspaceMgr.getHelperWorkspace(LayerWorkspaceMgr.CUDNN_WORKSPACE_KEY);
@ -313,21 +329,21 @@ public class CudnnConvolutionHelper extends BaseCudnnHelper implements Convoluti
checkCudnn(false, "cudnnSetTensor4dDescriptor", code, input, weights, null, delta, kernel, strides, pad, mode, null, bwdFilterAlgo, bwdDataAlgo, convolutionMode, dilation);
code = cudnnConvolutionBackwardBias(cudnnContext, alpha, cudnnContext.deltaTensorDesc, deltaData, beta,
cudnnContext.biasTensorDesc, biasGradData);
cudnnContext.biasTensorDesc, biasGradData);
checkCudnn(false, "cudnnConvolutionBackwardBias", code, input, weights, null, delta, kernel, strides, pad, mode, null, bwdFilterAlgo, bwdDataAlgo, convolutionMode, dilation);
code = cudnnConvolutionBackwardFilter(cudnnContext, alpha, cudnnContext.srcTensorDesc, srcData,
cudnnContext.deltaTensorDesc, deltaData, cudnnContext.convDesc, algo1[0], workSpace,
workSpace.capacity(), beta, cudnnContext.filterDesc, filterGradData);
cudnnContext.deltaTensorDesc, deltaData, cudnnContext.convDesc, algo1[0], workSpace,
workSpace.capacity(), beta, cudnnContext.filterDesc, filterGradData);
checkCudnn(false, "cudnnConvolutionBackwardFilter", code, input, weights, null, delta, kernel, strides, pad, mode, null, bwdFilterAlgo, bwdDataAlgo, convolutionMode, dilation);
code = cudnnConvolutionBackwardData(cudnnContext, alpha, cudnnContext.filterDesc, filterData,
cudnnContext.deltaTensorDesc, deltaData, cudnnContext.convDesc, algo2[0], workSpace,
workSpace.capacity(), beta, cudnnContext.dstTensorDesc, dstData);
cudnnContext.deltaTensorDesc, deltaData, cudnnContext.convDesc, algo2[0], workSpace,
workSpace.capacity(), beta, cudnnContext.dstTensorDesc, dstData);
checkCudnn(false, "cudnnConvolutionBackwardData", code, input, weights, null, delta, kernel, strides, pad, mode, null, bwdFilterAlgo, bwdDataAlgo, convolutionMode, dilation);
allocator.getFlowController().registerActionAllWrite(context, input, weights, weightGradView, biasGradView,
delta, epsNext);
delta, epsNext);
Gradient retGradient = new DefaultGradient();
retGradient.setGradientFor(ConvolutionParamInitializer.BIAS_KEY, biasGradView);
@ -344,12 +360,30 @@ public class CudnnConvolutionHelper extends BaseCudnnHelper implements Convoluti
interval(0, epsNext.size(3) - (args.isManualPadRight() ? 1 : 0)));
}
if(origNHWC){
epsNext = epsNext.permute(0,2,3,1); //NCHW to NHWC
}
return new Pair<>(retGradient, epsNext);
}
@Override
public INDArray preOutput(INDArray input, INDArray weights, INDArray bias, int[] kernel, int[] strides, int[] pad,
AlgoMode mode, FwdAlgo fwdAlgo, ConvolutionMode convolutionMode, int[] dilation, LayerWorkspaceMgr workspaceMgr) {
AlgoMode mode, FwdAlgo fwdAlgo, ConvolutionMode convolutionMode, int[] dilation, CNN2DFormat format,
LayerWorkspaceMgr workspaceMgr) {
//AB 2020/04/21 - cuDNN does have NHWC support (with limitations) however I have been unable to get it working
// correctly on NHWC data, even after updating all descriptors, tensor format, etc.
//Therefore: all computation here is done in NCHW format only
//As of a future (next?) release we'll likely switch to C++ for cuDNN support
boolean origNHWC = false;
if(format == CNN2DFormat.NHWC){
input = input.permute(0,3,1,2); //NHWC to NCHW
origNHWC = true;
}
int TENSOR_FORMAT = CUDNN_TENSOR_NCHW;
int code;
val miniBatch = input.size(0);
@ -358,7 +392,7 @@ public class CudnnConvolutionHelper extends BaseCudnnHelper implements Convoluti
val kH = weights.size(2);
val kW = weights.size(3);
CudnnForwardArgs args = getCudnnForwardArgs(input, kernel, strides, pad, dilation, convolutionMode, null);
CudnnForwardArgs args = getCudnnForwardArgs(input, kernel, strides, pad, dilation, convolutionMode, null, CNN2DFormat.NCHW); //Note hardcoded NCHW due to above
input = args.getInput();
val inH = input.size(2);
val inW = input.size(3);
@ -378,7 +412,7 @@ public class CudnnConvolutionHelper extends BaseCudnnHelper implements Convoluti
checkCudnn(true, "cudnnSetFilter4dDescriptor", code, input, weights, bias, null, kernel, strides, pad, mode, fwdAlgo, null, null, convolutionMode, dilation);
code = cudnnSetConvolution2dDescriptor(cudnnContext.convDesc, pad[0], pad[1], strides[0], strides[1], dilation[0],
dilation[1], CUDNN_CROSS_CORRELATION, dataType);
dilation[1], CUDNN_CROSS_CORRELATION, dataType);
checkCudnn(true, "cudnnSetConvolution2dDescriptor", code, input, weights, bias, null, kernel, strides, pad, mode, fwdAlgo, null, null, convolutionMode, dilation);
@ -460,8 +494,8 @@ public class CudnnConvolutionHelper extends BaseCudnnHelper implements Convoluti
checkCudnn(true, "cudnnSetStream", code, input, weights, bias, null, kernel, strides, pad, mode, fwdAlgo, null, null, convolutionMode, dilation);
code = cudnnGetConvolutionForwardWorkspaceSize(cudnnContext, cudnnContext.srcTensorDesc,
cudnnContext.filterDesc, cudnnContext.convDesc, cudnnContext.dstTensorDesc, algo[0],
sizeInBytes);
cudnnContext.filterDesc, cudnnContext.convDesc, cudnnContext.dstTensorDesc, algo[0],
sizeInBytes);
checkCudnn(true, "cudnnGetConvolutionForwardWorkspaceSize", code, input, weights, bias, null, kernel, strides, pad, mode, fwdAlgo, null, null, convolutionMode, dilation);
DataCache workSpace = workspaceMgr.getHelperWorkspace(LayerWorkspaceMgr.CUDNN_WORKSPACE_KEY);
@ -482,8 +516,8 @@ public class CudnnConvolutionHelper extends BaseCudnnHelper implements Convoluti
workspaceMgr.setHelperWorkspace(LayerWorkspaceMgr.CUDNN_WORKSPACE_KEY, workSpace);
}
code = cudnnConvolutionForward(cudnnContext, alpha, cudnnContext.srcTensorDesc, srcData,
cudnnContext.filterDesc, filterData, cudnnContext.convDesc, algo[0], workSpace,
workSpace.capacity(), beta, cudnnContext.dstTensorDesc, dstData);
cudnnContext.filterDesc, filterData, cudnnContext.convDesc, algo[0], workSpace,
workSpace.capacity(), beta, cudnnContext.dstTensorDesc, dstData);
checkCudnn(true, "cudnnConvolutionForward", code, input, weights, bias, null, kernel, strides, pad, mode, fwdAlgo, null, null, convolutionMode, dilation);
@ -491,7 +525,7 @@ public class CudnnConvolutionHelper extends BaseCudnnHelper implements Convoluti
checkCudnn(true, "cudnnSetTensor4dDescriptor", code, input, weights, bias, null, kernel, strides, pad, mode, fwdAlgo, null, null, convolutionMode, dilation);
code = cudnnAddTensor(cudnnContext, alpha, cudnnContext.biasTensorDesc, biasData, alpha,
cudnnContext.dstTensorDesc, dstData);
cudnnContext.dstTensorDesc, dstData);
checkCudnn(true, "cudnnAddTensor", code, input, weights, bias, null, kernel, strides, pad, mode, fwdAlgo, null, null, convolutionMode, dilation);
allocator.registerAction(context, z, input, weights, bias);
@ -499,6 +533,10 @@ public class CudnnConvolutionHelper extends BaseCudnnHelper implements Convoluti
if (CudaEnvironment.getInstance().getConfiguration().isDebug())
context.syncOldStream();
if(origNHWC){
z = z.permute(0,2,3,1); //NCHW to NHWC
}
return z;
}
@ -552,29 +590,29 @@ public class CudnnConvolutionHelper extends BaseCudnnHelper implements Convoluti
break;
case "sigmoid":
checkCudnn(cudnnSetActivationDescriptor(cudnnContext.activationDesc, CUDNN_ACTIVATION_SIGMOID,
CUDNN_PROPAGATE_NAN, 0));
CUDNN_PROPAGATE_NAN, 0));
checkCudnn(cudnnActivationForward(cudnnContext, cudnnContext.activationDesc, alpha,
cudnnContext.dstTensorDesc, dstData, beta, cudnnContext.dstTensorDesc, dstData));
cudnnContext.dstTensorDesc, dstData, beta, cudnnContext.dstTensorDesc, dstData));
break;
case "relu":
checkCudnn(cudnnSetActivationDescriptor(cudnnContext.activationDesc, CUDNN_ACTIVATION_RELU,
CUDNN_PROPAGATE_NAN, 0));
CUDNN_PROPAGATE_NAN, 0));
checkCudnn(cudnnActivationForward(cudnnContext, cudnnContext.activationDesc, alpha,
cudnnContext.dstTensorDesc, dstData, beta, cudnnContext.dstTensorDesc, dstData));
cudnnContext.dstTensorDesc, dstData, beta, cudnnContext.dstTensorDesc, dstData));
break;
case "tanh":
checkCudnn(cudnnSetActivationDescriptor(cudnnContext.activationDesc, CUDNN_ACTIVATION_TANH,
CUDNN_PROPAGATE_NAN, 0));
CUDNN_PROPAGATE_NAN, 0));
checkCudnn(cudnnActivationForward(cudnnContext, cudnnContext.activationDesc, alpha,
cudnnContext.dstTensorDesc, dstData, beta, cudnnContext.dstTensorDesc, dstData));
cudnnContext.dstTensorDesc, dstData, beta, cudnnContext.dstTensorDesc, dstData));
break;
case "softmax":
checkCudnn(cudnnSoftmaxForward(cudnnContext, CUDNN_SOFTMAX_ACCURATE, CUDNN_SOFTMAX_MODE_CHANNEL, alpha,
cudnnContext.dstTensorDesc, dstData, beta, cudnnContext.dstTensorDesc, dstData));
cudnnContext.dstTensorDesc, dstData, beta, cudnnContext.dstTensorDesc, dstData));
break;
case "logsoftmax":
checkCudnn(cudnnSoftmaxForward(cudnnContext, CUDNN_SOFTMAX_LOG, CUDNN_SOFTMAX_MODE_CHANNEL, alpha,
cudnnContext.dstTensorDesc, dstData, beta, cudnnContext.dstTensorDesc, dstData));
cudnnContext.dstTensorDesc, dstData, beta, cudnnContext.dstTensorDesc, dstData));
break;
default:
activation = null;
@ -593,7 +631,7 @@ public class CudnnConvolutionHelper extends BaseCudnnHelper implements Convoluti
* @return
*/
public static CudnnForwardArgs getCudnnForwardArgs(INDArray input, int[] kernel, int[] strides, int[] padding, int[] dilation,
ConvolutionMode convolutionMode, PoolingType poolingType){
ConvolutionMode convolutionMode, PoolingType poolingType, CNN2DFormat format){
INDArray origInput = input;
//Check if we need to dup the input: views, non-contiguous, etc. CuDNN also seems to have has issues if strides
@ -602,16 +640,19 @@ public class CudnnConvolutionHelper extends BaseCudnnHelper implements Convoluti
input = input.dup('c');
}
boolean nchw = format == CNN2DFormat.NCHW;
int hIdx = nchw ? 2 : 1;
int wIdx = nchw ? 3 : 2;
val inH = input.size(2);
val inW = input.size(3);
val inH = input.size(hIdx);
val inW = input.size(wIdx);
boolean manualPadBottom = false;
boolean manualPadRight = false;
int[] outSize;
if (convolutionMode == ConvolutionMode.Same) {
outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, null, convolutionMode, dilation); //Also performs validation
outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, null, convolutionMode, dilation, format); //Also performs validation
padding = ConvolutionUtils.getSameModeTopLeftPadding(outSize, new int[] {(int) inH, (int) inW}, kernel, strides, dilation);
int[] padBottomRight = ConvolutionUtils.getSameModeBottomRightPadding(outSize, new int[] {(int) inH, (int) inW}, kernel, strides, dilation);
if(!Arrays.equals(padding, padBottomRight)){
@ -626,9 +667,17 @@ public class CudnnConvolutionHelper extends BaseCudnnHelper implements Convoluti
manualPadRight = (padding[1] != padBottomRight[1]);
//NCHW format
val newShape = new long[]{input.size(0), input.size(1),
input.size(2) + (manualPadBottom ? 1 : 0),
input.size(3) + (manualPadRight ? 1 : 0)};
long[] newShape;
if(nchw){
newShape = new long[]{input.size(0), input.size(1),
input.size(2) + (manualPadBottom ? 1 : 0),
input.size(3) + (manualPadRight ? 1 : 0)};
} else {
newShape = new long[]{input.size(0),
input.size(1) + (manualPadBottom ? 1 : 0),
input.size(2) + (manualPadRight ? 1 : 0),
input.size(3)};
}
INDArray newInput;
if(poolingType == null || poolingType != PoolingType.MAX){
newInput = Nd4j.create(input.dataType(), newShape);
@ -638,15 +687,22 @@ public class CudnnConvolutionHelper extends BaseCudnnHelper implements Convoluti
// if the 'real' (non-padding) values are all < 0, we take the real value, not the padding value
newInput = Nd4j.valueArrayOf(newShape, Double.NEGATIVE_INFINITY, input.dataType());
}
newInput.put(new INDArrayIndex[]{all(), all(), interval(0,input.size(2)),
interval(0, input.size(3))}, input);
if(nchw){
newInput.put(new INDArrayIndex[]{all(), all(), interval(0,input.size(2)),
interval(0, input.size(3))}, input);
} else {
newInput.put(new INDArrayIndex[]{all(), interval(0,input.size(1)),
interval(0, input.size(2)), all()}, input);
}
input = newInput;
//Now: we've manually applied the "extra" bottom/right padding only - if required. Consequently, we
// now have the same amount of padding required for top/bottom, and left/right - which we'll let
// CuDNN handle
}
} else {
outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, padding, convolutionMode, dilation); //Also performs validation
outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, padding, convolutionMode, dilation, format); //Also performs validation
}
return new CudnnForwardArgs(manualPadBottom, manualPadRight, input, origInput, padding, outSize);

View File

@ -19,6 +19,7 @@ package org.deeplearning4j.nn.layers.convolution.subsampling;
import lombok.extern.slf4j.Slf4j;
import lombok.val;
import org.bytedeco.javacpp.Pointer;
import org.deeplearning4j.nn.conf.CNN2DFormat;
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.layers.PoolingType;
import org.deeplearning4j.nn.gradient.DefaultGradient;
@ -114,23 +115,29 @@ public class CudnnSubsamplingHelper extends BaseCudnnHelper implements Subsampli
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray input, INDArray epsilon, int[] kernel, int[] strides,
int[] pad, PoolingType poolingType, ConvolutionMode convolutionMode, int[] dilation, LayerWorkspaceMgr workspaceMgr) {
int[] pad, PoolingType poolingType, ConvolutionMode convolutionMode,
int[] dilation, CNN2DFormat format, LayerWorkspaceMgr workspaceMgr) {
if(dilation[0] != 1 || dilation[1] != 1){
//CuDNN doesn't support dilated subsampling
return null;
}
boolean nchw = format == CNN2DFormat.NCHW;
int chIdx = nchw ? 1 : 3;
int hIdx = nchw ? 2 : 1;
int wIdx = nchw ? 3 : 2;
//We require the output as one of the arguments for backprop here
//TODO we could add cache mode support here somehow...
INDArray reduced = activate(input, true, kernel, strides, pad, poolingType, convolutionMode, dilation, workspaceMgr);
INDArray reduced = activate(input, true, kernel, strides, pad, poolingType, convolutionMode, dilation, format, workspaceMgr);
val miniBatch = input.size(0);
val depth = input.size(1);
val depth = input.size(chIdx);
CudnnConvolutionHelper.CudnnForwardArgs args = getCudnnForwardArgs(input, kernel, strides, pad, dilation, convolutionMode, poolingType);
CudnnConvolutionHelper.CudnnForwardArgs args = getCudnnForwardArgs(input, kernel, strides, pad, dilation, convolutionMode, poolingType, format);
input = args.getInput();
val inH = input.size(2);
val inW = input.size(3);
val inH = input.size(hIdx);
val inW = input.size(wIdx);
val srcStride = input.stride();
int[] outSize = args.getOutSize();
int outH = outSize[0];
@ -160,23 +167,26 @@ public class CudnnSubsamplingHelper extends BaseCudnnHelper implements Subsampli
epsilon = epsilon.dup('c');
}
input = input.dup();
val deltaStride = epsilon.stride();
if (Nd4j.getExecutioner() instanceof GridExecutioner)
((GridExecutioner) Nd4j.getExecutioner()).flushQueue();
checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.srcTensorDesc, dataType, (int) miniBatch, (int) depth, (int) inH, (int) inW,
(int) srcStride[0], (int) srcStride[1], (int) srcStride[2], (int) srcStride[3]));
(int) srcStride[0], (int) srcStride[chIdx], (int) srcStride[hIdx], (int) srcStride[wIdx]));
checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.deltaTensorDesc, dataType, (int) miniBatch, (int) depth, (int) outH, (int) outW,
(int) deltaStride[0], (int) deltaStride[1], (int) deltaStride[2], (int) deltaStride[3]));
(int) deltaStride[0], (int) deltaStride[chIdx], (int) deltaStride[hIdx], (int) deltaStride[wIdx]));
checkCudnn(cudnnSetPooling2dDescriptor(cudnnContext.poolingDesc, poolingMode, CUDNN_PROPAGATE_NAN, kernel[0],
kernel[1], pad[0], pad[1], strides[0], strides[1]));
INDArray outEpsilon = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, input.dataType(), new long[] {(int) miniBatch, (int) depth, (int) inH, (int) inW}, 'c');
long[] outEpsShape = nchw ? new long[] {miniBatch, depth, inH, inW} : new long[] {miniBatch, inH, inW, depth};
INDArray outEpsilon = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, input.dataType(), outEpsShape, 'c');
val dstStride = outEpsilon.stride();
checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.dstTensorDesc, dataType, (int) miniBatch, (int) depth, (int) inH, (int) inW,
(int) dstStride[0], (int) dstStride[1], (int) dstStride[2], (int) dstStride[3]));
(int) dstStride[0], (int) dstStride[chIdx], (int) dstStride[hIdx], (int) dstStride[wIdx]));
Allocator allocator = AtomicAllocator.getInstance();
CudaContext context = allocator.getFlowController().prepareAction(input, epsilon, reduced, outEpsilon);
@ -198,9 +208,16 @@ public class CudnnSubsamplingHelper extends BaseCudnnHelper implements Subsampli
//Note that: if we had to manually pad for SAME mode, we have to 'undo' this manual padding for the epsilon
// we return. The returned epsilon (i.e., dL/dIn array) has to be the same shape as the *original* input.
if(args.isManualPadBottom() || args.isManualPadRight()) {
outEpsilon = outEpsilon.get(all(), all(),
interval(0, outEpsilon.size(2) - (args.isManualPadBottom() ? 1 : 0)),
interval(0, outEpsilon.size(3) - (args.isManualPadRight() ? 1 : 0)));
if(nchw){
outEpsilon = outEpsilon.get(all(), all(),
interval(0, outEpsilon.size(2) - (args.isManualPadBottom() ? 1 : 0)),
interval(0, outEpsilon.size(3) - (args.isManualPadRight() ? 1 : 0)));
} else {
outEpsilon = outEpsilon.get(all(),
interval(0, outEpsilon.size(1) - (args.isManualPadBottom() ? 1 : 0)),
interval(0, outEpsilon.size(2) - (args.isManualPadRight() ? 1 : 0)),
all());
}
}
return new Pair<>(retGradient, outEpsilon);
@ -209,19 +226,24 @@ public class CudnnSubsamplingHelper extends BaseCudnnHelper implements Subsampli
@Override
public INDArray activate(INDArray input, boolean training, int[] kernel, int[] strides, int[] pad,
PoolingType poolingType, ConvolutionMode convolutionMode, int[] dilation, LayerWorkspaceMgr workspaceMgr) {
PoolingType poolingType, ConvolutionMode convolutionMode, int[] dilation, CNN2DFormat format, LayerWorkspaceMgr workspaceMgr) {
if(dilation[0] != 1 || dilation[1] != 1){
//CuDNN doesn't support dilated subsampling
return null;
}
val miniBatch = input.size(0);
val inDepth = input.size(1);
boolean nchw = format == CNN2DFormat.NCHW;
int chIdx = nchw ? 1 : 3;
int hIdx = nchw ? 2 : 1;
int wIdx = nchw ? 3 : 2;
CudnnConvolutionHelper.CudnnForwardArgs args = getCudnnForwardArgs(input, kernel, strides, pad, dilation, convolutionMode, poolingType);
val miniBatch = input.size(0);
val inDepth = input.size(nchw ? 1 : 3);
CudnnConvolutionHelper.CudnnForwardArgs args = getCudnnForwardArgs(input, kernel, strides, pad, dilation, convolutionMode, poolingType, format);
input = args.getInput();
val inH = input.size(2);
val inW = input.size(3);
val inH = input.size(nchw ? 2 : 1);
val inW = input.size(nchw ? 3 : 2);
val srcStride = input.stride();
val outSize = args.getOutSize();
int outH = outSize[0];
@ -246,13 +268,14 @@ public class CudnnSubsamplingHelper extends BaseCudnnHelper implements Subsampli
checkCudnn(cudnnSetPooling2dDescriptor(cudnnContext.poolingDesc, poolingMode, CUDNN_PROPAGATE_NAN, kernel[0],
kernel[1], pad[0], pad[1], strides[0], strides[1]));
checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.srcTensorDesc, dataType, (int) miniBatch, (int) inDepth, (int) inH, (int) inW,
(int) srcStride[0], (int) srcStride[1], (int) srcStride[2], (int) srcStride[3]));
(int) srcStride[0], (int) srcStride[chIdx], (int) srcStride[hIdx], (int) srcStride[wIdx]));
INDArray reduced = workspaceMgr.createUninitialized(ArrayType.ACTIVATIONS, input.dataType(), new long[] {(int) miniBatch, (int) inDepth, outH, outW}, 'c');
long[] outShape = nchw ? new long[] {miniBatch, inDepth, outH, outW} : new long[] {miniBatch, outH, outW, inDepth};
INDArray reduced = workspaceMgr.createUninitialized(ArrayType.ACTIVATIONS, input.dataType(), outShape, 'c');
val dstStride = reduced.stride();
checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.dstTensorDesc, dataType, (int) miniBatch, (int) inDepth, (int) outH, (int) outW,
(int) dstStride[0], (int) dstStride[1], (int) dstStride[2], (int) dstStride[3]));
(int) dstStride[0], (int) dstStride[chIdx], (int) dstStride[hIdx], (int) dstStride[wIdx]));
Allocator allocator = AtomicAllocator.getInstance();
CudaContext context = allocator.getFlowController().prepareAction(input, reduced);

View File

@ -19,6 +19,7 @@ package org.deeplearning4j.nn.layers.normalization;
import lombok.extern.slf4j.Slf4j;
import lombok.val;
import org.bytedeco.javacpp.Pointer;
import org.deeplearning4j.nn.conf.CNN2DFormat;
import org.deeplearning4j.nn.gradient.DefaultGradient;
import org.deeplearning4j.nn.gradient.Gradient;
import org.deeplearning4j.nn.layers.BaseCudnnHelper;
@ -124,12 +125,21 @@ public class CudnnBatchNormalizationHelper extends BaseCudnnHelper implements Ba
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray input, INDArray epsilon, long[] shape, INDArray gamma, INDArray beta,
INDArray dGammaView, INDArray dBetaView, double eps, LayerWorkspaceMgr layerWorkspaceMgr) {
INDArray dGammaView, INDArray dBetaView, double eps, CNN2DFormat format, LayerWorkspaceMgr layerWorkspaceMgr) {
boolean nchw = format == CNN2DFormat.NCHW;
this.eps = eps;
int cudnnTensorFormat = nchw ? CUDNN_TENSOR_NCHW : CUDNN_TENSOR_NHWC;
int chIdx = nchw ? 1 : 3;
int hIdx = nchw ? 2 : 1;
int wIdx = nchw ? 3 : 2;
val miniBatch = (int) input.size(0);
val depth = (int) input.size(1);
val inH = (int) input.size(2);
val inW = (int) input.size(3);
val depth = (int) input.size(chIdx);
val inH = (int) input.size(hIdx);
val inW = (int) input.size(wIdx);
final boolean isHalf = (input.dataType() == DataType.HALF);
INDArray gammaOrig = null;
@ -164,16 +174,17 @@ public class CudnnBatchNormalizationHelper extends BaseCudnnHelper implements Ba
((GridExecutioner) Nd4j.getExecutioner()).flushQueue();
checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.srcTensorDesc, dataType, (int) miniBatch, (int) depth, (int) inH, (int) inW,
(int) srcStride[0], (int) srcStride[1], (int) srcStride[2], (int) srcStride[3]));
(int) srcStride[0], (int) srcStride[chIdx], (int) srcStride[hIdx], (int) srcStride[wIdx]));
checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.deltaTensorDesc, dataType, (int) miniBatch, (int) depth, (int) inH, (int) inW,
(int) deltaStride[0], (int) deltaStride[1], (int) deltaStride[2], (int) deltaStride[3]));
(int) deltaStride[0], (int) deltaStride[chIdx], (int) deltaStride[hIdx], (int) deltaStride[wIdx]));
INDArray nextEpsilon = layerWorkspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, input.dataType(), new long[] {miniBatch, depth, inH, inW}, 'c');
long[] nextEpsShape = nchw ? new long[] {miniBatch, depth, inH, inW} : new long[] {miniBatch, inH, inW, depth};
INDArray nextEpsilon = layerWorkspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, input.dataType(), nextEpsShape, 'c');
val dstStride = ArrayUtil.toInts(nextEpsilon.stride());
checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.dstTensorDesc, dataType, miniBatch, depth, inH, inW,
dstStride[0], dstStride[1], dstStride[2], dstStride[3]));
checkCudnn(cudnnSetTensor4dDescriptor(cudnnContext.gammaBetaTensorDesc, TENSOR_FORMAT, toCudnnDataType(gamma.data().dataType()), (int)shape[0],
dstStride[0], dstStride[chIdx], dstStride[hIdx], dstStride[wIdx]));
checkCudnn(cudnnSetTensor4dDescriptor(cudnnContext.gammaBetaTensorDesc, cudnnTensorFormat, toCudnnDataType(gamma.data().dataType()), (int)shape[0],
(int)shape[1], shape.length > 2 ? (int)shape[2] : 1, shape.length > 3 ? (int)shape[3] : 1));
Allocator allocator = AtomicAllocator.getInstance();
@ -215,9 +226,15 @@ public class CudnnBatchNormalizationHelper extends BaseCudnnHelper implements Ba
@Override
public INDArray preOutput(INDArray x, boolean training, long[] shape, INDArray gamma, INDArray beta, INDArray mean,
INDArray var, double decay, double eps, LayerWorkspaceMgr workspaceMgr) {
INDArray var, double decay, double eps, CNN2DFormat format, LayerWorkspaceMgr workspaceMgr) {
boolean nchw = format == CNN2DFormat.NCHW;
int cudnnTensorFormat = nchw ? CUDNN_TENSOR_NCHW : CUDNN_TENSOR_NHWC;
int chIdx = nchw ? 1 : 3;
int hIdx = nchw ? 2 : 1;
int wIdx = nchw ? 3 : 2;
this.eps = eps;
final boolean isHalf = (x.dataType() == DataType.HALF);
final boolean isHalf = (x.dataType() == DataType.FLOAT16);
INDArray origGamma = gamma;
INDArray origBeta = beta;
INDArray origMean = mean;
@ -238,21 +255,22 @@ public class CudnnBatchNormalizationHelper extends BaseCudnnHelper implements Ba
decay = 0.0; //From cudnn docs: runningMean = newMean*factor + runningMean*(1-factor). -> 0 = "in-place modification of running mean disabled"
val miniBatch = (int) x.size(0);
val inDepth = (int) x.size(1);
val inH = (int) x.size(2);
val inW = (int) x.size(3);
val inDepth = (int) x.size(chIdx);
val inH = (int) x.size(hIdx);
val inW = (int) x.size(wIdx);
val srcStride = ArrayUtil.toInts(x.stride());
checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.srcTensorDesc, dataType, miniBatch, inDepth, inH, inW,
srcStride[0], srcStride[1], srcStride[2], srcStride[3]));
srcStride[0], srcStride[chIdx], srcStride[hIdx], srcStride[wIdx]));
INDArray activations = workspaceMgr.createUninitialized(ArrayType.ACTIVATIONS, x.dataType(), new long[] {miniBatch, inDepth, inH, inW}, 'c');
long[] actShape = nchw ? new long[] {miniBatch, inDepth, inH, inW} : new long[] {miniBatch, inH, inW, inDepth};
INDArray activations = workspaceMgr.createUninitialized(ArrayType.ACTIVATIONS, x.dataType(), actShape, 'c');
val dstStride = ArrayUtil.toInts(activations.stride());
checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.dstTensorDesc, dataType, miniBatch, inDepth, inH, inW,
dstStride[0], dstStride[1], dstStride[2], dstStride[3]));
dstStride[0], dstStride[chIdx], dstStride[hIdx], dstStride[wIdx]));
checkCudnn(cudnnSetTensor4dDescriptor(cudnnContext.gammaBetaTensorDesc, TENSOR_FORMAT, toCudnnDataType(mean.data().dataType()), (int)shape[0],
checkCudnn(cudnnSetTensor4dDescriptor(cudnnContext.gammaBetaTensorDesc, cudnnTensorFormat, toCudnnDataType(mean.data().dataType()), (int)shape[0],
(int)shape[1], shape.length > 2 ? (int)shape[2] : 1, shape.length > 3 ? (int)shape[3] : 1));
Allocator allocator = AtomicAllocator.getInstance();

View File

@ -16,74 +16,131 @@
package org.deeplearning4j;
import org.apache.commons.compress.utils.IOUtils;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.layers.BaseLayer;
import org.deeplearning4j.nn.conf.layers.samediff.AbstractSameDiffLayer;
import org.deeplearning4j.nn.graph.ComputationGraph;
import org.deeplearning4j.nn.layers.convolution.ConvolutionLayer;
import org.deeplearning4j.nn.layers.convolution.subsampling.SubsamplingLayer;
import org.deeplearning4j.nn.layers.normalization.BatchNormalization;
import org.deeplearning4j.nn.layers.normalization.LocalResponseNormalization;
import org.deeplearning4j.nn.layers.recurrent.LSTM;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.util.ModelSerializer;
import org.nd4j.base.Preconditions;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.ops.random.impl.BernoulliDistribution;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.learning.regularization.L1Regularization;
import org.nd4j.linalg.learning.regularization.L2Regularization;
import org.nd4j.linalg.learning.regularization.Regularization;
import org.nd4j.linalg.learning.regularization.WeightDecay;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.*;
import java.lang.reflect.Field;
import java.util.List;
import java.util.Random;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
public class TestUtils {
public static MultiLayerNetwork testModelSerialization(MultiLayerNetwork net){
MultiLayerNetwork restored;
try {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
ModelSerializer.writeModel(net, baos, true);
byte[] bytes = baos.toByteArray();
ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(bais, true);
restored = ModelSerializer.restoreMultiLayerNetwork(bais, true);
assertEquals(net.getLayerWiseConfigurations(), restored.getLayerWiseConfigurations());
assertEquals(net.params(), restored.params());
return restored;
} catch (IOException e){
//Should never happen
throw new RuntimeException(e);
}
//Also check the MultiLayerConfiguration is serializable (required by Spark etc)
MultiLayerConfiguration conf = net.getLayerWiseConfigurations();
serializeDeserializeJava(conf);
return restored;
}
public static ComputationGraph testModelSerialization(ComputationGraph net){
ComputationGraph restored;
try {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
ModelSerializer.writeModel(net, baos, true);
byte[] bytes = baos.toByteArray();
ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
ComputationGraph restored = ModelSerializer.restoreComputationGraph(bais, true);
restored = ModelSerializer.restoreComputationGraph(bais, true);
assertEquals(net.getConfiguration(), restored.getConfiguration());
assertEquals(net.params(), restored.params());
return restored;
} catch (IOException e){
//Should never happen
throw new RuntimeException(e);
}
//Also check the ComputationGraphConfiguration is serializable (required by Spark etc)
ComputationGraphConfiguration conf = net.getConfiguration();
serializeDeserializeJava(conf);
return restored;
}
public static INDArray randomOneHot(int examples, int nOut){
/**
 * Round-trips the given object through Java serialization, asserts the deserialized
 * copy is equal to the original, and returns the copy.
 *
 * @param object object to round-trip; must be {@link java.io.Serializable}
 * @return the deserialized copy (asserted equal to {@code object})
 * @throws RuntimeException wrapping any IOException or ClassNotFoundException (should
 *                          never happen for in-memory streams)
 */
private static <T> T serializeDeserializeJava(T object){
    byte[] bytes;
    try(ByteArrayOutputStream baos = new ByteArrayOutputStream(); ObjectOutputStream oos = new ObjectOutputStream(baos)){
        oos.writeObject(object);
        //Flush so baos holds the complete serialized form before we snapshot it;
        // closing is handled by try-with-resources (the original also called close() manually)
        oos.flush();
        bytes = baos.toByteArray();
    } catch (IOException e){
        //Should never happen
        throw new RuntimeException(e);
    }
    T out;
    try(ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(bytes))){
        @SuppressWarnings("unchecked")  //Safe: exactly one object of type T was written above
        T read = (T) ois.readObject();
        out = read;
    } catch (IOException | ClassNotFoundException e){
        throw new RuntimeException(e);
    }
    assertEquals(object, out);
    return out;
}
/**
 * Creates a one-hot label matrix using a fixed RNG seed (12345) for reproducibility.
 *
 * @param examples number of rows (one one-hot row per example)
 * @param nOut     number of columns (classes)
 */
public static INDArray randomOneHot(long examples, long nOut){
    Random fixedSeedRng = new Random(12345);
    return randomOneHot(examples, nOut, fixedSeedRng);
}
public static INDArray randomOneHot(int examples, int nOut, long rngSeed){
/**
 * Creates a one-hot label matrix of the given data type, using a fixed RNG seed (12345).
 */
public static INDArray randomOneHot(DataType dataType, long examples, long nOut){
    Random fixedSeedRng = new Random(12345);
    return randomOneHot(dataType, examples, nOut, fixedSeedRng);
}
/**
 * Creates a one-hot label matrix using the supplied RNG seed.
 */
public static INDArray randomOneHot(long examples, long nOut, long rngSeed){
    Random seededRng = new Random(rngSeed);
    return randomOneHot(examples, nOut, seededRng);
}
public static INDArray randomOneHot(int examples, int nOut, Random rng){
INDArray arr = Nd4j.create(examples, nOut);
/**
 * Creates a one-hot label matrix in the default floating-point type, using the given RNG.
 */
public static INDArray randomOneHot(long examples, long nOut, Random rng) {
    DataType dt = Nd4j.defaultFloatingPointType();
    return randomOneHot(dt, examples, nOut, rng);
}
/**
 * Creates an [examples, nOut] matrix where each row is one-hot: a single 1.0 in a
 * column chosen uniformly at random by {@code rng}, zeros elsewhere.
 *
 * @param dataType data type of the returned array
 * @param examples number of rows
 * @param nOut     number of columns (classes); must fit in an int for Random.nextInt
 * @param rng      random source selecting the hot column per row
 */
public static INDArray randomOneHot(DataType dataType, long examples, long nOut, Random rng){
    INDArray arr = Nd4j.create(dataType, examples, nOut);
    for( int i=0; i<examples; i++ ){
        //Random.nextInt takes an int bound, hence the explicit cast
        arr.putScalar(i, rng.nextInt((int) nOut), 1.0);
    }
    return arr;
}
@ -115,4 +172,143 @@ public class TestUtils {
Nd4j.getExecutioner().exec(new BernoulliDistribution(ret, p));
return ret;
}
/**
 * Fully reads the given input stream into memory and writes its contents to the
 * specified file (overwriting it). The input stream is not closed by this method.
 *
 * @param out destination file
 * @param is  source stream, read to exhaustion
 * @throws IOException if reading or writing fails
 */
public static void writeStreamToFile(File out, InputStream is) throws IOException {
    byte[] contents = IOUtils.toByteArray(is);
    try (OutputStream target = new BufferedOutputStream(new FileOutputStream(out))) {
        target.write(contents);
    }
}
/**
 * Returns the first {@link L1Regularization} in the list, or null if none is present.
 */
public static L1Regularization getL1Reg(List<Regularization> l){
    for (int i = 0; i < l.size(); i++) {
        Regularization reg = l.get(i);
        if (reg instanceof L1Regularization) {
            return (L1Regularization) reg;
        }
    }
    return null;
}
/**
 * Convenience overload: extracts the layer's regularization list and delegates.
 */
public static L2Regularization getL2Reg(BaseLayer baseLayer){
    List<Regularization> regs = baseLayer.getRegularization();
    return getL2Reg(regs);
}
/**
 * Returns the first {@link L2Regularization} in the list, or null if none is present.
 */
public static L2Regularization getL2Reg(List<Regularization> l){
    for (int i = 0; i < l.size(); i++) {
        Regularization reg = l.get(i);
        if (reg instanceof L2Regularization) {
            return (L2Regularization) reg;
        }
    }
    return null;
}
/**
 * Convenience overload: extracts the layer's regularization list and delegates.
 */
public static WeightDecay getWeightDecayReg(BaseLayer bl){
    List<Regularization> regs = bl.getRegularization();
    return getWeightDecayReg(regs);
}
/**
 * Returns the first {@link WeightDecay} regularization in the list, or null if none is present.
 */
public static WeightDecay getWeightDecayReg(List<Regularization> l){
    for (int i = 0; i < l.size(); i++) {
        Regularization reg = l.get(i);
        if (reg instanceof WeightDecay) {
            return (WeightDecay) reg;
        }
    }
    return null;
}
/**
 * Returns the L1 coefficient of the layer's regularization (asserts one is present).
 */
public static double getL1(BaseLayer layer) {
    return getL1(layer.getRegularization());
}
/**
 * Returns the L1 coefficient from the LAST {@link L1Regularization} in the list
 * (the loop deliberately does not break early), asserting one exists.
 */
public static double getL1(List<Regularization> l){
    L1Regularization lastMatch = null;
    for (int i = 0; i < l.size(); i++) {
        Regularization reg = l.get(i);
        if (reg instanceof L1Regularization) {
            lastMatch = (L1Regularization) reg;
        }
    }
    assertNotNull(lastMatch);
    return lastMatch.getL1().valueAt(0, 0);
}
/**
 * Returns the L2 coefficient of the layer's regularization (asserts one is present).
 */
public static double getL2(BaseLayer layer) {
    return getL2(layer.getRegularization());
}
/**
 * Returns the L2 coefficient from the LAST {@link L2Regularization} in the list
 * (the loop deliberately does not break early), asserting one exists.
 */
public static double getL2(List<Regularization> l){
    L2Regularization lastMatch = null;
    for (int i = 0; i < l.size(); i++) {
        Regularization reg = l.get(i);
        if (reg instanceof L2Regularization) {
            lastMatch = (L2Regularization) reg;
        }
    }
    assertNotNull(lastMatch);
    return lastMatch.getL2().valueAt(0, 0);
}
/**
 * Returns the L1 coefficient of a SameDiff layer's regularization.
 */
public static double getL1(AbstractSameDiffLayer layer){
    List<Regularization> regs = layer.getRegularization();
    return getL1(regs);
}
/**
 * Returns the L2 coefficient of a SameDiff layer's regularization.
 */
public static double getL2(AbstractSameDiffLayer layer){
    List<Regularization> regs = layer.getRegularization();
    return getL2(regs);
}
/**
 * Returns the weight-decay coefficient of the layer's regularization.
 * Throws NullPointerException if no WeightDecay regularization is present.
 */
public static double getWeightDecay(BaseLayer layer) {
    WeightDecay wd = getWeightDecayReg(layer.getRegularization());
    return wd.getCoeff().valueAt(0, 0);
}
/**
 * Removes the (cuDNN/MKL-DNN) helper from a single layer via reflection.
 */
public static void removeHelper(Layer layer) throws Exception {
    Layer[] single = new Layer[]{layer};
    removeHelpers(single);
}
/**
 * Uses reflection to null out the private "helper" field on each supported layer type
 * (conv, subsampling, batchnorm, LSTM, LRN), then verifies via getHelper() that every
 * layer - matched or not - reports no helper.
 *
 * @throws IllegalStateException if any layer still reports a helper afterwards
 */
public static void removeHelpers(Layer[] layers) throws Exception {
    for (Layer l : layers) {
        //Determine which class declares the "helper" field for this layer type, if any
        Class<?> declaringClass = null;
        if (l instanceof ConvolutionLayer) {
            declaringClass = ConvolutionLayer.class;
        } else if (l instanceof SubsamplingLayer) {
            declaringClass = SubsamplingLayer.class;
        } else if (l instanceof BatchNormalization) {
            declaringClass = BatchNormalization.class;
        } else if (l instanceof LSTM) {
            declaringClass = LSTM.class;
        } else if (l instanceof LocalResponseNormalization) {
            declaringClass = LocalResponseNormalization.class;
        }

        if (declaringClass != null) {
            Field helperField = declaringClass.getDeclaredField("helper");
            helperField.setAccessible(true);
            helperField.set(l, null);
        }

        //Checked for every layer, not only the matched types (as in the original)
        if (l.getHelper() != null) {
            throw new IllegalStateException("Did not remove helper for layer: " + l.getClass().getSimpleName());
        }
    }
}
//NOTE(review): intentionally a no-op? The name suggests it should check layer.getHelper() != null
// like assertHelpersPresent does for arrays - confirm whether this empty stub is deliberate.
public static void assertHelperPresent(Layer layer){
}
/**
 * Asserts that every layer of a helper-capable type (conv, subsampling, batchnorm, LSTM)
 * has a non-null helper attached; the layer name is used as the failure message.
 */
public static void assertHelpersPresent(Layer[] layers) throws Exception {
    for (Layer l : layers) {
        //Don't use instanceof for ConvolutionLayer - there are conv subclasses
        boolean expectsHelper = l.getClass() == ConvolutionLayer.class
                || l instanceof SubsamplingLayer
                || l instanceof BatchNormalization
                || l instanceof LSTM;
        if (expectsHelper) {
            Preconditions.checkNotNull(l.getHelper(), l.conf().getLayer().getLayerName());
        }
    }
}
/**
 * Asserts that no layer in the array has a helper attached; the layer name is used
 * as the failure message.
 */
public static void assertHelpersAbsent(Layer[] layers) throws Exception {
    for (Layer l : layers) {
        boolean helperAbsent = l.getHelper() == null;
        Preconditions.checkState(helperAbsent, l.conf().getLayer().getLayerName());
    }
}
}

View File

@ -212,7 +212,7 @@ public class TestConvolution extends BaseDL4JTest {
ComputationGraph model = KerasModelImport.importKerasModelAndWeights( fExtracted.getAbsolutePath(), new int[]{inSize, inSize, 3}, false);
model = model.convertDataType(DataType.DOUBLE);
INDArray in = Nd4j.rand(DataType.DOUBLE, new int[]{1, 3, inSize, inSize});
INDArray in = Nd4j.rand(DataType.DOUBLE, new int[]{1, inSize, inSize, 3}); //Keras import model -> NHWC
CuDNNTestUtils.assertHelpersPresent(model.getLayers());
Map<String,INDArray> withCudnn = model.feedForward(in, false);

View File

@ -16,6 +16,7 @@
package org.deeplearning4j.datasets.fetchers;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.deeplearning4j.base.EmnistFetcher;
@ -36,6 +37,7 @@ import java.util.Random;
* @author Alex Black
*
*/
@Slf4j
public class EmnistDataFetcher extends MnistDataFetcher implements DataSetFetcher {
protected EmnistFetcher fetcher;
@ -64,7 +66,7 @@ public class EmnistDataFetcher extends MnistDataFetcher implements DataSetFetche
try {
man = new MnistManager(images, labels, totalExamples);
} catch (Exception e) {
e.printStackTrace();
log.error("",e);
FileUtils.deleteDirectory(new File(EMNIST_ROOT));
new EmnistFetcher(dataSet).downloadAndUntar();
man = new MnistManager(images, labels, totalExamples);

View File

@ -687,9 +687,7 @@ public class BarnesHutTsne implements Model {
* @throws IOException
*/
public void saveAsFile(List<String> labels, String path) throws IOException {
BufferedWriter write = null;
try {
write = new BufferedWriter(new FileWriter(new File(path)));
try (BufferedWriter write = new BufferedWriter(new FileWriter(new File(path)))) {
for (int i = 0; i < Y.rows(); i++) {
if (i >= labels.size())
break;
@ -711,17 +709,11 @@ public class BarnesHutTsne implements Model {
}
write.flush();
write.close();
} finally {
if (write != null)
write.close();
}
}
public void saveAsFile(String path) throws IOException {
BufferedWriter write = null;
try {
write = new BufferedWriter(new FileWriter(new File(path)));
try (BufferedWriter write = new BufferedWriter(new FileWriter(new File(path)))) {
for (int i = 0; i < Y.rows(); i++) {
StringBuilder sb = new StringBuilder();
INDArray wordVector = Y.getRow(i);
@ -734,10 +726,6 @@ public class BarnesHutTsne implements Model {
write.write(sb.toString());
}
write.flush();
write.close();
} finally {
if (write != null)
write.close();
}
}
/**

View File

@ -113,6 +113,12 @@
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.datavec</groupId>
<artifactId>datavec-python</artifactId>
<version>${datavec.version}</version>
<scope>test</scope>
</dependency>
</dependencies>
<profiles>

View File

@ -60,7 +60,7 @@ public class Hdf5Archive implements Closeable {
/* This is necessary for the call to the BytePointer constructor below. */
Loader.load(org.bytedeco.hdf5.global.hdf5.class);
} catch (Exception e) {
e.printStackTrace();
log.error("",e);
}
}

View File

@ -19,6 +19,9 @@ package org.deeplearning4j.nn.modelimport.keras.layers;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.ArrayUtils;
import org.deeplearning4j.nn.conf.CNN2DFormat;
import org.deeplearning4j.nn.conf.RNNFormat;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.Convolution3D;
import org.deeplearning4j.nn.modelimport.keras.KerasLayer;
@ -121,27 +124,29 @@ public class KerasInput extends KerasLayer {
InputType myInputType;
switch (this.inputShape.length) {
case 1:
myInputType = new InputType.InputTypeFeedForward(this.inputShape[0]);
myInputType = new InputType.InputTypeFeedForward(this.inputShape[0], null);
break;
case 2:
if(this.dimOrder != null) {
System.out.println("Dim order: " + this.dimOrder);
System.out.println("Input shape: " + ArrayUtils.toString(this.inputShape));
switch (this.dimOrder) {
case TENSORFLOW: //NWC == channels_last
myInputType = new InputType.InputTypeRecurrent(this.inputShape[1], this.inputShape[0]);
myInputType = new InputType.InputTypeRecurrent(this.inputShape[1], this.inputShape[0], RNNFormat.NWC);
break;
case THEANO: //NCW == channels_first
myInputType = new InputType.InputTypeRecurrent(this.inputShape[0], this.inputShape[1]);
myInputType = new InputType.InputTypeRecurrent(this.inputShape[0], this.inputShape[1], RNNFormat.NCW);
break;
case NONE:
//Assume RNN in [mb, seqLen, size] format
myInputType = new InputType.InputTypeRecurrent(this.inputShape[0], this.inputShape[1]);
myInputType = new InputType.InputTypeRecurrent(this.inputShape[1], this.inputShape[0], RNNFormat.NWC);
break;
default:
throw new IllegalStateException("Unknown/not supported dimension ordering: " + this.dimOrder);
}
} else {
//Assume RNN in [mb, seqLen, size] format
myInputType = new InputType.InputTypeRecurrent(this.inputShape[0], this.inputShape[1]);
myInputType = new InputType.InputTypeRecurrent(this.inputShape[1], this.inputShape[0], RNNFormat.NWC);
}
break;
@ -150,17 +155,17 @@ public class KerasInput extends KerasLayer {
case TENSORFLOW:
/* TensorFlow convolutional input: # rows, # cols, # channels */
myInputType = new InputType.InputTypeConvolutional(this.inputShape[0], this.inputShape[1],
this.inputShape[2]);
this.inputShape[2], CNN2DFormat.NHWC);
break;
case THEANO:
/* Theano convolutional input: # channels, # rows, # cols */
myInputType = new InputType.InputTypeConvolutional(this.inputShape[1], this.inputShape[2],
this.inputShape[0]);
this.inputShape[0], CNN2DFormat.NCHW);
break;
default:
this.dimOrder = DimOrder.THEANO;
myInputType = new InputType.InputTypeConvolutional(this.inputShape[1], this.inputShape[2],
this.inputShape[0]);
this.inputShape[0], CNN2DFormat.NCHW);
log.warn("Couldn't determine dim ordering / data format from model file. Older Keras " +
"versions may come without specified backend, in which case we assume the model was " +
"built with theano." );

View File

@ -20,6 +20,7 @@ import org.deeplearning4j.nn.api.ParamInitializer;
import org.deeplearning4j.nn.conf.GradientNormalization;
import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.RNNFormat;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.Layer;
import org.deeplearning4j.nn.conf.memory.LayerMemoryReport;
@ -65,6 +66,9 @@ public class TFOpLayer extends Layer {
long[] shape = inputType.getShape(true);
TFOpLayerImpl tempLayer = new TFOpLayerImpl(nodeDef, constants, null, null);
long[] outputShape = tempLayer.getOutputShape(shape);
if (outputShape.length == 3){
return InputType.recurrent(outputShape[2], outputShape[1], RNNFormat.NWC);
}
return InputType.inferInputType(Nd4j.create(outputShape));
}

View File

@ -125,17 +125,9 @@ public class TFOpLayerImpl extends AbstractLayer<TFOpLayer> {
}
private INDArray runGraph(INDArray input){
if (input.rank() == 3){
// TODO make this a preprocessor
input = input.permute(0, 2, 1);
}
Map<String, INDArray> inputMap = new HashMap<>();
inputMap.put(inputNames.get(0), input);
INDArray out = graphRunnerService.run(inputMap).values().toArray(new INDArray[0])[0];
if (out.rank() == 3){
out = out.permute(0, 2, 1); // TODO post-processing?
}
return out;
}

View File

@ -22,6 +22,7 @@ import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.ArrayUtils;
import org.deeplearning4j.nn.api.layers.LayerConstraint;
import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.deeplearning4j.nn.conf.RNNFormat;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.Convolution1DLayer;
import org.deeplearning4j.nn.conf.layers.InputTypeUtil;
@ -94,7 +95,6 @@ public class KerasConvolution1D extends KerasConvolution {
IWeightInit init = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_INIT(),
enforceTrainingConfig, conf, kerasMajorVersion);
Convolution1DLayer.Builder builder = new Convolution1DLayer.Builder().name(this.layerName)
.nOut(getNOutFromConfig(layerConfig, conf)).dropOut(this.dropout)
.activation(getIActivationFromConfig(layerConfig, conf))
@ -103,7 +103,7 @@ public class KerasConvolution1D extends KerasConvolution {
.convolutionMode(getConvolutionModeFromConfig(layerConfig, conf))
.kernelSize(getKernelSizeFromConfig(layerConfig, 1, conf, kerasMajorVersion)[0])
.hasBias(hasBias)
.stride(getStrideFromConfig(layerConfig, 1, conf)[0]);
.stride(getStrideFromConfig(layerConfig, 1, conf)[0]).rnnDataFormat(dimOrder == DimOrder.TENSORFLOW? RNNFormat.NWC: RNNFormat.NCW);
int[] padding = getPaddingFromBorderModeConfig(layerConfig, 1, conf, kerasMajorVersion);
if (hasBias)
builder.biasInit(0.0);
@ -160,8 +160,8 @@ public class KerasConvolution1D extends KerasConvolution {
public InputPreProcessor getInputPreprocessor(InputType... inputType) throws InvalidKerasConfigurationException {
if (inputType.length > 1)
throw new InvalidKerasConfigurationException(
"Keras LSTM layer accepts only one input (received " + inputType.length + ")");
return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType[0], layerName);
"Keras Conv1D layer accepts only one input (received " + inputType.length + ")");
return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType[0], RNNFormat.NCW,layerName);
}

View File

@ -20,6 +20,7 @@ import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.nn.api.layers.LayerConstraint;
import org.deeplearning4j.nn.conf.CNN2DFormat;
import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
@ -27,6 +28,7 @@ import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurat
import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.utils.KerasConstraintUtils;
import org.deeplearning4j.nn.weights.IWeightInit;
import oshi.jna.platform.windows.PowrProf;
import java.util.Map;
@ -93,6 +95,7 @@ public class KerasConvolution2D extends KerasConvolution {
LayerConstraint weightConstraint = KerasConstraintUtils.getConstraintsFromConfig(
layerConfig, conf.getLAYER_FIELD_W_CONSTRAINT(), conf, kerasMajorVersion);
System.out.println("----" + dimOrder);
ConvolutionLayer.Builder builder = new ConvolutionLayer.Builder().name(this.layerName)
.nOut(getNOutFromConfig(layerConfig, conf)).dropOut(this.dropout)
.activation(getIActivationFromConfig(layerConfig, conf))
@ -101,7 +104,8 @@ public class KerasConvolution2D extends KerasConvolution {
.convolutionMode(getConvolutionModeFromConfig(layerConfig, conf))
.kernelSize(getKernelSizeFromConfig(layerConfig, 2, conf, kerasMajorVersion))
.hasBias(hasBias)
.stride(getStrideFromConfig(layerConfig, 2, conf));
.stride(getStrideFromConfig(layerConfig, 2, conf))
.dataFormat((dimOrder==DimOrder.TENSORFLOW)? CNN2DFormat.NHWC:CNN2DFormat.NCHW);
int[] padding = getPaddingFromBorderModeConfig(layerConfig, 2, conf, kerasMajorVersion);
if (hasBias)
builder.biasInit(0.0);

View File

@ -360,8 +360,19 @@ public class KerasConvolutionUtils {
}
} else if (dimension == 1) {
int paddingInt = (int) innerConfig.get(layerField);
padding = new int[]{paddingInt, paddingInt};
Object paddingObj = innerConfig.get(layerField);
if (paddingObj instanceof List){
List<Integer> paddingList = (List)paddingObj;
padding = new int[]{
paddingList.get(0),
paddingList.get(1)
};
}
else{
int paddingInt = (int) innerConfig.get(layerField);
padding = new int[]{paddingInt, paddingInt};
}
} else {
throw new UnsupportedKerasConfigurationException(
"Keras padding layer not supported");

View File

@ -18,6 +18,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.core;
import lombok.extern.slf4j.Slf4j;
import lombok.val;
import org.deeplearning4j.nn.conf.CNN2DFormat;
import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.inputs.InputType.InputTypeConvolutional;
@ -27,7 +28,6 @@ import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurat
import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.preprocessors.KerasFlattenRnnPreprocessor;
import org.deeplearning4j.nn.modelimport.keras.preprocessors.ReshapePreprocessor;
import org.deeplearning4j.nn.modelimport.keras.preprocessors.TensorFlowCnnToFeedForwardPreProcessor;
import java.util.Map;
@ -93,11 +93,10 @@ public class KerasFlatten extends KerasLayer {
switch (this.getDimOrder()) {
case NONE:
case THEANO:
preprocessor = new CnnToFeedForwardPreProcessor(it.getHeight(), it.getWidth(), it.getChannels());
preprocessor = new CnnToFeedForwardPreProcessor(it.getHeight(), it.getWidth(), it.getChannels(), CNN2DFormat.NCHW);
break;
case TENSORFLOW:
preprocessor = new TensorFlowCnnToFeedForwardPreProcessor(it.getHeight(), it.getWidth(),
it.getChannels());
preprocessor = new CnnToFeedForwardPreProcessor(it.getHeight(), it.getWidth(), it.getChannels(), CNN2DFormat.NHWC);
break;
default:
throw new InvalidKerasConfigurationException("Unknown Keras backend " + this.getDimOrder());
@ -111,7 +110,7 @@ public class KerasFlatten extends KerasLayer {
// to RNN type. Otherwise we add this trivial preprocessor (since there's nothing to flatten).
InputType.InputTypeFeedForward it = (InputType.InputTypeFeedForward) inputType[0];
val inputShape = new long[]{it.getSize()};
preprocessor = new ReshapePreprocessor(inputShape, inputShape, false);
preprocessor = new ReshapePreprocessor(inputShape, inputShape, false, null);
}
return preprocessor;
}

View File

@ -17,6 +17,7 @@
package org.deeplearning4j.nn.modelimport.keras.layers.core;
import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.nn.conf.RNNFormat;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.misc.RepeatVector;
import org.deeplearning4j.nn.modelimport.keras.KerasLayer;
@ -60,6 +61,7 @@ public class KerasRepeatVector extends KerasLayer {
super(layerConfig, enforceTrainingConfig);
this.layer = new RepeatVector.Builder().repetitionFactor(getRepeatMultiplier(layerConfig, conf))
.dataFormat(RNNFormat.NWC)
.name(this.layerName).build();
}

View File

@ -18,6 +18,7 @@ package org.deeplearning4j.nn.modelimport.keras.layers.core;
import lombok.val;
import org.deeplearning4j.nn.conf.CNN2DFormat;
import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.modelimport.keras.KerasLayer;
@ -111,11 +112,9 @@ public class KerasReshape extends KerasLayer {
} else {
targetShape = new long[]{targetShape[1], targetShape[0], targetShape[2]};
}
preprocessor = new ReshapePreprocessor(inputShape, targetShape, false);
preprocessor = new ReshapePreprocessor(inputShape, targetShape, false, CNN2DFormat.NCHW);
} else { // (dimOrder == DimOrder.TENSORFLOW || dimOrder == DimOrder.NONE && kerasMajorVersion == 2)
if (inputShape[0] != targetShape[0])
targetShape = new long[]{targetShape[2], targetShape[0], targetShape[1]};
preprocessor = new ReshapePreprocessor(inputShape, targetShape, false);
preprocessor = new ReshapePreprocessor(inputShape, targetShape, false, CNN2DFormat.NHWC);
}
} else if (inputType[0] instanceof InputType.InputTypeConvolutional3D) {
@ -128,23 +127,23 @@ public class KerasReshape extends KerasLayer {
} else {
targetShape = new long[] { targetShape[2], targetShape[1], targetShape[0], targetShape[3] };
}
preprocessor = new ReshapePreprocessor(inputShape, targetShape, false);
preprocessor = new ReshapePreprocessor(inputShape, targetShape, false, null);
} else {
if (inputShape[0] != targetShape[0])
targetShape = new long[] { targetShape[3], targetShape[0], targetShape[1], targetShape[2] };
preprocessor = new ReshapePreprocessor(inputShape, targetShape, false);
preprocessor = new ReshapePreprocessor(inputShape, targetShape, false, null);
}
} else if (inputType[0] instanceof InputType.InputTypeRecurrent) {
InputType.InputTypeRecurrent it = (InputType.InputTypeRecurrent) inputType[0];
val inputShape = new long[]{it.getSize(), it.getTimeSeriesLength()};
preprocessor = new ReshapePreprocessor(inputShape, this.targetShape, false);
preprocessor = new ReshapePreprocessor(inputShape, this.targetShape, false, null);
} else if (inputType[0] instanceof InputType.InputTypeFeedForward) {
InputType.InputTypeFeedForward it = (InputType.InputTypeFeedForward) inputType[0];
val inputShape = new long[]{it.getSize()};
if (targetShape.length == 3) {
targetShape = targetShapeForDimOrder(inputShape, targetShape);
}
preprocessor = new ReshapePreprocessor(inputShape, this.targetShape, false);
preprocessor = new ReshapePreprocessor(inputShape, this.targetShape, false, null);
}
return preprocessor;
}

View File

@ -21,6 +21,7 @@ import lombok.EqualsAndHashCode;
import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.nn.api.layers.LayerConstraint;
import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.deeplearning4j.nn.conf.RNNFormat;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.EmbeddingSequenceLayer;
import org.deeplearning4j.nn.modelimport.keras.KerasLayer;
@ -121,6 +122,7 @@ public class KerasEmbedding extends KerasLayer {
.biasInit(0.0)
.l1(this.weightL1Regularization)
.l2(this.weightL2Regularization)
.outputDataFormat(RNNFormat.NWC)
.hasBias(false);
if (embeddingConstraint != null)
builder.constrainWeights(embeddingConstraint);

View File

@ -22,11 +22,9 @@ import lombok.extern.slf4j.Slf4j;
import lombok.val;
import org.deeplearning4j.nn.api.layers.LayerConstraint;
import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.deeplearning4j.nn.conf.RNNFormat;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.FeedForwardLayer;
import org.deeplearning4j.nn.conf.layers.InputTypeUtil;
import org.deeplearning4j.nn.conf.layers.LSTM;
import org.deeplearning4j.nn.conf.layers.Layer;
import org.deeplearning4j.nn.conf.layers.*;
import org.deeplearning4j.nn.conf.layers.recurrent.LastTimeStep;
import org.deeplearning4j.nn.conf.layers.util.MaskZeroLayer;
import org.deeplearning4j.nn.conf.layers.wrapper.BaseWrapperLayer;
@ -37,6 +35,7 @@ import org.deeplearning4j.nn.modelimport.keras.utils.KerasConstraintUtils;
import org.deeplearning4j.nn.modelimport.keras.utils.KerasLayerUtils;
import org.deeplearning4j.nn.params.LSTMParamInitializer;
import org.deeplearning4j.nn.weights.IWeightInit;
import org.deeplearning4j.util.TimeSeriesUtils;
import org.nd4j.linalg.activations.IActivation;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
@ -187,7 +186,7 @@ public class KerasLSTM extends KerasLayer {
.weightInitRecurrent(recurrentInit)
.biasInit(0.0) // TODO: this is incorrect
.l1(this.weightL1Regularization)
.l2(this.weightL2Regularization);
.l2(this.weightL2Regularization).dataFormat(RNNFormat.NWC);
Integer nIn = KerasLayerUtils.getNInFromInputDim(layerConfig, conf);
if(nIn != null)
builder.setNIn(nIn);
@ -266,7 +265,8 @@ public class KerasLSTM extends KerasLayer {
throw new InvalidKerasConfigurationException("Keras LSTM layer accepts only one single input" +
"or three (input to LSTM and two states tensors, but " +
"received " + inputType.length + ".");
return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType[0], layerName);
RNNFormat f = TimeSeriesUtils.getFormatFromRnnLayer(layer);
return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType[0], f,layerName);
}
/**

View File

@ -21,7 +21,9 @@ import lombok.EqualsAndHashCode;
import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.nn.api.layers.LayerConstraint;
import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.deeplearning4j.nn.conf.RNNFormat;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.BaseRecurrentLayer;
import org.deeplearning4j.nn.conf.layers.FeedForwardLayer;
import org.deeplearning4j.nn.conf.layers.InputTypeUtil;
import org.deeplearning4j.nn.conf.layers.Layer;
@ -36,6 +38,7 @@ import org.deeplearning4j.nn.modelimport.keras.utils.KerasConstraintUtils;
import org.deeplearning4j.nn.modelimport.keras.utils.KerasLayerUtils;
import org.deeplearning4j.nn.params.SimpleRnnParamInitializer;
import org.deeplearning4j.nn.weights.IWeightInit;
import org.deeplearning4j.util.TimeSeriesUtils;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.primitives.Pair;
@ -155,7 +158,7 @@ public class KerasSimpleRnn extends KerasLayer {
.weightInitRecurrent(recurrentInit)
.biasInit(0.0)
.l1(this.weightL1Regularization)
.l2(this.weightL2Regularization);
.l2(this.weightL2Regularization).dataFormat(RNNFormat.NWC);
Integer nIn = KerasLayerUtils.getNInFromInputDim(layerConfig, conf);
if(nIn != null)
builder.setNIn(nIn);
@ -227,7 +230,8 @@ public class KerasSimpleRnn extends KerasLayer {
throw new InvalidKerasConfigurationException(
"Keras SimpleRnn layer accepts only one input (received " + inputType.length + ")");
return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType[0], layerName);
RNNFormat f = TimeSeriesUtils.getFormatFromRnnLayer(layer);
return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType[0], f, layerName);
}
/**

View File

@ -147,7 +147,7 @@ public class KerasBidirectional extends KerasLayer {
break;
case "SimpleRNN":
kerasRnnlayer = new KerasSimpleRnn(innerRnnConfig, enforceTrainingConfig, previousLayers);
SimpleRnn rnnLayer = (SimpleRnn) ((KerasSimpleRnn) kerasRnnlayer).getSimpleRnnLayer();
Layer rnnLayer = ((KerasSimpleRnn) kerasRnnlayer).getSimpleRnnLayer();
this.layer = new Bidirectional(mode, rnnLayer);
layer.setLayerName(layerName);
break;
@ -218,7 +218,7 @@ public class KerasBidirectional extends KerasLayer {
if (inputType.length > 1)
throw new InvalidKerasConfigurationException(
"Keras Bidirectional layer accepts only one input (received " + inputType.length + ")");
return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType[0], layerName);
return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType[0], ((Bidirectional)layer).getRNNDataFormat(), layerName);
}
/**

View File

@ -21,6 +21,9 @@ import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.extern.slf4j.Slf4j;
import lombok.val;
import org.deeplearning4j.nn.conf.CNN2DFormat;
import org.deeplearning4j.nn.conf.DataFormat;
import org.deeplearning4j.nn.conf.RNNFormat;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.inputs.InvalidInputTypeException;
import org.deeplearning4j.nn.conf.preprocessor.BaseInputPreProcessor;
@ -54,25 +57,30 @@ public class ReshapePreprocessor extends BaseInputPreProcessor {
private final long[] inputShape;
private final long[] targetShape;
private boolean hasMiniBatchDimension;
/**
* @deprecated Use constructor {@link #ReshapePreprocessor(long[], long[], boolean)}
*/
@Deprecated
public ReshapePreprocessor(long[] inputShape, long[] targetShape) {
this(inputShape, targetShape, false);
}
private DataFormat format;
/**
* @param inputShape Input shape, with or without leading minibatch dimension, depending on value of hasMiniBatchDimension
* @param targetShape Target shape, with or without leading minibatch dimension, depending on value of hasMiniBatchDimension
* @param hasMiniBatchDimension If true: shapes should be of the form [minibatch, x, y, ...]; if false: shapes should be of form [x, y, ...]
*/
public ReshapePreprocessor(long[] inputShape, long[] targetShape, boolean hasMiniBatchDimension) {
this(inputShape, targetShape, hasMiniBatchDimension, null);
}
/**
* @param inputShape Input shape, with or without leading minibatch dimension, depending on value of hasMiniBatchDimension
* @param targetShape Target shape, with or without leading minibatch dimension, depending on value of hasMiniBatchDimension
* @param hasMiniBatchDimension If true: shapes should be of the form [minibatch, x, y, ...]; if false: shapes should be of form [x, y, ...]
* @param dataFormat May be null. If non-null:
*/
public ReshapePreprocessor(@JsonProperty("inputShape") long[] inputShape, @JsonProperty("targetShape") long[] targetShape,
@JsonProperty("hasMiniBatchDimension") boolean hasMiniBatchDimension) {
@JsonProperty("hasMiniBatchDimension") boolean hasMiniBatchDimension,
@JsonProperty("dataFormat") DataFormat dataFormat) {
this.inputShape = inputShape;
this.targetShape = targetShape;
this.hasMiniBatchDimension = hasMiniBatchDimension;
this.format = dataFormat;
}
private long[] getShape(long[] originalShape, long minibatch) {
@ -140,13 +148,26 @@ public class ReshapePreprocessor extends BaseInputPreProcessor {
ret = InputType.feedForward(shape[1]);
break;
case 3:
ret = InputType.recurrent(shape[2], shape[1]);
RNNFormat format = RNNFormat.NCW;
if(this.format != null && this.format instanceof RNNFormat)
format = (RNNFormat)this.format;
ret = InputType.recurrent(shape[2], shape[1], format);
break;
case 4:
if (inputShape.length == 1 || inputType.getType() == InputType.Type.RNN) {
ret = InputType.convolutional(shape[1], shape[2], shape[3]);
} else {
ret = InputType.convolutional(shape[2], shape[3], shape[1]);
CNN2DFormat cnnFormat = CNN2DFormat.NCHW;
if (this.format != null && this.format instanceof CNN2DFormat)
cnnFormat = (CNN2DFormat) this.format;
if (cnnFormat == CNN2DFormat.NCHW) {
ret = InputType.convolutional(shape[2], shape[3], shape[1], cnnFormat);
} else {
ret = InputType.convolutional(shape[1], shape[2], shape[3], cnnFormat);
}
}
break;
default:

View File

@ -27,26 +27,25 @@ import org.nd4j.shade.jackson.annotation.JsonCreator;
import org.nd4j.shade.jackson.annotation.JsonProperty;
/**
* Specialized CnnToFeedForwardInputPreProcessor for use with
* Convolutional layers imported from Keras using the TensorFlow
* backend.
*
* @author dave@skymind.io
* @deprecated Exists only for backward compatibility of older pretrained models. Should not be used.
* Use {@link CnnToFeedForwardPreProcessor} for all new models instead.
*/
@Slf4j
@Slf4j @Deprecated
public class TensorFlowCnnToFeedForwardPreProcessor extends CnnToFeedForwardPreProcessor {
@JsonCreator
@JsonCreator @Deprecated
public TensorFlowCnnToFeedForwardPreProcessor(@JsonProperty("inputHeight") long inputHeight,
@JsonProperty("inputWidth") long inputWidth,
@JsonProperty("numChannels") long numChannels) {
super(inputHeight, inputWidth, numChannels);
}
@Deprecated
public TensorFlowCnnToFeedForwardPreProcessor(long inputHeight, long inputWidth) {
super(inputHeight, inputWidth);
}
@Deprecated
public TensorFlowCnnToFeedForwardPreProcessor() {
super();
}

View File

@ -31,6 +31,7 @@ import org.deeplearning4j.nn.modelimport.keras.layers.advanced.activations.*;
import org.deeplearning4j.nn.modelimport.keras.layers.convolutional.*;
import org.deeplearning4j.nn.modelimport.keras.layers.core.*;
import org.deeplearning4j.nn.modelimport.keras.layers.embeddings.KerasEmbedding;
import org.deeplearning4j.nn.modelimport.keras.layers.local.KerasLocallyConnected1D;
import org.deeplearning4j.nn.modelimport.keras.layers.noise.KerasAlphaDropout;
import org.deeplearning4j.nn.modelimport.keras.layers.noise.KerasGaussianDropout;
import org.deeplearning4j.nn.modelimport.keras.layers.noise.KerasGaussianNoise;
@ -319,6 +320,8 @@ public class KerasLayerUtils {
layer = new KerasELU(layerConfig, enforceTrainingConfig);
} else if(layerClassName.equals(conf.getLAYER_CLASS_NAME_SOFTMAX())){
layer = new KerasSoftmax(layerConfig, enforceTrainingConfig);
} else if (layerClassName.equals(conf.getLAYER_CLASS_NAME_LOCALLY_CONNECTED_1D())){
layer = new KerasLocallyConnected1D(layerConfig, enforceTrainingConfig);
} else if (conf instanceof Keras2LayerConfiguration){
Keras2LayerConfiguration k2conf = (Keras2LayerConfiguration)conf;
if (layerClassName.equals(k2conf.getTENSORFLOW_OP_LAYER())){

View File

@ -1,50 +0,0 @@
/*******************************************************************************
* Copyright (c) 2020 Konduit K.K.
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* SPDX-License-Identifier: Apache-2.0
******************************************************************************/
package org.deeplearning4j.nn.modelimport.keras;
import org.deeplearning4j.BaseDL4JTest;
import org.deeplearning4j.nn.graph.ComputationGraph;
import org.junit.Assert;
import org.junit.Test;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.resources.Resources;
import java.io.File;
import java.util.Arrays;
/**
 * Smoke tests for importing Keras (TensorFlow backend) models that contain
 * TF-specific ops, verifying only the output shapes of the imported graphs.
 */
public class TFKerasTests extends BaseDL4JTest {

    @Test
    public void testModelWithTFOp1() throws Exception {
        // Model containing a TF Reshape op: 12x2x3 input should flatten to 12x3.
        File modelFile = Resources.asFile("modelimport/keras/tfkeras/reshape.h5");
        ComputationGraph graph = KerasModelImport.importKerasModelAndWeights(modelFile.getAbsolutePath());
        INDArray output = graph.outputSingle(Nd4j.zeros(12, 2, 3));
        Assert.assertArrayEquals(new long[]{12, 3}, output.shape());
    }

    @Test
    public void testModelWithTFOp2() throws Exception {
        // Model containing a TF Permute op.
        File modelFile = Resources.asFile("modelimport/keras/tfkeras/permute.h5");
        ComputationGraph graph = KerasModelImport.importKerasModelAndWeights(modelFile.getAbsolutePath());
        INDArray output = graph.outputSingle(Nd4j.zeros(12, 2, 3));
        // dl4j's feedforward doesn't support 3D output, so batch and time axes get squashed
        long[] expectedShape = new long[]{12 * 2, 5};
        Assert.assertArrayEquals(expectedShape, output.shape());
    }
}

View File

@ -0,0 +1,147 @@
/*******************************************************************************
* Copyright (c) 2020 Konduit K.K.
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* SPDX-License-Identifier: Apache-2.0
******************************************************************************/
package org.deeplearning4j.nn.modelimport.keras;
import org.apache.commons.io.FileUtils;
import org.datavec.python.keras.Model;
import org.deeplearning4j.BaseDL4JTest;
import org.deeplearning4j.nn.graph.ComputationGraph;
import org.junit.Assert;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.nd4j.common.tests.ResourceUtils;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.concurrency.AffinityManager;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.resources.Resources;
import java.io.File;
import java.util.List;
/**
 * Parameterized import test: for every Keras HDF5 model found under
 * {@code modelimport/keras/tfkeras} on the classpath, imports the model into DL4J
 * and (when the file carries a "data" group with reference input/output arrays)
 * checks DL4J's output against the stored expected output.
 */
@RunWith(Parameterized.class)
public class TestTFKerasModelImport extends BaseDL4JTest {

    @Rule
    public TemporaryFolder testDir = new TemporaryFolder();

    // Classpath location of the HDF5 model for this parameterized run
    private String modelFile;

    @Override
    public long getTimeoutMilliseconds() {
        return 300000;
    } // installing TF will take a while

    /** One test instance per model file found on the classpath. */
    @Parameterized.Parameters(name = "file={0}")
    public static Object[] params() throws Exception {
        List<String> paths = ResourceUtils.listClassPathFiles("modelimport/keras/tfkeras", true, false);
        return paths.toArray(new String[0]);
    }

    public TestTFKerasModelImport(String modelFile) {
        this.modelFile = modelFile;
    }

    @Test
    public void testModelImport() throws Exception {
        testModelImportWithData(modelFile);
    }

    /**
     * Imports the model at {@code path} and compares DL4J's output with the expected
     * input/output arrays embedded in the file's "data" group. Models without a
     * "data" group are skipped (import-only coverage comes from the other entry point).
     *
     * @param path classpath location of the HDF5 model file
     */
    private void testModelImportWithData(String path) throws Exception {
        System.out.println(path);
        // TODO multi input/output
        INDArray inputArray;
        INDArray expectedOutputArray;
        File f = Resources.asFile(path); // May be in a JAR that HDF5 can't read from
        File modelFile = new File(testDir.getRoot(), f.getName());
        FileUtils.copyFile(f, modelFile);

        synchronized (Hdf5Archive.LOCK_OBJECT) {
            Hdf5Archive hdf5Archive = new Hdf5Archive(modelFile.getAbsolutePath());
            try {
                List<String> rootGroups = hdf5Archive.getGroups();
                if (!rootGroups.contains("data")) {
                    // No embedded reference data - nothing to verify against
                    return;
                }
                String inputName = hdf5Archive.readAttributeAsString("input_names", "data");
                String outputName = hdf5Archive.readAttributeAsString("output_names", "data");
                inputArray = hdf5Archive.readDataSet(inputName, "data");
                expectedOutputArray = hdf5Archive.readDataSet(outputName, "data");
            } finally {
                // Always release the native HDF5 handle, even if a read above throws
                hdf5Archive.close();
            }
        }

        ComputationGraph dl4jModel = KerasModelImport.importKerasModelAndWeights(path);
        INDArray outputArray = dl4jModel.outputSingle(inputArray);

        // Compare in a common dtype
        expectedOutputArray = expectedOutputArray.castTo(DataType.FLOAT);
        outputArray = outputArray.castTo(DataType.FLOAT);
        if (path.contains("misc_")) {
            // shape relaxation
            expectedOutputArray = expectedOutputArray.reshape(-1);
            outputArray = outputArray.reshape(-1);
        }

        System.out.println(outputArray.toString());
        System.out.println(expectedOutputArray.toString());
        Assert.assertArrayEquals(expectedOutputArray.shape(), outputArray.shape());
        Assert.assertTrue(expectedOutputArray.equalsWithEps(outputArray, 1e-3));
    }

    /**
     * Cross-checks a DL4J import against a live Keras model (via the datavec-python
     * bridge) on random inputs. Currently unused by the test runner; kept for manual runs.
     *
     * @param path classpath location of the HDF5 model file
     */
    private void testModelImportWithKeras(String path) throws Exception {
        Model kerasModel = new Model(path);
        ComputationGraph dl4jModel = KerasModelImport.importKerasModelAndWeights(path);
        Assert.assertEquals(kerasModel.numInputs(), dl4jModel.getNumInputArrays());
        Assert.assertEquals(kerasModel.numOutputs(), dl4jModel.getNumOutputArrays());
        INDArray[] kerasInputArrays = new INDArray[kerasModel.numInputs()];
        INDArray[] dl4jInputArrays = new INDArray[kerasModel.numInputs()];

        for (int i = 0; i < kerasInputArrays.length; i++) {
            long[] shape = kerasModel.inputShapeAt(i);
            for (int j = 0; j < shape.length; j++) {
                if (shape[j] < 0) {
                    shape[j] = 1; // replace dynamic (unknown) dims with 1
                }
            }
            kerasInputArrays[i] = Nd4j.rand(shape);
            // Bug fix: feed the SAME inputs to both models. Previously dl4jInputArrays
            // was allocated but never populated, so output() was called on nulls.
            dl4jInputArrays[i] = kerasInputArrays[i];
        }

        INDArray[] kerasOut = kerasModel.predict(kerasInputArrays);
        INDArray[] dl4jOut = dl4jModel.output(dl4jInputArrays);

        Assert.assertEquals(kerasOut.length, dl4jOut.length);

        for (int i = 0; i < kerasOut.length; i++) {
            INDArray kerasOutArr = kerasOut[i];
            kerasOutArr = kerasOutArr.reshape(1, -1); // bit of relaxation on shape
            kerasOutArr = kerasOutArr.castTo(DataType.DOUBLE);
            Nd4j.getAffinityManager().ensureLocation(dl4jOut[i], AffinityManager.Location.HOST);
            // Bug fix: cast the DL4J output to the same dtype before comparing;
            // INDArray equality is dtype-sensitive and DL4J typically outputs FLOAT.
            INDArray dl4jOutArr = dl4jOut[i].reshape(1, -1).castTo(DataType.DOUBLE);
            System.out.println(kerasOutArr.shapeInfoToString());
            System.out.println(dl4jOutArr.shapeInfoToString());
            Assert.assertEquals(kerasOutArr, dl4jOutArr);
        }
    }
}

Some files were not shown because too many files have changed in this diff Show More