diff --git a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTMTest.java b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTMTest.java index 297067862..751b6f6bf 100644 --- a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTMTest.java +++ b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTMTest.java @@ -186,7 +186,7 @@ public class GravesBidirectionalLSTMTest extends BaseDL4JTest { lstm.getParam(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS), lstm.getParam(GravesBidirectionalLSTMParamInitializer.BIAS_KEY_FORWARDS), false, null, null, false, true, GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS, null, true, - null, CacheMode.NONE, LayerWorkspaceMgr.noWorkspaces()).fwdPassOutput; + null, CacheMode.NONE, LayerWorkspaceMgr.noWorkspaces(), true).fwdPassOutput; final INDArray[] fwdPassTrue = LSTMHelpers.activateHelper(lstm, lstm.conf(), new ActivationSigmoid(), lstm.input(), @@ -194,7 +194,7 @@ public class GravesBidirectionalLSTMTest extends BaseDL4JTest { lstm.getParam(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS), lstm.getParam(GravesBidirectionalLSTMParamInitializer.BIAS_KEY_FORWARDS), false, null, null, true, true, GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS, null, true, null, - CacheMode.NONE, LayerWorkspaceMgr.noWorkspaces()).fwdPassOutputAsArrays; + CacheMode.NONE, LayerWorkspaceMgr.noWorkspaces(), true).fwdPassOutputAsArrays; //I have no idea what the heck this does --Ben for (int i = 0; i < timeSeriesLength; i++) { diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/Dropout.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/Dropout.java index f9af153ad..ee8bbdc64 100644 --- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/Dropout.java +++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/dropout/Dropout.java @@ -18,6 +18,8 @@ package org.deeplearning4j.nn.conf.dropout; import lombok.Data; import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.Setter; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.nn.workspace.ArrayType; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; @@ -26,11 +28,11 @@ import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.impl.transforms.pairwise.arithmetic.MulOp; import org.nd4j.linalg.api.ops.random.impl.DropOutInverted; +import org.nd4j.linalg.exception.ND4JOpProfilerException; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.schedule.ISchedule; import org.nd4j.shade.jackson.annotation.JsonIgnoreProperties; import org.nd4j.shade.jackson.annotation.JsonProperty; -import org.nd4j.util.OneTimeLogger; /** * Implements standard (inverted) dropout.
@@ -64,17 +66,29 @@ import org.nd4j.util.OneTimeLogger;
  * @author Alex Black
  */
 @Data
-@JsonIgnoreProperties({"mask", "helper"})
-@EqualsAndHashCode(exclude = {"mask", "helper"})
+@JsonIgnoreProperties({"mask", "helper", "helperCountFail"})
+@EqualsAndHashCode(exclude = {"mask", "helper", "helperCountFail"})
 @Slf4j
 public class Dropout implements IDropout {
 
+    /**
+     * When using CuDNN and an error is encountered, should fallback to the non-CuDNN implementation be allowed?
+     * If set to false, an exception in CuDNN will be propagated back to the user. If true, the built-in
+     * (non-CuDNN) implementation for Dropout will be used
+     *
+     */
+    @Getter
+    @Setter
+    protected boolean helperAllowFallback = true;
+
     private double p;
     private ISchedule pSchedule;
     private transient INDArray mask;
     private transient DropoutHelper helper;
     private boolean initializedHelper = false;
 
+    private int helperCountFail = 0;
+
     /**
      * @param activationRetainProbability Probability of retaining an activation - see {@link Dropout} javadoc
      */
@@ -96,6 +110,18 @@ public class Dropout implements IDropout {
         this(Double.NaN, activationRetainProbabilitySchedule);
     }
 
+    /**
+     * When using a helper (CuDNN or MKLDNN in some cases) and an error is encountered, should fallback to the non-helper implementation be allowed?
+     * If set to false, an exception in the helper will be propagated back to the user. If true, the built-in
+     * (non-helper) implementation for Dropout will be used
+     *
+     * @param allowFallback Whether fallback to non-helper implementation should be used
+     */
+    public Dropout helperAllowFallback(boolean allowFallback) {
+        this.setHelperAllowFallback(allowFallback);
+        return this;
+    }
+
     protected Dropout(@JsonProperty("p") double activationRetainProbability, @JsonProperty("pSchedule") ISchedule activationRetainProbabilitySchedule) {
         this.p = activationRetainProbability;
         this.pSchedule = activationRetainProbabilitySchedule;
@@ -141,9 +167,29 @@ public class Dropout implements IDropout {
             initializeHelper(output.dataType());
         }
 
-        if(helper != null){
-            helper.applyDropout(inputActivations, output, p);
-            return output;
+        if(helper != null && (helperCountFail == 0 || !isHelperAllowFallback())){
+            boolean helperWorked = false;
+            try {
+                helper.applyDropout(inputActivations, output, p);
+                helperWorked = true;
+            }catch (ND4JOpProfilerException e){
+                throw e;    //NaN panic etc for debugging
+            } catch (Exception e){
+                if(e.getMessage().contains("Failed to allocate")){
+                    //This is a memory exception - don't fallback to built-in implementation
+                    throw e;
+                }
+
+                if(isHelperAllowFallback()){
+                    helperCountFail++;
+                    log.warn("CuDNN execution failed - falling back on built-in implementation",e);
+                } else {
+                    throw new RuntimeException("Error during Dropout CuDNN helper forward pass - helperAllowFallback() is set to false", e);
+                }
+            }
+
+            if(helperWorked)
+                return output;
         }
 
         INDArray inputCast = inputActivations;
@@ -159,9 +205,29 @@ public class Dropout implements IDropout {
 
     @Override
     public INDArray backprop(INDArray gradAtOutput, INDArray gradAtInput, int iteration, int epoch) {
-        if(helper != null){
-            helper.backprop(gradAtOutput, gradAtInput);
-            return gradAtInput;
+        if(helper != null && (helperCountFail == 0 || !isHelperAllowFallback())){
+            boolean helperWorked = false;
+            try {
+                helper.backprop(gradAtOutput, gradAtInput);
+                helperWorked = true;
+            }catch (ND4JOpProfilerException e){
+                throw e;    //NaN panic etc for debugging
+            } catch (Exception e){
+                if(e.getMessage().contains("Failed to allocate")){
+                    //This is a memory exception - don't fallback to built-in implementation
+                    throw e;
+                }
+
+                if(isHelperAllowFallback()){
+                    helperCountFail++;
+                    log.warn("CuDNN execution failed - falling back on built-in implementation",e);
+                } else {
+                    throw new RuntimeException("Error during Dropout CuDNN helper backprop - helperAllowFallback() is set to false", e);
+                }
+            }
+
+            if(helperWorked)
+                return gradAtInput;
         }
 
         Preconditions.checkState(mask != null, "Cannot perform backprop: Dropout mask array is absent (already cleared?)");
diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AbstractLSTM.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AbstractLSTM.java
index 82bda5647..b051c4b36 100644
--- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AbstractLSTM.java
+++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/AbstractLSTM.java
@@ -17,8 +17,6 @@
 package org.deeplearning4j.nn.conf.layers;
 
 import lombok.*;
-import org.deeplearning4j.nn.params.LSTMParamInitializer;
-import org.deeplearning4j.nn.weights.WeightInit;
 import org.nd4j.linalg.activations.Activation;
 import org.nd4j.linalg.activations.IActivation;
 import org.nd4j.linalg.activations.impl.ActivationSigmoid;
@@ -35,11 +33,13 @@ public abstract class AbstractLSTM extends BaseRecurrentLayer {
 
     protected double forgetGateBiasInit;
     protected IActivation gateActivationFn = new ActivationSigmoid();
+    protected boolean helperAllowFallback = true;
 
     protected AbstractLSTM(Builder builder) {
         super(builder);
         this.forgetGateBiasInit = builder.forgetGateBiasInit;
         this.gateActivationFn = builder.gateActivationFn;
+        this.helperAllowFallback = builder.helperAllowFallback;
     }
 
     @AllArgsConstructor
@@ -60,6 +60,14 @@ public abstract class AbstractLSTM extends BaseRecurrentLayer {
          */
         protected IActivation gateActivationFn = new ActivationSigmoid();
 
+        /**
+         * When using CuDNN and an error is encountered, should fallback to the non-CuDNN implementation be allowed?
+         * If set to false, an exception in CuDNN will be propagated back to the user. If true, the built-in
+         * (non-CuDNN) implementation for LSTM/GravesLSTM will be used
+         *
+         */
+        protected boolean helperAllowFallback = true;
+
         /**
          * Set forget gate bias initalizations. Values in range 1-5 can potentially help with learning or longer-term
          * dependencies.
@@ -100,6 +108,18 @@ public abstract class AbstractLSTM extends BaseRecurrentLayer {
             return (T) this;
         }
 
+        /**
+         * When using a helper (CuDNN or MKLDNN in some cases) and an error is encountered, should fallback to the non-helper implementation be allowed?
+         * If set to false, an exception in the helper will be propagated back to the user. 
If false, the built-in + * (non-helper) implementation for LSTM/GravesLSTM will be used + * + * @param allowFallback Whether fallback to non-helper implementation should be used + */ + public T helperAllowFallback(boolean allowFallback) { + this.setHelperAllowFallback(allowFallback); + return (T) this; + } + } } diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BatchNormalization.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BatchNormalization.java index 53c00acac..4c470fec5 100644 --- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BatchNormalization.java +++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BatchNormalization.java @@ -428,16 +428,31 @@ public class BatchNormalization extends FeedForwardLayer { /** * When using CuDNN and an error is encountered, should fallback to the non-CuDNN implementatation be allowed? - * If set to false, an exception in CuDNN will be propagated back to the user. If false, the built-in + * If set to false, an exception in CuDNN will be propagated back to the user. If true, the built-in * (non-CuDNN) implementation for BatchNormalization will be used * + * @deprecated Use {@link #helperAllowFallback(boolean)} + * * @param allowFallback Whether fallback to non-CuDNN implementation should be used */ + @Deprecated public Builder cudnnAllowFallback(boolean allowFallback) { this.setCudnnAllowFallback(allowFallback); return this; } + /** + * When using CuDNN or MKLDNN and an error is encountered, should fallback to the non-helper implementation be allowed? + * If set to false, an exception in the helper will be propagated back to the user. If true, the built-in + * (non-MKL/CuDNN) implementation for BatchNormalizationLayer will be used + * + * @param allowFallback Whether fallback to non-CuDNN implementation should be used + */ + public Builder helperAllowFallback(boolean allowFallback) { + this.cudnnAllowFallback = allowFallback; + return this; + } + /** * How should the moving average of variance be stored? Two different parameterizations are supported. * useLogStd(false): equivalent to 1.0.0-beta3 and earlier. The variance "parameter" is stored directly as diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java index 3d2e35d24..4fdf1e9cc 100644 --- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java +++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/ConvolutionLayer.java @@ -533,14 +533,29 @@ public class ConvolutionLayer extends FeedForwardLayer { /** * When using CuDNN and an error is encountered, should fallback to the non-CuDNN implementatation be allowed? - * If set to false, an exception in CuDNN will be propagated back to the user. If false, the built-in + * If set to false, an exception in CuDNN will be propagated back to the user. 
If true, the built-in
          * (non-CuDNN) implementation for ConvolutionLayer will be used
          *
+         * @deprecated Use {@link #helperAllowFallback(boolean)}
+         *
          * @param allowFallback Whether fallback to non-CuDNN implementation should be used
          */
+        @Deprecated
         public T cudnnAllowFallback(boolean allowFallback) {
             this.setCudnnAllowFallback(allowFallback);
             return (T) this;
         }
+
+        /**
+         * When using CuDNN or MKLDNN and an error is encountered, should fallback to the non-helper implementation be allowed?
+         * If set to false, an exception in the helper will be propagated back to the user. If true, the built-in
+         * (non-MKL/CuDNN) implementation for ConvolutionLayer will be used
+         *
+         * @param allowFallback Whether fallback to non-CuDNN implementation should be used
+         */
+        public T helperAllowFallback(boolean allowFallback) {
+            this.cudnnAllowFallback = allowFallback;
+            return (T) this;
+        }
     }
 }
diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesBidirectionalLSTM.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesBidirectionalLSTM.java
index d7aa869a1..1a2a89a24 100644
--- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesBidirectionalLSTM.java
+++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesBidirectionalLSTM.java
@@ -53,11 +53,13 @@ public class GravesBidirectionalLSTM extends BaseRecurrentLayer {
 
     private double forgetGateBiasInit;
     private IActivation gateActivationFn = new ActivationSigmoid();
+    protected boolean helperAllowFallback = true;
 
     private GravesBidirectionalLSTM(Builder builder) {
         super(builder);
         this.forgetGateBiasInit = builder.forgetGateBiasInit;
         this.gateActivationFn = builder.gateActivationFn;
+        this.helperAllowFallback = builder.helperAllowFallback;
         initializeConstraints(builder);
     }
 
@@ -123,6 +125,14 @@ public class GravesBidirectionalLSTM extends BaseRecurrentLayer {
          */
         private IActivation gateActivationFn = new ActivationSigmoid();
 
+        /**
+         * When using CuDNN and an error is encountered, should fallback to the non-CuDNN implementation be allowed?
+         * If set to false, an exception in CuDNN will be propagated back to the user. If true, the built-in
+         * (non-CuDNN) implementation for GravesBidirectionalLSTM will be used
+         *
+         */
+        protected boolean helperAllowFallback = true;
+
         /**
          * Set forget gate bias initalizations. Values in range 1-5 can potentially help with learning or longer-term
          * dependencies.
@@ -163,6 +173,18 @@ public class GravesBidirectionalLSTM extends BaseRecurrentLayer {
             return this;
         }
 
+        /**
+         * When using a helper (CuDNN or MKLDNN in some cases) and an error is encountered, should fallback to the non-helper implementation be allowed?
+         * If set to false, an exception in the helper will be propagated back to the user. 
If false, the built-in + * (non-helper) implementation for GravesBidirectionalLSTM will be used + * + * @param allowFallback Whether fallback to non-helper implementation should be used + */ + public Builder helperAllowFallback(boolean allowFallback) { + this.setHelperAllowFallback(allowFallback); + return (Builder) this; + } + @SuppressWarnings("unchecked") public GravesBidirectionalLSTM build() { return new GravesBidirectionalLSTM(this); diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocalResponseNormalization.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocalResponseNormalization.java index dfc2df9c8..b16703569 100644 --- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocalResponseNormalization.java +++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/LocalResponseNormalization.java @@ -238,16 +238,31 @@ public class LocalResponseNormalization extends Layer { /** * When using CuDNN and an error is encountered, should fallback to the non-CuDNN implementatation be allowed? - * If set to false, an exception in CuDNN will be propagated back to the user. If false, the built-in + * If set to false, an exception in CuDNN will be propagated back to the user. If true, the built-in * (non-CuDNN) implementation for BatchNormalization will be used * + * @deprecated Use {@link #helperAllowFallback(boolean)} + * * @param allowFallback Whether fallback to non-CuDNN implementation should be used */ + @Deprecated public Builder cudnnAllowFallback(boolean allowFallback) { this.setCudnnAllowFallback(allowFallback); return this; } + /** + * When using CuDNN or MKLDNN and an error is encountered, should fallback to the non-helper implementation be allowed? + * If set to false, an exception in the helper will be propagated back to the user. If true, the built-in + * (non-MKL/CuDNN) implementation for LocalResponseNormalizationLayer will be used + * + * @param allowFallback Whether fallback to non-CuDNN implementation should be used + */ + public Builder helperAllowFallback(boolean allowFallback) { + this.cudnnAllowFallback = allowFallback; + return this; + } + @Override public LocalResponseNormalization build() { return new LocalResponseNormalization(this); diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling3DLayer.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling3DLayer.java index 877e216da..0d0ccba9b 100644 --- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling3DLayer.java +++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Subsampling3DLayer.java @@ -455,15 +455,30 @@ public class Subsampling3DLayer extends NoParamLayer { /** * When using CuDNN and an error is encountered, should fallback to the non-CuDNN implementatation be allowed? - * If set to false, an exception in CuDNN will be propagated back to the user. If false, the built-in + * If set to false, an exception in CuDNN will be propagated back to the user. 
If true, the built-in * (non-CuDNN) implementation for ConvolutionLayer will be used * + * @deprecated Use {@link #helperAllowFallback(boolean)} + * * @param allowFallback Whether fallback to non-CuDNN implementation should be used */ + @Deprecated public T cudnnAllowFallback(boolean allowFallback) { this.setCudnnAllowFallback(allowFallback); return (T) this; } + + /** + * When using CuDNN or MKLDNN and an error is encountered, should fallback to the non-helper implementation be allowed? + * If set to false, an exception in the helper will be propagated back to the user. If true, the built-in + * (non-MKL/CuDNN) implementation for Subsampling3DLayer will be used + * + * @param allowFallback Whether fallback to non-CuDNN implementation should be used + */ + public T helperAllowFallback(boolean allowFallback) { + this.cudnnAllowFallback = allowFallback; + return (T) this; + } } } diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java index b2e4df6b8..c20526cf1 100644 --- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java +++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java @@ -480,17 +480,32 @@ public class SubsamplingLayer extends NoParamLayer { } /** - * When using CuDNN and an error is encountered, should fallback to the non-CuDNN implementatation be allowed? - * If set to false, an exception in CuDNN will be propagated back to the user. If false, the built-in - * (non-CuDNN) implementation for ConvolutionLayer will be used + * When using CuDNN or MKLDNN and an error is encountered, should fallback to the non-helper implementation be allowed? + * If set to false, an exception in the helper will be propagated back to the user. If true, the built-in + * (non-MKL/CuDNN) implementation for ConvolutionLayer will be used + * + * @deprecated Use {@link #helperAllowFallback(boolean)} * * @param allowFallback Whether fallback to non-CuDNN implementation should be used */ + @Deprecated public T cudnnAllowFallback(boolean allowFallback) { this.cudnnAllowFallback = allowFallback; return (T) this; } + /** + * When using CuDNN or MKLDNN and an error is encountered, should fallback to the non-helper implementation be allowed? + * If set to false, an exception in the helper will be propagated back to the user. If true, the built-in + * (non-MKL/CuDNN) implementation for SubsamplingLayer will be used + * + * @param allowFallback Whether fallback to non-CuDNN implementation should be used + */ + public T helperAllowFallback(boolean allowFallback) { + this.cudnnAllowFallback = allowFallback; + return (T) this; + } + /** * When doing average pooling, should the padding values be included in the divisor or not?
* Not applicable for max and p-norm pooling.
diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayer.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayer.java index d6cab0273..75e265b4e 100644 --- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayer.java +++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/convolution/ConvolutionLayer.java @@ -378,7 +378,7 @@ public class ConvolutionLayer extends BaseLayer tBpttStateMap = new ConcurrentHashMap<>(); + protected int helperCountFail = 0; + public BaseRecurrentLayer(NeuralNetConfiguration conf, DataType dataType) { super(conf, dataType); } diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTM.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTM.java index 78e15e167..6fc96dc80 100644 --- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTM.java +++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesBidirectionalLSTM.java @@ -17,7 +17,6 @@ package org.deeplearning4j.nn.layers.recurrent; import lombok.extern.slf4j.Slf4j; -import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.CacheMode; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -90,7 +89,8 @@ public class GravesBidirectionalLSTM final FwdPassReturn fwdPass = activateHelperDirectional(true, null, null, true, true, workspaceMgr); - final Pair forwardsGradient = LSTMHelpers.backpropGradientHelper(this.conf, + final Pair forwardsGradient = LSTMHelpers.backpropGradientHelper(this, + this.conf, this.layerConf().getGateActivationFn(), this.input, getParam(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_FORWARDS), getParam(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS), epsilon, @@ -98,13 +98,14 @@ public class GravesBidirectionalLSTM GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS, GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_FORWARDS, GravesBidirectionalLSTMParamInitializer.BIAS_KEY_FORWARDS, gradientViews, maskArray, true, - null, workspaceMgr); + null, workspaceMgr, layerConf().isHelperAllowFallback()); final FwdPassReturn backPass = activateHelperDirectional(true, null, null, true, false, workspaceMgr); - final Pair backwardsGradient = LSTMHelpers.backpropGradientHelper(this.conf, + final Pair backwardsGradient = LSTMHelpers.backpropGradientHelper(this, + this.conf, this.layerConf().getGateActivationFn(), this.input, getParam(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_BACKWARDS), getParam(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_BACKWARDS), epsilon, @@ -112,7 +113,7 @@ public class GravesBidirectionalLSTM GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_BACKWARDS, GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_BACKWARDS, GravesBidirectionalLSTMParamInitializer.BIAS_KEY_BACKWARDS, gradientViews, maskArray, true, - null, workspaceMgr); + null, workspaceMgr, layerConf().isHelperAllowFallback()); //merge the gradient, which is key value pair of String,INDArray @@ -175,7 +176,7 @@ public class GravesBidirectionalLSTM getParam(GravesBidirectionalLSTMParamInitializer.BIAS_KEY_FORWARDS), training, null, null, 
forBackprop || (cacheMode != CacheMode.NONE && training), true, GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS, maskArray, true, null, - forBackprop ? cacheMode : CacheMode.NONE, workspaceMgr); + forBackprop ? cacheMode : CacheMode.NONE, workspaceMgr, layerConf().isHelperAllowFallback()); backwardsEval = LSTMHelpers.activateHelper(this, this.conf, this.layerConf().getGateActivationFn(), this.input, @@ -184,7 +185,7 @@ public class GravesBidirectionalLSTM getParam(GravesBidirectionalLSTMParamInitializer.BIAS_KEY_BACKWARDS), training, null, null, forBackprop || (cacheMode != CacheMode.NONE && training), false, GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_BACKWARDS, maskArray, true, null, - forBackprop ? cacheMode : CacheMode.NONE, workspaceMgr); + forBackprop ? cacheMode : CacheMode.NONE, workspaceMgr, layerConf().isHelperAllowFallback()); cachedPassForward = forwardsEval; cachedPassBackward = backwardsEval; @@ -230,7 +231,7 @@ public class GravesBidirectionalLSTM return LSTMHelpers.activateHelper(this, this.conf, this.layerConf().getGateActivationFn(), this.input, getParam(recurrentKey), getParam(inputKey), getParam(biasKey), training, prevOutputActivations, prevMemCellState, forBackprop, forwards, inputKey, maskArray, true, - null, forBackprop ? cacheMode : CacheMode.NONE, workspaceMgr); + null, forBackprop ? cacheMode : CacheMode.NONE, workspaceMgr, layerConf().isHelperAllowFallback()); } } diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTM.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTM.java index a2f38b324..13f30b8bb 100644 --- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTM.java +++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTM.java @@ -17,7 +17,6 @@ package org.deeplearning4j.nn.layers.recurrent; import lombok.extern.slf4j.Slf4j; -import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.CacheMode; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; @@ -92,11 +91,12 @@ public class GravesLSTM extends BaseRecurrentLayer p = LSTMHelpers.backpropGradientHelper(this.conf, this.layerConf().getGateActivationFn(), this.input, + Pair p = LSTMHelpers.backpropGradientHelper(this, + this.conf, this.layerConf().getGateActivationFn(), this.input, recurrentWeights, inputWeights, epsilon, truncatedBPTT, tbpttBackwardLength, fwdPass, true, GravesLSTMParamInitializer.INPUT_WEIGHT_KEY, GravesLSTMParamInitializer.RECURRENT_WEIGHT_KEY, GravesLSTMParamInitializer.BIAS_KEY, gradientViews, maskArray, true, null, - workspaceMgr); + workspaceMgr, layerConf().isHelperAllowFallback()); weightNoiseParams.clear(); p.setSecond(backpropDropOutIfPresent(p.getSecond())); @@ -141,7 +141,7 @@ public class GravesLSTM extends BaseRecurrentLayer p = LSTMHelpers.backpropGradientHelper(this.conf, this.layerConf().getGateActivationFn(), this.input, + Pair p = LSTMHelpers.backpropGradientHelper(this, + this.conf, this.layerConf().getGateActivationFn(), this.input, recurrentWeights, inputWeights, epsilon, truncatedBPTT, tbpttBackwardLength, fwdPass, true, LSTMParamInitializer.INPUT_WEIGHT_KEY, LSTMParamInitializer.RECURRENT_WEIGHT_KEY, - LSTMParamInitializer.BIAS_KEY, gradientViews, null, false, helper, workspaceMgr); + LSTMParamInitializer.BIAS_KEY, gradientViews, null, false, helper, workspaceMgr, + 
layerConf().isHelperAllowFallback()); weightNoiseParams.clear(); p.setSecond(backpropDropOutIfPresent(p.getSecond())); @@ -161,7 +160,7 @@ public class LSTM extends BaseRecurrentLayer backpropGradientHelper(final NeuralNetConfiguration conf, + static public Pair backpropGradientHelper(final BaseRecurrentLayer layer, final NeuralNetConfiguration conf, final IActivation gateActivationFn, INDArray input, final INDArray recurrentWeights, //Shape: [hiddenLayerSize,4*hiddenLayerSize+3]; order: [wI,wF,wO,wG,wFF,wOO,wGG] final INDArray inputWeights, //Shape: [n^(L-1),4*hiddenLayerSize]; order: [wi,wf,wo,wg] final INDArray epsilon, final boolean truncatedBPTT, final int tbpttBackwardLength, @@ -433,7 +453,8 @@ public class LSTMHelpers { final Map gradientViews, INDArray maskArray, //Input mask: should only be used with bidirectional RNNs + variable length final boolean hasPeepholeConnections, //True for GravesLSTM, false for LSTM final LSTMHelper helper, - final LayerWorkspaceMgr workspaceMgr) { + final LayerWorkspaceMgr workspaceMgr, + final boolean isHelperAllowFallback) { input = input.castTo(inputWeights.dataType()); //No-op if @@ -496,11 +517,29 @@ public class LSTMHelpers { rwGradientsGG = rwGradientsOut.get(all(), NDArrayIndex.point(4 * hiddenLayerSize + 2)).reshape(1, recurrentWeights.size(0)); } - if (helper != null) { - Pair ret = helper.backpropGradient(conf, gateActivationFn, input, recurrentWeights, - inputWeights, epsilon, truncatedBPTT, tbpttBackwardLength, fwdPass, forwards, - inputWeightKey, recurrentWeightKey, biasWeightKey, gradientViews, maskArray, - hasPeepholeConnections, workspaceMgr); + if (helper != null && (layer.helperCountFail == 0 || !isHelperAllowFallback)) { + Pair ret = null; + try { + ret = helper.backpropGradient(conf, gateActivationFn, input, recurrentWeights, + inputWeights, epsilon, truncatedBPTT, tbpttBackwardLength, fwdPass, forwards, + inputWeightKey, recurrentWeightKey, biasWeightKey, gradientViews, maskArray, + hasPeepholeConnections, workspaceMgr); + }catch (ND4JOpProfilerException e){ + throw e; //NaN panic etc for debugging + } catch (Exception e){ + if(e.getMessage().contains("Failed to allocate")){ + //This is a memory exception - don't fallback to built-in implementation + throw e; + } + + if(isHelperAllowFallback){ + layer.helperCountFail++; + log.warn("MKL/CuDNN execution failed - falling back on built-in implementation",e); + } else { + throw new RuntimeException("Error during LSTM MKL/CuDNN helper backprop - helperAllowFallback() is set to false", e); + } + } + if (ret != null) { return ret; } diff --git a/docs/deeplearning4j/templates/benchmark.md b/docs/deeplearning4j/templates/benchmark.md index 330ff99a6..93e30fda9 100644 --- a/docs/deeplearning4j/templates/benchmark.md +++ b/docs/deeplearning4j/templates/benchmark.md @@ -45,7 +45,7 @@ Ideally, these should be excluded from any timing/performance results you report For example: what BLAS implementation (MKL, OpenBLAS, etc)? If you are using CUDA, are you using CuDNN? ND4J and DL4J can use these libraries (MKL, CuDNN) when they are available - but are not always available by default. If they are not made available, performance can be lower - sometimes considerably. -This is especially important when comparing results between libraries: for example, if you compared two libraries (one using OpenBLAS, another using MLK) your results may simply reflect the performance differences it the BLAS library being used - and not the performance oth the libraries being tested. 
Similarly, one library with CuDNN and another without CuDNN may simply reflect the performance benefit of using CuDNN.
+This is especially important when comparing results between libraries: for example, if you compared two libraries (one using OpenBLAS, another using MKL) your results may simply reflect the performance differences in the BLAS library being used - and not the performance of the libraries being tested. Similarly, one library with CuDNN and another without CuDNN may simply reflect the performance benefit of using CuDNN.
 
 3. How are things configured?
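
Example usage of the new option (an illustrative sketch only - the class name, layer sizes, kernel size and dropout probability below are invented for the example, not taken from the patch). The per-layer `helperAllowFallback(boolean)` setters added in this PR replace the now-deprecated `cudnnAllowFallback(boolean)` and control whether a CuDNN/MKL-DNN helper failure falls back to the built-in implementation or is re-thrown:

```java
import org.deeplearning4j.nn.conf.dropout.Dropout;
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
import org.deeplearning4j.nn.conf.layers.LSTM;

public class HelperFallbackConfigExample {
    public static void main(String[] args) {
        // Dropout: propagate any helper (CuDNN/MKL-DNN) failure to the caller instead of
        // silently falling back to the built-in implementation
        Dropout dropout = new Dropout(0.5).helperAllowFallback(false);

        // LSTM: keep the default (fallback allowed) - on the first helper failure a warning
        // is logged, the built-in implementation is used, and the helper is skipped afterwards
        LSTM lstm = new LSTM.Builder()
                .nIn(128)
                .nOut(256)
                .helperAllowFallback(true)
                .build();

        // Convolution: helperAllowFallback(boolean) supersedes the deprecated cudnnAllowFallback(boolean)
        ConvolutionLayer conv = new ConvolutionLayer.Builder(3, 3)
                .nIn(1)
                .nOut(16)
                .helperAllowFallback(true)
                .build();
    }
}
```

Note that in all of the changed code paths, helper exceptions whose message contains "Failed to allocate" (out of memory) are always re-thrown, regardless of this setting.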