diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java
index 99f8aeff0..00c0cf7d6 100755
--- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java
+++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java
@@ -2278,6 +2278,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork {
         LayerWorkspaceMgr allNone = noWS ? LayerWorkspaceMgr.noWorkspaces(helperWorkspaces) : null;
         List[] closeAtEndIteraton = (List[])new List[topologicalOrder.length];
         MemoryWorkspace initialWorkspace = Nd4j.getMemoryManager().getCurrentWorkspace();
+        Throwable t = null;
         try {
             for (int i = 0; i <= stopIndex; i++) {
                 GraphVertex current = vertices[topologicalOrder[i]];
@@ -2302,14 +2303,14 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork {
                                 .with(ArrayType.RNN_FF_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM_CONFIG)
                                 .build();
 
-                    if(detachedInputs){
+                    if (detachedInputs) {
                         //Sometimes (like: external errors use cases) we don't want the activations/inputs to be
                         // in a workspace
                         workspaceMgr.setScopedOutFor(ArrayType.INPUT);
                         workspaceMgr.setScopedOutFor(ArrayType.ACTIVATIONS);
                     } else {
                         //Don't leverage out of async MultiDataSetIterator workspaces
-                        if(features[0].isAttached()){
+                        if (features[0].isAttached()) {
                             workspaceMgr.setNoLeverageOverride(features[0].data().getParentWorkspace().getId());
                         }
                     }
@@ -2326,7 +2327,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork {
                 if (ArrayUtils.contains(layerIndexes, vIdx)) {
                     isRequiredOutput = true;
 
-                    if(outputWorkspace != null && !(outputWorkspace instanceof DummyWorkspace)){
+                    if (outputWorkspace != null && !(outputWorkspace instanceof DummyWorkspace)) {
                         //Place activations in user-specified workspace
                         origWSAct = workspaceMgr.getWorkspaceName(ArrayType.ACTIVATIONS);
                         origWSActConf = workspaceMgr.getConfiguration(ArrayType.ACTIVATIONS);
@@ -2345,7 +2346,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork {
                 //Open the relevant workspace for the activations.
                 //Note that this will be closed only once the current vertex's activations have been consumed
                 MemoryWorkspace wsActivations = null;
-                if(outputWorkspace == null || outputWorkspace instanceof DummyWorkspace || !isRequiredOutput ){    //Open WS if (a) no external/output WS (if present, it's already open), or (b) not being placed in external/output WS
+                if (outputWorkspace == null || outputWorkspace instanceof DummyWorkspace || !isRequiredOutput) {    //Open WS if (a) no external/output WS (if present, it's already open), or (b) not being placed in external/output WS
                     wsActivations = workspaceMgr.notifyScopeEntered(ArrayType.ACTIVATIONS);
                     openActivationsWorkspaces.put(wsActivations, workspaceMgr);
                 }
@@ -2353,11 +2354,11 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork {
                 //Note that because we're opening activation workspaces not in any defined order (i.e., workspace
                 // use isn't simply nested), we'll manually override the previous workspace setting. Otherwise, when we
                 // close these workspaces, the "current" workspace may be set to the incorrect one
-                if(wsActivations != null )
+                if (wsActivations != null)
                     wsActivations.setPreviousWorkspace(initialWorkspace);
 
                 int closeableAt = vertexOutputsFullyConsumedByStep[vIdx];
-                if(outputWorkspace == null || outputWorkspace instanceof DummyWorkspace || (wsActivations != null && !outputWorkspace.getId().equals(wsActivations.getId()))) {
+                if (outputWorkspace == null || outputWorkspace instanceof DummyWorkspace || (wsActivations != null && !outputWorkspace.getId().equals(wsActivations.getId()))) {
                     if (closeAtEndIteraton[closeableAt] == null) {
                         closeAtEndIteraton[closeableAt] = new ArrayList<>();
                     }
@@ -2373,18 +2374,18 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork {
                         out = features[vIdx];
                     } else {
 
-                        if(fwdPassType == FwdPassType.STANDARD){
+                        if (fwdPassType == FwdPassType.STANDARD) {
                             //Standard feed-forward case
                             out = current.doForward(train, workspaceMgr);
-                        } else if(fwdPassType == FwdPassType.RNN_TIMESTEP){
+                        } else if (fwdPassType == FwdPassType.RNN_TIMESTEP) {
                             if (current.hasLayer()) {
                                 //Layer
                                 INDArray input = current.getInputs()[0];
                                 Layer l = current.getLayer();
                                 if (l instanceof RecurrentLayer) {
                                     out = ((RecurrentLayer) l).rnnTimeStep(reshapeTimeStepInput(input), workspaceMgr);
-                                } else if(l instanceof org.deeplearning4j.nn.layers.wrapper.BaseWrapperLayer && ((org.deeplearning4j.nn.layers.wrapper.BaseWrapperLayer)l).getUnderlying() instanceof RecurrentLayer){
-                                    RecurrentLayer rl = ((RecurrentLayer) ((org.deeplearning4j.nn.layers.wrapper.BaseWrapperLayer)l).getUnderlying());
+                                } else if (l instanceof org.deeplearning4j.nn.layers.wrapper.BaseWrapperLayer && ((org.deeplearning4j.nn.layers.wrapper.BaseWrapperLayer) l).getUnderlying() instanceof RecurrentLayer) {
+                                    RecurrentLayer rl = ((RecurrentLayer) ((org.deeplearning4j.nn.layers.wrapper.BaseWrapperLayer) l).getUnderlying());
                                     out = rl.rnnTimeStep(reshapeTimeStepInput(input), workspaceMgr);
                                 } else if (l instanceof MultiLayerNetwork) {
                                     out = ((MultiLayerNetwork) l).rnnTimeStep(reshapeTimeStepInput(input));
@@ -2402,7 +2403,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork {
                     validateArrayWorkspaces(workspaceMgr, out, ArrayType.ACTIVATIONS, vName, false, "Feed forward (inference)");
                 }
 
-                if(inputsTo != null) {  //Output vertices may not input to any other vertices
+                if (inputsTo != null) {  //Output vertices may not input to any other vertices
                     for (VertexIndices v : inputsTo) {
                         //Note that we don't have to do anything special here: the activations are always detached in
                         // this method
@@ -2412,13 +2413,13 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork {
                     }
                 }
 
-                if(clearLayerInputs) {
+                if (clearLayerInputs) {
                     current.clear();
                 }
 
-                if(isRequiredOutput){
+                if (isRequiredOutput) {
                     outputs[ArrayUtils.indexOf(layerIndexes, vIdx)] = out;
-                    if(origWSAct != null){
+                    if (origWSAct != null) {
                         //Reset the configuration, as we may reuse this workspace manager...
                         workspaceMgr.setWorkspace(ArrayType.ACTIVATIONS, origWSAct, origWSActConf);
                     }
@@ -2428,14 +2429,16 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork {
                 //Close any activations workspaces that we no longer require
                 //Note that activations workspaces can be closed only once the corresponding output activations have
                 // been fully consumed
-                if(closeAtEndIteraton[i] != null){
-                    for(MemoryWorkspace wsAct : closeAtEndIteraton[i]){
+                if (closeAtEndIteraton[i] != null) {
+                    for (MemoryWorkspace wsAct : closeAtEndIteraton[i]) {
                         wsAct.close();
                         LayerWorkspaceMgr canNowReuse = openActivationsWorkspaces.remove(wsAct);
                         freeWorkspaceManagers.add(canNowReuse);
                     }
                 }
             }
+        } catch (Throwable t2){
+            t = t2;
         } finally {
             //Close all open workspaces... usually this list will be empty, but not if an exception is thrown
             //Though if stopIndex < numLayers, some might still be open
@@ -2444,7 +2447,15 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork {
                 //Edge case here: seems that scoping out can increase the tagScope of the current WS
                 //and if we hit an exception during forward pass, we aren't guaranteed to call close a sufficient
                 // number of times to actually close it, in all cases
-                ws.close();
+                try{
+                    ws.close();
+                } catch (Throwable t2){
+                    if(t != null){
+                        log.error("Encountered second exception while trying to close workspace after initial exception");
+                        log.error("Original exception:", t);
+                        throw t2;
+                    }
+                }
             }
         }
         Nd4j.getMemoryManager().setCurrentWorkspace(initialWorkspace);
@@ -2581,28 +2592,29 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork {
 
         boolean traceLog = log.isTraceEnabled();
 
-        try{
-            for(int i=topologicalOrder.length-1; i>= 0; i--){
+        Throwable t = null;
+        try {
+            for (int i = topologicalOrder.length - 1; i >= 0; i--) {
                 boolean hitFrozen = false;
                 GraphVertex current = vertices[topologicalOrder[i]];
                 int vIdx = current.getVertexIndex();
                 String vertexName = current.getVertexName();
 
-                if(traceLog){
+                if (traceLog) {
                     log.trace("About backprop: {} (\"{}\") - {}", i, vertexName, current.getClass().getSimpleName());
                 }
 
                 //FIXME: make the frozen vertex feature extraction more flexible
-                if (current.hasLayer() && current.getLayer() instanceof FrozenLayer || current instanceof FrozenVertex){
+                if (current.hasLayer() && current.getLayer() instanceof FrozenLayer || current instanceof FrozenVertex) {
                     hitFrozen = true;
                 }
 
-                if (current.isInputVertex() || hitFrozen){
+                if (current.isInputVertex() || hitFrozen) {
                     //Close any activation gradient workspaces that we no longer require
                     //Note that activation gradient workspaces can be closed only once the corresponding activations
                     // gradients have been fully consumed
-                    if(closeAtEndIteraton[i] != null){
-                        for(MemoryWorkspace wsAct : closeAtEndIteraton[i]){
+                    if (closeAtEndIteraton[i] != null) {
+                        for (MemoryWorkspace wsAct : closeAtEndIteraton[i]) {
                             wsAct.close();
                             LayerWorkspaceMgr canNowReuse = openActivationsWorkspaces.remove(wsAct);
                             freeWorkspaceManagers.add(canNowReuse);
@@ -2680,7 +2692,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork {
                     wsActivationGrads.setPreviousWorkspace(initialWorkspace);
 
                     int closeableAt = vertexActGradsFullyConsumedByStep[vIdx];
-                    if(closeableAt >= 0) {
+                    if (closeableAt >= 0) {
                         if (closeAtEndIteraton[closeableAt] == null) {
                             closeAtEndIteraton[closeableAt] = new ArrayList<>();
                         }
@@ -2689,14 +2701,14 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork {
 
                     Pair pair;
                     INDArray[] epsilons;
-                    try(MemoryWorkspace wsWorkingMem = workspaceMgr.notifyScopeEntered(ArrayType.BP_WORKING_MEM)){
+                    try (MemoryWorkspace wsWorkingMem = workspaceMgr.notifyScopeEntered(ArrayType.BP_WORKING_MEM)) {
                         pair = current.doBackward(truncatedBPTT, workspaceMgr);
                         epsilons = pair.getSecond();
 
                         //Validate workspace location for the activation gradients:
                         //validateArrayWorkspaces(LayerWorkspaceMgr mgr, INDArray array, ArrayType arrayType, String vertexName, boolean isInputVertex, String op){
                         for (INDArray epsilon : epsilons) {
-                            if(epsilon != null) {
+                            if (epsilon != null) {
                                 //May be null for EmbeddingLayer, etc
                                 validateArrayWorkspaces(workspaceMgr, epsilon, ArrayType.ACTIVATION_GRAD, vertexName, false, "Backprop");
                             }
@@ -2732,15 +2744,15 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork {
                         tempList.addFirst(new Triple<>(newName, entry.getValue(), g.flatteningOrderForVariable(origName)));
                     }
 
-                    for (Triple t : tempList)
-                        gradients.addFirst(t);
+                    for (Triple triple : tempList)
+                        gradients.addFirst(triple);
                 }
 
                 //Close any activation gradient workspaces that we no longer require
                 //Note that activation gradient workspaces can be closed only once the corresponding activations
                 // gradients have been fully consumed
-                if(closeAtEndIteraton[i] != null){
-                    for(MemoryWorkspace wsAct : closeAtEndIteraton[i]){
+                if (closeAtEndIteraton[i] != null) {
+                    for (MemoryWorkspace wsAct : closeAtEndIteraton[i]) {
                         wsAct.close();
                         LayerWorkspaceMgr canNowReuse = openActivationsWorkspaces.remove(wsAct);
                         freeWorkspaceManagers.add(canNowReuse);
@@ -2748,23 +2760,32 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork {
                     closeAtEndIteraton[i] = null;
                 }
 
-                if(traceLog){
+                if (traceLog) {
                     log.trace("Completed backprop: {} (\"{}\") - {}", i, vertexName, current.getClass().getSimpleName());
                 }
             }
-
+        } catch (Throwable t2){
+            t = t2;
         } finally {
             //Close all open workspaces... usually this list will be empty, but not if an exception is thrown
             for(MemoryWorkspace ws : openActivationsWorkspaces.keySet()){
-                ws.close();
+                try{
+                    ws.close();
+                } catch (Throwable t2){
+                    if(t != null){
+                        log.error("Encountered second exception while trying to close workspace after initial exception");
+                        log.error("Original exception:", t);
+                        throw t2;
+                    }
+                }
             }
             Nd4j.getMemoryManager().setCurrentWorkspace(initialWorkspace);
         }
 
         //Now, add the gradients in the order we need them in for flattening (same as params order)
         Gradient gradient = new DefaultGradient(flattenedGradients);
-        for (Triple t : gradients) {
-            gradient.setGradientFor(t.getFirst(), t.getSecond(), t.getThird());
+        for (Triple tr : gradients) {
+            gradient.setGradientFor(tr.getFirst(), tr.getSecond(), tr.getThird());
        }
 
         this.gradient = gradient;
diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java
index 731ca398b..dd495a620 100755
--- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java
+++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java
@@ -1242,17 +1242,18 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, Neura
 
         boolean traceLog = log.isTraceEnabled();
 
+        Throwable t = null;
         try {
             for (int i = 0; i <= layerIndex; i++) {
                 LayerWorkspaceMgr mgr = (i % 2 == 0 ? mgrEven : mgrOdd);
 
-                if(traceLog){
+                if (traceLog) {
                     log.trace("About to forward pass: {} - {}", i, layers[i].getClass().getSimpleName());
                 }
 
                 //Edge case: for first layer with dropout, inputs can't be in previous workspace (as it hasn't been opened yet)
                 //Hence: put inputs in working memory
-                if(i == 0 && wsm != WorkspaceMode.NONE){
+                if (i == 0 && wsm != WorkspaceMode.NONE) {
                     mgr.setWorkspace(ArrayType.INPUT, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG);
                 }
 
@@ -1268,7 +1269,7 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, Neura
                     temp.setPreviousWorkspace(initialWorkspace);
 
-                    if(i == 0 && input.isAttached()){
+                    if (i == 0 && input.isAttached()) {
                         //Don't leverage out of async DataSetIterator workspaces
                         mgr.setNoLeverageOverride(input.data().getParentWorkspace().getId());
                     }
@@ -1279,8 +1280,8 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, Neura
                     validateArrayWorkspaces(mgr, input, ArrayType.ACTIVATIONS, i, true, "Output of layer (inference)");
                 }
 
-                if ( i == layerIndex ) {
-                    if(outputWorkspace != null && !(outputWorkspace instanceof DummyWorkspace)){
+                if (i == layerIndex) {
+                    if (outputWorkspace != null && !(outputWorkspace instanceof DummyWorkspace)) {
                         //Place activations in user-specified workspace
                         mgr.setWorkspace(ArrayType.ACTIVATIONS, outputWorkspace.getId(), outputWorkspace.getWorkspaceConfiguration());
                     } else {
@@ -1289,15 +1290,15 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, Neura
                     }
                 }
 
-                if(fwdPassType == FwdPassType.STANDARD){
+                if (fwdPassType == FwdPassType.STANDARD) {
                     //Standard feed-forward case
                     input = layers[i].activate(input, train, mgr);
-                } else if(fwdPassType == FwdPassType.RNN_TIMESTEP){
+                } else if (fwdPassType == FwdPassType.RNN_TIMESTEP) {
                     //rnnTimeStep case
                     if (layers[i] instanceof RecurrentLayer) {
                         input = ((RecurrentLayer) layers[i]).rnnTimeStep(reshapeTimeStepInput(input), mgr);
-                    } else if(layers[i] instanceof BaseWrapperLayer && ((BaseWrapperLayer)layers[i]).getUnderlying() instanceof RecurrentLayer){
-                        RecurrentLayer rl = ((RecurrentLayer) ((BaseWrapperLayer)layers[i]).getUnderlying());
+                    } else if (layers[i] instanceof BaseWrapperLayer && ((BaseWrapperLayer) layers[i]).getUnderlying() instanceof RecurrentLayer) {
+                        RecurrentLayer rl = ((RecurrentLayer) ((BaseWrapperLayer) layers[i]).getUnderlying());
                         input = rl.rnnTimeStep(reshapeTimeStepInput(input), mgr);
                     } else if (layers[i] instanceof MultiLayerNetwork) {
                         input = ((MultiLayerNetwork) layers[i]).rnnTimeStep(reshapeTimeStepInput(input));
@@ -1311,34 +1312,51 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, Neura
                     //Validation: Exception if invalid (bad layer implementation)
                     validateArrayWorkspaces(mgr, input, ArrayType.ACTIVATIONS, i, false, "Output of layer (inference)");
 
-                    if(wsActCloseNext != null){
+                    if (wsActCloseNext != null) {
                         wsActCloseNext.close();
                     }
                    wsActCloseNext = temp;
                    temp = null;
                }
 
-                if(traceLog){
+                if (traceLog) {
                     log.trace("Completed forward pass: {} - {}", i, layers[i].getClass().getSimpleName());
                 }
 
                 //Edge case: for first layer with dropout, inputs can't be in previous workspace (as it hasn't been opened yet)
                 //Hence: put inputs in working memory -> set back to default for next use of workspace mgr
-                if(i == 0 && wsm != WorkspaceMode.NONE){
+                if (i == 0 && wsm != WorkspaceMode.NONE) {
                     mgr.setWorkspace(ArrayType.INPUT, WS_LAYER_ACT_2, WS_LAYER_ACT_X_CONFIG);    //Inputs should always be in the previous WS
                 }
             }
-
+        } catch (Throwable t2){
+            t = t2;
         } finally {
             if(wsActCloseNext != null){
-                wsActCloseNext.close();
+                try {
+                    wsActCloseNext.close();
+                } catch (Throwable t2){
+                    if(t != null){
+                        log.error("Encountered second exception while trying to close workspace after initial exception");
+                        log.error("Original exception:", t);
+                        throw t2;
+                    }
+                }
             }
 
             if(temp != null){
                 //Should only be non-null on exception
                 while(temp.isScopeActive()){
                     //For safety, should never occur in theory: a single close() call may not be sufficient, if
                     // workspace scope was borrowed and not properly closed when exception occurred
-                    temp.close();
+                    try{
+                        temp.close();
+                    } catch (Throwable t2){
+                        if(t != null){
+                            log.error("Encountered second exception while trying to close workspace after initial exception");
+                            log.error("Original exception:", t);
+                            throw t2;
+                        }
+                    }
                 }
             }
 
@@ -1871,13 +1889,14 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, Neura
 
         boolean traceLog = log.isTraceEnabled();
 
+        Throwable t = null;
         try {
             for (int i = layers.length - 1; i >= 0; i--) {
                 if (layers[i] instanceof FrozenLayer) {
                     break;
                 }
 
-                if(traceLog){
+                if (traceLog) {
                     log.trace("About to backprop: {} - {}", i, layers[i].getClass().getSimpleName());
                 }
 
@@ -1897,7 +1916,7 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, Neura
 
                 //Open activation gradients WS *then* BP working memory, so BP working memory is opened last for use in layers
                 wsActGradTemp = workspaceMgr.notifyScopeEntered(ArrayType.ACTIVATION_GRAD);
-                try(MemoryWorkspace wsBPWorking = workspaceMgr.notifyScopeEntered(ArrayType.BP_WORKING_MEM)){
+                try (MemoryWorkspace wsBPWorking = workspaceMgr.notifyScopeEntered(ArrayType.BP_WORKING_MEM)) {
 
                     //Note that because we're opening activation workspaces not in a simple nested order, we'll manually
                     // override the previous workspace setting. Otherwise, when we close these workspaces, the "current"
@@ -1907,7 +1926,7 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, Neura
 
                     INDArray eps = (i == layers.length - 1 ? epsilon : currPair.getRight()); //eps is null for OutputLayer
 
-                    if(!tbptt){
+                    if (!tbptt) {
                         //Standard case
                         currPair = layers[i].backpropGradient(eps, workspaceMgr);
                     } else {
@@ -1920,7 +1939,7 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, Neura
                     }
 
-                    if(currPair.getSecond() != null) {
+                    if (currPair.getSecond() != null) {
                         //Edge case: may be null for Embedding layer, for example
                         validateArrayWorkspaces(workspaceMgr, currPair.getSecond(), ArrayType.ACTIVATION_GRAD, i,
                                 false, "Backprop");
 
@@ -1936,38 +1955,56 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, Neura
                         currPair = new Pair<>(currPair.getFirst(), this.layerWiseConfigurations.getInputPreProcess(i)
                                 .backprop(currPair.getSecond(), getInputMiniBatchSize(), workspaceMgr));
-                        if (i > 0 && currPair.getSecond() != null){
+                        if (i > 0 && currPair.getSecond() != null) {
                             validateArrayWorkspaces(workspaceMgr, currPair.getSecond(), ArrayType.ACTIVATION_GRAD, i,
                                     true, "Backprop");
                         }
                     }
 
-                    if(i == 0 ){
-                        if(returnInputActGrad && currPair.getSecond() != null){
+                    if (i == 0) {
+                        if (returnInputActGrad && currPair.getSecond() != null) {
                             currPair.setSecond(currPair.getSecond().detach());
                         } else {
                             currPair.setSecond(null);
                         }
                     }
 
-                    if(wsActGradCloseNext != null){
+                    if (wsActGradCloseNext != null) {
                         wsActGradCloseNext.close();
                     }
                     wsActGradCloseNext = wsActGradTemp;
                     wsActGradTemp = null;
                 }
 
-                if(traceLog){
+                if (traceLog) {
                     log.trace("Completed backprop: {} - {}", i, layers[i].getClass().getSimpleName());
                 }
             }
+        } catch (Throwable thr ){
+            t = thr;
         } finally {
             if(wsActGradCloseNext != null){
-                wsActGradCloseNext.close();
+                try {
+                    wsActGradCloseNext.close();
+                } catch (Throwable t2){
+                    if(t != null){
+                        log.error("Encountered second exception while trying to close workspace after initial exception");
+                        log.error("Original exception:", t);
+                        throw t2;
+                    }
+                }
            }
 
-            if(wsActGradTemp != null){
+            if(wsActGradTemp != null) {
                 //Should only be non-null on exception
-                wsActGradTemp.close();
+                try {
+                    wsActGradTemp.close();
+                } catch (Throwable t2) {
+                    if (t != null) {
+                        log.error("Encountered second exception while trying to close workspace after initial exception");
+                        log.error("Original exception:", t);
+                        throw t2;
+                    }
+                }
             }
             Nd4j.getMemoryManager().setCurrentWorkspace(initialWorkspace);
         }
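
Note: every hunk above applies the same cleanup pattern. The first Throwable thrown by the main forward/backward loop is captured into t, each workspace close() in the finally block is then attempted individually, and if a close() itself fails after an earlier failure, the original exception is logged before the close failure is propagated, so the root cause is not silently replaced by a secondary cleanup error. The sketch below is a minimal, self-contained illustration of that pattern, not the actual ND4J API: Workspace is a hypothetical stand-in for MemoryWorkspace, and the final rethrow of the captured exception is illustrative, since the surrounding methods' handling of t falls outside the hunks shown.

    import java.util.List;

    public class WorkspaceCleanupSketch {

        //Hypothetical stand-in for ND4J's MemoryWorkspace; close() declares no checked exceptions
        interface Workspace extends AutoCloseable {
            @Override
            void close();
        }

        static void runWithCleanup(Runnable mainLoop, List<Workspace> openWorkspaces) {
            Throwable t = null;                     //First (primary) exception, if any
            try {
                mainLoop.run();                     //Stand-in for the forward/backward pass loop
            } catch (Throwable t2) {
                t = t2;                             //Capture, but let the finally block run its cleanup first
            } finally {
                for (Workspace ws : openWorkspaces) {
                    try {
                        ws.close();
                    } catch (Throwable t2) {
                        if (t != null) {
                            //close() failed *after* the primary exception: surface the root
                            //cause before propagating, so it isn't silently masked
                            System.err.println("Encountered second exception while trying to close workspace after initial exception");
                            t.printStackTrace();
                            throw t2;               //Compiles without 'throws': close() declares no checked exceptions
                        }
                        //As in the patch, a close() failure with no prior exception is not rethrown here
                    }
                }
            }
            //Illustrative only: surface the captured primary exception to the caller
            if (t != null) {
                if (t instanceof RuntimeException)
                    throw (RuntimeException) t;
                throw new RuntimeException(t);
            }
        }
    }

The key design point is that a plain ws.close() directly in the finally block would, on failure, discard the in-flight exception from the try block; capturing t first keeps the original failure available for logging even when cleanup also breaks.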