diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java
index 99f8aeff0..00c0cf7d6 100755
--- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java
+++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java
@@ -2278,6 +2278,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork {
         LayerWorkspaceMgr allNone = noWS ? LayerWorkspaceMgr.noWorkspaces(helperWorkspaces) : null;
         List[] closeAtEndIteraton = (List[])new List[topologicalOrder.length];
         MemoryWorkspace initialWorkspace = Nd4j.getMemoryManager().getCurrentWorkspace();
+        Throwable t = null;
         try {
             for (int i = 0; i <= stopIndex; i++) {
                 GraphVertex current = vertices[topologicalOrder[i]];
@@ -2302,14 +2303,14 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork {
                                 .with(ArrayType.RNN_FF_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM_CONFIG)
                                 .build();
 
-                    if(detachedInputs){
+                    if (detachedInputs) {
                         //Sometimes (like: external errors use cases) we don't want the activations/inputs to be
                         // in a workspace
                         workspaceMgr.setScopedOutFor(ArrayType.INPUT);
                         workspaceMgr.setScopedOutFor(ArrayType.ACTIVATIONS);
                     } else {
                         //Don't leverage out of async MultiDataSetIterator workspaces
-                        if(features[0].isAttached()){
+                        if (features[0].isAttached()) {
                             workspaceMgr.setNoLeverageOverride(features[0].data().getParentWorkspace().getId());
                         }
                     }
@@ -2326,7 +2327,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork {
                 if (ArrayUtils.contains(layerIndexes, vIdx)) {
                     isRequiredOutput = true;
 
-                    if(outputWorkspace != null && !(outputWorkspace instanceof DummyWorkspace)){
+                    if (outputWorkspace != null && !(outputWorkspace instanceof DummyWorkspace)) {
                         //Place activations in user-specified workspace
                         origWSAct = workspaceMgr.getWorkspaceName(ArrayType.ACTIVATIONS);
                         origWSActConf = workspaceMgr.getConfiguration(ArrayType.ACTIVATIONS);
@@ -2345,7 +2346,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork {
                 //Open the relevant workspace for the activations.
                 //Note that this will be closed only once the current vertex's activations have been consumed
                 MemoryWorkspace wsActivations = null;
-                if(outputWorkspace == null || outputWorkspace instanceof DummyWorkspace || !isRequiredOutput ){    //Open WS if (a) no external/output WS (if present, it's already open), or (b) not being placed in external/output WS
+                if (outputWorkspace == null || outputWorkspace instanceof DummyWorkspace || !isRequiredOutput) {    //Open WS if (a) no external/output WS (if present, it's already open), or (b) not being placed in external/output WS
                     wsActivations = workspaceMgr.notifyScopeEntered(ArrayType.ACTIVATIONS);
                     openActivationsWorkspaces.put(wsActivations, workspaceMgr);
                 }
@@ -2353,11 +2354,11 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork {
                 //Note that because we're opening activation workspaces not in any defined order (i.e., workspace
                 // use isn't simply nested), we'll manually override the previous workspace setting. Otherwise, when we
                 // close these workspaces, the "current" workspace may be set to the incorrect one
-                if(wsActivations != null )
+                if (wsActivations != null)
                     wsActivations.setPreviousWorkspace(initialWorkspace);
 
                 int closeableAt = vertexOutputsFullyConsumedByStep[vIdx];
-                if(outputWorkspace == null || outputWorkspace instanceof DummyWorkspace || (wsActivations != null && !outputWorkspace.getId().equals(wsActivations.getId()))) {
+                if (outputWorkspace == null || outputWorkspace instanceof DummyWorkspace || (wsActivations != null && !outputWorkspace.getId().equals(wsActivations.getId()))) {
                     if (closeAtEndIteraton[closeableAt] == null) {
                         closeAtEndIteraton[closeableAt] = new ArrayList<>();
                     }
@@ -2373,18 +2374,18 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork {
                         out = features[vIdx];
                     } else {
 
-                        if(fwdPassType == FwdPassType.STANDARD){
+                        if (fwdPassType == FwdPassType.STANDARD) {
                             //Standard feed-forward case
                             out = current.doForward(train, workspaceMgr);
-                        } else if(fwdPassType == FwdPassType.RNN_TIMESTEP){
+                        } else if (fwdPassType == FwdPassType.RNN_TIMESTEP) {
                             if (current.hasLayer()) {
                                 //Layer
                                 INDArray input = current.getInputs()[0];
                                 Layer l = current.getLayer();
                                 if (l instanceof RecurrentLayer) {
                                     out = ((RecurrentLayer) l).rnnTimeStep(reshapeTimeStepInput(input), workspaceMgr);
-                                } else if(l instanceof org.deeplearning4j.nn.layers.wrapper.BaseWrapperLayer && ((org.deeplearning4j.nn.layers.wrapper.BaseWrapperLayer)l).getUnderlying() instanceof RecurrentLayer){
-                                    RecurrentLayer rl = ((RecurrentLayer) ((org.deeplearning4j.nn.layers.wrapper.BaseWrapperLayer)l).getUnderlying());
+                                } else if (l instanceof org.deeplearning4j.nn.layers.wrapper.BaseWrapperLayer && ((org.deeplearning4j.nn.layers.wrapper.BaseWrapperLayer) l).getUnderlying() instanceof RecurrentLayer) {
+                                    RecurrentLayer rl = ((RecurrentLayer) ((org.deeplearning4j.nn.layers.wrapper.BaseWrapperLayer) l).getUnderlying());
                                     out = rl.rnnTimeStep(reshapeTimeStepInput(input), workspaceMgr);
                                 } else if (l instanceof MultiLayerNetwork) {
                                     out = ((MultiLayerNetwork) l).rnnTimeStep(reshapeTimeStepInput(input));
@@ -2402,7 +2403,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork {
                     validateArrayWorkspaces(workspaceMgr, out, ArrayType.ACTIVATIONS, vName, false, "Feed forward (inference)");
                 }
 
-                if(inputsTo != null) {  //Output vertices may not input to any other vertices
+                if (inputsTo != null) {  //Output vertices may not input to any other vertices
                     for (VertexIndices v : inputsTo) {
                         //Note that we don't have to do anything special here: the activations are always detached in
                         // this method
@@ -2412,13 +2413,13 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork {
                     }
                 }
 
-                if(clearLayerInputs) {
+                if (clearLayerInputs) {
                     current.clear();
                 }
 
-                if(isRequiredOutput){
+                if (isRequiredOutput) {
                     outputs[ArrayUtils.indexOf(layerIndexes, vIdx)] = out;
-                    if(origWSAct != null){
+                    if (origWSAct != null) {
                         //Reset the configuration, as we may reuse this workspace manager...
                         workspaceMgr.setWorkspace(ArrayType.ACTIVATIONS, origWSAct, origWSActConf);
                     }
@@ -2428,14 +2429,16 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork {
                 //Close any activations workspaces that we no longer require
                 //Note that activations workspaces can be closed only once the corresponding output activations have
                 // been fully consumed
-                if(closeAtEndIteraton[i] != null){
-                    for(MemoryWorkspace wsAct : closeAtEndIteraton[i]){
+                if (closeAtEndIteraton[i] != null) {
+                    for (MemoryWorkspace wsAct : closeAtEndIteraton[i]) {
                         wsAct.close();
                         LayerWorkspaceMgr canNowReuse = openActivationsWorkspaces.remove(wsAct);
                         freeWorkspaceManagers.add(canNowReuse);
                     }
                 }
             }
+        } catch (Throwable t2){
+            t = t2;
         } finally {
             //Close all open workspaces... usually this list will be empty, but not if an exception is thrown
             //Though if stopIndex < numLayers, some might still be open
@@ -2444,7 +2447,15 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork {
                 //Edge case here: seems that scoping out can increase the tagScope of the current WS
                 //and if we hit an exception during forward pass, we aren't guaranteed to call close a sufficient
                 // number of times to actually close it, in all cases
-                ws.close();
+                try{
+                    ws.close();
+                } catch (Throwable t2){
+                    if(t != null){
+                        log.error("Encountered second exception while trying to close workspace after initial exception");
+                        log.error("Original exception:", t);
+                        throw t2;
+                    }
+                }
             }
         }
         Nd4j.getMemoryManager().setCurrentWorkspace(initialWorkspace);
@@ -2581,28 +2592,29 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork {
 
         boolean traceLog = log.isTraceEnabled();
 
-        try{
-            for(int i=topologicalOrder.length-1; i>= 0; i--){
+        Throwable t = null;
+        try {
+            for (int i = topologicalOrder.length - 1; i >= 0; i--) {
                 boolean hitFrozen = false;
                 GraphVertex current = vertices[topologicalOrder[i]];
                 int vIdx = current.getVertexIndex();
                 String vertexName = current.getVertexName();
 
-                if(traceLog){
+                if (traceLog) {
                     log.trace("About backprop: {} (\"{}\") - {}", i, vertexName, current.getClass().getSimpleName());
                 }
 
                 //FIXME: make the frozen vertex feature extraction more flexible
-                if (current.hasLayer() && current.getLayer() instanceof FrozenLayer || current instanceof FrozenVertex){
+                if (current.hasLayer() && current.getLayer() instanceof FrozenLayer || current instanceof FrozenVertex) {
                     hitFrozen = true;
                 }
 
-                if (current.isInputVertex() || hitFrozen){
+                if (current.isInputVertex() || hitFrozen) {
                     //Close any activation gradient workspaces that we no longer require
                     //Note that activation gradient workspaces can be closed only once the corresponding activations
                     // gradients have been fully consumed
-                    if(closeAtEndIteraton[i] != null){
-                        for(MemoryWorkspace wsAct : closeAtEndIteraton[i]){
+                    if (closeAtEndIteraton[i] != null) {
+                        for (MemoryWorkspace wsAct : closeAtEndIteraton[i]) {
                             wsAct.close();
                             LayerWorkspaceMgr canNowReuse = openActivationsWorkspaces.remove(wsAct);
                             freeWorkspaceManagers.add(canNowReuse);
@@ -2680,7 +2692,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork {
                     wsActivationGrads.setPreviousWorkspace(initialWorkspace);
 
                     int closeableAt = vertexActGradsFullyConsumedByStep[vIdx];
-                    if(closeableAt >= 0) {
+                    if (closeableAt >= 0) {
                         if (closeAtEndIteraton[closeableAt] == null) {
                             closeAtEndIteraton[closeableAt] = new ArrayList<>();
                         }
@@ -2689,14 +2701,14 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork {
 
                     Pair pair;
                     INDArray[] epsilons;
-                    try(MemoryWorkspace wsWorkingMem = workspaceMgr.notifyScopeEntered(ArrayType.BP_WORKING_MEM)){
+                    try (MemoryWorkspace wsWorkingMem = workspaceMgr.notifyScopeEntered(ArrayType.BP_WORKING_MEM)) {
                         pair = current.doBackward(truncatedBPTT, workspaceMgr);
                         epsilons = pair.getSecond();
 
                         //Validate workspace location for the activation gradients:
                         //validateArrayWorkspaces(LayerWorkspaceMgr mgr, INDArray array, ArrayType arrayType, String vertexName, boolean isInputVertex, String op){
                         for (INDArray epsilon : epsilons) {
-                            if(epsilon != null) {
+                            if (epsilon != null) {
                                 //May be null for EmbeddingLayer, etc
                                 validateArrayWorkspaces(workspaceMgr, epsilon, ArrayType.ACTIVATION_GRAD, vertexName, false, "Backprop");
                             }
@@ -2732,15 +2744,15 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork {
                         tempList.addFirst(new Triple<>(newName, entry.getValue(), g.flatteningOrderForVariable(origName)));
                     }
 
-                    for (Triple t : tempList)
-                        gradients.addFirst(t);
+                    for (Triple triple : tempList)
+                        gradients.addFirst(triple);
                 }
 
                 //Close any activation gradient workspaces that we no longer require
                 //Note that activation gradient workspaces can be closed only once the corresponding activations
                 // gradients have been fully consumed
-                if(closeAtEndIteraton[i] != null){
-                    for(MemoryWorkspace wsAct : closeAtEndIteraton[i]){
+                if (closeAtEndIteraton[i] != null) {
+                    for (MemoryWorkspace wsAct : closeAtEndIteraton[i]) {
                         wsAct.close();
                         LayerWorkspaceMgr canNowReuse = openActivationsWorkspaces.remove(wsAct);
                         freeWorkspaceManagers.add(canNowReuse);
@@ -2748,23 +2760,32 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork {
                     closeAtEndIteraton[i] = null;
                 }
 
-                if(traceLog){
+                if (traceLog) {
                     log.trace("Completed backprop: {} (\"{}\") - {}", i, vertexName, current.getClass().getSimpleName());
                 }
             }
-
+        } catch (Throwable t2){
+            t = t2;
         } finally {
             //Close all open workspaces... usually this list will be empty, but not if an exception is thrown
             for(MemoryWorkspace ws : openActivationsWorkspaces.keySet()){
-                ws.close();
+                try{
+                    ws.close();
+                } catch (Throwable t2){
+                    if(t != null){
+                        log.error("Encountered second exception while trying to close workspace after initial exception");
+                        log.error("Original exception:", t);
+                        throw t2;
+                    }
+                }
             }
             Nd4j.getMemoryManager().setCurrentWorkspace(initialWorkspace);
         }
 
         //Now, add the gradients in the order we need them in for flattening (same as params order)
         Gradient gradient = new DefaultGradient(flattenedGradients);
-        for (Triple t : gradients) {
-            gradient.setGradientFor(t.getFirst(), t.getSecond(), t.getThird());
+        for (Triple tr : gradients) {
+            gradient.setGradientFor(tr.getFirst(), tr.getSecond(), tr.getThird());
        }
 
         this.gradient = gradient;
diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java
index 731ca398b..dd495a620 100755
--- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java
+++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java
@@ -1242,17 +1242,18 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, Neura
 
         boolean traceLog = log.isTraceEnabled();
 
+        Throwable t = null;
         try {
             for (int i = 0; i <= layerIndex; i++) {
                 LayerWorkspaceMgr mgr = (i % 2 == 0 ? mgrEven : mgrOdd);
 
-                if(traceLog){
+                if (traceLog) {
                     log.trace("About to forward pass: {} - {}", i, layers[i].getClass().getSimpleName());
                 }
 
                 //Edge case: for first layer with dropout, inputs can't be in previous workspace (as it hasn't been opened yet)
                 //Hence: put inputs in working memory
-                if(i == 0 && wsm != WorkspaceMode.NONE){
+                if (i == 0 && wsm != WorkspaceMode.NONE) {
                     mgr.setWorkspace(ArrayType.INPUT, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG);
                 }
 
@@ -1268,7 +1269,7 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, Neura
                     temp.setPreviousWorkspace(initialWorkspace);
 
-                    if(i == 0 && input.isAttached()){
+                    if (i == 0 && input.isAttached()) {
                         //Don't leverage out of async DataSetIterator workspaces
                         mgr.setNoLeverageOverride(input.data().getParentWorkspace().getId());
                     }
@@ -1279,8 +1280,8 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, Neura
                     validateArrayWorkspaces(mgr, input, ArrayType.ACTIVATIONS, i, true, "Output of layer (inference)");
                 }
 
-                if ( i == layerIndex ) {
-                    if(outputWorkspace != null && !(outputWorkspace instanceof DummyWorkspace)){
+                if (i == layerIndex) {
+                    if (outputWorkspace != null && !(outputWorkspace instanceof DummyWorkspace)) {
                         //Place activations in user-specified workspace
                         mgr.setWorkspace(ArrayType.ACTIVATIONS, outputWorkspace.getId(), outputWorkspace.getWorkspaceConfiguration());
                     } else {
@@ -1289,15 +1290,15 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, Neura
                     }
                 }
 
-                if(fwdPassType == FwdPassType.STANDARD){
+                if (fwdPassType == FwdPassType.STANDARD) {
                     //Standard feed-forward case
                     input = layers[i].activate(input, train, mgr);
-                } else if(fwdPassType == FwdPassType.RNN_TIMESTEP){
+                } else if (fwdPassType == FwdPassType.RNN_TIMESTEP) {
                     //rnnTimeStep case
                     if (layers[i] instanceof RecurrentLayer) {
                         input = ((RecurrentLayer) layers[i]).rnnTimeStep(reshapeTimeStepInput(input), mgr);
-                    } else if(layers[i] instanceof BaseWrapperLayer && ((BaseWrapperLayer)layers[i]).getUnderlying() instanceof RecurrentLayer){
-                        RecurrentLayer rl = ((RecurrentLayer) ((BaseWrapperLayer)layers[i]).getUnderlying());
+                    } else if (layers[i] instanceof BaseWrapperLayer && ((BaseWrapperLayer) layers[i]).getUnderlying() instanceof RecurrentLayer) {
+                        RecurrentLayer rl = ((RecurrentLayer) ((BaseWrapperLayer) layers[i]).getUnderlying());
                         input = rl.rnnTimeStep(reshapeTimeStepInput(input), mgr);
                     } else if (layers[i] instanceof MultiLayerNetwork) {
                         input = ((MultiLayerNetwork) layers[i]).rnnTimeStep(reshapeTimeStepInput(input));
@@ -1311,34 +1312,51 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, Neura
                     //Validation: Exception if invalid (bad layer implementation)
                     validateArrayWorkspaces(mgr, input, ArrayType.ACTIVATIONS, i, false, "Output of layer (inference)");
 
-                    if(wsActCloseNext != null){
+                    if (wsActCloseNext != null) {
                         wsActCloseNext.close();
                     }
                    wsActCloseNext = temp;
                    temp = null;
                }
 
-                if(traceLog){
+                if (traceLog) {
                     log.trace("Completed forward pass: {} - {}", i, layers[i].getClass().getSimpleName());
                 }
 
                 //Edge case: for first layer with dropout, inputs can't be in previous workspace (as it hasn't been opened yet)
                 //Hence: put inputs in working memory -> set back to default for next use of workspace mgr
-                if(i == 0 && wsm != WorkspaceMode.NONE){
+                if (i == 0 && wsm != WorkspaceMode.NONE) {
                     mgr.setWorkspace(ArrayType.INPUT, WS_LAYER_ACT_2, WS_LAYER_ACT_X_CONFIG);    //Inputs should always be in the previous WS
                 }
             }
-
+        } catch (Throwable t2){
+            t = t2;
         } finally {
             if(wsActCloseNext != null){
-                wsActCloseNext.close();
+                try {
+                    wsActCloseNext.close();
+                } catch (Throwable t2){
+                    if(t != null){
+                        log.error("Encountered second exception while trying to close workspace after initial exception");
+                        log.error("Original exception:", t);
+                        throw t2;
+                    }
+                }
             }
 
             if(temp != null){
                 //Should only be non-null on exception
                 while(temp.isScopeActive()){
                     //For safety, should never occur in theory: a single close() call may not be sufficient, if
                     // workspace scope was borrowed and not properly closed when exception occurred
-                    temp.close();
+                    try{
+                        temp.close();
+                    } catch (Throwable t2){
+                        if(t != null){
+                            log.error("Encountered second exception while trying to close workspace after initial exception");
+                            log.error("Original exception:", t);
+                            throw t2;
+                        }
+                    }
                 }
             }
 
@@ -1871,13 +1889,14 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, Neura
 
         boolean traceLog = log.isTraceEnabled();
 
+        Throwable t = null;
         try {
             for (int i = layers.length - 1; i >= 0; i--) {
                 if (layers[i] instanceof FrozenLayer) {
                     break;
                 }
 
-                if(traceLog){
+                if (traceLog) {
                     log.trace("About to backprop: {} - {}", i, layers[i].getClass().getSimpleName());
                 }
 
@@ -1897,7 +1916,7 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, Neura
 
                 //Open activation gradients WS *then* BP working memory, so BP working memory is opened last for use in layers
                 wsActGradTemp = workspaceMgr.notifyScopeEntered(ArrayType.ACTIVATION_GRAD);
-                try(MemoryWorkspace wsBPWorking = workspaceMgr.notifyScopeEntered(ArrayType.BP_WORKING_MEM)){
+                try (MemoryWorkspace wsBPWorking = workspaceMgr.notifyScopeEntered(ArrayType.BP_WORKING_MEM)) {
 
                     //Note that because we're opening activation workspaces not in a simple nested order, we'll manually
                     // override the previous workspace setting. Otherwise, when we close these workspaces, the "current"
@@ -1907,7 +1926,7 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, Neura
 
                     INDArray eps = (i == layers.length - 1 ? epsilon : currPair.getRight()); //eps is null for OutputLayer
 
-                    if(!tbptt){
+                    if (!tbptt) {
                         //Standard case
                         currPair = layers[i].backpropGradient(eps, workspaceMgr);
                     } else {
@@ -1920,7 +1939,7 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, Neura
                     }
 
-                    if(currPair.getSecond() != null) {
+                    if (currPair.getSecond() != null) {
                         //Edge case: may be null for Embedding layer, for example
                         validateArrayWorkspaces(workspaceMgr, currPair.getSecond(), ArrayType.ACTIVATION_GRAD, i,
                                 false, "Backprop");
 
@@ -1936,38 +1955,56 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, Neura
                         currPair = new Pair<>(currPair.getFirst(), this.layerWiseConfigurations.getInputPreProcess(i)
                                 .backprop(currPair.getSecond(), getInputMiniBatchSize(), workspaceMgr));
-                        if (i > 0 && currPair.getSecond() != null){
+                        if (i > 0 && currPair.getSecond() != null) {
                             validateArrayWorkspaces(workspaceMgr, currPair.getSecond(), ArrayType.ACTIVATION_GRAD, i,
                                     true, "Backprop");
                         }
                     }
 
-                    if(i == 0 ){
-                        if(returnInputActGrad && currPair.getSecond() != null){
+                    if (i == 0) {
+                        if (returnInputActGrad && currPair.getSecond() != null) {
                             currPair.setSecond(currPair.getSecond().detach());
                         } else {
                             currPair.setSecond(null);
                         }
                     }
 
-                    if(wsActGradCloseNext != null){
+                    if (wsActGradCloseNext != null) {
                         wsActGradCloseNext.close();
                     }
                     wsActGradCloseNext = wsActGradTemp;
                     wsActGradTemp = null;
                 }
 
-                if(traceLog){
+                if (traceLog) {
                     log.trace("Completed backprop: {} - {}", i, layers[i].getClass().getSimpleName());
                 }
             }
+        } catch (Throwable thr ){
+            t = thr;
         } finally {
             if(wsActGradCloseNext != null){
-                wsActGradCloseNext.close();
+                try {
+                    wsActGradCloseNext.close();
+                } catch (Throwable t2){
+                    if(t != null){
+                        log.error("Encountered second exception while trying to close workspace after initial exception");
+                        log.error("Original exception:", t);
+                        throw t2;
+                    }
+                }
            }
 
-            if(wsActGradTemp != null){
+            if(wsActGradTemp != null) {
                 //Should only be non-null on exception
-                wsActGradTemp.close();
+                try {
+                    wsActGradTemp.close();
+                } catch (Throwable t2) {
+                    if (t != null) {
+                        log.error("Encountered second exception while trying to close workspace after initial exception");
+                        log.error("Original exception:", t);
+                        throw t2;
+                    }
+                }
             }
             Nd4j.getMemoryManager().setCurrentWorkspace(initialWorkspace);
         }
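
Note: every hunk above applies the same cleanup pattern. The first Throwable thrown by the main forward/backward loop is captured into t, each workspace close() in the finally block is then attempted individually, and if a close() itself fails after an earlier failure, the original exception is logged before the close failure is propagated, so the root cause is not silently replaced by a secondary cleanup error. The sketch below is a minimal, self-contained illustration of that pattern, not the actual ND4J API: Workspace is a hypothetical stand-in for MemoryWorkspace, and the final rethrow of the captured exception is illustrative, since the surrounding methods' handling of t falls outside the hunks shown.

    import java.util.List;

    public class WorkspaceCleanupSketch {

        //Hypothetical stand-in for ND4J's MemoryWorkspace; close() declares no checked exceptions
        interface Workspace extends AutoCloseable {
            @Override
            void close();
        }

        static void runWithCleanup(Runnable mainLoop, List<Workspace> openWorkspaces) {
            Throwable t = null;                     //First (primary) exception, if any
            try {
                mainLoop.run();                     //Stand-in for the forward/backward pass loop
            } catch (Throwable t2) {
                t = t2;                             //Capture, but let the finally block run its cleanup first
            } finally {
                for (Workspace ws : openWorkspaces) {
                    try {
                        ws.close();
                    } catch (Throwable t2) {
                        if (t != null) {
                            //close() failed *after* the primary exception: surface the root
                            //cause before propagating, so it isn't silently masked
                            System.err.println("Encountered second exception while trying to close workspace after initial exception");
                            t.printStackTrace();
                            throw t2;               //Compiles without 'throws': close() declares no checked exceptions
                        }
                        //As in the patch, a close() failure with no prior exception is not rethrown here
                    }
                }
            }
            //Illustrative only: surface the captured primary exception to the caller
            if (t != null) {
                if (t instanceof RuntimeException)
                    throw (RuntimeException) t;
                throw new RuntimeException(t);
            }
        }
    }

The key design point is that a plain ws.close() directly in the finally block would, on failure, discard the in-flight exception from the try block; capturing t first keeps the original failure available for logging even when cleanup also breaks.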