Use DL4J workspaces for SameDiff layers in MLN/CG (#23)

* #8329 DL4J workspace integration for SameDiff layers
  Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Fix bug for Nd4j.createUninitializedDetached for scalars (length 0 shape array)
  Signed-off-by: AlexDBlack <blacka101@gmail.com>
* SameDiff output layer, graph vertex, various fixes
  Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Javadoc
  Signed-off-by: AlexDBlack <blacka101@gmail.com>
2019-11-02 17:42:01 +11:00 · 2019-11-02 17:42:01 +11:00 · 9efd811508
commit 9efd811508
parent e9a7a13c00
9 changed files with 760 additions and 545 deletions
--- a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/TestBatchNormBp.java
+++ b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/TestBatchNormBp.java
@ -96,8 +96,8 @@ public class TestBatchNormBp {
        bn.setInput(in, LayerWorkspaceMgr.noWorkspaces());
        Pair<Gradient,INDArray> p = net.backpropGradient(eps, LayerWorkspaceMgr.noWorkspaces());

-        h.preOutput(in, true, new int[]{1,3}, gamma, beta, mean, var, 0.5, e, LayerWorkspaceMgr.noWorkspaces());
-        Pair<Gradient,INDArray> pmkl = h.backpropGradient(in, eps, new int[]{1,3}, gamma, beta, dLdg, dLdb, e, LayerWorkspaceMgr.noWorkspaces());
+        h.preOutput(in, true, new long[]{1,3}, gamma, beta, mean, var, 0.5, e, LayerWorkspaceMgr.noWorkspaces());
+        Pair<Gradient,INDArray> pmkl = h.backpropGradient(in, eps, new long[]{1,3}, gamma, beta, dLdg, dLdb, e, LayerWorkspaceMgr.noWorkspaces());

        INDArray dldin_dl4j = p.getSecond();

--- a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffDense.java
+++ b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffDense.java
@ -80,6 +80,7 @@ public class TestSameDiffDense extends BaseDL4JTest {

    @Test
    public void testSameDiffDenseForward() {
+        for(WorkspaceMode wsm : new WorkspaceMode[]{WorkspaceMode.ENABLED, WorkspaceMode.NONE}) {
            for (int minibatch : new int[]{5, 1}) {
                int nIn = 3;
                int nOut = 4;
@ -97,8 +98,10 @@ public class TestSameDiffDense extends BaseDL4JTest {
                };

                for (Activation a : afns) {
-                log.info("Starting test - " + a);
+                    log.info("Starting test - " + a + ", workspace = " + wsm);
                    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
+                            .inferenceWorkspaceMode(wsm)
+                            .trainingWorkspaceMode(wsm)
                            .list()
                            .layer(new SameDiffDense.Builder().nIn(nIn).nOut(nOut)
                                    .activation(a)
@ -146,9 +149,11 @@ public class TestSameDiffDense extends BaseDL4JTest {
                }
            }
        }
+    }

    @Test
    public void testSameDiffDenseForwardMultiLayer() {
+        for(WorkspaceMode wsm : new WorkspaceMode[]{WorkspaceMode.ENABLED, WorkspaceMode.NONE}) {
            for (int minibatch : new int[]{5, 1}) {
                int nIn = 3;
                int nOut = 4;
@ -166,7 +171,7 @@ public class TestSameDiffDense extends BaseDL4JTest {
                };

                for (Activation a : afns) {
-                log.info("Starting test - " + a);
+                    log.info("Starting test - " + a + " - workspace=" + wsm);
                    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                            .seed(12345)
                            .list()
@ -201,7 +206,6 @@ public class TestSameDiffDense extends BaseDL4JTest {
                    MultiLayerNetwork net2 = new MultiLayerNetwork(conf2);
                    net2.init();

-//                net.params().assign(net2.params());
                    assertEquals(net2.params(), net.params());

                    //Check params:
@ -231,6 +235,7 @@ public class TestSameDiffDense extends BaseDL4JTest {
                }
            }
        }
+    }

    @Test
    public void testSameDiffDenseBackward() {
@ -244,10 +249,13 @@ public class TestSameDiffDense extends BaseDL4JTest {
                Activation[] afns = new Activation[]{
                        Activation.TANH,
                        Activation.SIGMOID,
-                        Activation.ELU, Activation.IDENTITY, Activation.SOFTPLUS, Activation.SOFTSIGN,
+                        Activation.ELU,
+                        Activation.IDENTITY,
+                        Activation.SOFTPLUS,
+                        Activation.SOFTSIGN,
                        Activation.HARDTANH,
-                        Activation.CUBE,    //https://github.com/deeplearning4j/nd4j/issues/2426
-                        Activation.RELU      //JVM crash
+                        Activation.CUBE,
+                        Activation.RELU
                };

                for (Activation a : afns) {
@ -337,12 +345,13 @@ public class TestSameDiffDense extends BaseDL4JTest {

        int nIn = 4;
        int nOut = 3;
-        boolean workspaces = true;
+
+        for(WorkspaceMode wsm : new WorkspaceMode[]{WorkspaceMode.ENABLED, WorkspaceMode.NONE}) {

            MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                    .seed(12345)
-                .trainingWorkspaceMode(workspaces ? WorkspaceMode.ENABLED : WorkspaceMode.NONE)
-                .inferenceWorkspaceMode(workspaces ? WorkspaceMode.ENABLED : WorkspaceMode.NONE)
+                    .trainingWorkspaceMode(wsm)
+                    .inferenceWorkspaceMode(wsm)
                    .updater(new Adam(0.1))
                    .list()
                    .layer(new SameDiffDense.Builder().nIn(nIn).nOut(5).activation(Activation.TANH).build())
@ -373,7 +382,7 @@ public class TestSameDiffDense extends BaseDL4JTest {
            assertEquals(netStandard.params(), netSD.params());
            assertEquals(netStandard.paramTable(), netSD.paramTable());

-        DataSetIterator iter = new IrisDataSetIterator(150,150);
+            DataSetIterator iter = new IrisDataSetIterator(150, 150);
            DataSet ds = iter.next();

            INDArray outSD = netSD.output(ds.getFeatures());
@ -381,7 +390,7 @@ public class TestSameDiffDense extends BaseDL4JTest {

            assertEquals(outStd, outSD);

-        for( int i=0; i<3; i++ ){
+            for (int i = 0; i < 3; i++) {
                netSD.fit(ds);
                netStandard.fit(ds);
                String s = String.valueOf(i);
@ -396,13 +405,14 @@ public class TestSameDiffDense extends BaseDL4JTest {
            INDArray outMb = netStandard.output(newIn);
            assertEquals(outMb, outMbsd);
        }
+    }

    @Test
    public void gradientCheck() {
        int nIn = 4;
        int nOut = 4;

-        for (boolean workspaces : new boolean[]{false, true}) {
+        for (boolean workspaces : new boolean[]{true, false}) {
            for (Activation a : new Activation[]{Activation.TANH, Activation.IDENTITY}) {

                String msg = "workspaces: " + workspaces + ", " + a;
--- a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffLambda.java
+++ b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/TestSameDiffLambda.java
@ -21,6 +21,7 @@ import org.deeplearning4j.BaseDL4JTest;
 import org.deeplearning4j.TestUtils;
 import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
 import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
+import org.deeplearning4j.nn.conf.WorkspaceMode;
 import org.deeplearning4j.nn.conf.graph.ElementWiseVertex;
 import org.deeplearning4j.nn.conf.graph.ScaleVertex;
 import org.deeplearning4j.nn.conf.graph.ShiftVertex;
@ -52,8 +53,14 @@ public class TestSameDiffLambda extends BaseDL4JTest {

    @Test
    public void testSameDiffLamdaLayerBasic(){
+        for(WorkspaceMode wsm : new WorkspaceMode[]{WorkspaceMode.ENABLED, WorkspaceMode.NONE}) {
+            log.info("--- Workspace Mode: {} ---", wsm);
+
+
            Nd4j.getRandom().setSeed(12345);
            ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
+                    .trainingWorkspaceMode(wsm)
+                    .inferenceWorkspaceMode(wsm)
                    .seed(12345)
                    .updater(new Adam(0.01))
                    .graphBuilder()
@ -67,6 +74,8 @@ public class TestSameDiffLambda extends BaseDL4JTest {

            //Equavalent, not using SameDiff Lambda:
            ComputationGraphConfiguration confStd = new NeuralNetConfiguration.Builder()
+                    .trainingWorkspaceMode(wsm)
+                    .inferenceWorkspaceMode(wsm)
                    .seed(12345)
                    .updater(new Adam(0.01))
                    .graphBuilder()
@ -87,7 +96,7 @@ public class TestSameDiffLambda extends BaseDL4JTest {

            lambda.setParams(std.params());

-        INDArray in = Nd4j.rand(3,5);
+            INDArray in = Nd4j.rand(3, 5);
            INDArray labels = TestUtils.randomOneHot(3, 5);
            DataSet ds = new DataSet(in, labels);

@ -101,7 +110,7 @@ public class TestSameDiffLambda extends BaseDL4JTest {

            assertEquals(scoreStd, scoreLambda, 1e-6);

-        for( int i=0; i<3; i++ ){
+            for (int i = 0; i < 3; i++) {
                lambda.fit(ds);
                std.fit(ds);

@ -122,11 +131,17 @@ public class TestSameDiffLambda extends BaseDL4JTest {
            INDArray outMb = std.output(newIn)[0];
            assertEquals(outMb, outMbsd);
        }
+    }

    @Test
    public void testSameDiffLamdaVertexBasic(){
+        for(WorkspaceMode wsm : new WorkspaceMode[]{WorkspaceMode.ENABLED, WorkspaceMode.NONE}) {
+            log.info("--- Workspace Mode: {} ---", wsm);
+
            Nd4j.getRandom().setSeed(12345);
            ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
+                    .trainingWorkspaceMode(wsm)
+                    .inferenceWorkspaceMode(wsm)
                    .dataType(DataType.DOUBLE)
                    .seed(12345)
                    .updater(new Adam(0.01))
@ -142,6 +157,8 @@ public class TestSameDiffLambda extends BaseDL4JTest {

            //Equavalent, not using SameDiff Lambda:
            ComputationGraphConfiguration confStd = new NeuralNetConfiguration.Builder()
+                    .trainingWorkspaceMode(wsm)
+                    .inferenceWorkspaceMode(wsm)
                    .dataType(DataType.DOUBLE)
                    .seed(12345)
                    .updater(new Adam(0.01))
@ -163,8 +180,8 @@ public class TestSameDiffLambda extends BaseDL4JTest {

            lambda.setParams(std.params());

-        INDArray in1 = Nd4j.rand(3,5);
-        INDArray in2 = Nd4j.rand(3,5);
+            INDArray in1 = Nd4j.rand(3, 5);
+            INDArray in2 = Nd4j.rand(3, 5);
            INDArray labels = TestUtils.randomOneHot(3, 5);
            MultiDataSet mds = new org.nd4j.linalg.dataset.MultiDataSet(new INDArray[]{in1, in2}, new INDArray[]{labels});

@ -178,7 +195,7 @@ public class TestSameDiffLambda extends BaseDL4JTest {

            assertEquals(scoreStd, scoreLambda, 1e-6);

-        for( int i=0; i<3; i++ ){
+            for (int i = 0; i < 3; i++) {
                lambda.fit(mds);
                std.fit(mds);

@ -200,4 +217,5 @@ public class TestSameDiffLambda extends BaseDL4JTest {
            INDArray outMb = std.output(newIn1, newIn2)[0];
            assertEquals(outMb, outMbsd);
        }
+    }
 }
--- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/DL4JSameDiffMemoryMgr.java
+++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/DL4JSameDiffMemoryMgr.java
@ -0,0 +1,68 @@
+package org.deeplearning4j.nn.layers.samediff;
+
+import org.nd4j.autodiff.samediff.internal.memory.AbstractMemoryMgr;
+import org.nd4j.base.Preconditions;
+import org.nd4j.linalg.api.buffer.DataType;
+import org.nd4j.linalg.api.memory.MemoryWorkspace;
+import org.nd4j.linalg.api.memory.conf.WorkspaceConfiguration;
+import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.api.shape.LongShapeDescriptor;
+import org.nd4j.linalg.factory.Nd4j;
+
+/**
+ * A SameDiff {@link org.nd4j.autodiff.samediff.internal.SessionMemMgr} that uses DL4J workspaces for memory management.
+ * Arrays requested with {@code detached == true} are allocated in the output workspace (per the original author: op outputs that are returned to the layer); all other arrays are placed in
+ * the DL4J working memory workspace. If the selected workspace name is null, the array is instead created via {@code Nd4j.createUninitializedDetached}. {@link #release} and {@link #close} are no-ops.
+ *
+ * @author Alex Black
+ */
+public class DL4JSameDiffMemoryMgr extends AbstractMemoryMgr {
+
+    private final String workingMemoryWs;               //Workspace name used when allocate(...) is called with detached == false; may be null
+    private final String outputWs;                      //Workspace name used when allocate(...) is called with detached == true; may be null
+    private final WorkspaceConfiguration confWorking;   //Configuration passed to the workspace manager together with workingMemoryWs
+    private final WorkspaceConfiguration confOutput;    //Configuration passed to the workspace manager together with outputWs
+
+    //Note: a null working memory or output workspace name means arrays of that kind are allocated as detached memory (see the null check in allocate)
+    public DL4JSameDiffMemoryMgr(String workingMemoryWs, String outputWs, WorkspaceConfiguration confWorking,
+                                 WorkspaceConfiguration confOutput){
+        this.workingMemoryWs = workingMemoryWs;
+        this.outputWs = outputWs;
+        this.confWorking = confWorking;
+        this.confOutput = confOutput;
+    }
+
+
+    @Override
+    public INDArray allocate(boolean detached, DataType dataType, long... shape) {
+        String wsName = detached ? outputWs : workingMemoryWs;                  //detached == true -> output workspace, otherwise working memory workspace
+        WorkspaceConfiguration wsConf = detached ? confOutput : confWorking;    //Configuration is selected by the same flag as the name
+
+        if(wsName == null){
+            //No workspace name configured for this kind of array (scoped out): create an uninitialized detached array and verify that it really is detached
+            INDArray ret = Nd4j.createUninitializedDetached(dataType, shape);
+            Preconditions.checkState(!ret.isAttached(), "Returned array should be detached");
+            return ret;
+        } else {
+            MemoryWorkspace ws = Nd4j.getWorkspaceManager().getWorkspaceForCurrentThread(wsConf, wsName);   //Look up the workspace for the current thread by configuration and name
+            try (MemoryWorkspace mw = ws.notifyScopeBorrowed()) {   //Borrowed scope is closed by try-with-resources; presumably this makes ws the target of the allocation below - TODO confirm
+                return Nd4j.createUninitialized(dataType, shape);
+            }
+        }
+    }
+
+    @Override
+    public INDArray allocate(boolean detached, LongShapeDescriptor descriptor) {
+        return allocate(detached, descriptor.dataType(), descriptor.getShape());    //Delegates to the (detached, dataType, shape) overload using the descriptor's data type and shape
+    }
+
+    @Override
+    public void release(INDArray array) {
+        //No-op: nothing is released here - per the original author, DL4J workspaces handle this
+    }
+
+    @Override
+    public void close() {
+        //No-op: nothing is closed here - per the original author, DL4J workspaces handle this
+    }
+}
--- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffGraphVertex.java
+++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffGraphVertex.java
@ -31,9 +31,12 @@ import org.deeplearning4j.nn.workspace.ArrayType;
 import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr;
 import org.nd4j.autodiff.samediff.SDVariable;
 import org.nd4j.autodiff.samediff.SameDiff;
+import org.nd4j.autodiff.samediff.internal.InferenceSession;
+import org.nd4j.autodiff.samediff.internal.SessionMemMgr;
 import org.nd4j.base.Preconditions;
 import org.nd4j.linalg.api.buffer.DataType;
 import org.nd4j.linalg.api.memory.MemoryWorkspace;
+import org.nd4j.linalg.api.memory.conf.WorkspaceConfiguration;
 import org.nd4j.linalg.api.ndarray.INDArray;
 import org.nd4j.linalg.api.ops.impl.layers.ExternalErrorsFunction;
 import org.nd4j.linalg.factory.Nd4j;
@ -95,9 +98,10 @@ public class SameDiffGraphVertex extends BaseGraphVertex {
    @Override
    public INDArray doForward(boolean training, LayerWorkspaceMgr workspaceMgr) {
        try(MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) {
-            if(sameDiff == null){
+            if (sameDiff == null) {
                doInit();
            }
+        }

        Map<String,INDArray> phMap = new HashMap<>();
        config.validateInput(inputs);
@ -112,6 +116,25 @@ public class SameDiffGraphVertex extends BaseGraphVertex {
            }
        }

+
+        //Configure memory management for SameDiff instance - use DL4J workspaces
+        String wsNameWorking = workspaceMgr.getWorkspaceName(ArrayType.FF_WORKING_MEM);
+        String wsNameOutput = workspaceMgr.getWorkspaceName(ArrayType.ACTIVATIONS);
+        WorkspaceConfiguration confWorking = workspaceMgr.getConfiguration(ArrayType.FF_WORKING_MEM);
+        WorkspaceConfiguration confOutput = workspaceMgr.getConfiguration(ArrayType.ACTIVATIONS);
+        boolean actScopedOut = workspaceMgr.isScopedOut(ArrayType.ACTIVATIONS);
+        Preconditions.checkState(actScopedOut || wsNameOutput != null, "Activations must have a workspace or must be scoped out");
+        SessionMemMgr mmgr = new DL4JSameDiffMemoryMgr(wsNameWorking, wsNameOutput, confWorking, confOutput);
+
+        InferenceSession is = sameDiff.getSessions().get(Thread.currentThread().getId());
+        if(is == null){
+            is = new InferenceSession(sameDiff);
+            sameDiff.getSessions().put(Thread.currentThread().getId(), is);
+        }
+        is.setMmgr(mmgr);
+
+
+
        if(paramTable != null && paramTable.size() > 0) {
            //Because DL4J parameters are views, and SameDiff uses DeviceLocal (which doesn't support views), we need to update the arrays on each iteration
            //TODO Find a more efficient solution for this
@ -122,23 +145,29 @@ public class SameDiffGraphVertex extends BaseGraphVertex {
        }
        INDArray result = sameDiff.outputSingle(phMap, outputKey);

+        //Edge case: "vertex" is just an identity activation, for example
+        //TODO there may be a cleaner way to do this...
+        if(!actScopedOut && !result.data().getParentWorkspace().getId().equals(wsNameOutput)){
+            result = workspaceMgr.dup(ArrayType.ACTIVATIONS, result);
+        } else if(actScopedOut && result.isAttached()){
+            result = result.detach();
+        }
+
        //Clear placeholders and op inputs to ensure no out-of-scope arrays are still referenced anywhere
        sameDiff.clearPlaceholders(true);
        sameDiff.clearOpInputs();
        return workspaceMgr.dup(ArrayType.ACTIVATIONS, result);
    }
-    }

    @Override
    public Pair<Gradient, INDArray[]> doBackward(boolean tbptt, LayerWorkspaceMgr workspaceMgr) {
        Gradient g = new DefaultGradient();

-        INDArray[] dLdIns;
-        boolean[] noClose = new boolean[getNumInputArrays()];
-        try(MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()){
-            if(sameDiff == null){
+        try(MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) {
+            if (sameDiff == null) {
                doInit();
            }
+        }

        List<String> inputNames = config.getVertexParams().getInputs();
        if(!sameDiff.hasGradientFunction()) {
@ -147,6 +176,24 @@ public class SameDiffGraphVertex extends BaseGraphVertex {
            sameDiff.createGradFunction(inArr);
        }
        config.validateInput(inputs);
+
+        //Configure memory management for SameDiff instance - use DL4J workspaces
+        Map<Long,InferenceSession> sessionMap = sameDiff.getFunction("grad").getSessions();
+        if(!sessionMap.containsKey(Thread.currentThread().getId())){
+            sessionMap.put(Thread.currentThread().getId(), new InferenceSession(sameDiff.getFunction("grad")));
+        }
+        String wsNameWorking = workspaceMgr.getWorkspaceName(ArrayType.BP_WORKING_MEM);
+        String wsNameActGrad = workspaceMgr.getWorkspaceName(ArrayType.ACTIVATION_GRAD);
+        WorkspaceConfiguration confWorking = workspaceMgr.getConfiguration(ArrayType.BP_WORKING_MEM);
+        WorkspaceConfiguration confOutput = workspaceMgr.getConfiguration(ArrayType.ACTIVATION_GRAD);
+
+        boolean actGradScopedOut = workspaceMgr.isScopedOut(ArrayType.ACTIVATION_GRAD);
+        Preconditions.checkState(actGradScopedOut || wsNameActGrad != null, "Activation gradients must have a workspace or be scoped out");
+        SessionMemMgr mmgr = new DL4JSameDiffMemoryMgr(wsNameWorking, wsNameActGrad, confWorking, confOutput);
+        sessionMap.get(Thread.currentThread().getId()).setMmgr(mmgr);
+
+
+
        Map<String,INDArray> phMap = new HashMap<>();
        List<String> inputs = config.getVertexParams().getInputs();
        int i=0;
@ -182,11 +229,10 @@ public class SameDiffGraphVertex extends BaseGraphVertex {
            INDArray sdGrad = gradsMap.get(s);
            INDArray dl4jGrad = gradTable.get(s);
            dl4jGrad.assign(sdGrad);                                            //TODO OPTIMIZE THIS
-                sdGrad.close(); //TODO optimize this
            g.gradientForVariable().put(s, dl4jGrad);
        }

-            dLdIns = new INDArray[inputs.size()];
+        INDArray[] dLdIns = new INDArray[inputs.size()];
        String fnName = fn.getGradPlaceholderName();
        for(int j=0; j<inputs.size(); j++ ){
            String name = inputs.get(j);
@ -197,17 +243,14 @@ public class SameDiffGraphVertex extends BaseGraphVertex {
                //Edge case with lambda vertices like identity: SameDiff doesn't store the placeholders
                // So, this getArr() can be trying to get placeholder from SameDiff instance, when it's available here
                dLdIns[j] = epsilon;
-                    noClose[j] = true;
-                }
-            }
            }

-        //TODO optimize
-        for( int i=0; i<dLdIns.length; i++ ){
-            INDArray before = dLdIns[i];
-            dLdIns[i] = workspaceMgr.dup(ArrayType.ACTIVATION_GRAD, dLdIns[i]);
-            if(!noClose[i]){
-                before.close();
+            //Edge case: "vertex" is just an identity activation, for example
+            //TODO there may be a cleaner way to do this...
+            if(!actGradScopedOut && !dLdIns[j].data().getParentWorkspace().getId().equals(wsNameActGrad)){
+                dLdIns[j] = workspaceMgr.dup(ArrayType.ACTIVATION_GRAD, dLdIns[j]);
+            } else if(actGradScopedOut && dLdIns[j].isAttached()){
+                dLdIns[j] = dLdIns[j].detach();
            }
        }

--- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java
+++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffLayer.java
@ -26,9 +26,12 @@ import org.deeplearning4j.nn.gradient.Gradient;
 import org.deeplearning4j.nn.layers.AbstractLayer;
 import org.nd4j.autodiff.samediff.SDVariable;
 import org.nd4j.autodiff.samediff.SameDiff;
+import org.nd4j.autodiff.samediff.internal.InferenceSession;
+import org.nd4j.autodiff.samediff.internal.SessionMemMgr;
 import org.nd4j.base.Preconditions;
 import org.nd4j.linalg.api.buffer.DataType;
 import org.nd4j.linalg.api.memory.MemoryWorkspace;
+import org.nd4j.linalg.api.memory.conf.WorkspaceConfiguration;
 import org.nd4j.linalg.api.ndarray.INDArray;
 import org.nd4j.linalg.api.ops.impl.layers.ExternalErrorsFunction;
 import org.nd4j.linalg.factory.Nd4j;
@ -81,9 +84,10 @@ public class SameDiffLayer extends AbstractLayer<AbstractSameDiffLayer> {
        assertInputSet(false);

        try(MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) {
-            if(sameDiff == null){
+            if (sameDiff == null) {
                doInit();
            }
+        }

        org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayer bl = (org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayer) layerConf();
        bl.validateInput(input);
@ -103,21 +107,39 @@ public class SameDiffLayer extends AbstractLayer<AbstractSameDiffLayer> {
            sameDiff.assignArray(arr, sameDiff.getVariable(e.getKey()));
        }

+        //Configure memory management for SameDiff instance - use DL4J workspaces
+        String wsNameWorking = workspaceMgr.getWorkspaceName(ArrayType.FF_WORKING_MEM);
+        String wsNameOutput = workspaceMgr.getWorkspaceName(ArrayType.ACTIVATIONS);
+        WorkspaceConfiguration confWorking = workspaceMgr.getConfiguration(ArrayType.FF_WORKING_MEM);
+        WorkspaceConfiguration confOutput = workspaceMgr.getConfiguration(ArrayType.ACTIVATIONS);
+        boolean actScopedOut = workspaceMgr.isScopedOut(ArrayType.ACTIVATIONS);
+        Preconditions.checkState(actScopedOut || wsNameOutput != null, "Activations must have a workspace or must be scoped out");
+        SessionMemMgr mmgr = new DL4JSameDiffMemoryMgr(wsNameWorking, wsNameOutput, confWorking, confOutput);
+
+        InferenceSession is = sameDiff.getSessions().get(Thread.currentThread().getId());
+        if(is == null){
+            is = new InferenceSession(sameDiff);
+            sameDiff.getSessions().put(Thread.currentThread().getId(), is);
+        }
+        is.setMmgr(mmgr);
+
        Map<String,INDArray> out = sameDiff.output(phMap, outputKey);
        INDArray result = out.get(outputKey);

+        //Edge case - identity activation
+        //TODO there may be a cleaner way to do this...
+        if(!actScopedOut && !result.data().getParentWorkspace().getId().equals(wsNameOutput)){
+            result = workspaceMgr.dup(ArrayType.ACTIVATIONS, result);
+        } else if(actScopedOut && result.isAttached()){
+            result = result.detach();
+        }
+
+
        //Clear placeholders and op inputs to ensure no out-of-scope arrays are still referenced anywhere
        sameDiff.clearPlaceholders(true);
        sameDiff.clearOpInputs();

-            INDArray ret = workspaceMgr.dup(ArrayType.ACTIVATIONS, result);
-            if(!result.isAttached() && result.closeable()) {
-                //May be attached in rare edge case - for identity, or if gradients are passed through from output to input
-                // unchaned, as in identity, add scalar, etc
-                result.close();
-            }
-            return ret;
-        }
+        return result;
    }


@ -128,15 +150,31 @@ public class SameDiffLayer extends AbstractLayer<AbstractSameDiffLayer> {
        Gradient g = new DefaultGradient();

        INDArray dLdIn;
-        boolean noCloseEps = false;
-        try(MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()){
-            if(sameDiff == null){
+
+        try(MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) {
+            if (sameDiff == null) {
                doInit();
            }
-            if(!sameDiff.hasGradientFunction()) {
+            if (!sameDiff.hasGradientFunction()) {
                //Create when scoped out, to ensure any arrays are not in WS
                sameDiff.createGradFunction(INPUT_KEY);
            }
+        }
+        //Configure memory management for SameDiff instance - use DL4J workspaces
+        Map<Long,InferenceSession> sessionMap = sameDiff.getFunction("grad").getSessions();
+        if(!sessionMap.containsKey(Thread.currentThread().getId())){
+            sessionMap.put(Thread.currentThread().getId(), new InferenceSession(sameDiff.getFunction("grad")));
+        }
+        String wsNameWorking = workspaceMgr.getWorkspaceName(ArrayType.BP_WORKING_MEM);
+        String wsNameActGrad = workspaceMgr.getWorkspaceName(ArrayType.ACTIVATION_GRAD);
+        WorkspaceConfiguration confWorking = workspaceMgr.getConfiguration(ArrayType.BP_WORKING_MEM);
+        WorkspaceConfiguration confOutput = workspaceMgr.getConfiguration(ArrayType.ACTIVATION_GRAD);
+
+        boolean actGradScopedOut = workspaceMgr.isScopedOut(ArrayType.ACTIVATION_GRAD);
+        Preconditions.checkState(actGradScopedOut || wsNameActGrad != null, "Activation gradients must have a workspace or be scoped out");
+        SessionMemMgr mmgr = new DL4JSameDiffMemoryMgr(wsNameWorking, wsNameActGrad, confWorking, confOutput);
+        sessionMap.get(Thread.currentThread().getId()).setMmgr(mmgr);
+

        org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayer bl = (org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayer) layerConf();
        bl.validateInput(input);
@ -167,28 +205,16 @@ public class SameDiffLayer extends AbstractLayer<AbstractSameDiffLayer> {
            INDArray dl4jGrad = gradTable.get(s);
            dl4jGrad.assign(sdGrad);                                            //TODO OPTIMIZE THIS
            g.gradientForVariable().put(s, dl4jGrad);
-                sdGrad.close();
        }

        dLdIn = m.get(INPUT_KEY);

-            if(dLdIn == null && fn.getGradPlaceholderName().equals(INPUT_KEY)){
-                //Edge case with lambda layers like identity: SameDiff doesn't store the placeholders
-                // So, this getArr() can be trying to get placeholder from SameDiff instance, when it's available here
-                dLdIn = epsilon;
-                noCloseEps = true;
-            }
-        }

        //Clear placeholders and op inputs to ensure no out-of-scope arrays are still referenced anywhere
        sameDiff.clearPlaceholders(true);
        sameDiff.clearOpInputs();

        Pair<Gradient, INDArray> ret = new Pair<>(g, workspaceMgr.dup(ArrayType.ACTIVATION_GRAD, dLdIn));   //TODO OPTIMIZE THIS
-        if(!noCloseEps && !dLdIn.isAttached() && dLdIn.closeable()) {
-            //Edge case: identity etc - might just pass gradient array through unchanged
-            dLdIn.close();
-        }
        return ret;
    }

--- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffOutputLayer.java
+++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/samediff/SameDiffOutputLayer.java
@ -29,9 +29,12 @@ import org.deeplearning4j.nn.workspace.ArrayType;
 import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr;
 import org.nd4j.autodiff.samediff.SDVariable;
 import org.nd4j.autodiff.samediff.SameDiff;
+import org.nd4j.autodiff.samediff.internal.InferenceSession;
+import org.nd4j.autodiff.samediff.internal.SessionMemMgr;
 import org.nd4j.base.Preconditions;
 import org.nd4j.linalg.api.buffer.DataType;
 import org.nd4j.linalg.api.memory.MemoryWorkspace;
+import org.nd4j.linalg.api.memory.conf.WorkspaceConfiguration;
 import org.nd4j.linalg.api.ndarray.INDArray;
 import org.nd4j.linalg.api.ops.impl.layers.ExternalErrorsFunction;
 import org.nd4j.linalg.dataset.api.DataSet;
@ -95,9 +98,28 @@ public class SameDiffOutputLayer extends AbstractLayer<org.deeplearning4j.nn.con

        //TODO optimize
        try(MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) {
-            if(sameDiff == null){
+            if (sameDiff == null) {
                doInit();
            }
+        }
+
+        //Configure memory management for SameDiff instance - use DL4J workspaces
+        String wsNameWorking = workspaceMgr.getWorkspaceName(ArrayType.FF_WORKING_MEM);
+        String wsNameOutput = workspaceMgr.getWorkspaceName(ArrayType.ACTIVATIONS);
+        WorkspaceConfiguration confWorking = workspaceMgr.getConfiguration(ArrayType.FF_WORKING_MEM);
+        WorkspaceConfiguration confOutput = workspaceMgr.getConfiguration(ArrayType.ACTIVATIONS);
+        boolean actScopedOut = workspaceMgr.isScopedOut(ArrayType.ACTIVATIONS);
+        Preconditions.checkState(actScopedOut || wsNameOutput != null, "Activations must have a workspace or must be scoped out");
+        SessionMemMgr mmgr = new DL4JSameDiffMemoryMgr(wsNameWorking, wsNameOutput, confWorking, confOutput);
+
+        InferenceSession is = sameDiff.getSessions().get(Thread.currentThread().getId());
+        if(is == null){
+            is = new InferenceSession(sameDiff);
+            sameDiff.getSessions().put(Thread.currentThread().getId(), is);
+        }
+        is.setMmgr(mmgr);
+
+

        //Because DL4J parameters are views, and SameDiff uses DeviceLocal (which doesn't support views), we need to update the arrays on each iteration
        //TODO Find a more efficient solution for this
@ -120,16 +142,16 @@ public class SameDiffOutputLayer extends AbstractLayer<org.deeplearning4j.nn.con
        sameDiff.clearPlaceholders(true);
        sameDiff.clearOpInputs();

-            if(activations) {
-                Preconditions.checkNotNull(out, "Activations (result) array for variable \"%s\" was " +
-                        "null - error during execution or this variable (as defined by method activationsVertexName()) " +
-                        "does not exist", layerConf().activationsVertexName());
-                return workspaceMgr.dup(ArrayType.ACTIVATIONS, out);
-            } else {
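+        //Edge case: the result array may not be in the activations workspace (for example, when the layer is just an Identity function) - copy it there, or detach it if activations are scoped out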
+        //TODO there may be a cleaner way to do this...
+        if(!actScopedOut && !out.data().getParentWorkspace().getId().equals(wsNameOutput)){
+            out = workspaceMgr.dup(ArrayType.ACTIVATIONS, out);
+        } else if(actScopedOut && out.isAttached()){
+            out = out.detach();
+        }
+
        return out;
    }
-        }
-    }


    @Override
@ -141,12 +163,31 @@ public class SameDiffOutputLayer extends AbstractLayer<org.deeplearning4j.nn.con
        Gradient g = new DefaultGradient();

        INDArray dLdIn;
-        try(MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()){
-            if(sameDiff == null){
+        try(MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) {
+            if (sameDiff == null) {
                //Usually doInit will be called in forward pass; not necessarily the case in output layers
                // (for efficiency, we skip output layer forward pass in MultiLayerNetwork/ComputationGraph)
                doInit();
            }
+            if(sameDiff.getFunction("grad") == null)
+                sameDiff.createGradFunction(INPUT_KEY);
+        }
+
+        //Configure memory management for SameDiff instance - use DL4J workspaces
+        Map<Long,InferenceSession> sessionMap = sameDiff.getFunction("grad").getSessions();
+        if(!sessionMap.containsKey(Thread.currentThread().getId())){
+            sessionMap.put(Thread.currentThread().getId(), new InferenceSession(sameDiff.getFunction("grad")));
+        }
+        String wsNameWorking = workspaceMgr.getWorkspaceName(ArrayType.BP_WORKING_MEM);
+        String wsNameActGrad = workspaceMgr.getWorkspaceName(ArrayType.ACTIVATION_GRAD);
+        WorkspaceConfiguration confWorking = workspaceMgr.getConfiguration(ArrayType.BP_WORKING_MEM);
+        WorkspaceConfiguration confOutput = workspaceMgr.getConfiguration(ArrayType.ACTIVATION_GRAD);
+
+        boolean actGradScopedOut = workspaceMgr.isScopedOut(ArrayType.ACTIVATION_GRAD);
+        Preconditions.checkState(actGradScopedOut || wsNameActGrad != null, "Activation gradients must have a workspace or be scoped out");
+        SessionMemMgr mmgr = new DL4JSameDiffMemoryMgr(wsNameWorking, wsNameActGrad, confWorking, confOutput);
+        sessionMap.get(Thread.currentThread().getId()).setMmgr(mmgr);
+
        if(!sameDiff.hasGradientFunction()) {
            //Create when scoped out, to ensure any arrays are not in WS
            sameDiff.createGradFunction(INPUT_KEY);
@ -179,16 +220,19 @@ public class SameDiffOutputLayer extends AbstractLayer<org.deeplearning4j.nn.con
        }

        dLdIn = grads.get(INPUT_KEY);
-        }

        //Clear placeholders and op inputs to ensure no out-of-scope arrays are still referenced anywhere
        sameDiff.clearPlaceholders(true);
        sameDiff.clearOpInputs();

-        Pair<Gradient,INDArray> p = new Pair<>(g, workspaceMgr.dup(ArrayType.ACTIVATION_GRAD, dLdIn));   //TODO OPTIMIZE THIS
-        if(dLdIn.closeable())
-            dLdIn.close();
-        return p;
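+        //Ensure the returned gradient is in the activation gradient workspace, or detached if that array type is scoped out. TODO there may be a cleaner way to do this...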
+        if(!actGradScopedOut && !dLdIn.data().getParentWorkspace().getId().equals(wsNameActGrad)){
+            dLdIn = workspaceMgr.dup(ArrayType.ACTIVATION_GRAD, dLdIn);
+        } else if(actGradScopedOut && dLdIn.isAttached()){
+            dLdIn = dLdIn.detach();
+        }
+
+        return new Pair<>(g, dLdIn);
    }

    /**Returns the parameters of the neural network as a flattened row vector
@ -312,7 +356,8 @@ public class SameDiffOutputLayer extends AbstractLayer<org.deeplearning4j.nn.con

    @Override
    public double computeScore(double fullNetRegTerm, boolean training, LayerWorkspaceMgr workspaceMgr) {
-        return (activateHelper(false, workspaceMgr).getDouble(0) + fullNetRegTerm) / input.size(0);
+        INDArray scoreArr = activateHelper(false, workspaceMgr);
+        return (scoreArr.getDouble(0) + fullNetRegTerm) / input.size(0);
    }

    @Override
--- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ndarray/BaseNDArray.java
+++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ndarray/BaseNDArray.java
@ -309,11 +309,11 @@ public abstract class BaseNDArray implements INDArray, Iterable {
     * @param ordering the ordering of the ndarray
     */
    public BaseNDArray(int[] shape, int[] stride, long offset, char ordering) {
-        this(Nd4j.createBuffer(ArrayUtil.prodLong(shape)), shape, stride, offset, ordering);
+        this(Nd4j.createBuffer(shape.length == 0 ? 1 : ArrayUtil.prodLong(shape)), shape, stride, offset, ordering);
    }

    public BaseNDArray(long[] shape, long[] stride, long offset, char ordering) {
-        this(Nd4j.createBuffer(ArrayUtil.prodLong(shape)), shape, stride, offset, ordering);
+        this(Nd4j.createBuffer(shape.length == 0 ? 1 : ArrayUtil.prodLong(shape)), shape, stride, offset, ordering);
    }

    /**
@ -326,19 +326,19 @@ public abstract class BaseNDArray implements INDArray, Iterable {
     * @param initialize Whether to initialize the INDArray. If true: initialize. If false: don't.
     */
    public BaseNDArray(int[] shape, int[] stride, long offset, char ordering, boolean initialize) {
-        this(Nd4j.createBuffer(ArrayUtil.prodLong(shape), initialize), shape, stride, offset, ordering);
+        this(Nd4j.createBuffer(shape.length == 0 ? 1 : ArrayUtil.prodLong(shape), initialize), shape, stride, offset, ordering);
    }

    public BaseNDArray(long[] shape, long[] stride, long offset, char ordering, boolean initialize) {
-        this(Nd4j.createBuffer(ArrayUtil.prodLong(shape), initialize), shape, stride, offset, ordering);
+        this(Nd4j.createBuffer(shape.length == 0 ? 1 : ArrayUtil.prodLong(shape), initialize), shape, stride, offset, ordering);
    }

    public BaseNDArray(DataType type, long[] shape, long[] stride, long offset, char ordering, boolean initialize) {
-        this(Nd4j.createBuffer(type, ArrayUtil.prodLong(shape), initialize), type, shape, stride, offset, ordering);
+        this(Nd4j.createBuffer(type, shape.length == 0 ? 1 : ArrayUtil.prodLong(shape), initialize), type, shape, stride, offset, ordering);
    }

    public BaseNDArray(DataType type, long[] shape, long[] stride, long offset, char ordering, boolean initialize, MemoryWorkspace workspace) {
-        this(Nd4j.createBuffer(type, ArrayUtil.prodLong(shape), initialize, workspace), type, shape, stride, offset, ordering);
+        this(Nd4j.createBuffer(type, shape.length == 0 ? 1 : ArrayUtil.prodLong(shape), initialize, workspace), type, shape, stride, offset, ordering);
    }


--- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/workspace/BasicWorkspaceTests.java
+++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/workspace/BasicWorkspaceTests.java
@ -319,6 +319,11 @@ public class BasicWorkspaceTests extends BaseNd4jTest {
            long reqMemory = 5 * Nd4j.sizeOfDataType(array1.dataType());
            assertEquals(reqMemory + reqMemory % 8, wsI.getPrimaryOffset());
            assertEquals(array1, array2);
+
+            INDArray array3 = Nd4j.createUninitializedDetached(DataType.FLOAT, new long[0]);
+            assertTrue(array3.isScalar());
+            assertEquals(1, array3.length());
+            assertEquals(1, array3.data().length());
        }
    }