Use DL4J workspaces for SameDiff layers in MLN/CG (#23)

* #8329 DL4J workspace integration for SameDiff layers

Signed-off-by: AlexDBlack <blacka101@gmail.com>

* Fix bug for Nd4j.createUninitializedDetached for scalars (length 0 shape array)

Signed-off-by: AlexDBlack <blacka101@gmail.com>

* SameDiff output layer, graph vertex, various fixes

Signed-off-by: AlexDBlack <blacka101@gmail.com>

* Javadoc

Signed-off-by: AlexDBlack <blacka101@gmail.com>
Branch: master
Author: Alex Black, 2019-11-02 17:42:01 +11:00 (committed by GitHub)
Parent: e9a7a13c00
Commit: 9efd811508
9 changed files with 760 additions and 545 deletions
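
For context, the test changes below exercise SameDiff layers and vertices with DL4J workspaces both enabled and disabled. A minimal configuration sketch of the scenario this commit targets (adapted from TestSameDiffDense below; SameDiffDense is a test-only layer, and the exact builder options here are illustrative rather than prescriptive):

    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .trainingWorkspaceMode(WorkspaceMode.ENABLED)
            .inferenceWorkspaceMode(WorkspaceMode.ENABLED)
            .list()
            .layer(new SameDiffDense.Builder().nIn(3).nOut(4).activation(Activation.TANH).build())
            .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(4).nOut(4)
                    .activation(Activation.SOFTMAX).build())
            .build();

    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();
    INDArray out = net.output(Nd4j.rand(5, 3));   //SameDiff op outputs now flow through the DL4J workspaces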

TestBatchNormBp.java

@@ -96,8 +96,8 @@ public class TestBatchNormBp {
         bn.setInput(in, LayerWorkspaceMgr.noWorkspaces());
         Pair<Gradient,INDArray> p = net.backpropGradient(eps, LayerWorkspaceMgr.noWorkspaces());

-        h.preOutput(in, true, new int[]{1,3}, gamma, beta, mean, var, 0.5, e, LayerWorkspaceMgr.noWorkspaces());
-        Pair<Gradient,INDArray> pmkl = h.backpropGradient(in, eps, new int[]{1,3}, gamma, beta, dLdg, dLdb, e, LayerWorkspaceMgr.noWorkspaces());
+        h.preOutput(in, true, new long[]{1,3}, gamma, beta, mean, var, 0.5, e, LayerWorkspaceMgr.noWorkspaces());
+        Pair<Gradient,INDArray> pmkl = h.backpropGradient(in, eps, new long[]{1,3}, gamma, beta, dLdg, dLdb, e, LayerWorkspaceMgr.noWorkspaces());

         INDArray dldin_dl4j = p.getSecond();

TestSameDiffDense.java

@@ -80,6 +80,7 @@ public class TestSameDiffDense extends BaseDL4JTest {
     @Test
     public void testSameDiffDenseForward() {
+        for(WorkspaceMode wsm : new WorkspaceMode[]{WorkspaceMode.ENABLED, WorkspaceMode.NONE}) {
         for (int minibatch : new int[]{5, 1}) {
             int nIn = 3;
             int nOut = 4;
@@ -97,8 +98,10 @@ public class TestSameDiffDense extends BaseDL4JTest {
             };

             for (Activation a : afns) {
-                log.info("Starting test - " + a);
+                log.info("Starting test - " + a + ", workspace = " + wsm);
                 MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
+                        .inferenceWorkspaceMode(wsm)
+                        .trainingWorkspaceMode(wsm)
                         .list()
                         .layer(new SameDiffDense.Builder().nIn(nIn).nOut(nOut)
                                 .activation(a)
@@ -146,9 +149,11 @@ public class TestSameDiffDense extends BaseDL4JTest {
                 }
             }
         }
+        }

     @Test
     public void testSameDiffDenseForwardMultiLayer() {
+        for(WorkspaceMode wsm : new WorkspaceMode[]{WorkspaceMode.ENABLED, WorkspaceMode.NONE}) {
         for (int minibatch : new int[]{5, 1}) {
             int nIn = 3;
             int nOut = 4;
@@ -166,7 +171,7 @@ public class TestSameDiffDense extends BaseDL4JTest {
             };

             for (Activation a : afns) {
-                log.info("Starting test - " + a);
+                log.info("Starting test - " + a + " - workspace=" + wsm);
                 MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                         .seed(12345)
                         .list()
@@ -201,7 +206,6 @@ public class TestSameDiffDense extends BaseDL4JTest {
             MultiLayerNetwork net2 = new MultiLayerNetwork(conf2);
             net2.init();

-//            net.params().assign(net2.params());
             assertEquals(net2.params(), net.params());

             //Check params:
@@ -231,6 +235,7 @@ public class TestSameDiffDense extends BaseDL4JTest {
                 }
             }
         }
+        }

     @Test
     public void testSameDiffDenseBackward() {
@@ -244,10 +249,13 @@ public class TestSameDiffDense extends BaseDL4JTest {
         Activation[] afns = new Activation[]{
                 Activation.TANH,
                 Activation.SIGMOID,
-                Activation.ELU, Activation.IDENTITY, Activation.SOFTPLUS, Activation.SOFTSIGN,
+                Activation.ELU,
+                Activation.IDENTITY,
+                Activation.SOFTPLUS,
+                Activation.SOFTSIGN,
                 Activation.HARDTANH,
-                Activation.CUBE,    //https://github.com/deeplearning4j/nd4j/issues/2426
-                Activation.RELU     //JVM crash
+                Activation.CUBE,
+                Activation.RELU
         };

         for (Activation a : afns) {
@@ -337,12 +345,13 @@ public class TestSameDiffDense extends BaseDL4JTest {
         int nIn = 4;
         int nOut = 3;

-        boolean workspaces = true;
-
+        for(WorkspaceMode wsm : new WorkspaceMode[]{WorkspaceMode.ENABLED, WorkspaceMode.NONE}) {
+
             MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                     .seed(12345)
-                    .trainingWorkspaceMode(workspaces ? WorkspaceMode.ENABLED : WorkspaceMode.NONE)
-                    .inferenceWorkspaceMode(workspaces ? WorkspaceMode.ENABLED : WorkspaceMode.NONE)
+                    .trainingWorkspaceMode(wsm)
+                    .inferenceWorkspaceMode(wsm)
                     .updater(new Adam(0.1))
                     .list()
                     .layer(new SameDiffDense.Builder().nIn(nIn).nOut(5).activation(Activation.TANH).build())
@@ -396,13 +405,14 @@ public class TestSameDiffDense extends BaseDL4JTest {
             INDArray outMb = netStandard.output(newIn);
             assertEquals(outMb, outMbsd);
         }
+        }

     @Test
     public void gradientCheck() {
         int nIn = 4;
         int nOut = 4;

-        for (boolean workspaces : new boolean[]{false, true}) {
+        for (boolean workspaces : new boolean[]{true, false}) {
             for (Activation a : new Activation[]{Activation.TANH, Activation.IDENTITY}) {
                 String msg = "workspaces: " + workspaces + ", " + a;

TestSameDiffLambda.java

@@ -21,6 +21,7 @@ import org.deeplearning4j.BaseDL4JTest;
 import org.deeplearning4j.TestUtils;
 import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
 import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
+import org.deeplearning4j.nn.conf.WorkspaceMode;
 import org.deeplearning4j.nn.conf.graph.ElementWiseVertex;
 import org.deeplearning4j.nn.conf.graph.ScaleVertex;
 import org.deeplearning4j.nn.conf.graph.ShiftVertex;
@@ -52,8 +53,14 @@ public class TestSameDiffLambda extends BaseDL4JTest {
     @Test
     public void testSameDiffLamdaLayerBasic(){
+        for(WorkspaceMode wsm : new WorkspaceMode[]{WorkspaceMode.ENABLED, WorkspaceMode.NONE}) {
+            log.info("--- Workspace Mode: {} ---", wsm);
+
         Nd4j.getRandom().setSeed(12345);
         ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
+                .trainingWorkspaceMode(wsm)
+                .inferenceWorkspaceMode(wsm)
                 .seed(12345)
                 .updater(new Adam(0.01))
                 .graphBuilder()
@@ -67,6 +74,8 @@ public class TestSameDiffLambda extends BaseDL4JTest {
         //Equavalent, not using SameDiff Lambda:
         ComputationGraphConfiguration confStd = new NeuralNetConfiguration.Builder()
+                .trainingWorkspaceMode(wsm)
+                .inferenceWorkspaceMode(wsm)
                 .seed(12345)
                 .updater(new Adam(0.01))
                 .graphBuilder()
@@ -122,11 +131,17 @@ public class TestSameDiffLambda extends BaseDL4JTest {
         INDArray outMb = std.output(newIn)[0];
         assertEquals(outMb, outMbsd);
     }
+    }

     @Test
     public void testSameDiffLamdaVertexBasic(){
+        for(WorkspaceMode wsm : new WorkspaceMode[]{WorkspaceMode.ENABLED, WorkspaceMode.NONE}) {
+            log.info("--- Workspace Mode: {} ---", wsm);
+
         Nd4j.getRandom().setSeed(12345);
         ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
+                .trainingWorkspaceMode(wsm)
+                .inferenceWorkspaceMode(wsm)
                 .dataType(DataType.DOUBLE)
                 .seed(12345)
                 .updater(new Adam(0.01))
@@ -142,6 +157,8 @@ public class TestSameDiffLambda extends BaseDL4JTest {
         //Equavalent, not using SameDiff Lambda:
         ComputationGraphConfiguration confStd = new NeuralNetConfiguration.Builder()
+                .trainingWorkspaceMode(wsm)
+                .inferenceWorkspaceMode(wsm)
                 .dataType(DataType.DOUBLE)
                 .seed(12345)
                 .updater(new Adam(0.01))
@@ -201,3 +218,4 @@ public class TestSameDiffLambda extends BaseDL4JTest {
             assertEquals(outMb, outMbsd);
         }
     }
+    }

DL4JSameDiffMemoryMgr.java (new file)

@@ -0,0 +1,68 @@
+package org.deeplearning4j.nn.layers.samediff;
+
+import org.nd4j.autodiff.samediff.internal.memory.AbstractMemoryMgr;
+import org.nd4j.base.Preconditions;
+import org.nd4j.linalg.api.buffer.DataType;
+import org.nd4j.linalg.api.memory.MemoryWorkspace;
+import org.nd4j.linalg.api.memory.conf.WorkspaceConfiguration;
+import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.api.shape.LongShapeDescriptor;
+import org.nd4j.linalg.factory.Nd4j;
+
+/**
+ * A SameDiff {@link org.nd4j.autodiff.samediff.internal.SessionMemMgr} that uses DL4J workspaces for memory management.
+ * Any op outputs are allocated in the output workspace if they are returned to the layer; otherwise they are placed in
+ * the DL4J working memory workspace
+ *
+ * @author Alex Black
+ */
+public class DL4JSameDiffMemoryMgr extends AbstractMemoryMgr {
+
+    private final String workingMemoryWs;
+    private final String outputWs;
+
+    private final WorkspaceConfiguration confWorking;
+    private final WorkspaceConfiguration confOutput;
+
+    //Note: if the working memory or output workspace names are null -> detached memory
+    public DL4JSameDiffMemoryMgr(String workingMemoryWs, String outputWs, WorkspaceConfiguration confWorking,
+                                 WorkspaceConfiguration confOutput){
+        this.workingMemoryWs = workingMemoryWs;
+        this.outputWs = outputWs;
+        this.confWorking = confWorking;
+        this.confOutput = confOutput;
+    }
+
+    @Override
+    public INDArray allocate(boolean detached, DataType dataType, long... shape) {
+        String wsName = detached ? outputWs : workingMemoryWs;
+        WorkspaceConfiguration wsConf = detached ? confOutput : confWorking;
+
+        if(wsName == null){
+            //Scoped out
+            INDArray ret = Nd4j.createUninitializedDetached(dataType, shape);
+            Preconditions.checkState(!ret.isAttached(), "Returned array should be detached");
+            return ret;
+        } else {
+            MemoryWorkspace ws = Nd4j.getWorkspaceManager().getWorkspaceForCurrentThread(wsConf, wsName);
+            try (MemoryWorkspace mw = ws.notifyScopeBorrowed()) {
+                return Nd4j.createUninitialized(dataType, shape);
+            }
+        }
+    }
+
+    @Override
+    public INDArray allocate(boolean detached, LongShapeDescriptor descriptor) {
+        return allocate(detached, descriptor.dataType(), descriptor.getShape());
+    }
+
+    @Override
+    public void release(INDArray array) {
+        //No-op - DL4J workspaces handles this
+    }
+
+    @Override
+    public void close() {
+        //No-op - DL4J workspaces handles this
+    }
+}
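
As a usage illustration of the class above, allocate() routes detached requests to the output workspace and everything else to working memory. The workspace names and configurations below are hypothetical placeholders; in this commit they always come from the calling layer's LayerWorkspaceMgr, as the later hunks show:

    WorkspaceConfiguration wsConf = WorkspaceConfiguration.builder().build();
    SessionMemMgr mmgr = new DL4JSameDiffMemoryMgr("someWorkingMemWs", "someOutputWs", wsConf, wsConf);

    INDArray working = mmgr.allocate(false, DataType.FLOAT, 32, 128);   //Placed in the working-memory workspace
    INDArray output = mmgr.allocate(true, DataType.FLOAT, 32, 128);     //Placed in the output (activations) workspace
    //release() and close() are no-ops: the DL4J workspaces own and reclaim this memory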

SameDiffGraphVertex.java

@@ -31,9 +31,12 @@ import org.deeplearning4j.nn.workspace.ArrayType;
 import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr;
 import org.nd4j.autodiff.samediff.SDVariable;
 import org.nd4j.autodiff.samediff.SameDiff;
+import org.nd4j.autodiff.samediff.internal.InferenceSession;
+import org.nd4j.autodiff.samediff.internal.SessionMemMgr;
 import org.nd4j.base.Preconditions;
 import org.nd4j.linalg.api.buffer.DataType;
 import org.nd4j.linalg.api.memory.MemoryWorkspace;
+import org.nd4j.linalg.api.memory.conf.WorkspaceConfiguration;
 import org.nd4j.linalg.api.ndarray.INDArray;
 import org.nd4j.linalg.api.ops.impl.layers.ExternalErrorsFunction;
 import org.nd4j.linalg.factory.Nd4j;
@@ -98,6 +101,7 @@ public class SameDiffGraphVertex extends BaseGraphVertex {
             if (sameDiff == null) {
                 doInit();
             }
+        }

         Map<String,INDArray> phMap = new HashMap<>();
         config.validateInput(inputs);
@@ -112,6 +116,25 @@ public class SameDiffGraphVertex extends BaseGraphVertex {
             }
         }

+        //Configure memory management for SameDiff instance - use DL4J workspaces
+        String wsNameWorking = workspaceMgr.getWorkspaceName(ArrayType.FF_WORKING_MEM);
+        String wsNameOutput = workspaceMgr.getWorkspaceName(ArrayType.ACTIVATIONS);
+        WorkspaceConfiguration confWorking = workspaceMgr.getConfiguration(ArrayType.FF_WORKING_MEM);
+        WorkspaceConfiguration confOutput = workspaceMgr.getConfiguration(ArrayType.ACTIVATIONS);
+
+        boolean actScopedOut = workspaceMgr.isScopedOut(ArrayType.ACTIVATIONS);
+        Preconditions.checkState(actScopedOut || wsNameOutput != null, "Activations must have a workspace or must be scoped out");
+        SessionMemMgr mmgr = new DL4JSameDiffMemoryMgr(wsNameWorking, wsNameOutput, confWorking, confOutput);
+
+        InferenceSession is = sameDiff.getSessions().get(Thread.currentThread().getId());
+        if(is == null){
+            is = new InferenceSession(sameDiff);
+            sameDiff.getSessions().put(Thread.currentThread().getId(), is);
+        }
+        is.setMmgr(mmgr);
+
         if(paramTable != null && paramTable.size() > 0) {
             //Because DL4J parameters are views, and SameDiff uses DeviceLocal (which doesn't support views), we need to update the arrays on each iteration
             //TODO Find a more efficient solution for this
@@ -122,23 +145,29 @@ public class SameDiffGraphVertex extends BaseGraphVertex {
         }

         INDArray result = sameDiff.outputSingle(phMap, outputKey);

+        //Edge case: "vertex" is just an identity activation, for example
+        //TODO there may be a cleaner way to do this...
+        if(!actScopedOut && !result.data().getParentWorkspace().getId().equals(wsNameOutput)){
+            result = workspaceMgr.dup(ArrayType.ACTIVATIONS, result);
+        } else if(actScopedOut && result.isAttached()){
+            result = result.detach();
+        }
+
         //Clear placeholders and op inputs to ensure no out-of-scope arrays are still referenced anywhere
         sameDiff.clearPlaceholders(true);
         sameDiff.clearOpInputs();

-            return workspaceMgr.dup(ArrayType.ACTIVATIONS, result);
-        }
+        return result;
     }

     @Override
     public Pair<Gradient, INDArray[]> doBackward(boolean tbptt, LayerWorkspaceMgr workspaceMgr) {
         Gradient g = new DefaultGradient();

-        INDArray[] dLdIns;
-        boolean[] noClose = new boolean[getNumInputArrays()];
-
         try(MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) {
             if (sameDiff == null) {
                 doInit();
             }
+        }

         List<String> inputNames = config.getVertexParams().getInputs();
         if(!sameDiff.hasGradientFunction()) {
@@ -147,6 +176,24 @@ public class SameDiffGraphVertex extends BaseGraphVertex {
             sameDiff.createGradFunction(inArr);
         }
         config.validateInput(inputs);

+        //Configure memory management for SameDiff instance - use DL4J workspaces
+        Map<Long,InferenceSession> sessionMap = sameDiff.getFunction("grad").getSessions();
+        if(!sessionMap.containsKey(Thread.currentThread().getId())){
+            sessionMap.put(Thread.currentThread().getId(), new InferenceSession(sameDiff.getFunction("grad")));
+        }
+        String wsNameWorking = workspaceMgr.getWorkspaceName(ArrayType.BP_WORKING_MEM);
+        String wsNameActGrad = workspaceMgr.getWorkspaceName(ArrayType.ACTIVATION_GRAD);
+        WorkspaceConfiguration confWorking = workspaceMgr.getConfiguration(ArrayType.BP_WORKING_MEM);
+        WorkspaceConfiguration confOutput = workspaceMgr.getConfiguration(ArrayType.ACTIVATION_GRAD);
+
+        boolean actGradScopedOut = workspaceMgr.isScopedOut(ArrayType.ACTIVATION_GRAD);
+        Preconditions.checkState(actGradScopedOut || wsNameActGrad != null, "Activation gradients must have a workspace or be scoped out");
+        SessionMemMgr mmgr = new DL4JSameDiffMemoryMgr(wsNameWorking, wsNameActGrad, confWorking, confOutput);
+        sessionMap.get(Thread.currentThread().getId()).setMmgr(mmgr);
+
         Map<String,INDArray> phMap = new HashMap<>();
         List<String> inputs = config.getVertexParams().getInputs();
         int i=0;
@@ -182,11 +229,10 @@ public class SameDiffGraphVertex extends BaseGraphVertex {
             INDArray sdGrad = gradsMap.get(s);
             INDArray dl4jGrad = gradTable.get(s);
             dl4jGrad.assign(sdGrad);    //TODO OPTIMIZE THIS
-            sdGrad.close();             //TODO optimize this
             g.gradientForVariable().put(s, dl4jGrad);
         }

-        dLdIns = new INDArray[inputs.size()];
+        INDArray[] dLdIns = new INDArray[inputs.size()];
         String fnName = fn.getGradPlaceholderName();
         for(int j=0; j<inputs.size(); j++ ){
             String name = inputs.get(j);
@@ -197,17 +243,14 @@ public class SameDiffGraphVertex extends BaseGraphVertex {
                 //Edge case with lambda vertices like identity: SameDiff doesn't store the placeholders
                 // So, this getArr() can be trying to get placeholder from SameDiff instance, when it's available here
                 dLdIns[j] = epsilon;
-                noClose[j] = true;
-            }
-        }
+            }

-        //TODO optimize
-        for( int i=0; i<dLdIns.length; i++ ){
-            INDArray before = dLdIns[i];
-            dLdIns[i] = workspaceMgr.dup(ArrayType.ACTIVATION_GRAD, dLdIns[i]);
-            if(!noClose[i]){
-                before.close();
+            //Edge case: "vertex" is just an identity activation, for example
+            //TODO there may be a cleaner way to do this...
+            if(!actGradScopedOut && !dLdIns[j].data().getParentWorkspace().getId().equals(wsNameActGrad)){
+                dLdIns[j] = workspaceMgr.dup(ArrayType.ACTIVATION_GRAD, dLdIns[j]);
+            } else if(actGradScopedOut && dLdIns[j].isAttached()){
+                dLdIns[j] = dLdIns[j].detach();
             }
         }
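
SameDiffLayer and SameDiffOutputLayer below repeat the same session wiring. Condensed, the pattern this commit introduces is (a sketch assembled from the hunks above; wsNameWorking/wsNameOutput and the configurations come from the layer's LayerWorkspaceMgr):

    //One InferenceSession per thread, each backed by a DL4J-workspace memory manager
    SessionMemMgr mmgr = new DL4JSameDiffMemoryMgr(wsNameWorking, wsNameOutput, confWorking, confOutput);
    InferenceSession is = sameDiff.getSessions().get(Thread.currentThread().getId());
    if(is == null){
        is = new InferenceSession(sameDiff);
        sameDiff.getSessions().put(Thread.currentThread().getId(), is);
    }
    is.setMmgr(mmgr);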

SameDiffLayer.java

@@ -26,9 +26,12 @@ import org.deeplearning4j.nn.gradient.Gradient;
 import org.deeplearning4j.nn.layers.AbstractLayer;
 import org.nd4j.autodiff.samediff.SDVariable;
 import org.nd4j.autodiff.samediff.SameDiff;
+import org.nd4j.autodiff.samediff.internal.InferenceSession;
+import org.nd4j.autodiff.samediff.internal.SessionMemMgr;
 import org.nd4j.base.Preconditions;
 import org.nd4j.linalg.api.buffer.DataType;
 import org.nd4j.linalg.api.memory.MemoryWorkspace;
+import org.nd4j.linalg.api.memory.conf.WorkspaceConfiguration;
 import org.nd4j.linalg.api.ndarray.INDArray;
 import org.nd4j.linalg.api.ops.impl.layers.ExternalErrorsFunction;
 import org.nd4j.linalg.factory.Nd4j;
@@ -84,6 +87,7 @@ public class SameDiffLayer extends AbstractLayer<AbstractSameDiffLayer> {
             if (sameDiff == null) {
                 doInit();
             }
+        }

         org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayer bl = (org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayer) layerConf();
         bl.validateInput(input);
@@ -103,21 +107,39 @@ public class SameDiffLayer extends AbstractLayer<AbstractSameDiffLayer> {
             sameDiff.assignArray(arr, sameDiff.getVariable(e.getKey()));
         }

+        //Configure memory management for SameDiff instance - use DL4J workspaces
+        String wsNameWorking = workspaceMgr.getWorkspaceName(ArrayType.FF_WORKING_MEM);
+        String wsNameOutput = workspaceMgr.getWorkspaceName(ArrayType.ACTIVATIONS);
+        WorkspaceConfiguration confWorking = workspaceMgr.getConfiguration(ArrayType.FF_WORKING_MEM);
+        WorkspaceConfiguration confOutput = workspaceMgr.getConfiguration(ArrayType.ACTIVATIONS);
+
+        boolean actScopedOut = workspaceMgr.isScopedOut(ArrayType.ACTIVATIONS);
+        Preconditions.checkState(actScopedOut || wsNameOutput != null, "Activations must have a workspace or must be scoped out");
+        SessionMemMgr mmgr = new DL4JSameDiffMemoryMgr(wsNameWorking, wsNameOutput, confWorking, confOutput);
+
+        InferenceSession is = sameDiff.getSessions().get(Thread.currentThread().getId());
+        if(is == null){
+            is = new InferenceSession(sameDiff);
+            sameDiff.getSessions().put(Thread.currentThread().getId(), is);
+        }
+        is.setMmgr(mmgr);
+
         Map<String,INDArray> out = sameDiff.output(phMap, outputKey);
         INDArray result = out.get(outputKey);

+        //Edge case - identity activation
+        //TODO there may be a cleaner way to do this...
+        if(!actScopedOut && !result.data().getParentWorkspace().getId().equals(wsNameOutput)){
+            result = workspaceMgr.dup(ArrayType.ACTIVATIONS, result);
+        } else if(actScopedOut && result.isAttached()){
+            result = result.detach();
+        }
+
         //Clear placeholders and op inputs to ensure no out-of-scope arrays are still referenced anywhere
         sameDiff.clearPlaceholders(true);
         sameDiff.clearOpInputs();

-            INDArray ret = workspaceMgr.dup(ArrayType.ACTIVATIONS, result);
-            if(!result.isAttached() && result.closeable()) {
-                //May be attached in rare edge case - for identity, or if gradients are passed through from output to input
-                // unchaned, as in identity, add scalar, etc
-                result.close();
-            }
-            return ret;
-        }
+        return result;
     }
@@ -128,7 +150,7 @@ public class SameDiffLayer extends AbstractLayer<AbstractSameDiffLayer> {
         Gradient g = new DefaultGradient();

         INDArray dLdIn;
-        boolean noCloseEps = false;
+
         try(MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) {
             if (sameDiff == null) {
                 doInit();
@@ -137,6 +159,22 @@ public class SameDiffLayer extends AbstractLayer<AbstractSameDiffLayer> {
                 //Create when scoped out, to ensure any arrays are not in WS
                 sameDiff.createGradFunction(INPUT_KEY);
             }
+        }
+
+        //Configure memory management for SameDiff instance - use DL4J workspaces
+        Map<Long,InferenceSession> sessionMap = sameDiff.getFunction("grad").getSessions();
+        if(!sessionMap.containsKey(Thread.currentThread().getId())){
+            sessionMap.put(Thread.currentThread().getId(), new InferenceSession(sameDiff.getFunction("grad")));
+        }
+        String wsNameWorking = workspaceMgr.getWorkspaceName(ArrayType.BP_WORKING_MEM);
+        String wsNameActGrad = workspaceMgr.getWorkspaceName(ArrayType.ACTIVATION_GRAD);
+        WorkspaceConfiguration confWorking = workspaceMgr.getConfiguration(ArrayType.BP_WORKING_MEM);
+        WorkspaceConfiguration confOutput = workspaceMgr.getConfiguration(ArrayType.ACTIVATION_GRAD);
+
+        boolean actGradScopedOut = workspaceMgr.isScopedOut(ArrayType.ACTIVATION_GRAD);
+        Preconditions.checkState(actGradScopedOut || wsNameActGrad != null, "Activation gradients must have a workspace or be scoped out");
+        SessionMemMgr mmgr = new DL4JSameDiffMemoryMgr(wsNameWorking, wsNameActGrad, confWorking, confOutput);
+        sessionMap.get(Thread.currentThread().getId()).setMmgr(mmgr);
+
         org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayer bl = (org.deeplearning4j.nn.conf.layers.samediff.SameDiffLayer) layerConf();
         bl.validateInput(input);
@@ -167,28 +205,16 @@ public class SameDiffLayer extends AbstractLayer<AbstractSameDiffLayer> {
             INDArray dl4jGrad = gradTable.get(s);
             dl4jGrad.assign(sdGrad);    //TODO OPTIMIZE THIS
             g.gradientForVariable().put(s, dl4jGrad);
-            sdGrad.close();
         }

         dLdIn = m.get(INPUT_KEY);
-            if(dLdIn == null && fn.getGradPlaceholderName().equals(INPUT_KEY)){
-                //Edge case with lambda layers like identity: SameDiff doesn't store the placeholders
-                // So, this getArr() can be trying to get placeholder from SameDiff instance, when it's available here
-                dLdIn = epsilon;
-                noCloseEps = true;
-            }
-        }

         //Clear placeholders and op inputs to ensure no out-of-scope arrays are still referenced anywhere
         sameDiff.clearPlaceholders(true);
         sameDiff.clearOpInputs();

         Pair<Gradient, INDArray> ret = new Pair<>(g, workspaceMgr.dup(ArrayType.ACTIVATION_GRAD, dLdIn));   //TODO OPTIMIZE THIS
-        if(!noCloseEps && !dLdIn.isAttached() && dLdIn.closeable()) {
-            //Edge case: identity etc - might just pass gradient array through unchanged
-            dLdIn.close();
-        }
         return ret;
     }

SameDiffOutputLayer.java

@@ -29,9 +29,12 @@ import org.deeplearning4j.nn.workspace.ArrayType;
 import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr;
 import org.nd4j.autodiff.samediff.SDVariable;
 import org.nd4j.autodiff.samediff.SameDiff;
+import org.nd4j.autodiff.samediff.internal.InferenceSession;
+import org.nd4j.autodiff.samediff.internal.SessionMemMgr;
 import org.nd4j.base.Preconditions;
 import org.nd4j.linalg.api.buffer.DataType;
 import org.nd4j.linalg.api.memory.MemoryWorkspace;
+import org.nd4j.linalg.api.memory.conf.WorkspaceConfiguration;
 import org.nd4j.linalg.api.ndarray.INDArray;
 import org.nd4j.linalg.api.ops.impl.layers.ExternalErrorsFunction;
 import org.nd4j.linalg.dataset.api.DataSet;
@@ -98,6 +101,25 @@ public class SameDiffOutputLayer extends AbstractLayer<org.deeplearning4j.nn.con
             if (sameDiff == null) {
                 doInit();
             }
+        }
+
+        //Configure memory management for SameDiff instance - use DL4J workspaces
+        String wsNameWorking = workspaceMgr.getWorkspaceName(ArrayType.FF_WORKING_MEM);
+        String wsNameOutput = workspaceMgr.getWorkspaceName(ArrayType.ACTIVATIONS);
+        WorkspaceConfiguration confWorking = workspaceMgr.getConfiguration(ArrayType.FF_WORKING_MEM);
+        WorkspaceConfiguration confOutput = workspaceMgr.getConfiguration(ArrayType.ACTIVATIONS);
+
+        boolean actScopedOut = workspaceMgr.isScopedOut(ArrayType.ACTIVATIONS);
+        Preconditions.checkState(actScopedOut || wsNameOutput != null, "Activations must have a workspace or must be scoped out");
+        SessionMemMgr mmgr = new DL4JSameDiffMemoryMgr(wsNameWorking, wsNameOutput, confWorking, confOutput);
+
+        InferenceSession is = sameDiff.getSessions().get(Thread.currentThread().getId());
+        if(is == null){
+            is = new InferenceSession(sameDiff);
+            sameDiff.getSessions().put(Thread.currentThread().getId(), is);
+        }
+        is.setMmgr(mmgr);
+
         //Because DL4J parameters are views, and SameDiff uses DeviceLocal (which doesn't support views), we need to update the arrays on each iteration
         //TODO Find a more efficient solution for this
@@ -120,16 +142,16 @@ public class SameDiffOutputLayer extends AbstractLayer<org.deeplearning4j.nn.con
         sameDiff.clearPlaceholders(true);
         sameDiff.clearOpInputs();

-        if(activations) {
-            Preconditions.checkNotNull(out, "Activations (result) array for variable \"%s\" was " +
-                    "null - error during execution or this variable (as defined by method activationsVertexName()) " +
-                    "does not exist", layerConf().activationsVertexName());
-            return workspaceMgr.dup(ArrayType.ACTIVATIONS, out);
-        } else {
-            return out;
-        }
-    }
+        //Edge case: vertex is just an Identity function, for example
+        //TODO there may be a cleaner way to do this...
+        if(!actScopedOut && !out.data().getParentWorkspace().getId().equals(wsNameOutput)){
+            out = workspaceMgr.dup(ArrayType.ACTIVATIONS, out);
+        } else if(actScopedOut && out.isAttached()){
+            out = out.detach();
+        }
+
+        return out;
     }

     @Override
@@ -147,6 +169,25 @@ public class SameDiffOutputLayer extends AbstractLayer<org.deeplearning4j.nn.con
                 // (for efficiency, we skip output layer forward pass in MultiLayerNetwork/ComputationGraph)
                 doInit();
             }
+            if(sameDiff.getFunction("grad") == null)
+                sameDiff.createGradFunction(INPUT_KEY);
+        }
+
+        //Configure memory management for SameDiff instance - use DL4J workspaces
+        Map<Long,InferenceSession> sessionMap = sameDiff.getFunction("grad").getSessions();
+        if(!sessionMap.containsKey(Thread.currentThread().getId())){
+            sessionMap.put(Thread.currentThread().getId(), new InferenceSession(sameDiff.getFunction("grad")));
+        }
+        String wsNameWorking = workspaceMgr.getWorkspaceName(ArrayType.BP_WORKING_MEM);
+        String wsNameActGrad = workspaceMgr.getWorkspaceName(ArrayType.ACTIVATION_GRAD);
+        WorkspaceConfiguration confWorking = workspaceMgr.getConfiguration(ArrayType.BP_WORKING_MEM);
+        WorkspaceConfiguration confOutput = workspaceMgr.getConfiguration(ArrayType.ACTIVATION_GRAD);
+
+        boolean actGradScopedOut = workspaceMgr.isScopedOut(ArrayType.ACTIVATION_GRAD);
+        Preconditions.checkState(actGradScopedOut || wsNameActGrad != null, "Activation gradients must have a workspace or be scoped out");
+        SessionMemMgr mmgr = new DL4JSameDiffMemoryMgr(wsNameWorking, wsNameActGrad, confWorking, confOutput);
+        sessionMap.get(Thread.currentThread().getId()).setMmgr(mmgr);
+
         if(!sameDiff.hasGradientFunction()) {
             //Create when scoped out, to ensure any arrays are not in WS
             sameDiff.createGradFunction(INPUT_KEY);
@@ -179,16 +220,19 @@ public class SameDiffOutputLayer extends AbstractLayer<org.deeplearning4j.nn.con
         }

         dLdIn = grads.get(INPUT_KEY);
-        }

         //Clear placeholders and op inputs to ensure no out-of-scope arrays are still referenced anywhere
         sameDiff.clearPlaceholders(true);
         sameDiff.clearOpInputs();

-        Pair<Gradient,INDArray> p = new Pair<>(g, workspaceMgr.dup(ArrayType.ACTIVATION_GRAD, dLdIn));   //TODO OPTIMIZE THIS
-        if(dLdIn.closeable())
-            dLdIn.close();
-        return p;
+        //TODO there may be a cleaner way to do this...
+        if(!actGradScopedOut && !dLdIn.data().getParentWorkspace().getId().equals(wsNameActGrad)){
+            dLdIn = workspaceMgr.dup(ArrayType.ACTIVATION_GRAD, dLdIn);
+        } else if(actGradScopedOut && dLdIn.isAttached()){
+            dLdIn = dLdIn.detach();
+        }
+
+        return new Pair<>(g, dLdIn);
     }

     /**Returns the parameters of the neural network as a flattened row vector
@@ -312,7 +356,8 @@ public class SameDiffOutputLayer extends AbstractLayer<org.deeplearning4j.nn.con
     @Override
     public double computeScore(double fullNetRegTerm, boolean training, LayerWorkspaceMgr workspaceMgr) {
-        return (activateHelper(false, workspaceMgr).getDouble(0) + fullNetRegTerm) / input.size(0);
+        INDArray scoreArr = activateHelper(false, workspaceMgr);
+        return (scoreArr.getDouble(0) + fullNetRegTerm) / input.size(0);
     }

     @Override

BaseNDArray.java

@@ -309,11 +309,11 @@ public abstract class BaseNDArray implements INDArray, Iterable {
      * @param ordering the ordering of the ndarray
      */
     public BaseNDArray(int[] shape, int[] stride, long offset, char ordering) {
-        this(Nd4j.createBuffer(ArrayUtil.prodLong(shape)), shape, stride, offset, ordering);
+        this(Nd4j.createBuffer(shape.length == 0 ? 1 : ArrayUtil.prodLong(shape)), shape, stride, offset, ordering);
     }

     public BaseNDArray(long[] shape, long[] stride, long offset, char ordering) {
-        this(Nd4j.createBuffer(ArrayUtil.prodLong(shape)), shape, stride, offset, ordering);
+        this(Nd4j.createBuffer(shape.length == 0 ? 1 : ArrayUtil.prodLong(shape)), shape, stride, offset, ordering);
     }

     /**
@@ -326,19 +326,19 @@ public abstract class BaseNDArray implements INDArray, Iterable {
      * @param initialize Whether to initialize the INDArray. If true: initialize. If false: don't.
      */
     public BaseNDArray(int[] shape, int[] stride, long offset, char ordering, boolean initialize) {
-        this(Nd4j.createBuffer(ArrayUtil.prodLong(shape), initialize), shape, stride, offset, ordering);
+        this(Nd4j.createBuffer(shape.length == 0 ? 1 : ArrayUtil.prodLong(shape), initialize), shape, stride, offset, ordering);
     }

     public BaseNDArray(long[] shape, long[] stride, long offset, char ordering, boolean initialize) {
-        this(Nd4j.createBuffer(ArrayUtil.prodLong(shape), initialize), shape, stride, offset, ordering);
+        this(Nd4j.createBuffer(shape.length == 0 ? 1 : ArrayUtil.prodLong(shape), initialize), shape, stride, offset, ordering);
     }

     public BaseNDArray(DataType type, long[] shape, long[] stride, long offset, char ordering, boolean initialize) {
-        this(Nd4j.createBuffer(type, ArrayUtil.prodLong(shape), initialize), type, shape, stride, offset, ordering);
+        this(Nd4j.createBuffer(type, shape.length == 0 ? 1 : ArrayUtil.prodLong(shape), initialize), type, shape, stride, offset, ordering);
     }

     public BaseNDArray(DataType type, long[] shape, long[] stride, long offset, char ordering, boolean initialize, MemoryWorkspace workspace) {
-        this(Nd4j.createBuffer(type, ArrayUtil.prodLong(shape), initialize, workspace), type, shape, stride, offset, ordering);
+        this(Nd4j.createBuffer(type, shape.length == 0 ? 1 : ArrayUtil.prodLong(shape), initialize, workspace), type, shape, stride, offset, ordering);
     }

BasicWorkspaceTests.java

@@ -319,6 +319,11 @@ public class BasicWorkspaceTests extends BaseNd4jTest {
                 long reqMemory = 5 * Nd4j.sizeOfDataType(array1.dataType());
                 assertEquals(reqMemory + reqMemory % 8, wsI.getPrimaryOffset());
                 assertEquals(array1, array2);
+
+                INDArray array3 = Nd4j.createUninitializedDetached(DataType.FLOAT, new long[0]);
+                assertTrue(array3.isScalar());
+                assertEquals(1, array3.length());
+                assertEquals(1, array3.data().length());
             }
         }