DL4J Time Distributed + fixes + Vertx module profiles fix (#78)

* Add test profiles to vertx module

* Arbiter test tweaks

Signed-off-by: AlexDBlack <blacka101@gmail.com>

* Add TimeDistributed wrapper layer

Signed-off-by: AlexDBlack <blacka101@gmail.com>

* Tests for TimeDistributed layer

Signed-off-by: AlexDBlack <blacka101@gmail.com>

* Small test dependency exclusion for Spark module

* Fixes, more thorough tests

Signed-off-by: AlexDBlack <blacka101@gmail.com>
master
Alex Black 2019-11-25 16:00:21 +11:00 committed by GitHub
parent e910ce75ec
commit 5b2ee72673
7 changed files with 297 additions and 3 deletions
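
For orientation before the diff: a minimal usage sketch of the new wrapper, condensed from the javadoc and the TestTimeDistributed test below (layer sizes are illustrative; imports as in that test):

// Wrap a feed-forward layer so it is applied independently at each time step.
// For DL4J RNN activations the time axis is 2, i.e. shape [minibatch, size, sequenceLength].
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
        .list()
        .layer(new LSTM.Builder().nIn(3).nOut(3).build())
        .layer(new TimeDistributed(new DenseLayer.Builder().nIn(3).nOut(3).activation(Activation.TANH).build(), 2))
        .layer(new RnnOutputLayer.Builder().nIn(3).nOut(3).activation(Activation.SOFTMAX)
                .lossFunction(LossFunctions.LossFunction.MCXENT).build())
        .setInputType(InputType.recurrent(3))
        .build();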


@@ -305,7 +305,7 @@ public class TestGraphLocalExecution {
@Test
public void testLocalExecutionEarlyStopping() throws Exception {
EarlyStoppingConfiguration<ComputationGraph> esConf = new EarlyStoppingConfiguration.Builder<ComputationGraph>()
- .epochTerminationConditions(new MaxEpochsTerminationCondition(6))
+ .epochTerminationConditions(new MaxEpochsTerminationCondition(4))
.scoreCalculator(new ScoreProvider())
.modelSaver(new InMemoryModelSaver()).build();
Map<String, Object> commands = new HashMap<>();
@@ -348,7 +348,7 @@
.dataProvider(dataProvider)
.scoreFunction(ScoreFunctions.testSetF1())
.modelSaver(new FileModelSaver(modelSavePath))
- .terminationConditions(new MaxTimeCondition(30, TimeUnit.SECONDS),
+ .terminationConditions(new MaxTimeCondition(45, TimeUnit.SECONDS),
new MaxCandidatesCondition(10))
.build();


@@ -32,7 +32,7 @@ public class TestDataFactoryProviderMnist implements DataSetIteratorFactory {
private int terminationIter;
public TestDataFactoryProviderMnist(){
- this(16, 10);
+ this(16, 4);
}
@Override


@@ -0,0 +1,88 @@
package org.deeplearning4j.nn.layers.recurrent;
import org.deeplearning4j.BaseDL4JTest;
import org.deeplearning4j.TestUtils;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.WorkspaceMode;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.LSTM;
import org.deeplearning4j.nn.conf.layers.RnnOutputLayer;
import org.deeplearning4j.nn.conf.layers.recurrent.TimeDistributed;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.junit.Test;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.learning.config.Adam;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import static org.junit.Assert.assertEquals;
public class TestTimeDistributed extends BaseDL4JTest {
@Test
public void testTimeDistributed(){
for(WorkspaceMode wsm : new WorkspaceMode[]{WorkspaceMode.ENABLED, WorkspaceMode.NONE}) {
MultiLayerConfiguration conf1 = new NeuralNetConfiguration.Builder()
.trainingWorkspaceMode(wsm)
.inferenceWorkspaceMode(wsm)
.seed(12345)
.updater(new Adam(0.1))
.list()
.layer(new LSTM.Builder().nIn(3).nOut(3).build())
.layer(new DenseLayer.Builder().nIn(3).nOut(3).activation(Activation.TANH).build())
.layer(new RnnOutputLayer.Builder().nIn(3).nOut(3).activation(Activation.SOFTMAX)
.lossFunction(LossFunctions.LossFunction.MCXENT).build())
.setInputType(InputType.recurrent(3))
.build();
MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder()
.trainingWorkspaceMode(wsm)
.inferenceWorkspaceMode(wsm)
.seed(12345)
.updater(new Adam(0.1))
.list()
.layer(new LSTM.Builder().nIn(3).nOut(3).build())
.layer(new TimeDistributed(new DenseLayer.Builder().nIn(3).nOut(3).activation(Activation.TANH).build(), 2))
.layer(new RnnOutputLayer.Builder().nIn(3).nOut(3).activation(Activation.SOFTMAX)
.lossFunction(LossFunctions.LossFunction.MCXENT).build())
.setInputType(InputType.recurrent(3))
.build();
MultiLayerNetwork net1 = new MultiLayerNetwork(conf1);
MultiLayerNetwork net2 = new MultiLayerNetwork(conf2);
net1.init();
net2.init();
for( int mb : new int[]{1, 5}) {
for(char inLabelOrder : new char[]{'c', 'f'}) {
INDArray in = Nd4j.rand(DataType.FLOAT, mb, 3, 5).dup(inLabelOrder);
INDArray out1 = net1.output(in);
INDArray out2 = net2.output(in);
assertEquals(out1, out2);
INDArray labels = TestUtils.randomOneHotTimeSeries(mb, 3, 5).dup(inLabelOrder);
DataSet ds = new DataSet(in, labels);
net1.fit(ds);
net2.fit(ds);
assertEquals(net1.params(), net2.params());
MultiLayerNetwork net3 = TestUtils.testModelSerialization(net2);
out2 = net2.output(in);
INDArray out3 = net3.output(in);
assertEquals(out2, out3);
}
}
}
}
}


@@ -0,0 +1,81 @@
package org.deeplearning4j.nn.conf.layers.recurrent;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.NonNull;
import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.Layer;
import org.deeplearning4j.nn.conf.layers.wrapper.BaseWrapperLayer;
import org.deeplearning4j.nn.layers.recurrent.TimeDistributedLayer;
import org.deeplearning4j.optimize.api.TrainingListener;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.shade.jackson.annotation.JsonProperty;
import java.util.Collection;
/**
* TimeDistributed wrapper layer.<br>
* Note: only the "Feed forward layer time distributed in an RNN" is currently supported.
* For example, a time distributed dense layer.<br>
* Usage: {@code .layer(new TimeDistributed(new DenseLayer.Builder()....build(), timeAxis))}<br>
* Note that for DL4J RNNs, time axis is always 2 - i.e., RNN activations have shape [minibatch, size, sequenceLength]
*
* @author Alex Black
*/
@Data
@EqualsAndHashCode(callSuper = true)
public class TimeDistributed extends BaseWrapperLayer {
private final int timeAxis;
/**
* @param underlying Underlying (internal) layer - should be a feed forward type such as DenseLayer
* @param timeAxis Time axis, should be 2 for DL4J RNN activations (shape [minibatch, size, sequenceLength])
*/
public TimeDistributed(@JsonProperty("underlying") @NonNull Layer underlying, @JsonProperty("timeAxis") int timeAxis) {
super(underlying);
this.timeAxis = timeAxis;
}
@Override
public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, Collection<TrainingListener> trainingListeners,
int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) {
NeuralNetConfiguration conf2 = conf.clone();
conf2.setLayer(((TimeDistributed) conf2.getLayer()).getUnderlying());
return new TimeDistributedLayer(underlying.instantiate(conf2, trainingListeners, layerIndex, layerParamsView,
initializeParams, networkDataType), timeAxis);
}
@Override
public InputType getOutputType(int layerIndex, InputType inputType) {
if (inputType.getType() != InputType.Type.RNN) {
throw new IllegalStateException("Only RNN input type is supported as input to TimeDistributed layer (layer #" + layerIndex + ")");
}
InputType.InputTypeRecurrent rnn = (InputType.InputTypeRecurrent) inputType;
InputType ff = InputType.feedForward(rnn.getSize());
InputType ffOut = underlying.getOutputType(layerIndex, ff);
return InputType.recurrent(ffOut.arrayElementsPerExample(), rnn.getTimeSeriesLength());
}
@Override
public void setNIn(InputType inputType, boolean override) {
if (inputType.getType() != InputType.Type.RNN) {
throw new IllegalStateException("Only RNN input type is supported as input to TimeDistributed layer");
}
InputType.InputTypeRecurrent rnn = (InputType.InputTypeRecurrent) inputType;
InputType ff = InputType.feedForward(rnn.getSize());
underlying.setNIn(ff, override);
}
@Override
public InputPreProcessor getPreProcessorForInputType(InputType inputType) {
//No preprocessor - the wrapper layer operates as the preprocessor
return null;
}
}
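
A small sketch of the shape contract getOutputType and setNIn implement, with illustrative sizes (nIn = 3, nOut = 7, sequence length 5); imports as in the class above:

// The wrapper presents the underlying DenseLayer with feed-forward input of size 3,
// then re-attaches the time dimension: recurrent [mb, 3, 5] in -> recurrent [mb, 7, 5] out.
TimeDistributed td = new TimeDistributed(new DenseLayer.Builder().nIn(3).nOut(7).build(), 2);
InputType out = td.getOutputType(0, InputType.recurrent(3, 5));   // InputTypeRecurrent, size 7, length 5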


@@ -0,0 +1,110 @@
package org.deeplearning4j.nn.layers.recurrent;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.gradient.Gradient;
import org.deeplearning4j.nn.layers.wrapper.BaseWrapperLayer;
import org.deeplearning4j.nn.workspace.ArrayType;
import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.primitives.Pair;
import org.nd4j.linalg.util.ArrayUtil;
/**
* TimeDistributed wrapper layer.<br>
* Note: only the "Feed forward layer time distributed in an RNN" is currently supported.
* For example, a time distributed dense layer.<br>
* Usage: {@code .layer(new TimeDistributed(new DenseLayer.Builder()....build(), timeAxis))}<br>
* Note that for DL4J RNNs, time axis is always 2 - i.e., RNN activations have shape [minibatch, size, sequenceLength]
*
* @author Alex Black
*/
public class TimeDistributedLayer extends BaseWrapperLayer {
private final int timeAxis;
public TimeDistributedLayer(Layer underlying, int timeAxis) {
super(underlying);
this.timeAxis = timeAxis;
}
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
INDArray reshapedEps = reshape(epsilon);
Pair<Gradient, INDArray> p = underlying.backpropGradient(reshapedEps, workspaceMgr);
INDArray reverted = revertReshape(p.getSecond(), epsilon.size(0));
reverted = workspaceMgr.dup(ArrayType.ACTIVATION_GRAD, reverted);
p.setSecond(reverted);
return p;
}
@Override
public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) {
return activate(input(), training, workspaceMgr);
}
@Override
public INDArray activate(INDArray input, boolean training, LayerWorkspaceMgr workspaceMgr) {
INDArray reshaped = reshape(input);
INDArray out = underlying.activate(reshaped, training, workspaceMgr);
INDArray ret = revertReshape(out, input.size(0));
return workspaceMgr.dup(ArrayType.ACTIVATIONS, ret);
}
protected INDArray reshape(INDArray array){
//Reshape the time axis to the minibatch axis
//For example, for RNN -> FF (dense time distributed): [mb, size, seqLen] -> [mb x seqLen, size]
int axis = timeAxis;
if(axis < 0)
axis += array.rank();
int[] permuteAxis = permuteAxes(array.rank(), axis);
INDArray permute = array.permute(permuteAxis);
long[] newShape = new long[array.rank()-1];
newShape[0] = array.size(0) * array.size(axis);
int j=1;
for( int i=1; i<array.rank(); i++ ){
if(axis == i)
continue;
newShape[j++] = array.size(i);
}
INDArray reshape = permute.dup().reshape('c', newShape);
return reshape;
}
protected int[] permuteAxes(int rank, int timeAxis){
int[] permuteAxis = new int[rank];
permuteAxis[0] = 0;
permuteAxis[1] = timeAxis;
int j=2;
for( int i=1; i<rank; i++ ){
if(timeAxis == i)
continue;
permuteAxis[j++] = i;
}
return permuteAxis;
}
protected INDArray revertReshape(INDArray toRevert, long minibatch){
int axis = timeAxis;
if(axis < 0)
axis += (toRevert.rank()+1);
long[] newShape = new long[toRevert.rank()+1];
newShape[0] = minibatch;
newShape[1] = toRevert.size(0)/minibatch;
for( int i=1; i<toRevert.rank(); i++ ){
newShape[i+1] = toRevert.size(i);
}
INDArray reshaped = toRevert.reshape('c', newShape);
int[] permute = ArrayUtil.invertPermutation(permuteAxes(toRevert.rank() + 1, axis));
INDArray permuted = reshaped.permute(permute);
return permuted;
}
}
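
To make the reshape scheme concrete, a standalone ND4J sketch of the round trip that reshape()/revertReshape() perform for timeAxis = 2 (shape values are illustrative; ND4J imports as in TestTimeDistributed above):

// [mb=2, size=3, seqLen=5] -> permute to [2, 5, 3] -> flatten to [2*5, 3] for the underlying layer
INDArray act = Nd4j.rand(DataType.FLOAT, 2, 3, 5);
INDArray ff = act.permute(0, 2, 1).dup().reshape('c', 2 * 5, 3);
// revertReshape inverts it: [10, 3] -> [2, 5, 3] -> permute back to [2, 3, 5]
INDArray back = ff.reshape('c', 2, 5, 3).permute(0, 2, 1);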


@@ -70,6 +70,12 @@
<artifactId>deeplearning4j-ui</artifactId>
<version>${deeplearning4j.version}</version>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>net.jpountz.lz4</groupId>
<artifactId>lz4</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>


@@ -434,4 +434,13 @@
</plugins>
</pluginManagement>
</build>
<profiles>
<profile>
<id>test-nd4j-native</id>
</profile>
<profile>
<id>test-nd4j-cuda-10.1</id>
</profile>
</profiles>
</project>