DL4J Time Distributed + fixes + Vertx module profiles fix (#78)
* Add test profiles to vertx module
* Arbiter test tweaks
* Add TimeDistributed wrapper layer
* Tests for TimeDistributed layer
* Small test dependency exclusion for Spark module
* Fixes, more thorough tests

Signed-off-by: AlexDBlack <blacka101@gmail.com>
parent e910ce75ec
commit 5b2ee72673
@@ -305,7 +305,7 @@ public class TestGraphLocalExecution {
     @Test
     public void testLocalExecutionEarlyStopping() throws Exception {
         EarlyStoppingConfiguration<ComputationGraph> esConf = new EarlyStoppingConfiguration.Builder<ComputationGraph>()
-                .epochTerminationConditions(new MaxEpochsTerminationCondition(6))
+                .epochTerminationConditions(new MaxEpochsTerminationCondition(4))
                 .scoreCalculator(new ScoreProvider())
                 .modelSaver(new InMemoryModelSaver()).build();
         Map<String, Object> commands = new HashMap<>();

@@ -348,7 +348,7 @@ public class TestGraphLocalExecution {
                 .dataProvider(dataProvider)
                 .scoreFunction(ScoreFunctions.testSetF1())
                 .modelSaver(new FileModelSaver(modelSavePath))
-                .terminationConditions(new MaxTimeCondition(30, TimeUnit.SECONDS),
+                .terminationConditions(new MaxTimeCondition(45, TimeUnit.SECONDS),
                         new MaxCandidatesCondition(10))
                 .build();
@@ -32,7 +32,7 @@ public class TestDataFactoryProviderMnist implements DataSetIteratorFactory {
     private int terminationIter;

     public TestDataFactoryProviderMnist(){
-        this(16, 10);
+        this(16, 4);
     }

     @Override
@@ -0,0 +1,88 @@
package org.deeplearning4j.nn.layers.recurrent;

import org.deeplearning4j.BaseDL4JTest;
import org.deeplearning4j.TestUtils;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.WorkspaceMode;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.LSTM;
import org.deeplearning4j.nn.conf.layers.RnnOutputLayer;
import org.deeplearning4j.nn.conf.layers.recurrent.TimeDistributed;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.junit.Test;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.learning.config.Adam;
import org.nd4j.linalg.lossfunctions.LossFunctions;

import static org.junit.Assert.assertEquals;

public class TestTimeDistributed extends BaseDL4JTest {

    @Test
    public void testTimeDistributed(){
        for(WorkspaceMode wsm : new WorkspaceMode[]{WorkspaceMode.ENABLED, WorkspaceMode.NONE}) {

            MultiLayerConfiguration conf1 = new NeuralNetConfiguration.Builder()
                    .trainingWorkspaceMode(wsm)
                    .inferenceWorkspaceMode(wsm)
                    .seed(12345)
                    .updater(new Adam(0.1))
                    .list()
                    .layer(new LSTM.Builder().nIn(3).nOut(3).build())
                    .layer(new DenseLayer.Builder().nIn(3).nOut(3).activation(Activation.TANH).build())
                    .layer(new RnnOutputLayer.Builder().nIn(3).nOut(3).activation(Activation.SOFTMAX)
                            .lossFunction(LossFunctions.LossFunction.MCXENT).build())
                    .setInputType(InputType.recurrent(3))
                    .build();

            MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder()
                    .trainingWorkspaceMode(wsm)
                    .inferenceWorkspaceMode(wsm)
                    .seed(12345)
                    .updater(new Adam(0.1))
                    .list()
                    .layer(new LSTM.Builder().nIn(3).nOut(3).build())
                    .layer(new TimeDistributed(new DenseLayer.Builder().nIn(3).nOut(3).activation(Activation.TANH).build(), 2))
                    .layer(new RnnOutputLayer.Builder().nIn(3).nOut(3).activation(Activation.SOFTMAX)
                            .lossFunction(LossFunctions.LossFunction.MCXENT).build())
                    .setInputType(InputType.recurrent(3))
                    .build();

            MultiLayerNetwork net1 = new MultiLayerNetwork(conf1);
            MultiLayerNetwork net2 = new MultiLayerNetwork(conf2);
            net1.init();
            net2.init();

            for( int mb : new int[]{1, 5}) {
                for(char inLabelOrder : new char[]{'c', 'f'}) {
                    INDArray in = Nd4j.rand(DataType.FLOAT, mb, 3, 5).dup(inLabelOrder);

                    INDArray out1 = net1.output(in);
                    INDArray out2 = net2.output(in);

                    assertEquals(out1, out2);

                    INDArray labels = TestUtils.randomOneHotTimeSeries(mb, 3, 5).dup(inLabelOrder);

                    DataSet ds = new DataSet(in, labels);
                    net1.fit(ds);
                    net2.fit(ds);

                    assertEquals(net1.params(), net2.params());

                    MultiLayerNetwork net3 = TestUtils.testModelSerialization(net2);
                    out2 = net2.output(in);
                    INDArray out3 = net3.output(in);

                    assertEquals(out2, out3);
                }
            }
        }
    }
}
@@ -0,0 +1,81 @@
package org.deeplearning4j.nn.conf.layers.recurrent;

import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.NonNull;
import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.Layer;
import org.deeplearning4j.nn.conf.layers.wrapper.BaseWrapperLayer;
import org.deeplearning4j.nn.layers.recurrent.TimeDistributedLayer;
import org.deeplearning4j.optimize.api.TrainingListener;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.shade.jackson.annotation.JsonProperty;

import java.util.Collection;

/**
 * TimeDistributed wrapper layer.<br>
 * Note: only the "Feed forward layer time distributed in an RNN" is currently supported.
 * For example, a time distributed dense layer.<br>
 * Usage: {@code .layer(new TimeDistributed(new DenseLayer.Builder()....build(), timeAxis))}<br>
 * Note that for DL4J RNNs, time axis is always 2 - i.e., RNN activations have shape [minibatch, size, sequenceLength]
 *
 * @author Alex Black
 */
@Data
@EqualsAndHashCode(callSuper = true)
public class TimeDistributed extends BaseWrapperLayer {

    private final int timeAxis;

    /**
     * @param underlying Underlying (internal) layer - should be a feed forward type such as DenseLayer
     * @param timeAxis   Time axis, should be 2 for DL4J RNN activations (shape [minibatch, size, sequenceLength])
     */
    public TimeDistributed(@JsonProperty("underlying") @NonNull Layer underlying, @JsonProperty("timeAxis") int timeAxis) {
        super(underlying);
        this.timeAxis = timeAxis;
    }


    @Override
    public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, Collection<TrainingListener> trainingListeners,
                                                       int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) {
        NeuralNetConfiguration conf2 = conf.clone();
        conf2.setLayer(((TimeDistributed) conf2.getLayer()).getUnderlying());
        return new TimeDistributedLayer(underlying.instantiate(conf2, trainingListeners, layerIndex, layerParamsView,
                initializeParams, networkDataType), timeAxis);
    }

    @Override
    public InputType getOutputType(int layerIndex, InputType inputType) {
        if (inputType.getType() != InputType.Type.RNN) {
            throw new IllegalStateException("Only RNN input type is supported as input to TimeDistributed layer (layer #" + layerIndex + ")");
        }

        InputType.InputTypeRecurrent rnn = (InputType.InputTypeRecurrent) inputType;
        InputType ff = InputType.feedForward(rnn.getSize());
        InputType ffOut = underlying.getOutputType(layerIndex, ff);
        return InputType.recurrent(ffOut.arrayElementsPerExample(), rnn.getTimeSeriesLength());
    }

    @Override
    public void setNIn(InputType inputType, boolean override) {
        if (inputType.getType() != InputType.Type.RNN) {
            throw new IllegalStateException("Only RNN input type is supported as input to TimeDistributed layer");
        }

        InputType.InputTypeRecurrent rnn = (InputType.InputTypeRecurrent) inputType;
        InputType ff = InputType.feedForward(rnn.getSize());
        underlying.setNIn(ff, override);
    }

    @Override
    public InputPreProcessor getPreProcessorForInputType(InputType inputType) {
        //No preprocessor - the wrapper layer operates as the preprocessor
        return null;
    }
}
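The Javadoc above fixes two conventions: the wrapped layer should be a feed-forward type, and for DL4J RNN activations the time axis is 2 (shape [minibatch, size, sequenceLength]). Below is a minimal standalone sketch, not part of this PR, showing the shape contract of getOutputType and setNIn under those assumptions; the class and method names here are illustrative only.

import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.recurrent.TimeDistributed;

public class TimeDistributedShapeSketch {
    public static void main(String[] args) {
        // Incoming RNN activations: size 3, sequence length 5
        InputType rnnIn = InputType.recurrent(3, 5);

        // Dense layer time-distributed over axis 2 (the DL4J time axis)
        TimeDistributed td = new TimeDistributed(
                new DenseLayer.Builder().nIn(3).nOut(7).build(), 2);

        // The wrapper hands the underlying DenseLayer a feed-forward view of size 3,
        // then re-wraps its output as recurrent: expected size 7, sequence length 5
        InputType out = td.getOutputType(0, rnnIn);
        System.out.println(out);

        // setNIn likewise forwards InputType.feedForward(3) to the underlying layer
        td.setNIn(rnnIn, true);
    }
}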
@@ -0,0 +1,110 @@
package org.deeplearning4j.nn.layers.recurrent;

import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.gradient.Gradient;
import org.deeplearning4j.nn.layers.wrapper.BaseWrapperLayer;
import org.deeplearning4j.nn.workspace.ArrayType;
import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.primitives.Pair;
import org.nd4j.linalg.util.ArrayUtil;

/**
 * TimeDistributed wrapper layer.<br>
 * Note: only the "Feed forward layer time distributed in an RNN" is currently supported.
 * For example, a time distributed dense layer.<br>
 * Usage: {@code .layer(new TimeDistributed(new DenseLayer.Builder()....build(), timeAxis))}<br>
 * Note that for DL4J RNNs, time axis is always 2 - i.e., RNN activations have shape [minibatch, size, sequenceLength]
 *
 * @author Alex Black
 */
public class TimeDistributedLayer extends BaseWrapperLayer {

    private final int timeAxis;

    public TimeDistributedLayer(Layer underlying, int timeAxis) {
        super(underlying);
        this.timeAxis = timeAxis;
    }


    @Override
    public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
        INDArray reshapedEps = reshape(epsilon);
        Pair<Gradient, INDArray> p = underlying.backpropGradient(reshapedEps, workspaceMgr);
        INDArray reverted = revertReshape(p.getSecond(), epsilon.size(0));
        reverted = workspaceMgr.dup(ArrayType.ACTIVATION_GRAD, reverted);
        p.setSecond(reverted);
        return p;
    }

    @Override
    public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) {
        return activate(input(), training, workspaceMgr);
    }

    @Override
    public INDArray activate(INDArray input, boolean training, LayerWorkspaceMgr workspaceMgr) {
        INDArray reshaped = reshape(input);
        INDArray out = underlying.activate(reshaped, training, workspaceMgr);
        INDArray ret = revertReshape(out, input.size(0));
        return workspaceMgr.dup(ArrayType.ACTIVATIONS, ret);
    }

    protected INDArray reshape(INDArray array){
        //Reshape the time axis to the minibatch axis
        //For example, for RNN -> FF (dense time distributed): [mb, size, seqLen] -> [mb x seqLen, size]
        int axis = timeAxis;
        if(axis < 0)
            axis += array.rank();

        int[] permuteAxis = permuteAxes(array.rank(), axis);
        INDArray permute = array.permute(permuteAxis);

        long[] newShape = new long[array.rank()-1];
        newShape[0] = array.size(0) * array.size(axis);
        int j=1;
        for( int i=1; i<array.rank(); i++ ){
            if(axis == i)
                continue;
            newShape[j++] = array.size(i);
        }

        INDArray reshape = permute.dup().reshape('c', newShape);
        return reshape;
    }

    protected int[] permuteAxes(int rank, int timeAxis){
        int[] permuteAxis = new int[rank];
        permuteAxis[0] = 0;
        permuteAxis[1] = timeAxis;
        int j=2;
        for( int i=1; i<rank; i++ ){
            if(timeAxis == i)
                continue;
            permuteAxis[j++] = i;
        }
        return permuteAxis;
    }

    protected INDArray revertReshape(INDArray toRevert, long minibatch){

        int axis = timeAxis;
        if(axis < 0)
            axis += (toRevert.rank()+1);

        long[] newShape = new long[toRevert.rank()+1];
        newShape[0] = minibatch;
        newShape[1] = toRevert.size(0)/minibatch;
        for( int i=1; i<toRevert.rank(); i++ ){
            newShape[i+1] = toRevert.size(i);
        }

        INDArray reshaped = toRevert.reshape('c', newShape);

        int[] permute = ArrayUtil.invertPermutation(permuteAxes(toRevert.rank() + 1, axis));

        INDArray permuted = reshaped.permute(permute);
        return permuted;
    }
}
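The comments in reshape() above describe the core mechanism: fold the time axis into the minibatch axis so the wrapped feed-forward layer sees [mb * seqLen, size], then undo the reshape on the way back out. A minimal ND4J sketch of that round trip for the rank-3, timeAxis = 2 case (not part of the PR; the class name is illustrative, mirroring what reshape()/revertReshape() do internally):

import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

import java.util.Arrays;

public class TimeDistributedReshapeSketch {
    public static void main(String[] args) {
        long mb = 4, size = 3, seqLen = 5;

        // RNN activations: [minibatch, size, sequenceLength]
        INDArray rnnAct = Nd4j.rand(DataType.FLOAT, mb, size, seqLen);

        // Forward: bring the time axis next to the minibatch axis (permute [0, 2, 1]),
        // then flatten so every time step becomes its own "example" for the dense layer
        INDArray ff = rnnAct.permute(0, 2, 1).dup('c').reshape('c', mb * seqLen, size);
        System.out.println(Arrays.toString(ff.shape()));    // [20, 3]

        // Revert: [mb*seqLen, size] -> [mb, seqLen, size] -> permute back to [mb, size, seqLen]
        INDArray back = ff.reshape('c', mb, seqLen, size).permute(0, 2, 1);
        System.out.println(Arrays.toString(back.shape()));  // [4, 3, 5]

        System.out.println(rnnAct.equals(back));             // true: the round trip is lossless
    }
}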
@@ -70,6 +70,12 @@
             <artifactId>deeplearning4j-ui</artifactId>
             <version>${deeplearning4j.version}</version>
             <scope>test</scope>
+            <exclusions>
+                <exclusion>
+                    <groupId>net.jpountz.lz4</groupId>
+                    <artifactId>lz4</artifactId>
+                </exclusion>
+            </exclusions>
         </dependency>

         <dependency>
@@ -434,4 +434,13 @@
         </plugins>
     </pluginManagement>
 </build>
+
+    <profiles>
+        <profile>
+            <id>test-nd4j-native</id>
+        </profile>
+        <profile>
+            <id>test-nd4j-cuda-10.1</id>
+        </profile>
+    </profiles>
 </project>