DL4J Time Distributed + fixes + Vertx module profiles fix (#78)
* Add test profiles to vertx module
* Arbiter test tweaks
* Add TimeDistributed wrapper layer
* Tests for TimeDistributed layer
* Small test dependency exclusion for Spark module
* Fixes, more thorough tests

Signed-off-by: AlexDBlack <blacka101@gmail.com>
parent e910ce75ec
commit 5b2ee72673
@@ -305,7 +305,7 @@ public class TestGraphLocalExecution {
     @Test
     public void testLocalExecutionEarlyStopping() throws Exception {
         EarlyStoppingConfiguration<ComputationGraph> esConf = new EarlyStoppingConfiguration.Builder<ComputationGraph>()
-                .epochTerminationConditions(new MaxEpochsTerminationCondition(6))
+                .epochTerminationConditions(new MaxEpochsTerminationCondition(4))
                 .scoreCalculator(new ScoreProvider())
                 .modelSaver(new InMemoryModelSaver()).build();
         Map<String, Object> commands = new HashMap<>();

@@ -348,7 +348,7 @@ public class TestGraphLocalExecution {
                 .dataProvider(dataProvider)
                 .scoreFunction(ScoreFunctions.testSetF1())
                 .modelSaver(new FileModelSaver(modelSavePath))
-                .terminationConditions(new MaxTimeCondition(30, TimeUnit.SECONDS),
+                .terminationConditions(new MaxTimeCondition(45, TimeUnit.SECONDS),
                         new MaxCandidatesCondition(10))
                 .build();
@@ -32,7 +32,7 @@ public class TestDataFactoryProviderMnist implements DataSetIteratorFactory {
     private int terminationIter;

     public TestDataFactoryProviderMnist(){
-        this(16, 10);
+        this(16, 4);
     }

     @Override
@@ -0,0 +1,88 @@
package org.deeplearning4j.nn.layers.recurrent;

import org.deeplearning4j.BaseDL4JTest;
import org.deeplearning4j.TestUtils;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.WorkspaceMode;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.LSTM;
import org.deeplearning4j.nn.conf.layers.RnnOutputLayer;
import org.deeplearning4j.nn.conf.layers.recurrent.TimeDistributed;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.junit.Test;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.learning.config.Adam;
import org.nd4j.linalg.lossfunctions.LossFunctions;

import static org.junit.Assert.assertEquals;

public class TestTimeDistributed extends BaseDL4JTest {

    @Test
    public void testTimeDistributed(){
        for(WorkspaceMode wsm : new WorkspaceMode[]{WorkspaceMode.ENABLED, WorkspaceMode.NONE}) {

            MultiLayerConfiguration conf1 = new NeuralNetConfiguration.Builder()
                    .trainingWorkspaceMode(wsm)
                    .inferenceWorkspaceMode(wsm)
                    .seed(12345)
                    .updater(new Adam(0.1))
                    .list()
                    .layer(new LSTM.Builder().nIn(3).nOut(3).build())
                    .layer(new DenseLayer.Builder().nIn(3).nOut(3).activation(Activation.TANH).build())
                    .layer(new RnnOutputLayer.Builder().nIn(3).nOut(3).activation(Activation.SOFTMAX)
                            .lossFunction(LossFunctions.LossFunction.MCXENT).build())
                    .setInputType(InputType.recurrent(3))
                    .build();

            MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder()
                    .trainingWorkspaceMode(wsm)
                    .inferenceWorkspaceMode(wsm)
                    .seed(12345)
                    .updater(new Adam(0.1))
                    .list()
                    .layer(new LSTM.Builder().nIn(3).nOut(3).build())
                    .layer(new TimeDistributed(new DenseLayer.Builder().nIn(3).nOut(3).activation(Activation.TANH).build(), 2))
                    .layer(new RnnOutputLayer.Builder().nIn(3).nOut(3).activation(Activation.SOFTMAX)
                            .lossFunction(LossFunctions.LossFunction.MCXENT).build())
                    .setInputType(InputType.recurrent(3))
                    .build();

            MultiLayerNetwork net1 = new MultiLayerNetwork(conf1);
            MultiLayerNetwork net2 = new MultiLayerNetwork(conf2);
            net1.init();
            net2.init();

            for( int mb : new int[]{1, 5}) {
                for(char inLabelOrder : new char[]{'c', 'f'}) {
                    INDArray in = Nd4j.rand(DataType.FLOAT, mb, 3, 5).dup(inLabelOrder);

                    INDArray out1 = net1.output(in);
                    INDArray out2 = net2.output(in);

                    assertEquals(out1, out2);

                    INDArray labels = TestUtils.randomOneHotTimeSeries(mb, 3, 5).dup(inLabelOrder);

                    DataSet ds = new DataSet(in, labels);
                    net1.fit(ds);
                    net2.fit(ds);

                    assertEquals(net1.params(), net2.params());

                    MultiLayerNetwork net3 = TestUtils.testModelSerialization(net2);
                    out2 = net2.output(in);
                    INDArray out3 = net3.output(in);

                    assertEquals(out2, out3);
                }
            }
        }
    }
}
@@ -0,0 +1,81 @@
package org.deeplearning4j.nn.conf.layers.recurrent;

import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.NonNull;
import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.Layer;
import org.deeplearning4j.nn.conf.layers.wrapper.BaseWrapperLayer;
import org.deeplearning4j.nn.layers.recurrent.TimeDistributedLayer;
import org.deeplearning4j.optimize.api.TrainingListener;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.shade.jackson.annotation.JsonProperty;

import java.util.Collection;

/**
 * TimeDistributed wrapper layer.<br>
 * Note: only the "Feed forward layer time distributed in an RNN" is currently supported.
 * For example, a time distributed dense layer.<br>
 * Usage: {@code .layer(new TimeDistributed(new DenseLayer.Builder()....build(), timeAxis))}<br>
 * Note that for DL4J RNNs, time axis is always 2 - i.e., RNN activations have shape [minibatch, size, sequenceLength]
 *
 * @author Alex Black
 */
@Data
@EqualsAndHashCode(callSuper = true)
public class TimeDistributed extends BaseWrapperLayer {

    private final int timeAxis;

    /**
     * @param underlying Underlying (internal) layer - should be a feed forward type such as DenseLayer
     * @param timeAxis   Time axis, should be 2 for DL4J RNN activations (shape [minibatch, size, sequenceLength])
     */
    public TimeDistributed(@JsonProperty("underlying") @NonNull Layer underlying, @JsonProperty("timeAxis") int timeAxis) {
        super(underlying);
        this.timeAxis = timeAxis;
    }


    @Override
    public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, Collection<TrainingListener> trainingListeners,
                                                       int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) {
        NeuralNetConfiguration conf2 = conf.clone();
        conf2.setLayer(((TimeDistributed) conf2.getLayer()).getUnderlying());
        return new TimeDistributedLayer(underlying.instantiate(conf2, trainingListeners, layerIndex, layerParamsView,
                initializeParams, networkDataType), timeAxis);
    }

    @Override
    public InputType getOutputType(int layerIndex, InputType inputType) {
        if (inputType.getType() != InputType.Type.RNN) {
            throw new IllegalStateException("Only RNN input type is supported as input to TimeDistributed layer (layer #" + layerIndex + ")");
        }

        InputType.InputTypeRecurrent rnn = (InputType.InputTypeRecurrent) inputType;
        InputType ff = InputType.feedForward(rnn.getSize());
        InputType ffOut = underlying.getOutputType(layerIndex, ff);
        return InputType.recurrent(ffOut.arrayElementsPerExample(), rnn.getTimeSeriesLength());
    }

    @Override
    public void setNIn(InputType inputType, boolean override) {
        if (inputType.getType() != InputType.Type.RNN) {
            throw new IllegalStateException("Only RNN input type is supported as input to TimeDistributed layer");
        }

        InputType.InputTypeRecurrent rnn = (InputType.InputTypeRecurrent) inputType;
        InputType ff = InputType.feedForward(rnn.getSize());
        underlying.setNIn(ff, override);
    }

    @Override
    public InputPreProcessor getPreProcessorForInputType(InputType inputType) {
        //No preprocessor - the wrapper layer operates as the preprocessor
        return null;
    }
}
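The Javadoc above fixes two conventions: the wrapped layer should be a feed-forward type, and for DL4J RNN activations the time axis is 2 (shape [minibatch, size, sequenceLength]). Below is a minimal standalone sketch, not part of this PR, showing the shape contract of getOutputType and setNIn under those assumptions; the class and method names here are illustrative only.

import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.recurrent.TimeDistributed;

public class TimeDistributedShapeSketch {
    public static void main(String[] args) {
        // Incoming RNN activations: size 3, sequence length 5
        InputType rnnIn = InputType.recurrent(3, 5);

        // Dense layer time-distributed over axis 2 (the DL4J time axis)
        TimeDistributed td = new TimeDistributed(
                new DenseLayer.Builder().nIn(3).nOut(7).build(), 2);

        // The wrapper hands the underlying DenseLayer a feed-forward view of size 3,
        // then re-wraps its output as recurrent: expected size 7, sequence length 5
        InputType out = td.getOutputType(0, rnnIn);
        System.out.println(out);

        // setNIn likewise forwards InputType.feedForward(3) to the underlying layer
        td.setNIn(rnnIn, true);
    }
}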
@@ -0,0 +1,110 @@
package org.deeplearning4j.nn.layers.recurrent;

import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.gradient.Gradient;
import org.deeplearning4j.nn.layers.wrapper.BaseWrapperLayer;
import org.deeplearning4j.nn.workspace.ArrayType;
import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.primitives.Pair;
import org.nd4j.linalg.util.ArrayUtil;

/**
 * TimeDistributed wrapper layer.<br>
 * Note: only the "Feed forward layer time distributed in an RNN" is currently supported.
 * For example, a time distributed dense layer.<br>
 * Usage: {@code .layer(new TimeDistributed(new DenseLayer.Builder()....build(), timeAxis))}<br>
 * Note that for DL4J RNNs, time axis is always 2 - i.e., RNN activations have shape [minibatch, size, sequenceLength]
 *
 * @author Alex Black
 */
public class TimeDistributedLayer extends BaseWrapperLayer {

    private final int timeAxis;

    public TimeDistributedLayer(Layer underlying, int timeAxis) {
        super(underlying);
        this.timeAxis = timeAxis;
    }


    @Override
    public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
        INDArray reshapedEps = reshape(epsilon);
        Pair<Gradient, INDArray> p = underlying.backpropGradient(reshapedEps, workspaceMgr);
        INDArray reverted = revertReshape(p.getSecond(), epsilon.size(0));
        reverted = workspaceMgr.dup(ArrayType.ACTIVATION_GRAD, reverted);
        p.setSecond(reverted);
        return p;
    }

    @Override
    public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) {
        return activate(input(), training, workspaceMgr);
    }

    @Override
    public INDArray activate(INDArray input, boolean training, LayerWorkspaceMgr workspaceMgr) {
        INDArray reshaped = reshape(input);
        INDArray out = underlying.activate(reshaped, training, workspaceMgr);
        INDArray ret = revertReshape(out, input.size(0));
        return workspaceMgr.dup(ArrayType.ACTIVATIONS, ret);
    }

    protected INDArray reshape(INDArray array){
        //Reshape the time axis to the minibatch axis
        //For example, for RNN -> FF (dense time distributed): [mb, size, seqLen] -> [mb x seqLen, size]
        int axis = timeAxis;
        if(axis < 0)
            axis += array.rank();

        int[] permuteAxis = permuteAxes(array.rank(), axis);
        INDArray permute = array.permute(permuteAxis);

        long[] newShape = new long[array.rank()-1];
        newShape[0] = array.size(0) * array.size(axis);
        int j=1;
        for( int i=1; i<array.rank(); i++ ){
            if(axis == i)
                continue;
            newShape[j++] = array.size(i);
        }

        INDArray reshape = permute.dup().reshape('c', newShape);
        return reshape;
    }

    protected int[] permuteAxes(int rank, int timeAxis){
        int[] permuteAxis = new int[rank];
        permuteAxis[0] = 0;
        permuteAxis[1] = timeAxis;
        int j=2;
        for( int i=1; i<rank; i++ ){
            if(timeAxis == i)
                continue;
            permuteAxis[j++] = i;
        }
        return permuteAxis;
    }

    protected INDArray revertReshape(INDArray toRevert, long minibatch){

        int axis = timeAxis;
        if(axis < 0)
            axis += (toRevert.rank()+1);

        long[] newShape = new long[toRevert.rank()+1];
        newShape[0] = minibatch;
        newShape[1] = toRevert.size(0)/minibatch;
        for( int i=1; i<toRevert.rank(); i++ ){
            newShape[i+1] = toRevert.size(i);
        }

        INDArray reshaped = toRevert.reshape('c', newShape);

        int[] permute = ArrayUtil.invertPermutation(permuteAxes(toRevert.rank() + 1, axis));

        INDArray permuted = reshaped.permute(permute);
        return permuted;
    }
}
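The comments in reshape() above describe the core mechanism: fold the time axis into the minibatch axis so the wrapped feed-forward layer sees [mb * seqLen, size], then undo the reshape on the way back out. A minimal ND4J sketch of that round trip for the rank-3, timeAxis = 2 case (not part of the PR; the class name is illustrative, mirroring what reshape()/revertReshape() do internally):

import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

import java.util.Arrays;

public class TimeDistributedReshapeSketch {
    public static void main(String[] args) {
        long mb = 4, size = 3, seqLen = 5;

        // RNN activations: [minibatch, size, sequenceLength]
        INDArray rnnAct = Nd4j.rand(DataType.FLOAT, mb, size, seqLen);

        // Forward: bring the time axis next to the minibatch axis (permute [0, 2, 1]),
        // then flatten so every time step becomes its own "example" for the dense layer
        INDArray ff = rnnAct.permute(0, 2, 1).dup('c').reshape('c', mb * seqLen, size);
        System.out.println(Arrays.toString(ff.shape()));    // [20, 3]

        // Revert: [mb*seqLen, size] -> [mb, seqLen, size] -> permute back to [mb, size, seqLen]
        INDArray back = ff.reshape('c', mb, seqLen, size).permute(0, 2, 1);
        System.out.println(Arrays.toString(back.shape()));  // [4, 3, 5]

        System.out.println(rnnAct.equals(back));             // true: the round trip is lossless
    }
}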
@@ -70,6 +70,12 @@
             <artifactId>deeplearning4j-ui</artifactId>
             <version>${deeplearning4j.version}</version>
             <scope>test</scope>
+            <exclusions>
+                <exclusion>
+                    <groupId>net.jpountz.lz4</groupId>
+                    <artifactId>lz4</artifactId>
+                </exclusion>
+            </exclusions>
         </dependency>

         <dependency>
@@ -434,4 +434,13 @@
         </plugins>
     </pluginManagement>
 </build>
+
+    <profiles>
+        <profile>
+            <id>test-nd4j-native</id>
+        </profile>
+        <profile>
+            <id>test-nd4j-cuda-10.1</id>
+        </profile>
+    </profiles>
 </project>