parent 997143b9dd
commit 4dc5a116b6
@@ -0,0 +1,167 @@
/*
 *
 * ******************************************************************************
 * *
 * * This program and the accompanying materials are made available under the
 * * terms of the Apache License, Version 2.0 which is available at
 * * https://www.apache.org/licenses/LICENSE-2.0.
 * *
 * * See the NOTICE file distributed with this work for additional
 * * information regarding copyright ownership.
 * * Unless required by applicable law or agreed to in writing, software
 * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * * License for the specific language governing permissions and limitations
 * * under the License.
 * *
 * * SPDX-License-Identifier: Apache-2.0
 * *****************************************************************************
 *
 */

package net.brutex.ai.nd4j.tests;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.datasets.iterator.INDArrayDataSetIterator;
import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator;
import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.junit.jupiter.api.Test;
import org.nd4j.common.primitives.Pair;
import org.nd4j.evaluation.classification.Evaluation;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.lossfunctions.LossFunctions;

@Slf4j
public class ExploreParamsTest {

  @Test
  public void testParam() {
    NeuralNetConfiguration conf =
        NeuralNetConfiguration.builder()
            .seed(12345)
            .dataType(DataType.DOUBLE)
            .layer(
                DenseLayer.builder().nIn(4).nOut(30).name("1. Dense").activation(Activation.TANH))
            .layer(DenseLayer.builder().nIn(30).nOut(10).name("2. Dense"))
            // .layer(FrozenLayer.builder(DenseLayer.builder().nOut(6).build()).build())

            .layer(
                OutputLayer.builder()
                    .nOut(3)
                    .lossFunction(LossFunctions.LossFunction.MSE)
                    .activation(Activation.SOFTMAX))
            .build();
    MultiLayerNetwork nn = new MultiLayerNetwork(conf);
    nn.init();
    log.info(nn.summary());
    // INDArray input = Nd4j.rand(10,4);
    INDArray labels = Nd4j.zeros(9, 3);

    INDArray input =
        Nd4j.create(
            new double[][] {
              {5.15, 3.5, 1.4, 0.21}, // setosa
              {4.9, 3.2, 1.4, 0.2}, // setosa
              {4.7, 3.2, 1.23, 0.2}, // setosa
              {7, 3.25, 4.7, 1.41}, // versicolor
              {6.4, 3.2, 4.54, 1.5}, // versicolor
              {6.9, 3.1, 4.92, 1.5}, // versicolor
              {7.7, 3, 6.1, 2.3}, // virginica
              {6.3, 3.4, 5.6, 2.45}, // virginica
              {6.4, 3.12, 5.5, 1.8} // virginica
            });

    labels.putScalar(0, 1);
    labels.putScalar(3, 1);
    labels.putScalar(6, 1);
    labels.putScalar(10, 1);
    labels.putScalar(13, 1);
    labels.putScalar(16, 1);
    labels.putScalar(20, 1);
    labels.putScalar(23, 1);
    labels.putScalar(26, 1);

    IrisDataSetIterator iter = new IrisDataSetIterator();
    //Iterable<Pair<INDArray, INDArray>> it = List.of(new Pair<INDArray, INDArray>(input, labels));
    List l = new ArrayList<>();
    for (int i=0; i< input.rows(); i++) {
      l.add(new Pair(input.getRow(i), labels.getRow(i)));
    }
    Iterable<Pair<INDArray, INDArray>> it = l;
    INDArrayDataSetIterator diter = new INDArrayDataSetIterator(it, 1);

    for (int i = 0; i < 100; i++) {
      // nn.fit(input, labels);
      // nn.fit( input, labels);
      nn.fit(diter);
      // nn.feedForward(input);
      if(i%20==0) log.info("Score: {}", nn.getScore());
    }

    Evaluation eval = nn.evaluate(iter, List.of("setosa", "vericolor", "virginica"));

    log.info("\n{}", eval.stats());
  }

  @Test
  public void testParam2() throws IOException {
    NeuralNetConfiguration conf =
        NeuralNetConfiguration.builder()
            .seed(12345)
            .layer(
                DenseLayer.builder().nIn(784).nOut(20).name("1. Dense"))
            .layer(DenseLayer.builder().nIn(20).nOut(10).name("2. Dense"))
            .layer(
                OutputLayer.builder()
                    .nOut(10)
                    .lossFunction(LossFunctions.LossFunction.MSE)
                    .activation(Activation.SOFTMAX))
            .build();
    MultiLayerNetwork nn = new MultiLayerNetwork(conf);
    nn.init();
    log.info(nn.summary());

    NeuralNetConfiguration conf2 =
        NeuralNetConfiguration.builder()
            .seed(12345)
            .layer(
                DenseLayer.builder().nIn(784).nOut(20).name("1. Dense").dropOut(0.7))
            .layer(DenseLayer.builder().nIn(20).nOut(10).name("2. Dense"))
            .layer(
                OutputLayer.builder()
                    .nOut(10)
                    .lossFunction(LossFunctions.LossFunction.MSE)
                    .activation(Activation.SOFTMAX))
            .build();
    MultiLayerNetwork nn2 = new MultiLayerNetwork(conf2);
    nn2.init();
    log.info(nn2.summary());

    MnistDataSetIterator iter = new MnistDataSetIterator(10, 500);
    MnistDataSetIterator iter2 = new MnistDataSetIterator(10, 50);

    for (int i = 0; i < 200; i++) {
      nn.fit(iter);
      nn2.fit(iter);
      if(i%20==0) log.info("Score: {} vs. {}", nn.getScore(), nn2.getScore());
    }

    Evaluation eval = nn.evaluate(iter2);
    Evaluation eval2 = nn2.evaluate(iter2);

    log.info("\n{} \n{}", eval.stats(), eval2.stats());
  }
}
@@ -45,6 +45,7 @@ import org.datavec.image.transform.PipelineImageTransform;
 import org.datavec.image.transform.ResizeImageTransform;
 import org.datavec.image.transform.ShowImageTransform;
 import org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator;
+import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator;
 import org.deeplearning4j.nn.conf.GradientNormalization;
 import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
 import org.deeplearning4j.nn.conf.distribution.Distribution;
@@ -65,6 +66,7 @@ import org.deeplearning4j.optimize.listeners.ScoreToChartListener;
 import org.junit.jupiter.api.Test;
 import org.nd4j.linalg.activations.Activation;
 import org.nd4j.linalg.activations.impl.ActivationLReLU;
+import org.nd4j.linalg.api.buffer.DataType;
 import org.nd4j.linalg.api.ndarray.INDArray;
 import org.nd4j.linalg.dataset.DataSet;
 import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
@@ -80,11 +82,11 @@ public class App {

   private static final int X_DIM = 20 ;
   private static final int Y_DIM = 20;
-  private static final int CHANNELS = 1;
-  private static final int batchSize = 10;
+  private static final int CHANNELS = 3;
+  private static final int batchSize = 50;
   private static final int INPUT = 128;

-  private static final int OUTPUT_PER_PANEL = 4;
+  private static final int OUTPUT_PER_PANEL = 16;

   private static final int ARRAY_SIZE_PER_SAMPLE = X_DIM*Y_DIM*CHANNELS;
   private static final IUpdater UPDATER = Adam.builder().learningRate(LEARNING_RATE).beta1(0.5).build();
@@ -146,7 +148,7 @@ public class App {
         ActivationLayer.builder(new ActivationLReLU(0.2)).build(),
         DropoutLayer.builder(1 - 0.5).build(),

-        OutputLayer.builder().name("dis-output").lossFunction(LossFunction.XENT).nIn(X_DIM*Y_DIM).nOut(1).activation(Activation.SIGMOID).build()
+        OutputLayer.builder().name("dis-output").lossFunction(LossFunction.MCXENT).nIn(X_DIM*Y_DIM).nOut(1).activation(Activation.SIGMOID).build()
     };
   }

@@ -196,6 +198,7 @@ public class App {
         .activation( Activation.IDENTITY )
         .layersFromArray( layers )
         .inputType( InputType.convolutional(X_DIM, Y_DIM, CHANNELS))
+        .dataType(DataType.FLOAT)
         .build();
     ((NeuralNetConfiguration) conf).init();
     return conf;
@@ -212,8 +215,8 @@ public class App {
     log.info("\u001B[32m Some \u001B[1m green \u001B[22m text \u001B[0m \u001B[7m Inverted\u001B[0m ");
     Nd4j.getMemoryManager().setAutoGcWindow(500);

-    // MnistDataSetIterator trainData = new MnistDataSetIterator(128, true, 45);
-    // FileSplit fileSplit = new FileSplit(new File("c:/users/brian/downloads/flowers"), NativeImageLoader.getALLOWED_FORMATS());
+    //MnistDataSetIterator trainData = new MnistDataSetIterator(128, true, 45);
+    //FileSplit fileSplit = new FileSplit(new File("c:/users/brian/downloads/flowers"), NativeImageLoader.getALLOWED_FORMATS());
     FileSplit fileSplit = new FileSplit(new File("c:/users/brian/downloads/humans"), NativeImageLoader.getALLOWED_FORMATS());


@@ -223,7 +226,7 @@ public class App {
     ImageTransform transform3 = new ResizeImageTransform(X_DIM, Y_DIM);

     ImageTransform tr = new PipelineImageTransform.Builder()
-        .addImageTransform(transform) //convert to GREY SCALE
+        //.addImageTransform(transform) //convert to GREY SCALE
         .addImageTransform(transform3)
         //.addImageTransform(transform2)
         .build();
@@ -270,10 +273,10 @@ public class App {
           break;
       }

-      if(i%20 == 0) {
-        // frame2 = visualize(new INDArray[]{real}, batchSize,
-        // frame2 == null ? new JFrame() : frame2, true); //real has batchsize number of images
-      }
+      //if(i%20 == 0) {
+      frame2 = visualize(new INDArray[]{real}, batchSize,
+          frame2 == null ? new JFrame() : frame2, true); //real has batchsize number of images
+      //}
       real.divi(255f);

      // int batchSize = (int) real.shape()[0];
@@ -290,7 +293,7 @@ public class App {
       DataSet data = DataSet.merge(Arrays.asList(realSet, fakeSet));

       dis.fit(data);
-      dis.fit(data);
+      //dis.fit(data);

       // Update the discriminator in the GAN network
       updateGan(gen, dis, gan);
@@ -298,7 +301,7 @@ public class App {
       //gan.fit(new DataSet(Nd4j.rand(batchSize, INPUT), Nd4j.zeros(batchSize, 1)));
       gan.fit(new DataSet(Nd4j.rand(batchSize, CHANNELS, X_DIM, Y_DIM), Nd4j.zeros(batchSize, 1)));

-
+      //Visualize and reporting
       if (j % 10 == 1) {
         System.out.println("Iteration " + j + " Visualizing...");
         INDArray[] samples = batchSize > OUTPUT_PER_PANEL ? new INDArray[OUTPUT_PER_PANEL] : new INDArray[batchSize];
@@ -320,11 +323,16 @@ public class App {
             frame = visualize(samples, 1, frame == null ? new JFrame() : frame, false); //each samples only has 1 image, thus batchElements=1
         }
       }
+      if (trainData.resetSupported()) {
         trainData.reset();
+      } else {
+        log.error("Trainingdata {} does not support reset.", trainData.toString());
       }

       // Copy the GANs generator to gen.
       updateGen(gen, gan);
+    }
+

     gen.save(new File("mnist-mlp-generator.dlj"));
   }
@@ -383,7 +391,12 @@ public class App {
   }

   private static JLabel getImage(INDArray tensor, int batchElement, boolean isOrig) {
-    final BufferedImage bi = new BufferedImage(X_DIM, Y_DIM, BufferedImage.TYPE_BYTE_GRAY);
+    final BufferedImage bi;
+    if(CHANNELS>1) {
+      bi = new BufferedImage(X_DIM, Y_DIM, BufferedImage.TYPE_INT_RGB); //need to change here based on channels
+    } else {
+      bi = new BufferedImage(X_DIM, Y_DIM, BufferedImage.TYPE_BYTE_GRAY); //need to change here based on channels
+    }
     final int imageSize = X_DIM * Y_DIM;
     final int offset = batchElement * imageSize;
     int pxl = offset * CHANNELS; //where to start in the INDArray
@@ -24,12 +24,14 @@ package net.brutex.gan;
 import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator;
 import org.deeplearning4j.nn.conf.GradientNormalization;
 import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
+import org.deeplearning4j.nn.conf.inputs.InputType;
 import org.deeplearning4j.nn.conf.layers.ActivationLayer;
 import org.deeplearning4j.nn.conf.layers.DenseLayer;
 import org.deeplearning4j.nn.conf.layers.DropoutLayer;
 import org.deeplearning4j.nn.conf.layers.OutputLayer;
 import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
 import org.deeplearning4j.nn.weights.WeightInit;
+import org.junit.jupiter.api.Test;
 import org.nd4j.linalg.activations.Activation;
 import org.nd4j.linalg.activations.impl.ActivationLReLU;
 import org.nd4j.linalg.api.ndarray.INDArray;
@@ -98,7 +100,10 @@ public class MnistSimpleGAN {

     return new MultiLayerNetwork(discConf);
   }
-
+  @Test
+  public void runTest() throws Exception {
+    main(null);
+  }
   public static void main(String[] args) throws Exception {
     GAN gan = new GAN.Builder()
         .generator(MnistSimpleGAN::getGenerator)
@@ -108,6 +113,7 @@ public class MnistSimpleGAN {
         .updater(UPDATER)
         .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer)
         .gradientNormalizationThreshold(100)
+
         .build();

     Nd4j.getMemoryManager().setAutoGcWindow(15 * 1000);
@@ -2386,7 +2386,11 @@ public interface INDArray extends Serializable, AutoCloseable {
     long[] stride();

     /**
-     * Return the ordering (fortran or c 'f' and 'c' respectively) of this ndarray
+     * Return the ordering (fortran or c 'f' and 'c' respectively) of this ndarray <br/><br/>
+     * C Is Contiguous layout. Mathematically speaking, row major.<br/>
+     * F Is Fortran contiguous layout. Mathematically speaking, column major.<br/>
+     * {@see https://en.wikipedia.org/wiki/Row-_and_column-major_order}<br/>
+     *
      * @return the ordering of this ndarray
      */
     char ordering();
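For reference, a minimal illustration of how the two orderings described in the javadoc above surface through the existing Nd4j.create(shape, order) overload and ordering(); shapes and variable names here are arbitrary:

    INDArray cArr = Nd4j.create(new int[] {2, 3}, 'c'); // row-major, "C contiguous"
    INDArray fArr = Nd4j.create(new int[] {2, 3}, 'f'); // column-major, "Fortran contiguous"
    // cArr.ordering() returns 'c', fArr.ordering() returns 'f'; element (i, j) sits at
    // linear offset i * 3 + j in 'c' order but at offset i + j * 2 in 'f' order.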
@@ -5121,7 +5121,7 @@ public class Nd4j {
         Nd4j.backend = backend;
         updateNd4jContext();
         props = Nd4jContext.getInstance().getConf();
-        logger.info("Properties for Nd4jContext " + props);
+        log.debug("Properties for Nd4jContext {}", props);
         PropertyParser pp = new PropertyParser(props);

         String otherDtype = pp.toString(ND4JSystemProperties.DTYPE);
@@ -122,7 +122,7 @@ public class BNGradientCheckTest extends BaseDL4JTest {
                 .dataType(DataType.DOUBLE)
                 .updater(new NoOp()).seed(12345L)
                 .dist(new NormalDistribution(0, 2)).list()
-                .layer(0, ConvolutionLayer.builder().kernelSize(2, 2).stride(1, 1).nIn(depth).nOut(2)
+                .layer(0, Convolution2D.builder().kernelSize(2, 2).stride(1, 1).nIn(depth).nOut(2)
                         .activation(Activation.IDENTITY).build())
                 .layer(1,BatchNormalization.builder().useLogStd(useLogStd).build())
                 .layer(2, ActivationLayer.builder().activation(Activation.TANH).build())
@@ -91,7 +91,6 @@ public class CNN1DGradientCheckTest extends BaseDL4JTest {
               .updater(new NoOp())
               .dist(new NormalDistribution(0, 1))
               .convolutionMode(ConvolutionMode.Same)
-              .list()
               .layer(
                   Convolution1D.builder()
                       .activation(afn)
@@ -435,7 +434,6 @@ public class CNN1DGradientCheckTest extends BaseDL4JTest {
               .updater(new NoOp())
               .dist(new NormalDistribution(0, 1))
               .convolutionMode(ConvolutionMode.Same)
-              .list()
               .layer(
                   0,
                   Convolution1D.builder()
@@ -461,6 +459,7 @@ public class CNN1DGradientCheckTest extends BaseDL4JTest {
                       .stride(stride)
                       .padding(padding)
                      .pnorm(pnorm)
+                      .name("SubsamplingLayer")
                       .build())
               .layer(
                   3,
@@ -0,0 +1,811 @@
/*
 * ******************************************************************************
 * *
 * *
 * * This program and the accompanying materials are made available under the
 * * terms of the Apache License, Version 2.0 which is available at
 * * https://www.apache.org/licenses/LICENSE-2.0.
 * *
 * * See the NOTICE file distributed with this work for additional
 * * information regarding copyright ownership.
 * * Unless required by applicable law or agreed to in writing, software
 * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * * License for the specific language governing permissions and limitations
 * * under the License.
 * *
 * * SPDX-License-Identifier: Apache-2.0
 * *****************************************************************************
 */

package org.deeplearning4j.gradientcheck;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;

import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.BaseDL4JTest;
import org.deeplearning4j.TestUtils;
import org.deeplearning4j.datasets.iterator.INDArrayDataSetIterator;
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.RNNFormat;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.*;
import org.deeplearning4j.nn.conf.layers.convolutional.Cropping1D;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.util.Convolution1DUtils;
import org.junit.jupiter.api.Test;
import org.nd4j.common.primitives.Pair;
import org.nd4j.evaluation.classification.Evaluation;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.NDArrayIndex;
import org.nd4j.linalg.learning.config.NoOp;
import org.nd4j.linalg.lossfunctions.LossFunctions;

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

@Slf4j
public class CNN1DNewGradientCheckTest extends BaseDL4JTest {
  private static final boolean PRINT_RESULTS = true;
  private static final boolean RETURN_ON_FIRST_FAILURE = false;
  private static final double DEFAULT_EPS = 1e-6;
  private static final double DEFAULT_MAX_REL_ERROR = 1e-3;
  private static final double DEFAULT_MIN_ABS_ERROR = 1e-8;

  static {
    Nd4j.setDataType(DataType.DOUBLE);
  }

  @Test
  public void testCnn1D() {
    int minibatchSize = 4;
    int[] dataChannels = {4, 10}; //the input
    int[] kernels = {2,4,5,8};
    int stride = 2;
    int padding = 3;
    int seriesLength = 300;

    for (int kernel : kernels) {
      for (int dChannels : dataChannels) {
        int numLabels = ((seriesLength + (2 * padding) - kernel) / stride) + 1;
        final NeuralNetConfiguration conf =
            NeuralNetConfiguration.builder()
                .dataType(DataType.DOUBLE)
                .updater(new NoOp())
                .dist(new NormalDistribution(0, 1))
                .convolutionMode(ConvolutionMode.Same)
                .layer(
                    Convolution1DNew.builder()
                        .activation(Activation.RELU)
                        .kernelSize(kernel)
                        .stride(stride)
                        .padding(padding)
                        .nIn(dChannels) // channels
                        .nOut(3)
                        .rnnDataFormat(RNNFormat.NCW)
                        .build())
                .layer(
                    RnnOutputLayer.builder()
                        .lossFunction(LossFunctions.LossFunction.MCXENT)
                        .activation(Activation.SOFTMAX)
                        .nOut(4)
                        .build())
                .inputType(InputType.recurrent(dChannels, seriesLength))
                .build();

        INDArray input = Nd4j.rand(minibatchSize, dChannels, seriesLength);
        INDArray labels = Nd4j.zeros(minibatchSize, 4, numLabels);
        for (int i = 0; i < minibatchSize; i++) {
          for (int j = 0; j < numLabels; j++) {
            labels.putScalar(new int[] {i, i % 4, j}, 1.0);
          }
        }
        final MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();
        String msg =
            "Minibatch="
                + minibatchSize
                + ", activationFn="
                + Activation.RELU
                + ", kernel = "
                + kernel;

        System.out.println(msg);
        for (int j = 0; j < net.getnLayers(); j++)
          System.out.println("ILayer " + j + " # params: " + net.getLayer(j).numParams());
        /**
        List<Pair<INDArray, INDArray>> iter = new java.util.ArrayList<>(Collections.emptyList());
        iter.add(new Pair<>(input, labels));
        for(int x=0;x<100; x++) net.fit(input, labels);
        Evaluation eval = net.evaluate(new INDArrayDataSetIterator(iter,2), Arrays.asList(new String[]{"One", "Two", "Three", "Four"}));
        // net.fit(input, labels);
        eval.eval(labels, net.output(input));

        **/
        boolean gradOK =
            GradientCheckUtil.checkGradients(
                net,
                DEFAULT_EPS,
                DEFAULT_MAX_REL_ERROR,
                DEFAULT_MIN_ABS_ERROR,
                PRINT_RESULTS,
                RETURN_ON_FIRST_FAILURE,
                input,
                labels);

        assertTrue(gradOK, msg);
        TestUtils.testModelSerialization(net);
      }
    }


  }


  @Test
  public void testCnn1DWithLocallyConnected1D() {
    Nd4j.getRandom().setSeed(1337);

    int[] minibatchSizes = {2, 3};
    int length = 25;
    int convNIn = 18;
    int convNOut1 = 3;
    int convNOut2 = 4;
    int finalNOut = 4;

    int[] kernels = {1,2,4};
    int stride = 1;
    int padding = 0;

    Activation[] activations = {Activation.SIGMOID};

    for (Activation afn : activations) {
      for (int minibatchSize : minibatchSizes) {
        for (int kernel : kernels) {
          INDArray input = Nd4j.rand(minibatchSize, convNIn, length);
          INDArray labels = Nd4j.zeros(minibatchSize, finalNOut, length);
          for (int i = 0; i < minibatchSize; i++) {
            for (int j = 0; j < length; j++) {
              labels.putScalar(new int[] {i, i % finalNOut, j}, 1.0);
            }
          }

          NeuralNetConfiguration conf =
              NeuralNetConfiguration.builder()
                  .dataType(DataType.DOUBLE)
                  .updater(new NoOp())
                  .dist(new NormalDistribution(0, 1))
                  .convolutionMode(ConvolutionMode.Same)
                  .layer(
                      Convolution1DNew.builder()
                          .activation(afn)
                          .kernelSize(kernel)
                          .stride(stride)
                          .padding(padding)
                          .nIn(convNIn)
                          .nOut(convNOut1)
                          .rnnDataFormat(RNNFormat.NCW)
                          .build())
                  .layer(
                      LocallyConnected1D.builder()
                          .activation(afn)
                          .kernelSize(kernel)
                          .stride(stride)
                          .padding(padding)
                          .nIn(convNOut1)
                          .nOut(convNOut2)
                          .hasBias(false)
                          .build())
                  .layer(
                      RnnOutputLayer.builder()
                          .lossFunction(LossFunctions.LossFunction.MCXENT)
                          .activation(Activation.SOFTMAX)
                          .nOut(finalNOut)
                          .build())
                  .inputType(InputType.recurrent(convNIn, length))
                  .build();

          String json = conf.toJson();
          NeuralNetConfiguration c2 = NeuralNetConfiguration.fromJson(json);
          assertEquals(conf, c2);

          MultiLayerNetwork net = new MultiLayerNetwork(conf);
          net.init();

          String msg =
              "Minibatch=" + minibatchSize + ", activationFn=" + afn + ", kernel = " + kernel;

          if (PRINT_RESULTS) {
            System.out.println(msg);
            // for (int j = 0; j < net.getnLayers(); j++)
            // System.out.println("ILayer " + j + " # params: " +
            // net.getLayer(j).numParams());
          }

          boolean gradOK =
              GradientCheckUtil.checkGradients(
                  net,
                  DEFAULT_EPS,
                  DEFAULT_MAX_REL_ERROR,
                  DEFAULT_MIN_ABS_ERROR,
                  PRINT_RESULTS,
                  RETURN_ON_FIRST_FAILURE,
                  input,
                  labels);

          assertTrue(gradOK, msg);

          TestUtils.testModelSerialization(net);
        }
      }
    }
  }

  @Test
  public void testCnn1DWithCropping1D() {
    Nd4j.getRandom().setSeed(1337);

    int[] minibatchSizes = {1, 3};
    int length = 7;
    int convNIn = 2;
    int convNOut1 = 3;
    int convNOut2 = 4;
    int finalNOut = 4;

    int[] kernels = {1, 2, 4};
    int stride = 1;

    int padding = 0;
    int cropping = 1;
    int croppedLength = length - 2 * cropping;

    Activation[] activations = {Activation.SIGMOID};
    SubsamplingLayer.PoolingType[] poolingTypes =
        new SubsamplingLayer.PoolingType[] {
          SubsamplingLayer.PoolingType.MAX,
          SubsamplingLayer.PoolingType.AVG,
          SubsamplingLayer.PoolingType.PNORM
        };

    for (Activation afn : activations) {
      for (SubsamplingLayer.PoolingType poolingType : poolingTypes) {
        for (int minibatchSize : minibatchSizes) {
          for (int kernel : kernels) {
            INDArray input = Nd4j.rand(minibatchSize, convNIn, length);
            INDArray labels = Nd4j.zeros(minibatchSize, finalNOut, croppedLength);
            for (int i = 0; i < minibatchSize; i++) {
              for (int j = 0; j < croppedLength; j++) {
                labels.putScalar(new int[] {i, i % finalNOut, j}, 1.0);
              }
            }

            NeuralNetConfiguration conf =
                NeuralNetConfiguration.builder()
                    .dataType(DataType.DOUBLE)
                    .updater(new NoOp())
                    .dist(new NormalDistribution(0, 1))
                    .convolutionMode(ConvolutionMode.Same)
                    .layer(
                        Convolution1DNew.builder()
                            .activation(afn)
                            .kernelSize(kernel)
                            .stride(stride)
                            .padding(padding)
                            .nOut(convNOut1)
                            .build())
                    .layer(Cropping1D.builder(cropping).build())
                    .layer(
                        Convolution1DNew.builder()
                            .activation(afn)
                            .kernelSize(kernel)
                            .stride(stride)
                            .padding(padding)
                            .nOut(convNOut2)
                            .build())
                    .layer(
                        RnnOutputLayer.builder()
                            .lossFunction(LossFunctions.LossFunction.MCXENT)
                            .activation(Activation.SOFTMAX)
                            .nOut(finalNOut)
                            .build())
                    .inputType(InputType.recurrent(convNIn, length, RNNFormat.NCW))
                    .build();

            String json = conf.toJson();
            NeuralNetConfiguration c2 = NeuralNetConfiguration.fromJson(json);
            assertEquals(conf, c2);

            MultiLayerNetwork net = new MultiLayerNetwork(conf);
            net.init();

            String msg =
                "PoolingType="
                    + poolingType
                    + ", minibatch="
                    + minibatchSize
                    + ", activationFn="
                    + afn
                    + ", kernel = "
                    + kernel;

            if (PRINT_RESULTS) {
              System.out.println(msg);
              // for (int j = 0; j < net.getnLayers(); j++)
              // System.out.println("ILayer " + j + " # params: " +
              // net.getLayer(j).numParams());
            }

            boolean gradOK =
                GradientCheckUtil.checkGradients(
                    net,
                    DEFAULT_EPS,
                    DEFAULT_MAX_REL_ERROR,
                    DEFAULT_MIN_ABS_ERROR,
                    PRINT_RESULTS,
                    RETURN_ON_FIRST_FAILURE,
                    input,
                    labels);

            assertTrue(gradOK, msg);

            TestUtils.testModelSerialization(net);
          }
        }
      }
    }
  }

  @Test
  public void testCnn1DWithZeroPadding1D() {
    Nd4j.getRandom().setSeed(1337);

    int[] minibatchSizes = {1, 3};
    int length = 7;
    int convNIn = 2;
    int convNOut1 = 3;
    int convNOut2 = 4;
    int finalNOut = 4;

    int[] kernels = {1, 2, 4};
    int stride = 1;
    int pnorm = 2;

    int padding = 0;
    int zeroPadding = 2;
    int paddedLength = length + 2 * zeroPadding;

    Activation[] activations = {Activation.SIGMOID};
    SubsamplingLayer.PoolingType[] poolingTypes =
        new SubsamplingLayer.PoolingType[] {
          SubsamplingLayer.PoolingType.MAX,
          SubsamplingLayer.PoolingType.AVG,
          SubsamplingLayer.PoolingType.PNORM
        };

    for (Activation afn : activations) {
      for (SubsamplingLayer.PoolingType poolingType : poolingTypes) {
        for (int minibatchSize : minibatchSizes) {
          for (int kernel : kernels) {
            INDArray input = Nd4j.rand(minibatchSize, convNIn, length);
            INDArray labels = Nd4j.zeros(minibatchSize, finalNOut, paddedLength);
            for (int i = 0; i < minibatchSize; i++) {
              for (int j = 0; j < paddedLength; j++) {
                labels.putScalar(new int[] {i, i % finalNOut, j}, 1.0);
              }
            }

            NeuralNetConfiguration conf =
                NeuralNetConfiguration.builder()
                    .dataType(DataType.DOUBLE)
                    .updater(new NoOp())
                    .dist(new NormalDistribution(0, 1))
                    .convolutionMode(ConvolutionMode.Same)
                    .layer(
                        Convolution1DNew.builder()
                            .activation(afn)
                            .kernelSize(2, kernel)
                            .stride(stride)
                            .padding(padding)
                            .nOut(convNOut1)
                            .build())
                    .layer(ZeroPadding1DLayer.builder(zeroPadding).build())
                    .layer(
                        Convolution1DNew.builder()
                            .activation(afn)
                            .kernelSize(kernel)
                            .stride(stride)
                            .padding(padding)
                            .nOut(convNOut2)
                            .build())
                    .layer(ZeroPadding1DLayer.builder(0).build())
                    .layer(
                        Subsampling1DLayer.builder(poolingType)
                            .kernelSize(kernel)
                            .stride(stride)
                            .padding(padding)
                            .pnorm(pnorm)
                            .build())
                    .layer(
                        RnnOutputLayer.builder()
                            .lossFunction(LossFunctions.LossFunction.MCXENT)
                            .activation(Activation.SOFTMAX)
                            .nOut(finalNOut)
                            .build())
                    .inputType(InputType.recurrent(convNIn, length, RNNFormat.NCW))
                    .build();

            String json = conf.toJson();
            NeuralNetConfiguration c2 = NeuralNetConfiguration.fromJson(json);
            assertEquals(conf, c2);

            MultiLayerNetwork net = new MultiLayerNetwork(conf);
            net.init();

            String msg =
                "PoolingType="
                    + poolingType
                    + ", minibatch="
                    + minibatchSize
                    + ", activationFn="
                    + afn
                    + ", kernel = "
                    + kernel;

            if (PRINT_RESULTS) {
              System.out.println(msg);
              // for (int j = 0; j < net.getnLayers(); j++)
              // System.out.println("ILayer " + j + " # params: " +
              // net.getLayer(j).numParams());
            }

            boolean gradOK =
                GradientCheckUtil.checkGradients(
                    net,
                    DEFAULT_EPS,
                    DEFAULT_MAX_REL_ERROR,
                    DEFAULT_MIN_ABS_ERROR,
                    PRINT_RESULTS,
                    RETURN_ON_FIRST_FAILURE,
                    input,
                    labels);

            assertTrue(gradOK, msg);
            TestUtils.testModelSerialization(net);
          }
        }
      }
    }
  }

  @Test
  public void testCnn1DWithSubsampling1D() {
    Nd4j.getRandom().setSeed(12345);

    int[] minibatchSizes = {1, 3};
    int length = 7;
    int convNIn = 2;
    int convNOut1 = 3;
    int convNOut2 = 4;
    int finalNOut = 4;

    int[] kernels = {1, 2, 4};
    int stride = 1;
    int padding = 0;
    int pnorm = 2;

    Activation[] activations = {Activation.SIGMOID, Activation.TANH};
    SubsamplingLayer.PoolingType[] poolingTypes =
        new SubsamplingLayer.PoolingType[] {
          SubsamplingLayer.PoolingType.MAX,
          SubsamplingLayer.PoolingType.AVG,
          SubsamplingLayer.PoolingType.PNORM
        };

    for (Activation afn : activations) {
      for (SubsamplingLayer.PoolingType poolingType : poolingTypes) {
        for (int minibatchSize : minibatchSizes) {
          for (int kernel : kernels) {
            INDArray input = Nd4j.rand(minibatchSize, convNIn, length);
            INDArray labels = Nd4j.zeros(minibatchSize, finalNOut, length);
            for (int i = 0; i < minibatchSize; i++) {
              for (int j = 0; j < length; j++) {
                labels.putScalar(new int[] {i, i % finalNOut, j}, 1.0);
              }
            }

            NeuralNetConfiguration conf =
                NeuralNetConfiguration.builder()
                    .dataType(DataType.DOUBLE)
                    .updater(new NoOp())
                    .dist(new NormalDistribution(0, 1))
                    .convolutionMode(ConvolutionMode.Same)
                    .layer(
                        0,
                        Convolution1DNew.builder()
                            .activation(afn)
                            .kernelSize(kernel)
                            .stride(stride)
                            .padding(padding)
                            .nOut(convNOut1)
                            .build())
                    .layer(
                        1,
                        Convolution1DNew.builder()
                            .activation(afn)
                            .kernelSize(kernel)
                            .stride(stride)
                            .padding(padding)
                            .nOut(convNOut2)
                            .build())
                    .layer(
                        2,
                        Subsampling1DLayer.builder(poolingType)
                            .kernelSize(kernel)
                            .stride(stride)
                            .padding(padding)
                            .pnorm(pnorm)
                            .name("SubsamplingLayer")
                            .build())
                    .layer(
                        3,
                        RnnOutputLayer.builder()
                            .lossFunction(LossFunctions.LossFunction.MCXENT)
                            .activation(Activation.SOFTMAX)
                            .nOut(finalNOut)
                            .build())
                    .inputType(InputType.recurrent(convNIn, length, RNNFormat.NCW))
                    .build();

            String json = conf.toJson();
            NeuralNetConfiguration c2 = NeuralNetConfiguration.fromJson(json);
            assertEquals(conf, c2);

            MultiLayerNetwork net = new MultiLayerNetwork(conf);
            net.init();

            String msg =
                "PoolingType="
                    + poolingType
                    + ", minibatch="
                    + minibatchSize
                    + ", activationFn="
                    + afn
                    + ", kernel = "
                    + kernel;

            if (PRINT_RESULTS) {
              System.out.println(msg);
              // for (int j = 0; j < net.getnLayers(); j++)
              // System.out.println("ILayer " + j + " # params: " +
              // net.getLayer(j).numParams());
            }

            boolean gradOK =
                GradientCheckUtil.checkGradients(
                    net,
                    DEFAULT_EPS,
                    DEFAULT_MAX_REL_ERROR,
                    DEFAULT_MIN_ABS_ERROR,
                    PRINT_RESULTS,
                    RETURN_ON_FIRST_FAILURE,
                    input,
                    labels);

            assertTrue(gradOK, msg);
            TestUtils.testModelSerialization(net);
          }
        }
      }
    }
  }

  @Test
  public void testCnn1dWithMasking() {
    int length = 12;
    int convNIn = 2;
    int convNOut1 = 3;
    int convNOut2 = 4;
    int finalNOut = 3;

    int pnorm = 2;

    SubsamplingLayer.PoolingType[] poolingTypes =
        new SubsamplingLayer.PoolingType[] {
          SubsamplingLayer.PoolingType.MAX, SubsamplingLayer.PoolingType.AVG
        };

    for (SubsamplingLayer.PoolingType poolingType : poolingTypes) {
      for (ConvolutionMode cm :
          new ConvolutionMode[] {ConvolutionMode.Same, ConvolutionMode.Truncate}) {
        for (int stride : new int[] {1, 2}) {
          String s = cm + ", stride=" + stride + ", pooling=" + poolingType;
          log.info("Starting test: " + s);
          Nd4j.getRandom().setSeed(12345);

          NeuralNetConfiguration conf =
              NeuralNetConfiguration.builder()
                  .dataType(DataType.DOUBLE)
                  .updater(new NoOp())
                  .activation(Activation.TANH)
                  .dist(new NormalDistribution(0, 1))
                  .convolutionMode(cm)
                  .seed(12345)
                  .layer(
                      Convolution1DNew.builder()
                          .kernelSize(2)
                          .rnnDataFormat(RNNFormat.NCW)
                          .stride(stride)
                          .nIn(convNIn)
                          .nOut(convNOut1)
                          .build())
                  .layer(
                      Subsampling1DLayer.builder(poolingType)
                          .kernelSize(2)
                          .stride(stride)
                          .pnorm(pnorm)
                          .build())
                  .layer(
                      Convolution1DNew.builder()
                          .kernelSize(2)
                          .rnnDataFormat(RNNFormat.NCW)
                          .stride(stride)
                          .nIn(convNOut1)
                          .nOut(convNOut2)
                          .build())
                  .layer(GlobalPoolingLayer.builder().poolingType(PoolingType.AVG).build())
                  .layer(
                      OutputLayer.builder()
                          .lossFunction(LossFunctions.LossFunction.MCXENT)
                          .activation(Activation.SOFTMAX)
                          .nOut(finalNOut)
                          .build())
                  .inputType(InputType.recurrent(convNIn, length))
                  .build();

          MultiLayerNetwork net = new MultiLayerNetwork(conf);
          net.init();

          INDArray f = Nd4j.rand(2, convNIn, length);
          INDArray fm = Nd4j.create(2, length);
          fm.get(NDArrayIndex.point(0), NDArrayIndex.all()).assign(1);
          fm.get(NDArrayIndex.point(1), NDArrayIndex.interval(0, 6)).assign(1);

          INDArray label = TestUtils.randomOneHot(2, finalNOut);

          boolean gradOK =
              GradientCheckUtil.checkGradients(
                  new GradientCheckUtil.MLNConfig().net(net).input(f).labels(label).inputMask(fm));

          assertTrue(gradOK, s);
          TestUtils.testModelSerialization(net);

          // TODO also check that masked step values don't impact forward pass, score or gradients

          DataSet ds = new DataSet(f, label, fm, null);
          double scoreBefore = net.score(ds);
          net.setInput(f);
          net.setLabels(label);
          net.setLayerMaskArrays(fm, null);
          net.computeGradientAndScore();
          INDArray gradBefore = net.getFlattenedGradients().dup();
          f.putScalar(1, 0, 10, 10.0);
          f.putScalar(1, 1, 11, 20.0);
          double scoreAfter = net.score(ds);
          net.setInput(f);
          net.setLabels(label);
          net.setLayerMaskArrays(fm, null);
          net.computeGradientAndScore();
          INDArray gradAfter = net.getFlattenedGradients().dup();

          assertEquals(scoreBefore, scoreAfter, 1e-6);
          assertEquals(gradBefore, gradAfter);
        }
      }
    }
  }

  @Test
  public void testCnn1Causal() throws Exception {
    int convNIn = 2;
    int convNOut1 = 3;
    int convNOut2 = 4;
    int finalNOut = 3;

    int[] lengths = {11, 12, 13, 9, 10, 11};
    int[] kernels = {2, 3, 2, 4, 2, 3};
    int[] dilations = {1, 1, 2, 1, 2, 1};
    int[] strides = {1, 2, 1, 2, 1, 1};
    boolean[] masks = {false, true, false, true, false, true};
    boolean[] hasB = {true, false, true, false, true, true};
    for (int i = 0; i < lengths.length; i++) {
      int length = lengths[i];
      int k = kernels[i];
      int d = dilations[i];
      int st = strides[i];
      boolean mask = masks[i];
      boolean hasBias = hasB[i];
      // TODO has bias
      String s = "k=" + k + ", s=" + st + " d=" + d + ", seqLen=" + length;
      log.info("Starting test: " + s);
      Nd4j.getRandom().setSeed(12345);

      NeuralNetConfiguration conf =
          NeuralNetConfiguration.builder()
              .dataType(DataType.DOUBLE)
              .updater(new NoOp())
              .activation(Activation.TANH)
              .weightInit(new NormalDistribution(0, 1))
              .seed(12345)
              .layer(
                  Convolution1DNew.builder()
                      .kernelSize(k)
                      .dilation(d)
                      .hasBias(hasBias)
                      .convolutionMode(ConvolutionMode.Causal)
                      .stride(st)
                      .nOut(convNOut1)
                      .build())
              .layer(
                  Convolution1DNew.builder()
                      .kernelSize(k)
                      .dilation(d)
                      .convolutionMode(ConvolutionMode.Causal)
                      .stride(st)
                      .nOut(convNOut2)
                      .build())
              .layer(
                  RnnOutputLayer.builder()
                      .lossFunction(LossFunctions.LossFunction.MCXENT)
                      .activation(Activation.SOFTMAX)
                      .nOut(finalNOut)
                      .build())
              .inputType(InputType.recurrent(convNIn, length, RNNFormat.NCW))
              .build();

      MultiLayerNetwork net = new MultiLayerNetwork(conf);
      net.init();

      INDArray f = Nd4j.rand(DataType.DOUBLE, 2, convNIn, length);
      INDArray fm = null;
      if (mask) {
        fm = Nd4j.create(2, length);
        fm.get(NDArrayIndex.point(0), NDArrayIndex.all()).assign(1);
        fm.get(NDArrayIndex.point(1), NDArrayIndex.interval(0, length - 2)).assign(1);
      }

      long outSize1 = Convolution1DUtils.getOutputSize(length, k, st, 0, ConvolutionMode.Causal, d);
      long outSize2 =
          Convolution1DUtils.getOutputSize(outSize1, k, st, 0, ConvolutionMode.Causal, d);

      INDArray label = TestUtils.randomOneHotTimeSeries(2, finalNOut, (int) outSize2);

      String msg =
          "Minibatch="
              + 1
              + ", activationFn="
              + Activation.RELU
              + ", kernel = "
              + k;

      System.out.println(msg);
      for (int j = 0; j < net.getnLayers(); j++)
        System.out.println("ILayer " + j + " # params: " + net.getLayer(j).numParams());

      boolean gradOK =
          GradientCheckUtil.checkGradients(
              new GradientCheckUtil.MLNConfig().net(net).input(f).labels(label).inputMask(fm));

      assertTrue(gradOK, s);
      TestUtils.testModelSerialization(net);
    }
  }
}
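For reference, the label length used in testCnn1D above follows directly from the output-size formula written in that test: with seriesLength = 300, padding = 3, stride = 2 and the first kernel size 2, numLabels = ((300 + 2*3 - 2) / 2) + 1 = 153, which is the time dimension of the labels array built with Nd4j.zeros(minibatchSize, 4, numLabels).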
@@ -108,8 +108,8 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
                     .updater(new NoOp())
                     .weightInit(WeightInit.XAVIER)
                     .seed(12345L)
-                    .list()
-                    .layer(0, ConvolutionLayer.builder(1, 1).nOut(6).activation(afn).build())
+                    .layer(0, Convolution2D.builder().kernelSize(1).stride(1).nOut(6).activation(afn).build())
                     .layer(1, OutputLayer.builder(lf).activation(outputActivation).nOut(3).build())
                     .inputType(InputType.convolutionalFlat(1, 4, 1));

@@ -24,6 +24,7 @@ import lombok.val;
 import org.deeplearning4j.nn.api.layers.LayerConstraint;
 import org.deeplearning4j.nn.conf.CNN2DFormat;
 import org.deeplearning4j.nn.conf.inputs.InputType;
+import org.deeplearning4j.nn.conf.layers.Convolution2D;
 import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
 import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
 import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException;
@@ -85,7 +86,7 @@ public class KerasAtrousConvolution2D extends KerasConvolution {
         IWeightInit init = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_INIT(),
                 enforceTrainingConfig, conf, kerasMajorVersion);

-        val builder = ConvolutionLayer.builder().name(this.name)
+        val builder = Convolution2D.builder().name(this.name)
                 .nOut(getNOutFromConfig(layerConfig, conf)).dropOut(this.dropout)
                 .activation(getIActivationFromConfig(layerConfig, conf))
                 .weightInit(init)
@@ -28,6 +28,7 @@ import org.deeplearning4j.nn.api.layers.LayerConstraint;
 import org.deeplearning4j.nn.conf.CNN2DFormat;
 import org.deeplearning4j.nn.conf.InputPreProcessor;
 import org.deeplearning4j.nn.conf.inputs.InputType;
+import org.deeplearning4j.nn.conf.layers.Convolution2D;
 import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
 import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
 import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException;
@@ -95,7 +96,7 @@ public class KerasConvolution2D extends KerasConvolution {
         LayerConstraint weightConstraint = KerasConstraintUtils.getConstraintsFromConfig(
                 layerConfig, conf.getLAYER_FIELD_W_CONSTRAINT(), conf, kerasMajorVersion);

-        final var builder = ConvolutionLayer.builder().name(this.name)
+        final var builder = Convolution2D.builder().name(this.name)
                 .nOut(getNOutFromConfig(layerConfig, conf)).dropOut(this.dropout)
                 .activation(getIActivationFromConfig(layerConfig, conf))
                 .weightInit(init)
@@ -222,6 +222,14 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration {
     // TODO do not put inside self to avoid serialization issues
     // innerConfigurations.add(0, this); //put this configuration at first place

+
+    getLayerConfigurations().stream()
+        .forEach(
+            lconf ->
+                lconf.setNetConfiguration(
+                    this)); // set this as net config for all layers (defined in here, not stacked
+
+
     /**
      * Inherit network wide configuration setting to those layer configurations that do not have an
      * individual setting (nor a default)
@@ -230,11 +238,6 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration {
       lconf.runInheritance();
     }

-    getLayerConfigurations().stream()
-        .forEach(
-            lconf ->
-                lconf.setNetConfiguration(
-                    this)); // set this as net config for all layers (defined in here, not stacked

     // Validate BackpropType setting
     if ((tbpttBackLength != DEFAULT_TBPTT_LENGTH || tbpttFwdLength != DEFAULT_TBPTT_LENGTH)
@@ -326,7 +329,7 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration {
       LayerConfiguration layer = getFlattenedLayerConfigurations().get(i - 1);
       // convolution 1d is an edge case where it has rnn input type but the filters
       // should be the output
-      if (layer instanceof Convolution1D) {
+      if (layer instanceof Convolution1D || layer instanceof Convolution1DNew) {
       if (l instanceof DenseLayer && getInputType() instanceof InputType.InputTypeRecurrent) {
       FeedForwardLayer feedForwardLayer = (FeedForwardLayer) l;
       if (getInputType() instanceof InputType.InputTypeRecurrent) {
@@ -21,7 +21,13 @@

 package org.deeplearning4j.nn.conf;

+/**
+ * N is the batch size<br/>
+ * C is the number of feature maps (that is,, number of channels)<br/>
+ * H is the image height (not used for 1D conv as this is an RNN format<br/>
+ * W is the image width<br/>
+ * **/
 public enum RNNFormat implements DataFormat {
-  NCW,
+  /** n=batch size; c=channels/ features; w=width **/ NCW,
-  NWC
+  /** n=batch size; w=width; c=channels/ features **/ NWC
 }
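For readers skimming the RNNFormat change above: the two constants differ only in axis order. A minimal ND4J sketch of what the NCW and NWC layouts mean for recurrent activations; the shapes and variable names here are illustrative and not part of the commit:

    import org.nd4j.linalg.api.ndarray.INDArray;
    import org.nd4j.linalg.factory.Nd4j;

    public class RnnFormatSketch {
      public static void main(String[] args) {
        int batch = 32, channels = 8, width = 100; // width = number of time steps
        // NCW: [batch, channels/features, width/time steps]
        INDArray ncw = Nd4j.zeros(batch, channels, width);
        // NWC: [batch, width/time steps, channels/features]
        INDArray nwc = Nd4j.zeros(batch, width, channels);
        System.out.println(java.util.Arrays.toString(ncw.shape())); // [32, 8, 100]
        System.out.println(java.util.Arrays.toString(nwc.shape())); // [32, 100, 8]
      }
    }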
@@ -0,0 +1,142 @@
+/*
+ *
+ * ******************************************************************************
+ * *
+ * * This program and the accompanying materials are made available under the
+ * * terms of the Apache License, Version 2.0 which is available at
+ * * https://www.apache.org/licenses/LICENSE-2.0.
+ * *
+ * * See the NOTICE file distributed with this work for additional
+ * * information regarding copyright ownership.
+ * * Unless required by applicable law or agreed to in writing, software
+ * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * * License for the specific language governing permissions and limitations
+ * * under the License.
+ * *
+ * * SPDX-License-Identifier: Apache-2.0
+ * *****************************************************************************
+ *
+ */
+
+package org.deeplearning4j.nn.conf.layers;
+
+import java.util.Arrays;
+
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import lombok.*;
+import lombok.experimental.Accessors;
+import lombok.experimental.SuperBuilder;
+import lombok.extern.slf4j.Slf4j;
+import org.deeplearning4j.nn.conf.CNN2DFormat;
+import org.deeplearning4j.nn.conf.ConvolutionMode;
+import org.deeplearning4j.util.ValidationUtils;
+
+/**
+ * ConvolutionLayer nIn in the input layer is the number of channels nOut is the number of filters
+ * to be used in the net or in other words the channels The builder specifies the filter/kernel
+ * size, the stride and padding The pooling layer takes the kernel size
+ *
+ * <p>Supports multiple dimensions: In 1D CNN, kernel moves in 1 direction. Input and output data of
+ * 1D CNN is 2 dimensional. Mostly used on Time-Series data.
+ *
+ * <p>In 2D CNN, kernel moves in 2 directions. Input and output data of 2D CNN is 3 dimensional.
+ * Mostly used on Image data.
+ *
+ * <p>In 3D CNN, kernel moves in 3 directions. Input and output data of 3D CNN is 4 dimensional.
+ * Mostly used on 3D Image data (MRI, CT Scans, Video).
+ */
+@ToString(callSuper = true)
+@NoArgsConstructor
+@EqualsAndHashCode(callSuper = true)
+@Slf4j
+@SuperBuilder
+public abstract class AbstractConvolutionLayer extends FeedForwardLayer {
+  /** The kernel of this convolution with size in each n-dimensions */
+  @Getter private int[] kernelSize;
+  /** The stride */
+  @Getter private int[] stride;
+  /** The padding */
+  @Getter private int[] padding;
+  /** The dilation */
+  @Getter private int[] dilation;
+  /** If true (default): include bias parameters in the model. False: no bias. */
+  @Builder.Default
+  @Getter
+  @Accessors(fluent = true)
+  @Setter
+  private boolean hasBias = true;
+  /**
+   * Set the convolution mode for the Convolution layer. See {@link ConvolutionMode} for more
+   * details Default is {@link ConvolutionMode}.Truncate.
+   */
+  @Builder.Default @Getter @Setter
+  private ConvolutionMode convolutionMode = ConvolutionMode.Truncate;
+  /**
+   * When using CuDNN and an error is encountered, should fallback to the non-CuDNN implementatation
+   * be allowed? If set to false, an exception in CuDNN will be propagated back to the user. If
+   * false, the built-in (non-CuDNN) implementation for ConvolutionLayer will be used
+   */
+  @Getter @Setter @Builder.Default private boolean cudnnAllowFallback = true;
+
+  /** Defaults to "PREFER_FASTEST", but "NO_WORKSPACE" uses less memory. */
+  @Getter @Setter @Builder.Default private ConvolutionLayer.AlgoMode cudnnAlgoMode = ConvolutionLayer.AlgoMode.PREFER_FASTEST;
+
+  @Getter @Setter private ConvolutionLayer.FwdAlgo cudnnFwdAlgo;
+  @Getter @Setter private ConvolutionLayer.BwdFilterAlgo cudnnBwdFilterAlgo;
+  @Getter @Setter private ConvolutionLayer.BwdDataAlgo cudnnBwdDataAlgo;
+
+  /**
+   * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last).
+   * See {@link CNN2DFormat} for more details.<br>
+   * Default: NCHW
+   *
+   * @param format Format for activations (in and out)
+   */
+  @Builder.Default @Getter @Setter
+  private CNN2DFormat convFormat =
+      CNN2DFormat.NCHW; // default value for legacy serialization reasons
+
+  /**
+   * Number of parameters this layer has a result of its configuration.
+   *
+   * @return number or parameters
+   */
+  @Override
+  public long numParams() {
+    var kern = 1;
+    for (int i : getKernelSize()) {
+      kern = kern * i;
+    }
+    return nIn * nOut * kern + (hasBias() ? nOut : 0);
+  }
+
+  public abstract static class AbstractConvolutionLayerBuilder<
+          C extends AbstractConvolutionLayer, B extends AbstractConvolutionLayerBuilder<C, B>>
+      extends FeedForwardLayerBuilder<C, B> {
+
+    public B kernelSize(int @NonNull ... kernelSize) {
+      if (this.kernelSize != null) {
+        log.warn("You are setting the kernel more than once, last call with override prior calls.");
+      }
+      this.kernelSize = kernelSize;
+      return self();
+    }
+
+    public B stride(int @NonNull ... stride) {
+      this.stride = stride;
+      return self();
+    }
+
+    public B padding(int @NonNull ... padding) {
+      this.padding = padding;
+      return self();
+    }
+
+    public B dilation(int @NonNull ... dilation) {
+      this.dilation = dilation;
+      return self();
+    }
+  }
+}
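The numParams() override added in AbstractConvolutionLayer multiplies nIn, nOut and the kernel volume and then adds one bias per filter. A standalone arithmetic sketch of that formula, with hypothetical numbers, mirroring the method above rather than calling the class itself:

    public class ConvParamCountSketch {
      // Mirrors the numParams() logic above: nIn * nOut * prod(kernelSize) + (hasBias ? nOut : 0)
      static long numParams(long nIn, long nOut, int[] kernelSize, boolean hasBias) {
        long kern = 1;
        for (int k : kernelSize) kern *= k;
        return nIn * nOut * kern + (hasBias ? nOut : 0);
      }

      public static void main(String[] args) {
        // A 3x3 convolution mapping 16 input channels to 32 filters, with bias:
        // 16 * 32 * 9 + 32 = 4640 parameters
        System.out.println(numParams(16, 32, new int[] {3, 3}, true)); // 4640
      }
    }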
@@ -52,6 +52,16 @@ import org.nd4j.linalg.learning.regularization.WeightDecay;
 @SuperBuilder
 public abstract class BaseLayerConfiguration extends LayerConfiguration
     implements ITraininableLayerConfiguration, Serializable, Cloneable {
+  /**
+   * Number of parameters this layer has a result of its configuration. This default implementation
+   * calls {@link #initializer()}.numParams( this ).
+   *
+   * @return number or parameters
+   */
+  @Override
+  public long numParams() {
+    return initializer().numParams(this);
+  }
+
   /**
    * Set constraints to be applied to all layers. Default: no constraints.<br>
@@ -45,6 +45,7 @@ import org.nd4j.linalg.factory.Nd4j;
 @NoArgsConstructor
 public class CapsuleLayer extends SameDiffLayer {

+
   private static final String WEIGHT_PARAM = "weight";
   private static final String BIAS_PARAM = "bias";
   /**
@@ -36,22 +36,17 @@ import org.deeplearning4j.util.ValidationUtils;
 import org.nd4j.linalg.api.buffer.DataType;
 import org.nd4j.linalg.api.ndarray.INDArray;

-/*
 //TODO: We will eventually want to NOT subclass off of ConvolutionLayer.
 //Currently, we just subclass off the ConvolutionLayer and hard code the "width" dimension to 1
-* This approach treats a multivariate time series with L timesteps and
+/**
+ * This approach treats a multivariate time series with L time steps and
  * P variables as an L x 1 x P image (L rows high, 1 column wide, P
  * channels deep). The kernel should be H<L pixels high and W=1 pixels
  * wide.
+ *
-In 1D CNN, kernel moves in 1 direction.
-Input and output data of 1D CNN is 2 dimensional. Mostly used on Time-Series data.
+ * In 1D CNN, kernel moves in 1 direction. The kernel has 2-dimensions.
+ * Input and output data of 1D CNN is 2-dimensional. Mostly used on Time-Series data.
-
-In 2D CNN, kernel moves in 2 directions.
-Input and output data of 2D CNN is 3 dimensional. Mostly used on Image data.
-
-In 3D CNN, kernel moves in 3 directions.
-Input and output data of 3D CNN is 4 dimensional. Mostly used on 3D Image data (MRI, CT Scans, Video).
  */
 @Data
 @ToString(callSuper = true)
@@ -223,7 +218,7 @@ public class Convolution1D extends ConvolutionLayer {
   }

   public abstract static class Convolution1DBuilder<
-          C extends ConvolutionLayer, B extends Convolution1DBuilder<C, B>>
+          C extends Convolution1D, B extends Convolution1DBuilder<C, B>>
       extends ConvolutionLayerBuilder<C, B> {

     @Override
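The reworked Convolution1D Javadoc describes a series with L time steps and P variables as an L x 1 x P image convolved with an H x 1 kernel. A hedged sketch of the resulting output length, assuming the usual valid-convolution arithmetic that Convolution1DUtils.getOutputSize is expected to implement for ConvolutionMode.Truncate; the method name and constants below are illustrative only:

    public class Conv1dOutputLengthSketch {
      // out = (L + 2*padding - effectiveKernel) / stride + 1,
      // where effectiveKernel = kernel + (kernel - 1) * (dilation - 1)
      static long outputLength(long timeSteps, int kernel, int stride, int padding, int dilation) {
        int effectiveKernel = kernel + (kernel - 1) * (dilation - 1);
        return (timeSteps + 2L * padding - effectiveKernel) / stride + 1;
      }

      public static void main(String[] args) {
        // 100 time steps, kernel 5, stride 1, no padding, no dilation -> 96 output steps
        System.out.println(outputLength(100, 5, 1, 0, 1)); // 96
      }
    }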
@@ -0,0 +1,250 @@
+/*
+ * ******************************************************************************
+ * *
+ * *
+ * * This program and the accompanying materials are made available under the
+ * * terms of the Apache License, Version 2.0 which is available at
+ * * https://www.apache.org/licenses/LICENSE-2.0.
+ * *
+ * * See the NOTICE file distributed with this work for additional
+ * * information regarding copyright ownership.
+ * * Unless required by applicable law or agreed to in writing, software
+ * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * * License for the specific language governing permissions and limitations
+ * * under the License.
+ * *
+ * * SPDX-License-Identifier: Apache-2.0
+ * *****************************************************************************
+ */
+
+package org.deeplearning4j.nn.conf.layers;
+
+import java.util.Collection;
+import java.util.Map;
+import lombok.*;
+import lombok.experimental.SuperBuilder;
+import lombok.extern.jackson.Jacksonized;
+import lombok.extern.slf4j.Slf4j;
+import org.deeplearning4j.nn.api.ParamInitializer;
+import org.deeplearning4j.nn.conf.CNN2DFormat;
+import org.deeplearning4j.nn.conf.InputPreProcessor;
+import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
+import org.deeplearning4j.nn.conf.RNNFormat;
+import org.deeplearning4j.nn.conf.inputs.InputType;
+import org.deeplearning4j.nn.conf.memory.LayerMemoryReport;
+import org.deeplearning4j.nn.params.ConvolutionNewParamInitializer;
+import org.deeplearning4j.optimize.api.TrainingListener;
+import org.deeplearning4j.util.Convolution1DUtils;
+import org.deeplearning4j.util.ValidationUtils;
+import org.nd4j.linalg.api.buffer.DataType;
+import org.nd4j.linalg.api.ndarray.INDArray;
+
+// TODO: We will eventually want to NOT subclass off of ConvolutionLayer.
+// Currently, we just subclass off the ConvolutionLayer and hard code the "width" dimension to 1
+
+/**
+ * This approach treats a multivariate time series with L time steps and P variables as an L x 1 x P
+ * image (L rows high, 1 column wide, P channels deep). The kernel should be H<L pixels high and W=1
+ * pixels wide.
+ *
+ * <p>In 1D CNN, kernel moves in 1 direction. The kernel has 2-dimensions. Input and output data of
+ * 1D CNN is 2-dimensional. Mostly used on Time-Series data.
+ */
+@Data
+@Slf4j
+@ToString(callSuper = true)
+@EqualsAndHashCode(callSuper = true)
+@Jacksonized
+@SuperBuilder
+public class Convolution1DNew extends AbstractConvolutionLayer {
+
+  /**
+   * Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last).
+   * See {@link CNN2DFormat} for more details.<br>
+   * Default: NCHW
+   *
+   * @param format Format for activations (in and out)
+   */
+  @Builder.Default
+  protected CNN2DFormat dataFormat =
+      CNN2DFormat.NCHW; // default value for legacy serialization reasons
+
+  @Builder.Default private RNNFormat rnnDataFormat = RNNFormat.NCW;
+
+  @Override
+  public ParamInitializer initializer() {
+    return ConvolutionNewParamInitializer.getInstance();
+  }
+
+  @Override
+  public org.deeplearning4j.nn.api.Layer instantiate(
+      NeuralNetConfiguration conf,
+      Collection<TrainingListener> trainingListeners,
+      int layerIndex,
+      INDArray layerParamsView,
+      boolean initializeParams,
+      DataType networkDataType) {
+    setNetConfiguration(conf);
+    LayerValidation.assertNInNOutSet("Convolution1D", getName(), layerIndex, getNIn(), getNOut());
+    LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
+    /*
+    Object ret;
+    try {
+      ret = lconf.getCanConfigure()
+          .getConstructor(LayerConfiguration.class, DataType.class)
+          .newInstance(new Object[] { lconf, networkDataType });
+    } catch (Exception e) {
+      throw new RuntimeException(e);
+    */
+    org.deeplearning4j.nn.layers.convolution.Convolution1DNewLayer ret =
+        new org.deeplearning4j.nn.layers.convolution.Convolution1DNewLayer(lconf, networkDataType);
+
+    ret.addTrainingListeners(trainingListeners);
+    ret.setIndex(layerIndex);
+    ret.setParamsViewArray(layerParamsView);
+    Map<String, INDArray> paramTable = initializer().init(this, layerParamsView, initializeParams);
+    ret.setParamTable(paramTable);
+    ret.setLayerConfiguration(this);
+    return ret;
+  }
+
+  @Override
+  public InputType getOutputType(int layerIndex, InputType inputType) {
+    if (inputType == null || inputType.getType() != InputType.Type.RNN) {
+      throw new IllegalStateException(
+          "Invalid input for 1D CNN layer (layer index = "
+              + layerIndex
+              + ", layer name = \""
+              + getName()
+              + "\"): expect RNN input type with size > 0. Got: "
+              + inputType);
+    }
+    InputType.InputTypeRecurrent it = (InputType.InputTypeRecurrent) inputType;
+    long inputTsLength = it.getTimeSeriesLength();
+    long outLength;
+    if (inputTsLength < 0) {
+      // Probably: user did InputType.recurrent(x) without specifying sequence length
+      outLength = -1;
+    } else {
+      outLength =
+          Convolution1DUtils.getOutputSize(
+              inputTsLength,
+              getKernelSize()[0],
+              getStride()[0],
+              getPadding()[0],
+              getConvolutionMode(),
+              getDilation()[0]);
+    }
+
+    return InputType.recurrent(nOut, outLength, rnnDataFormat);
+  }
+
+  @Override
+  public void setNIn(InputType inputType, boolean override) {
+    if (inputType == null || inputType.getType() != InputType.Type.RNN) {
+      throw new IllegalStateException(
+          "Invalid input for 1D CNN layer (layer name = \""
+              + getName()
+              + "\"): expect RNN input type with size > 0. Got: "
+              + inputType);
+    }
+
+    InputType.InputTypeRecurrent r = (InputType.InputTypeRecurrent) inputType;
+    if (nIn <= 0 || override) {
+      this.nIn = r.getSize();
+    }
+    if (this.rnnDataFormat == null || override) this.rnnDataFormat = r.getFormat();
+
+    if (this.dataFormat == null || override)
+      this.dataFormat = rnnDataFormat == RNNFormat.NCW ? CNN2DFormat.NCHW : CNN2DFormat.NHWC;
+  }
+
+  @Override
+  public InputPreProcessor getPreProcessorForInputType(InputType inputType) {
+    if (inputType == null) {
+      throw new IllegalStateException(
+          "Invalid input for Convolution1D layer (layer name=\""
+              + getName()
+              + "\"): input is null");
+    }
+
+    return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType, rnnDataFormat, getName());
+  }
+
+  /**
+   * This is a report of the estimated memory consumption for the given layer
+   *
+   * @param inputType Input type to the layer. Memory consumption is often a function of the input
+   *     type
+   * @return Memory report for the layer
+   */
+  @Override
+  public LayerMemoryReport getMemoryReport(InputType inputType) {
+    return null;
+  }
+
+  protected boolean allowCausal() {
+    return true;
+  }
+
+  private static final class Convolution1DNewBuilderImpl
+      extends Convolution1DNewBuilder<Convolution1DNew, Convolution1DNewBuilderImpl> {
+    public Convolution1DNew build() {
+      Convolution1DNew l = new Convolution1DNew(this);
+      if (l.getDilation() == null) {
+        dilation(1, 1);
+      }
+      if (l.getPadding() == null) {
+        padding(0);
+      }
+      l = new Convolution1DNew(this);
+
+      Convolution1DUtils.validateConvolutionModePadding(l.getConvolutionMode(), l.getPadding()[0]);
+      Convolution1DUtils.validateCnn1DKernelStridePadding(
+          l.getKernelSize()[0], l.getStride()[0], l.getPadding()[0]);
+      l.initializeConstraints();
+      return l;
+    }
+  }
+
+  public abstract static class Convolution1DNewBuilder<
+          C extends Convolution1DNew, B extends Convolution1DNewBuilder<C, B>>
+      extends AbstractConvolutionLayerBuilder<C, B> {
+    private int dimensions(Class arrayType) {
+      return arrayType.isArray() ? 1 + dimensions(arrayType.getComponentType()) : 0;
+    }
+
+    @Override
+    public B kernelSize(int @NonNull ... kernel) {
+      // Todo, we always provide arrays, but only first element is really used
+      if (dimensions(kernel.getClass()) > 1)
+        log.warn(
+            "Kernel size has '{}' dimensions, only using first dimensions for 1D convolution layer.",
+            dimensions(kernel.getClass()));
+      super.kernelSize(
+          ValidationUtils.validate1NonNegative(new int[] {kernel[0]}, "kernelSize")[0], 1);
+      return self();
+    }
+
+    public B padding(int @NonNull ... padding) {
+      // Todo, we always provide arrays, but only first element is really used
+      super.padding(ValidationUtils.validate1NonNegative(new int[] {padding[0]}, "padding"));
+
+      return self();
+    }
+
+    public B dilation(int @NonNull ... dilation) {
+      // Todo, we always provide arrays, but only first element is really used
+      super.dilation(ValidationUtils.validate1NonNegative(new int[] {dilation[0]}, "dilation"));
+      return self();
+    }
+
+    public B stride(int @NonNull ... stride) {
+      // Todo, we always provide arrays, but only first element is really used
+      super.stride(ValidationUtils.validate1NonNegative(new int[] {stride[0]}, "stride")[0], 1);
+      return self();
+    }
+  }
+}
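A possible way to configure the Convolution1DNew layer introduced above. This is a sketch built only from the builder methods visible in this hunk (kernelSize, stride, padding from AbstractConvolutionLayerBuilder, nIn/nOut inherited from the FeedForwardLayer builder as used elsewhere in this code base); defaults and additional options may differ in the actual sources:

    import org.deeplearning4j.nn.conf.layers.Convolution1DNew;

    public class Convolution1DNewUsageSketch {
      public static void main(String[] args) {
        // Only the first kernel/stride/padding element is used; the overridden builder
        // methods force the width dimension to 1.
        Convolution1DNew conv =
            Convolution1DNew.builder()
                .nIn(8)        // number of input channels (P variables of the series)
                .nOut(16)      // number of filters
                .kernelSize(5) // kernel height H; width is fixed to 1
                .stride(1)
                .padding(0)
                .build();
        System.out.println(conv.getKernelSize()[0]); // 5
      }
    }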
@@ -61,6 +61,23 @@ import org.nd4j.linalg.api.ndarray.INDArray;
 @EqualsAndHashCode(callSuper = true)
 @SuperBuilder(builderMethodName = "innerBuilder")
 public class ConvolutionLayer extends FeedForwardLayer {

+  public static ConvolutionLayerBuilder<?, ?> builder() {
+    return innerBuilder();
+  }
+
+  public static ConvolutionLayerBuilder<?, ?> builder(int... kernelSize) {
+    return innerBuilder().kernelSize(kernelSize);
+  }
+
+  public static ConvolutionLayerBuilder<?, ?> builder(int[] kernelSize, int[] stride) {
+    return innerBuilder().kernelSize(kernelSize).stride(stride);
+  }
+
+  public static ConvolutionLayerBuilder<?, ?> builder(
+      int[] kernelSize, int[] stride, int[] padding) {
+    return innerBuilder().kernelSize(kernelSize).stride(stride).padding(padding);
+  }
   /**
    * Size of the convolution rows/columns
    *
@@ -122,23 +139,6 @@ public class ConvolutionLayer extends FeedForwardLayer {
   @Builder.Default @JsonIgnore @EqualsAndHashCode.Exclude @Getter @Setter
   private boolean defaultValueOverriden = false;

-  public static ConvolutionLayerBuilder<?, ?> builder() {
-    return innerBuilder();
-  }
-
-  public static ConvolutionLayerBuilder<?, ?> builder(int... kernelSize) {
-    return innerBuilder().kernelSize(kernelSize);
-  }
-
-  public static ConvolutionLayerBuilder<?, ?> builder(int[] kernelSize, int[] stride) {
-    return innerBuilder().kernelSize(kernelSize).stride(stride);
-  }
-
-  public static ConvolutionLayerBuilder<?, ?> builder(
-      int[] kernelSize, int[] stride, int[] padding) {
-    return innerBuilder().kernelSize(kernelSize).stride(stride).padding(padding);
-  }
-
   public boolean hasBias() {
     return hasBias;
   }
@@ -429,6 +429,7 @@ public class ConvolutionLayer extends FeedForwardLayer {
     }
   }

+  /*
   private static final class ConvolutionLayerBuilderImpl
       extends ConvolutionLayerBuilder<ConvolutionLayer, ConvolutionLayerBuilderImpl> {
     public ConvolutionLayer build() {
@@ -473,6 +474,6 @@ public class ConvolutionLayer extends FeedForwardLayer {
       return l;
     }
   }
+  */

 }
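The static builder(...) overloads of ConvolutionLayer are only moved to the top of the class here and their behaviour is unchanged; the manually written ConvolutionLayerBuilderImpl is commented out, so build() comes from the Lombok-generated implementation. A short usage sketch of the shorthand overloads, assuming the inherited nIn/nOut builder methods behave as in the rest of the code base:

    import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;

    public class ConvolutionBuilderOverloadSketch {
      public static void main(String[] args) {
        // Two equivalent ways to start a 3x3 convolution via the overloads kept above
        ConvolutionLayer a = ConvolutionLayer.builder(3, 3).nIn(1).nOut(20).build();
        ConvolutionLayer b =
            ConvolutionLayer.builder(new int[] {3, 3}, new int[] {1, 1}, new int[] {0, 0})
                .nIn(1)
                .nOut(20)
                .build();
        System.out.println(a.getKernelSize()[0] == b.getKernelSize()[0]); // true
      }
    }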
@@ -38,56 +38,24 @@ import org.nd4j.linalg.api.buffer.DataType;
 import org.nd4j.linalg.api.ndarray.INDArray;

 /**
- * Deconvolution2D layer nIn in the input layer is the number of channels nOut is the number of filters to be used
- * in the net or in other words the channels The builder specifies the filter/kernel size, the stride and padding
- * The pooling layer takes the kernel size
+ * Deconvolution2D layer nIn in the input layer is the number of channels nOut is the number of
+ * filters to be used in the net or in other words the channels The builder specifies the
+ * filter/kernel size, the stride and padding The pooling layer takes the kernel size
  */
 @Data
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
-@Jacksonized @SuperBuilder(builderMethodName = "innerBuilder")
+@Jacksonized
+@SuperBuilder
 public class Deconvolution2D extends ConvolutionLayer {

+  @Builder.Default private CNN2DFormat format = CNN2DFormat.NCHW;

-  @Builder.Default
-  private CNN2DFormat format = CNN2DFormat.NCHW;
   protected boolean allowCausal() {
-    //Causal convolution - allowed for 1D only
+    // Causal convolution - allowed for 1D only
     return false;
   }

-  private static final class Deconvolution2DBuilderImpl extends Deconvolution2DBuilder<Deconvolution2D, Deconvolution2DBuilderImpl> {
-    public Deconvolution2D build() {
-      Deconvolution2D l = new Deconvolution2D(this);
-      l.initializeConstraints();
-      return l;
-    }
-  }
-  public static abstract class Deconvolution2DBuilder<C extends Deconvolution2D, B extends Deconvolution2DBuilder<C, B>> extends ConvolutionLayerBuilder<C, B> {
-
-    @Override
-    public B kernelSize(int... kernelSize) {
-      super.kernelSize(ValidationUtils.validate2NonNegative(kernelSize, false, "kernelSize"));
-      return self();
-    }
-    @Override
-    public B stride(int... stride) {
-      super.stride(ValidationUtils.validate2NonNegative(stride, false, "stride"));
-      return self();
-    }
-    @Override
-    public B padding(int... padding) {
-      super.padding(ValidationUtils.validate2NonNegative(padding, false, "padding"));
-      return self();
-    }
-    @Override
-    public B dilation(int... dilation) {
-      super.dilation(ValidationUtils.validate2NonNegative(dilation, false, "dilation"));
-      return self();
-    }
-  }
   public boolean hasBias() {
     return isHasBias();
   }
@@ -96,20 +64,25 @@ private CNN2DFormat format = CNN2DFormat.NCHW;
   public Deconvolution2D clone() {
     Deconvolution2D clone = (Deconvolution2D) super.clone();
     if (clone.getKernelSize() != null) {
-      clone.setKernelSize( clone.getKernelSize().clone());
+      clone.setKernelSize(clone.getKernelSize().clone());
     }
     if (clone.getStride() != null) {
-      clone.setStride( clone.getStride().clone());
+      clone.setStride(clone.getStride().clone());
     }
     if (clone.getPadding() != null) {
-      clone.setPadding( clone.getPadding().clone());
+      clone.setPadding(clone.getPadding().clone());
     }
     return clone;
   }

   @Override
-  public Layer instantiate(NeuralNetConfiguration conf, Collection<TrainingListener> trainingListeners,
-      int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) {
+  public Layer instantiate(
+      NeuralNetConfiguration conf,
+      Collection<TrainingListener> trainingListeners,
+      int layerIndex,
+      INDArray layerParamsView,
+      boolean initializeParams,
+      DataType networkDataType) {
     setNetConfiguration(conf);
     LayerValidation.assertNInNOutSet("Deconvolution2D", getName(), layerIndex, getNIn(), getNOut());
     LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
@@ -135,13 +108,61 @@ private CNN2DFormat format = CNN2DFormat.NCHW;
   @Override
   public InputType getOutputType(int layerIndex, InputType inputType) {
     if (inputType == null || inputType.getType() != InputType.Type.CNN) {
-      throw new IllegalStateException("Invalid input for Convolution layer (layer name=\"" + getName()
-          + "\"): Expected CNN input, got " + inputType);
+      throw new IllegalStateException(
+          "Invalid input for Convolution layer (layer name=\""
+              + getName()
+              + "\"): Expected CNN input, got "
+              + inputType);
     }

-    return InputTypeUtil.getOutputTypeDeconvLayer(inputType, getKernelSize(), getStride(), getPadding(), getDilation(), getConvolutionMode(),
-        nOut, layerIndex, getName(), Deconvolution2DLayer.class);
+    return InputTypeUtil.getOutputTypeDeconvLayer(
+        inputType,
+        getKernelSize(),
+        getStride(),
+        getPadding(),
+        getDilation(),
+        getConvolutionMode(),
+        nOut,
+        layerIndex,
+        getName(),
+        Deconvolution2DLayer.class);
   }

+  private static final class Deconvolution2DBuilderImpl
+      extends Deconvolution2DBuilder<Deconvolution2D, Deconvolution2DBuilderImpl> {
+    public Deconvolution2D build() {
+      Deconvolution2D l = new Deconvolution2D(this);
+      l.initializeConstraints();
+      return l;
+    }
+  }
+
+  public abstract static class Deconvolution2DBuilder<
+          C extends Deconvolution2D, B extends Deconvolution2DBuilder<C, B>>
+      extends ConvolutionLayerBuilder<C, B> {
+
+    @Override
+    public B kernelSize(int... kernelSize) {
+      super.kernelSize(ValidationUtils.validate2NonNegative(kernelSize, false, "kernelSize"));
+      return self();
+    }
+
+    @Override
+    public B stride(int... stride) {
+      super.stride(ValidationUtils.validate2NonNegative(stride, false, "stride"));
+      return self();
+    }
+
+    @Override
+    public B padding(int... padding) {
+      super.padding(ValidationUtils.validate2NonNegative(padding, false, "padding"));
+      return self();
+    }
+
+    @Override
+    public B dilation(int... dilation) {
+      super.dilation(ValidationUtils.validate2NonNegative(dilation, false, "dilation"));
+      return self();
+    }
+  }
 }
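getOutputType for Deconvolution2D is only reflowed here; it still delegates the size computation to InputTypeUtil.getOutputTypeDeconvLayer. For orientation, a hedged arithmetic sketch of the transposed-convolution output size such a helper conventionally computes for ConvolutionMode.Truncate; the helper itself is not shown in this diff, so treat the formula as an assumption:

    public class DeconvOutputSizeSketch {
      // out = stride * (in - 1) + effectiveKernel - 2 * padding,
      // where effectiveKernel = kernel + (kernel - 1) * (dilation - 1)
      static long deconvOut(long in, int kernel, int stride, int padding, int dilation) {
        int effectiveKernel = kernel + (kernel - 1) * (dilation - 1);
        return stride * (in - 1) + effectiveKernel - 2L * padding;
      }

      public static void main(String[] args) {
        // A 4x4 kernel with stride 2 and padding 1 doubles a 16x16 feature map to 32x32
        System.out.println(deconvOut(16, 4, 2, 1, 1)); // 32
      }
    }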
@@ -63,6 +63,7 @@ public class DenseLayer extends FeedForwardLayer {
     LayerValidation.assertNInNOutSet(
         "DenseLayerConfiguration", getName(), layerIndex, getNIn(), getNOut());
     LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
+    lconf.setNetConfiguration(conf);
     runInheritance();

     org.deeplearning4j.nn.layers.feedforward.dense.DenseLayer ret =
@@ -31,6 +31,7 @@ import lombok.experimental.SuperBuilder;
 import lombok.extern.slf4j.Slf4j;
 import net.brutex.ai.dnn.api.ILayerConfiguration;
 import net.brutex.ai.dnn.api.LayerType;
+import org.deeplearning4j.nn.api.Layer;
 import org.deeplearning4j.nn.api.ParamInitializer;
 import org.deeplearning4j.nn.api.layers.LayerConstraint;
 import org.deeplearning4j.nn.conf.InputPreProcessor;
@@ -56,7 +57,7 @@ import org.nd4j.linalg.learning.regularization.Regularization;
 @NoArgsConstructor
 // @JsonIdentityInfo(generator= ObjectIdGenerators.IntSequenceGenerator.class, property="@id")
 @Slf4j
-@SuperBuilder
+@SuperBuilder(toBuilder = true)
 public abstract class LayerConfiguration
     implements ILayerConfiguration, Serializable, Cloneable { // ITrainableLayerConfiguration

@@ -66,10 +67,20 @@ public abstract class LayerConfiguration
   @Getter @Setter protected List<LayerConstraint> biasConstraints;
   @Getter @Setter protected List<LayerConstraint> constraints;
   @Getter @Setter protected IWeightNoise weightNoise;
-  @Builder.Default private @Getter @Setter LinkedHashSet<String> variables = new LinkedHashSet<>();
+  @Builder.Default private @Getter @Setter @NonNull LinkedHashSet<String> variables = new LinkedHashSet<>();
   @Getter @Setter private IDropout dropOut;
   /** The type of the layer, basically defines the base class and its properties */
   @Builder.Default @Getter @Setter @NonNull private LayerType type = LayerType.UNKNOWN;

+  /**
+   * Number of parameters this layer has a result of its configuration
+   * @return number or parameters
+   */
+  public long numParams() {
+    return initializer().numParams(this);
+  }
+
   /**
    * A reference to the neural net configuration. This field is excluded from json serialization as
    * well as from equals check to avoid circular referenced.
@@ -22,6 +22,8 @@ package org.deeplearning4j.nn.conf.layers;

 import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
 import java.util.*;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
 import lombok.*;
 import lombok.experimental.SuperBuilder;
 import lombok.extern.jackson.Jacksonized;
@@ -59,10 +61,12 @@ public class LocallyConnected1D extends SameDiffLayer {
   /**
    * @param nIn Number of inputs to the layer (input size)
    */
+  @JsonProperty("nIn")
   private long nIn;
   /**
    * @param nOut Number of outputs (output size)
    */
+  @JsonProperty("nOut")
   private long nOut;
   /**
    * @param activation Activation function for the layer
@@ -34,6 +34,16 @@ import org.nd4j.linalg.learning.regularization.Regularization;
 @SuperBuilder
 public abstract class NoParamLayer extends LayerConfiguration {

+  /**
+   * Number of parameters this layer. This will always return 0
+   *
+   * @return 0
+   */
+  @Override
+  public long numParams() {
+    return 0;
+  }
+
   @Override
   public ParamInitializer initializer() {
     return EmptyParamInitializer.getInstance();
@@ -58,6 +68,7 @@ public abstract class NoParamLayer extends LayerConfiguration {

   /**
    * Will always return no-Op updater.
+   *
    * @return
    */
   @Override
@@ -65,7 +76,7 @@ public abstract class NoParamLayer extends LayerConfiguration {
     return Updater.NONE.getIUpdaterWithDefaultConfig();
   }

-  public static abstract class NoParamLayerBuilder<C extends NoParamLayer, B extends NoParamLayerBuilder<C,B>>
-      extends LayerConfigurationBuilder<C,B>
-      {}
+  public abstract static class NoParamLayerBuilder<
+          C extends NoParamLayer, B extends NoParamLayerBuilder<C, B>>
+      extends LayerConfigurationBuilder<C, B> {}
 }
@@ -23,6 +23,7 @@ package org.deeplearning4j.nn.conf.layers;
 import java.util.Collection;
 import java.util.Map;
 import lombok.EqualsAndHashCode;
+import lombok.NonNull;
 import lombok.ToString;
 import lombok.experimental.SuperBuilder;
 import lombok.extern.jackson.Jacksonized;
@@ -35,6 +36,7 @@ import org.deeplearning4j.optimize.api.TrainingListener;
 import org.deeplearning4j.util.Convolution1DUtils;
 import org.deeplearning4j.util.Convolution2DUtils;
 import org.deeplearning4j.util.ValidationUtils;
+import org.jetbrains.annotations.NotNull;
 import org.nd4j.linalg.api.buffer.DataType;
 import org.nd4j.linalg.api.ndarray.INDArray;

@@ -50,9 +52,91 @@ import org.nd4j.linalg.api.ndarray.INDArray;
 @ToString(callSuper = true)
 @EqualsAndHashCode(callSuper = true)
 @Jacksonized
-@SuperBuilder
+@SuperBuilder(builderMethodName = "innerBuilder")
 public class Subsampling1DLayer extends SubsamplingLayer {

+  public static Subsampling1DLayerBuilder<?, ?> builder() {
+    return innerBuilder();
+  }
+
+  public static Subsampling1DLayerBuilder<?, ?> builder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType) {
+    return innerBuilder()
+        .poolingType(poolingType);
+  }
+
+  public static Subsampling1DLayerBuilder<?, ?> builder(PoolingType poolingType) {
+    return innerBuilder()
+        .poolingType(poolingType.toPoolingType());
+  }
+
+  public static Subsampling1DLayerBuilder<?, ?> builder(int... kernelSize) {
+    return innerBuilder()
+        .kernelSize(kernelSize);
+  }
+
+  public static Subsampling1DLayerBuilder<?, ?> builder(int[] kernelSize, int[] stride) {
+    return innerBuilder()
+        .kernelSize(kernelSize)
+        .stride(stride);
+  }
+
+  public static Subsampling1DLayerBuilder<?, ?> builder(int[] kernelSize, int[] stride, int[] padding) {
+    return innerBuilder()
+        .kernelSize(kernelSize)
+        .stride(stride)
+        .padding(padding);
+  }
+
+  public static Subsampling1DLayerBuilder<?, ?> builder(PoolingType poolingType, int[] kernelSize, int[] stride, int[] padding) {
+    return innerBuilder()
+        .poolingType(poolingType.toPoolingType())
+        .kernelSize(kernelSize)
+        .stride(stride)
+        .padding(padding)
+        ;
+  }
+
+  public static Subsampling1DLayerBuilder<?, ?> builder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType, int[] kernelSize, int[] stride, int[] padding) {
+    return innerBuilder()
+        .poolingType(poolingType)
+        .kernelSize(kernelSize)
+        .stride(stride)
+        .padding(padding)
+        ;
+  }
+
+  public static Subsampling1DLayerBuilder<?, ?> builder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType, int[] kernelSize) {
+    return innerBuilder()
+        .poolingType(poolingType)
+        .kernelSize(kernelSize)
+        ;
+  }
+
+  public static Subsampling1DLayerBuilder<?, ?> builder(PoolingType poolingType, int[] kernelSize) {
+    return innerBuilder()
+        .poolingType(poolingType.toPoolingType())
+        .kernelSize(kernelSize)
+        ;
+  }
+
+  public static Subsampling1DLayerBuilder<?, ?> builder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType, int[] kernelSize, int[] stride) {
+    return innerBuilder()
+        .poolingType(poolingType)
+        .kernelSize(kernelSize)
+        .stride(stride)
+        ;
+  }
+
+  public static Subsampling1DLayerBuilder<?, ?> builder(PoolingType poolingType, int[] kernelSize, int[] stride) {
+    return innerBuilder()
+        .poolingType(poolingType.toPoolingType())
+        .kernelSize(kernelSize)
+        .stride(stride)
+        ;
+  }
+
   @Override
   public org.deeplearning4j.nn.api.Layer instantiate(
       NeuralNetConfiguration conf,
@@ -176,20 +260,20 @@ public class Subsampling1DLayer extends SubsamplingLayer {
      * @return
      */
     @Override
-    public B kernelSize(int... kernelSize) {
-      super.kernelSize( ValidationUtils.validate1NonNegative(kernelSize, "kernelSize")[0]);
+    public B kernelSize(int @NonNull ... kernelSize) {
+      super.kernelSize(ValidationUtils.validate1NonNegative(new int[]{kernelSize[0]}, "kernelSize")[0]); //fix width = 1
       return self();
     }

     @Override
-    public B stride(int... stride) {
-      super.stride( ValidationUtils.validate1NonNegative(stride, "stride")[0]);
+    public B stride(@NotNull int... stride) {
+      super.stride( ValidationUtils.validate1NonNegative(new int[]{stride[0]}, "stride")[0]);
       return self();
     }

     @Override
-    public B padding(int... padding) {
-      super.padding( ValidationUtils.validate1NonNegative(padding, "padding")[0]);
+    public B padding(@NotNull int... padding) {
+      super.padding( ValidationUtils.validate1NonNegative(new int[]{padding[0]}, "padding"));
       return self();
     }
   }
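The Subsampling1DLayer change replaces the single Lombok builder entry point with a family of static builder(...) overloads that mirror the old constructor signatures. A usage sketch based only on the overloads added above; PoolingType here is the enum nested in SubsamplingLayer, and the printed output is whatever the Lombok-generated toString yields:

    import org.deeplearning4j.nn.conf.layers.Subsampling1DLayer;
    import org.deeplearning4j.nn.conf.layers.SubsamplingLayer;

    public class Subsampling1DBuilderSketch {
      public static void main(String[] args) {
        // Max pooling over windows of 2 time steps with stride 2, via the overload added above
        Subsampling1DLayer pool =
            Subsampling1DLayer.builder(
                    SubsamplingLayer.PoolingType.MAX, new int[] {2}, new int[] {2})
                .build();
        System.out.println(pool);
      }
    }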
@@ -27,10 +27,7 @@ import lombok.*;
 import lombok.experimental.SuperBuilder;
 import lombok.extern.jackson.Jacksonized;
 import org.deeplearning4j.nn.api.ParamInitializer;
-import org.deeplearning4j.nn.conf.CNN2DFormat;
-import org.deeplearning4j.nn.conf.ConvolutionMode;
-import org.deeplearning4j.nn.conf.InputPreProcessor;
-import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
+import org.deeplearning4j.nn.conf.*;
 import org.deeplearning4j.nn.conf.inputs.InputType;
 import org.deeplearning4j.nn.conf.memory.LayerMemoryReport;
 import org.deeplearning4j.nn.conf.memory.MemoryReport;
@@ -84,6 +81,7 @@ public class SubsamplingLayer extends NoParamLayer {
    * @param padding padding in the height and width dimensions
    */
   @Builder.Default protected int[] padding = new int[] {0, 0};
+
   protected int pnorm;
   @Builder.Default protected double eps = 1e-8;
   /**
@@ -104,6 +102,7 @@ public class SubsamplingLayer extends NoParamLayer {
    */
   protected @Builder.Default CNN2DFormat dataFormat =
       CNN2DFormat.NCHW; // default value for legacy reasons
+  protected @Builder.Default RNNFormat rnnFormat = RNNFormat.NCW;
   /**
    * When doing average pooling, should the padding values be included in the divisor or not?<br>
    * Not applicable for max and p-norm pooling.<br>
@@ -127,6 +126,7 @@ public class SubsamplingLayer extends NoParamLayer {
    * average pooling
    */
   @Builder.Default protected boolean avgPoolIncludePadInDivisor = true;
+
   /**
    * Kernel dilation. Default: {1, 1}, which is standard convolutions. Used for implementing dilated
    * convolutions, which are also known as atrous convolutions.<br>
@@ -301,7 +301,7 @@ public class SubsamplingLayer extends NoParamLayer {
   public void setNIn(InputType inputType, boolean override) {
     // No op: subsampling layer doesn't have nIn value
     if (!defaultValueOverridden || override) {
-      this.dataFormat = ((InputType.InputTypeConvolutional) inputType).getFormat();
+      this.rnnFormat = ((InputType.InputTypeRecurrent) inputType).getFormat();
       defaultValueOverridden = true;
     }
   }
@@ -355,14 +355,6 @@ public class SubsamplingLayer extends NoParamLayer {
         .build();
   }

-  public int getPnorm() {
-    return pnorm;
-  }
-
-  public double getEps() {
-    return eps;
-  }
-
   public enum PoolingType {
     MAX,
     AVG,
@@ -394,33 +386,33 @@ public class SubsamplingLayer extends NoParamLayer {
       return self();
     }

-    public B eps(int eps) {
+    public B eps(double eps) {
       ValidationUtils.validateNonNegative(eps, "eps");
       this.eps$value = eps;
       this.eps$set = true;
       return self();
     }

-    public B kernelSize(int... kernelSize) {
-      this.kernelSize$value = ValidationUtils.validate2NonNegative(kernelSize, false, "kernelSize");
+    public B kernelSize(int @NonNull... kernelSize) {
+      this.kernelSize$value = ValidationUtils.validate2NonNegative(kernelSize, true, "kernelSize");
       this.kernelSize$set = true;
       return self();
     }

-    public B stride(int... stride) {
-      this.stride$value = ValidationUtils.validate2NonNegative(stride, false, "stride");
+    public B stride(int @NonNull ... stride) {
+      this.stride$value = ValidationUtils.validate2NonNegative(stride, true, "stride");
       this.stride$set = true;
       return self();
     }

-    public B padding(int... padding) {
-      this.padding$value = ValidationUtils.validate2NonNegative(padding, false, "padding");
+    public B padding(int @NonNull ... padding) {
+      this.padding$value = ValidationUtils.validate2NonNegative(padding, true, "padding");
       this.padding$set = true;
       return self();
     }

-    public B dilation(int... dilation) {
-      this.dilation$value = ValidationUtils.validate2NonNegative(dilation, false, "dilation");
+    public B dilation(int @NonNull ... dilation) {
+      this.dilation$value = ValidationUtils.validate2NonNegative(dilation, true, "dilation");
       this.dilation$set = true;
       return self();
     }
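In the SubsamplingLayer builder setters above, the second argument of ValidationUtils.validate2NonNegative flips from false to true; in this code base that flag appears to allow a length-1 array and expand it to both spatial dimensions. A sketch that exercises just that helper, with the expansion noted as the presumed effect of the flag:

    import java.util.Arrays;
    import org.deeplearning4j.util.ValidationUtils;

    public class KernelExpansionSketch {
      public static void main(String[] args) {
        // With the new second argument set to true, a single value is accepted and
        // (presumably) duplicated to both dimensions; with false it would be rejected.
        int[] expanded = ValidationUtils.validate2NonNegative(new int[] {2}, true, "kernelSize");
        System.out.println(Arrays.toString(expanded)); // expected: [2, 2]
      }
    }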
@@ -74,6 +74,7 @@ public class FrozenLayer extends LayerConfiguration {
       boolean initializeParams,
       DataType networkDataType) {

+    innerConfiguration.setNetConfiguration(conf);
     // Need to be able to instantiate a layer, from a config - for JSON -> net type situations
     org.deeplearning4j.nn.api.Layer underlying =
         innerConfiguration.instantiate(
@ -20,6 +20,7 @@
|
||||||
|
|
||||||
package org.deeplearning4j.nn.conf.layers.samediff;
|
package org.deeplearning4j.nn.conf.layers.samediff;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
@ -52,7 +53,8 @@ import org.nd4j.linalg.learning.regularization.WeightDecay;
|
||||||
@EqualsAndHashCode(callSuper = true, doNotUseGetters = true)
|
@EqualsAndHashCode(callSuper = true, doNotUseGetters = true)
|
||||||
@NoArgsConstructor
|
@NoArgsConstructor
|
||||||
@SuperBuilder
|
@SuperBuilder
|
||||||
public abstract class AbstractSameDiffLayer extends LayerConfiguration {
|
public abstract class AbstractSameDiffLayer extends LayerConfiguration
|
||||||
|
implements org.deeplearning4j.nn.api.ITraininableLayerConfiguration {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The regularization for the parameters (excluding biases) - for example {@link WeightDecay}
|
* The regularization for the parameters (excluding biases) - for example {@link WeightDecay}
|
||||||
|
@@ -63,16 +65,14 @@ public abstract class AbstractSameDiffLayer extends LayerConfiguration {
    * @param regularization Regularization to apply for the network parameters/weights (excluding
    *     biases)
    */
-  @Getter
-  protected List<Regularization> regularization;
+  @Getter protected List<Regularization> regularization;

   /**
    * The regularization for the biases only - for example {@link WeightDecay} -- SETTER -- Set the
    * regularization for the biases only - for example {@link WeightDecay}
    *
    * @param regularizationBias Regularization to apply for the network biases only
    */
-  @Getter
-  protected List<Regularization> regularizationBias;
+  @Getter protected List<Regularization> regularizationBias;

   /**
    * Gradient updater. For example, {@link org.nd4j.linalg.learning.config.Adam} or {@link
    *     org.nd4j.linalg.learning.config.Nesterovs}
@@ -87,20 +87,22 @@ public abstract class AbstractSameDiffLayer extends LayerConfiguration {
    * @param biasUpdater Updater to use for bias parameters
    */
   protected @Getter @Setter IUpdater biasUpdater;
-  @Getter @Setter
-  protected GradientNormalization gradientNormalization;
-  @Getter @Setter
-  protected double gradientNormalizationThreshold = Double.NaN;
-  @Getter @Setter
-  private SDLayerParams layerParams;
+  @Getter @Setter protected GradientNormalization gradientNormalization;
+  @Getter @Setter protected double gradientNormalizationThreshold = Double.NaN;
+  @Getter @Setter private SDLayerParams layerParams;
+  @Getter @Setter private DataType dataType;

   @Override
   public void runInheritance(@NotNull NeuralNetConfiguration conf) {
     super.runInheritance(conf);
-    if (this.biasUpdater == null ) this.biasUpdater = conf.getBiasUpdater();
+    if (this.biasUpdater == null) this.biasUpdater = conf.getBiasUpdater();
     if (this.updater == null) this.updater = conf.getUpdater();
-    if (this.regularizationBias == null || regularizationBias.isEmpty()) this.regularizationBias = conf.getRegularizationBias();
-    if (this.regularization == null || regularization.isEmpty()) this.regularization = conf.getRegularization();
+    if (this.regularizationBias == null || regularizationBias.isEmpty())
+      this.regularizationBias = conf.getRegularizationBias();
+    if (this.regularization == null || regularization.isEmpty())
+      this.regularization = conf.getRegularization();
     // if( this.weightInit == null) this.weightInit = conf.getWeightInit();
     if (this.gradientNormalization == null)
       this.gradientNormalization = conf.getGradientNormalization();
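The runInheritance body above follows a simple rule: any value left unset on the layer configuration is filled in from the enclosing network configuration. A minimal, self-contained illustration of that fallback pattern follows; the class and field names are simplified stand-ins, not the actual DL4J types.

import java.util.List;

// Simplified sketch of the "inherit unset values from the net configuration" pattern.
class LayerSettings {
  Double learningRate;           // null means "not set on this layer"
  List<String> regularization;   // null/empty means "not set on this layer"

  void runInheritance(LayerSettings netDefaults) {
    if (learningRate == null) learningRate = netDefaults.learningRate;
    if (regularization == null || regularization.isEmpty())
      regularization = netDefaults.regularization;
  }
}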
@@ -109,6 +111,7 @@ public abstract class AbstractSameDiffLayer extends LayerConfiguration {
       this.gradientNormalizationThreshold = conf.getGradientNormalizationThreshold();
     }
   }

   @Override
   public List<Regularization> getRegularizationByParam(String paramName) {
     if (layerParams.isWeightParam(paramName)) {
@@ -119,6 +122,7 @@ public abstract class AbstractSameDiffLayer extends LayerConfiguration {
     return null;
   }

+  @JsonIgnore
   public SDLayerParams getLayerParams() {
     if (layerParams == null) {
       layerParams = new SDLayerParams();
@@ -138,7 +142,6 @@ public abstract class AbstractSameDiffLayer extends LayerConfiguration {
     return null;
   }

   /**
    * Define the parameters for the network. Use {@link SDLayerParams#addWeightParam(String,
    *     long...)} and {@link SDLayerParams#addBiasParam(String, long...)}
@@ -207,7 +210,6 @@ public abstract class AbstractSameDiffLayer extends LayerConfiguration {
             fanIn, fanOut, array.shape(), weightInit, null, paramReshapeOrder(null), array);
   }

   /**
    * This method generates an "all ones" mask array for use in the SameDiff model when none is
    * provided.
@@ -52,8 +52,8 @@ import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator;
 /** A layer with input and output, no parameters or gradients */
 @NoArgsConstructor(force = true)
 @Slf4j
-//@JsonIdentityInfo(generator = ObjectIdGenerators.IntSequenceGenerator.class, property = "@id")
-//@JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, property = "__class")
+// @JsonIdentityInfo(generator = ObjectIdGenerators.IntSequenceGenerator.class, property = "@id")
+// @JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, property = "__class")
 public abstract class AbstractLayer<LayerConf_T extends LayerConfiguration> implements Layer {

   private final @Getter List<String> variables = new ArrayList<>();
@@ -80,10 +80,8 @@ public abstract class AbstractLayer<LayerConf_T extends LayerConfiguration> impl
   protected DataType dataType;
   protected @Getter @Setter int iterationCount;
   protected @Getter @Setter int epochCount;
-  @JsonIgnore
-  private @Getter @Setter IModel net;
-  @JsonIgnore
-  @Getter @Setter @NonNull private NeuralNetConfiguration netConfiguration;
+  @JsonIgnore private @Getter @Setter IModel net;
+  @JsonIgnore @Getter @Setter @NonNull private NeuralNetConfiguration netConfiguration;

   public AbstractLayer(@NonNull LayerConfiguration layerConf, @NonNull DataType dataType) {
     //noinspection unchecked
@@ -95,19 +93,18 @@ public abstract class AbstractLayer<LayerConf_T extends LayerConfiguration> impl
     }
     this.dataType = dataType;
     if (layerConfiguration.getNetConfiguration() == null) {
-      throw new RuntimeException("You cannot create a layer from a layer configuration, that is not part of any neural network configuration.");
+      throw new RuntimeException(
+          "You cannot create a layer from a layer configuration, that is not part of any neural network configuration.");
     }
     this.net = layerConfiguration.getNetConfiguration().getNet();
   }

   public void addTrainingListeners(TrainingListener... listeners) {
-    if(listeners != null)
-      trainingListeners.addAll(List.of(listeners));
+    if (listeners != null) trainingListeners.addAll(List.of(listeners));
   }

   public void addTrainingListeners(Collection<TrainingListener> listeners) {
-    if(listeners != null)
-      trainingListeners.addAll(listeners);
+    if (listeners != null) trainingListeners.addAll(listeners);
   }

   @Override
@@ -471,7 +468,7 @@ public abstract class AbstractLayer<LayerConf_T extends LayerConfiguration> impl

   @Override
   public int getInputMiniBatchSize() {
-    if(input==null) return 0;
+    if (input == null) return 0;
     return (int) input.size(0);
   }

@@ -565,8 +562,9 @@ public abstract class AbstractLayer<LayerConf_T extends LayerConfiguration> impl
    */
   @Override
   public void setParamTable(Map<String, INDArray> paramTable) {
-    log.warn("Using setParamTable on this layer {} has no effect.", getLayerConfiguration().getName());
-    //throw new RuntimeException("Not implemented");
+    log.warn(
+        "Using setParamTable on this layer {} has no effect.", getLayerConfiguration().getName());
+    // throw new RuntimeException("Not implemented");
   }

   /**
@@ -578,7 +576,7 @@ public abstract class AbstractLayer<LayerConf_T extends LayerConfiguration> impl
    */
   @Override
   public Map<String, INDArray> getParamTable(boolean isBackprop) {
     // throw new RuntimeException("Not implemented");
     return null;
   }

@@ -590,7 +588,7 @@ public abstract class AbstractLayer<LayerConf_T extends LayerConfiguration> impl
    */
   @Override
   public INDArray getParams() {
-    //throw new RuntimeException("Not implemented");
+    // throw new RuntimeException("Not implemented");
     return null;
   }

@@ -662,6 +662,7 @@ public abstract class BaseLayer<LayerConfT extends BaseLayerConfiguration>
    */
   public boolean hasBias() {
     // Overridden by layers supporting no bias mode: dense, output, convolutional, embedding
+    //return true;
     return true;
   }

@@ -24,6 +24,7 @@ import lombok.extern.slf4j.Slf4j;
 import org.deeplearning4j.nn.api.ITraininableLayerConfiguration;
 import org.deeplearning4j.nn.api.Layer;
 import org.deeplearning4j.nn.conf.CacheMode;
+import org.deeplearning4j.nn.conf.layers.BaseLayerConfiguration;
 import org.deeplearning4j.nn.conf.misc.DummyConfig;
 import org.deeplearning4j.nn.gradient.DefaultGradient;
 import org.deeplearning4j.nn.gradient.Gradient;
@@ -88,6 +89,8 @@ public class FrozenLayer extends BaseWrapperLayer {
     return underlying.activate(input, false, workspaceMgr);
   }


   @Override
   public void fit() {
     if (!logFit) {
@@ -51,21 +51,26 @@ public class Convolution1DLayer extends ConvolutionLayer {
     super(conf, dataType);
   }

   @Override
-  public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
+  public Pair<Gradient, INDArray> backpropGradient(
+      INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
     assertInputSet(true);
     if (epsilon.rank() != 3)
-      throw new DL4JInvalidInputException("Got rank " + epsilon.rank()
+      throw new DL4JInvalidInputException(
+          "Got rank "
+              + epsilon.rank()
               + " array as epsilon for Convolution1D backprop with shape "
               + Arrays.toString(epsilon.shape())
-              + ". Expected rank 3 array with shape [minibatchSize, features, length]. " + layerId());
-    Pair<INDArray,INDArray> fwd = preOutput(false,true,workspaceMgr);
+              + ". Expected rank 3 array with shape [minibatchSize, features, length]. "
+              + layerId());
+    Pair<INDArray, INDArray> fwd = preOutput(false, true, workspaceMgr);
     IActivation afn = getTypedLayerConfiguration().getActivationFn();
-    INDArray delta = afn.backprop(fwd.getFirst(), epsilon).getFirst(); //TODO handle activation function params
+    INDArray delta =
+        afn.backprop(fwd.getFirst(), epsilon).getFirst(); // TODO handle activation function params

-    Convolution1D c = getTypedLayerConfiguration();
-    Conv1DConfig conf = Conv1DConfig.builder()
+    org.deeplearning4j.nn.conf.layers.ConvolutionLayer c = getTypedLayerConfiguration();
+    Conv1DConfig conf =
+        Conv1DConfig.builder()
             .k(c.getKernelSize()[0])
             .s(c.getStride()[0])
             .d(c.getDilation()[0])
@@ -74,41 +79,48 @@ public class Convolution1DLayer extends ConvolutionLayer {
             .paddingMode(Convolution2DUtils.paddingModeForConvolutionMode(convolutionMode))
             .build();

-    INDArray w = Convolution1DUtils.reshapeWeightArrayOrGradientForFormat(
-        getParam(ConvolutionParamInitializer.WEIGHT_KEY),
-        RNNFormat.NCW);
+    INDArray w =
+        Convolution1DUtils.reshapeWeightArrayOrGradientForFormat(
+            getParam(ConvolutionParamInitializer.WEIGHT_KEY), RNNFormat.NCW);

     INDArray[] inputArrs;
     INDArray[] outputArrs;
-    INDArray wg = Convolution1DUtils.reshapeWeightArrayOrGradientForFormat(
-        gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY),
-        getRnnDataFormat());
-    INDArray epsOut = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, input.dataType(), input.shape());
+    INDArray wg =
+        Convolution1DUtils.reshapeWeightArrayOrGradientForFormat(
+            gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY), getRnnDataFormat());
+    INDArray epsOut =
+        workspaceMgr.createUninitialized(
+            ArrayType.ACTIVATION_GRAD, input.dataType(), input.shape());
     INDArray input = this.input.castTo(dataType);
-    if(getTypedLayerConfiguration().getRnnDataFormat() == RNNFormat.NWC) {
-      input = input.permute(0,2,1); //NHWC to NCHW
+    if (getTypedLayerConfiguration().getRnnDataFormat() == RNNFormat.NWC) {
+      input = input.permute(0, 2, 1); // NHWC to NCHW
     }

-    if(getTypedLayerConfiguration().hasBias()) {
+    if (getTypedLayerConfiguration().hasBias()) {
       INDArray b = getParam(ConvolutionParamInitializer.BIAS_KEY);
       b = b.reshape(b.length());
-      inputArrs = new INDArray[]{input, w, b, delta};
+      inputArrs = new INDArray[] {input, w, b, delta};
       INDArray bg = gradientViews.get(ConvolutionParamInitializer.BIAS_KEY);
       bg = bg.reshape(bg.length());
-      outputArrs = new INDArray[]{epsOut, wg, bg};
+      outputArrs = new INDArray[] {epsOut, wg, bg};
     } else {
-      inputArrs = new INDArray[]{input, w, delta};
-      outputArrs = new INDArray[]{epsOut, wg};
+      inputArrs = new INDArray[] {input, w, delta};
+      outputArrs = new INDArray[] {epsOut, wg};
     }

     Conv1DDerivative op = new Conv1DDerivative(inputArrs, outputArrs, conf);
     Nd4j.exec(op);

     Gradient retGradient = new DefaultGradient();
-    if(getTypedLayerConfiguration().hasBias()) {
-      retGradient.setGradientFor(ConvolutionParamInitializer.BIAS_KEY, gradientViews.get(ConvolutionParamInitializer.BIAS_KEY));
+    if (getTypedLayerConfiguration().hasBias()) {
+      retGradient.setGradientFor(
+          ConvolutionParamInitializer.BIAS_KEY,
+          gradientViews.get(ConvolutionParamInitializer.BIAS_KEY));
     }
-    retGradient.setGradientFor(ConvolutionParamInitializer.WEIGHT_KEY, gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY), 'c');
+    retGradient.setGradientFor(
+        ConvolutionParamInitializer.WEIGHT_KEY,
+        gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY),
+        'c');
     if (getRnnDataFormat() == RNNFormat.NWC) {
       epsOut = epsOut.permute(0, 2, 1);
     }
@@ -116,8 +128,9 @@ public class Convolution1DLayer extends ConvolutionLayer {
   }

   @Override
-  protected Pair<INDArray, INDArray> preOutput4d(boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) {
-    Pair<INDArray,INDArray> preOutput = super.preOutput(true, forBackprop, workspaceMgr);
+  protected Pair<INDArray, INDArray> preOutput4d(
+      boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) {
+    Pair<INDArray, INDArray> preOutput = super.preOutput(true, forBackprop, workspaceMgr);
     INDArray p3d = preOutput.getFirst();
     INDArray p = preOutput.getFirst().reshape(p3d.size(0), p3d.size(1), p3d.size(2), 1);
     preOutput.setFirst(p);
@@ -125,16 +138,18 @@ public class Convolution1DLayer extends ConvolutionLayer {
   }

   @Override
-  protected Pair<INDArray,INDArray> preOutput(boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) {
+  protected Pair<INDArray, INDArray> preOutput(
+      boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) {
     assertInputSet(false);

     INDArray input = this.input.castTo(dataType);
-    if(getTypedLayerConfiguration().getRnnDataFormat() == RNNFormat.NWC) {
-      input = input.permute(0,2,1); //NHWC to NCHW
+    if (getTypedLayerConfiguration().getRnnDataFormat() == RNNFormat.NWC) {
+      input = input.permute(0, 2, 1); // NHWC to NCHW
     }

-    Convolution1D c = getTypedLayerConfiguration();
-    Conv1DConfig conf = Conv1DConfig.builder()
+    org.deeplearning4j.nn.conf.layers.ConvolutionLayer c = getTypedLayerConfiguration();
+    Conv1DConfig conf =
+        Conv1DConfig.builder()
             .k(c.getKernelSize()[0])
             .s(c.getStride()[0])
             .d(c.getDilation()[0])
|
@ -143,19 +158,17 @@ public class Convolution1DLayer extends ConvolutionLayer {
|
||||||
.paddingMode(Convolution2DUtils.paddingModeForConvolutionMode(convolutionMode))
|
.paddingMode(Convolution2DUtils.paddingModeForConvolutionMode(convolutionMode))
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
|
INDArray w =
|
||||||
INDArray w = Convolution1DUtils.reshapeWeightArrayOrGradientForFormat(
|
Convolution1DUtils.reshapeWeightArrayOrGradientForFormat(
|
||||||
getParam(ConvolutionParamInitializer.WEIGHT_KEY)
|
getParam(ConvolutionParamInitializer.WEIGHT_KEY), RNNFormat.NCW);
|
||||||
,RNNFormat.NCW);
|
|
||||||
|
|
||||||
|
|
||||||
INDArray[] inputs;
|
INDArray[] inputs;
|
||||||
if(getTypedLayerConfiguration().hasBias()) {
|
if (getTypedLayerConfiguration().hasBias()) {
|
||||||
INDArray b = getParam(ConvolutionParamInitializer.BIAS_KEY);
|
INDArray b = getParam(ConvolutionParamInitializer.BIAS_KEY);
|
||||||
b = b.reshape(b.length());
|
b = b.reshape(b.length());
|
||||||
inputs = new INDArray[]{input, w, b};
|
inputs = new INDArray[] {input, w, b};
|
||||||
} else {
|
} else {
|
||||||
inputs = new INDArray[]{input, w};
|
inputs = new INDArray[] {input, w};
|
||||||
}
|
}
|
||||||
|
|
||||||
Conv1D op = new Conv1D(inputs, null, conf);
|
Conv1D op = new Conv1D(inputs, null, conf);
|
||||||
|
@@ -164,46 +177,58 @@ public class Convolution1DLayer extends ConvolutionLayer {
     Nd4j.exec(op);
     INDArray output = op.getOutputArgument(0);

-    if(getRnnDataFormat() == RNNFormat.NWC) {
-      output = output.permute(0,2,1);
+    if (getRnnDataFormat() == RNNFormat.NWC) {
+      output = output.permute(0, 2, 1);
     }

     return new Pair<>(output, null);
   }

   @Override
   public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) {
     INDArray act4d = super.activate(training, workspaceMgr);
-    INDArray act3d = act4d.rank() > 3 ?
-        act4d.reshape(act4d.size(0), act4d.size(1), act4d.size(2)) : act4d;
+    INDArray act3d =
+        act4d.rank() > 3 ? act4d.reshape(act4d.size(0), act4d.size(1), act4d.size(2)) : act4d;

-    if(maskArray != null) {
-      INDArray maskOut = feedForwardMaskArray(maskArray, MaskState.Active, (int)act3d.size(0)).getFirst();
-      Preconditions.checkState(act3d.size(0) == maskOut.size(0) && act3d.size(2) == maskOut.size(1),
-          "Activations dimensions (0,2) and mask dimensions (0,1) don't match: Activations %s, Mask %s",
-          act3d.shape(), maskOut.shape());
+    if (maskArray != null) {
+      INDArray maskOut =
+          feedForwardMaskArray(maskArray, MaskState.Active, (int) act3d.size(0)).getFirst();
+      Preconditions.checkState(
+          act3d.size(0) == maskOut.size(0) && act3d.size(2) == maskOut.size(1),
+          "Activations dimensions (0,2) and mask dimensions (0,1) don't match: Activations %s, Mask %s",
+          act3d.shape(),
+          maskOut.shape());
       Broadcast.mul(act3d, maskOut, act3d, 0, 2);
     }

-    return workspaceMgr.leverageTo(ArrayType.ACTIVATIONS, act3d); //Should be zero copy most of the time
+    return workspaceMgr.leverageTo(
+        ArrayType.ACTIVATIONS, act3d); // Should be zero copy most of the time
   }

   @Override
-  public Pair<INDArray, MaskState> feedForwardMaskArray(INDArray maskArray, MaskState currentMaskState,
-      int minibatchSize) {
-    INDArray reduced = Convolution2DUtils.cnn1dMaskReduction(maskArray, getTypedLayerConfiguration().getKernelSize()[0],
-        getTypedLayerConfiguration().getStride()[0], getTypedLayerConfiguration().getPadding()[0], getTypedLayerConfiguration().getDilation()[0],
-        getTypedLayerConfiguration().getConvolutionMode());
+  public Pair<INDArray, MaskState> feedForwardMaskArray(
+      INDArray maskArray, MaskState currentMaskState, int minibatchSize) {
+    INDArray reduced =
+        Convolution2DUtils.cnn1dMaskReduction(
+            maskArray,
+            getTypedLayerConfiguration().getKernelSize()[0],
+            getTypedLayerConfiguration().getStride()[0],
+            getTypedLayerConfiguration().getPadding()[0],
+            getTypedLayerConfiguration().getDilation()[0],
+            getTypedLayerConfiguration().getConvolutionMode());
     return new Pair<>(reduced, currentMaskState);
   }

-  @Override
-  public Convolution1D getTypedLayerConfiguration() {
-    return (Convolution1D)layerConfiguration;
-  }
-
-  private RNNFormat getRnnDataFormat(){
+  private RNNFormat getRnnDataFormat() {
     return getTypedLayerConfiguration().getRnnDataFormat();
   }
+
+  /**
+   *
+   * @return
+   */
+  @Override
+  public Convolution1D getTypedLayerConfiguration() {
+    return (Convolution1D) super.getTypedLayerConfiguration();
+  }
 }
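Both the patched Convolution1DLayer above and the new Convolution1DNewLayer below handle NWC input by permuting it to NCW before running the Conv1D op, then permuting the result back. A small stand-alone sketch of that axis swap; the shapes and class name are illustrative only.

import java.util.Arrays;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class RnnFormatPermuteSketch {
  public static void main(String[] args) {
    INDArray nwc = Nd4j.rand(2, 10, 4);   // [minibatch, length, channels] (NWC)
    INDArray ncw = nwc.permute(0, 2, 1);  // [minibatch, channels, length] (NCW)
    System.out.println(Arrays.toString(ncw.shape()));  // prints [2, 4, 10]
  }
}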
@@ -0,0 +1,226 @@
/*
 * ******************************************************************************
 * *
 * *
 * * This program and the accompanying materials are made available under the
 * * terms of the Apache License, Version 2.0 which is available at
 * * https://www.apache.org/licenses/LICENSE-2.0.
 * *
 * * See the NOTICE file distributed with this work for additional
 * * information regarding copyright ownership.
 * * Unless required by applicable law or agreed to in writing, software
 * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * * License for the specific language governing permissions and limitations
 * * under the License.
 * *
 * * SPDX-License-Identifier: Apache-2.0
 * *****************************************************************************
 */

package org.deeplearning4j.nn.layers.convolution;

import java.util.Arrays;
import java.util.List;
import org.deeplearning4j.exception.DL4JInvalidInputException;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.conf.RNNFormat;
import org.deeplearning4j.nn.conf.layers.Convolution1DNew;
import org.deeplearning4j.nn.conf.layers.LayerConfiguration;
import org.deeplearning4j.nn.gradient.DefaultGradient;
import org.deeplearning4j.nn.gradient.Gradient;
import org.deeplearning4j.nn.params.ConvolutionParamInitializer;
import org.deeplearning4j.nn.workspace.ArrayType;
import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr;
import org.deeplearning4j.util.Convolution1DUtils;
import org.deeplearning4j.util.Convolution2DUtils;
import org.nd4j.common.base.Preconditions;
import org.nd4j.common.primitives.Pair;
import org.nd4j.linalg.activations.IActivation;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.ops.impl.layers.convolution.Conv1D;
import org.nd4j.linalg.api.ops.impl.layers.convolution.Conv1DDerivative;
import org.nd4j.linalg.api.ops.impl.layers.convolution.config.Conv1DConfig;
import org.nd4j.linalg.api.shape.LongShapeDescriptor;
import org.nd4j.linalg.factory.Broadcast;
import org.nd4j.linalg.factory.Nd4j;

public class Convolution1DNewLayer<Layer_ConfT extends Convolution1DNew>
    extends ConvolutionNewLayer<Layer_ConfT> {
  public Convolution1DNewLayer(LayerConfiguration conf, DataType dataType) {
    super(conf, dataType);
  }

  @Override
  public Pair<Gradient, INDArray> backpropGradient(
      INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    if (epsilon.rank() != 3)
      throw new DL4JInvalidInputException(
          "Got rank "
              + epsilon.rank()
              + " array as epsilon for Convolution1D backprop with shape "
              + Arrays.toString(epsilon.shape())
              + ". Expected rank 3 array with shape [minibatchSize, features, length]. "
              + layerId());
    Pair<INDArray, INDArray> fwd = preOutput(false, true, workspaceMgr);
    IActivation afn = getTypedLayerConfiguration().getActivationFn();
    INDArray delta =
        afn.backprop(fwd.getFirst(), epsilon).getFirst(); // TODO handle activation function params

    Convolution1DNew c = getTypedLayerConfiguration();
    Conv1DConfig conf =
        Conv1DConfig.builder()
            .k(c.getKernelSize()[0])
            .s(c.getStride()[0])
            .d(c.getDilation()[0])
            .p(c.getPadding()[0])
            .dataFormat(Conv1DConfig.NCW)
            .paddingMode(Convolution2DUtils.paddingModeForConvolutionMode(convolutionMode))
            .build();

    INDArray w =
        Convolution1DUtils.reshapeWeightArrayOrGradientForFormat(
            getParam(ConvolutionParamInitializer.WEIGHT_KEY), RNNFormat.NCW);

    INDArray[] inputArrs;
    INDArray[] outputArrs;
    INDArray wg =
        Convolution1DUtils.reshapeWeightArrayOrGradientForFormat(
            gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY), getRnnDataFormat());
    INDArray epsOut =
        workspaceMgr.createUninitialized(
            ArrayType.ACTIVATION_GRAD, input.dataType(), input.shape());
    INDArray input = this.input.castTo(dataType);
    if (getTypedLayerConfiguration().getRnnDataFormat() == RNNFormat.NWC) {
      input = input.permute(0, 2, 1); // NHWC to NCHW
    }

    if (getTypedLayerConfiguration().hasBias()) {
      INDArray b = getParam(ConvolutionParamInitializer.BIAS_KEY);
      b = b.reshape(b.length());
      inputArrs = new INDArray[] {input, w, b, delta};
      INDArray bg = gradientViews.get(ConvolutionParamInitializer.BIAS_KEY);
      bg = bg.reshape(bg.length());
      outputArrs = new INDArray[] {epsOut, wg, bg};
    } else {
      inputArrs = new INDArray[] {input, w, delta};
      outputArrs = new INDArray[] {epsOut, wg};
    }

    Conv1DDerivative op = new Conv1DDerivative(inputArrs, outputArrs, conf);
    Nd4j.exec(op);

    Gradient retGradient = new DefaultGradient();
    if (getTypedLayerConfiguration().hasBias()) {
      retGradient.setGradientFor(
          ConvolutionParamInitializer.BIAS_KEY,
          gradientViews.get(ConvolutionParamInitializer.BIAS_KEY));
    }
    retGradient.setGradientFor(
        ConvolutionParamInitializer.WEIGHT_KEY,
        gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY),
        'c');
    if (getRnnDataFormat() == RNNFormat.NWC) {
      epsOut = epsOut.permute(0, 2, 1);
    }
    return new Pair<>(retGradient, epsOut);
  }

  @Override
  protected Pair<INDArray, INDArray> preOutput4d(
      boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) {
    Pair<INDArray, INDArray> preOutput = super.preOutput(true, forBackprop, workspaceMgr);
    INDArray p3d = preOutput.getFirst();
    INDArray p = preOutput.getFirst().reshape(p3d.size(0), p3d.size(1), p3d.size(2), 1);
    preOutput.setFirst(p);
    return preOutput;
  }

  @Override
  protected Pair<INDArray, INDArray> preOutput(
      boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(false);

    INDArray input = this.input.castTo(dataType);
    if (getTypedLayerConfiguration().getRnnDataFormat() == RNNFormat.NWC) {
      input = input.permute(0, 2, 1); // NHWC to NCHW
    }

    Convolution1DNew c = getTypedLayerConfiguration();
    Conv1DConfig conf =
        Conv1DConfig.builder()
            .k(c.getKernelSize()[0])
            .s(c.getStride()[0])
            .d(c.getDilation()[0])
            .p(c.getPadding()[0])
            .dataFormat(Conv1DConfig.NCW)
            .paddingMode(Convolution2DUtils.paddingModeForConvolutionMode(convolutionMode))
            .build();

    INDArray w =
        Convolution1DUtils.reshapeWeightArrayOrGradientForFormat(
            getParam(ConvolutionParamInitializer.WEIGHT_KEY), RNNFormat.NCW);

    INDArray[] inputs;
    if (getTypedLayerConfiguration().hasBias()) {
      INDArray b = getParam(ConvolutionParamInitializer.BIAS_KEY);
      b = b.reshape(b.length());
      inputs = new INDArray[] {input, w, b};
    } else {
      inputs = new INDArray[] {input, w};
    }

    Conv1D op = new Conv1D(inputs, null, conf);
    List<LongShapeDescriptor> outShape = op.calculateOutputShape();
    op.setOutputArgument(0, Nd4j.create(outShape.get(0), false));
    Nd4j.exec(op);
    INDArray output = op.getOutputArgument(0);

    if (getRnnDataFormat() == RNNFormat.NWC) {
      output = output.permute(0, 2, 1);
    }

    return new Pair<>(output, null);
  }

  @Override
  public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) {
    INDArray act4d = super.activate(training, workspaceMgr);
    INDArray act3d =
        act4d.rank() > 3 ? act4d.reshape(act4d.size(0), act4d.size(1), act4d.size(2)) : act4d;

    if (maskArray != null) {
      INDArray maskOut =
          feedForwardMaskArray(maskArray, MaskState.Active, (int) act3d.size(0)).getFirst();
      Preconditions.checkState(
          act3d.size(0) == maskOut.size(0) && act3d.size(2) == maskOut.size(1),
          "Activations dimensions (0,2) and mask dimensions (0,1) don't match: Activations %s, Mask %s",
          act3d.shape(),
          maskOut.shape());
      Broadcast.mul(act3d, maskOut, act3d, 0, 2);
    }

    return workspaceMgr.leverageTo(
        ArrayType.ACTIVATIONS, act3d); // Should be zero copy most of the time
  }

  @Override
  public Pair<INDArray, MaskState> feedForwardMaskArray(
      INDArray maskArray, MaskState currentMaskState, int minibatchSize) {
    INDArray reduced =
        Convolution2DUtils.cnn1dMaskReduction(
            maskArray,
            getTypedLayerConfiguration().getKernelSize()[0],
            getTypedLayerConfiguration().getStride()[0],
            getTypedLayerConfiguration().getPadding()[0],
            getTypedLayerConfiguration().getDilation()[0],
            getTypedLayerConfiguration().getConvolutionMode());
    return new Pair<>(reduced, currentMaskState);
  }

  private RNNFormat getRnnDataFormat() {
    return getTypedLayerConfiguration().getRnnDataFormat();
  }
}
File diff suppressed because it is too large
@ -0,0 +1,753 @@
|
||||||
|
/*
|
||||||
|
* ******************************************************************************
|
||||||
|
* *
|
||||||
|
* *
|
||||||
|
* * This program and the accompanying materials are made available under the
|
||||||
|
* * terms of the Apache License, Version 2.0 which is available at
|
||||||
|
* * https://www.apache.org/licenses/LICENSE-2.0.
|
||||||
|
* *
|
||||||
|
* * See the NOTICE file distributed with this work for additional
|
||||||
|
* * information regarding copyright ownership.
|
||||||
|
* * Unless required by applicable law or agreed to in writing, software
|
||||||
|
* * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* * License for the specific language governing permissions and limitations
|
||||||
|
* * under the License.
|
||||||
|
* *
|
||||||
|
* * SPDX-License-Identifier: Apache-2.0
|
||||||
|
* *****************************************************************************
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.deeplearning4j.nn.layers.convolution;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.deeplearning4j.common.config.DL4JClassLoading;
|
||||||
|
import org.deeplearning4j.exception.DL4JInvalidInputException;
|
||||||
|
import org.deeplearning4j.nn.api.MaskState;
|
||||||
|
import org.deeplearning4j.nn.conf.CNN2DFormat;
|
||||||
|
import org.deeplearning4j.nn.conf.CacheMode;
|
||||||
|
import org.deeplearning4j.nn.conf.ConvolutionMode;
|
||||||
|
import org.deeplearning4j.nn.conf.layers.Convolution1DNew;
|
||||||
|
import org.deeplearning4j.nn.conf.layers.LayerConfiguration;
|
||||||
|
import org.deeplearning4j.nn.gradient.DefaultGradient;
|
||||||
|
import org.deeplearning4j.nn.gradient.Gradient;
|
||||||
|
import org.deeplearning4j.nn.layers.BaseLayer;
|
||||||
|
import org.deeplearning4j.nn.layers.LayerHelper;
|
||||||
|
import org.deeplearning4j.nn.layers.mkldnn.MKLDNNConvHelper;
|
||||||
|
import org.deeplearning4j.nn.params.ConvolutionParamInitializer;
|
||||||
|
import org.deeplearning4j.nn.workspace.ArrayType;
|
||||||
|
import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr;
|
||||||
|
import org.deeplearning4j.util.Convolution2DUtils;
|
||||||
|
import org.nd4j.common.primitives.Pair;
|
||||||
|
import org.nd4j.linalg.activations.IActivation;
|
||||||
|
import org.nd4j.linalg.api.buffer.DataType;
|
||||||
|
import org.nd4j.linalg.api.memory.MemoryWorkspace;
|
||||||
|
import org.nd4j.linalg.api.ndarray.INDArray;
|
||||||
|
import org.nd4j.linalg.api.shape.Shape;
|
||||||
|
import org.nd4j.linalg.convolution.Convolution;
|
||||||
|
import org.nd4j.linalg.exception.ND4JArraySizeException;
|
||||||
|
import org.nd4j.linalg.exception.ND4JOpProfilerException;
|
||||||
|
import org.nd4j.linalg.factory.Nd4j;
|
||||||
|
|
||||||
|
@Slf4j
|
||||||
|
public class ConvolutionNewLayer<
|
||||||
|
LayerConf_T extends org.deeplearning4j.nn.conf.layers.Convolution1DNew>
|
||||||
|
extends BaseLayer<org.deeplearning4j.nn.conf.layers.Convolution1DNew> {
|
||||||
|
|
||||||
|
protected INDArray i2d;
|
||||||
|
protected ConvolutionHelper helper = null;
|
||||||
|
protected int helperCountFail = 0;
|
||||||
|
protected ConvolutionMode convolutionMode;
|
||||||
|
protected transient INDArray dummyBias; // Used only when: hasBias == false AND helpers are used
|
||||||
|
protected transient INDArray dummyBiasGrad; // As above
|
||||||
|
|
||||||
|
|
||||||
|
public ConvolutionNewLayer(LayerConfiguration conf, DataType dataType) {
|
||||||
|
super(conf, dataType);
|
||||||
|
initializeHelper();
|
||||||
|
if (conf instanceof Convolution1DNew) {
|
||||||
|
convolutionMode = ((Convolution1DNew) conf).getConvolutionMode();
|
||||||
|
} else if (conf instanceof org.deeplearning4j.nn.conf.layers.ConvolutionLayer) {
|
||||||
|
convolutionMode =
|
||||||
|
((org.deeplearning4j.nn.conf.layers.ConvolutionLayer) conf).getConvolutionMode();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void initializeHelper() {
|
||||||
|
String backend = Nd4j.getExecutioner().getEnvironmentInformation().getProperty("backend");
|
||||||
|
if ("CUDA".equalsIgnoreCase(backend)) {
|
||||||
|
helper =
|
||||||
|
DL4JClassLoading.createNewInstance(
|
||||||
|
"org.deeplearning4j.cuda.convolution.CudnnConvolutionHelper",
|
||||||
|
ConvolutionHelper.class,
|
||||||
|
dataType);
|
||||||
|
log.debug("CudnnConvolutionHelper successfully initialized");
|
||||||
|
if (!helper.checkSupported()) {
|
||||||
|
helper = null;
|
||||||
|
}
|
||||||
|
} else if ("CPU".equalsIgnoreCase(backend)) {
|
||||||
|
helper = new MKLDNNConvHelper(dataType);
|
||||||
|
log.trace("Created MKLDNNConvHelper, layer {}", getTypedLayerConfiguration().getName());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (helper != null && !helper.checkSupported()) {
|
||||||
|
log.debug("Removed helper {} as not supported", helper.getClass());
|
||||||
|
helper = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Type type() {
|
||||||
|
return Type.CONVOLUTIONAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public Convolution1DNew getTypedLayerConfiguration() {
|
||||||
|
return super.getTypedLayerConfiguration();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Pair<Gradient, INDArray> backpropGradient(
|
||||||
|
INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
|
||||||
|
assertInputSet(true);
|
||||||
|
INDArray weights =
|
||||||
|
getParamWithNoise(ConvolutionParamInitializer.WEIGHT_KEY, true, workspaceMgr);
|
||||||
|
INDArray bias = getParamWithNoise(ConvolutionParamInitializer.BIAS_KEY, true, workspaceMgr);
|
||||||
|
|
||||||
|
INDArray input = this.input.castTo(dataType); // No op if correct type
|
||||||
|
if (epsilon.dataType() != dataType) epsilon = epsilon.castTo(dataType);
|
||||||
|
|
||||||
|
INDArray origInput = input;
|
||||||
|
INDArray origEps = epsilon;
|
||||||
|
if (getTypedLayerConfiguration().getConvFormat() != CNN2DFormat.NCHW) {
|
||||||
|
input = input.permute(0, 3, 1, 2); // NHWC to NCHW
|
||||||
|
epsilon = epsilon.permute(0, 3, 1, 2); // NHWC to NCHW
|
||||||
|
}
|
||||||
|
|
||||||
|
long miniBatch = input.size(0);
|
||||||
|
int inH = (int) input.size(2);
|
||||||
|
int inW = (int) input.size(3);
|
||||||
|
|
||||||
|
long outDepth = weights.size(0);
|
||||||
|
long inDepth = weights.size(1);
|
||||||
|
int kH = (int) weights.size(2);
|
||||||
|
int kW = (int) weights.size(3);
|
||||||
|
|
||||||
|
int[] dilation = getTypedLayerConfiguration().getDilation();
|
||||||
|
int[] kernel = getTypedLayerConfiguration().getKernelSize();
|
||||||
|
int[] strides = getTypedLayerConfiguration().getStride();
|
||||||
|
int[] pad;
|
||||||
|
int[] outSize;
|
||||||
|
if (convolutionMode == ConvolutionMode.Same) {
|
||||||
|
outSize =
|
||||||
|
Convolution2DUtils.getOutputSize(
|
||||||
|
input,
|
||||||
|
kernel,
|
||||||
|
strides,
|
||||||
|
null,
|
||||||
|
convolutionMode,
|
||||||
|
dilation,
|
||||||
|
CNN2DFormat.NCHW); // Also performs validation
|
||||||
|
pad =
|
||||||
|
Convolution2DUtils.getSameModeTopLeftPadding(
|
||||||
|
outSize, new int[] {inH, inW}, kernel, strides, dilation);
|
||||||
|
} else {
|
||||||
|
pad = getTypedLayerConfiguration().getPadding();
|
||||||
|
outSize =
|
||||||
|
Convolution2DUtils.getOutputSize(
|
||||||
|
input,
|
||||||
|
kernel,
|
||||||
|
strides,
|
||||||
|
pad,
|
||||||
|
convolutionMode,
|
||||||
|
dilation,
|
||||||
|
CNN2DFormat.NCHW); // Also performs validation
|
||||||
|
}
|
||||||
|
|
||||||
|
int outH = outSize[0];
|
||||||
|
int outW = outSize[1];
|
||||||
|
|
||||||
|
INDArray biasGradView = gradientViews.get(ConvolutionParamInitializer.BIAS_KEY);
|
||||||
|
INDArray weightGradView =
|
||||||
|
gradientViews.get(
|
||||||
|
ConvolutionParamInitializer.WEIGHT_KEY); // 4d, c order. Shape: [outDepth,inDepth,kH,kW]
|
||||||
|
INDArray weightGradView2df =
|
||||||
|
Shape.newShapeNoCopy(weightGradView, new long[] {outDepth, inDepth * kH * kW}, false)
|
||||||
|
.transpose();
|
||||||
|
|
||||||
|
INDArray delta;
|
||||||
|
IActivation afn = getTypedLayerConfiguration().getActivationFn();
|
||||||
|
|
||||||
|
Pair<INDArray, INDArray> p = preOutput4d(true, true, workspaceMgr);
|
||||||
|
INDArray z = p.getFirst();
|
||||||
|
CNN2DFormat f = getTypedLayerConfiguration().getConvFormat();
|
||||||
|
if (f != CNN2DFormat.NCHW) {
|
||||||
|
z = z.permute(0, 3, 1, 2); // NHWC to NCHW
|
||||||
|
}
|
||||||
|
delta = afn.backprop(z, epsilon).getFirst(); // TODO handle activation function params
|
||||||
|
|
||||||
|
if (helper != null
|
||||||
|
&& (helperCountFail == 0 || !getTypedLayerConfiguration().isCudnnAllowFallback())) {
|
||||||
|
INDArray helperDelta = delta;
|
||||||
|
if (getTypedLayerConfiguration().getConvFormat() == CNN2DFormat.NHWC)
|
||||||
|
helperDelta = delta.permute(0, 2, 3, 1); // NCHW to NHWC
|
||||||
|
|
||||||
|
if (!hasBias() && !(helper instanceof MKLDNNConvHelper)) {
|
||||||
|
// MKL-DNN supports no bias, CuDNN doesn't
|
||||||
|
if (dummyBiasGrad == null) {
|
||||||
|
try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) {
|
||||||
|
dummyBiasGrad = Nd4j.create(1, getTypedLayerConfiguration().getNOut());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
biasGradView = dummyBiasGrad;
|
||||||
|
}
|
||||||
|
|
||||||
|
Pair<Gradient, INDArray> ret = null;
|
||||||
|
try {
|
||||||
|
ret =
|
||||||
|
helper.backpropGradient(
|
||||||
|
origInput,
|
||||||
|
weights,
|
||||||
|
bias,
|
||||||
|
helperDelta,
|
||||||
|
kernel,
|
||||||
|
strides,
|
||||||
|
pad,
|
||||||
|
biasGradView,
|
||||||
|
weightGradView,
|
||||||
|
afn,
|
||||||
|
getTypedLayerConfiguration().getCudnnAlgoMode(),
|
||||||
|
getTypedLayerConfiguration().getCudnnBwdFilterAlgo(),
|
||||||
|
getTypedLayerConfiguration().getCudnnBwdDataAlgo(),
|
||||||
|
convolutionMode,
|
||||||
|
dilation,
|
||||||
|
getTypedLayerConfiguration().getConvFormat(),
|
||||||
|
workspaceMgr);
|
||||||
|
} catch (ND4JOpProfilerException e) {
|
||||||
|
throw e; // NaN panic etc for debugging
|
||||||
|
} catch (Exception e) {
|
||||||
|
if (e.getMessage().contains("Failed to allocate")) {
|
||||||
|
// This is a memory exception - don't fallback to built-in implementation
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (getTypedLayerConfiguration().isCudnnAllowFallback()) {
|
||||||
|
helperCountFail++;
|
||||||
|
if (helper instanceof MKLDNNConvHelper) {
|
||||||
|
log.warn("MKL-DNN execution failed - falling back on built-in implementation", e);
|
||||||
|
} else {
|
||||||
|
log.warn("CuDNN execution failed - falling back on built-in implementation", e);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
throw new RuntimeException(
|
||||||
|
"Error during ConvolutionLayer MKL/CuDNN helper backprop - isCudnnAllowFallback() is set to false",
|
||||||
|
e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ret != null) {
|
||||||
|
// Backprop dropout, if present
|
||||||
|
INDArray gradPostDropout = ret.getRight();
|
||||||
|
gradPostDropout = backpropDropOutIfPresent(gradPostDropout);
|
||||||
|
ret.setSecond(gradPostDropout);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
delta = delta.permute(1, 0, 2, 3); // To shape: [outDepth,miniBatch,outH,outW]
|
||||||
|
|
||||||
|
// Note: due to the permute in preOut, and the fact that we essentially do a
|
||||||
|
// preOut.muli(epsilon), this reshape
|
||||||
|
// should be zero-copy; only possible exception being sometimes with the "identity" activation
|
||||||
|
// case
|
||||||
|
INDArray delta2d =
|
||||||
|
delta.reshape('c', outDepth, miniBatch * outH * outW); // Shape.newShapeNoCopy(delta,new
|
||||||
|
// int[]{outDepth,miniBatch*outH*outW},false);
|
||||||
|
|
||||||
|
// Do im2col, but with order [miniB,outH,outW,depthIn,kH,kW]; but need to input
|
||||||
|
// [miniBatch,channels,kH,kW,outH,outW] given the current im2col implementation
|
||||||
|
// To get this: create an array of the order we want, permute it to the order required by im2col
|
||||||
|
// implementation, and then do im2col on that
|
||||||
|
// to get old order from required order: permute(0,3,4,5,1,2)
|
||||||
|
INDArray im2col2d =
|
||||||
|
p.getSecond(); // Re-use im2col2d array from forward pass if available; recalculate if not
|
||||||
|
if (im2col2d == null) {
|
||||||
|
INDArray col =
|
||||||
|
Nd4j.createUninitialized(
|
||||||
|
dataType, new long[] {miniBatch, outH, outW, inDepth, kH, kW}, 'c');
|
||||||
|
INDArray col2 = col.permute(0, 3, 4, 5, 1, 2);
|
||||||
|
Convolution.im2col(
|
||||||
|
input,
|
||||||
|
kH,
|
||||||
|
kW,
|
||||||
|
strides[0],
|
||||||
|
strides[1],
|
||||||
|
pad[0],
|
||||||
|
pad[1],
|
||||||
|
dilation[0],
|
||||||
|
dilation[1],
|
||||||
|
convolutionMode == ConvolutionMode.Same,
|
||||||
|
col2);
|
||||||
|
// Shape im2col to 2d. Due to the permuting above, this should be a zero-copy reshape
|
||||||
|
im2col2d = col.reshape('c', miniBatch * outH * outW, inDepth * kH * kW);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate weight gradients, using cc->c mmul.
|
||||||
|
// weightGradView2df is f order, but this is because it's transposed from c order
|
||||||
|
// Here, we are using the fact that AB = (B^T A^T)^T; output here (post transpose) is in c
|
||||||
|
// order, not usual f order
|
||||||
|
Nd4j.gemm(im2col2d, delta2d, weightGradView2df, true, true, 1.0, 0.0);
|
||||||
|
|
||||||
|
// Flatten 4d weights to 2d... this again is a zero-copy op (unless weights are not originally
|
||||||
|
// in c order for some reason)
|
||||||
|
INDArray wPermuted =
|
||||||
|
weights.permute(3, 2, 1, 0); // Start with c order weights, switch order to f order
|
||||||
|
INDArray w2d = wPermuted.reshape('f', inDepth * kH * kW, outDepth);
|
||||||
|
|
||||||
|
// Calculate epsilons for layer below, in 2d format (note: this is in 'image patch' format
|
||||||
|
// before col2im reduction)
|
||||||
|
// Note: cc -> f mmul here, then reshape to 6d in f order
|
||||||
|
INDArray epsNext2d =
|
||||||
|
w2d.mmul(delta2d); // TODO can we reuse im2col array instead of allocating new result array?
|
||||||
|
INDArray eps6d =
|
||||||
|
Shape.newShapeNoCopy(epsNext2d, new long[] {kW, kH, inDepth, outW, outH, miniBatch}, true);
|
||||||
|
|
||||||
|
// Calculate epsilonNext by doing im2col reduction.
|
||||||
|
// Current col2im implementation expects input with order: [miniBatch,channels,kH,kW,outH,outW]
|
||||||
|
// currently have [kH,kW,inDepth,outW,outH,miniBatch] -> permute first
|
||||||
|
eps6d = eps6d.permute(5, 2, 1, 0, 4, 3);
|
||||||
|
INDArray epsNextOrig =
|
||||||
|
workspaceMgr.createUninitialized(
|
||||||
|
ArrayType.ACTIVATION_GRAD,
|
||||||
|
eps6d.dataType(),
|
||||||
|
new long[] {inDepth, miniBatch, inH, inW},
|
||||||
|
'c');
|
||||||
|
|
||||||
|
// Note: we are execute col2im in a way that the output array should be used in a stride 1 muli
|
||||||
|
// in the layer below... (same strides as zs/activations)
|
||||||
|
INDArray epsNext = epsNextOrig.permute(1, 0, 2, 3);
|
||||||
|
Convolution.col2im(
|
||||||
|
eps6d, epsNext, strides[0], strides[1], pad[0], pad[1], inH, inW, dilation[0], dilation[1]);
|
||||||
|
|
||||||
|
Gradient retGradient = new DefaultGradient();
|
||||||
|
if (getTypedLayerConfiguration().hasBias()) {
|
||||||
|
delta2d.sum(biasGradView, 1); // biasGradView is initialized/zeroed first in sum op
|
||||||
|
retGradient.setGradientFor(ConvolutionParamInitializer.BIAS_KEY, biasGradView);
|
||||||
|
}
|
||||||
|
retGradient.setGradientFor(ConvolutionParamInitializer.WEIGHT_KEY, weightGradView, 'c');
|
||||||
|
|
||||||
|
weightNoiseParams.clear();
|
||||||
|
|
||||||
|
epsNext = backpropDropOutIfPresent(epsNext);
|
||||||
|
|
||||||
|
if (getTypedLayerConfiguration().getConvFormat() != CNN2DFormat.NCHW) {
|
||||||
|
epsNext = epsNext.permute(0, 2, 3, 1); // NCHW to NHWC
|
||||||
|
}
|
||||||
|
|
||||||
|
return new Pair<>(retGradient, epsNext);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* preOutput4d: Used so that ConvolutionLayer subclasses (such as Convolution1D) can maintain
|
||||||
|
* their standard non-4d preOutput method, while overriding this to return 4d activations (for use
|
||||||
|
* in backprop) without modifying the public API
|
||||||
|
*/
|
||||||
|
protected Pair<INDArray, INDArray> preOutput4d(
|
||||||
|
boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) {
|
||||||
|
return preOutput(training, forBackprop, workspaceMgr);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void validateInputRank() {
|
||||||
|
// Input validation: expect rank 4 matrix
|
||||||
|
if (input.rank() != 4) {
|
||||||
|
String layerName = layerConfiguration.getName();
|
||||||
|
if (layerName == null) layerName = "(not named)";
|
||||||
|
throw new DL4JInvalidInputException(
|
||||||
|
"Got rank "
|
||||||
|
+ input.rank()
|
||||||
|
+ " array as input to ConvolutionLayer (layer name = "
|
||||||
|
+ layerName
|
||||||
|
+ ", layer index = "
|
||||||
|
+ index
|
||||||
|
+ ") with shape "
|
||||||
|
+ Arrays.toString(input.shape())
|
||||||
|
+ ". "
|
||||||
|
+ "Expected rank 4 array with shape [minibatchSize, layerInputDepth, inputHeight, inputWidth]."
|
||||||
|
+ (input.rank() == 2
|
||||||
|
? " (Wrong input type (see InputType.convolutionalFlat()) or wrong data type?)"
|
||||||
|
: "")
|
||||||
|
+ " "
|
||||||
|
+ layerId());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void validateInputDepth(long inDepth) {
|
||||||
|
CNN2DFormat format = getTypedLayerConfiguration().getConvFormat();
|
||||||
|
int dim = format == CNN2DFormat.NHWC ? 3 : 1;
|
||||||
|
if (input.size(dim) != inDepth) {
|
||||||
|
String layerName = layerConfiguration.getName();
|
||||||
|
if (layerName == null) layerName = "(not named)";
|
||||||
|
|
||||||
|
String s =
|
||||||
|
"Cannot do forward pass in Convolution layer (layer name = "
|
||||||
|
+ layerName
|
||||||
|
+ ", layer index = "
|
||||||
|
+ index
|
||||||
|
+ "): input array channels does not match CNN layer configuration"
|
||||||
|
+ " (data format = "
|
||||||
|
+ format
|
||||||
|
+ ", data input channels = "
|
||||||
|
+ input.size(dim)
|
||||||
|
+ ", "
|
||||||
|
+ getTypedLayerConfiguration().getConvFormat().dimensionNames()
|
||||||
|
+ "="
|
||||||
|
+ Arrays.toString(input.shape())
|
||||||
|
+ "; expected"
|
||||||
|
+ " input channels = "
|
||||||
|
+ inDepth
|
||||||
|
+ ") "
|
||||||
|
+ layerId();
|
||||||
|
|
||||||
|
int dimIfWrongFormat = format == CNN2DFormat.NHWC ? 1 : 3;
|
||||||
|
if (input.size(dimIfWrongFormat) == inDepth) {
|
||||||
|
// User might have passed NCHW data to a NHWC net, or vice versa?
|
||||||
|
s += "\n" + Convolution2DUtils.NCHW_NHWC_ERROR_MSG;
|
||||||
|
}
|
||||||
|
|
||||||
|
throw new DL4JInvalidInputException(s);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
  /**
   * PreOutput method that also returns the im2col2d array (if being called for backprop), as this
   * can be re-used instead of being calculated again.
   *
   * @param training Train or test time (impacts dropout)
   * @param forBackprop If true: return the im2col2d array for re-use during backprop. False: return
   *     null for second pair entry. Note that it may still be null in the case of CuDNN and the
   *     like.
   * @return Pair of arrays: preOutput (activations) and optionally the im2col2d array
   */
  protected Pair<INDArray, INDArray> preOutput(
      boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(false);
    INDArray bias = getParamWithNoise(ConvolutionParamInitializer.BIAS_KEY, training, workspaceMgr);
    INDArray weights =
        getParamWithNoise(ConvolutionParamInitializer.WEIGHT_KEY, training, workspaceMgr);

    validateInputRank();

    INDArray input = this.input.castTo(dataType);
    INDArray inputOrig = input;
    if (getTypedLayerConfiguration().getConvFormat() == CNN2DFormat.NHWC) {
      input = input.permute(0, 3, 1, 2).dup(); // NHWC to NCHW
    }

    long miniBatch = input.size(0);
    long outDepth = weights.size(0);
    long inDepth = weights.size(1);
    validateInputDepth(inDepth);

    long kH = weights.size(2);
    long kW = weights.size(3);

    int[] dilation = getTypedLayerConfiguration().getDilation();
    int[] kernel = getTypedLayerConfiguration().getKernelSize();
    int[] strides = getTypedLayerConfiguration().getStride();

    int[] pad;
    int[] outSize;
    if (convolutionMode == ConvolutionMode.Same) {
      outSize =
          Convolution2DUtils.getOutputSize(
              input,
              kernel,
              strides,
              null,
              convolutionMode,
              dilation,
              CNN2DFormat.NCHW); // Note: hardcoded to NCHW due to permute earlier in this method

      if (input.size(2) > Integer.MAX_VALUE || input.size(3) > Integer.MAX_VALUE)
        throw new ND4JArraySizeException();
      int[] inWidthHeight;
      // if(layerConf().getCnn2dDataFormat() == CNN2DFormat.NCHW)
      // TODO: Switch hardcoded state later. For now, convolution is implemented as
      // switch to NCHW then permute back for NWHC
      inWidthHeight = new int[] {(int) input.size(2), (int) input.size(3)};

      /* else if(layerConf().getCnn2dDataFormat() == CNN2DFormat.NHWC) {
          inWidthHeight = new int[] {(int) input.size(1), (int) input.size(2)};
      }
      else
          throw new IllegalStateException("No data format configured!");*/
      pad =
          Convolution2DUtils.getSameModeTopLeftPadding(
              outSize, inWidthHeight, kernel, strides, dilation);
    } else {
      pad = getTypedLayerConfiguration().getPadding();
      outSize =
          Convolution2DUtils.getOutputSize(
              input,
              kernel,
              strides,
              pad,
              convolutionMode,
              dilation,
              CNN2DFormat.NCHW); // Note: hardcoded to NCHW due to permute earlier in this method
    }

    int outH = outSize[0];
    int outW = outSize[1];

    if (helper != null
        && (helperCountFail == 0 || !getTypedLayerConfiguration().isCudnnAllowFallback())) {
      if (preOutput != null && forBackprop) {
        return new Pair<>(preOutput, null);
      }

      // For no-bias convolutional layers: use an empty (all 0s) value for biases
      if (!hasBias()) {
        if (dummyBias == null) {
          try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) {
            dummyBias = Nd4j.create(1, getTypedLayerConfiguration().getNOut());
          }
        }
        bias = dummyBias;
      }

      INDArray ret = null;
      try {
        ret =
            helper.preOutput(
                inputOrig,
                weights,
                bias,
                kernel,
                strides,
                pad,
                getTypedLayerConfiguration().getCudnnAlgoMode(),
                getTypedLayerConfiguration().getCudnnFwdAlgo(),
                convolutionMode,
                dilation,
                getTypedLayerConfiguration().getConvFormat(),
                workspaceMgr);
      } catch (ND4JOpProfilerException e) {
        throw e; // NaN panic etc for debugging
      } catch (Exception e) {
        if (e.getMessage() != null && e.getMessage().contains("Failed to allocate")) {
          // This is a memory exception - don't fallback to built-in implementation
          throw e;
        }

        if (getTypedLayerConfiguration().isCudnnAllowFallback()) {
          helperCountFail++;
          if (helper instanceof MKLDNNConvHelper) {
            log.warn("MKL-DNN execution failed - falling back on built-in implementation", e);
          } else {
            log.warn("CuDNN execution failed - falling back on built-in implementation", e);
          }
        } else {
          throw new RuntimeException(
              "Error during ConvolutionLayer MKL/CuDNN helper forward pass - isCudnnAllowFallback() is set to false",
              e);
        }
      }
      if (ret != null) {
        return new Pair<>(ret, null);
      }
    }

    if (preOutput != null && i2d != null && forBackprop) {
      return new Pair<>(preOutput, i2d);
    }

    // im2col in the required order: want [outW,outH,miniBatch,depthIn,kH,kW], but need to input
    // [miniBatch,channels,kH,kW,outH,outW] given the current im2col implementation
    // To get this: create an array of the order we want, permute it to the order required by im2col
    // implementation, and then do im2col on that
    // to get old order from required order: permute(0,3,4,5,1,2)
    // Post reshaping: rows are such that minibatch varies slowest, outW fastest as we step through
    // the rows post-reshape
    INDArray col =
        Nd4j.createUninitialized(
            weights.dataType(), new long[] {miniBatch, outH, outW, inDepth, kH, kW}, 'c');
    int[] permute = new int[] {0, 3, 4, 5, 1, 2};
    INDArray col2 = col.permute(permute);
    INDArray im2ColIn = input.castTo(col2.dataType()); // No op if already (for example) float
    if (kH > Integer.MAX_VALUE || kW > Integer.MAX_VALUE) throw new ND4JArraySizeException();
    Convolution.im2col(
        im2ColIn,
        (int) kH,
        (int) kW,
        strides[0],
        strides[1],
        pad[0],
        pad[1],
        dilation[0],
        dilation[1],
        convolutionMode == ConvolutionMode.Same,
        col2);

    INDArray im2col2d =
        Shape.newShapeNoCopy(col, new long[] {miniBatch * outH * outW, inDepth * kH * kW}, false);

    // Current order of weights: [depthOut,depthIn,kH,kW], c order
    // Permute to give [kW,kH,depthIn,depthOut], f order
    // Reshape to give [kW*kH*depthIn, depthOut]. This should always be zero-copy reshape, unless
    // weights aren't in c order for some reason
    INDArray permutedW = weights.permute(3, 2, 1, 0);
    INDArray reshapedW = permutedW.reshape('f', kW * kH * inDepth, outDepth);

    // Do the MMUL; c and f orders in, f order out. output shape: [miniBatch*outH*outW,depthOut]
    INDArray z =
        workspaceMgr.createUninitialized(
            ArrayType.ACTIVATIONS,
            weights.dataType(),
            new long[] {im2col2d.size(0), reshapedW.size(1)},
            'f');
    im2col2d.mmuli(reshapedW, z);

    // Add biases, before reshaping. Note that biases are [1,depthOut] and currently z is
    // [miniBatch*outH*outW,depthOut] -> addiRowVector
    if (getTypedLayerConfiguration().hasBias()) {
      z.addiRowVector(bias);
    }

    // Now, reshape to [outW,outH,miniBatch,outDepth], and permute to have correct output order:
    // [miniBatch,outDepth,outH,outW];
    z = Shape.newShapeNoCopy(z, new long[] {outW, outH, miniBatch, outDepth}, true);
    z = z.permute(2, 3, 1, 0);

    if (training
        && cacheMode != CacheMode.NONE
        && workspaceMgr.hasConfiguration(ArrayType.FF_CACHE)
        && workspaceMgr.isWorkspaceOpen(ArrayType.FF_CACHE)) {
      try (MemoryWorkspace wsB = workspaceMgr.notifyScopeBorrowed(ArrayType.FF_CACHE)) {
        i2d = im2col2d.unsafeDuplication();
      }
    }

    if (getTypedLayerConfiguration().getConvFormat() == CNN2DFormat.NHWC) {
      z = z.permute(0, 2, 3, 1); // NCHW to NHWC
      z = workspaceMgr.dup(ArrayType.ACTIVATIONS, z);
    }

    return new Pair<>(z, forBackprop ? im2col2d : null);
  }
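The im2col path above turns the convolution into a single matrix multiply: patches are laid out as a [miniBatch*outH*outW, inDepth*kH*kW] matrix and multiplied with the reshaped weights [kW*kH*inDepth, outDepth]. A small standalone sketch of the shape arithmetic (plain Java, illustrative numbers; not part of this commit):

public class Im2colShapeSketch {
  // Output size for a single spatial dimension, 'Truncate'-style convolution.
  static long outSize(long in, long kernel, long stride, long pad, long dilation) {
    long eKernel = kernel + (kernel - 1) * (dilation - 1); // effective kernel with dilation
    return (in - eKernel + 2 * pad) / stride + 1;
  }

  public static void main(String[] args) {
    long miniBatch = 16, inDepth = 3, outDepth = 8;
    long h = 28, w = 28, kH = 3, kW = 3, stride = 1, pad = 0, dilation = 1;

    long outH = outSize(h, kH, stride, pad, dilation); // 26
    long outW = outSize(w, kW, stride, pad, dilation); // 26

    long[] im2col2d = {miniBatch * outH * outW, inDepth * kH * kW}; // [10816, 27]
    long[] reshapedW = {kW * kH * inDepth, outDepth};               // [27, 8]
    long[] z2d = {im2col2d[0], reshapedW[1]};                       // [10816, 8]

    System.out.printf("im2col2d=%s x reshapedW=%s -> z=%s%n",
        java.util.Arrays.toString(im2col2d),
        java.util.Arrays.toString(reshapedW),
        java.util.Arrays.toString(z2d));
    // z is then reshaped to [outW, outH, miniBatch, outDepth] and permuted to
    // [miniBatch, outDepth, outH, outW], matching the code above.
  }
}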

  @Override
  public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) {
    if (input == null) {
      throw new IllegalArgumentException(
          "Cannot perform forward pass with null input " + layerId());
    }

    if (cacheMode == null) cacheMode = CacheMode.NONE;

    applyDropOutIfNecessary(training, workspaceMgr);

    INDArray z = preOutput(training, false, workspaceMgr).getFirst();

    // we do cache only if cache workspace exists. Skip otherwise
    if (training
        && cacheMode != CacheMode.NONE
        && workspaceMgr.hasConfiguration(ArrayType.FF_CACHE)
        && workspaceMgr.isWorkspaceOpen(ArrayType.FF_CACHE)) {
      try (MemoryWorkspace wsB = workspaceMgr.notifyScopeBorrowed(ArrayType.FF_CACHE)) {
        preOutput = z.unsafeDuplication();
      }
    }

    // String afn = conf.getLayer().getActivationFunction();
    IActivation afn = getTypedLayerConfiguration().getActivationFn();

    if (helper != null
        && Shape.strideDescendingCAscendingF(z)
        && (helperCountFail == 0 || !getTypedLayerConfiguration().isCudnnAllowFallback())) {
      INDArray ret = null;
      try {
        ret = helper.activate(z, getTypedLayerConfiguration().getActivationFn(), training);
      } catch (ND4JOpProfilerException e) {
        throw e; // NaN panic etc for debugging
      } catch (Exception e) {
        if (e.getMessage() != null && e.getMessage().contains("Failed to allocate")) {
          // This is a memory exception - don't fallback to built-in implementation
          throw e;
        }

        if (getTypedLayerConfiguration().isCudnnAllowFallback()) {
          helperCountFail++;
          if (helper instanceof MKLDNNConvHelper) {
            log.warn("MKL-DNN execution failed - falling back on built-in implementation", e);
          } else {
            log.warn("CuDNN execution failed - falling back on built-in implementation", e);
          }
        } else {
          throw new RuntimeException(
              "Error during ConvolutionLayer MKL/CuDNN helper forward pass - isCudnnAllowFallback() is set to false",
              e);
        }
      }

      if (ret != null) {
        return ret;
      }
    }

    INDArray activation = afn.getActivation(z, training);
    return activation;
  }

  @Override
  public boolean hasBias() {
    return getTypedLayerConfiguration().hasBias();
  }

  @Override
  public boolean isPretrainLayer() {
    return false;
  }

  @Override
  public LayerHelper getHelper() {
    return helper;
  }

  @Override
  public void fit(INDArray input, LayerWorkspaceMgr workspaceMgr) {
    throw new UnsupportedOperationException("Not supported");
  }

  @Override
  public void setParamsTable(INDArray paramsTable) {
    // Override, as base layer does f order parameter flattening by default
    setParams(paramsTable, 'c');
  }

  @Override
  public Pair<INDArray, MaskState> feedForwardMaskArray(
      INDArray maskArray, MaskState currentMaskState, int minibatchSize) {
    if (maskArray == null) {
      // For same mode (with stride 1): output activations size is always same size as input
      // activations size -> mask array is same size
      return new Pair<>(maskArray, currentMaskState);
    }

    INDArray outMask =
        Convolution2DUtils.cnn2dMaskReduction(
            maskArray,
            getTypedLayerConfiguration().getKernelSize(),
            getTypedLayerConfiguration().getStride(),
            getTypedLayerConfiguration().getPadding(),
            getTypedLayerConfiguration().getDilation(),
            getTypedLayerConfiguration().getConvolutionMode());
    return new Pair<>(outMask, currentMaskState);
  }
}
@@ -47,7 +47,7 @@ public class DenseLayer extends BaseLayer<org.deeplearning4j.nn.conf.layers.Dens

   @Override
   public boolean hasBias() {
-    return super.hasBias();
+    return getTypedLayerConfiguration().isHasBias();
   }

   @Override
@@ -37,59 +37,82 @@ import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.lossfunctions.ILossFunction;

public class RnnOutputLayer
    extends BaseOutputLayer<org.deeplearning4j.nn.conf.layers.RnnOutputLayer> {

  public RnnOutputLayer(LayerConfiguration conf, DataType dataType) {
    super(conf, dataType);
  }

  @Override
  public Pair<Gradient, INDArray> backpropGradient(
      INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    if (input.rank() != 3) {
      throw new UnsupportedOperationException(
          "Input is not rank 3. RnnOutputLayer expects rank 3 input with shape [minibatch, layerInSize, sequenceLength]."
              + " Got input with rank "
              + input.rank()
              + " and shape "
              + Arrays.toString(input.shape())
              + " - "
              + layerId());
    }

    RNNFormat format = getTypedLayerConfiguration().getDataFormat();
    int td = (format == RNNFormat.NCW) ? 2 : 1; // either NCW or NWC
    Preconditions.checkState(
        labels.rank() == 3,
        "Expected rank 3 labels array, got label array with shape %ndShape",
        labels);
    Preconditions.checkState(
        input.size(td) == labels.size(td),
        "Sequence lengths do not match for RnnOutputLayer input and labels:"
            + "Arrays should be rank 3 with shape [minibatch, size, sequenceLength] - "
            + "mismatch on dimension 2 (sequence length) - input=%ndShape vs. label=%ndShape.\n",
        input, "\n\n",
        labels);

    INDArray inputTemp = input;
    if (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC) {
      this.input = input.permute(0, 2, 1);
    }

    this.input = TimeSeriesUtils.reshape3dTo2d(input, workspaceMgr, ArrayType.BP_WORKING_MEM);

    applyDropOutIfNecessary(
        true,
        workspaceMgr); // Edge case: we skip OutputLayer forward pass during training as this isn't
    // required to calculate gradients

    Pair<Gradient, INDArray> gradAndEpsilonNext =
        super.backpropGradient(epsilon, workspaceMgr); // Also applies dropout
    this.input = inputTemp;
    INDArray epsilon2d = gradAndEpsilonNext.getSecond();

    INDArray epsilon3d =
        TimeSeriesUtils.reshape2dTo3d(
            epsilon2d, input.size(0), workspaceMgr, ArrayType.ACTIVATION_GRAD);
    if (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC) {
      epsilon3d = epsilon3d.permute(0, 2, 1);
    }
    weightNoiseParams.clear();

    // epsilon3d = backpropDropOutIfPresent(epsilon3d);
    return new Pair<>(gradAndEpsilonNext.getFirst(), epsilon3d);
  }

  /** {@inheritDoc} */
  @Override
  public double f1Score(INDArray examples, INDArray labels) {
    if (examples.rank() == 3)
      examples =
          TimeSeriesUtils.reshape3dTo2d(
              examples, LayerWorkspaceMgr.noWorkspaces(), ArrayType.ACTIVATIONS);
    if (labels.rank() == 3)
      labels =
          TimeSeriesUtils.reshape3dTo2d(
              labels, LayerWorkspaceMgr.noWorkspaces(), ArrayType.ACTIVATIONS);
    return super.f1Score(examples, labels);
  }

@@ -106,15 +129,18 @@ public class RnnOutputLayer extends BaseOutputLayer<org.deeplearning4j.nn.conf.l
  protected INDArray preOutput2d(boolean training, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(false);
    if (input.rank() == 3) {
      // Case when called from RnnOutputLayer
      INDArray inputTemp = input;
      input =
          (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC)
              ? input.permute(0, 2, 1)
              : input;
      input = TimeSeriesUtils.reshape3dTo2d(input, workspaceMgr, ArrayType.FF_WORKING_MEM);
      INDArray out = super.preOutput(training, workspaceMgr);
      this.input = inputTemp;
      return out;
    } else {
      // Case when called from BaseOutputLayer
      INDArray out = super.preOutput(training, workspaceMgr);
      return out;
    }

@@ -123,8 +149,11 @@ public class RnnOutputLayer extends BaseOutputLayer<org.deeplearning4j.nn.conf.l
  @Override
  protected INDArray getLabels2d(LayerWorkspaceMgr workspaceMgr, ArrayType arrayType) {
    INDArray labels = this.labels;
    if (labels.rank() == 3) {
      labels =
          (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC)
              ? labels.permute(0, 2, 1)
              : labels;
      return TimeSeriesUtils.reshape3dTo2d(labels, workspaceMgr, arrayType);
    }
    return labels;

@@ -140,24 +169,30 @@ public class RnnOutputLayer extends BaseOutputLayer<org.deeplearning4j.nn.conf.l
    INDArray W = getParamWithNoise(DefaultParamInitializer.WEIGHT_KEY, training, workspaceMgr);

    applyDropOutIfNecessary(training, workspaceMgr);
    if (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC) {
      input = input.permute(0, 2, 1);
    }
    INDArray input2d =
        TimeSeriesUtils.reshape3dTo2d(
            input.castTo(W.dataType()), workspaceMgr, ArrayType.FF_WORKING_MEM);

    INDArray act2d =
        getTypedLayerConfiguration()
            .getActivationFn()
            .getActivation(input2d.mmul(W).addiRowVector(b), training);
    if (maskArray != null) {
      if (!maskArray.isColumnVectorOrScalar() || Arrays.equals(maskArray.shape(), act2d.shape())) {
        // Per output masking
        act2d.muli(maskArray.castTo(act2d.dataType()));
      } else {
        // Per time step masking
        act2d.muliColumnVector(maskArray.castTo(act2d.dataType()));
      }
    }

    INDArray ret =
        TimeSeriesUtils.reshape2dTo3d(act2d, input.size(0), workspaceMgr, ArrayType.ACTIVATIONS);
    if (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC) {
      ret = ret.permute(0, 2, 1);
    }
    return ret;

@@ -166,17 +201,25 @@ public class RnnOutputLayer extends BaseOutputLayer<org.deeplearning4j.nn.conf.l
  @Override
  public void setMaskArray(INDArray maskArray) {
    if (maskArray != null) {
      // Two possible cases:
      // (a) per time step masking - rank 2 mask array -> reshape to rank 1 (column vector)
      // (b) per output masking - rank 3 mask array -> reshape to rank 2 (
      if (maskArray.rank() == 2) {
        this.maskArray =
            TimeSeriesUtils.reshapeTimeSeriesMaskToVector(
                maskArray, LayerWorkspaceMgr.noWorkspacesImmutable(), ArrayType.INPUT);
      } else if (maskArray.rank() == 3) {
        this.maskArray =
            TimeSeriesUtils.reshape3dTo2d(
                maskArray, LayerWorkspaceMgr.noWorkspacesImmutable(), ArrayType.INPUT);
      } else {
        throw new UnsupportedOperationException(
            "Invalid mask array: must be rank 2 or 3 (got: rank "
                + maskArray.rank()
                + ", shape = "
                + Arrays.toString(maskArray.shape())
                + ") "
                + layerId());
      }
    } else {
      this.maskArray = null;

@@ -184,42 +227,52 @@ public class RnnOutputLayer extends BaseOutputLayer<org.deeplearning4j.nn.conf.l
    }

  @Override
  public Pair<INDArray, MaskState> feedForwardMaskArray(
      INDArray maskArray, MaskState currentMaskState, int minibatchSize) {

    // If the *input* mask array is present and active, we should use it to mask the output
    if (maskArray != null && currentMaskState == MaskState.Active) {
      this.inputMaskArray =
          TimeSeriesUtils.reshapeTimeSeriesMaskToVector(
              maskArray, LayerWorkspaceMgr.noWorkspacesImmutable(), ArrayType.INPUT);
      this.inputMaskArrayState = currentMaskState;
    } else {
      this.inputMaskArray = null;
      this.inputMaskArrayState = null;
    }

    return null; // Last layer in network
  }

  /**
   * Compute the score for each example individually, after labels and input have been set.
   *
   * @param fullNetRegTerm Regularization score term for the entire network (or, 0.0 to not include
   *     regularization)
   * @return A column INDArray of shape [numExamples,1], where entry i is the score of the ith
   *     example
   */
  @Override
  public INDArray computeScoreForExamples(double fullNetRegTerm, LayerWorkspaceMgr workspaceMgr) {
    // For RNN: need to sum up the score over each time step before returning.

    if (input == null || labels == null)
      throw new IllegalStateException(
          "Cannot calculate score without input and labels " + layerId());
    INDArray preOut = preOutput2d(false, workspaceMgr);

    ILossFunction lossFunction = getTypedLayerConfiguration().getLossFunction();
    INDArray scoreArray =
        lossFunction.computeScoreArray(
            getLabels2d(workspaceMgr, ArrayType.FF_WORKING_MEM),
            preOut,
            getTypedLayerConfiguration().getActivationFn(),
            maskArray);
    // scoreArray: shape [minibatch*timeSeriesLength, 1]
    // Reshape it to [minibatch, timeSeriesLength] then sum over time step

    INDArray scoreArrayTs =
        TimeSeriesUtils.reshapeVectorToTimeSeriesMask(scoreArray, (int) input.size(0));
    INDArray summedScores = scoreArrayTs.sum(true, 1);

    if (fullNetRegTerm != 0.0) {
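The RnnOutputLayer changes above repeatedly permute between NWC and NCW and flatten rank-3 activations to 2d before calling the base output-layer logic. A tiny standalone sketch of that shape bookkeeping (plain Java; the dimension names follow the comments above, everything else is illustrative and not part of this commit):

public class RnnShapeSketch {
  public static void main(String[] args) {
    long miniBatch = 4, layerSize = 10, seqLen = 7;

    long[] ncw = {miniBatch, layerSize, seqLen}; // NCW: [minibatch, size, sequenceLength]
    long[] nwc = {miniBatch, seqLen, layerSize}; // NWC: [minibatch, sequenceLength, size]

    // permute(0, 2, 1) maps NWC -> NCW (and back), as in the layer code above.
    long[] permuted = {nwc[0], nwc[2], nwc[1]};
    System.out.println(java.util.Arrays.equals(permuted, ncw)); // true

    // reshape3dTo2d-style flattening: every time step becomes a row.
    long[] twoD = {miniBatch * seqLen, layerSize}; // [28, 10]
    System.out.println(java.util.Arrays.toString(twoD));
    // The 2d epsilon coming back from the base layer is reshaped to 3d again
    // ([minibatch, size, sequenceLength]) and permuted once more for NWC nets.
  }
}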
@@ -47,7 +47,7 @@ import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
 import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator;

 @Data
-public abstract class BaseWrapperLayer extends AbstractLayer {
+public abstract class BaseWrapperLayer<LayerConf_T extends LayerConfiguration> extends AbstractLayer {

   protected Layer underlying;

@@ -57,8 +57,8 @@ public abstract class BaseWrapperLayer extends AbstractLayer {
   }

   @Override
-  public BaseLayerConfiguration getTypedLayerConfiguration() {
-    return (BaseLayerConfiguration) underlying.getLayerConfiguration();
+  public LayerConf_T getTypedLayerConfiguration() {
+    return (LayerConf_T) underlying.getLayerConfiguration();
   }

   /**
@@ -712,7 +712,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork
       if (layer_conf instanceof BaseLayerConfiguration)
         ((BaseLayerConfiguration) layer_conf).setDataType(netDtype);

-      nParamsPerLayer[i] = layer_conf.initializer().numParams(layer_conf);
+      nParamsPerLayer[i] = layer_conf.numParams();
       paramLength += nParamsPerLayer[i];
     }
     log.debug(
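The MultiLayerNetwork hunk above now asks each layer configuration directly for its parameter count and sums the counts into the length of the flattened parameter vector. A minimal sketch of that accounting (plain Java; the layer sizes are made up for illustration and are not taken from this commit):

public class ParamCountSketch {
  // Dense layer parameter count: weights (nIn*nOut) plus biases (nOut).
  static long denseParams(long nIn, long nOut) {
    return nIn * nOut + nOut;
  }

  public static void main(String[] args) {
    long[][] layers = {{4, 30}, {30, 10}, {10, 3}}; // nIn/nOut per layer
    long[] nParamsPerLayer = new long[layers.length];
    long paramLength = 0;
    for (int i = 0; i < layers.length; i++) {
      nParamsPerLayer[i] = denseParams(layers[i][0], layers[i][1]);
      paramLength += nParamsPerLayer[i];
    }
    System.out.println(java.util.Arrays.toString(nParamsPerLayer)); // [150, 310, 33]
    System.out.println(paramLength); // 493
  }
}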
@@ -0,0 +1,183 @@
/*
 * ******************************************************************************
 * *
 * *
 * * This program and the accompanying materials are made available under the
 * * terms of the Apache License, Version 2.0 which is available at
 * * https://www.apache.org/licenses/LICENSE-2.0.
 * *
 * * See the NOTICE file distributed with this work for additional
 * * information regarding copyright ownership.
 * * Unless required by applicable law or agreed to in writing, software
 * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * * License for the specific language governing permissions and limitations
 * * under the License.
 * *
 * * SPDX-License-Identifier: Apache-2.0
 * *****************************************************************************
 */

package org.deeplearning4j.nn.params;

import java.util.*;
import lombok.val;
import org.deeplearning4j.nn.api.AbstractParamInitializer;
import org.deeplearning4j.nn.conf.layers.Convolution1DNew;
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
import org.deeplearning4j.nn.conf.layers.LayerConfiguration;
import org.deeplearning4j.nn.weights.WeightInitUtil;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.indexing.NDArrayIndex;

public class ConvolutionNewParamInitializer extends AbstractParamInitializer {

  private static final ConvolutionNewParamInitializer INSTANCE = new ConvolutionNewParamInitializer();

  public static ConvolutionNewParamInitializer getInstance() {
    return INSTANCE;
  }

  public final static String WEIGHT_KEY = DefaultParamInitializer.WEIGHT_KEY;
  public final static String BIAS_KEY = DefaultParamInitializer.BIAS_KEY;

  @Override
  public long numParams(LayerConfiguration l) {
    return l.numParams();
  }

  @Override
  public List<String> paramKeys(LayerConfiguration layer) {
    ConvolutionLayer layerConf = (ConvolutionLayer) layer;
    if (layerConf.hasBias()) {
      return Arrays.asList(WEIGHT_KEY, BIAS_KEY);
    } else {
      return weightKeys(layer);
    }
  }

  @Override
  public List<String> weightKeys(LayerConfiguration layer) {
    return Collections.singletonList(WEIGHT_KEY);
  }

  @Override
  public List<String> biasKeys(LayerConfiguration layer) {
    ConvolutionLayer layerConf = (ConvolutionLayer) layer;
    if (layerConf.hasBias()) {
      return Collections.singletonList(BIAS_KEY);
    } else {
      return Collections.emptyList();
    }
  }

  @Override
  public boolean isWeightParam(LayerConfiguration layer, String key) {
    return WEIGHT_KEY.equals(key);
  }

  @Override
  public boolean isBiasParam(LayerConfiguration layer, String key) {
    return BIAS_KEY.equals(key);
  }

  @Override
  public Map<String, INDArray> init(LayerConfiguration conf, INDArray paramsView, boolean initializeParams) {
    Convolution1DNew layer = (Convolution1DNew) conf;
    if (layer.getKernelSize().length != 2) throw new IllegalArgumentException("Filter size must be == 2");

    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());

    Convolution1DNew layerConf = (Convolution1DNew) conf;

    val nOut = layerConf.getNOut();

    if (layer.hasBias()) {
      // Standard case
      INDArray biasView = paramsView.get(NDArrayIndex.interval(0, 0, true), NDArrayIndex.interval(0, nOut));
      INDArray weightView = paramsView.get(NDArrayIndex.interval(0, 0, true), NDArrayIndex.interval(nOut, numParams(conf)));
      params.put(BIAS_KEY, createBias(conf, biasView, initializeParams));
      params.put(WEIGHT_KEY, createWeightMatrix(conf, weightView, initializeParams));
      conf.getNetConfiguration().addNetWideVariable(WEIGHT_KEY);
      conf.getNetConfiguration().addNetWideVariable(BIAS_KEY);
    } else {
      INDArray weightView = paramsView;
      params.put(WEIGHT_KEY, createWeightMatrix(conf, weightView, initializeParams));
      conf.getNetConfiguration().addNetWideVariable(WEIGHT_KEY);
    }

    return params;
  }

  @Override
  public Map<String, INDArray> getGradientsFromFlattened(LayerConfiguration conf, INDArray gradientView) {

    Convolution1DNew layerConf = (Convolution1DNew) conf;

    int[] kernel = layerConf.getKernelSize();
    val nIn = layerConf.getNIn();
    val nOut = layerConf.getNOut();

    Map<String, INDArray> out = new LinkedHashMap<>();
    if (layerConf.hasBias()) {
      // Standard case
      INDArray biasGradientView = gradientView.get(NDArrayIndex.interval(0, 0, true), NDArrayIndex.interval(0, nOut));
      INDArray weightGradientView =
          gradientView.get(NDArrayIndex.interval(0, 0, true), NDArrayIndex.interval(nOut, numParams(conf)))
              .reshape('c', nOut, nIn, kernel[0], kernel[1]);
      out.put(BIAS_KEY, biasGradientView);
      out.put(WEIGHT_KEY, weightGradientView);
    } else {
      INDArray weightGradientView = gradientView.reshape('c', nOut, nIn, kernel[0], kernel[1]);
      out.put(WEIGHT_KEY, weightGradientView);
    }
    return out;
  }

  // 1 bias per feature map
  protected INDArray createBias(LayerConfiguration conf, INDArray biasView, boolean initializeParams) {
    // the bias is a 1D tensor -- one bias per output feature map
    Convolution1DNew layerConf = (Convolution1DNew) conf;
    if (initializeParams) biasView.assign(layerConf.getBiasInit());
    return biasView;
  }

  protected INDArray createWeightMatrix(LayerConfiguration conf, INDArray weightView, boolean initializeParams) {
    /*
     Create a 4d weight matrix of:
       (number of kernels, num input channels, kernel height, kernel width)
     Note c order is used specifically for the CNN weights, as opposed to f order elsewhere
     Inputs to the convolution layer are:
     (batch size, num input feature maps, image height, image width)
     */
    Convolution1DNew layerConf = (Convolution1DNew) conf;
    if (initializeParams) {
      int[] kernel = layerConf.getKernelSize();
      int[] stride = layerConf.getStride();

      val inputDepth = layerConf.getNIn();
      val outputDepth = layerConf.getNOut();

      double fanIn = inputDepth * kernel[0] * kernel[1];
      double fanOut = outputDepth * kernel[0] * kernel[1] / ((double) stride[0] * stride[1]);

      val weightsShape = new long[] {outputDepth, inputDepth, kernel[0], kernel[1]};

      return layerConf.getWeightInit().init(fanIn, fanOut, weightsShape, 'c', weightView);
    } else {
      int[] kernel = layerConf.getKernelSize();
      return WeightInitUtil.reshapeWeights(
          new long[] {layerConf.getNOut(), layerConf.getNIn(), kernel[0], kernel[1]}, weightView, 'c');
    }
  }
}
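ConvolutionNewParamInitializer.init() above carves a single flattened parameter row into a bias segment of length nOut followed by a weight segment of length nOut*nIn*kH*kW (later viewed as a [nOut, nIn, kH, kW] array). A standalone sketch of the offsets involved (plain Java; the concrete sizes are illustrative, not taken from this commit):

public class FlattenedParamsSketch {
  public static void main(String[] args) {
    long nIn = 3, nOut = 8, kH = 3, kW = 1; // e.g. a 1D conv treated as [kH, kW=1]

    long biasLength = nOut;
    long weightLength = nOut * nIn * kH * kW;
    long numParams = biasLength + weightLength;

    // Bias occupies [0, nOut), weights occupy [nOut, numParams) in the flat view.
    long biasStart = 0, biasEnd = biasLength;
    long weightStart = biasLength, weightEnd = numParams;

    System.out.printf("bias=[%d,%d) weights=[%d,%d) total=%d%n",
        biasStart, biasEnd, weightStart, weightEnd, numParams);
    // The weight segment holds 8 x 3 x 3 x 1 = 72 values here, matching weightLength,
    // which is exactly what the reshape('c', nOut, nIn, kH, kW) above relies on.
  }
}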
@ -20,7 +20,6 @@
|
||||||
|
|
||||||
package org.deeplearning4j.util;
|
package org.deeplearning4j.util;
|
||||||
|
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import org.deeplearning4j.exception.DL4JInvalidConfigException;
|
import org.deeplearning4j.exception.DL4JInvalidConfigException;
|
||||||
import org.deeplearning4j.exception.DL4JInvalidInputException;
|
import org.deeplearning4j.exception.DL4JInvalidInputException;
|
||||||
|
@ -37,91 +36,88 @@ public class Convolution1DUtils {
|
||||||
|
|
||||||
private static final int ONE = 1;
|
private static final int ONE = 1;
|
||||||
|
|
||||||
|
private Convolution1DUtils() {}
|
||||||
|
|
||||||
private Convolution1DUtils() {
|
public static int getOutputSize(
|
||||||
}
|
INDArray inputData, int kernel, int strides, int padding, ConvolutionMode convolutionMode) {
|
||||||
|
|
||||||
|
|
||||||
public static int getOutputSize(INDArray inputData, int kernel, int strides, int padding,
|
|
||||||
ConvolutionMode convolutionMode) {
|
|
||||||
return getOutputSize(inputData, kernel, strides, padding, convolutionMode, ONE);
|
return getOutputSize(inputData, kernel, strides, padding, convolutionMode, ONE);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns true if the given layer has an
|
* Returns true if the given layer has an {@link RNNFormat}. This is true for: {@link
|
||||||
* {@link RNNFormat}.
|
* Convolution1D}, {@link Subsampling1DLayer} {@link SimpleRnn} {@link LSTM} {@link
|
||||||
* This is true for:
|
* EmbeddingSequenceLayer}
|
||||||
* {@link Convolution1D},
|
*
|
||||||
* {@link Subsampling1DLayer}
|
|
||||||
* {@link SimpleRnn}
|
|
||||||
* {@link LSTM}
|
|
||||||
* {@link EmbeddingSequenceLayer}
|
|
||||||
* @param layer the layer to test
|
* @param layer the layer to test
|
||||||
* @return true if the input layer has an rnn format
|
* @return true if the input layer has an rnn format false otherwise
|
||||||
* false otherwise
|
|
||||||
*/
|
*/
|
||||||
public static boolean hasRnnDataFormat(LayerConfiguration layer) {
|
public static boolean hasRnnDataFormat(LayerConfiguration layer) {
|
||||||
return layer instanceof Convolution1D ||
|
return layer instanceof Convolution1D
|
||||||
layer instanceof Convolution1D ||
|
|| layer instanceof Convolution1D
|
||||||
layer instanceof Subsampling1DLayer ||
|
|| layer instanceof Subsampling1DLayer
|
||||||
layer instanceof SimpleRnn ||
|
|| layer instanceof SimpleRnn
|
||||||
layer instanceof LSTM ||
|
|| layer instanceof LSTM
|
||||||
layer instanceof EmbeddingSequenceLayer;
|
|| layer instanceof EmbeddingSequenceLayer;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the {@link RNNFormat} for the given layer.
|
* Get the {@link RNNFormat} for the given layer. Throws an {@link IllegalArgumentException} if a
|
||||||
* Throws an {@link IllegalArgumentException}
|
* layer doesn't have an rnn format
|
||||||
* if a layer doesn't have an rnn format
|
*
|
||||||
* @param layer the layer to get the format for
|
* @param layer the layer to get the format for
|
||||||
* @return the format for the layer
|
* @return the format for the layer
|
||||||
*/
|
*/
|
||||||
public static RNNFormat getRnnFormatFromLayer(LayerConfiguration layer) {
|
public static RNNFormat getRnnFormatFromLayer(LayerConfiguration layer) {
|
||||||
Preconditions.checkState(hasRnnDataFormat(layer),"ILayer of type " + layer.getClass().getName() + " and name " + layer.getName() + " does not have an RNNFormat");
|
Preconditions.checkState(
|
||||||
if(layer instanceof SimpleRnn) {
|
hasRnnDataFormat(layer),
|
||||||
|
"ILayer of type "
|
||||||
|
+ layer.getClass().getName()
|
||||||
|
+ " and name "
|
||||||
|
+ layer.getName()
|
||||||
|
+ " does not have an RNNFormat");
|
||||||
|
if (layer instanceof SimpleRnn) {
|
||||||
SimpleRnn simpleRnn = (SimpleRnn) layer;
|
SimpleRnn simpleRnn = (SimpleRnn) layer;
|
||||||
return simpleRnn.getDataFormat();
|
return simpleRnn.getDataFormat();
|
||||||
} else if(layer instanceof Convolution1D) {
|
} else if (layer instanceof Convolution1D) {
|
||||||
Convolution1D convolution1D = (Convolution1D) layer;
|
Convolution1D convolution1D = (Convolution1D) layer;
|
||||||
return convolution1D.getRnnDataFormat();
|
return convolution1D.getRnnDataFormat();
|
||||||
} else if(layer instanceof Convolution1D) {
|
} else if (layer instanceof Convolution1D) {
|
||||||
Convolution1D convolution1D = (Convolution1D) layer;
|
Convolution1D convolution1D = (Convolution1D) layer;
|
||||||
return convolution1D.getRnnDataFormat();
|
return convolution1D.getRnnDataFormat();
|
||||||
} else if(layer instanceof Subsampling1DLayer) {
|
} else if (layer instanceof Subsampling1DLayer) {
|
||||||
Subsampling1DLayer subsampling1DLayer = (Subsampling1DLayer) layer;
|
Subsampling1DLayer subsampling1DLayer = (Subsampling1DLayer) layer;
|
||||||
return subsampling1DLayer.getDataFormat() == CNN2DFormat.NCHW ? RNNFormat.NCW : RNNFormat.NWC;
|
return subsampling1DLayer.getDataFormat() == CNN2DFormat.NCHW ? RNNFormat.NCW : RNNFormat.NWC;
|
||||||
} else if(layer instanceof LSTM) {
|
} else if (layer instanceof LSTM) {
|
||||||
LSTM lstm = (LSTM) layer;
|
LSTM lstm = (LSTM) layer;
|
||||||
return lstm.getDataFormat();
|
return lstm.getDataFormat();
|
||||||
} else if(layer instanceof EmbeddingSequenceLayer) {
|
} else if (layer instanceof EmbeddingSequenceLayer) {
|
||||||
EmbeddingSequenceLayer embeddingSequenceLayer = (EmbeddingSequenceLayer) layer;
|
EmbeddingSequenceLayer embeddingSequenceLayer = (EmbeddingSequenceLayer) layer;
|
||||||
return embeddingSequenceLayer.getOutputDataFormat();
|
return embeddingSequenceLayer.getOutputDataFormat();
|
||||||
}
|
} else {
|
||||||
else {
|
throw new IllegalArgumentException(
|
||||||
throw new IllegalArgumentException("Illegal layer type " + layer.getClass().getName() + " and name " + layer.getName());
|
"Illegal layer type " + layer.getClass().getName() + " and name " + layer.getName());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reshapes the given weight
|
* Reshapes the given weight array or weight gradient to work with the specified {@link RNNFormat}
|
||||||
* array or weight gradient
|
*
|
||||||
* to work with the specified
|
|
||||||
* {@link RNNFormat}
|
|
||||||
* @param w the weight array or gradient
|
* @param w the weight array or gradient
|
||||||
* @param rnnFormat the {@link RNNFormat} to use
|
* @param rnnFormat the {@link RNNFormat} to use
|
||||||
* @return the reshaped array.
|
* @return the reshaped array.
|
||||||
*/
|
*/
|
||||||
public static INDArray reshapeWeightArrayOrGradientForFormat(INDArray w, RNNFormat rnnFormat) {
|
public static INDArray reshapeWeightArrayOrGradientForFormat(INDArray w, RNNFormat rnnFormat) {
|
||||||
if(rnnFormat == RNNFormat.NWC)
|
|
||||||
w = w.reshape(w.ordering(), w.size(0), w.size(1), w.size(2)).permute(2, 1, 0); //[oC, iC, k, 1] to [k, iC, oC]
|
if (rnnFormat == RNNFormat.NWC)
|
||||||
|
w = w.reshape(w.ordering(), w.size(0), w.size(1), w.size(2))
|
||||||
|
.permute(2, 1, 0); // [oC, iC, k, 1] to [k, iC, oC]
|
||||||
else {
|
else {
|
||||||
w = w.reshape(w.ordering(),w.size(2),w.size(1),w.size(0));
|
w = w.reshape(w.ordering(), w.size(2), w.size(1), w.size(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
return w;
|
return w;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the output size (height) for the given input data and CNN1D configuration
|
* Get the output size (height) for the given input data and CNN1D configuration
|
||||||
*
|
*
|
||||||
|
@ -133,8 +129,13 @@ public class Convolution1DUtils {
|
||||||
* @param dilation Kernel dilation
|
* @param dilation Kernel dilation
|
||||||
* @return Output size (width)
|
* @return Output size (width)
|
||||||
*/
|
*/
|
||||||
public static long getOutputSize(long inH, int kernel, int strides, int padding,
|
public static long getOutputSize(
|
||||||
ConvolutionMode convolutionMode, int dilation) {
|
long inH,
|
||||||
|
int kernel,
|
||||||
|
int strides,
|
||||||
|
int padding,
|
||||||
|
ConvolutionMode convolutionMode,
|
||||||
|
int dilation) {
|
||||||
long eKernel = effectiveKernelSize(kernel, dilation);
|
long eKernel = effectiveKernelSize(kernel, dilation);
|
||||||
if (convolutionMode == ConvolutionMode.Same || convolutionMode == ConvolutionMode.Causal) {
|
if (convolutionMode == ConvolutionMode.Same || convolutionMode == ConvolutionMode.Causal) {
|
||||||
return (int) Math.ceil(inH / ((double) strides));
|
return (int) Math.ceil(inH / ((double) strides));
|
||||||
|
@ -153,10 +154,14 @@ public class Convolution1DUtils {
|
||||||
* @param dilation Kernel dilation
|
* @param dilation Kernel dilation
|
||||||
* @return Output size (width)
|
* @return Output size (width)
|
||||||
*/
|
*/
|
||||||
public static int getOutputSize(INDArray inputData, int kernel, int strides, int padding,
|
public static int getOutputSize(
|
||||||
ConvolutionMode convolutionMode, int dilation) {
|
INDArray inputData,
|
||||||
if (inputData.size(2) > Integer.MAX_VALUE)
|
int kernel,
|
||||||
throw new ND4JArraySizeException();
|
int strides,
|
||||||
|
int padding,
|
||||||
|
ConvolutionMode convolutionMode,
|
||||||
|
int dilation) {
|
||||||
|
if (inputData.size(2) > Integer.MAX_VALUE) throw new ND4JArraySizeException();
|
||||||
int inH = (int) inputData.size(2);
|
int inH = (int) inputData.size(2);
|
||||||
int eKernel = effectiveKernelSize(kernel, dilation);
|
int eKernel = effectiveKernelSize(kernel, dilation);
|
||||||
boolean atrous = (eKernel == kernel);
|
boolean atrous = (eKernel == kernel);
|
||||||
|
@ -171,8 +176,14 @@ public class Convolution1DUtils {
|
||||||
return outH;
|
return outH;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void validateShapes(INDArray inputData, int eKernel, int strides, int padding,
|
public static void validateShapes(
|
||||||
ConvolutionMode convolutionMode, int dilation, int inShape,
|
INDArray inputData,
|
||||||
|
int eKernel,
|
||||||
|
int strides,
|
||||||
|
int padding,
|
||||||
|
ConvolutionMode convolutionMode,
|
||||||
|
int dilation,
|
||||||
|
int inShape,
|
||||||
boolean atrous) {
|
boolean atrous) {
|
||||||
|
|
||||||
int inH = inShape;
|
int inH = inShape;
|
||||||
|
@ -186,15 +197,21 @@ public class Convolution1DUtils {
|
||||||
if (atrous) sb.append("effective ");
|
if (atrous) sb.append("effective ");
|
||||||
sb.append("kernel height <= input height + 2 * padding height. \nGot ");
|
sb.append("kernel height <= input height + 2 * padding height. \nGot ");
|
||||||
if (atrous) sb.append("effective ");
|
if (atrous) sb.append("effective ");
|
||||||
sb.append("kernel height = ").append(eKernel).append(", input height = ").append(inH)
|
sb.append("kernel height = ")
|
||||||
.append(" and padding height = ").append(padding).append(" which do not satisfy 0 < ")
|
.append(eKernel)
|
||||||
.append(eKernel).append(" <= ").append(inH + 2 * padding)
|
.append(", input height = ")
|
||||||
|
.append(inH)
|
||||||
|
.append(" and padding height = ")
|
||||||
|
.append(padding)
|
||||||
|
.append(" which do not satisfy 0 < ")
|
||||||
|
.append(eKernel)
|
||||||
|
.append(" <= ")
|
||||||
|
.append(inH + 2 * padding)
|
||||||
.append(getCommonErrorMsg(inputData, eKernel, strides, padding, dilation));
|
.append(getCommonErrorMsg(inputData, eKernel, strides, padding, dilation));
|
||||||
|
|
||||||
throw new DL4JInvalidInputException(sb.toString());
|
throw new DL4JInvalidInputException(sb.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if (convolutionMode == ConvolutionMode.Strict) {
|
if (convolutionMode == ConvolutionMode.Strict) {
|
||||||
if ((inH - eKernel + 2 * padding) % strides != 0) {
|
if ((inH - eKernel + 2 * padding) % strides != 0) {
|
||||||
double d = (inH - eKernel + 2 * padding) / ((double) strides) + 1.0;
|
double d = (inH - eKernel + 2 * padding) / ((double) strides) + 1.0;
|
||||||
|
@ -202,32 +219,46 @@ public class Convolution1DUtils {
|
||||||
int truncated = (int) d;
|
int truncated = (int) d;
|
||||||
int sameSize = (int) Math.ceil(inH / ((double) strides));
|
int sameSize = (int) Math.ceil(inH / ((double) strides));
|
||||||
|
|
||||||
String sb = "Invalid input data or configuration: Combination of kernel size, " +
|
String sb =
|
||||||
"stride and padding are not " +
|
"Invalid input data or configuration: Combination of kernel size, "
|
||||||
"valid for given input height, using ConvolutionMode.Strict\n" +
|
+ "stride and padding are not "
|
||||||
"ConvolutionMode.Strict requires: output height = (input height - kernelSize + " +
|
+ "valid for given input height, using ConvolutionMode.Strict\n"
|
||||||
"2*padding)/stride + 1 to be an integer. Got: (" +
|
+ "ConvolutionMode.Strict requires: output height = (input height - kernelSize + "
|
||||||
inH + " - " + eKernel + " + 2*" + padding + ")/" +
|
+ "2*padding)/stride + 1 to be an integer. Got: ("
|
||||||
strides + " + 1 = " +
|
+ inH
|
||||||
str + "\n" + "See \"Constraints on strides\" at http://cs231n.github." +
|
+ " - "
|
||||||
"io/convolutional-networks/ and ConvolutionType enumeration Javadoc.\n" +
|
+ eKernel
|
||||||
"To truncate/crop the input, such that output height = floor(" +
|
+ " + 2*"
|
||||||
str + ") = " +
|
+ padding
|
||||||
truncated + ", use ConvolutionType.Truncate.\n" +
|
+ ")/"
|
||||||
"Alternatively use ConvolutionType.Same, which will use padding to give an " +
|
+ strides
|
||||||
"output height of ceil(" +
|
+ " + 1 = "
|
||||||
inH + "/" + strides + ")=" + sameSize +
|
+ str
|
||||||
getCommonErrorMsg(inputData, eKernel, strides, padding, dilation);
|
+ "\n"
|
||||||
|
+ "See \"Constraints on strides\" at http://cs231n.github."
|
||||||
|
+ "io/convolutional-networks/ and ConvolutionType enumeration Javadoc.\n"
|
||||||
|
+ "To truncate/crop the input, such that output height = floor("
|
||||||
|
+ str
|
||||||
|
+ ") = "
|
||||||
|
+ truncated
|
||||||
|
+ ", use ConvolutionType.Truncate.\n"
|
||||||
|
+ "Alternatively use ConvolutionType.Same, which will use padding to give an "
|
||||||
|
+ "output height of ceil("
|
||||||
|
+ inH
|
||||||
|
+ "/"
|
||||||
|
+ strides
|
||||||
|
+ ")="
|
||||||
|
+ sameSize
|
||||||
|
+ getCommonErrorMsg(inputData, eKernel, strides, padding, dilation);
|
||||||
|
|
||||||
throw new DL4JInvalidConfigException(sb);
|
throw new DL4JInvalidConfigException(sb);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static int effectiveKernelSize(int kernel, int dilation) {
|
public static int effectiveKernelSize(int kernel, int dilation) {
|
||||||
//Determine the effective kernel size, accounting for dilation
|
// Determine the effective kernel size, accounting for dilation
|
||||||
//http://deeplearning.net/software/theano/tutorial/conv_arithmetic.html#dilated-convolutions
|
// http://deeplearning.net/software/theano/tutorial/conv_arithmetic.html#dilated-convolutions
|
||||||
if (dilation == 1) {
|
if (dilation == 1) {
|
||||||
return kernel;
|
return kernel;
|
||||||
} else {
|
} else {
|
||||||
|
@ -235,9 +266,13 @@ public class Convolution1DUtils {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static String getCommonErrorMsg(INDArray inputData, int kernel, int strides, int padding, int dilation) {
|
private static String getCommonErrorMsg(
|
||||||
String s = "\nInput size: [numExamples,inputDepth,inputHeight,inputWidth]=" + Arrays.toString(inputData.shape())
|
INDArray inputData, int kernel, int strides, int padding, int dilation) {
|
||||||
+ ", inputKernel=" + kernel;
|
String s =
|
||||||
|
"\nInput size: [numExamples,inputDepth,inputHeight,inputWidth]="
|
||||||
|
+ Arrays.toString(inputData.shape())
|
||||||
|
+ ", inputKernel="
|
||||||
|
+ kernel;
|
||||||
if (dilation != 1) {
|
if (dilation != 1) {
|
||||||
int effectiveKernel = effectiveKernelSize(kernel, dilation);
|
int effectiveKernel = effectiveKernelSize(kernel, dilation);
|
||||||
s += ", effectiveKernelGivenDilation=" + effectiveKernel;
|
s += ", effectiveKernelGivenDilation=" + effectiveKernel;
|
||||||
|
@ -245,16 +280,13 @@ public class Convolution1DUtils {
|
||||||
return s + ", stride=" + strides + ", padding=" + padding + ", dilation=" + dilation;
|
return s + ", stride=" + strides + ", padding=" + padding + ", dilation=" + dilation;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Check that the convolution mode is consistent with the padding specification */
|
||||||
/**
|
|
||||||
* Check that the convolution mode is consistent with the padding specification
|
|
||||||
*/
|
|
||||||
public static void validateConvolutionModePadding(ConvolutionMode mode, int padding) {
|
public static void validateConvolutionModePadding(ConvolutionMode mode, int padding) {
|
||||||
if (mode == ConvolutionMode.Same) {
|
if (mode == ConvolutionMode.Same) {
|
||||||
boolean nullPadding = padding == 0;
|
boolean nullPadding = padding == 0;
|
||||||
if (!nullPadding)
|
if (!nullPadding)
|
||||||
throw new IllegalArgumentException("Padding cannot be used when using the `same' convolution mode");
|
throw new IllegalArgumentException(
|
||||||
|
"Padding cannot be used when using the `same' convolution mode");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -268,30 +300,48 @@ public class Convolution1DUtils {
   * @param dilation Dilation (length 2 array, height dimension first)
   * @return Top left padding (length 2 array, height dimension first)
   */
  public static int getSameModeTopLeftPadding(
      int outSize, int inSize, int kernel, int strides, int dilation) {
    int eKernel = effectiveKernelSize(kernel, dilation);
    // Note that padBottom is 1 bigger than this if bracketed term is not divisible by 2
    int outPad = ((outSize - 1) * strides + eKernel - inSize) / 2;
    Preconditions.checkState(
        outPad >= 0,
        "Invalid padding values calculated: %s - "
            + "layer configuration is invalid? Input size %s, output size %s, kernel %s, "
            + "strides %s, dilation %s",
        outPad,
        inSize,
        outSize,
        kernel,
        strides,
        dilation);
    return outPad;
  }

  public static int getSameModeBottomRightPadding(
      int outSize, int inSize, int kernel, int strides, int dilation) {
    int eKernel = effectiveKernelSize(kernel, dilation);
    int totalPad = ((outSize - 1) * strides + eKernel - inSize);
    int tlPad = totalPad / 2;
    int brPad = totalPad - tlPad;
    Preconditions.checkState(
        brPad >= 0,
        "Invalid padding values (right) calculated: %s - "
            + "layer configuration is invalid? Input size %s, output size %s, kernel %s, "
            + "strides %s, dilation %s",
        brPad,
        inSize,
        outSize,
        kernel,
        strides,
        dilation);
    return brPad;
  }

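A worked example of the Same-mode padding arithmetic implemented by the two methods above (hypothetical numbers, added for illustration):

    // inSize = 10, kernel = 4, strides = 1, dilation = 1, so outSize = ceil(10 / 1) = 10
    // and eKernel == kernel because dilation == 1.
    int totalPad = (10 - 1) * 1 + 4 - 10; // = 3
    int tlPad = totalPad / 2;             // = 1, as getSameModeTopLeftPadding returns
    int brPad = totalPad - tlPad;         // = 2, as getSameModeBottomRightPadding returns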
  /**
   * Perform validation on the CNN layer kernel/stride/padding. Expect int, with values > 0 for
   * kernel size and stride, and values >= 0 for padding.
   *
   * @param kernel Kernel size to check
   * @param stride Stride to check

@@ -300,16 +350,16 @@ public class Convolution1DUtils {
  public static void validateCnn1DKernelStridePadding(int kernel, int stride, int padding) {

    if (kernel <= 0) {
      throw new IllegalStateException(
          "Invalid kernel size: value must be positive (> 0). Got: " + kernel);
    }
    if (stride <= 0) {
      throw new IllegalStateException(
          "Invalid stride: value must be positive (> 0). Got: " + stride);
    }
    if (padding < 0) {
      throw new IllegalStateException(
          "Invalid padding: value must be non-negative (>= 0). Got: " + padding);
    }
  }


}