Fixing tests

Signed-off-by: brian <brian@brutex.de>
enhance-build-infrastructure
Brian Rosenberger 2023-07-25 10:59:46 +02:00
parent 997143b9dd
commit 4dc5a116b6
41 changed files with 4285 additions and 1309 deletions

View File

@@ -0,0 +1,167 @@
/*
*
* ******************************************************************************
* *
* * This program and the accompanying materials are made available under the
* * terms of the Apache License, Version 2.0 which is available at
* * https://www.apache.org/licenses/LICENSE-2.0.
* *
* * See the NOTICE file distributed with this work for additional
* * information regarding copyright ownership.
* * Unless required by applicable law or agreed to in writing, software
* * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* * License for the specific language governing permissions and limitations
* * under the License.
* *
* * SPDX-License-Identifier: Apache-2.0
* *****************************************************************************
*
*/
package net.brutex.ai.nd4j.tests;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.datasets.iterator.INDArrayDataSetIterator;
import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator;
import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.junit.jupiter.api.Test;
import org.nd4j.common.primitives.Pair;
import org.nd4j.evaluation.classification.Evaluation;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.lossfunctions.LossFunctions;
@Slf4j
public class ExploreParamsTest {
@Test
public void testParam() {
NeuralNetConfiguration conf =
NeuralNetConfiguration.builder()
.seed(12345)
.dataType(DataType.DOUBLE)
.layer(
DenseLayer.builder().nIn(4).nOut(30).name("1. Dense").activation(Activation.TANH))
.layer(DenseLayer.builder().nIn(30).nOut(10).name("2. Dense"))
// .layer(FrozenLayer.builder(DenseLayer.builder().nOut(6).build()).build())
.layer(
OutputLayer.builder()
.nOut(3)
.lossFunction(LossFunctions.LossFunction.MSE)
.activation(Activation.SOFTMAX))
.build();
MultiLayerNetwork nn = new MultiLayerNetwork(conf);
nn.init();
log.info(nn.summary());
// INDArray input = Nd4j.rand(10,4);
INDArray labels = Nd4j.zeros(9, 3);
INDArray input =
Nd4j.create(
new double[][] {
{5.15, 3.5, 1.4, 0.21}, // setosa
{4.9, 3.2, 1.4, 0.2}, // setosa
{4.7, 3.2, 1.23, 0.2}, // setosa
{7, 3.25, 4.7, 1.41}, // versicolor
{6.4, 3.2, 4.54, 1.5}, // versicolor
{6.9, 3.1, 4.92, 1.5}, // versicolor
{7.7, 3, 6.1, 2.3}, // virginica
{6.3, 3.4, 5.6, 2.45}, // virginica
{6.4, 3.12, 5.5, 1.8} // virginica
});
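// One-hot encode the labels: putScalar with a single index uses flat row-major addressing into the
// 9x3 array (index = row * 3 + column), so rows 0-2 get class 0 (setosa), rows 3-5 class 1
// (versicolor) and rows 6-8 class 2 (virginica).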
labels.putScalar(0, 1);
labels.putScalar(3, 1);
labels.putScalar(6, 1);
labels.putScalar(10, 1);
labels.putScalar(13, 1);
labels.putScalar(16, 1);
labels.putScalar(20, 1);
labels.putScalar(23, 1);
labels.putScalar(26, 1);
IrisDataSetIterator iter = new IrisDataSetIterator();
//Iterable<Pair<INDArray, INDArray>> it = List.of(new Pair<INDArray, INDArray>(input, labels));
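// Wrap each (features, labels) row in a Pair so INDArrayDataSetIterator can serve them as minibatches of size 1.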
List<Pair<INDArray, INDArray>> l = new ArrayList<>();
for (int i = 0; i < input.rows(); i++) {
l.add(new Pair<>(input.getRow(i), labels.getRow(i)));
}
Iterable<Pair<INDArray, INDArray>> it = l;
INDArrayDataSetIterator diter = new INDArrayDataSetIterator(it, 1);
for (int i = 0; i < 100; i++) {
// nn.fit(input, labels);
// nn.fit( input, labels);
nn.fit(diter);
// nn.feedForward(input);
if(i%20==0) log.info("Score: {}", nn.getScore());
}
Evaluation eval = nn.evaluate(iter, List.of("setosa", "versicolor", "virginica"));
log.info("\n{}", eval.stats());
}
@Test
public void testParam2() throws IOException {
NeuralNetConfiguration conf =
NeuralNetConfiguration.builder()
.seed(12345)
.layer(
DenseLayer.builder().nIn(784).nOut(20).name("1. Dense"))
.layer(DenseLayer.builder().nIn(20).nOut(10).name("2. Dense"))
.layer(
OutputLayer.builder()
.nOut(10)
.lossFunction(LossFunctions.LossFunction.MSE)
.activation(Activation.SOFTMAX))
.build();
MultiLayerNetwork nn = new MultiLayerNetwork(conf);
nn.init();
log.info(nn.summary());
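// Second configuration: identical to conf, except for dropout on the first dense layer
// (in DL4J, the dropOut value is the probability of retaining an activation).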
NeuralNetConfiguration conf2 =
NeuralNetConfiguration.builder()
.seed(12345)
.layer(
DenseLayer.builder().nIn(784).nOut(20).name("1. Dense").dropOut(0.7))
.layer(DenseLayer.builder().nIn(20).nOut(10).name("2. Dense"))
.layer(
OutputLayer.builder()
.nOut(10)
.lossFunction(LossFunctions.LossFunction.MSE)
.activation(Activation.SOFTMAX))
.build();
MultiLayerNetwork nn2 = new MultiLayerNetwork(conf2);
nn2.init();
log.info(nn2.summary());
MnistDataSetIterator iter = new MnistDataSetIterator(10, 500);
MnistDataSetIterator iter2 = new MnistDataSetIterator(10, 50);
for (int i = 0; i < 200; i++) {
nn.fit(iter);
nn2.fit(iter);
if(i%20==0) log.info("Score: {} vs. {}", nn.getScore(), nn2.getScore());
}
Evaluation eval = nn.evaluate(iter2);
Evaluation eval2 = nn2.evaluate(iter2);
log.info("\n{} \n{}", eval.stats(), eval2.stats());
}
}

View File

@@ -45,6 +45,7 @@ import org.datavec.image.transform.PipelineImageTransform;
import org.datavec.image.transform.ResizeImageTransform;
import org.datavec.image.transform.ShowImageTransform;
import org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator;
+import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator;
import org.deeplearning4j.nn.conf.GradientNormalization;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.distribution.Distribution;
@@ -65,6 +66,7 @@ import org.deeplearning4j.optimize.listeners.ScoreToChartListener;
import org.junit.jupiter.api.Test;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.activations.impl.ActivationLReLU;
+import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
@@ -80,11 +82,11 @@ public class App {
private static final int X_DIM = 20 ;
private static final int Y_DIM = 20;
-private static final int CHANNELS = 1;
+private static final int CHANNELS = 3;
-private static final int batchSize = 10;
+private static final int batchSize = 50;
private static final int INPUT = 128;
-private static final int OUTPUT_PER_PANEL = 4;
+private static final int OUTPUT_PER_PANEL = 16;
private static final int ARRAY_SIZE_PER_SAMPLE = X_DIM*Y_DIM*CHANNELS;
private static final IUpdater UPDATER = Adam.builder().learningRate(LEARNING_RATE).beta1(0.5).build();
@@ -146,7 +148,7 @@ public class App {
ActivationLayer.builder(new ActivationLReLU(0.2)).build(),
DropoutLayer.builder(1 - 0.5).build(),
-OutputLayer.builder().name("dis-output").lossFunction(LossFunction.XENT).nIn(X_DIM*Y_DIM).nOut(1).activation(Activation.SIGMOID).build()
+OutputLayer.builder().name("dis-output").lossFunction(LossFunction.MCXENT).nIn(X_DIM*Y_DIM).nOut(1).activation(Activation.SIGMOID).build()
};
}
@@ -196,6 +198,7 @@ public class App {
.activation( Activation.IDENTITY )
.layersFromArray( layers )
.inputType( InputType.convolutional(X_DIM, Y_DIM, CHANNELS))
+.dataType(DataType.FLOAT)
.build();
((NeuralNetConfiguration) conf).init();
return conf;
@@ -223,7 +226,7 @@ public class App {
ImageTransform transform3 = new ResizeImageTransform(X_DIM, Y_DIM);
ImageTransform tr = new PipelineImageTransform.Builder()
-.addImageTransform(transform) //convert to GREY SCALE
+//.addImageTransform(transform) //convert to GREY SCALE
.addImageTransform(transform3)
//.addImageTransform(transform2)
.build();
@@ -270,10 +273,10 @@ public class App {
break;
}
-if(i%20 == 0) {
+//if(i%20 == 0) {
-// frame2 = visualize(new INDArray[]{real}, batchSize,
+frame2 = visualize(new INDArray[]{real}, batchSize,
-// frame2 == null ? new JFrame() : frame2, true); //real has batchsize number of images
+frame2 == null ? new JFrame() : frame2, true); //real has batchsize number of images
-}
+//}
real.divi(255f);
// int batchSize = (int) real.shape()[0];
@@ -290,7 +293,7 @@ public class App {
DataSet data = DataSet.merge(Arrays.asList(realSet, fakeSet));
dis.fit(data);
-dis.fit(data);
+//dis.fit(data);
// Update the discriminator in the GAN network
updateGan(gen, dis, gan);
@@ -298,7 +301,7 @@ public class App {
//gan.fit(new DataSet(Nd4j.rand(batchSize, INPUT), Nd4j.zeros(batchSize, 1)));
gan.fit(new DataSet(Nd4j.rand(batchSize, CHANNELS, X_DIM, Y_DIM), Nd4j.zeros(batchSize, 1)));
+//Visualize and reporting
if (j % 10 == 1) {
System.out.println("Iteration " + j + " Visualizing...");
INDArray[] samples = batchSize > OUTPUT_PER_PANEL ? new INDArray[OUTPUT_PER_PANEL] : new INDArray[batchSize];
@@ -320,11 +323,16 @@ public class App {
frame = visualize(samples, 1, frame == null ? new JFrame() : frame, false); //each samples only has 1 image, thus batchElements=1
}
}
+if (trainData.resetSupported()) {
trainData.reset();
+} else {
+log.error("Trainingdata {} does not support reset.", trainData.toString());
+}
// Copy the GANs generator to gen.
updateGen(gen, gan);
+}
gen.save(new File("mnist-mlp-generator.dlj"));
}
@@ -383,7 +391,12 @@ public class App {
}
private static JLabel getImage(INDArray tensor, int batchElement, boolean isOrig) {
-final BufferedImage bi = new BufferedImage(X_DIM, Y_DIM, BufferedImage.TYPE_BYTE_GRAY);
+final BufferedImage bi;
+if(CHANNELS>1) {
+bi = new BufferedImage(X_DIM, Y_DIM, BufferedImage.TYPE_INT_RGB); //need to change here based on channels
+} else {
+bi = new BufferedImage(X_DIM, Y_DIM, BufferedImage.TYPE_BYTE_GRAY); //need to change here based on channels
+}
final int imageSize = X_DIM * Y_DIM;
final int offset = batchElement * imageSize;
int pxl = offset * CHANNELS; //where to start in the INDArray

View File

@@ -24,12 +24,14 @@ package net.brutex.gan;
import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator;
import org.deeplearning4j.nn.conf.GradientNormalization;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
+import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.ActivationLayer;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.DropoutLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.weights.WeightInit;
+import org.junit.jupiter.api.Test;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.activations.impl.ActivationLReLU;
import org.nd4j.linalg.api.ndarray.INDArray;
@@ -98,7 +100,10 @@ public class MnistSimpleGAN {
return new MultiLayerNetwork(discConf);
}
+@Test
+public void runTest() throws Exception {
+main(null);
+}
public static void main(String[] args) throws Exception {
GAN gan = new GAN.Builder()
.generator(MnistSimpleGAN::getGenerator)
@@ -108,6 +113,7 @@ public class MnistSimpleGAN {
.updater(UPDATER)
.gradientNormalization(GradientNormalization.RenormalizeL2PerLayer)
.gradientNormalizationThreshold(100)
.build();
Nd4j.getMemoryManager().setAutoGcWindow(15 * 1000);

View File

@@ -2386,7 +2386,11 @@ public interface INDArray extends Serializable, AutoCloseable {
long[] stride();
/**
-* Return the ordering (fortran or c 'f' and 'c' respectively) of this ndarray
+* Return the ordering (fortran or c 'f' and 'c' respectively) of this ndarray <br/><br/>
+* C Is Contiguous layout. Mathematically speaking, row major.<br/>
+* F Is Fortran contiguous layout. Mathematically speaking, column major.<br/>
+* {@see https://en.wikipedia.org/wiki/Row-_and_column-major_order}<br/>
+*
* @return the ordering of this ndarray
*/
char ordering();
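For illustration only (not part of this commit; it assumes the standard Nd4j factory methods), the ordering flag reflects the layout requested when an array is created:

INDArray cOrder = Nd4j.create(new int[] {2, 3}, 'c'); // row-major (C / contiguous) layout
INDArray fOrder = Nd4j.create(new int[] {2, 3}, 'f'); // column-major (Fortran) layout
char c = cOrder.ordering(); // 'c'
char f = fOrder.ordering(); // 'f'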

View File

@@ -5121,7 +5121,7 @@ public class Nd4j {
Nd4j.backend = backend;
updateNd4jContext();
props = Nd4jContext.getInstance().getConf();
-logger.info("Properties for Nd4jContext " + props);
+log.debug("Properties for Nd4jContext {}", props);
PropertyParser pp = new PropertyParser(props);
String otherDtype = pp.toString(ND4JSystemProperties.DTYPE);

View File

@@ -122,7 +122,7 @@ public class BNGradientCheckTest extends BaseDL4JTest {
.dataType(DataType.DOUBLE)
.updater(new NoOp()).seed(12345L)
.dist(new NormalDistribution(0, 2)).list()
-.layer(0, ConvolutionLayer.builder().kernelSize(2, 2).stride(1, 1).nIn(depth).nOut(2)
+.layer(0, Convolution2D.builder().kernelSize(2, 2).stride(1, 1).nIn(depth).nOut(2)
.activation(Activation.IDENTITY).build())
.layer(1,BatchNormalization.builder().useLogStd(useLogStd).build())
.layer(2, ActivationLayer.builder().activation(Activation.TANH).build())

View File

@@ -91,7 +91,6 @@ public class CNN1DGradientCheckTest extends BaseDL4JTest {
.updater(new NoOp())
.dist(new NormalDistribution(0, 1))
.convolutionMode(ConvolutionMode.Same)
-.list()
.layer(
Convolution1D.builder()
.activation(afn)
@@ -435,7 +434,6 @@ public class CNN1DGradientCheckTest extends BaseDL4JTest {
.updater(new NoOp())
.dist(new NormalDistribution(0, 1))
.convolutionMode(ConvolutionMode.Same)
-.list()
.layer(
0,
Convolution1D.builder()
@@ -461,6 +459,7 @@ public class CNN1DGradientCheckTest extends BaseDL4JTest {
.stride(stride)
.padding(padding)
.pnorm(pnorm)
+.name("SubsamplingLayer")
.build())
.layer(
3,

View File

@@ -0,0 +1,811 @@
/*
* ******************************************************************************
* *
* *
* * This program and the accompanying materials are made available under the
* * terms of the Apache License, Version 2.0 which is available at
* * https://www.apache.org/licenses/LICENSE-2.0.
* *
* * See the NOTICE file distributed with this work for additional
* * information regarding copyright ownership.
* * Unless required by applicable law or agreed to in writing, software
* * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* * License for the specific language governing permissions and limitations
* * under the License.
* *
* * SPDX-License-Identifier: Apache-2.0
* *****************************************************************************
*/
package org.deeplearning4j.gradientcheck;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.BaseDL4JTest;
import org.deeplearning4j.TestUtils;
import org.deeplearning4j.datasets.iterator.INDArrayDataSetIterator;
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.RNNFormat;
import org.deeplearning4j.nn.conf.distribution.NormalDistribution;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.*;
import org.deeplearning4j.nn.conf.layers.convolutional.Cropping1D;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.util.Convolution1DUtils;
import org.junit.jupiter.api.Test;
import org.nd4j.common.primitives.Pair;
import org.nd4j.evaluation.classification.Evaluation;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.NDArrayIndex;
import org.nd4j.linalg.learning.config.NoOp;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
@Slf4j
public class CNN1DNewGradientCheckTest extends BaseDL4JTest {
private static final boolean PRINT_RESULTS = true;
private static final boolean RETURN_ON_FIRST_FAILURE = false;
private static final double DEFAULT_EPS = 1e-6;
private static final double DEFAULT_MAX_REL_ERROR = 1e-3;
private static final double DEFAULT_MIN_ABS_ERROR = 1e-8;
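// GradientCheckUtil perturbs each parameter by +/- DEFAULT_EPS, compares the centered numerical
// derivative against the analytic gradient, and reports a failure when the relative error exceeds
// DEFAULT_MAX_REL_ERROR (unless the absolute error is below DEFAULT_MIN_ABS_ERROR).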
static {
Nd4j.setDataType(DataType.DOUBLE);
}
@Test
public void testCnn1D() {
int minibatchSize = 4;
int[] dataChannels = {4, 10}; //the input
int[] kernels = {2,4,5,8};
int stride = 2;
int padding = 3;
int seriesLength = 300;
for (int kernel : kernels) {
for (int dChannels : dataChannels) {
int numLabels = ((seriesLength + (2 * padding) - kernel) / stride) + 1;
final NeuralNetConfiguration conf =
NeuralNetConfiguration.builder()
.dataType(DataType.DOUBLE)
.updater(new NoOp())
.dist(new NormalDistribution(0, 1))
.convolutionMode(ConvolutionMode.Same)
.layer(
Convolution1DNew.builder()
.activation(Activation.RELU)
.kernelSize(kernel)
.stride(stride)
.padding(padding)
.nIn(dChannels) // channels
.nOut(3)
.rnnDataFormat(RNNFormat.NCW)
.build())
.layer(
RnnOutputLayer.builder()
.lossFunction(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX)
.nOut(4)
.build())
.inputType(InputType.recurrent(dChannels, seriesLength))
.build();
INDArray input = Nd4j.rand(minibatchSize, dChannels, seriesLength);
INDArray labels = Nd4j.zeros(minibatchSize, 4, numLabels);
for (int i = 0; i < minibatchSize; i++) {
for (int j = 0; j < numLabels; j++) {
labels.putScalar(new int[] {i, i % 4, j}, 1.0);
}
}
final MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
String msg =
"Minibatch="
+ minibatchSize
+ ", activationFn="
+ Activation.RELU
+ ", kernel = "
+ kernel;
System.out.println(msg);
for (int j = 0; j < net.getnLayers(); j++)
System.out.println("ILayer " + j + " # params: " + net.getLayer(j).numParams());
/**
List<Pair<INDArray, INDArray>> iter = new java.util.ArrayList<>(Collections.emptyList());
iter.add(new Pair<>(input, labels));
for(int x=0;x<100; x++) net.fit(input, labels);
Evaluation eval = net.evaluate(new INDArrayDataSetIterator(iter,2), Arrays.asList(new String[]{"One", "Two", "Three", "Four"}));
// net.fit(input, labels);
eval.eval(labels, net.output(input));
**/
boolean gradOK =
GradientCheckUtil.checkGradients(
net,
DEFAULT_EPS,
DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR,
PRINT_RESULTS,
RETURN_ON_FIRST_FAILURE,
input,
labels);
assertTrue(gradOK, msg);
TestUtils.testModelSerialization(net);
}
}
}
@Test
public void testCnn1DWithLocallyConnected1D() {
Nd4j.getRandom().setSeed(1337);
int[] minibatchSizes = {2, 3};
int length = 25;
int convNIn = 18;
int convNOut1 = 3;
int convNOut2 = 4;
int finalNOut = 4;
int[] kernels = {1,2,4};
int stride = 1;
int padding = 0;
Activation[] activations = {Activation.SIGMOID};
for (Activation afn : activations) {
for (int minibatchSize : minibatchSizes) {
for (int kernel : kernels) {
INDArray input = Nd4j.rand(minibatchSize, convNIn, length);
INDArray labels = Nd4j.zeros(minibatchSize, finalNOut, length);
for (int i = 0; i < minibatchSize; i++) {
for (int j = 0; j < length; j++) {
labels.putScalar(new int[] {i, i % finalNOut, j}, 1.0);
}
}
NeuralNetConfiguration conf =
NeuralNetConfiguration.builder()
.dataType(DataType.DOUBLE)
.updater(new NoOp())
.dist(new NormalDistribution(0, 1))
.convolutionMode(ConvolutionMode.Same)
.layer(
Convolution1DNew.builder()
.activation(afn)
.kernelSize(kernel)
.stride(stride)
.padding(padding)
.nIn(convNIn)
.nOut(convNOut1)
.rnnDataFormat(RNNFormat.NCW)
.build())
.layer(
LocallyConnected1D.builder()
.activation(afn)
.kernelSize(kernel)
.stride(stride)
.padding(padding)
.nIn(convNOut1)
.nOut(convNOut2)
.hasBias(false)
.build())
.layer(
RnnOutputLayer.builder()
.lossFunction(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX)
.nOut(finalNOut)
.build())
.inputType(InputType.recurrent(convNIn, length))
.build();
String json = conf.toJson();
NeuralNetConfiguration c2 = NeuralNetConfiguration.fromJson(json);
assertEquals(conf, c2);
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
String msg =
"Minibatch=" + minibatchSize + ", activationFn=" + afn + ", kernel = " + kernel;
if (PRINT_RESULTS) {
System.out.println(msg);
// for (int j = 0; j < net.getnLayers(); j++)
// System.out.println("ILayer " + j + " # params: " +
// net.getLayer(j).numParams());
}
boolean gradOK =
GradientCheckUtil.checkGradients(
net,
DEFAULT_EPS,
DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR,
PRINT_RESULTS,
RETURN_ON_FIRST_FAILURE,
input,
labels);
assertTrue(gradOK, msg);
TestUtils.testModelSerialization(net);
}
}
}
}
@Test
public void testCnn1DWithCropping1D() {
Nd4j.getRandom().setSeed(1337);
int[] minibatchSizes = {1, 3};
int length = 7;
int convNIn = 2;
int convNOut1 = 3;
int convNOut2 = 4;
int finalNOut = 4;
int[] kernels = {1, 2, 4};
int stride = 1;
int padding = 0;
int cropping = 1;
int croppedLength = length - 2 * cropping;
Activation[] activations = {Activation.SIGMOID};
SubsamplingLayer.PoolingType[] poolingTypes =
new SubsamplingLayer.PoolingType[] {
SubsamplingLayer.PoolingType.MAX,
SubsamplingLayer.PoolingType.AVG,
SubsamplingLayer.PoolingType.PNORM
};
for (Activation afn : activations) {
for (SubsamplingLayer.PoolingType poolingType : poolingTypes) {
for (int minibatchSize : minibatchSizes) {
for (int kernel : kernels) {
INDArray input = Nd4j.rand(minibatchSize, convNIn, length);
INDArray labels = Nd4j.zeros(minibatchSize, finalNOut, croppedLength);
for (int i = 0; i < minibatchSize; i++) {
for (int j = 0; j < croppedLength; j++) {
labels.putScalar(new int[] {i, i % finalNOut, j}, 1.0);
}
}
NeuralNetConfiguration conf =
NeuralNetConfiguration.builder()
.dataType(DataType.DOUBLE)
.updater(new NoOp())
.dist(new NormalDistribution(0, 1))
.convolutionMode(ConvolutionMode.Same)
.layer(
Convolution1DNew.builder()
.activation(afn)
.kernelSize(kernel)
.stride(stride)
.padding(padding)
.nOut(convNOut1)
.build())
.layer(Cropping1D.builder(cropping).build())
.layer(
Convolution1DNew.builder()
.activation(afn)
.kernelSize(kernel)
.stride(stride)
.padding(padding)
.nOut(convNOut2)
.build())
.layer(
RnnOutputLayer.builder()
.lossFunction(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX)
.nOut(finalNOut)
.build())
.inputType(InputType.recurrent(convNIn, length, RNNFormat.NCW))
.build();
String json = conf.toJson();
NeuralNetConfiguration c2 = NeuralNetConfiguration.fromJson(json);
assertEquals(conf, c2);
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
String msg =
"PoolingType="
+ poolingType
+ ", minibatch="
+ minibatchSize
+ ", activationFn="
+ afn
+ ", kernel = "
+ kernel;
if (PRINT_RESULTS) {
System.out.println(msg);
// for (int j = 0; j < net.getnLayers(); j++)
// System.out.println("ILayer " + j + " # params: " +
// net.getLayer(j).numParams());
}
boolean gradOK =
GradientCheckUtil.checkGradients(
net,
DEFAULT_EPS,
DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR,
PRINT_RESULTS,
RETURN_ON_FIRST_FAILURE,
input,
labels);
assertTrue(gradOK, msg);
TestUtils.testModelSerialization(net);
}
}
}
}
}
@Test
public void testCnn1DWithZeroPadding1D() {
Nd4j.getRandom().setSeed(1337);
int[] minibatchSizes = {1, 3};
int length = 7;
int convNIn = 2;
int convNOut1 = 3;
int convNOut2 = 4;
int finalNOut = 4;
int[] kernels = {1, 2, 4};
int stride = 1;
int pnorm = 2;
int padding = 0;
int zeroPadding = 2;
int paddedLength = length + 2 * zeroPadding;
Activation[] activations = {Activation.SIGMOID};
SubsamplingLayer.PoolingType[] poolingTypes =
new SubsamplingLayer.PoolingType[] {
SubsamplingLayer.PoolingType.MAX,
SubsamplingLayer.PoolingType.AVG,
SubsamplingLayer.PoolingType.PNORM
};
for (Activation afn : activations) {
for (SubsamplingLayer.PoolingType poolingType : poolingTypes) {
for (int minibatchSize : minibatchSizes) {
for (int kernel : kernels) {
INDArray input = Nd4j.rand(minibatchSize, convNIn, length);
INDArray labels = Nd4j.zeros(minibatchSize, finalNOut, paddedLength);
for (int i = 0; i < minibatchSize; i++) {
for (int j = 0; j < paddedLength; j++) {
labels.putScalar(new int[] {i, i % finalNOut, j}, 1.0);
}
}
NeuralNetConfiguration conf =
NeuralNetConfiguration.builder()
.dataType(DataType.DOUBLE)
.updater(new NoOp())
.dist(new NormalDistribution(0, 1))
.convolutionMode(ConvolutionMode.Same)
.layer(
Convolution1DNew.builder()
.activation(afn)
.kernelSize(2, kernel)
.stride(stride)
.padding(padding)
.nOut(convNOut1)
.build())
.layer(ZeroPadding1DLayer.builder(zeroPadding).build())
.layer(
Convolution1DNew.builder()
.activation(afn)
.kernelSize(kernel)
.stride(stride)
.padding(padding)
.nOut(convNOut2)
.build())
.layer(ZeroPadding1DLayer.builder(0).build())
.layer(
Subsampling1DLayer.builder(poolingType)
.kernelSize(kernel)
.stride(stride)
.padding(padding)
.pnorm(pnorm)
.build())
.layer(
RnnOutputLayer.builder()
.lossFunction(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX)
.nOut(finalNOut)
.build())
.inputType(InputType.recurrent(convNIn, length, RNNFormat.NCW))
.build();
String json = conf.toJson();
NeuralNetConfiguration c2 = NeuralNetConfiguration.fromJson(json);
assertEquals(conf, c2);
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
String msg =
"PoolingType="
+ poolingType
+ ", minibatch="
+ minibatchSize
+ ", activationFn="
+ afn
+ ", kernel = "
+ kernel;
if (PRINT_RESULTS) {
System.out.println(msg);
// for (int j = 0; j < net.getnLayers(); j++)
// System.out.println("ILayer " + j + " # params: " +
// net.getLayer(j).numParams());
}
boolean gradOK =
GradientCheckUtil.checkGradients(
net,
DEFAULT_EPS,
DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR,
PRINT_RESULTS,
RETURN_ON_FIRST_FAILURE,
input,
labels);
assertTrue(gradOK, msg);
TestUtils.testModelSerialization(net);
}
}
}
}
}
@Test
public void testCnn1DWithSubsampling1D() {
Nd4j.getRandom().setSeed(12345);
int[] minibatchSizes = {1, 3};
int length = 7;
int convNIn = 2;
int convNOut1 = 3;
int convNOut2 = 4;
int finalNOut = 4;
int[] kernels = {1, 2, 4};
int stride = 1;
int padding = 0;
int pnorm = 2;
Activation[] activations = {Activation.SIGMOID, Activation.TANH};
SubsamplingLayer.PoolingType[] poolingTypes =
new SubsamplingLayer.PoolingType[] {
SubsamplingLayer.PoolingType.MAX,
SubsamplingLayer.PoolingType.AVG,
SubsamplingLayer.PoolingType.PNORM
};
for (Activation afn : activations) {
for (SubsamplingLayer.PoolingType poolingType : poolingTypes) {
for (int minibatchSize : minibatchSizes) {
for (int kernel : kernels) {
INDArray input = Nd4j.rand(minibatchSize, convNIn, length);
INDArray labels = Nd4j.zeros(minibatchSize, finalNOut, length);
for (int i = 0; i < minibatchSize; i++) {
for (int j = 0; j < length; j++) {
labels.putScalar(new int[] {i, i % finalNOut, j}, 1.0);
}
}
NeuralNetConfiguration conf =
NeuralNetConfiguration.builder()
.dataType(DataType.DOUBLE)
.updater(new NoOp())
.dist(new NormalDistribution(0, 1))
.convolutionMode(ConvolutionMode.Same)
.layer(
0,
Convolution1DNew.builder()
.activation(afn)
.kernelSize(kernel)
.stride(stride)
.padding(padding)
.nOut(convNOut1)
.build())
.layer(
1,
Convolution1DNew.builder()
.activation(afn)
.kernelSize(kernel)
.stride(stride)
.padding(padding)
.nOut(convNOut2)
.build())
.layer(
2,
Subsampling1DLayer.builder(poolingType)
.kernelSize(kernel)
.stride(stride)
.padding(padding)
.pnorm(pnorm)
.name("SubsamplingLayer")
.build())
.layer(
3,
RnnOutputLayer.builder()
.lossFunction(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX)
.nOut(finalNOut)
.build())
.inputType(InputType.recurrent(convNIn, length, RNNFormat.NCW))
.build();
String json = conf.toJson();
NeuralNetConfiguration c2 = NeuralNetConfiguration.fromJson(json);
assertEquals(conf, c2);
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
String msg =
"PoolingType="
+ poolingType
+ ", minibatch="
+ minibatchSize
+ ", activationFn="
+ afn
+ ", kernel = "
+ kernel;
if (PRINT_RESULTS) {
System.out.println(msg);
// for (int j = 0; j < net.getnLayers(); j++)
// System.out.println("ILayer " + j + " # params: " +
// net.getLayer(j).numParams());
}
boolean gradOK =
GradientCheckUtil.checkGradients(
net,
DEFAULT_EPS,
DEFAULT_MAX_REL_ERROR,
DEFAULT_MIN_ABS_ERROR,
PRINT_RESULTS,
RETURN_ON_FIRST_FAILURE,
input,
labels);
assertTrue(gradOK, msg);
TestUtils.testModelSerialization(net);
}
}
}
}
}
@Test
public void testCnn1dWithMasking() {
int length = 12;
int convNIn = 2;
int convNOut1 = 3;
int convNOut2 = 4;
int finalNOut = 3;
int pnorm = 2;
SubsamplingLayer.PoolingType[] poolingTypes =
new SubsamplingLayer.PoolingType[] {
SubsamplingLayer.PoolingType.MAX, SubsamplingLayer.PoolingType.AVG
};
for (SubsamplingLayer.PoolingType poolingType : poolingTypes) {
for (ConvolutionMode cm :
new ConvolutionMode[] {ConvolutionMode.Same, ConvolutionMode.Truncate}) {
for (int stride : new int[] {1, 2}) {
String s = cm + ", stride=" + stride + ", pooling=" + poolingType;
log.info("Starting test: " + s);
Nd4j.getRandom().setSeed(12345);
NeuralNetConfiguration conf =
NeuralNetConfiguration.builder()
.dataType(DataType.DOUBLE)
.updater(new NoOp())
.activation(Activation.TANH)
.dist(new NormalDistribution(0, 1))
.convolutionMode(cm)
.seed(12345)
.layer(
Convolution1DNew.builder()
.kernelSize(2)
.rnnDataFormat(RNNFormat.NCW)
.stride(stride)
.nIn(convNIn)
.nOut(convNOut1)
.build())
.layer(
Subsampling1DLayer.builder(poolingType)
.kernelSize(2)
.stride(stride)
.pnorm(pnorm)
.build())
.layer(
Convolution1DNew.builder()
.kernelSize(2)
.rnnDataFormat(RNNFormat.NCW)
.stride(stride)
.nIn(convNOut1)
.nOut(convNOut2)
.build())
.layer(GlobalPoolingLayer.builder().poolingType(PoolingType.AVG).build())
.layer(
OutputLayer.builder()
.lossFunction(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX)
.nOut(finalNOut)
.build())
.inputType(InputType.recurrent(convNIn, length))
.build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
INDArray f = Nd4j.rand(2, convNIn, length);
INDArray fm = Nd4j.create(2, length);
fm.get(NDArrayIndex.point(0), NDArrayIndex.all()).assign(1);
fm.get(NDArrayIndex.point(1), NDArrayIndex.interval(0, 6)).assign(1);
INDArray label = TestUtils.randomOneHot(2, finalNOut);
boolean gradOK =
GradientCheckUtil.checkGradients(
new GradientCheckUtil.MLNConfig().net(net).input(f).labels(label).inputMask(fm));
assertTrue(gradOK, s);
TestUtils.testModelSerialization(net);
// TODO also check that masked step values don't impact forward pass, score or gradients
DataSet ds = new DataSet(f, label, fm, null);
double scoreBefore = net.score(ds);
net.setInput(f);
net.setLabels(label);
net.setLayerMaskArrays(fm, null);
net.computeGradientAndScore();
INDArray gradBefore = net.getFlattenedGradients().dup();
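// Time steps 10 and 11 of example 1 lie outside its mask (only steps 0..5 are unmasked), so
// modifying them must leave the score and gradients unchanged.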
f.putScalar(1, 0, 10, 10.0);
f.putScalar(1, 1, 11, 20.0);
double scoreAfter = net.score(ds);
net.setInput(f);
net.setLabels(label);
net.setLayerMaskArrays(fm, null);
net.computeGradientAndScore();
INDArray gradAfter = net.getFlattenedGradients().dup();
assertEquals(scoreBefore, scoreAfter, 1e-6);
assertEquals(gradBefore, gradAfter);
}
}
}
}
@Test
public void testCnn1Causal() throws Exception {
int convNIn = 2;
int convNOut1 = 3;
int convNOut2 = 4;
int finalNOut = 3;
int[] lengths = {11, 12, 13, 9, 10, 11};
int[] kernels = {2, 3, 2, 4, 2, 3};
int[] dilations = {1, 1, 2, 1, 2, 1};
int[] strides = {1, 2, 1, 2, 1, 1};
boolean[] masks = {false, true, false, true, false, true};
boolean[] hasB = {true, false, true, false, true, true};
for (int i = 0; i < lengths.length; i++) {
int length = lengths[i];
int k = kernels[i];
int d = dilations[i];
int st = strides[i];
boolean mask = masks[i];
boolean hasBias = hasB[i];
// TODO has bias
String s = "k=" + k + ", s=" + st + " d=" + d + ", seqLen=" + length;
log.info("Starting test: " + s);
Nd4j.getRandom().setSeed(12345);
NeuralNetConfiguration conf =
NeuralNetConfiguration.builder()
.dataType(DataType.DOUBLE)
.updater(new NoOp())
.activation(Activation.TANH)
.weightInit(new NormalDistribution(0, 1))
.seed(12345)
.layer(
Convolution1DNew.builder()
.kernelSize(k)
.dilation(d)
.hasBias(hasBias)
.convolutionMode(ConvolutionMode.Causal)
.stride(st)
.nOut(convNOut1)
.build())
.layer(
Convolution1DNew.builder()
.kernelSize(k)
.dilation(d)
.convolutionMode(ConvolutionMode.Causal)
.stride(st)
.nOut(convNOut2)
.build())
.layer(
RnnOutputLayer.builder()
.lossFunction(LossFunctions.LossFunction.MCXENT)
.activation(Activation.SOFTMAX)
.nOut(finalNOut)
.build())
.inputType(InputType.recurrent(convNIn, length, RNNFormat.NCW))
.build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
INDArray f = Nd4j.rand(DataType.DOUBLE, 2, convNIn, length);
INDArray fm = null;
if (mask) {
fm = Nd4j.create(2, length);
fm.get(NDArrayIndex.point(0), NDArrayIndex.all()).assign(1);
fm.get(NDArrayIndex.point(1), NDArrayIndex.interval(0, length - 2)).assign(1);
}
long outSize1 = Convolution1DUtils.getOutputSize(length, k, st, 0, ConvolutionMode.Causal, d);
long outSize2 =
Convolution1DUtils.getOutputSize(outSize1, k, st, 0, ConvolutionMode.Causal, d);
INDArray label = TestUtils.randomOneHotTimeSeries(2, finalNOut, (int) outSize2);
String msg =
"Minibatch="
+ 1
+ ", activationFn="
+ Activation.RELU
+ ", kernel = "
+ k;
System.out.println(msg);
for (int j = 0; j < net.getnLayers(); j++)
System.out.println("ILayer " + j + " # params: " + net.getLayer(j).numParams());
boolean gradOK =
GradientCheckUtil.checkGradients(
new GradientCheckUtil.MLNConfig().net(net).input(f).labels(label).inputMask(fm));
assertTrue(gradOK, s);
TestUtils.testModelSerialization(net);
}
}
}

View File

@@ -108,8 +108,8 @@ public class CNNGradientCheckTest extends BaseDL4JTest {
.updater(new NoOp())
.weightInit(WeightInit.XAVIER)
.seed(12345L)
-.list()
-.layer(0, ConvolutionLayer.builder(1, 1).nOut(6).activation(afn).build())
+.layer(0, Convolution2D.builder().kernelSize(1).stride(1).nOut(6).activation(afn).build())
.layer(1, OutputLayer.builder(lf).activation(outputActivation).nOut(3).build())
.inputType(InputType.convolutionalFlat(1, 4, 1));

View File

@@ -24,6 +24,7 @@ import lombok.val;
import org.deeplearning4j.nn.api.layers.LayerConstraint;
import org.deeplearning4j.nn.conf.CNN2DFormat;
import org.deeplearning4j.nn.conf.inputs.InputType;
+import org.deeplearning4j.nn.conf.layers.Convolution2D;
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException;
@@ -85,7 +86,7 @@ public class KerasAtrousConvolution2D extends KerasConvolution {
IWeightInit init = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_INIT(),
enforceTrainingConfig, conf, kerasMajorVersion);
-val builder = ConvolutionLayer.builder().name(this.name)
+val builder = Convolution2D.builder().name(this.name)
.nOut(getNOutFromConfig(layerConfig, conf)).dropOut(this.dropout)
.activation(getIActivationFromConfig(layerConfig, conf))
.weightInit(init)

View File

@@ -28,6 +28,7 @@ import org.deeplearning4j.nn.api.layers.LayerConstraint;
import org.deeplearning4j.nn.conf.CNN2DFormat;
import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.deeplearning4j.nn.conf.inputs.InputType;
+import org.deeplearning4j.nn.conf.layers.Convolution2D;
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException;
@@ -95,7 +96,7 @@ public class KerasConvolution2D extends KerasConvolution {
LayerConstraint weightConstraint = KerasConstraintUtils.getConstraintsFromConfig(
layerConfig, conf.getLAYER_FIELD_W_CONSTRAINT(), conf, kerasMajorVersion);
-final var builder = ConvolutionLayer.builder().name(this.name)
+final var builder = Convolution2D.builder().name(this.name)
.nOut(getNOutFromConfig(layerConfig, conf)).dropOut(this.dropout)
.activation(getIActivationFromConfig(layerConfig, conf))
.weightInit(init)

View File

@@ -222,6 +222,14 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration {
// TODO do not put inside self to avoid serialization issues
// innerConfigurations.add(0, this); //put this configuration at first place
+getLayerConfigurations().stream()
+.forEach(
+lconf ->
+lconf.setNetConfiguration(
+this)); // set this as net config for all layers (defined in here, not stacked
/**
* Inherit network wide configuration setting to those layer configurations that do not have an
* individual setting (nor a default)
@@ -230,11 +238,6 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration {
lconf.runInheritance();
}
-getLayerConfigurations().stream()
-.forEach(
-lconf ->
-lconf.setNetConfiguration(
-this)); // set this as net config for all layers (defined in here, not stacked
// Validate BackpropType setting
if ((tbpttBackLength != DEFAULT_TBPTT_LENGTH || tbpttFwdLength != DEFAULT_TBPTT_LENGTH)
@@ -326,7 +329,7 @@ public class NeuralNetConfiguration extends NeuralNetBaseBuilderConfiguration {
LayerConfiguration layer = getFlattenedLayerConfigurations().get(i - 1);
// convolution 1d is an edge case where it has rnn input type but the filters
// should be the output
-if (layer instanceof Convolution1D) {
+if (layer instanceof Convolution1D || layer instanceof Convolution1DNew) {
if (l instanceof DenseLayer && getInputType() instanceof InputType.InputTypeRecurrent) {
FeedForwardLayer feedForwardLayer = (FeedForwardLayer) l;
if (getInputType() instanceof InputType.InputTypeRecurrent) {

View File

@@ -21,7 +21,13 @@
package org.deeplearning4j.nn.conf;
+/**
+* N is the batch size<br/>
+* C is the number of feature maps (that is, the number of channels)<br/>
+* H is the image height (not used for 1D conv, as this is an RNN format)<br/>
+* W is the image width<br/>
+**/
public enum RNNFormat implements DataFormat {
-NCW, /** n=batch size; c=channels/ features; w=width **/
+NCW,
-NWC /** n=batch size; w=width; c=channels/ features **/
+NWC
}
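As a brief illustration only (not part of this commit; shapes assume standard Nd4j arrays), a recurrent minibatch of 32 sequences with 8 features and 100 time steps would be laid out as:

INDArray ncw = Nd4j.zeros(32, 8, 100); // RNNFormat.NCW: [batch, channels/features, width/time steps]
INDArray nwc = Nd4j.zeros(32, 100, 8); // RNNFormat.NWC: [batch, width/time steps, channels/features]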

View File

@@ -0,0 +1,142 @@
/*
*
* ******************************************************************************
* *
* * This program and the accompanying materials are made available under the
* * terms of the Apache License, Version 2.0 which is available at
* * https://www.apache.org/licenses/LICENSE-2.0.
* *
* * See the NOTICE file distributed with this work for additional
* * information regarding copyright ownership.
* * Unless required by applicable law or agreed to in writing, software
* * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* * License for the specific language governing permissions and limitations
* * under the License.
* *
* * SPDX-License-Identifier: Apache-2.0
* *****************************************************************************
*
*/
package org.deeplearning4j.nn.conf.layers;
import java.util.Arrays;
import com.fasterxml.jackson.annotation.JsonIgnore;
import lombok.*;
import lombok.experimental.Accessors;
import lombok.experimental.SuperBuilder;
import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.nn.conf.CNN2DFormat;
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.util.ValidationUtils;
/**
* For a convolution layer, nIn is the number of input channels and nOut is the number of filters
* (that is, the number of output channels). The builder specifies the filter/kernel size, the
* stride and the padding; a pooling layer takes only the kernel size.
*
* <p>Supports multiple dimensions: In 1D CNN, kernel moves in 1 direction. Input and output data of
* 1D CNN is 2 dimensional. Mostly used on Time-Series data.
*
* <p>In 2D CNN, kernel moves in 2 directions. Input and output data of 2D CNN is 3 dimensional.
* Mostly used on Image data.
*
* <p>In 3D CNN, kernel moves in 3 directions. Input and output data of 3D CNN is 4 dimensional.
* Mostly used on 3D Image data (MRI, CT Scans, Video).
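*
* <p>Per-example shapes (excluding the minibatch dimension), assuming channels-first layout: 1D
* data is [channels, length], 2D data is [channels, height, width], 3D data is [channels, depth,
* height, width].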
*/
@ToString(callSuper = true)
@NoArgsConstructor
@EqualsAndHashCode(callSuper = true)
@Slf4j
@SuperBuilder
public abstract class AbstractConvolutionLayer extends FeedForwardLayer {
/** The kernel of this convolution with size in each n-dimensions */
@Getter private int[] kernelSize;
/** The stride */
@Getter private int[] stride;
/** The padding */
@Getter private int[] padding;
/** The dilation */
@Getter private int[] dilation;
/** If true (default): include bias parameters in the model. False: no bias. */
@Builder.Default
@Getter
@Accessors(fluent = true)
@Setter
private boolean hasBias = true;
/**
* Set the convolution mode for the Convolution layer. See {@link ConvolutionMode} for more
* details Default is {@link ConvolutionMode}.Truncate.
*/
@Builder.Default @Getter @Setter
private ConvolutionMode convolutionMode = ConvolutionMode.Truncate;
/**
* When using CuDNN and an error is encountered, should fallback to the non-CuDNN implementation
* be allowed? If set to false, an exception in CuDNN will be propagated back to the user. If
* true, the built-in (non-CuDNN) implementation for ConvolutionLayer will be used
*/
@Getter @Setter @Builder.Default private boolean cudnnAllowFallback = true;
/** Defaults to "PREFER_FASTEST", but "NO_WORKSPACE" uses less memory. */
@Getter @Setter @Builder.Default private ConvolutionLayer.AlgoMode cudnnAlgoMode = ConvolutionLayer.AlgoMode.PREFER_FASTEST;
@Getter @Setter private ConvolutionLayer.FwdAlgo cudnnFwdAlgo;
@Getter @Setter private ConvolutionLayer.BwdFilterAlgo cudnnBwdFilterAlgo;
@Getter @Setter private ConvolutionLayer.BwdDataAlgo cudnnBwdDataAlgo;
/**
* Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last).
* See {@link CNN2DFormat} for more details.<br>
* Default: NCHW
*
* @param format Format for activations (in and out)
*/
@Builder.Default @Getter @Setter
private CNN2DFormat convFormat =
CNN2DFormat.NCHW; // default value for legacy serialization reasons
/**
* Number of parameters this layer has as a result of its configuration.
*
* @return number of parameters
*/
@Override
public long numParams() {
var kern = 1;
for (int i : getKernelSize()) {
kern = kern * i;
}
return nIn * nOut * kern + (hasBias() ? nOut : 0);
}
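// Worked example (hypothetical values): a 1D convolution with nIn = 2 input channels, nOut = 3
// filters, kernelSize = {4, 1} and a bias term has kern = 4 * 1 = 4, giving
// numParams = 2 * 3 * 4 + 3 = 27.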
public abstract static class AbstractConvolutionLayerBuilder<
C extends AbstractConvolutionLayer, B extends AbstractConvolutionLayerBuilder<C, B>>
extends FeedForwardLayerBuilder<C, B> {
public B kernelSize(int @NonNull ... kernelSize) {
if (this.kernelSize != null) {
log.warn("You are setting the kernel more than once, last call with override prior calls.");
}
this.kernelSize = kernelSize;
return self();
}
public B stride(int @NonNull ... stride) {
this.stride = stride;
return self();
}
public B padding(int @NonNull ... padding) {
this.padding = padding;
return self();
}
public B dilation(int @NonNull ... dilation) {
this.dilation = dilation;
return self();
}
}
}

View File

@@ -52,6 +52,16 @@ import org.nd4j.linalg.learning.regularization.WeightDecay;
@SuperBuilder
public abstract class BaseLayerConfiguration extends LayerConfiguration
implements ITraininableLayerConfiguration, Serializable, Cloneable {
+/**
+* Number of parameters this layer has as a result of its configuration. This default implementation
+* calls {@link #initializer()}.numParams( this ).
+*
+* @return number of parameters
+*/
+@Override
+public long numParams() {
+return initializer().numParams(this);
+}
/**
* Set constraints to be applied to all layers. Default: no constraints.<br>

View File

@@ -45,6 +45,7 @@ import org.nd4j.linalg.factory.Nd4j;
@NoArgsConstructor
public class CapsuleLayer extends SameDiffLayer {
private static final String WEIGHT_PARAM = "weight";
private static final String BIAS_PARAM = "bias";
/**

View File

@@ -36,22 +36,17 @@ import org.deeplearning4j.util.ValidationUtils;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
+/*
//TODO: We will eventually want to NOT subclass off of ConvolutionLayer.
//Currently, we just subclass off the ConvolutionLayer and hard code the "width" dimension to 1
-/**
* This approach treats a multivariate time series with L time steps and
* P variables as an L x 1 x P image (L rows high, 1 column wide, P
* channels deep). The kernel should be H<L pixels high and W=1 pixels
* wide.
-*
-In 1D CNN, kernel moves in 1 direction.
+* In 1D CNN, kernel moves in 1 direction. The kernel has 2-dimensions.
-Input and output data of 1D CNN is 2 dimensional. Mostly used on Time-Series data.
+* Input and output data of 1D CNN is 2-dimensional. Mostly used on Time-Series data.
-In 2D CNN, kernel moves in 2 directions.
-Input and output data of 2D CNN is 3 dimensional. Mostly used on Image data.
-In 3D CNN, kernel moves in 3 directions.
-Input and output data of 3D CNN is 4 dimensional. Mostly used on 3D Image data (MRI, CT Scans, Video).
*/
@Data
@ToString(callSuper = true)
@@ -223,7 +218,7 @@ public class Convolution1D extends ConvolutionLayer {
}
public abstract static class Convolution1DBuilder<
-C extends ConvolutionLayer, B extends Convolution1DBuilder<C, B>>
+C extends Convolution1D, B extends Convolution1DBuilder<C, B>>
extends ConvolutionLayerBuilder<C, B> {
@Override

View File

@@ -0,0 +1,250 @@
/*
* ******************************************************************************
* *
* *
* * This program and the accompanying materials are made available under the
* * terms of the Apache License, Version 2.0 which is available at
* * https://www.apache.org/licenses/LICENSE-2.0.
* *
* * See the NOTICE file distributed with this work for additional
* * information regarding copyright ownership.
* * Unless required by applicable law or agreed to in writing, software
* * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* * License for the specific language governing permissions and limitations
* * under the License.
* *
* * SPDX-License-Identifier: Apache-2.0
* *****************************************************************************
*/
package org.deeplearning4j.nn.conf.layers;
import java.util.Collection;
import java.util.Map;
import lombok.*;
import lombok.experimental.SuperBuilder;
import lombok.extern.jackson.Jacksonized;
import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.nn.api.ParamInitializer;
import org.deeplearning4j.nn.conf.CNN2DFormat;
import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.RNNFormat;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.memory.LayerMemoryReport;
import org.deeplearning4j.nn.params.ConvolutionNewParamInitializer;
import org.deeplearning4j.optimize.api.TrainingListener;
import org.deeplearning4j.util.Convolution1DUtils;
import org.deeplearning4j.util.ValidationUtils;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
// TODO: We will eventually want to NOT subclass off of ConvolutionLayer.
// Currently, we just subclass off the ConvolutionLayer and hard code the "width" dimension to 1
/**
* This approach treats a multivariate time series with L time steps and P variables as an L x 1 x P
* image (L rows high, 1 column wide, P channels deep). The kernel should be H<L pixels high and W=1
* pixels wide.
*
* <p>In 1D CNN, kernel moves in 1 direction. The kernel has 2-dimensions. Input and output data of
* 1D CNN is 2-dimensional. Mostly used on Time-Series data.
*/
@Data
@Slf4j
@ToString(callSuper = true)
@EqualsAndHashCode(callSuper = true)
@Jacksonized
@SuperBuilder
public class Convolution1DNew extends AbstractConvolutionLayer {
/**
* Set the data format for the CNN activations - NCHW (channels first) or NHWC (channels last).
* See {@link CNN2DFormat} for more details.<br>
* Default: NCHW
*
* @param format Format for activations (in and out)
*/
@Builder.Default
protected CNN2DFormat dataFormat =
CNN2DFormat.NCHW; // default value for legacy serialization reasons
@Builder.Default private RNNFormat rnnDataFormat = RNNFormat.NCW;
@Override
public ParamInitializer initializer() {
return ConvolutionNewParamInitializer.getInstance();
}
@Override
public org.deeplearning4j.nn.api.Layer instantiate(
NeuralNetConfiguration conf,
Collection<TrainingListener> trainingListeners,
int layerIndex,
INDArray layerParamsView,
boolean initializeParams,
DataType networkDataType) {
setNetConfiguration(conf);
LayerValidation.assertNInNOutSet("Convolution1D", getName(), layerIndex, getNIn(), getNOut());
LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
/*
Object ret;
try {
ret = lconf.getCanConfigure()
.getConstructor(LayerConfiguration.class, DataType.class)
.newInstance(new Object[] { lconf, networkDataType });
} catch (Exception e) {
throw new RuntimeException(e);
*/
org.deeplearning4j.nn.layers.convolution.Convolution1DNewLayer ret =
new org.deeplearning4j.nn.layers.convolution.Convolution1DNewLayer(lconf, networkDataType);
ret.addTrainingListeners(trainingListeners);
ret.setIndex(layerIndex);
ret.setParamsViewArray(layerParamsView);
Map<String, INDArray> paramTable = initializer().init(this, layerParamsView, initializeParams);
ret.setParamTable(paramTable);
ret.setLayerConfiguration(this);
return ret;
}
@Override
public InputType getOutputType(int layerIndex, InputType inputType) {
if (inputType == null || inputType.getType() != InputType.Type.RNN) {
throw new IllegalStateException(
"Invalid input for 1D CNN layer (layer index = "
+ layerIndex
+ ", layer name = \""
+ getName()
+ "\"): expect RNN input type with size > 0. Got: "
+ inputType);
}
InputType.InputTypeRecurrent it = (InputType.InputTypeRecurrent) inputType;
long inputTsLength = it.getTimeSeriesLength();
long outLength;
if (inputTsLength < 0) {
// Probably: user did InputType.recurrent(x) without specifying sequence length
outLength = -1;
} else {
outLength =
Convolution1DUtils.getOutputSize(
inputTsLength,
getKernelSize()[0],
getStride()[0],
getPadding()[0],
getConvolutionMode(),
getDilation()[0]);
}
return InputType.recurrent(nOut, outLength, rnnDataFormat);
}
@Override
public void setNIn(InputType inputType, boolean override) {
if (inputType == null || inputType.getType() != InputType.Type.RNN) {
throw new IllegalStateException(
"Invalid input for 1D CNN layer (layer name = \""
+ getName()
+ "\"): expect RNN input type with size > 0. Got: "
+ inputType);
}
InputType.InputTypeRecurrent r = (InputType.InputTypeRecurrent) inputType;
if (nIn <= 0 || override) {
this.nIn = r.getSize();
}
if (this.rnnDataFormat == null || override) this.rnnDataFormat = r.getFormat();
if (this.dataFormat == null || override)
this.dataFormat = rnnDataFormat == RNNFormat.NCW ? CNN2DFormat.NCHW : CNN2DFormat.NHWC;
}
@Override
public InputPreProcessor getPreProcessorForInputType(InputType inputType) {
if (inputType == null) {
throw new IllegalStateException(
"Invalid input for Convolution1D layer (layer name=\""
+ getName()
+ "\"): input is null");
}
return InputTypeUtil.getPreprocessorForInputTypeRnnLayers(inputType, rnnDataFormat, getName());
}
/**
* This is a report of the estimated memory consumption for the given layer
*
* @param inputType Input type to the layer. Memory consumption is often a function of the input
* type
* @return Memory report for the layer
*/
@Override
public LayerMemoryReport getMemoryReport(InputType inputType) {
    // Memory report is not currently implemented for this layer
    return null;
}
  protected boolean allowCausal() {
    // Causal convolution is allowed for 1D convolution layers
    return true;
  }
  private static final class Convolution1DNewBuilderImpl
      extends Convolution1DNewBuilder<Convolution1DNew, Convolution1DNewBuilderImpl> {
    public Convolution1DNew build() {
      // Build once to see which optional values were left unset, then apply builder defaults
      Convolution1DNew l = new Convolution1DNew(this);
      if (l.getDilation() == null) {
        dilation(1, 1);
      }
      if (l.getPadding() == null) {
        padding(0);
      }
      // Rebuild with the defaults applied, then validate the final configuration
      l = new Convolution1DNew(this);
      Convolution1DUtils.validateConvolutionModePadding(l.getConvolutionMode(), l.getPadding()[0]);
      Convolution1DUtils.validateCnn1DKernelStridePadding(
          l.getKernelSize()[0], l.getStride()[0], l.getPadding()[0]);
      l.initializeConstraints();
      return l;
    }
  }
}
public abstract static class Convolution1DNewBuilder<
C extends Convolution1DNew, B extends Convolution1DNewBuilder<C, B>>
extends AbstractConvolutionLayerBuilder<C, B> {
    private int dimensions(Class<?> arrayType) {
      return arrayType.isArray() ? 1 + dimensions(arrayType.getComponentType()) : 0;
    }
    @Override
    public B kernelSize(int @NonNull ... kernel) {
      // TODO: callers may pass multi-element arrays, but only the first element is used for 1D convolution
      if (dimensions(kernel.getClass()) > 1)
        log.warn(
            "Kernel size has '{}' dimensions; only the first dimension is used for a 1D convolution layer.",
            dimensions(kernel.getClass()));
      super.kernelSize(
          ValidationUtils.validate1NonNegative(new int[] {kernel[0]}, "kernelSize")[0], 1);
      return self();
    }
    public B padding(int @NonNull ... padding) {
      // TODO: callers may pass multi-element arrays, but only the first element is used
      super.padding(ValidationUtils.validate1NonNegative(new int[] {padding[0]}, "padding"));
      return self();
    }
    public B dilation(int @NonNull ... dilation) {
      // TODO: callers may pass multi-element arrays, but only the first element is used
      super.dilation(ValidationUtils.validate1NonNegative(new int[] {dilation[0]}, "dilation"));
      return self();
    }
    public B stride(int @NonNull ... stride) {
      // TODO: callers may pass multi-element arrays, but only the first element is used
      super.stride(ValidationUtils.validate1NonNegative(new int[] {stride[0]}, "stride")[0], 1);
      return self();
    }
}
}
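For context, a minimal usage sketch of the Convolution1DNew builder declared above (a sketch under assumptions, not part of the committed sources): the layer name, nIn/nOut values and kernel/stride/padding choices are illustrative, and the nIn/nOut/name setters are assumed to be inherited from the parent configuration builders, as elsewhere in this commit.

// Hedged sketch: configuring a Convolution1DNew layer for a multivariate time series
// with 8 input channels (P) and 16 filters. All values are illustrative only.
Convolution1DNew conv1d =
    Convolution1DNew.builder()
        .name("conv1d-example")
        .nIn(8)            // P variables / channels
        .nOut(16)          // number of filters
        .kernelSize(5)     // only the first element is used; kernel width is fixed to 1
        .stride(1)
        .padding(0)
        .dilation(1)
        .build();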

View File

@ -61,6 +61,23 @@ import org.nd4j.linalg.api.ndarray.INDArray;
@EqualsAndHashCode(callSuper = true) @EqualsAndHashCode(callSuper = true)
@SuperBuilder(builderMethodName = "innerBuilder") @SuperBuilder(builderMethodName = "innerBuilder")
public class ConvolutionLayer extends FeedForwardLayer { public class ConvolutionLayer extends FeedForwardLayer {
public static ConvolutionLayerBuilder<?, ?> builder() {
return innerBuilder();
}
public static ConvolutionLayerBuilder<?, ?> builder(int... kernelSize) {
return innerBuilder().kernelSize(kernelSize);
}
public static ConvolutionLayerBuilder<?, ?> builder(int[] kernelSize, int[] stride) {
return innerBuilder().kernelSize(kernelSize).stride(stride);
}
public static ConvolutionLayerBuilder<?, ?> builder(
int[] kernelSize, int[] stride, int[] padding) {
return innerBuilder().kernelSize(kernelSize).stride(stride).padding(padding);
}
/** /**
* Size of the convolution rows/columns * Size of the convolution rows/columns
* *
@ -122,23 +139,6 @@ public class ConvolutionLayer extends FeedForwardLayer {
@Builder.Default @JsonIgnore @EqualsAndHashCode.Exclude @Getter @Setter @Builder.Default @JsonIgnore @EqualsAndHashCode.Exclude @Getter @Setter
private boolean defaultValueOverriden = false; private boolean defaultValueOverriden = false;
public static ConvolutionLayerBuilder<?, ?> builder() {
return innerBuilder();
}
public static ConvolutionLayerBuilder<?, ?> builder(int... kernelSize) {
return innerBuilder().kernelSize(kernelSize);
}
public static ConvolutionLayerBuilder<?, ?> builder(int[] kernelSize, int[] stride) {
return innerBuilder().kernelSize(kernelSize).stride(stride);
}
public static ConvolutionLayerBuilder<?, ?> builder(
int[] kernelSize, int[] stride, int[] padding) {
return innerBuilder().kernelSize(kernelSize).stride(stride).padding(padding);
}
public boolean hasBias() { public boolean hasBias() {
return hasBias; return hasBias;
} }
@ -429,6 +429,7 @@ public class ConvolutionLayer extends FeedForwardLayer {
} }
} }
/*
private static final class ConvolutionLayerBuilderImpl private static final class ConvolutionLayerBuilderImpl
extends ConvolutionLayerBuilder<ConvolutionLayer, ConvolutionLayerBuilderImpl> { extends ConvolutionLayerBuilder<ConvolutionLayer, ConvolutionLayerBuilderImpl> {
public ConvolutionLayer build() { public ConvolutionLayer build() {
@ -473,6 +474,6 @@ public class ConvolutionLayer extends FeedForwardLayer {
return l; return l;
} }
} }
*/
} }

View File

@ -38,56 +38,24 @@ import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ndarray.INDArray;
/**
 * Deconvolution2D layer. nIn in the input layer is the number of channels; nOut is the number of
 * filters to be used in the net, or in other words the channels. The builder specifies the
 * filter/kernel size, the stride and padding. The pooling layer takes the kernel size.
 */
@Data @Data
@ToString(callSuper = true) @ToString(callSuper = true)
@EqualsAndHashCode(callSuper = true) @EqualsAndHashCode(callSuper = true)
@Jacksonized @SuperBuilder(builderMethodName = "innerBuilder") @Jacksonized
@SuperBuilder
public class Deconvolution2D extends ConvolutionLayer { public class Deconvolution2D extends ConvolutionLayer {
@Builder.Default private CNN2DFormat format = CNN2DFormat.NCHW;
@Builder.Default
private CNN2DFormat format = CNN2DFormat.NCHW;
protected boolean allowCausal() { protected boolean allowCausal() {
// Causal convolution - allowed for 1D only // Causal convolution - allowed for 1D only
return false; return false;
} }
private static final class Deconvolution2DBuilderImpl extends Deconvolution2DBuilder<Deconvolution2D, Deconvolution2DBuilderImpl> {
public Deconvolution2D build() {
Deconvolution2D l = new Deconvolution2D(this);
l.initializeConstraints();
return l;
}
}
public static abstract class Deconvolution2DBuilder<C extends Deconvolution2D, B extends Deconvolution2DBuilder<C, B>> extends ConvolutionLayerBuilder<C, B> {
@Override
public B kernelSize(int... kernelSize) {
super.kernelSize(ValidationUtils.validate2NonNegative(kernelSize, false, "kernelSize"));
return self();
}
@Override
public B stride(int... stride) {
super.stride(ValidationUtils.validate2NonNegative(stride, false, "stride"));
return self();
}
@Override
public B padding(int... padding) {
super.padding(ValidationUtils.validate2NonNegative(padding, false, "padding"));
return self();
}
@Override
public B dilation(int... dilation) {
super.dilation(ValidationUtils.validate2NonNegative(dilation, false, "dilation"));
return self();
}
}
public boolean hasBias() { public boolean hasBias() {
return isHasBias(); return isHasBias();
} }
@ -108,8 +76,13 @@ private CNN2DFormat format = CNN2DFormat.NCHW;
} }
@Override @Override
public Layer instantiate(NeuralNetConfiguration conf, Collection<TrainingListener> trainingListeners, public Layer instantiate(
int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { NeuralNetConfiguration conf,
Collection<TrainingListener> trainingListeners,
int layerIndex,
INDArray layerParamsView,
boolean initializeParams,
DataType networkDataType) {
setNetConfiguration(conf); setNetConfiguration(conf);
LayerValidation.assertNInNOutSet("Deconvolution2D", getName(), layerIndex, getNIn(), getNOut()); LayerValidation.assertNInNOutSet("Deconvolution2D", getName(), layerIndex, getNIn(), getNOut());
LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
@ -135,13 +108,61 @@ private CNN2DFormat format = CNN2DFormat.NCHW;
@Override @Override
public InputType getOutputType(int layerIndex, InputType inputType) { public InputType getOutputType(int layerIndex, InputType inputType) {
if (inputType == null || inputType.getType() != InputType.Type.CNN) { if (inputType == null || inputType.getType() != InputType.Type.CNN) {
throw new IllegalStateException("Invalid input for Convolution layer (layer name=\"" + getName() throw new IllegalStateException(
+ "\"): Expected CNN input, got " + inputType); "Invalid input for Convolution layer (layer name=\""
+ getName()
+ "\"): Expected CNN input, got "
+ inputType);
} }
return InputTypeUtil.getOutputTypeDeconvLayer(inputType, getKernelSize(), getStride(), getPadding(), getDilation(), getConvolutionMode(), return InputTypeUtil.getOutputTypeDeconvLayer(
nOut, layerIndex, getName(), Deconvolution2DLayer.class); inputType,
getKernelSize(),
getStride(),
getPadding(),
getDilation(),
getConvolutionMode(),
nOut,
layerIndex,
getName(),
Deconvolution2DLayer.class);
} }
private static final class Deconvolution2DBuilderImpl
extends Deconvolution2DBuilder<Deconvolution2D, Deconvolution2DBuilderImpl> {
public Deconvolution2D build() {
Deconvolution2D l = new Deconvolution2D(this);
l.initializeConstraints();
return l;
}
}
public abstract static class Deconvolution2DBuilder<
C extends Deconvolution2D, B extends Deconvolution2DBuilder<C, B>>
extends ConvolutionLayerBuilder<C, B> {
@Override
public B kernelSize(int... kernelSize) {
super.kernelSize(ValidationUtils.validate2NonNegative(kernelSize, false, "kernelSize"));
return self();
}
@Override
public B stride(int... stride) {
super.stride(ValidationUtils.validate2NonNegative(stride, false, "stride"));
return self();
}
@Override
public B padding(int... padding) {
super.padding(ValidationUtils.validate2NonNegative(padding, false, "padding"));
return self();
}
@Override
public B dilation(int... dilation) {
super.dilation(ValidationUtils.validate2NonNegative(dilation, false, "dilation"));
return self();
}
}
} }

View File

@ -63,6 +63,7 @@ public class DenseLayer extends FeedForwardLayer {
LayerValidation.assertNInNOutSet( LayerValidation.assertNInNOutSet(
"DenseLayerConfiguration", getName(), layerIndex, getNIn(), getNOut()); "DenseLayerConfiguration", getName(), layerIndex, getNIn(), getNOut());
LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
lconf.setNetConfiguration(conf);
runInheritance(); runInheritance();
org.deeplearning4j.nn.layers.feedforward.dense.DenseLayer ret = org.deeplearning4j.nn.layers.feedforward.dense.DenseLayer ret =

View File

@ -31,6 +31,7 @@ import lombok.experimental.SuperBuilder;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import net.brutex.ai.dnn.api.ILayerConfiguration; import net.brutex.ai.dnn.api.ILayerConfiguration;
import net.brutex.ai.dnn.api.LayerType; import net.brutex.ai.dnn.api.LayerType;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.api.ParamInitializer;
import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.api.layers.LayerConstraint;
import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.InputPreProcessor;
@ -56,7 +57,7 @@ import org.nd4j.linalg.learning.regularization.Regularization;
@NoArgsConstructor @NoArgsConstructor
// @JsonIdentityInfo(generator= ObjectIdGenerators.IntSequenceGenerator.class, property="@id") // @JsonIdentityInfo(generator= ObjectIdGenerators.IntSequenceGenerator.class, property="@id")
@Slf4j @Slf4j
@SuperBuilder @SuperBuilder(toBuilder = true)
public abstract class LayerConfiguration public abstract class LayerConfiguration
implements ILayerConfiguration, Serializable, Cloneable { // ITrainableLayerConfiguration implements ILayerConfiguration, Serializable, Cloneable { // ITrainableLayerConfiguration
@ -66,10 +67,20 @@ public abstract class LayerConfiguration
@Getter @Setter protected List<LayerConstraint> biasConstraints; @Getter @Setter protected List<LayerConstraint> biasConstraints;
@Getter @Setter protected List<LayerConstraint> constraints; @Getter @Setter protected List<LayerConstraint> constraints;
@Getter @Setter protected IWeightNoise weightNoise; @Getter @Setter protected IWeightNoise weightNoise;
@Builder.Default private @Getter @Setter LinkedHashSet<String> variables = new LinkedHashSet<>(); @Builder.Default private @Getter @Setter @NonNull LinkedHashSet<String> variables = new LinkedHashSet<>();
@Getter @Setter private IDropout dropOut; @Getter @Setter private IDropout dropOut;
/** The type of the layer, basically defines the base class and its properties */ /** The type of the layer, basically defines the base class and its properties */
@Builder.Default @Getter @Setter @NonNull private LayerType type = LayerType.UNKNOWN; @Builder.Default @Getter @Setter @NonNull private LayerType type = LayerType.UNKNOWN;
/**
* Number of parameters this layer has as a result of its configuration.
* @return number of parameters
*/
public long numParams() {
return initializer().numParams(this);
}
/** /**
* A reference to the neural net configuration. This field is excluded from json serialization as * A reference to the neural net configuration. This field is excluded from json serialization as
* well as from equals check to avoid circular referenced. * well as from equals check to avoid circular referenced.

View File

@ -22,6 +22,8 @@ package org.deeplearning4j.nn.conf.layers;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import java.util.*; import java.util.*;
import com.fasterxml.jackson.annotation.JsonProperty;
import lombok.*; import lombok.*;
import lombok.experimental.SuperBuilder; import lombok.experimental.SuperBuilder;
import lombok.extern.jackson.Jacksonized; import lombok.extern.jackson.Jacksonized;
@ -59,10 +61,12 @@ public class LocallyConnected1D extends SameDiffLayer {
/** /**
* @param nIn Number of inputs to the layer (input size) * @param nIn Number of inputs to the layer (input size)
*/ */
@JsonProperty("nIn")
private long nIn; private long nIn;
/** /**
* @param nOut Number of outputs (output size) * @param nOut Number of outputs (output size)
*/ */
@JsonProperty("nOut")
private long nOut; private long nOut;
/** /**
* @param activation Activation function for the layer * @param activation Activation function for the layer

View File

@ -34,6 +34,16 @@ import org.nd4j.linalg.learning.regularization.Regularization;
@SuperBuilder @SuperBuilder
public abstract class NoParamLayer extends LayerConfiguration { public abstract class NoParamLayer extends LayerConfiguration {
/**
* Number of parameters in this layer. This will always return 0.
*
* @return 0
*/
@Override
public long numParams() {
return 0;
}
@Override @Override
public ParamInitializer initializer() { public ParamInitializer initializer() {
return EmptyParamInitializer.getInstance(); return EmptyParamInitializer.getInstance();
@ -58,6 +68,7 @@ public abstract class NoParamLayer extends LayerConfiguration {
/** /**
* Will always return no-Op updater. * Will always return no-Op updater.
*
* @return * @return
*/ */
@Override @Override
@ -65,7 +76,7 @@ public abstract class NoParamLayer extends LayerConfiguration {
return Updater.NONE.getIUpdaterWithDefaultConfig(); return Updater.NONE.getIUpdaterWithDefaultConfig();
} }
public static abstract class NoParamLayerBuilder<C extends NoParamLayer, B extends NoParamLayerBuilder<C,B>> public abstract static class NoParamLayerBuilder<
extends LayerConfigurationBuilder<C,B> C extends NoParamLayer, B extends NoParamLayerBuilder<C, B>>
{} extends LayerConfigurationBuilder<C, B> {}
} }

View File

@ -23,6 +23,7 @@ package org.deeplearning4j.nn.conf.layers;
import java.util.Collection; import java.util.Collection;
import java.util.Map; import java.util.Map;
import lombok.EqualsAndHashCode; import lombok.EqualsAndHashCode;
import lombok.NonNull;
import lombok.ToString; import lombok.ToString;
import lombok.experimental.SuperBuilder; import lombok.experimental.SuperBuilder;
import lombok.extern.jackson.Jacksonized; import lombok.extern.jackson.Jacksonized;
@ -35,6 +36,7 @@ import org.deeplearning4j.optimize.api.TrainingListener;
import org.deeplearning4j.util.Convolution1DUtils; import org.deeplearning4j.util.Convolution1DUtils;
import org.deeplearning4j.util.Convolution2DUtils; import org.deeplearning4j.util.Convolution2DUtils;
import org.deeplearning4j.util.ValidationUtils; import org.deeplearning4j.util.ValidationUtils;
import org.jetbrains.annotations.NotNull;
import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ndarray.INDArray;
@ -50,9 +52,91 @@ import org.nd4j.linalg.api.ndarray.INDArray;
@ToString(callSuper = true) @ToString(callSuper = true)
@EqualsAndHashCode(callSuper = true) @EqualsAndHashCode(callSuper = true)
@Jacksonized @Jacksonized
@SuperBuilder @SuperBuilder(builderMethodName = "innerBuilder")
public class Subsampling1DLayer extends SubsamplingLayer { public class Subsampling1DLayer extends SubsamplingLayer {
public static Subsampling1DLayerBuilder<?, ?> builder() {
return innerBuilder();
}
public static Subsampling1DLayerBuilder<?, ?> builder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType) {
return innerBuilder()
.poolingType(poolingType);
}
public static Subsampling1DLayerBuilder<?, ?> builder(PoolingType poolingType) {
return innerBuilder()
.poolingType(poolingType.toPoolingType());
}
public static Subsampling1DLayerBuilder<?, ?> builder(int... kernelSize) {
return innerBuilder()
.kernelSize(kernelSize);
}
public static Subsampling1DLayerBuilder<?, ?> builder(int[] kernelSize, int[] stride) {
return innerBuilder()
.kernelSize(kernelSize)
.stride(stride);
}
public static Subsampling1DLayerBuilder<?, ?> builder(int[] kernelSize, int[] stride, int[] padding) {
return innerBuilder()
.kernelSize(kernelSize)
.stride(stride)
.padding(padding);
}
public static Subsampling1DLayerBuilder<?, ?> builder(PoolingType poolingType, int[] kernelSize, int[] stride, int[] padding) {
return innerBuilder()
.poolingType(poolingType.toPoolingType())
.kernelSize(kernelSize)
.stride(stride)
.padding(padding)
;
}
public static Subsampling1DLayerBuilder<?, ?> builder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType, int[] kernelSize, int[] stride, int[] padding) {
return innerBuilder()
.poolingType(poolingType)
.kernelSize(kernelSize)
.stride(stride)
.padding(padding)
;
}
public static Subsampling1DLayerBuilder<?, ?> builder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType, int[] kernelSize) {
return innerBuilder()
.poolingType(poolingType)
.kernelSize(kernelSize)
;
}
public static Subsampling1DLayerBuilder<?, ?> builder(PoolingType poolingType, int[] kernelSize) {
return innerBuilder()
.poolingType(poolingType.toPoolingType())
.kernelSize(kernelSize)
;
}
public static Subsampling1DLayerBuilder<?, ?> builder(org.deeplearning4j.nn.conf.layers.PoolingType poolingType, int[] kernelSize, int[] stride) {
return innerBuilder()
.poolingType(poolingType)
.kernelSize(kernelSize)
.stride(stride)
;
}
public static Subsampling1DLayerBuilder<?, ?> builder(PoolingType poolingType, int[] kernelSize, int[] stride) {
return innerBuilder()
.poolingType(poolingType.toPoolingType())
.kernelSize(kernelSize)
.stride(stride)
;
}
@Override @Override
public org.deeplearning4j.nn.api.Layer instantiate( public org.deeplearning4j.nn.api.Layer instantiate(
NeuralNetConfiguration conf, NeuralNetConfiguration conf,
@ -176,20 +260,20 @@ public class Subsampling1DLayer extends SubsamplingLayer {
* @return * @return
*/ */
@Override @Override
public B kernelSize(int... kernelSize) { public B kernelSize(int @NonNull ... kernelSize) {
super.kernelSize( ValidationUtils.validate1NonNegative(kernelSize, "kernelSize")[0]); super.kernelSize(ValidationUtils.validate1NonNegative(new int[]{kernelSize[0]}, "kernelSize")[0]); //fix width = 1
return self(); return self();
} }
@Override @Override
public B stride(int... stride) { public B stride(@NotNull int... stride) {
super.stride( ValidationUtils.validate1NonNegative(stride, "stride")[0]); super.stride( ValidationUtils.validate1NonNegative(new int[]{stride[0]}, "stride")[0]);
return self(); return self();
} }
@Override @Override
public B padding(int... padding) { public B padding(@NotNull int... padding) {
super.padding( ValidationUtils.validate1NonNegative(padding, "padding")[0]); super.padding( ValidationUtils.validate1NonNegative(new int[]{padding[0]}, "padding"));
return self(); return self();
} }
} }

View File

@ -27,10 +27,7 @@ import lombok.*;
import lombok.experimental.SuperBuilder; import lombok.experimental.SuperBuilder;
import lombok.extern.jackson.Jacksonized; import lombok.extern.jackson.Jacksonized;
import org.deeplearning4j.nn.api.ParamInitializer; import org.deeplearning4j.nn.api.ParamInitializer;
import org.deeplearning4j.nn.conf.CNN2DFormat; import org.deeplearning4j.nn.conf.*;
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.InputPreProcessor;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.memory.LayerMemoryReport; import org.deeplearning4j.nn.conf.memory.LayerMemoryReport;
import org.deeplearning4j.nn.conf.memory.MemoryReport; import org.deeplearning4j.nn.conf.memory.MemoryReport;
@ -84,6 +81,7 @@ public class SubsamplingLayer extends NoParamLayer {
* @param padding padding in the height and width dimensions * @param padding padding in the height and width dimensions
*/ */
@Builder.Default protected int[] padding = new int[] {0, 0}; @Builder.Default protected int[] padding = new int[] {0, 0};
protected int pnorm; protected int pnorm;
@Builder.Default protected double eps = 1e-8; @Builder.Default protected double eps = 1e-8;
/** /**
@ -104,6 +102,7 @@ public class SubsamplingLayer extends NoParamLayer {
*/ */
protected @Builder.Default CNN2DFormat dataFormat = protected @Builder.Default CNN2DFormat dataFormat =
CNN2DFormat.NCHW; // default value for legacy reasons CNN2DFormat.NCHW; // default value for legacy reasons
protected @Builder.Default RNNFormat rnnFormat = RNNFormat.NCW;
/** /**
* When doing average pooling, should the padding values be included in the divisor or not?<br> * When doing average pooling, should the padding values be included in the divisor or not?<br>
* Not applicable for max and p-norm pooling.<br> * Not applicable for max and p-norm pooling.<br>
@ -127,6 +126,7 @@ public class SubsamplingLayer extends NoParamLayer {
* average pooling * average pooling
*/ */
@Builder.Default protected boolean avgPoolIncludePadInDivisor = true; @Builder.Default protected boolean avgPoolIncludePadInDivisor = true;
/** /**
* Kernel dilation. Default: {1, 1}, which is standard convolutions. Used for implementing dilated * Kernel dilation. Default: {1, 1}, which is standard convolutions. Used for implementing dilated
* convolutions, which are also known as atrous convolutions.<br> * convolutions, which are also known as atrous convolutions.<br>
@ -301,7 +301,7 @@ public class SubsamplingLayer extends NoParamLayer {
public void setNIn(InputType inputType, boolean override) { public void setNIn(InputType inputType, boolean override) {
// No op: subsampling layer doesn't have nIn value // No op: subsampling layer doesn't have nIn value
if (!defaultValueOverridden || override) { if (!defaultValueOverridden || override) {
this.dataFormat = ((InputType.InputTypeConvolutional) inputType).getFormat(); this.rnnFormat = ((InputType.InputTypeRecurrent) inputType).getFormat();
defaultValueOverridden = true; defaultValueOverridden = true;
} }
} }
@ -355,14 +355,6 @@ public class SubsamplingLayer extends NoParamLayer {
.build(); .build();
} }
public int getPnorm() {
return pnorm;
}
public double getEps() {
return eps;
}
public enum PoolingType { public enum PoolingType {
MAX, MAX,
AVG, AVG,
@ -394,33 +386,33 @@ public class SubsamplingLayer extends NoParamLayer {
return self(); return self();
} }
public B eps(int eps) { public B eps(double eps) {
ValidationUtils.validateNonNegative(eps, "eps"); ValidationUtils.validateNonNegative(eps, "eps");
this.eps$value = eps; this.eps$value = eps;
this.eps$set = true; this.eps$set = true;
return self(); return self();
} }
public B kernelSize(int... kernelSize) { public B kernelSize(int @NonNull... kernelSize) {
this.kernelSize$value = ValidationUtils.validate2NonNegative(kernelSize, false, "kernelSize"); this.kernelSize$value = ValidationUtils.validate2NonNegative(kernelSize, true, "kernelSize");
this.kernelSize$set = true; this.kernelSize$set = true;
return self(); return self();
} }
public B stride(int... stride) { public B stride(int @NonNull ... stride) {
this.stride$value = ValidationUtils.validate2NonNegative(stride, false, "stride"); this.stride$value = ValidationUtils.validate2NonNegative(stride, true, "stride");
this.stride$set = true; this.stride$set = true;
return self(); return self();
} }
public B padding(int... padding) { public B padding(int @NonNull ... padding) {
this.padding$value = ValidationUtils.validate2NonNegative(padding, false, "padding"); this.padding$value = ValidationUtils.validate2NonNegative(padding, true, "padding");
this.padding$set = true; this.padding$set = true;
return self(); return self();
} }
public B dilation(int... dilation) { public B dilation(int @NonNull ... dilation) {
this.dilation$value = ValidationUtils.validate2NonNegative(dilation, false, "dilation"); this.dilation$value = ValidationUtils.validate2NonNegative(dilation, true, "dilation");
this.dilation$set = true; this.dilation$set = true;
return self(); return self();
} }

View File

@ -74,6 +74,7 @@ public class FrozenLayer extends LayerConfiguration {
boolean initializeParams, boolean initializeParams,
DataType networkDataType) { DataType networkDataType) {
innerConfiguration.setNetConfiguration(conf);
// Need to be able to instantiate a layer, from a config - for JSON -> net type situations // Need to be able to instantiate a layer, from a config - for JSON -> net type situations
org.deeplearning4j.nn.api.Layer underlying = org.deeplearning4j.nn.api.Layer underlying =
innerConfiguration.instantiate( innerConfiguration.instantiate(

View File

@ -20,6 +20,7 @@
package org.deeplearning4j.nn.conf.layers.samediff; package org.deeplearning4j.nn.conf.layers.samediff;
import com.fasterxml.jackson.annotation.JsonIgnore;
import java.util.Collection; import java.util.Collection;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
@ -52,7 +53,8 @@ import org.nd4j.linalg.learning.regularization.WeightDecay;
@EqualsAndHashCode(callSuper = true, doNotUseGetters = true) @EqualsAndHashCode(callSuper = true, doNotUseGetters = true)
@NoArgsConstructor @NoArgsConstructor
@SuperBuilder @SuperBuilder
public abstract class AbstractSameDiffLayer extends LayerConfiguration { public abstract class AbstractSameDiffLayer extends LayerConfiguration
implements org.deeplearning4j.nn.api.ITraininableLayerConfiguration {
/** /**
* The regularization for the parameters (excluding biases) - for example {@link WeightDecay} * The regularization for the parameters (excluding biases) - for example {@link WeightDecay}
@ -63,16 +65,14 @@ public abstract class AbstractSameDiffLayer extends LayerConfiguration {
* @param regularization Regularization to apply for the network parameters/weights (excluding * @param regularization Regularization to apply for the network parameters/weights (excluding
* biases) * biases)
*/ */
@Getter @Getter protected List<Regularization> regularization;
protected List<Regularization> regularization;
/** /**
* The regularization for the biases only - for example {@link WeightDecay} -- SETTER -- Set the * The regularization for the biases only - for example {@link WeightDecay} -- SETTER -- Set the
* regularization for the biases only - for example {@link WeightDecay} * regularization for the biases only - for example {@link WeightDecay}
* *
* @param regularizationBias Regularization to apply for the network biases only * @param regularizationBias Regularization to apply for the network biases only
*/ */
@Getter @Getter protected List<Regularization> regularizationBias;
protected List<Regularization> regularizationBias;
/** /**
* Gradient updater. For example, {@link org.nd4j.linalg.learning.config.Adam} or {@link * Gradient updater. For example, {@link org.nd4j.linalg.learning.config.Adam} or {@link
* org.nd4j.linalg.learning.config.Nesterovs} * org.nd4j.linalg.learning.config.Nesterovs}
@ -87,20 +87,22 @@ public abstract class AbstractSameDiffLayer extends LayerConfiguration {
* @param biasUpdater Updater to use for bias parameters * @param biasUpdater Updater to use for bias parameters
*/ */
protected @Getter @Setter IUpdater biasUpdater; protected @Getter @Setter IUpdater biasUpdater;
@Getter @Setter
protected GradientNormalization gradientNormalization; @Getter @Setter protected GradientNormalization gradientNormalization;
@Getter @Setter @Getter @Setter protected double gradientNormalizationThreshold = Double.NaN;
protected double gradientNormalizationThreshold = Double.NaN; @Getter @Setter private SDLayerParams layerParams;
@Getter @Setter
private SDLayerParams layerParams; @Getter @Setter private DataType dataType;
@Override @Override
public void runInheritance(@NotNull NeuralNetConfiguration conf) { public void runInheritance(@NotNull NeuralNetConfiguration conf) {
super.runInheritance(conf); super.runInheritance(conf);
if (this.biasUpdater == null) this.biasUpdater = conf.getBiasUpdater(); if (this.biasUpdater == null) this.biasUpdater = conf.getBiasUpdater();
if (this.updater == null) this.updater = conf.getUpdater(); if (this.updater == null) this.updater = conf.getUpdater();
if (this.regularizationBias == null || regularizationBias.isEmpty()) this.regularizationBias = conf.getRegularizationBias(); if (this.regularizationBias == null || regularizationBias.isEmpty())
if (this.regularization == null || regularization.isEmpty()) this.regularization = conf.getRegularization(); this.regularizationBias = conf.getRegularizationBias();
if (this.regularization == null || regularization.isEmpty())
this.regularization = conf.getRegularization();
// if( this.weightInit == null) this.weightInit = conf.getWeightInit(); // if( this.weightInit == null) this.weightInit = conf.getWeightInit();
if (this.gradientNormalization == null) if (this.gradientNormalization == null)
this.gradientNormalization = conf.getGradientNormalization(); this.gradientNormalization = conf.getGradientNormalization();
@ -109,6 +111,7 @@ public abstract class AbstractSameDiffLayer extends LayerConfiguration {
this.gradientNormalizationThreshold = conf.getGradientNormalizationThreshold(); this.gradientNormalizationThreshold = conf.getGradientNormalizationThreshold();
} }
} }
@Override @Override
public List<Regularization> getRegularizationByParam(String paramName) { public List<Regularization> getRegularizationByParam(String paramName) {
if (layerParams.isWeightParam(paramName)) { if (layerParams.isWeightParam(paramName)) {
@ -119,6 +122,7 @@ public abstract class AbstractSameDiffLayer extends LayerConfiguration {
return null; return null;
} }
@JsonIgnore
public SDLayerParams getLayerParams() { public SDLayerParams getLayerParams() {
if (layerParams == null) { if (layerParams == null) {
layerParams = new SDLayerParams(); layerParams = new SDLayerParams();
@ -138,7 +142,6 @@ public abstract class AbstractSameDiffLayer extends LayerConfiguration {
return null; return null;
} }
/** /**
* Define the parameters for the network. Use {@link SDLayerParams#addWeightParam(String, * Define the parameters for the network. Use {@link SDLayerParams#addWeightParam(String,
* long...)} and {@link SDLayerParams#addBiasParam(String, long...)} * long...)} and {@link SDLayerParams#addBiasParam(String, long...)}
@ -207,7 +210,6 @@ public abstract class AbstractSameDiffLayer extends LayerConfiguration {
fanIn, fanOut, array.shape(), weightInit, null, paramReshapeOrder(null), array); fanIn, fanOut, array.shape(), weightInit, null, paramReshapeOrder(null), array);
} }
/** /**
* This method generates an "all ones" mask array for use in the SameDiff model when none is * This method generates an "all ones" mask array for use in the SameDiff model when none is
* provided. * provided.

View File

@ -80,10 +80,8 @@ public abstract class AbstractLayer<LayerConf_T extends LayerConfiguration> impl
protected DataType dataType; protected DataType dataType;
protected @Getter @Setter int iterationCount; protected @Getter @Setter int iterationCount;
protected @Getter @Setter int epochCount; protected @Getter @Setter int epochCount;
@JsonIgnore @JsonIgnore private @Getter @Setter IModel net;
private @Getter @Setter IModel net; @JsonIgnore @Getter @Setter @NonNull private NeuralNetConfiguration netConfiguration;
@JsonIgnore
@Getter @Setter @NonNull private NeuralNetConfiguration netConfiguration;
public AbstractLayer(@NonNull LayerConfiguration layerConf, @NonNull DataType dataType) { public AbstractLayer(@NonNull LayerConfiguration layerConf, @NonNull DataType dataType) {
//noinspection unchecked //noinspection unchecked
@ -95,19 +93,18 @@ public abstract class AbstractLayer<LayerConf_T extends LayerConfiguration> impl
} }
this.dataType = dataType; this.dataType = dataType;
if (layerConfiguration.getNetConfiguration() == null) { if (layerConfiguration.getNetConfiguration() == null) {
throw new RuntimeException("You cannot create a layer from a layer configuration, that is not part of any neural network configuration."); throw new RuntimeException(
"You cannot create a layer from a layer configuration, that is not part of any neural network configuration.");
} }
this.net = layerConfiguration.getNetConfiguration().getNet(); this.net = layerConfiguration.getNetConfiguration().getNet();
} }
public void addTrainingListeners(TrainingListener... listeners) { public void addTrainingListeners(TrainingListener... listeners) {
if(listeners != null) if (listeners != null) trainingListeners.addAll(List.of(listeners));
trainingListeners.addAll(List.of(listeners));
} }
public void addTrainingListeners(Collection<TrainingListener> listeners) { public void addTrainingListeners(Collection<TrainingListener> listeners) {
if(listeners != null) if (listeners != null) trainingListeners.addAll(listeners);
trainingListeners.addAll(listeners);
} }
@Override @Override
@ -565,7 +562,8 @@ public abstract class AbstractLayer<LayerConf_T extends LayerConfiguration> impl
*/ */
@Override @Override
public void setParamTable(Map<String, INDArray> paramTable) { public void setParamTable(Map<String, INDArray> paramTable) {
log.warn("Using setParamTable on this layer {} has no effect.", getLayerConfiguration().getName()); log.warn(
"Using setParamTable on this layer {} has no effect.", getLayerConfiguration().getName());
// throw new RuntimeException("Not implemented"); // throw new RuntimeException("Not implemented");
} }

View File

@ -662,6 +662,7 @@ public abstract class BaseLayer<LayerConfT extends BaseLayerConfiguration>
*/ */
public boolean hasBias() { public boolean hasBias() {
// Overridden by layers supporting no bias mode: dense, output, convolutional, embedding // Overridden by layers supporting no bias mode: dense, output, convolutional, embedding
return true; return true;
} }

View File

@ -24,6 +24,7 @@ import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.nn.api.ITraininableLayerConfiguration; import org.deeplearning4j.nn.api.ITraininableLayerConfiguration;
import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.conf.CacheMode; import org.deeplearning4j.nn.conf.CacheMode;
import org.deeplearning4j.nn.conf.layers.BaseLayerConfiguration;
import org.deeplearning4j.nn.conf.misc.DummyConfig; import org.deeplearning4j.nn.conf.misc.DummyConfig;
import org.deeplearning4j.nn.gradient.DefaultGradient; import org.deeplearning4j.nn.gradient.DefaultGradient;
import org.deeplearning4j.nn.gradient.Gradient; import org.deeplearning4j.nn.gradient.Gradient;
@ -88,6 +89,8 @@ public class FrozenLayer extends BaseWrapperLayer {
return underlying.activate(input, false, workspaceMgr); return underlying.activate(input, false, workspaceMgr);
} }
@Override @Override
public void fit() { public void fit() {
if (!logFit) { if (!logFit) {

View File

@ -51,21 +51,26 @@ public class Convolution1DLayer extends ConvolutionLayer {
super(conf, dataType); super(conf, dataType);
} }
@Override @Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) { public Pair<Gradient, INDArray> backpropGradient(
INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
assertInputSet(true); assertInputSet(true);
if (epsilon.rank() != 3) if (epsilon.rank() != 3)
throw new DL4JInvalidInputException("Got rank " + epsilon.rank() throw new DL4JInvalidInputException(
"Got rank "
+ epsilon.rank()
+ " array as epsilon for Convolution1D backprop with shape " + " array as epsilon for Convolution1D backprop with shape "
+ Arrays.toString(epsilon.shape()) + Arrays.toString(epsilon.shape())
+ ". Expected rank 3 array with shape [minibatchSize, features, length]. " + layerId()); + ". Expected rank 3 array with shape [minibatchSize, features, length]. "
+ layerId());
Pair<INDArray, INDArray> fwd = preOutput(false, true, workspaceMgr); Pair<INDArray, INDArray> fwd = preOutput(false, true, workspaceMgr);
IActivation afn = getTypedLayerConfiguration().getActivationFn(); IActivation afn = getTypedLayerConfiguration().getActivationFn();
INDArray delta = afn.backprop(fwd.getFirst(), epsilon).getFirst(); //TODO handle activation function params INDArray delta =
afn.backprop(fwd.getFirst(), epsilon).getFirst(); // TODO handle activation function params
Convolution1D c = getTypedLayerConfiguration(); org.deeplearning4j.nn.conf.layers.ConvolutionLayer c = getTypedLayerConfiguration();
Conv1DConfig conf = Conv1DConfig.builder() Conv1DConfig conf =
Conv1DConfig.builder()
.k(c.getKernelSize()[0]) .k(c.getKernelSize()[0])
.s(c.getStride()[0]) .s(c.getStride()[0])
.d(c.getDilation()[0]) .d(c.getDilation()[0])
@ -74,16 +79,18 @@ public class Convolution1DLayer extends ConvolutionLayer {
.paddingMode(Convolution2DUtils.paddingModeForConvolutionMode(convolutionMode)) .paddingMode(Convolution2DUtils.paddingModeForConvolutionMode(convolutionMode))
.build(); .build();
INDArray w = Convolution1DUtils.reshapeWeightArrayOrGradientForFormat( INDArray w =
getParam(ConvolutionParamInitializer.WEIGHT_KEY), Convolution1DUtils.reshapeWeightArrayOrGradientForFormat(
RNNFormat.NCW); getParam(ConvolutionParamInitializer.WEIGHT_KEY), RNNFormat.NCW);
INDArray[] inputArrs; INDArray[] inputArrs;
INDArray[] outputArrs; INDArray[] outputArrs;
INDArray wg = Convolution1DUtils.reshapeWeightArrayOrGradientForFormat( INDArray wg =
gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY), Convolution1DUtils.reshapeWeightArrayOrGradientForFormat(
getRnnDataFormat()); gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY), getRnnDataFormat());
INDArray epsOut = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, input.dataType(), input.shape()); INDArray epsOut =
workspaceMgr.createUninitialized(
ArrayType.ACTIVATION_GRAD, input.dataType(), input.shape());
INDArray input = this.input.castTo(dataType); INDArray input = this.input.castTo(dataType);
if (getTypedLayerConfiguration().getRnnDataFormat() == RNNFormat.NWC) { if (getTypedLayerConfiguration().getRnnDataFormat() == RNNFormat.NWC) {
input = input.permute(0, 2, 1); // NHWC to NCHW input = input.permute(0, 2, 1); // NHWC to NCHW
@ -106,9 +113,14 @@ public class Convolution1DLayer extends ConvolutionLayer {
Gradient retGradient = new DefaultGradient(); Gradient retGradient = new DefaultGradient();
if (getTypedLayerConfiguration().hasBias()) { if (getTypedLayerConfiguration().hasBias()) {
retGradient.setGradientFor(ConvolutionParamInitializer.BIAS_KEY, gradientViews.get(ConvolutionParamInitializer.BIAS_KEY)); retGradient.setGradientFor(
ConvolutionParamInitializer.BIAS_KEY,
gradientViews.get(ConvolutionParamInitializer.BIAS_KEY));
} }
retGradient.setGradientFor(ConvolutionParamInitializer.WEIGHT_KEY, gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY), 'c'); retGradient.setGradientFor(
ConvolutionParamInitializer.WEIGHT_KEY,
gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY),
'c');
if (getRnnDataFormat() == RNNFormat.NWC) { if (getRnnDataFormat() == RNNFormat.NWC) {
epsOut = epsOut.permute(0, 2, 1); epsOut = epsOut.permute(0, 2, 1);
} }
@ -116,7 +128,8 @@ public class Convolution1DLayer extends ConvolutionLayer {
} }
@Override @Override
protected Pair<INDArray, INDArray> preOutput4d(boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) { protected Pair<INDArray, INDArray> preOutput4d(
boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) {
Pair<INDArray, INDArray> preOutput = super.preOutput(true, forBackprop, workspaceMgr); Pair<INDArray, INDArray> preOutput = super.preOutput(true, forBackprop, workspaceMgr);
INDArray p3d = preOutput.getFirst(); INDArray p3d = preOutput.getFirst();
INDArray p = preOutput.getFirst().reshape(p3d.size(0), p3d.size(1), p3d.size(2), 1); INDArray p = preOutput.getFirst().reshape(p3d.size(0), p3d.size(1), p3d.size(2), 1);
@ -125,7 +138,8 @@ public class Convolution1DLayer extends ConvolutionLayer {
} }
@Override @Override
protected Pair<INDArray,INDArray> preOutput(boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) { protected Pair<INDArray, INDArray> preOutput(
boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) {
assertInputSet(false); assertInputSet(false);
INDArray input = this.input.castTo(dataType); INDArray input = this.input.castTo(dataType);
@ -133,8 +147,9 @@ public class Convolution1DLayer extends ConvolutionLayer {
input = input.permute(0, 2, 1); // NHWC to NCHW input = input.permute(0, 2, 1); // NHWC to NCHW
} }
Convolution1D c = getTypedLayerConfiguration(); org.deeplearning4j.nn.conf.layers.ConvolutionLayer c = getTypedLayerConfiguration();
Conv1DConfig conf = Conv1DConfig.builder() Conv1DConfig conf =
Conv1DConfig.builder()
.k(c.getKernelSize()[0]) .k(c.getKernelSize()[0])
.s(c.getStride()[0]) .s(c.getStride()[0])
.d(c.getDilation()[0]) .d(c.getDilation()[0])
@ -143,11 +158,9 @@ public class Convolution1DLayer extends ConvolutionLayer {
.paddingMode(Convolution2DUtils.paddingModeForConvolutionMode(convolutionMode)) .paddingMode(Convolution2DUtils.paddingModeForConvolutionMode(convolutionMode))
.build(); .build();
INDArray w =
INDArray w = Convolution1DUtils.reshapeWeightArrayOrGradientForFormat( Convolution1DUtils.reshapeWeightArrayOrGradientForFormat(
getParam(ConvolutionParamInitializer.WEIGHT_KEY) getParam(ConvolutionParamInitializer.WEIGHT_KEY), RNNFormat.NCW);
,RNNFormat.NCW);
INDArray[] inputs; INDArray[] inputs;
if (getTypedLayerConfiguration().hasBias()) { if (getTypedLayerConfiguration().hasBias()) {
@ -171,39 +184,51 @@ public class Convolution1DLayer extends ConvolutionLayer {
return new Pair<>(output, null); return new Pair<>(output, null);
} }
@Override @Override
public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) { public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) {
INDArray act4d = super.activate(training, workspaceMgr); INDArray act4d = super.activate(training, workspaceMgr);
INDArray act3d = act4d.rank() > 3 ? INDArray act3d =
act4d.reshape(act4d.size(0), act4d.size(1), act4d.size(2)) : act4d; act4d.rank() > 3 ? act4d.reshape(act4d.size(0), act4d.size(1), act4d.size(2)) : act4d;
if (maskArray != null) { if (maskArray != null) {
INDArray maskOut = feedForwardMaskArray(maskArray, MaskState.Active, (int)act3d.size(0)).getFirst(); INDArray maskOut =
Preconditions.checkState(act3d.size(0) == maskOut.size(0) && act3d.size(2) == maskOut.size(1), feedForwardMaskArray(maskArray, MaskState.Active, (int) act3d.size(0)).getFirst();
Preconditions.checkState(
act3d.size(0) == maskOut.size(0) && act3d.size(2) == maskOut.size(1),
"Activations dimensions (0,2) and mask dimensions (0,1) don't match: Activations %s, Mask %s", "Activations dimensions (0,2) and mask dimensions (0,1) don't match: Activations %s, Mask %s",
act3d.shape(), maskOut.shape()); act3d.shape(),
maskOut.shape());
Broadcast.mul(act3d, maskOut, act3d, 0, 2); Broadcast.mul(act3d, maskOut, act3d, 0, 2);
} }
return workspaceMgr.leverageTo(ArrayType.ACTIVATIONS, act3d); //Should be zero copy most of the time return workspaceMgr.leverageTo(
ArrayType.ACTIVATIONS, act3d); // Should be zero copy most of the time
} }
@Override @Override
public Pair<INDArray, MaskState> feedForwardMaskArray(INDArray maskArray, MaskState currentMaskState, public Pair<INDArray, MaskState> feedForwardMaskArray(
int minibatchSize) { INDArray maskArray, MaskState currentMaskState, int minibatchSize) {
INDArray reduced = Convolution2DUtils.cnn1dMaskReduction(maskArray, getTypedLayerConfiguration().getKernelSize()[0], INDArray reduced =
getTypedLayerConfiguration().getStride()[0], getTypedLayerConfiguration().getPadding()[0], getTypedLayerConfiguration().getDilation()[0], Convolution2DUtils.cnn1dMaskReduction(
maskArray,
getTypedLayerConfiguration().getKernelSize()[0],
getTypedLayerConfiguration().getStride()[0],
getTypedLayerConfiguration().getPadding()[0],
getTypedLayerConfiguration().getDilation()[0],
getTypedLayerConfiguration().getConvolutionMode()); getTypedLayerConfiguration().getConvolutionMode());
return new Pair<>(reduced, currentMaskState); return new Pair<>(reduced, currentMaskState);
} }
@Override
public Convolution1D getTypedLayerConfiguration() {
return (Convolution1D)layerConfiguration;
}
private RNNFormat getRnnDataFormat() { private RNNFormat getRnnDataFormat() {
return getTypedLayerConfiguration().getRnnDataFormat(); return getTypedLayerConfiguration().getRnnDataFormat();
} }
  /**
   * @return the typed {@link Convolution1D} configuration for this layer
   */
@Override
public Convolution1D getTypedLayerConfiguration() {
return (Convolution1D) super.getTypedLayerConfiguration();
}
} }

View File

@ -0,0 +1,226 @@
/*
* ******************************************************************************
* *
* *
* * This program and the accompanying materials are made available under the
* * terms of the Apache License, Version 2.0 which is available at
* * https://www.apache.org/licenses/LICENSE-2.0.
* *
* * See the NOTICE file distributed with this work for additional
* * information regarding copyright ownership.
* * Unless required by applicable law or agreed to in writing, software
* * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* * License for the specific language governing permissions and limitations
* * under the License.
* *
* * SPDX-License-Identifier: Apache-2.0
* *****************************************************************************
*/
package org.deeplearning4j.nn.layers.convolution;
import java.util.Arrays;
import java.util.List;
import org.deeplearning4j.exception.DL4JInvalidInputException;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.conf.RNNFormat;
import org.deeplearning4j.nn.conf.layers.Convolution1DNew;
import org.deeplearning4j.nn.conf.layers.LayerConfiguration;
import org.deeplearning4j.nn.gradient.DefaultGradient;
import org.deeplearning4j.nn.gradient.Gradient;
import org.deeplearning4j.nn.params.ConvolutionParamInitializer;
import org.deeplearning4j.nn.workspace.ArrayType;
import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr;
import org.deeplearning4j.util.Convolution1DUtils;
import org.deeplearning4j.util.Convolution2DUtils;
import org.nd4j.common.base.Preconditions;
import org.nd4j.common.primitives.Pair;
import org.nd4j.linalg.activations.IActivation;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.ops.impl.layers.convolution.Conv1D;
import org.nd4j.linalg.api.ops.impl.layers.convolution.Conv1DDerivative;
import org.nd4j.linalg.api.ops.impl.layers.convolution.config.Conv1DConfig;
import org.nd4j.linalg.api.shape.LongShapeDescriptor;
import org.nd4j.linalg.factory.Broadcast;
import org.nd4j.linalg.factory.Nd4j;
public class Convolution1DNewLayer<Layer_ConfT extends Convolution1DNew>
extends ConvolutionNewLayer<Layer_ConfT> {
public Convolution1DNewLayer(LayerConfiguration conf, DataType dataType) {
super(conf, dataType);
}
@Override
public Pair<Gradient, INDArray> backpropGradient(
INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
assertInputSet(true);
if (epsilon.rank() != 3)
throw new DL4JInvalidInputException(
"Got rank "
+ epsilon.rank()
+ " array as epsilon for Convolution1D backprop with shape "
+ Arrays.toString(epsilon.shape())
+ ". Expected rank 3 array with shape [minibatchSize, features, length]. "
+ layerId());
Pair<INDArray, INDArray> fwd = preOutput(false, true, workspaceMgr);
IActivation afn = getTypedLayerConfiguration().getActivationFn();
INDArray delta =
afn.backprop(fwd.getFirst(), epsilon).getFirst(); // TODO handle activation function params
Convolution1DNew c = getTypedLayerConfiguration();
Conv1DConfig conf =
Conv1DConfig.builder()
.k(c.getKernelSize()[0])
.s(c.getStride()[0])
.d(c.getDilation()[0])
.p(c.getPadding()[0])
.dataFormat(Conv1DConfig.NCW)
.paddingMode(Convolution2DUtils.paddingModeForConvolutionMode(convolutionMode))
.build();
INDArray w =
Convolution1DUtils.reshapeWeightArrayOrGradientForFormat(
getParam(ConvolutionParamInitializer.WEIGHT_KEY), RNNFormat.NCW);
INDArray[] inputArrs;
INDArray[] outputArrs;
INDArray wg =
Convolution1DUtils.reshapeWeightArrayOrGradientForFormat(
gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY), getRnnDataFormat());
INDArray epsOut =
workspaceMgr.createUninitialized(
ArrayType.ACTIVATION_GRAD, input.dataType(), input.shape());
INDArray input = this.input.castTo(dataType);
if (getTypedLayerConfiguration().getRnnDataFormat() == RNNFormat.NWC) {
input = input.permute(0, 2, 1); // NHWC to NCHW
}
if (getTypedLayerConfiguration().hasBias()) {
INDArray b = getParam(ConvolutionParamInitializer.BIAS_KEY);
b = b.reshape(b.length());
inputArrs = new INDArray[] {input, w, b, delta};
INDArray bg = gradientViews.get(ConvolutionParamInitializer.BIAS_KEY);
bg = bg.reshape(bg.length());
outputArrs = new INDArray[] {epsOut, wg, bg};
} else {
inputArrs = new INDArray[] {input, w, delta};
outputArrs = new INDArray[] {epsOut, wg};
}
Conv1DDerivative op = new Conv1DDerivative(inputArrs, outputArrs, conf);
Nd4j.exec(op);
Gradient retGradient = new DefaultGradient();
if (getTypedLayerConfiguration().hasBias()) {
retGradient.setGradientFor(
ConvolutionParamInitializer.BIAS_KEY,
gradientViews.get(ConvolutionParamInitializer.BIAS_KEY));
}
retGradient.setGradientFor(
ConvolutionParamInitializer.WEIGHT_KEY,
gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY),
'c');
if (getRnnDataFormat() == RNNFormat.NWC) {
epsOut = epsOut.permute(0, 2, 1);
}
return new Pair<>(retGradient, epsOut);
}
@Override
protected Pair<INDArray, INDArray> preOutput4d(
boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) {
Pair<INDArray, INDArray> preOutput = super.preOutput(true, forBackprop, workspaceMgr);
INDArray p3d = preOutput.getFirst();
INDArray p = preOutput.getFirst().reshape(p3d.size(0), p3d.size(1), p3d.size(2), 1);
preOutput.setFirst(p);
return preOutput;
}
@Override
protected Pair<INDArray, INDArray> preOutput(
boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) {
assertInputSet(false);
INDArray input = this.input.castTo(dataType);
if (getTypedLayerConfiguration().getRnnDataFormat() == RNNFormat.NWC) {
input = input.permute(0, 2, 1); // NHWC to NCHW
}
Convolution1DNew c = getTypedLayerConfiguration();
Conv1DConfig conf =
Conv1DConfig.builder()
.k(c.getKernelSize()[0])
.s(c.getStride()[0])
.d(c.getDilation()[0])
.p(c.getPadding()[0])
.dataFormat(Conv1DConfig.NCW)
.paddingMode(Convolution2DUtils.paddingModeForConvolutionMode(convolutionMode))
.build();
INDArray w =
Convolution1DUtils.reshapeWeightArrayOrGradientForFormat(
getParam(ConvolutionParamInitializer.WEIGHT_KEY), RNNFormat.NCW);
INDArray[] inputs;
if (getTypedLayerConfiguration().hasBias()) {
INDArray b = getParam(ConvolutionParamInitializer.BIAS_KEY);
b = b.reshape(b.length());
inputs = new INDArray[] {input, w, b};
} else {
inputs = new INDArray[] {input, w};
}
Conv1D op = new Conv1D(inputs, null, conf);
List<LongShapeDescriptor> outShape = op.calculateOutputShape();
op.setOutputArgument(0, Nd4j.create(outShape.get(0), false));
Nd4j.exec(op);
INDArray output = op.getOutputArgument(0);
if (getRnnDataFormat() == RNNFormat.NWC) {
output = output.permute(0, 2, 1);
}
return new Pair<>(output, null);
}
@Override
public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) {
INDArray act4d = super.activate(training, workspaceMgr);
INDArray act3d =
act4d.rank() > 3 ? act4d.reshape(act4d.size(0), act4d.size(1), act4d.size(2)) : act4d;
if (maskArray != null) {
INDArray maskOut =
feedForwardMaskArray(maskArray, MaskState.Active, (int) act3d.size(0)).getFirst();
Preconditions.checkState(
act3d.size(0) == maskOut.size(0) && act3d.size(2) == maskOut.size(1),
"Activations dimensions (0,2) and mask dimensions (0,1) don't match: Activations %s, Mask %s",
act3d.shape(),
maskOut.shape());
Broadcast.mul(act3d, maskOut, act3d, 0, 2);
}
return workspaceMgr.leverageTo(
ArrayType.ACTIVATIONS, act3d); // Should be zero copy most of the time
}
@Override
public Pair<INDArray, MaskState> feedForwardMaskArray(
INDArray maskArray, MaskState currentMaskState, int minibatchSize) {
INDArray reduced =
Convolution2DUtils.cnn1dMaskReduction(
maskArray,
getTypedLayerConfiguration().getKernelSize()[0],
getTypedLayerConfiguration().getStride()[0],
getTypedLayerConfiguration().getPadding()[0],
getTypedLayerConfiguration().getDilation()[0],
getTypedLayerConfiguration().getConvolutionMode());
return new Pair<>(reduced, currentMaskState);
}
private RNNFormat getRnnDataFormat() {
return getTypedLayerConfiguration().getRnnDataFormat();
}
}
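For reference, the output sequence length produced by the Conv1D op configured in preOutput() above, and reported by Convolution1DNew.getOutputType() via Convolution1DUtils.getOutputSize, follows the standard 1D convolution arithmetic. A standalone, hedged sketch of that arithmetic is shown below; it assumes a truncate-style convolution mode (ConvolutionMode.Same would instead give ceil(inputLength / stride)) and is illustrative only.

// Illustrative only; the authoritative computation lives in Convolution1DUtils.getOutputSize.
public final class Conv1DOutputLengthSketch {
  static long outputLength(long inputLength, int kernel, int stride, int padding, int dilation) {
    long effectiveKernel = (long) dilation * (kernel - 1) + 1; // extent of the dilated kernel
    return (inputLength + 2L * padding - effectiveKernel) / stride + 1;
  }

  public static void main(String[] args) {
    // 100 time steps, kernel 5, stride 1, no padding, no dilation -> 96
    System.out.println(outputLength(100, 5, 1, 0, 1));
  }
}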

View File

@ -20,7 +20,6 @@
package org.deeplearning4j.nn.layers.convolution;
import java.util.Arrays;
import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.common.config.DL4JClassLoading;
@ -29,6 +28,7 @@ import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.conf.CNN2DFormat;
import org.deeplearning4j.nn.conf.CacheMode;
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.layers.Convolution1DNew;
import org.deeplearning4j.nn.conf.layers.LayerConfiguration;
import org.deeplearning4j.nn.gradient.DefaultGradient;
import org.deeplearning4j.nn.gradient.Gradient;
@ -51,7 +51,8 @@ import org.nd4j.linalg.exception.ND4JOpProfilerException;
import org.nd4j.linalg.factory.Nd4j;
@Slf4j
public class ConvolutionLayer
extends BaseLayer<org.deeplearning4j.nn.conf.layers.ConvolutionLayer> {
protected INDArray i2d;
protected ConvolutionHelper helper = null;
@ -63,13 +64,22 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
public ConvolutionLayer(LayerConfiguration conf, DataType dataType) {
super(conf, dataType);
initializeHelper();
if (conf instanceof Convolution1DNew) {
convolutionMode =
((Convolution1DNew) conf).getConvolutionMode();
} else if (conf instanceof org.deeplearning4j.nn.conf.layers.ConvolutionLayer) {
convolutionMode =
((org.deeplearning4j.nn.conf.layers.ConvolutionLayer) conf).getConvolutionMode();
}
}
void initializeHelper() {
String backend = Nd4j.getExecutioner().getEnvironmentInformation().getProperty("backend");
if ("CUDA".equalsIgnoreCase(backend)) {
helper =
DL4JClassLoading.createNewInstance(
"org.deeplearning4j.cuda.convolution.CudnnConvolutionHelper",
ConvolutionHelper.class,
dataType);
@ -94,14 +104,15 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
}
@Override
public Pair<Gradient, INDArray> backpropGradient(
INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
assertInputSet(true);
INDArray weights =
getParamWithNoise(ConvolutionParamInitializer.WEIGHT_KEY, true, workspaceMgr);
INDArray bias = getParamWithNoise(ConvolutionParamInitializer.BIAS_KEY, true, workspaceMgr);
INDArray input = this.input.castTo(dataType); // No op if correct type
if (epsilon.dataType() != dataType) epsilon = epsilon.castTo(dataType);
INDArray origInput = input;
INDArray origEps = epsilon;
@ -110,7 +121,6 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
epsilon = epsilon.permute(0, 3, 1, 2); // NHWC to NCHW
}
long miniBatch = input.size(0);
int inH = (int) input.size(2);
int inW = (int) input.size(3);
@ -126,23 +136,41 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
int[] pad;
int[] outSize;
if (convolutionMode == ConvolutionMode.Same) {
outSize =
Convolution2DUtils.getOutputSize(
input,
kernel,
strides,
null,
convolutionMode,
dilation,
CNN2DFormat.NCHW); // Also performs validation
pad =
Convolution2DUtils.getSameModeTopLeftPadding(
outSize, new int[] {inH, inW}, kernel, strides, dilation);
} else {
pad = getTypedLayerConfiguration().getPadding();
outSize =
Convolution2DUtils.getOutputSize(
input,
kernel,
strides,
pad,
convolutionMode,
dilation,
CNN2DFormat.NCHW); // Also performs validation
}
int outH = outSize[0];
int outW = outSize[1];
INDArray biasGradView = gradientViews.get(ConvolutionParamInitializer.BIAS_KEY);
INDArray weightGradView =
gradientViews.get(
ConvolutionParamInitializer.WEIGHT_KEY); // 4d, c order. Shape: [outDepth,inDepth,kH,kW]
INDArray weightGradView2df =
Shape.newShapeNoCopy(weightGradView, new long[] {outDepth, inDepth * kH * kW}, false)
.transpose();
INDArray delta;
IActivation afn = getTypedLayerConfiguration().getActivationFn();
@ -155,7 +183,8 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
}
delta = afn.backprop(z, epsilon).getFirst(); // TODO handle activation function params
if (helper != null
&& (helperCountFail == 0 || !getTypedLayerConfiguration().isCudnnAllowFallback())) {
INDArray helperDelta = delta;
if (getTypedLayerConfiguration().getConvFormat() == CNN2DFormat.NHWC)
helperDelta = delta.permute(0, 2, 3, 1); // NCHW to NHWC
@ -172,10 +201,25 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
Pair<Gradient, INDArray> ret = null;
try {
ret =
helper.backpropGradient(
origInput,
weights,
bias,
helperDelta,
kernel,
strides,
pad,
biasGradView,
weightGradView,
afn,
getTypedLayerConfiguration().getCudnnAlgoMode(),
getTypedLayerConfiguration().getCudnnBwdFilterAlgo(),
getTypedLayerConfiguration().getCudnnBwdDataAlgo(),
convolutionMode,
dilation,
getTypedLayerConfiguration().getConvFormat(),
workspaceMgr);
} catch (ND4JOpProfilerException e) {
throw e; // NaN panic etc for debugging
} catch (Exception e) {
@ -192,7 +236,9 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
log.warn("CuDNN execution failed - falling back on built-in implementation", e);
}
} else {
throw new RuntimeException(
"Error during ConvolutionLayer MKL/CuDNN helper backprop - isCudnnAllowFallback() is set to false",
e);
}
}
@ -207,46 +253,82 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
delta = delta.permute(1, 0, 2, 3); // To shape: [outDepth,miniBatch,outH,outW]
// Note: due to the permute in preOut, and the fact that we essentially do a
// preOut.muli(epsilon), this reshape
// should be zero-copy; only possible exception being sometimes with the "identity" activation
// case
INDArray delta2d =
delta.reshape('c', outDepth, miniBatch * outH * outW); // Shape.newShapeNoCopy(delta,new
// int[]{outDepth,miniBatch*outH*outW},false);
// Do im2col, but with order [miniB,outH,outW,depthIn,kH,kW]; but need to input
// [miniBatch,channels,kH,kW,outH,outW] given the current im2col implementation
// To get this: create an array of the order we want, permute it to the order required by im2col
// implementation, and then do im2col on that
// to get old order from required order: permute(0,3,4,5,1,2)
INDArray im2col2d =
p.getSecond(); // Re-use im2col2d array from forward pass if available; recalculate if not
if (im2col2d == null) {
INDArray col =
Nd4j.createUninitialized(
dataType, new long[] {miniBatch, outH, outW, inDepth, kH, kW}, 'c');
INDArray col2 = col.permute(0, 3, 4, 5, 1, 2);
Convolution.im2col(
input,
kH,
kW,
strides[0],
strides[1],
pad[0],
pad[1],
dilation[0],
dilation[1],
convolutionMode == ConvolutionMode.Same,
col2);
// Shape im2col to 2d. Due to the permuting above, this should be a zero-copy reshape
im2col2d = col.reshape('c', miniBatch * outH * outW, inDepth * kH * kW);
}
// Calculate weight gradients, using cc->c mmul.
// weightGradView2df is f order, but this is because it's transposed from c order
// Here, we are using the fact that AB = (B^T A^T)^T; output here (post transpose) is in c
// order, not usual f order
Nd4j.gemm(im2col2d, delta2d, weightGradView2df, true, true, 1.0, 0.0);
// Flatten 4d weights to 2d... this again is a zero-copy op (unless weights are not originally
// in c order for some reason)
INDArray wPermuted =
weights.permute(3, 2, 1, 0); // Start with c order weights, switch order to f order
INDArray w2d = wPermuted.reshape('f', inDepth * kH * kW, outDepth);
// Calculate epsilons for layer below, in 2d format (note: this is in 'image patch' format
// before col2im reduction)
// Note: cc -> f mmul here, then reshape to 6d in f order
INDArray epsNext2d =
w2d.mmul(delta2d); // TODO can we reuse im2col array instead of allocating new result array?
INDArray eps6d =
Shape.newShapeNoCopy(epsNext2d, new long[] {kW, kH, inDepth, outW, outH, miniBatch}, true);
// Calculate epsilonNext by doing im2col reduction.
// Current col2im implementation expects input with order: [miniBatch,channels,kH,kW,outH,outW]
// currently have [kH,kW,inDepth,outW,outH,miniBatch] -> permute first
eps6d = eps6d.permute(5, 2, 1, 0, 4, 3);
INDArray epsNextOrig =
workspaceMgr.createUninitialized(
ArrayType.ACTIVATION_GRAD,
eps6d.dataType(),
new long[] {inDepth, miniBatch, inH, inW},
'c');
// Note: we are execute col2im in a way that the output array should be used in a stride 1 muli
// in the layer below... (same strides as zs/activations)
INDArray epsNext = epsNextOrig.permute(1, 0, 2, 3);
Convolution.col2im(
eps6d, epsNext, strides[0], strides[1], pad[0], pad[1], inH, inW, dilation[0], dilation[1]);
Gradient retGradient = new DefaultGradient();
if (getTypedLayerConfiguration().hasBias()) {
@ -267,11 +349,12 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
}
/**
* preOutput4d: Used so that ConvolutionLayer subclasses (such as Convolution1D) can maintain
* their standard non-4d preOutput method, while overriding this to return 4d activations (for use
* in backprop) without modifying the public API
*/
protected Pair<INDArray, INDArray> preOutput4d(
boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) {
return preOutput(training, forBackprop, workspaceMgr);
}
@ -279,16 +362,23 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
// Input validation: expect rank 4 matrix
if (input.rank() != 4) {
String layerName = layerConfiguration.getName();
if (layerName == null) layerName = "(not named)";
throw new DL4JInvalidInputException(
"Got rank "
+ input.rank()
+ " array as input to ConvolutionLayer (layer name = "
+ layerName
+ ", layer index = "
+ index
+ ") with shape "
+ Arrays.toString(input.shape())
+ ". "
+ "Expected rank 4 array with shape [minibatchSize, layerInputDepth, inputHeight, inputWidth]."
+ (input.rank() == 2
? " (Wrong input type (see InputType.convolutionalFlat()) or wrong data type?)"
: "")
+ " "
+ layerId());
}
}
@ -297,13 +387,26 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
int dim = format == CNN2DFormat.NHWC ? 3 : 1;
if (input.size(dim) != inDepth) {
String layerName = layerConfiguration.getName();
if (layerName == null) layerName = "(not named)";
String s =
"Cannot do forward pass in Convolution layer (layer name = "
+ layerName
+ ", layer index = "
+ index
+ "): input array channels does not match CNN layer configuration"
+ " (data format = "
+ format
+ ", data input channels = "
+ input.size(dim)
+ ", "
+ getTypedLayerConfiguration().getConvFormat().dimensionNames()
+ "="
+ Arrays.toString(input.shape())
+ "; expected"
+ " input channels = "
+ inDepth
+ ") "
+ layerId();
int dimIfWrongFormat = format == CNN2DFormat.NHWC ? 1 : 3;
@ -312,24 +415,26 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
s += "\n" + Convolution2DUtils.NCHW_NHWC_ERROR_MSG;
}
throw new DL4JInvalidInputException(s);
}
}
/**
* PreOutput method that also returns the im2col2d array (if being called for backprop), as this
* can be re-used instead of being calculated again.
*
* @param training Train or test time (impacts dropout)
* @param forBackprop If true: return the im2col2d array for re-use during backprop. False: return
* null for second pair entry. Note that it may still be null in the case of CuDNN and the
* like.
* @return Pair of arrays: preOutput (activations) and optionally the im2col2d array
*/
protected Pair<INDArray, INDArray> preOutput(
boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) {
assertInputSet(false);
INDArray bias = getParamWithNoise(ConvolutionParamInitializer.BIAS_KEY, training, workspaceMgr);
INDArray weights =
getParamWithNoise(ConvolutionParamInitializer.WEIGHT_KEY, training, workspaceMgr);
validateInputRank();
@ -347,17 +452,15 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
long kH = weights.size(2);
long kW = weights.size(3);
int[] dilation = getTypedLayerConfiguration().getDilation();
int[] kernel = getTypedLayerConfiguration().getKernelSize();
int[] strides = getTypedLayerConfiguration().getStride();
int[] pad;
int[] outSize;
if (convolutionMode == ConvolutionMode.Same) {
outSize =
Convolution2DUtils.getOutputSize(
input,
kernel,
strides,
@ -379,15 +482,13 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
}
else
throw new IllegalStateException("No data format configured!");*/
pad =
Convolution2DUtils.getSameModeTopLeftPadding(
outSize, inWidthHeight, kernel, strides, dilation);
} else {
pad = getTypedLayerConfiguration().getPadding();
outSize =
Convolution2DUtils.getOutputSize(
input,
kernel,
strides,
@ -400,8 +501,8 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
int outH = outSize[0];
int outW = outSize[1];
if (helper != null
&& (helperCountFail == 0 || !getTypedLayerConfiguration().isCudnnAllowFallback())) {
if (preOutput != null && forBackprop) {
return new Pair<>(preOutput, null);
}
@ -418,8 +519,20 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
INDArray ret = null;
try {
ret =
helper.preOutput(
inputOrig,
weights,
bias,
kernel,
strides,
pad,
getTypedLayerConfiguration().getCudnnAlgoMode(),
getTypedLayerConfiguration().getCudnnFwdAlgo(),
convolutionMode,
dilation,
getTypedLayerConfiguration().getConvFormat(),
workspaceMgr);
} catch (ND4JOpProfilerException e) {
throw e; // NaN panic etc for debugging
} catch (Exception e) {
@ -436,7 +549,9 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
log.warn("CuDNN execution failed - falling back on built-in implementation", e);
}
} else {
throw new RuntimeException(
"Error during ConvolutionLayer MKL/CuDNN helper forward pass - isCudnnAllowFallback() is set to false",
e);
}
}
if (ret != null) {
@ -448,49 +563,67 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
return new Pair<>(preOutput, i2d);
}
// im2col in the required order: want [outW,outH,miniBatch,depthIn,kH,kW], but need to input
// [miniBatch,channels,kH,kW,outH,outW] given the current im2col implementation
// To get this: create an array of the order we want, permute it to the order required by im2col
// implementation, and then do im2col on that
// to get old order from required order: permute(0,3,4,5,1,2)
// Post reshaping: rows are such that minibatch varies slowest, outW fastest as we step through
// the rows post-reshape
INDArray col =
Nd4j.createUninitialized(
weights.dataType(), new long[] {miniBatch, outH, outW, inDepth, kH, kW}, 'c');
int[] permute = new int[] {0, 3, 4, 5, 1, 2};
INDArray col2 = col.permute(permute);
INDArray im2ColIn = input.castTo(col2.dataType()); // No op if already (for example) float
if (kH > Integer.MAX_VALUE || kW > Integer.MAX_VALUE) throw new ND4JArraySizeException();
Convolution.im2col(
im2ColIn,
(int) kH,
(int) kW,
strides[0],
strides[1],
pad[0],
pad[1],
dilation[0],
dilation[1],
convolutionMode == ConvolutionMode.Same,
col2);
INDArray im2col2d =
Shape.newShapeNoCopy(col, new long[] {miniBatch * outH * outW, inDepth * kH * kW}, false);
// Current order of weights: [depthOut,depthIn,kH,kW], c order
// Permute to give [kW,kH,depthIn,depthOut], f order
// Reshape to give [kW*kH*depthIn, depthOut]. This should always be zero-copy reshape, unless
// weights aren't in c order for some reason
INDArray permutedW = weights.permute(3, 2, 1, 0);
INDArray reshapedW = permutedW.reshape('f', kW * kH * inDepth, outDepth);
// Do the MMUL; c and f orders in, f order out. output shape: [miniBatch*outH*outW,depthOut]
INDArray z =
workspaceMgr.createUninitialized(
ArrayType.ACTIVATIONS,
weights.dataType(),
new long[] {im2col2d.size(0), reshapedW.size(1)},
'f');
im2col2d.mmuli(reshapedW, z);
// Add biases, before reshaping. Note that biases are [1,depthOut] and currently z is
// [miniBatch*outH*outW,depthOut] -> addiRowVector
if (getTypedLayerConfiguration().hasBias()) {
z.addiRowVector(bias);
}
// Now, reshape to [outW,outH,miniBatch,outDepth], and permute to have correct output order:
// [miniBatch,outDepth,outH,outW];
z = Shape.newShapeNoCopy(z, new long[] {outW, outH, miniBatch, outDepth}, true);
z = z.permute(2, 3, 1, 0);
if (training
&& cacheMode != CacheMode.NONE
&& workspaceMgr.hasConfiguration(ArrayType.FF_CACHE)
&& workspaceMgr.isWorkspaceOpen(ArrayType.FF_CACHE)) {
try (MemoryWorkspace wsB = workspaceMgr.notifyScopeBorrowed(ArrayType.FF_CACHE)) {
i2d = im2col2d.unsafeDuplication();
}
@ -507,18 +640,21 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
@Override
public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) {
if (input == null) {
throw new IllegalArgumentException(
"Cannot perform forward pass with null input " + layerId());
}
if (cacheMode == null) cacheMode = CacheMode.NONE;
applyDropOutIfNecessary(training, workspaceMgr);
INDArray z = preOutput(training, false, workspaceMgr).getFirst();
// we do cache only if cache workspace exists. Skip otherwise
if (training
&& cacheMode != CacheMode.NONE
&& workspaceMgr.hasConfiguration(ArrayType.FF_CACHE)
&& workspaceMgr.isWorkspaceOpen(ArrayType.FF_CACHE)) {
try (MemoryWorkspace wsB = workspaceMgr.notifyScopeBorrowed(ArrayType.FF_CACHE)) {
preOutput = z.unsafeDuplication();
}
@ -527,7 +663,9 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
// String afn = conf.getLayer().getActivationFunction();
IActivation afn = getTypedLayerConfiguration().getActivationFn();
if (helper != null
&& Shape.strideDescendingCAscendingF(z)
&& (helperCountFail == 0 || !getTypedLayerConfiguration().isCudnnAllowFallback())) {
INDArray ret = null;
try {
ret = helper.activate(z, getTypedLayerConfiguration().getActivationFn(), training);
@ -547,7 +685,9 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
log.warn("CuDNN execution failed - falling back on built-in implementation", e);
}
} else {
throw new RuntimeException(
"Error during ConvolutionLayer MKL/CuDNN helper forward pass - isCudnnAllowFallback() is set to false",
e);
}
}
@ -587,15 +727,22 @@ public class ConvolutionLayer extends BaseLayer<org.deeplearning4j.nn.conf.layer
}
@Override
public Pair<INDArray, MaskState> feedForwardMaskArray(
INDArray maskArray, MaskState currentMaskState, int minibatchSize) {
if (maskArray == null) {
// For same mode (with stride 1): output activations size is always same size as input
// activations size -> mask array is same size
return new Pair<>(maskArray, currentMaskState);
}
INDArray outMask =
Convolution2DUtils.cnn2dMaskReduction(
maskArray,
getTypedLayerConfiguration().getKernelSize(),
getTypedLayerConfiguration().getStride(),
getTypedLayerConfiguration().getPadding(),
getTypedLayerConfiguration().getDilation(),
getTypedLayerConfiguration().getConvolutionMode());
return new Pair<>(outMask, currentMaskState);
}
}
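The ConvolutionMode.Same branches above delegate output-size and padding arithmetic to Convolution2DUtils. As a rough self-contained sketch of that arithmetic (standard SAME-convolution formulas; the SamePaddingSketch class is hypothetical and the real utility additionally validates shapes and formats):
public final class SamePaddingSketch {
  /** Output size for one spatial dimension in ConvolutionMode.Same. */
  static int outSize(int in, int stride) {
    return (int) Math.ceil(in / (double) stride);
  }
  /** Top/left padding so that the SAME output size above is reached. */
  static int topLeftPad(int in, int kernel, int stride, int dilation) {
    int effKernel = kernel + (kernel - 1) * (dilation - 1); // dilated kernel extent
    int out = outSize(in, stride);
    int totalPad = Math.max((out - 1) * stride + effKernel - in, 0);
    return totalPad / 2; // any odd remainder goes to the bottom/right side
  }
  public static void main(String[] args) {
    // e.g. inH=28, kernel=5, stride=2, dilation=1 -> outH=14, top pad=1
    System.out.println(outSize(28, 2) + " " + topLeftPad(28, 5, 2, 1));
  }
}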

View File

@ -0,0 +1,753 @@
/*
* ******************************************************************************
* *
* *
* * This program and the accompanying materials are made available under the
* * terms of the Apache License, Version 2.0 which is available at
* * https://www.apache.org/licenses/LICENSE-2.0.
* *
* * See the NOTICE file distributed with this work for additional
* * information regarding copyright ownership.
* * Unless required by applicable law or agreed to in writing, software
* * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* * License for the specific language governing permissions and limitations
* * under the License.
* *
* * SPDX-License-Identifier: Apache-2.0
* *****************************************************************************
*/
package org.deeplearning4j.nn.layers.convolution;
import java.util.Arrays;
import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.common.config.DL4JClassLoading;
import org.deeplearning4j.exception.DL4JInvalidInputException;
import org.deeplearning4j.nn.api.MaskState;
import org.deeplearning4j.nn.conf.CNN2DFormat;
import org.deeplearning4j.nn.conf.CacheMode;
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.layers.Convolution1DNew;
import org.deeplearning4j.nn.conf.layers.LayerConfiguration;
import org.deeplearning4j.nn.gradient.DefaultGradient;
import org.deeplearning4j.nn.gradient.Gradient;
import org.deeplearning4j.nn.layers.BaseLayer;
import org.deeplearning4j.nn.layers.LayerHelper;
import org.deeplearning4j.nn.layers.mkldnn.MKLDNNConvHelper;
import org.deeplearning4j.nn.params.ConvolutionParamInitializer;
import org.deeplearning4j.nn.workspace.ArrayType;
import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr;
import org.deeplearning4j.util.Convolution2DUtils;
import org.nd4j.common.primitives.Pair;
import org.nd4j.linalg.activations.IActivation;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.memory.MemoryWorkspace;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.shape.Shape;
import org.nd4j.linalg.convolution.Convolution;
import org.nd4j.linalg.exception.ND4JArraySizeException;
import org.nd4j.linalg.exception.ND4JOpProfilerException;
import org.nd4j.linalg.factory.Nd4j;
@Slf4j
public class ConvolutionNewLayer<
LayerConf_T extends org.deeplearning4j.nn.conf.layers.Convolution1DNew>
extends BaseLayer<org.deeplearning4j.nn.conf.layers.Convolution1DNew> {
protected INDArray i2d;
protected ConvolutionHelper helper = null;
protected int helperCountFail = 0;
protected ConvolutionMode convolutionMode;
protected transient INDArray dummyBias; // Used only when: hasBias == false AND helpers are used
protected transient INDArray dummyBiasGrad; // As above
public ConvolutionNewLayer(LayerConfiguration conf, DataType dataType) {
super(conf, dataType);
initializeHelper();
if (conf instanceof Convolution1DNew) {
convolutionMode = ((Convolution1DNew) conf).getConvolutionMode();
} else if (conf instanceof org.deeplearning4j.nn.conf.layers.ConvolutionLayer) {
convolutionMode =
((org.deeplearning4j.nn.conf.layers.ConvolutionLayer) conf).getConvolutionMode();
}
}
void initializeHelper() {
String backend = Nd4j.getExecutioner().getEnvironmentInformation().getProperty("backend");
if ("CUDA".equalsIgnoreCase(backend)) {
helper =
DL4JClassLoading.createNewInstance(
"org.deeplearning4j.cuda.convolution.CudnnConvolutionHelper",
ConvolutionHelper.class,
dataType);
log.debug("CudnnConvolutionHelper successfully initialized");
if (!helper.checkSupported()) {
helper = null;
}
} else if ("CPU".equalsIgnoreCase(backend)) {
helper = new MKLDNNConvHelper(dataType);
log.trace("Created MKLDNNConvHelper, layer {}", getTypedLayerConfiguration().getName());
}
if (helper != null && !helper.checkSupported()) {
log.debug("Removed helper {} as not supported", helper.getClass());
helper = null;
}
}
@Override
public Type type() {
return Type.CONVOLUTIONAL;
}
/**
* @return the {@link Convolution1DNew} configuration backing this layer
*/
@Override
public Convolution1DNew getTypedLayerConfiguration() {
return super.getTypedLayerConfiguration();
}
@Override
public Pair<Gradient, INDArray> backpropGradient(
INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
assertInputSet(true);
INDArray weights =
getParamWithNoise(ConvolutionParamInitializer.WEIGHT_KEY, true, workspaceMgr);
INDArray bias = getParamWithNoise(ConvolutionParamInitializer.BIAS_KEY, true, workspaceMgr);
INDArray input = this.input.castTo(dataType); // No op if correct type
if (epsilon.dataType() != dataType) epsilon = epsilon.castTo(dataType);
INDArray origInput = input;
INDArray origEps = epsilon;
if (getTypedLayerConfiguration().getConvFormat() != CNN2DFormat.NCHW) {
input = input.permute(0, 3, 1, 2); // NHWC to NCHW
epsilon = epsilon.permute(0, 3, 1, 2); // NHWC to NCHW
}
long miniBatch = input.size(0);
int inH = (int) input.size(2);
int inW = (int) input.size(3);
long outDepth = weights.size(0);
long inDepth = weights.size(1);
int kH = (int) weights.size(2);
int kW = (int) weights.size(3);
int[] dilation = getTypedLayerConfiguration().getDilation();
int[] kernel = getTypedLayerConfiguration().getKernelSize();
int[] strides = getTypedLayerConfiguration().getStride();
int[] pad;
int[] outSize;
if (convolutionMode == ConvolutionMode.Same) {
outSize =
Convolution2DUtils.getOutputSize(
input,
kernel,
strides,
null,
convolutionMode,
dilation,
CNN2DFormat.NCHW); // Also performs validation
pad =
Convolution2DUtils.getSameModeTopLeftPadding(
outSize, new int[] {inH, inW}, kernel, strides, dilation);
} else {
pad = getTypedLayerConfiguration().getPadding();
outSize =
Convolution2DUtils.getOutputSize(
input,
kernel,
strides,
pad,
convolutionMode,
dilation,
CNN2DFormat.NCHW); // Also performs validation
}
int outH = outSize[0];
int outW = outSize[1];
INDArray biasGradView = gradientViews.get(ConvolutionParamInitializer.BIAS_KEY);
INDArray weightGradView =
gradientViews.get(
ConvolutionParamInitializer.WEIGHT_KEY); // 4d, c order. Shape: [outDepth,inDepth,kH,kW]
INDArray weightGradView2df =
Shape.newShapeNoCopy(weightGradView, new long[] {outDepth, inDepth * kH * kW}, false)
.transpose();
INDArray delta;
IActivation afn = getTypedLayerConfiguration().getActivationFn();
Pair<INDArray, INDArray> p = preOutput4d(true, true, workspaceMgr);
INDArray z = p.getFirst();
CNN2DFormat f = getTypedLayerConfiguration().getConvFormat();
if (f != CNN2DFormat.NCHW) {
z = z.permute(0, 3, 1, 2); // NHWC to NCHW
}
delta = afn.backprop(z, epsilon).getFirst(); // TODO handle activation function params
if (helper != null
&& (helperCountFail == 0 || !getTypedLayerConfiguration().isCudnnAllowFallback())) {
INDArray helperDelta = delta;
if (getTypedLayerConfiguration().getConvFormat() == CNN2DFormat.NHWC)
helperDelta = delta.permute(0, 2, 3, 1); // NCHW to NHWC
if (!hasBias() && !(helper instanceof MKLDNNConvHelper)) {
// MKL-DNN supports no bias, CuDNN doesn't
if (dummyBiasGrad == null) {
try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) {
dummyBiasGrad = Nd4j.create(1, getTypedLayerConfiguration().getNOut());
}
}
biasGradView = dummyBiasGrad;
}
Pair<Gradient, INDArray> ret = null;
try {
ret =
helper.backpropGradient(
origInput,
weights,
bias,
helperDelta,
kernel,
strides,
pad,
biasGradView,
weightGradView,
afn,
getTypedLayerConfiguration().getCudnnAlgoMode(),
getTypedLayerConfiguration().getCudnnBwdFilterAlgo(),
getTypedLayerConfiguration().getCudnnBwdDataAlgo(),
convolutionMode,
dilation,
getTypedLayerConfiguration().getConvFormat(),
workspaceMgr);
} catch (ND4JOpProfilerException e) {
throw e; // NaN panic etc for debugging
} catch (Exception e) {
if (e.getMessage().contains("Failed to allocate")) {
// This is a memory exception - don't fallback to built-in implementation
throw e;
}
if (getTypedLayerConfiguration().isCudnnAllowFallback()) {
helperCountFail++;
if (helper instanceof MKLDNNConvHelper) {
log.warn("MKL-DNN execution failed - falling back on built-in implementation", e);
} else {
log.warn("CuDNN execution failed - falling back on built-in implementation", e);
}
} else {
throw new RuntimeException(
"Error during ConvolutionLayer MKL/CuDNN helper backprop - isCudnnAllowFallback() is set to false",
e);
}
}
if (ret != null) {
// Backprop dropout, if present
INDArray gradPostDropout = ret.getRight();
gradPostDropout = backpropDropOutIfPresent(gradPostDropout);
ret.setSecond(gradPostDropout);
return ret;
}
}
delta = delta.permute(1, 0, 2, 3); // To shape: [outDepth,miniBatch,outH,outW]
// Note: due to the permute in preOut, and the fact that we essentially do a
// preOut.muli(epsilon), this reshape
// should be zero-copy; only possible exception being sometimes with the "identity" activation
// case
INDArray delta2d =
delta.reshape('c', outDepth, miniBatch * outH * outW); // Shape.newShapeNoCopy(delta,new
// int[]{outDepth,miniBatch*outH*outW},false);
// Do im2col, but with order [miniB,outH,outW,depthIn,kH,kW]; but need to input
// [miniBatch,channels,kH,kW,outH,outW] given the current im2col implementation
// To get this: create an array of the order we want, permute it to the order required by im2col
// implementation, and then do im2col on that
// to get old order from required order: permute(0,3,4,5,1,2)
INDArray im2col2d =
p.getSecond(); // Re-use im2col2d array from forward pass if available; recalculate if not
if (im2col2d == null) {
INDArray col =
Nd4j.createUninitialized(
dataType, new long[] {miniBatch, outH, outW, inDepth, kH, kW}, 'c');
INDArray col2 = col.permute(0, 3, 4, 5, 1, 2);
Convolution.im2col(
input,
kH,
kW,
strides[0],
strides[1],
pad[0],
pad[1],
dilation[0],
dilation[1],
convolutionMode == ConvolutionMode.Same,
col2);
// Shape im2col to 2d. Due to the permuting above, this should be a zero-copy reshape
im2col2d = col.reshape('c', miniBatch * outH * outW, inDepth * kH * kW);
}
// Calculate weight gradients, using cc->c mmul.
// weightGradView2df is f order, but this is because it's transposed from c order
// Here, we are using the fact that AB = (B^T A^T)^T; output here (post transpose) is in c
// order, not usual f order
Nd4j.gemm(im2col2d, delta2d, weightGradView2df, true, true, 1.0, 0.0);
// Flatten 4d weights to 2d... this again is a zero-copy op (unless weights are not originally
// in c order for some reason)
INDArray wPermuted =
weights.permute(3, 2, 1, 0); // Start with c order weights, switch order to f order
INDArray w2d = wPermuted.reshape('f', inDepth * kH * kW, outDepth);
// Calculate epsilons for layer below, in 2d format (note: this is in 'image patch' format
// before col2im reduction)
// Note: cc -> f mmul here, then reshape to 6d in f order
INDArray epsNext2d =
w2d.mmul(delta2d); // TODO can we reuse im2col array instead of allocating new result array?
INDArray eps6d =
Shape.newShapeNoCopy(epsNext2d, new long[] {kW, kH, inDepth, outW, outH, miniBatch}, true);
// Calculate epsilonNext by doing im2col reduction.
// Current col2im implementation expects input with order: [miniBatch,channels,kH,kW,outH,outW]
// currently have [kH,kW,inDepth,outW,outH,miniBatch] -> permute first
eps6d = eps6d.permute(5, 2, 1, 0, 4, 3);
INDArray epsNextOrig =
workspaceMgr.createUninitialized(
ArrayType.ACTIVATION_GRAD,
eps6d.dataType(),
new long[] {inDepth, miniBatch, inH, inW},
'c');
// Note: we execute col2im in a way that lets the output array be used in a stride 1 muli
// in the layer below... (same strides as zs/activations)
INDArray epsNext = epsNextOrig.permute(1, 0, 2, 3);
Convolution.col2im(
eps6d, epsNext, strides[0], strides[1], pad[0], pad[1], inH, inW, dilation[0], dilation[1]);
Gradient retGradient = new DefaultGradient();
if (getTypedLayerConfiguration().hasBias()) {
delta2d.sum(biasGradView, 1); // biasGradView is initialized/zeroed first in sum op
retGradient.setGradientFor(ConvolutionParamInitializer.BIAS_KEY, biasGradView);
}
retGradient.setGradientFor(ConvolutionParamInitializer.WEIGHT_KEY, weightGradView, 'c');
weightNoiseParams.clear();
epsNext = backpropDropOutIfPresent(epsNext);
if (getTypedLayerConfiguration().getConvFormat() != CNN2DFormat.NCHW) {
epsNext = epsNext.permute(0, 2, 3, 1); // NCHW to NHWC
}
return new Pair<>(retGradient, epsNext);
}
/**
* preOutput4d: Used so that ConvolutionLayer subclasses (such as Convolution1D) can maintain
* their standard non-4d preOutput method, while overriding this to return 4d activations (for use
* in backprop) without modifying the public API
*/
protected Pair<INDArray, INDArray> preOutput4d(
boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) {
return preOutput(training, forBackprop, workspaceMgr);
}
protected void validateInputRank() {
// Input validation: expect rank 4 matrix
if (input.rank() != 4) {
String layerName = layerConfiguration.getName();
if (layerName == null) layerName = "(not named)";
throw new DL4JInvalidInputException(
"Got rank "
+ input.rank()
+ " array as input to ConvolutionLayer (layer name = "
+ layerName
+ ", layer index = "
+ index
+ ") with shape "
+ Arrays.toString(input.shape())
+ ". "
+ "Expected rank 4 array with shape [minibatchSize, layerInputDepth, inputHeight, inputWidth]."
+ (input.rank() == 2
? " (Wrong input type (see InputType.convolutionalFlat()) or wrong data type?)"
: "")
+ " "
+ layerId());
}
}
protected void validateInputDepth(long inDepth) {
CNN2DFormat format = getTypedLayerConfiguration().getConvFormat();
int dim = format == CNN2DFormat.NHWC ? 3 : 1;
if (input.size(dim) != inDepth) {
String layerName = layerConfiguration.getName();
if (layerName == null) layerName = "(not named)";
String s =
"Cannot do forward pass in Convolution layer (layer name = "
+ layerName
+ ", layer index = "
+ index
+ "): input array channels does not match CNN layer configuration"
+ " (data format = "
+ format
+ ", data input channels = "
+ input.size(dim)
+ ", "
+ getTypedLayerConfiguration().getConvFormat().dimensionNames()
+ "="
+ Arrays.toString(input.shape())
+ "; expected"
+ " input channels = "
+ inDepth
+ ") "
+ layerId();
int dimIfWrongFormat = format == CNN2DFormat.NHWC ? 1 : 3;
if (input.size(dimIfWrongFormat) == inDepth) {
// User might have passed NCHW data to a NHWC net, or vice versa?
s += "\n" + Convolution2DUtils.NCHW_NHWC_ERROR_MSG;
}
throw new DL4JInvalidInputException(s);
}
}
/**
* PreOutput method that also returns the im2col2d array (if being called for backprop), as this
* can be re-used instead of being calculated again.
*
* @param training Train or test time (impacts dropout)
* @param forBackprop If true: return the im2col2d array for re-use during backprop. False: return
* null for second pair entry. Note that it may still be null in the case of CuDNN and the
* like.
* @return Pair of arrays: preOutput (activations) and optionally the im2col2d array
*/
protected Pair<INDArray, INDArray> preOutput(
boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) {
assertInputSet(false);
INDArray bias = getParamWithNoise(ConvolutionParamInitializer.BIAS_KEY, training, workspaceMgr);
INDArray weights =
getParamWithNoise(ConvolutionParamInitializer.WEIGHT_KEY, training, workspaceMgr);
validateInputRank();
INDArray input = this.input.castTo(dataType);
INDArray inputOrig = input;
if (getTypedLayerConfiguration().getConvFormat() == CNN2DFormat.NHWC) {
input = input.permute(0, 3, 1, 2).dup(); // NHWC to NCHW
}
long miniBatch = input.size(0);
long outDepth = weights.size(0);
long inDepth = weights.size(1);
validateInputDepth(inDepth);
long kH = weights.size(2);
long kW = weights.size(3);
int[] dilation = getTypedLayerConfiguration().getDilation();
int[] kernel = getTypedLayerConfiguration().getKernelSize();
int[] strides = getTypedLayerConfiguration().getStride();
int[] pad;
int[] outSize;
if (convolutionMode == ConvolutionMode.Same) {
outSize =
Convolution2DUtils.getOutputSize(
input,
kernel,
strides,
null,
convolutionMode,
dilation,
CNN2DFormat.NCHW); // Note: hardcoded to NCHW due to permute earlier in this method
if (input.size(2) > Integer.MAX_VALUE || input.size(3) > Integer.MAX_VALUE)
throw new ND4JArraySizeException();
int[] inWidthHeight;
// if(layerConf().getCnn2dDataFormat() == CNN2DFormat.NCHW)
// TODO: Switch hardcoded state later. For now, convolution is implemented as
// switch to NCHW then permute back for NHWC
inWidthHeight = new int[] {(int) input.size(2), (int) input.size(3)};
/* else if(layerConf().getCnn2dDataFormat() == CNN2DFormat.NHWC) {
inWidthHeight = new int[] {(int) input.size(1), (int) input.size(2)};
}
else
throw new IllegalStateException("No data format configured!");*/
pad =
Convolution2DUtils.getSameModeTopLeftPadding(
outSize, inWidthHeight, kernel, strides, dilation);
} else {
pad = getTypedLayerConfiguration().getPadding();
outSize =
Convolution2DUtils.getOutputSize(
input,
kernel,
strides,
pad,
convolutionMode,
dilation,
CNN2DFormat.NCHW); // Note: hardcoded to NCHW due to permute earlier in this method
}
int outH = outSize[0];
int outW = outSize[1];
if (helper != null
&& (helperCountFail == 0 || !getTypedLayerConfiguration().isCudnnAllowFallback())) {
if (preOutput != null && forBackprop) {
return new Pair<>(preOutput, null);
}
// For no-bias convolutional layers: use an empty (all 0s) value for biases
if (!hasBias()) {
if (dummyBias == null) {
try (MemoryWorkspace wsO = Nd4j.getMemoryManager().scopeOutOfWorkspaces()) {
dummyBias = Nd4j.create(1, getTypedLayerConfiguration().getNOut());
}
}
bias = dummyBias;
}
INDArray ret = null;
try {
ret =
helper.preOutput(
inputOrig,
weights,
bias,
kernel,
strides,
pad,
getTypedLayerConfiguration().getCudnnAlgoMode(),
getTypedLayerConfiguration().getCudnnFwdAlgo(),
convolutionMode,
dilation,
getTypedLayerConfiguration().getConvFormat(),
workspaceMgr);
} catch (ND4JOpProfilerException e) {
throw e; // NaN panic etc for debugging
} catch (Exception e) {
if (e.getMessage() != null && e.getMessage().contains("Failed to allocate")) {
// This is a memory exception - don't fallback to built-in implementation
throw e;
}
if (getTypedLayerConfiguration().isCudnnAllowFallback()) {
helperCountFail++;
if (helper instanceof MKLDNNConvHelper) {
log.warn("MKL-DNN execution failed - falling back on built-in implementation", e);
} else {
log.warn("CuDNN execution failed - falling back on built-in implementation", e);
}
} else {
throw new RuntimeException(
"Error during ConvolutionLayer MKL/CuDNN helper forward pass - isCudnnAllowFallback() is set to false",
e);
}
}
if (ret != null) {
return new Pair<>(ret, null);
}
}
if (preOutput != null && i2d != null && forBackprop) {
return new Pair<>(preOutput, i2d);
}
// im2col in the required order: want [outW,outH,miniBatch,depthIn,kH,kW], but need to input
// [miniBatch,channels,kH,kW,outH,outW] given the current im2col implementation
// To get this: create an array of the order we want, permute it to the order required by im2col
// implementation, and then do im2col on that
// to get old order from required order: permute(0,3,4,5,1,2)
// Post reshaping: rows are such that minibatch varies slowest, outW fastest as we step through
// the rows post-reshape
INDArray col =
Nd4j.createUninitialized(
weights.dataType(), new long[] {miniBatch, outH, outW, inDepth, kH, kW}, 'c');
int[] permute = new int[] {0, 3, 4, 5, 1, 2};
INDArray col2 = col.permute(permute);
INDArray im2ColIn = input.castTo(col2.dataType()); // No op if already (for example) float
if (kH > Integer.MAX_VALUE || kW > Integer.MAX_VALUE) throw new ND4JArraySizeException();
Convolution.im2col(
im2ColIn,
(int) kH,
(int) kW,
strides[0],
strides[1],
pad[0],
pad[1],
dilation[0],
dilation[1],
convolutionMode == ConvolutionMode.Same,
col2);
INDArray im2col2d =
Shape.newShapeNoCopy(col, new long[] {miniBatch * outH * outW, inDepth * kH * kW}, false);
// Current order of weights: [depthOut,depthIn,kH,kW], c order
// Permute to give [kW,kH,depthIn,depthOut], f order
// Reshape to give [kW*kH*depthIn, depthOut]. This should always be zero-copy reshape, unless
// weights aren't in c order for some reason
INDArray permutedW = weights.permute(3, 2, 1, 0);
INDArray reshapedW = permutedW.reshape('f', kW * kH * inDepth, outDepth);
// Do the MMUL; c and f orders in, f order out. output shape: [miniBatch*outH*outW,depthOut]
INDArray z =
workspaceMgr.createUninitialized(
ArrayType.ACTIVATIONS,
weights.dataType(),
new long[] {im2col2d.size(0), reshapedW.size(1)},
'f');
im2col2d.mmuli(reshapedW, z);
// Add biases, before reshaping. Note that biases are [1,depthOut] and currently z is
// [miniBatch*outH*outW,depthOut] -> addiRowVector
if (getTypedLayerConfiguration().hasBias()) {
z.addiRowVector(bias);
}
// Now, reshape to [outW,outH,miniBatch,outDepth], and permute to have correct output order:
// [miniBatch,outDepth,outH,outW];
z = Shape.newShapeNoCopy(z, new long[] {outW, outH, miniBatch, outDepth}, true);
z = z.permute(2, 3, 1, 0);
if (training
&& cacheMode != CacheMode.NONE
&& workspaceMgr.hasConfiguration(ArrayType.FF_CACHE)
&& workspaceMgr.isWorkspaceOpen(ArrayType.FF_CACHE)) {
try (MemoryWorkspace wsB = workspaceMgr.notifyScopeBorrowed(ArrayType.FF_CACHE)) {
i2d = im2col2d.unsafeDuplication();
}
}
if (getTypedLayerConfiguration().getConvFormat() == CNN2DFormat.NHWC) {
z = z.permute(0, 2, 3, 1); // NCHW to NHWC
z = workspaceMgr.dup(ArrayType.ACTIVATIONS, z);
}
return new Pair<>(z, forBackprop ? im2col2d : null);
}
@Override
public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) {
if (input == null) {
throw new IllegalArgumentException(
"Cannot perform forward pass with null input " + layerId());
}
if (cacheMode == null) cacheMode = CacheMode.NONE;
applyDropOutIfNecessary(training, workspaceMgr);
INDArray z = preOutput(training, false, workspaceMgr).getFirst();
// we do cache only if cache workspace exists. Skip otherwise
if (training
&& cacheMode != CacheMode.NONE
&& workspaceMgr.hasConfiguration(ArrayType.FF_CACHE)
&& workspaceMgr.isWorkspaceOpen(ArrayType.FF_CACHE)) {
try (MemoryWorkspace wsB = workspaceMgr.notifyScopeBorrowed(ArrayType.FF_CACHE)) {
preOutput = z.unsafeDuplication();
}
}
// String afn = conf.getLayer().getActivationFunction();
IActivation afn = getTypedLayerConfiguration().getActivationFn();
if (helper != null
&& Shape.strideDescendingCAscendingF(z)
&& (helperCountFail == 0 || !getTypedLayerConfiguration().isCudnnAllowFallback())) {
INDArray ret = null;
try {
ret = helper.activate(z, getTypedLayerConfiguration().getActivationFn(), training);
} catch (ND4JOpProfilerException e) {
throw e; // NaN panic etc for debugging
} catch (Exception e) {
if (e.getMessage() != null && e.getMessage().contains("Failed to allocate")) {
// This is a memory exception - don't fallback to built-in implementation
throw e;
}
if (getTypedLayerConfiguration().isCudnnAllowFallback()) {
helperCountFail++;
if (helper instanceof MKLDNNConvHelper) {
log.warn("MKL-DNN execution failed - falling back on built-in implementation", e);
} else {
log.warn("CuDNN execution failed - falling back on built-in implementation", e);
}
} else {
throw new RuntimeException(
"Error during ConvolutionLayer MKL/CuDNN helper forward pass - isCudnnAllowFallback() is set to false",
e);
}
}
if (ret != null) {
return ret;
}
}
return afn.getActivation(z, training);
}
@Override
public boolean hasBias() {
return getTypedLayerConfiguration().hasBias();
}
@Override
public boolean isPretrainLayer() {
return false;
}
@Override
public LayerHelper getHelper() {
return helper;
}
@Override
public void fit(INDArray input, LayerWorkspaceMgr workspaceMgr) {
throw new UnsupportedOperationException("Not supported");
}
@Override
public void setParamsTable(INDArray paramsTable) {
// Override, as base layer does f order parameter flattening by default
setParams(paramsTable, 'c');
}
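  // Illustrative note (not part of the original code): flattening order matters here. Flattening
  // the 2x3 matrix [[1,2,3],[4,5,6]] in 'c' (row-major) order gives [1,2,3,4,5,6], while 'f'
  // (column-major) order gives [1,4,2,5,3,6]. CNN weight views are created in 'c' order, which is
  // why setParams() is called with 'c' above. Plain-Java sketch of the two traversals:
  private static double[] flattenOrderExample(double[][] m, char order) {
    int rows = m.length, cols = m[0].length;
    double[] flat = new double[rows * cols];
    int idx = 0;
    if (order == 'c') { // row-major: walk each row left to right
      for (int r = 0; r < rows; r++) for (int c = 0; c < cols; c++) flat[idx++] = m[r][c];
    } else {            // 'f', column-major: walk each column top to bottom
      for (int c = 0; c < cols; c++) for (int r = 0; r < rows; r++) flat[idx++] = m[r][c];
    }
    return flat;
  }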
@Override
public Pair<INDArray, MaskState> feedForwardMaskArray(
INDArray maskArray, MaskState currentMaskState, int minibatchSize) {
if (maskArray == null) {
// For same mode (with stride 1): output activations size is always same size as input
// activations size -> mask array is same size
return new Pair<>(maskArray, currentMaskState);
}
INDArray outMask =
Convolution2DUtils.cnn2dMaskReduction(
maskArray,
getTypedLayerConfiguration().getKernelSize(),
getTypedLayerConfiguration().getStride(),
getTypedLayerConfiguration().getPadding(),
getTypedLayerConfiguration().getDilation(),
getTypedLayerConfiguration().getConvolutionMode());
return new Pair<>(outMask, currentMaskState);
}
}

View File

@ -47,7 +47,7 @@ public class DenseLayer extends BaseLayer<org.deeplearning4j.nn.conf.layers.Dens
   @Override
   public boolean hasBias() {
-    return super.hasBias();
+    return getTypedLayerConfiguration().isHasBias();
   }

   @Override

View File

@ -37,27 +37,41 @@ import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.lossfunctions.ILossFunction;

public class RnnOutputLayer
    extends BaseOutputLayer<org.deeplearning4j.nn.conf.layers.RnnOutputLayer> {

  public RnnOutputLayer(LayerConfiguration conf, DataType dataType) {
    super(conf, dataType);
  }

  @Override
  public Pair<Gradient, INDArray> backpropGradient(
      INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    if (input.rank() != 3) {
      throw new UnsupportedOperationException(
          "Input is not rank 3. RnnOutputLayer expects rank 3 input with shape [minibatch, layerInSize, sequenceLength]."
              + " Got input with rank "
              + input.rank()
              + " and shape "
              + Arrays.toString(input.shape())
              + " - "
              + layerId());
    }

    RNNFormat format = getTypedLayerConfiguration().getDataFormat();
    int td = (format == RNNFormat.NCW) ? 2 : 1; // either NCW or NWC
    Preconditions.checkState(
        labels.rank() == 3,
        "Expected rank 3 labels array, got label array with shape %ndShape",
        labels);
    Preconditions.checkState(
        input.size(td) == labels.size(td),
        "Sequence lengths do not match for RnnOutputLayer input and labels:"
            + "Arrays should be rank 3 with shape [minibatch, size, sequenceLength] - "
            + "mismatch on dimension 2 (sequence length) - input=%ndShape vs. label=%ndShape.\n",
        input, "\n\n",
        labels);

    INDArray inputTemp = input;
    if (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC) {
@ -66,13 +80,19 @@ public class RnnOutputLayer extends BaseOutputLayer<org.deeplearning4j.nn.conf.l
    this.input = TimeSeriesUtils.reshape3dTo2d(input, workspaceMgr, ArrayType.BP_WORKING_MEM);

    applyDropOutIfNecessary(
        true,
        workspaceMgr); // Edge case: we skip OutputLayer forward pass during training as this isn't
    // required to calculate gradients

    Pair<Gradient, INDArray> gradAndEpsilonNext =
        super.backpropGradient(epsilon, workspaceMgr); // Also applies dropout
    this.input = inputTemp;

    INDArray epsilon2d = gradAndEpsilonNext.getSecond();

    INDArray epsilon3d =
        TimeSeriesUtils.reshape2dTo3d(
            epsilon2d, input.size(0), workspaceMgr, ArrayType.ACTIVATION_GRAD);
    if (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC) {
      epsilon3d = epsilon3d.permute(0, 2, 1);
    }
@ -82,14 +102,17 @@ public class RnnOutputLayer extends BaseOutputLayer<org.deeplearning4j.nn.conf.l
    return new Pair<>(gradAndEpsilonNext.getFirst(), epsilon3d);
  }

  /** {@inheritDoc} */
  @Override
  public double f1Score(INDArray examples, INDArray labels) {
    if (examples.rank() == 3)
      examples =
          TimeSeriesUtils.reshape3dTo2d(
              examples, LayerWorkspaceMgr.noWorkspaces(), ArrayType.ACTIVATIONS);
    if (labels.rank() == 3)
      labels =
          TimeSeriesUtils.reshape3dTo2d(
              labels, LayerWorkspaceMgr.noWorkspaces(), ArrayType.ACTIVATIONS);
    return super.f1Score(examples, labels);
  }
@ -108,7 +131,10 @@ public class RnnOutputLayer extends BaseOutputLayer<org.deeplearning4j.nn.conf.l
    if (input.rank() == 3) {
      // Case when called from RnnOutputLayer
      INDArray inputTemp = input;
      input =
          (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC)
              ? input.permute(0, 2, 1)
              : input;
      input = TimeSeriesUtils.reshape3dTo2d(input, workspaceMgr, ArrayType.FF_WORKING_MEM);
      INDArray out = super.preOutput(training, workspaceMgr);
      this.input = inputTemp;
@ -124,7 +150,10 @@ public class RnnOutputLayer extends BaseOutputLayer<org.deeplearning4j.nn.conf.l
  protected INDArray getLabels2d(LayerWorkspaceMgr workspaceMgr, ArrayType arrayType) {
    INDArray labels = this.labels;
    if (labels.rank() == 3) {
      labels =
          (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC)
              ? labels.permute(0, 2, 1)
              : labels;
      return TimeSeriesUtils.reshape3dTo2d(labels, workspaceMgr, arrayType);
    }
    return labels;
@ -143,9 +172,14 @@ public class RnnOutputLayer extends BaseOutputLayer<org.deeplearning4j.nn.conf.l
    if (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC) {
      input = input.permute(0, 2, 1);
    }
    INDArray input2d =
        TimeSeriesUtils.reshape3dTo2d(
            input.castTo(W.dataType()), workspaceMgr, ArrayType.FF_WORKING_MEM);

    INDArray act2d =
        getTypedLayerConfiguration()
            .getActivationFn()
            .getActivation(input2d.mmul(W).addiRowVector(b), training);
    if (maskArray != null) {
      if (!maskArray.isColumnVectorOrScalar() || Arrays.equals(maskArray.shape(), act2d.shape())) {
        // Per output masking
@ -156,7 +190,8 @@ public class RnnOutputLayer extends BaseOutputLayer<org.deeplearning4j.nn.conf.l
      }
    }

    INDArray ret =
        TimeSeriesUtils.reshape2dTo3d(act2d, input.size(0), workspaceMgr, ArrayType.ACTIVATIONS);
    if (getTypedLayerConfiguration().getDataFormat() == RNNFormat.NWC) {
      ret = ret.permute(0, 2, 1);
    }
@ -170,13 +205,21 @@ public class RnnOutputLayer extends BaseOutputLayer<org.deeplearning4j.nn.conf.l
      // (a) per time step masking - rank 2 mask array -> reshape to rank 1 (column vector)
      // (b) per output masking - rank 3 mask array -> reshape to rank 2 (
      if (maskArray.rank() == 2) {
        this.maskArray =
            TimeSeriesUtils.reshapeTimeSeriesMaskToVector(
                maskArray, LayerWorkspaceMgr.noWorkspacesImmutable(), ArrayType.INPUT);
      } else if (maskArray.rank() == 3) {
        this.maskArray =
            TimeSeriesUtils.reshape3dTo2d(
                maskArray, LayerWorkspaceMgr.noWorkspacesImmutable(), ArrayType.INPUT);
      } else {
        throw new UnsupportedOperationException(
            "Invalid mask array: must be rank 2 or 3 (got: rank "
                + maskArray.rank()
                + ", shape = "
                + Arrays.toString(maskArray.shape())
                + ") "
                + layerId());
      }
    } else {
      this.maskArray = null;
@ -184,12 +227,14 @@ public class RnnOutputLayer extends BaseOutputLayer<org.deeplearning4j.nn.conf.l
  }

  @Override
  public Pair<INDArray, MaskState> feedForwardMaskArray(
      INDArray maskArray, MaskState currentMaskState, int minibatchSize) {
    // If the *input* mask array is present and active, we should use it to mask the output
    if (maskArray != null && currentMaskState == MaskState.Active) {
      this.inputMaskArray =
          TimeSeriesUtils.reshapeTimeSeriesMaskToVector(
              maskArray, LayerWorkspaceMgr.noWorkspacesImmutable(), ArrayType.INPUT);
      this.inputMaskArrayState = currentMaskState;
    } else {
      this.inputMaskArray = null;
@ -199,27 +244,35 @@ public class RnnOutputLayer extends BaseOutputLayer<org.deeplearning4j.nn.conf.l
    return null; // Last layer in network
  }

  /**
   * Compute the score for each example individually, after labels and input have been set.
   *
   * @param fullNetRegTerm Regularization score term for the entire network (or, 0.0 to not include
   *     regularization)
   * @return A column INDArray of shape [numExamples,1], where entry i is the score of the ith
   *     example
   */
  @Override
  public INDArray computeScoreForExamples(double fullNetRegTerm, LayerWorkspaceMgr workspaceMgr) {
    // For RNN: need to sum up the score over each time step before returning.
    if (input == null || labels == null)
      throw new IllegalStateException(
          "Cannot calculate score without input and labels " + layerId());
    INDArray preOut = preOutput2d(false, workspaceMgr);

    ILossFunction lossFunction = getTypedLayerConfiguration().getLossFunction();
    INDArray scoreArray =
        lossFunction.computeScoreArray(
            getLabels2d(workspaceMgr, ArrayType.FF_WORKING_MEM),
            preOut,
            getTypedLayerConfiguration().getActivationFn(),
            maskArray);
    // scoreArray: shape [minibatch*timeSeriesLength, 1]
    // Reshape it to [minibatch, timeSeriesLength] then sum over time step
    INDArray scoreArrayTs =
        TimeSeriesUtils.reshapeVectorToTimeSeriesMask(scoreArray, (int) input.size(0));
    INDArray summedScores = scoreArrayTs.sum(true, 1);
    if (fullNetRegTerm != 0.0) {

View File

@ -47,7 +47,7 @@ import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
 import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator;

 @Data
-public abstract class BaseWrapperLayer extends AbstractLayer {
+public abstract class BaseWrapperLayer<LayerConf_T extends LayerConfiguration> extends AbstractLayer {

   protected Layer underlying;
@ -57,8 +57,8 @@ public abstract class BaseWrapperLayer extends AbstractLayer {
   }

   @Override
-  public BaseLayerConfiguration getTypedLayerConfiguration() {
-    return (BaseLayerConfiguration) underlying.getLayerConfiguration();
+  public LayerConf_T getTypedLayerConfiguration() {
+    return (LayerConf_T) underlying.getLayerConfiguration();
   }

   /**

View File

@ -712,7 +712,7 @@ public class MultiLayerNetwork extends ArtificialNeuralNetwork
       if (layer_conf instanceof BaseLayerConfiguration)
         ((BaseLayerConfiguration) layer_conf).setDataType(netDtype);

-      nParamsPerLayer[i] = layer_conf.initializer().numParams(layer_conf);
+      nParamsPerLayer[i] = layer_conf.numParams();
       paramLength += nParamsPerLayer[i];
     }

     log.debug(

View File

@ -0,0 +1,183 @@
/*
* ******************************************************************************
* *
* *
* * This program and the accompanying materials are made available under the
* * terms of the Apache License, Version 2.0 which is available at
* * https://www.apache.org/licenses/LICENSE-2.0.
* *
* * See the NOTICE file distributed with this work for additional
* * information regarding copyright ownership.
* * Unless required by applicable law or agreed to in writing, software
* * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* * License for the specific language governing permissions and limitations
* * under the License.
* *
* * SPDX-License-Identifier: Apache-2.0
* *****************************************************************************
*/
package org.deeplearning4j.nn.params;
import java.util.*;
import lombok.val;
import org.deeplearning4j.nn.api.AbstractParamInitializer;
import org.deeplearning4j.nn.conf.layers.Convolution1DNew;
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
import org.deeplearning4j.nn.conf.layers.LayerConfiguration;
import org.deeplearning4j.nn.weights.WeightInitUtil;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.indexing.NDArrayIndex;
public class ConvolutionNewParamInitializer extends AbstractParamInitializer {
private static final ConvolutionNewParamInitializer INSTANCE = new ConvolutionNewParamInitializer();
public static ConvolutionNewParamInitializer getInstance() {
return INSTANCE;
}
public final static String WEIGHT_KEY = DefaultParamInitializer.WEIGHT_KEY;
public final static String BIAS_KEY = DefaultParamInitializer.BIAS_KEY;
@Override
public long numParams(LayerConfiguration l) {
return l.numParams();
}
@Override
public List<String> paramKeys(LayerConfiguration layer) {
ConvolutionLayer layerConf =
(ConvolutionLayer) layer;
if(layerConf.hasBias()){
return Arrays.asList(WEIGHT_KEY, BIAS_KEY);
} else {
return weightKeys(layer);
}
}
@Override
public List<String> weightKeys(LayerConfiguration layer) {
return Collections.singletonList(WEIGHT_KEY);
}
@Override
public List<String> biasKeys(LayerConfiguration layer) {
ConvolutionLayer layerConf =
(ConvolutionLayer) layer;
if(layerConf.hasBias()){
return Collections.singletonList(BIAS_KEY);
} else {
return Collections.emptyList();
}
}
@Override
public boolean isWeightParam(LayerConfiguration layer, String key) {
return WEIGHT_KEY.equals(key);
}
@Override
public boolean isBiasParam(LayerConfiguration layer, String key) {
return BIAS_KEY.equals(key);
}
@Override
public Map<String, INDArray> init(LayerConfiguration conf, INDArray paramsView, boolean initializeParams) {
Convolution1DNew layer = (Convolution1DNew) conf;
if (layer.getKernelSize().length != 2) throw new IllegalArgumentException("Filter size must be == 2");
Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());
Convolution1DNew layerConf =
(Convolution1DNew) conf;
val nOut = layerConf.getNOut();
if(layer.hasBias()){
//Standard case
INDArray biasView = paramsView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(0, nOut));
INDArray weightView = paramsView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(nOut, numParams(conf)));
params.put(BIAS_KEY, createBias(conf, biasView, initializeParams));
params.put(WEIGHT_KEY, createWeightMatrix(conf, weightView, initializeParams));
conf.getNetConfiguration().addNetWideVariable(WEIGHT_KEY);
conf.getNetConfiguration().addNetWideVariable(BIAS_KEY);
} else {
INDArray weightView = paramsView;
params.put(WEIGHT_KEY, createWeightMatrix(conf, weightView, initializeParams));
conf.getNetConfiguration().addNetWideVariable(WEIGHT_KEY);
}
return params;
}
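  // Illustrative sketch only (not part of the original initializer): for a layer with bias, the
  // flat paramsView handed to init() is laid out as [ bias | weights ], i.e. the first nOut
  // entries are the bias and the remaining nOut*nIn*kH*kW entries are the c-order weights.
  // Hypothetical helper that just prints those index ranges:
  private static void describeFlatLayout(long nOut, long nIn, long kH, long kW) {
    long numWeights = nOut * nIn * kH * kW;
    System.out.println("bias view   : [0, " + nOut + ")");
    System.out.println("weight view : [" + nOut + ", " + (nOut + numWeights) + ")");
  }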
@Override
public Map<String, INDArray> getGradientsFromFlattened(LayerConfiguration conf, INDArray gradientView) {
Convolution1DNew layerConf =
(Convolution1DNew) conf;
int[] kernel = layerConf.getKernelSize();
val nIn = layerConf.getNIn();
val nOut = layerConf.getNOut();
Map<String, INDArray> out = new LinkedHashMap<>();
if(layerConf.hasBias()){
//Standard case
INDArray biasGradientView = gradientView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(0, nOut));
INDArray weightGradientView =
gradientView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(nOut, numParams(conf)))
.reshape('c', nOut, nIn, kernel[0], kernel[1]);
out.put(BIAS_KEY, biasGradientView);
out.put(WEIGHT_KEY, weightGradientView);
} else {
INDArray weightGradientView = gradientView.reshape('c', nOut, nIn, kernel[0], kernel[1]);
out.put(WEIGHT_KEY, weightGradientView);
}
return out;
}
//1 bias per feature map
protected INDArray createBias(LayerConfiguration conf, INDArray biasView, boolean initializeParams) {
//the bias is a 1D tensor -- one bias per output feature map
Convolution1DNew layerConf =
(Convolution1DNew) conf;
if (initializeParams)
biasView.assign(layerConf.getBiasInit());
return biasView;
}
protected INDArray createWeightMatrix(LayerConfiguration conf, INDArray weightView, boolean initializeParams) {
/*
Create a 4d weight matrix of:
(number of kernels, num input channels, kernel height, kernel width)
Note c order is used specifically for the CNN weights, as opposed to f order elsewhere
Inputs to the convolution layer are:
(batch size, num input feature maps, image height, image width)
*/
Convolution1DNew layerConf =
(Convolution1DNew) conf;
if (initializeParams) {
int[] kernel = layerConf.getKernelSize();
int[] stride = layerConf.getStride();
val inputDepth = layerConf.getNIn();
val outputDepth = layerConf.getNOut();
double fanIn = inputDepth * kernel[0] * kernel[1];
double fanOut = outputDepth * kernel[0] * kernel[1] / ((double) stride[0] * stride[1]);
val weightsShape = new long[] {outputDepth, inputDepth, kernel[0], kernel[1]};
return layerConf.getWeightInit().init(fanIn, fanOut, weightsShape, 'c', weightView);
} else {
int[] kernel = layerConf.getKernelSize();
return WeightInitUtil.reshapeWeights(
new long[] {layerConf.getNOut(), layerConf.getNIn(), kernel[0], kernel[1]}, weightView, 'c');
}
}
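  // Worked example (illustrative assumption, not part of the original code): with nIn=3, nOut=8,
  // kernel={3,1} and stride={1,1}, the fan values used for weight init above are
  // fanIn = 3*3*1 = 9 and fanOut = 8*3*1/(1*1) = 24.
  private static double[] fanInFanOutExample(long nIn, long nOut, int[] kernel, int[] stride) {
    double fanIn = nIn * kernel[0] * kernel[1];
    double fanOut = nOut * kernel[0] * kernel[1] / ((double) stride[0] * stride[1]);
    return new double[] {fanIn, fanOut};
  }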
}

View File

@ -20,7 +20,6 @@
package org.deeplearning4j.util;

import java.util.Arrays;
import org.deeplearning4j.exception.DL4JInvalidConfigException;
import org.deeplearning4j.exception.DL4JInvalidInputException;
@ -37,47 +36,45 @@ public class Convolution1DUtils {

  private static final int ONE = 1;

  private Convolution1DUtils() {}

  public static int getOutputSize(
      INDArray inputData, int kernel, int strides, int padding, ConvolutionMode convolutionMode) {
    return getOutputSize(inputData, kernel, strides, padding, convolutionMode, ONE);
  }

  /**
   * Returns true if the given layer has an {@link RNNFormat}. This is true for: {@link
   * Convolution1D}, {@link Subsampling1DLayer} {@link SimpleRnn} {@link LSTM} {@link
   * EmbeddingSequenceLayer}
   *
   * @param layer the layer to test
   * @return true if the input layer has an rnn format false otherwise
   */
  public static boolean hasRnnDataFormat(LayerConfiguration layer) {
    return layer instanceof Convolution1D
        || layer instanceof Convolution1D
        || layer instanceof Subsampling1DLayer
        || layer instanceof SimpleRnn
        || layer instanceof LSTM
        || layer instanceof EmbeddingSequenceLayer;
  }

  /**
   * Get the {@link RNNFormat} for the given layer. Throws an {@link IllegalArgumentException} if a
   * layer doesn't have an rnn format
   *
   * @param layer the layer to get the format for
   * @return the format for the layer
   */
  public static RNNFormat getRnnFormatFromLayer(LayerConfiguration layer) {
    Preconditions.checkState(
        hasRnnDataFormat(layer),
        "ILayer of type "
            + layer.getClass().getName()
            + " and name "
            + layer.getName()
            + " does not have an RNNFormat");
    if (layer instanceof SimpleRnn) {
      SimpleRnn simpleRnn = (SimpleRnn) layer;
      return simpleRnn.getDataFormat();
@ -96,24 +93,24 @@ public class Convolution1DUtils {
    } else if (layer instanceof EmbeddingSequenceLayer) {
      EmbeddingSequenceLayer embeddingSequenceLayer = (EmbeddingSequenceLayer) layer;
      return embeddingSequenceLayer.getOutputDataFormat();
    } else {
      throw new IllegalArgumentException(
          "Illegal layer type " + layer.getClass().getName() + " and name " + layer.getName());
    }
  }

  /**
   * Reshapes the given weight array or weight gradient to work with the specified {@link RNNFormat}
   *
   * @param w the weight array or gradient
   * @param rnnFormat the {@link RNNFormat} to use
   * @return the reshaped array.
   */
  public static INDArray reshapeWeightArrayOrGradientForFormat(INDArray w, RNNFormat rnnFormat) {
    if (rnnFormat == RNNFormat.NWC)
      w = w.reshape(w.ordering(), w.size(0), w.size(1), w.size(2))
              .permute(2, 1, 0); // [oC, iC, k, 1] to [k, iC, oC]
    else {
      w = w.reshape(w.ordering(), w.size(2), w.size(1), w.size(0));
    }
@ -121,7 +118,6 @@ public class Convolution1DUtils {
    return w;
  }

  /**
   * Get the output size (height) for the given input data and CNN1D configuration
   *
@ -133,8 +129,13 @@ public class Convolution1DUtils {
   * @param dilation Kernel dilation
   * @return Output size (width)
   */
  public static long getOutputSize(
      long inH,
      int kernel,
      int strides,
      int padding,
      ConvolutionMode convolutionMode,
      int dilation) {
    long eKernel = effectiveKernelSize(kernel, dilation);
    if (convolutionMode == ConvolutionMode.Same || convolutionMode == ConvolutionMode.Causal) {
      return (int) Math.ceil(inH / ((double) strides));
@ -153,10 +154,14 @@ public class Convolution1DUtils {
   * @param dilation Kernel dilation
   * @return Output size (width)
   */
  public static int getOutputSize(
      INDArray inputData,
      int kernel,
      int strides,
      int padding,
      ConvolutionMode convolutionMode,
      int dilation) {
    if (inputData.size(2) > Integer.MAX_VALUE) throw new ND4JArraySizeException();
    int inH = (int) inputData.size(2);
    int eKernel = effectiveKernelSize(kernel, dilation);
    boolean atrous = (eKernel == kernel);
@ -171,8 +176,14 @@ public class Convolution1DUtils {
    return outH;
  }

  public static void validateShapes(
      INDArray inputData,
      int eKernel,
      int strides,
      int padding,
      ConvolutionMode convolutionMode,
      int dilation,
      int inShape,
      boolean atrous) {

    int inH = inShape;
@ -186,15 +197,21 @@ public class Convolution1DUtils {
      if (atrous) sb.append("effective ");
      sb.append("kernel height <= input height + 2 * padding height. \nGot ");
      if (atrous) sb.append("effective ");
      sb.append("kernel height = ")
          .append(eKernel)
          .append(", input height = ")
          .append(inH)
          .append(" and padding height = ")
          .append(padding)
          .append(" which do not satisfy 0 < ")
          .append(eKernel)
          .append(" <= ")
          .append(inH + 2 * padding)
          .append(getCommonErrorMsg(inputData, eKernel, strides, padding, dilation));
      throw new DL4JInvalidInputException(sb.toString());
    }

    if (convolutionMode == ConvolutionMode.Strict) {
      if ((inH - eKernel + 2 * padding) % strides != 0) {
        double d = (inH - eKernel + 2 * padding) / ((double) strides) + 1.0;
@ -202,27 +219,41 @@ public class Convolution1DUtils {
        int truncated = (int) d;
        int sameSize = (int) Math.ceil(inH / ((double) strides));

        String sb =
            "Invalid input data or configuration: Combination of kernel size, "
                + "stride and padding are not "
                + "valid for given input height, using ConvolutionMode.Strict\n"
                + "ConvolutionMode.Strict requires: output height = (input height - kernelSize + "
                + "2*padding)/stride + 1 to be an integer. Got: ("
                + inH
                + " - "
                + eKernel
                + " + 2*"
                + padding
                + ")/"
                + strides
                + " + 1 = "
                + str
                + "\n"
                + "See \"Constraints on strides\" at http://cs231n.github."
                + "io/convolutional-networks/ and ConvolutionType enumeration Javadoc.\n"
                + "To truncate/crop the input, such that output height = floor("
                + str
                + ") = "
                + truncated
                + ", use ConvolutionType.Truncate.\n"
                + "Alternatively use ConvolutionType.Same, which will use padding to give an "
                + "output height of ceil("
                + inH
                + "/"
                + strides
                + ")="
                + sameSize
                + getCommonErrorMsg(inputData, eKernel, strides, padding, dilation);
        throw new DL4JInvalidConfigException(sb);
      }
    }
  }

  public static int effectiveKernelSize(int kernel, int dilation) {
@ -235,9 +266,13 @@ public class Convolution1DUtils {
    }
  }

  private static String getCommonErrorMsg(
      INDArray inputData, int kernel, int strides, int padding, int dilation) {
    String s =
        "\nInput size: [numExamples,inputDepth,inputHeight,inputWidth]="
            + Arrays.toString(inputData.shape())
            + ", inputKernel="
            + kernel;
    if (dilation != 1) {
      int effectiveKernel = effectiveKernelSize(kernel, dilation);
      s += ", effectiveKernelGivenDilation=" + effectiveKernel;
@ -245,16 +280,13 @@ public class Convolution1DUtils {
    return s + ", stride=" + strides + ", padding=" + padding + ", dilation=" + dilation;
  }

  /** Check that the convolution mode is consistent with the padding specification */
  public static void validateConvolutionModePadding(ConvolutionMode mode, int padding) {
    if (mode == ConvolutionMode.Same) {
      boolean nullPadding = padding == 0;
      if (!nullPadding)
        throw new IllegalArgumentException(
            "Padding cannot be used when using the `same' convolution mode");
    }
  }
@ -268,30 +300,48 @@ public class Convolution1DUtils {
   * @param dilation Dilation (length 2 array, height dimension first)
   * @return Top left padding (length 2 array, height dimension first)
   */
  public static int getSameModeTopLeftPadding(
      int outSize, int inSize, int kernel, int strides, int dilation) {
    int eKernel = effectiveKernelSize(kernel, dilation);
    // Note that padBottom is 1 bigger than this if bracketed term is not divisible by 2
    int outPad = ((outSize - 1) * strides + eKernel - inSize) / 2;
    Preconditions.checkState(
        outPad >= 0,
        "Invalid padding values calculated: %s - "
            + "layer configuration is invalid? Input size %s, output size %s, kernel %s, "
            + "strides %s, dilation %s",
        outPad,
        inSize,
        outSize,
        kernel,
        strides,
        dilation);
    return outPad;
  }

  public static int getSameModeBottomRightPadding(
      int outSize, int inSize, int kernel, int strides, int dilation) {
    int eKernel = effectiveKernelSize(kernel, dilation);
    int totalPad = ((outSize - 1) * strides + eKernel - inSize);
    int tlPad = totalPad / 2;
    int brPad = totalPad - tlPad;
    Preconditions.checkState(
        brPad >= 0,
        "Invalid padding values (right) calculated: %s - "
            + "layer configuration is invalid? Input size %s, output size %s, kernel %s, "
            + "strides %s, dilation %s",
        brPad,
        inSize,
        outSize,
        kernel,
        strides,
        dilation);
    return brPad;
  }

  /**
   * Perform validation on the CNN layer kernel/stride/padding. Expect int, with values > 0 for
   * kernel size and stride, and values >= 0 for padding.
   *
   * @param kernel Kernel size to check
   * @param stride Stride to check
@ -300,16 +350,16 @@ public class Convolution1DUtils {
  public static void validateCnn1DKernelStridePadding(int kernel, int stride, int padding) {

    if (kernel <= 0) {
      throw new IllegalStateException(
          "Invalid kernel size: value must be positive (> 0). Got: " + kernel);
    }

    if (stride <= 0) {
      throw new IllegalStateException(
          "Invalid stride: value must be positive (> 0). Got: " + stride);
    }

    if (padding < 0) {
      throw new IllegalStateException(
          "Invalid padding: value must be non-negative (>= 0). Got: " + padding);
    }
  }
}
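// Worked example (illustrative sketch, not part of the commit): how Convolution1DUtils.getOutputSize
// behaves for the common modes, assuming inH=10, kernel=3, stride=2, padding=0, dilation=1.
//   Same/Causal : outH = ceil(10 / 2) = 5
//   Truncate    : outH = (10 - 3 + 0) / 2 + 1 = 4 (integer division truncates 4.5)
//   Strict      : (10 - 3 + 0) % 2 != 0, so validateShapes() throws DL4JInvalidConfigException.
class Convolution1DOutputSizeExample {

  // "Same"/"Causal" mode: output length depends only on input length and stride.
  static long sameModeOut(long inH, int stride) {
    return (long) Math.ceil(inH / (double) stride); // 10, 2 -> 5
  }

  // "Truncate" mode: standard convolution formula with integer (floor) division.
  static long truncateModeOut(long inH, int kernel, int padding, int stride) {
    return (inH - kernel + 2L * padding) / stride + 1; // 10, 3, 0, 2 -> 4
  }
}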