/*
 * ******************************************************************************
 * *
 * *
 * * This program and the accompanying materials are made available under the
 * * terms of the Apache License, Version 2.0 which is available at
 * * https://www.apache.org/licenses/LICENSE-2.0.
 * *
 * * See the NOTICE file distributed with this work for additional
 * * information regarding copyright ownership.
 * * Unless required by applicable law or agreed to in writing, software
 * * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * * License for the specific language governing permissions and limitations
 * * under the License.
 * *
 * * SPDX-License-Identifier: Apache-2.0
 * *****************************************************************************
 */

package org.deeplearning4j.gradientcheck;

import org.deeplearning4j.BaseDL4JTest;
import org.deeplearning4j.TestUtils;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.LSTM;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.conf.layers.RnnOutputLayer;
import org.deeplearning4j.nn.conf.layers.recurrent.Bidirectional;
import org.deeplearning4j.nn.conf.layers.recurrent.LastTimeStep;
import org.deeplearning4j.nn.conf.layers.recurrent.SimpleRnn;
import org.deeplearning4j.nn.conf.layers.recurrent.TimeDistributed;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.weights.WeightInit;
import org.junit.jupiter.api.Test;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.learning.config.NoOp;
import org.nd4j.linalg.lossfunctions.LossFunctions;

import java.util.Random;

import static org.junit.jupiter.api.Assertions.assertTrue;

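/**
 * Gradient checks for recurrent layers and wrappers: {@link SimpleRnn}, {@link LSTM},
 * {@link Bidirectional}, {@link LastTimeStep} and {@link TimeDistributed}.
 * Each test builds a small network and compares the analytic gradients from backprop
 * against numerical gradients estimated by central differences,
 * (L(w + eps) - L(w - eps)) / (2 * eps), for each parameter. Double precision is
 * needed for the two to agree within tolerance, hence the static data type setting below.
 */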
public class RnnGradientChecks extends BaseDL4JTest {

    private static final boolean PRINT_RESULTS = true;

    static {
        Nd4j.setDataType(DataType.DOUBLE);
    }

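    /**
     * Gradient check for the {@link Bidirectional} wrapper around SimpleRnn/LSTM, covering all four
     * merge modes (CONCAT, ADD, AVERAGE, MUL), with and without input masks, and with layer
     * normalization for the SimpleRnn case. A seeded RNG skips 3 of 4 combinations to limit
     * runtime while remaining deterministic run-to-run.
     */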
    @Test
    //@Ignore("AB 2019/06/24 - Ignored to get to all passing baseline to prevent regressions via CI - see issue #7912")
    public void testBidirectionalWrapper() {

        int nIn = 3;
        int nOut = 5;
        int tsLength = 4;

        Bidirectional.Mode[] modes = new Bidirectional.Mode[]{Bidirectional.Mode.CONCAT, Bidirectional.Mode.ADD,
                Bidirectional.Mode.AVERAGE, Bidirectional.Mode.MUL};

        Random r = new Random(12345);
        for (int mb : new int[]{1, 3}) {
            for (boolean inputMask : new boolean[]{false, true}) {
                for (boolean simple : new boolean[]{false, true}) {
                    for (boolean hasLayerNorm : new boolean[]{true, false}) {
                        //Layer normalization is only supported for SimpleRnn
                        if (!simple && hasLayerNorm)
                            continue;

                        INDArray in = Nd4j.rand(mb, nIn, tsLength);
                        INDArray labels = Nd4j.create(mb, nOut, tsLength);
                        for (int i = 0; i < mb; i++) {
                            for (int j = 0; j < tsLength; j++) {
                                labels.putScalar(i, r.nextInt(nOut), j, 1.0);
                            }
                        }
                        String maskType = (inputMask ? "inputMask" : "none");

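                        //Mask scheme: example i has its last (i+1) steps masked out (0.0 = masked).
                        //If that would mask the entire sequence (firstMaskedStep == 0), the example
                        //is left fully unmasked instead.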
                        INDArray inMask = null;
                        if (inputMask) {
                            inMask = Nd4j.ones(mb, tsLength);
                            for (int i = 0; i < mb; i++) {
                                int firstMaskedStep = tsLength - 1 - i;
                                if (firstMaskedStep == 0) {
                                    firstMaskedStep = tsLength;
                                }
                                for (int j = firstMaskedStep; j < tsLength; j++) {
                                    inMask.putScalar(i, j, 0.0);
                                }
                            }
                        }

                        for (Bidirectional.Mode m : modes) {
                            //Skip 3 of 4 test cases (64 down to 16), which still gives good coverage
                            //Note the fixed RNG seed - deterministic run-to-run
                            if (r.nextInt(4) != 0)
                                continue;

                            String name = "mb=" + mb + ", maskType=" + maskType + ", mode=" + m + ", hasLayerNorm=" + hasLayerNorm + ", rnnType="
                                    + (simple ? "SimpleRnn" : "LSTM");

                            System.out.println("Starting test: " + name);

                            NeuralNetConfiguration conf = NeuralNetConfiguration.builder()
                                    .dataType(DataType.DOUBLE)
                                    .updater(new NoOp())
                                    .weightInit(WeightInit.XAVIER)
                                    .list()
                                    .layer(LSTM.builder().nIn(nIn).nOut(3).build())
                                    .layer(Bidirectional.builder(m,
                                            (simple ?
                                                    SimpleRnn.builder().nIn(3).nOut(3).hasLayerNorm(hasLayerNorm).build() :
                                                    LSTM.builder().nIn(3).nOut(3).build())).build())
                                    .layer(RnnOutputLayer.builder().nOut(nOut).activation(Activation.SOFTMAX).build())
                                    .build();

                            MultiLayerNetwork net = new MultiLayerNetwork(conf);
                            net.init();

                            boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.MLNConfig().net(net).input(in)
                                    .labels(labels).inputMask(inMask));
                            assertTrue(gradOK, name);

                            TestUtils.testModelSerialization(net);
                        }
                    }
                }
            }
        }
    }

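    /**
     * Gradient check for one- and two-layer {@link SimpleRnn} networks, sweeping minibatch size,
     * time series length, nIn, layer size, input masking, layer normalization and L1/L2
     * regularization. A seeded RNG runs roughly 1 in 5 of the 128 combinations to keep test
     * time manageable.
     */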
    @Test
    //@Ignore("AB 2019/06/24 - Ignored to get to all passing baseline to prevent regressions via CI - see issue #7912")
    public void testSimpleRnn() {
        int nOut = 5;

        double[] l1s = new double[]{0.0, 0.4};
        double[] l2s = new double[]{0.0, 0.6};

        Random r = new Random(12345);
        for (int mb : new int[]{1, 3}) {
            for (int tsLength : new int[]{1, 4}) {
                for (int nIn : new int[]{3, 1}) {
                    for (int layerSize : new int[]{4, 1}) {
                        for (boolean inputMask : new boolean[]{false, true}) {
                            for (boolean hasLayerNorm : new boolean[]{true, false}) {
                                for (int l = 0; l < l1s.length; l++) {
                                    //Only run 1 of 5 on average (seeded RNG, so deterministic) - about 25 of 128 test cases, to minimize test time
                                    if (r.nextInt(5) != 0)
                                        continue;

                                    INDArray in = Nd4j.rand(mb, nIn, tsLength);
                                    INDArray labels = Nd4j.create(mb, nOut, tsLength);
                                    for (int i = 0; i < mb; i++) {
                                        for (int j = 0; j < tsLength; j++) {
                                            labels.putScalar(i, r.nextInt(nOut), j, 1.0);
                                        }
                                    }
                                    String maskType = (inputMask ? "inputMask" : "none");

                                    INDArray inMask = null;
                                    if (inputMask) {
                                        inMask = Nd4j.ones(mb, tsLength);
                                        for (int i = 0; i < mb; i++) {
                                            int firstMaskedStep = tsLength - 1 - i;
                                            if (firstMaskedStep == 0) {
                                                firstMaskedStep = tsLength;
                                            }
                                            for (int j = firstMaskedStep; j < tsLength; j++) {
                                                inMask.putScalar(i, j, 0.0);
                                            }
                                        }
                                    }

                                    String name = "testSimpleRnn() - mb=" + mb + ", tsLength = " + tsLength + ", maskType=" +
                                            maskType + ", l1=" + l1s[l] + ", l2=" + l2s[l] + ", hasLayerNorm=" + hasLayerNorm;

                                    System.out.println("Starting test: " + name);

                                    NeuralNetConfiguration conf = NeuralNetConfiguration.builder()
                                            .dataType(DataType.DOUBLE)
                                            .updater(new NoOp())
                                            .weightInit(WeightInit.XAVIER)
                                            .activation(Activation.TANH)
                                            .l1(l1s[l])
                                            .l2(l2s[l])
                                            .list()
                                            .layer(SimpleRnn.builder().nIn(nIn).nOut(layerSize).hasLayerNorm(hasLayerNorm).build())
                                            .layer(SimpleRnn.builder().nIn(layerSize).nOut(layerSize).hasLayerNorm(hasLayerNorm).build())
                                            .layer(RnnOutputLayer.builder().nIn(layerSize).nOut(nOut)
                                                    .activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT)
                                                    .build())
                                            .build();

                                    MultiLayerNetwork net = new MultiLayerNetwork(conf);
                                    net.init();

                                    boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.MLNConfig().net(net).input(in)
                                            .labels(labels).inputMask(inMask));
                                    assertTrue(gradOK, name);
                                    TestUtils.testModelSerialization(net);
                                }
                            }
                        }
                    }
                }
            }
        }
    }

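    /**
     * Gradient check for the {@link LastTimeStep} wrapper, which reduces the 3d [mb, size, tsLength]
     * activations of the wrapped recurrent layer to the 2d [mb, size] activations at the last
     * (unmasked) time step, feeding a standard {@link OutputLayer} with 2d labels.
     */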
    @Test
    //@Ignore("AB 2019/06/24 - Ignored to get to all passing baseline to prevent regressions via CI - see issue #7912")
    public void testLastTimeStepLayer() {
        int nIn = 3;
        int nOut = 5;
        int tsLength = 4;
        int layerSize = 8;

        Random r = new Random(12345);
        for (int mb : new int[]{1, 3}) {
            for (boolean inputMask : new boolean[]{false, true}) {
                for (boolean simple : new boolean[]{false, true}) {
                    for (boolean hasLayerNorm : new boolean[]{true, false}) {
                        if (!simple && hasLayerNorm)
                            continue;

                        INDArray in = Nd4j.rand(mb, nIn, tsLength);
                        INDArray labels = Nd4j.create(mb, nOut);
                        for (int i = 0; i < mb; i++) {
                            labels.putScalar(i, r.nextInt(nOut), 1.0);
                        }
                        String maskType = (inputMask ? "inputMask" : "none");

                        INDArray inMask = null;
                        if (inputMask) {
                            inMask = Nd4j.ones(mb, tsLength);
                            for (int i = 0; i < mb; i++) {
                                int firstMaskedStep = tsLength - 1 - i;
                                if (firstMaskedStep == 0) {
                                    firstMaskedStep = tsLength;
                                }
                                for (int j = firstMaskedStep; j < tsLength; j++) {
                                    inMask.putScalar(i, j, 0.0);
                                }
                            }
                        }

                        String name = "testLastTimeStepLayer() - mb=" + mb + ", tsLength = " + tsLength + ", maskType=" + maskType
                                + ", hasLayerNorm=" + hasLayerNorm + ", rnnType=" + (simple ? "SimpleRnn" : "LSTM");
                        if (PRINT_RESULTS) {
                            System.out.println("Starting test: " + name);
                        }

                        NeuralNetConfiguration conf = NeuralNetConfiguration.builder()
                                .dataType(DataType.DOUBLE)
                                .activation(Activation.TANH)
                                .updater(new NoOp())
                                .weightInit(WeightInit.XAVIER)
                                .layer(simple ? SimpleRnn.builder().nOut(layerSize).hasLayerNorm(hasLayerNorm).build() :
                                        LSTM.builder().nOut(layerSize).build())
                                .layer(LastTimeStep.builder().underlying(simple ? SimpleRnn.builder().nOut(layerSize).hasLayerNorm(hasLayerNorm).build() :
                                        LSTM.builder().nOut(layerSize).build()).build())
                                .layer(OutputLayer.builder().nOut(nOut).activation(Activation.SOFTMAX)
                                        .lossFunction(LossFunctions.LossFunction.MCXENT).build())
                                .inputType(InputType.recurrent(nIn))
                                .build();

                        MultiLayerNetwork net = new MultiLayerNetwork(conf);
                        net.init();
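
                        //Subset mode: check at most 16 parameters per parameter array rather than
                        //all of them, to keep runtime down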
                        boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.MLNConfig().net(net).input(in)
                                .labels(labels).inputMask(inMask).subset(true).maxPerParam(16));
                        assertTrue(gradOK, name);
                        TestUtils.testModelSerialization(net);
                    }
                }
            }
        }
    }

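    /**
     * Gradient check for the {@link TimeDistributed} wrapper, which applies the wrapped
     * {@link DenseLayer} independently at each time step of the [mb, nIn, tsLength] input sequence.
     */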
    @Test
    public void testTimeDistributedDense() {
        int nIn = 3;
        int nOut = 5;
        int tsLength = 4;
        int layerSize = 8;

        Random r = new Random(12345);
        for (int mb : new int[]{1, 3}) {
            for (boolean inputMask : new boolean[]{false, true}) {

                INDArray in = Nd4j.rand(mb, nIn, tsLength);
                INDArray labels = TestUtils.randomOneHotTimeSeries(mb, nOut, tsLength);
                String maskType = (inputMask ? "inputMask" : "none");

                INDArray inMask = null;
                if (inputMask) {
                    inMask = Nd4j.ones(mb, tsLength);
                    for (int i = 0; i < mb; i++) {
                        int firstMaskedStep = tsLength - 1 - i;
                        if (firstMaskedStep == 0) {
                            firstMaskedStep = tsLength;
                        }
                        for (int j = firstMaskedStep; j < tsLength; j++) {
                            inMask.putScalar(i, j, 0.0);
                        }
                    }
                }

                String name = "testTimeDistributedDense() - mb=" + mb + ", tsLength = " + tsLength + ", maskType=" + maskType;
                if (PRINT_RESULTS) {
                    System.out.println("Starting test: " + name);
                }

                NeuralNetConfiguration conf = NeuralNetConfiguration.builder()
                        .dataType(DataType.DOUBLE)
                        .activation(Activation.TANH)
                        .updater(new NoOp())
                        .weightInit(WeightInit.XAVIER)
                        .list()
                        .layer(LSTM.builder().nOut(layerSize).build())
                        .layer(TimeDistributed.builder().underlying(DenseLayer.builder().nOut(layerSize).activation(Activation.SOFTMAX).build()).build())
                        .layer(RnnOutputLayer.builder().nOut(nOut).activation(Activation.SOFTMAX)
                                .lossFunction(LossFunctions.LossFunction.MCXENT).build())
                        .inputType(InputType.recurrent(nIn))
                        .build();

                MultiLayerNetwork net = new MultiLayerNetwork(conf);
                net.init();

                boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.MLNConfig().net(net).input(in)
                        .labels(labels).inputMask(inMask).subset(true).maxPerParam(16));
                assertTrue(gradOK, name);
                TestUtils.testModelSerialization(net);
            }
        }
    }
}