GRU and GRUBp (#410)
* GRU and GRUBp ops added and tested Signed-off-by: Andrii Tuzhykov <andrewtuzhykov@gmail.com> * minor polishing Signed-off-by: Andrii Tuzhykov <andrewtuzhykov@gmail.com> * few requested changes Signed-off-by: Andrii Tuzhykov <andrewtuzhykov@gmail.com> * regenerated namespace + small fix in RnnOpValidation Signed-off-by: Andrii Tuzhykov <andrewtuzhykov@gmail.com> * Fix bad character in RnnOpValidation Signed-off-by: Alex Black <blacka101@gmail.com> Co-authored-by: Alex Black <blacka101@gmail.com>master
parent
5e779574cb
commit
bd376ca993
|
@ -35,6 +35,48 @@ public class SDRNN extends SDOps {
|
|||
super(sameDiff);
|
||||
}
|
||||
|
||||
/**
|
||||
* The GRU operation. Gated Recurrent Unit - Cho et al. 2014.<br>
|
||||
*
|
||||
* @param x input [time, bS, nIn] (NUMERIC type)
|
||||
* @param hLast initial cell output (at time step = 0) [bS, nOut] (NUMERIC type)
|
||||
* @param Wx input-to-hidden weights, [nIn, 3*nOut] (NUMERIC type)
|
||||
* @param Wh hidden-to-hidden weights, [nOut, 3*nOut] (NUMERIC type)
|
||||
* @param biases biases, [3*nOut] (NUMERIC type)
|
||||
* @return h cell outputs [time, bS, nOut], that is per each time step (NUMERIC type)
|
||||
*/
|
||||
public SDVariable gru(SDVariable x, SDVariable hLast, SDVariable Wx, SDVariable Wh,
|
||||
SDVariable biases) {
|
||||
SDValidation.validateNumerical("gru", "x", x);
|
||||
SDValidation.validateNumerical("gru", "hLast", hLast);
|
||||
SDValidation.validateNumerical("gru", "Wx", Wx);
|
||||
SDValidation.validateNumerical("gru", "Wh", Wh);
|
||||
SDValidation.validateNumerical("gru", "biases", biases);
|
||||
return new org.nd4j.linalg.api.ops.impl.layers.recurrent.GRU(sd,x, hLast, Wx, Wh, biases).outputVariable();
|
||||
}
|
||||
|
||||
/**
|
||||
* The GRU operation. Gated Recurrent Unit - Cho et al. 2014.<br>
|
||||
*
|
||||
* @param name name May be null. Name for the output variable
|
||||
* @param x input [time, bS, nIn] (NUMERIC type)
|
||||
* @param hLast initial cell output (at time step = 0) [bS, nOut] (NUMERIC type)
|
||||
* @param Wx input-to-hidden weights, [nIn, 3*nOut] (NUMERIC type)
|
||||
* @param Wh hidden-to-hidden weights, [nOut, 3*nOut] (NUMERIC type)
|
||||
* @param biases biases, [3*nOut] (NUMERIC type)
|
||||
* @return h cell outputs [time, bS, nOut], that is per each time step (NUMERIC type)
|
||||
*/
|
||||
public SDVariable gru(String name, SDVariable x, SDVariable hLast, SDVariable Wx, SDVariable Wh,
|
||||
SDVariable biases) {
|
||||
SDValidation.validateNumerical("gru", "x", x);
|
||||
SDValidation.validateNumerical("gru", "hLast", hLast);
|
||||
SDValidation.validateNumerical("gru", "Wx", Wx);
|
||||
SDValidation.validateNumerical("gru", "Wh", Wh);
|
||||
SDValidation.validateNumerical("gru", "biases", biases);
|
||||
SDVariable out = new org.nd4j.linalg.api.ops.impl.layers.recurrent.GRU(sd,x, hLast, Wx, Wh, biases).outputVariable();
|
||||
return sd.updateVariableNameAndReference(out, name);
|
||||
}
|
||||
|
||||
/**
|
||||
* The GRU cell. Does a single time step operation<br>
|
||||
*
|
||||
|
@ -42,9 +84,9 @@ public class SDRNN extends SDOps {
|
|||
* @param hLast Output of the previous cell/time step, with shape [batchSize, numUnits] (NUMERIC type)
|
||||
* @param GRUWeights Configuration Object
|
||||
*/
|
||||
public SDVariable[] gru(SDVariable x, SDVariable hLast, GRUWeights GRUWeights) {
|
||||
SDValidation.validateNumerical("gru", "x", x);
|
||||
SDValidation.validateNumerical("gru", "hLast", hLast);
|
||||
public SDVariable[] gruCell(SDVariable x, SDVariable hLast, GRUWeights GRUWeights) {
|
||||
SDValidation.validateNumerical("gruCell", "x", x);
|
||||
SDValidation.validateNumerical("gruCell", "hLast", hLast);
|
||||
return new org.nd4j.linalg.api.ops.impl.layers.recurrent.GRUCell(sd,x, hLast, GRUWeights).outputVariables();
|
||||
}
|
||||
|
||||
|
@ -56,9 +98,10 @@ public class SDRNN extends SDOps {
|
|||
* @param hLast Output of the previous cell/time step, with shape [batchSize, numUnits] (NUMERIC type)
|
||||
* @param GRUWeights Configuration Object
|
||||
*/
|
||||
public SDVariable[] gru(String[] names, SDVariable x, SDVariable hLast, GRUWeights GRUWeights) {
|
||||
SDValidation.validateNumerical("gru", "x", x);
|
||||
SDValidation.validateNumerical("gru", "hLast", hLast);
|
||||
public SDVariable[] gruCell(String[] names, SDVariable x, SDVariable hLast,
|
||||
GRUWeights GRUWeights) {
|
||||
SDValidation.validateNumerical("gruCell", "x", x);
|
||||
SDValidation.validateNumerical("gruCell", "hLast", hLast);
|
||||
SDVariable[] out = new org.nd4j.linalg.api.ops.impl.layers.recurrent.GRUCell(sd,x, hLast, GRUWeights).outputVariables();
|
||||
return sd.updateVariableNamesAndReferences(out, names);
|
||||
}
|
||||
|
|
|
@ -144,6 +144,8 @@ public class ImportClassMapping {
|
|||
org.nd4j.linalg.api.ops.impl.layers.convolution.SpaceToDepth.class,
|
||||
org.nd4j.linalg.api.ops.impl.layers.convolution.Upsampling2d.class,
|
||||
org.nd4j.linalg.api.ops.impl.layers.convolution.Upsampling2dDerivative.class,
|
||||
org.nd4j.linalg.api.ops.impl.layers.recurrent.GRU.class,
|
||||
org.nd4j.linalg.api.ops.impl.layers.recurrent.GRUBp.class,
|
||||
org.nd4j.linalg.api.ops.impl.layers.recurrent.GRUCell.class,
|
||||
org.nd4j.linalg.api.ops.impl.layers.recurrent.LSTMBlockCell.class,
|
||||
org.nd4j.linalg.api.ops.impl.layers.recurrent.LSTMCell.class,
|
||||
|
|
|
@ -0,0 +1,67 @@
|
|||
/* ******************************************************************************
|
||||
* Copyright (c) 2020 Konduit K.K.
|
||||
*
|
||||
* This program and the accompanying materials are made available under the
|
||||
* terms of the Apache License, Version 2.0 which is available at
|
||||
* https://www.apache.org/licenses/LICENSE-2.0.
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
******************************************************************************/
|
||||
package org.nd4j.linalg.api.ops.impl.layers.recurrent;
|
||||
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.NonNull;
|
||||
import org.nd4j.autodiff.samediff.SDVariable;
|
||||
import org.nd4j.autodiff.samediff.SameDiff;
|
||||
import org.nd4j.base.Preconditions;
|
||||
import org.nd4j.linalg.api.buffer.DataType;
|
||||
import org.nd4j.linalg.api.ndarray.INDArray;
|
||||
import org.nd4j.linalg.api.ops.DynamicCustomOp;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
@NoArgsConstructor
|
||||
public class GRU extends DynamicCustomOp {
|
||||
|
||||
|
||||
public GRU(@NonNull SameDiff sameDiff, @NonNull SDVariable x, @NonNull SDVariable hI, @NonNull SDVariable Wx, @NonNull SDVariable Wh, @NonNull SDVariable biases) {
|
||||
super(null, sameDiff, new SDVariable[]{x, hI, Wx, Wh, biases});
|
||||
|
||||
}
|
||||
|
||||
public GRU(@NonNull INDArray x, @NonNull INDArray hI, @NonNull INDArray Wx, @NonNull INDArray Wh, @NonNull INDArray biases) {
|
||||
super(new INDArray[]{x, hI, Wx, Wh, biases}, null);
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<DataType> calculateOutputDataTypes(List<DataType> inputDataTypes) {
|
||||
Preconditions.checkState(inputDataTypes != null && inputDataTypes.size() == 5, "Expected 5 inputs to GRU: initial cell output, input-to-hidden weights, hidden-to-hidden weights and biases got %s", inputDataTypes);
|
||||
DataType dt = inputDataTypes.get(1);
|
||||
for (int i = 0; i < inputDataTypes.size(); i++) {
|
||||
Preconditions.checkState(inputDataTypes.get(i).isFPType(), "All input types must be a floating point type, got %s", dt);
|
||||
}
|
||||
Preconditions.checkState(dt.isFPType(), "Input type 1 must be a floating point type, got %s", dt);
|
||||
return Collections.singletonList(dt);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<SDVariable> doDiff(List<SDVariable> grads) {
|
||||
return Arrays.asList(new GRUBp(sameDiff, arg(0), arg(1), arg(2), arg(3),
|
||||
arg(4), grads.get(0)).outputVariables());
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String opName() {
|
||||
return "gru";
|
||||
}
|
||||
}
|
|
@ -0,0 +1,56 @@
|
|||
/* ******************************************************************************
|
||||
* Copyright (c) 2020 Konduit K.K.
|
||||
*
|
||||
* This program and the accompanying materials are made available under the
|
||||
* terms of the Apache License, Version 2.0 which is available at
|
||||
* https://www.apache.org/licenses/LICENSE-2.0.
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
******************************************************************************/
|
||||
package org.nd4j.linalg.api.ops.impl.layers.recurrent;
|
||||
|
||||
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.NonNull;
|
||||
import org.nd4j.autodiff.samediff.SDVariable;
|
||||
import org.nd4j.autodiff.samediff.SameDiff;
|
||||
import org.nd4j.base.Preconditions;
|
||||
import org.nd4j.linalg.api.buffer.DataType;
|
||||
import org.nd4j.linalg.api.ops.DynamicCustomOp;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
@NoArgsConstructor
|
||||
public class GRUBp extends DynamicCustomOp {
|
||||
|
||||
|
||||
public GRUBp(@NonNull SameDiff sameDiff, @NonNull SDVariable x, @NonNull SDVariable hI, @NonNull SDVariable Wx, @NonNull SDVariable Wh, @NonNull SDVariable biases, @NonNull SDVariable dLdh) {
|
||||
super(null, sameDiff, new SDVariable[]{x, hI, Wx, Wh, biases, dLdh});
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<DataType> calculateOutputDataTypes(List<DataType> inputDataTypes) {
|
||||
DataType dt = inputDataTypes.get(1);
|
||||
List<DataType> list = new ArrayList<DataType>();
|
||||
list.add(dt);
|
||||
list.add(dt);
|
||||
list.add(dt);
|
||||
list.add(dt);
|
||||
list.add(dt);
|
||||
return list;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String opName() {
|
||||
return "gru_bp";
|
||||
}
|
||||
}
|
|
@ -34,6 +34,25 @@ public class NDRNN {
|
|||
public NDRNN() {
|
||||
}
|
||||
|
||||
/**
|
||||
* The GRU operation. Gated Recurrent Unit - Cho et al. 2014.<br>
|
||||
*
|
||||
* @param x input [time, bS, nIn] (NUMERIC type)
|
||||
* @param hLast initial cell output (at time step = 0) [bS, nOut] (NUMERIC type)
|
||||
* @param Wx input-to-hidden weights, [nIn, 3*nOut] (NUMERIC type)
|
||||
* @param Wh hidden-to-hidden weights, [nOut, 3*nOut] (NUMERIC type)
|
||||
* @param biases biases, [3*nOut] (NUMERIC type)
|
||||
* @return h cell outputs [time, bS, nOut], that is per each time step (NUMERIC type)
|
||||
*/
|
||||
public INDArray gru(INDArray x, INDArray hLast, INDArray Wx, INDArray Wh, INDArray biases) {
|
||||
NDValidation.validateNumerical("gru", "x", x);
|
||||
NDValidation.validateNumerical("gru", "hLast", hLast);
|
||||
NDValidation.validateNumerical("gru", "Wx", Wx);
|
||||
NDValidation.validateNumerical("gru", "Wh", Wh);
|
||||
NDValidation.validateNumerical("gru", "biases", biases);
|
||||
return Nd4j.exec(new org.nd4j.linalg.api.ops.impl.layers.recurrent.GRU(x, hLast, Wx, Wh, biases))[0];
|
||||
}
|
||||
|
||||
/**
|
||||
* The GRU cell. Does a single time step operation<br>
|
||||
*
|
||||
|
@ -41,9 +60,9 @@ public class NDRNN {
|
|||
* @param hLast Output of the previous cell/time step, with shape [batchSize, numUnits] (NUMERIC type)
|
||||
* @param GRUWeights Configuration Object
|
||||
*/
|
||||
public INDArray[] gru(INDArray x, INDArray hLast, GRUWeights GRUWeights) {
|
||||
NDValidation.validateNumerical("gru", "x", x);
|
||||
NDValidation.validateNumerical("gru", "hLast", hLast);
|
||||
public INDArray[] gruCell(INDArray x, INDArray hLast, GRUWeights GRUWeights) {
|
||||
NDValidation.validateNumerical("gruCell", "x", x);
|
||||
NDValidation.validateNumerical("gruCell", "hLast", hLast);
|
||||
return Nd4j.exec(new org.nd4j.linalg.api.ops.impl.layers.recurrent.GRUCell(x, hLast, GRUWeights));
|
||||
}
|
||||
|
||||
|
|
|
@ -38,6 +38,7 @@ import org.nd4j.linalg.api.ops.impl.layers.convolution.DepthwiseConv2D;
|
|||
import org.nd4j.linalg.api.ops.impl.layers.convolution.Pooling2D;
|
||||
import org.nd4j.linalg.api.ops.impl.layers.convolution.Pooling2DDerivative;
|
||||
import org.nd4j.linalg.api.ops.impl.layers.convolution.config.*;
|
||||
import org.nd4j.linalg.api.ops.impl.layers.recurrent.GRU;
|
||||
import org.nd4j.linalg.api.ops.impl.layers.recurrent.config.LSTMActivations;
|
||||
import org.nd4j.linalg.api.ops.impl.layers.recurrent.config.LSTMDataFormat;
|
||||
import org.nd4j.linalg.api.ops.impl.layers.recurrent.config.LSTMDirectionMode;
|
||||
|
@ -1701,6 +1702,35 @@ public class LayerOpValidation extends BaseOpValidation {
|
|||
assertNull(err);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void GRUTestCase() {
|
||||
int bS = 5;
|
||||
int nIn = 4;
|
||||
int nOut = 6;
|
||||
int time = 2;
|
||||
|
||||
SameDiff sd = SameDiff.create();
|
||||
|
||||
SDVariable in = sd.var("in", Nd4j.randn(DataType.DOUBLE, time, bS, nIn).muli(10));
|
||||
SDVariable hLast = sd.var("cLast", Nd4j.zeros(DataType.DOUBLE, bS, nOut));
|
||||
SDVariable Wx = sd.var("Wx", Nd4j.randn(DataType.DOUBLE, nIn, 3*nOut));
|
||||
SDVariable Wh = sd.var("Wh", Nd4j.randn(DataType.DOUBLE, nOut, 3*nOut));
|
||||
SDVariable biases = sd.var("bias", Nd4j.randn(DataType.DOUBLE, 3*nOut));
|
||||
|
||||
SDVariable out = new GRU(sd, in, hLast, Wx, Wh,biases).outputVariable();
|
||||
|
||||
long[] outShapes = new long[]{time,bS, nOut};
|
||||
assertArrayEquals(new long[]{time,bS, nOut}, out.eval().shape());
|
||||
|
||||
sd.setLossVariables(out.std(true));
|
||||
String err = OpValidation.validate(new TestCase(sd)
|
||||
.gradientCheck(true)
|
||||
);
|
||||
|
||||
assertNull(err);
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -227,7 +227,7 @@ public class RnnOpValidation extends BaseOpValidation {
|
|||
.cBias(bc)
|
||||
.build();
|
||||
|
||||
SDVariable[] v = sd.rnn().gru(x, hLast, weights);
|
||||
SDVariable[] v = sd.rnn().gruCell(x, hLast, weights);
|
||||
List<String> toExec = new ArrayList<>();
|
||||
for(SDVariable sdv : v){
|
||||
toExec.add(sdv.name());
|
||||
|
|
Loading…
Reference in New Issue