Op validation improvements + encoding fix (#49)

* Op validation updates

Signed-off-by: AlexDBlack <blacka101@gmail.com>

* Small logging fix

Signed-off-by: AlexDBlack <blacka101@gmail.com>

* #7986 Fix minor character encoding issues

Signed-off-by: AlexDBlack <blacka101@gmail.com>

* Small ignore fix

Signed-off-by: AlexDBlack <blacka101@gmail.com>
master
Alex Black 2019-07-08 17:58:48 +10:00 committed by AlexDBlack
parent 88ea9a49eb
commit cc65c01118
10 changed files with 236 additions and 44 deletions

View File

@@ -145,7 +145,7 @@ DECLARE_SHAPE_FN(sru) {
 CUSTOM_OP_IMPL(sru_bp, 8, 4, true, 0, 0) {
     auto x = INPUT_VARIABLE(0); // X, input 3d tensor [bS x K x N], N - number of time steps, bS - batch size, K - number of features
     auto w = INPUT_VARIABLE(1); // W, 2d tensor of weights [3K x K]
-    auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 × 2*K]
+    auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 x 2*K]
     auto c0 = INPUT_VARIABLE(3); // C_{0}, 2d tensor of initial state [bS x K] at time t=0
     auto c = INPUT_VARIABLE(4); // C, [bS x K x N]
     auto inGradCt = INPUT_VARIABLE(5); // [bS x K]
@@ -331,7 +331,7 @@ CUSTOM_OP_IMPL(sru_bi, 5, 2, true, 0, 0) {
     auto x = INPUT_VARIABLE(0); // X, input 3d tensor [time x bS x 2*inSize], time - number of time steps, bS - batch size, inSize - number of features
     auto w = INPUT_VARIABLE(1); // W, 2d tensor of weights [2*inSize x 6*inSize]
-    auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 × 4*inSize]
+    auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 x 4*inSize]
     auto c0 = INPUT_VARIABLE(3); // C_{0}, 2d tensor of initial state [bS x 2*inSize] at time t=0
     NDArray* mask = block.width() > 4 ? INPUT_VARIABLE(4) : nullptr; // optional, 2d tensor of dropout mask [bS x 2*inSize]
@@ -431,7 +431,7 @@ CUSTOM_OP_IMPL(sru_bi_bp, 8, 4, true, 0, 0) {
     auto x = INPUT_VARIABLE(0); // X, input 3d tensor [time x bS x 2*inSize], time - number of time steps, bS - batch size, inSize - number of features
     auto w = INPUT_VARIABLE(1); // W, 2d tensor of weights [2*inSize x 6*inSize]
-    auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 × 4*inSize]
+    auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 x 4*inSize]
     auto c0 = INPUT_VARIABLE(3); // C_{0}, 2d tensor of initial state [bS x 2*inSize] at time t=0
     auto ct = INPUT_VARIABLE(4); // C, [time x bS x 2*inSize]
     auto inGradC0 = INPUT_VARIABLE(5); // [bS x 2*inSize]
@@ -553,7 +553,7 @@ DECLARE_SHAPE_FN(sru_bi_bp) {
  * Input arrays:
  *    0: input 3d tensor with shape [bS x K x N], N - number of time steps, bS - batch size, K - number of features
  *    1: 2d tensor of weights [3K x K]
- *    2: row of biases with twice length [1 × 2K]
+ *    2: row of biases with twice length [1 x 2K]
  *    3: 2d tensor of previous cell state [bS x K]
  *    4: optional, 2d tensor of dropout mask [bS x K]
  *
@@ -572,7 +572,7 @@ DECLARE_SHAPE_FN(sru_bi_bp) {
  * Input arrays:
  *    0: input 3d tensor with shape [bS x K x N], N - number of time steps, bS - batch size, K - number of features
  *    1: 2d tensor of weights [3K x K]
- *    2: row of biases with twice length [1 × 2K]
+ *    2: row of biases with twice length [1 x 2K]
  *    3: 2d tensor of previous cell state [bS x K]
  *    4: optional, 2d tensor of dropout mask [bS x K]
  *
@@ -592,7 +592,7 @@ DECLARE_SHAPE_FN(sru_bi_bp) {
  * Input arrays:
  *    0: input 3d tensor with shape [bS x K x N], N - number of time steps, bS - batch size, K - number of features
  *    1: 2d tensor of weights [3K x K]
- *    2: row of biases with twice length [1 × 2K]
+ *    2: row of biases with twice length [1 x 2K]
  *    3: 2d tensor of previous cell state [bS x K]
  *    4: 3d tensor of cell state [bS x K x N]
  *    5: 2d tensor of cell state gradients [bS x K]
@@ -622,7 +622,7 @@ DECLARE_SHAPE_FN(sru_bi_bp) {
 // auto input = INPUT_VARIABLE(0); // X, input 3d tensor [bS x K x N], N - number of time steps, bS - batch size, K - number of features
 // auto weights = INPUT_VARIABLE(1); // W, 2d tensor of weights [3K x K]
-// auto bias = INPUT_VARIABLE(2); // B, row of biases with twice length [1 × 2*K]
+// auto bias = INPUT_VARIABLE(2); // B, row of biases with twice length [1 x 2*K]
 // auto init = INPUT_VARIABLE(3); // C_{0}, 2d tensor of initial state [bS x K] at time t=0
 // NDArray* mask = nullptr; // optional, 2d tensor of dropout mask [bS x K]
@@ -710,7 +710,7 @@ DECLARE_SHAPE_FN(sru_bi_bp) {
 // CUSTOM_OP_IMPL(sru_old, 5, 2, false, 0, 0) {
 // auto x = INPUT_VARIABLE(0); // X, input 3d tensor [bS x inSize x time], time - number of time steps, bS - batch size, inSize - number of features
 // auto w = INPUT_VARIABLE(1); // W, 2d tensor of weights [3K x inSize]
-// auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 × 2*inSize]
+// auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 x 2*inSize]
 // auto c0 = INPUT_VARIABLE(3); // C_{0}, 2d tensor of initial state [bS x inSize] at time t=0
 // NDArray* mask = nullptr; // optional, 2d tensor of dropout mask [bS x inSize]
@@ -820,7 +820,7 @@ DECLARE_SHAPE_FN(sru_bi_bp) {
 // auto x = INPUT_VARIABLE(0); // X, input 3d tensor [bS x inSize x time], time - number of time steps, bS - batch size, inSize - number of features
 // auto w = INPUT_VARIABLE(1); // W, 2d tensor of weights [3*inSize x inSize]
-// auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 × 2*inSize]
+// auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 x 2*inSize]
 // auto c0 = INPUT_VARIABLE(3); // C_{0}, 2d tensor of initial state [bS x inSize] at time t=0
 // auto c = INPUT_VARIABLE(4); // C, [bS x inSize x time]
 // auto inGradCt = INPUT_VARIABLE(5); // [bS x inSize]

View File

@@ -34,7 +34,7 @@ CUSTOM_OP_IMPL(sruCell, 4, 2, false, 0, 0) {
     auto xt = INPUT_VARIABLE(0); // input [bS x inSize], bS - batch size, inSize - number of features
     auto ct_1 = INPUT_VARIABLE(1); // previous cell state ct [bS x inSize], that is at previous time step t-1
     auto w = INPUT_VARIABLE(2); // weights [inSize x 3*inSize]
-    auto b = INPUT_VARIABLE(3); // biases [1 × 2*inSize]
+    auto b = INPUT_VARIABLE(3); // biases [1 x 2*inSize]
     auto ht = OUTPUT_VARIABLE(0); // current cell output [bS x inSize], that is at current time step t
     auto ct = OUTPUT_VARIABLE(1); // current cell state [bS x inSize], that is at current time step t
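The shapes in this hunk pin down the sruCell contract: xt [bS x inSize], ct_1 [bS x inSize], w [inSize x 3*inSize], b [1 x 2*inSize]. As a sanity check of the math those comments imply, here is a minimal ND4J sketch of a standard SRU cell forward pass (Lei et al.'s formulation; the [candidate, forget, reset] column ordering and tanh output activation are assumptions, and this is illustrative only, not the op's actual implementation):

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.indexing.NDArrayIndex;
import org.nd4j.linalg.ops.transforms.Transforms;

// Sketch: xt [bS x inSize], ct_1 [bS x inSize], w [inSize x 3*inSize], b [1 x 2*inSize]
public static INDArray[] sruCellForward(INDArray xt, INDArray ct_1, INDArray w, INDArray b) {
    long inSize = xt.size(1);
    INDArray z = xt.mmul(w);                                                            // [bS x 3*inSize]
    INDArray zt = z.get(NDArrayIndex.all(), NDArrayIndex.interval(0, inSize));          // candidate x~
    INDArray ft = z.get(NDArrayIndex.all(), NDArrayIndex.interval(inSize, 2 * inSize)); // forget pre-activation
    INDArray rt = z.get(NDArrayIndex.all(), NDArrayIndex.interval(2 * inSize, 3 * inSize)); // reset pre-activation
    INDArray bf = b.get(NDArrayIndex.point(0), NDArrayIndex.interval(0, inSize));
    INDArray br = b.get(NDArrayIndex.point(0), NDArrayIndex.interval(inSize, 2 * inSize));
    ft = Transforms.sigmoid(ft.addRowVector(bf));                                       // f_t = sigmoid(x_t*W_f + b_f)
    rt = Transforms.sigmoid(rt.addRowVector(br));                                       // r_t = sigmoid(x_t*W_r + b_r)
    INDArray ct = ft.mul(ct_1).add(ft.rsub(1.0).mul(zt));                               // c_t = f*c_{t-1} + (1-f)*x~
    INDArray ht = rt.mul(Transforms.tanh(ct)).add(rt.rsub(1.0).mul(xt));                // h_t = r*tanh(c_t) + (1-r)*x_t
    return new INDArray[]{ht, ct};                                                      // same pair the op outputs
}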

View File

@@ -33,7 +33,7 @@ namespace ops {
  * Input arrays:
  *    0: input 3d tensor with shape [bS x K x N], N - number of time steps, bS - batch size, K - number of features
  *    1: 2d tensor of weights [3K x K]
- *    2: row of biases with twice length [1 × 2K]
+ *    2: row of biases with twice length [1 x 2K]
  *    3: 2d tensor of previous cell state [bS x K]
  *    4: optional, 2d tensor of dropout mask [bS x K]
  *
@@ -52,7 +52,7 @@ namespace ops {
  * Input arrays:
  *    0: input 3d tensor with shape [N x bS x 2K], N - number of time steps, bS - batch size, K - number of features
  *    1: 2d tensor of weights [2K x 6K]
- *    2: row of biases with twice length [1 × 4K]
+ *    2: row of biases with twice length [1 x 4K]
  *    3: 2d tensor of previous cell state [bS x 2K]
  *    4: optional, 2d tensor of dropout mask [bS x 2K]
  *
@@ -72,7 +72,7 @@ namespace ops {
  * Input arrays:
  *    0: input 3d tensor with shape [bS x K x N], N - number of time steps, bS - batch size, K - number of features
  *    1: 2d tensor of weights [3K x K]
- *    2: row of biases with twice length [1 × 2K]
+ *    2: row of biases with twice length [1 x 2K]
  *    3: 2d tensor of previous cell state [bS x K]
  *    4: 3d tensor of cell state [bS x K x N]
  *    5: 2d tensor of cell state gradients [bS x K]
@@ -96,7 +96,7 @@ namespace ops {
  * Input arrays:
  *    0: input 3d tensor with shape [N x bS x 2K], N - number of time steps, bS - batch size, K - number of features
  *    1: 2d tensor of weights [2K x 6K]
- *    2: row of biases with twice length [1 × 4K]
+ *    2: row of biases with twice length [1 x 4K]
  *    3: 2d tensor of previous cell state [bS x 2K]
  *    4: 3d tensor of cell state [N x bS x 2K]
  *    5: 2d tensor of cell state gradients [bS x 2K]
@@ -239,7 +239,7 @@ namespace ops {
  *    0: input with shape [batchSize x inSize], batchSize - batch size, inSize - number of features
  *    1: previous cell state [batchSize x inSize], that is at previous time step t-1
  *    2: weights [inSize x 3*inSize]
- *    3: biases [1 × 2*inSize]
+ *    3: biases [1 x 2*inSize]
  *
  * Output arrays:
  *    0: current cell output [batchSize x inSize], that is at current time step t

View File

@@ -110,7 +110,7 @@ static void sruBI_(NDArray* x, const NDArray* w, const NDArray* b, const NDArray
 // x      input 3d tensor [time x bS x 2*inSize], time - number of time steps, bS - batch size, inSize - number of features
 // w      2d tensor of weights [2*inSize x 6*inSize]
-// b      row of biases with twice length [1 × 4*inSize]
+// b      row of biases with twice length [1 x 4*inSize]
 // c0     2d tensor of initial state [bS x 2*inSize] at time t=0
 // mask   optional, 2d tensor of dropout mask [bS x 2*inSize]
@@ -193,7 +193,7 @@ static void sruBIBP_(NDArray* x, const NDArray* w, const NDArray* b, const NDArr
 // x        input 3d tensor [time x bS x 2*inSize], time - number of time steps, bS - batch size, inSize - number of features
 // w        2d tensor of weights [2*inSize x 6*inSize]
-// b        row of biases with twice length [1 × 4*inSize]
+// b        row of biases with twice length [1 x 4*inSize]
 // c0       2d tensor of initial state [bS x 2*inSize] at time t=0
 // ct       [time x bS x 2*inSize]
 // inGradC0 [bS x 2*inSize]

View File

@@ -35,9 +35,24 @@ import org.nd4j.linalg.api.ndarray.INDArray;
 import org.nd4j.linalg.api.ops.CustomOpDescriptor;
 import org.nd4j.linalg.api.ops.DefaultOpConverter;
 import org.nd4j.linalg.api.ops.DynamicCustomOp;
+import org.nd4j.linalg.api.ops.custom.BarnesEdgeForces;
+import org.nd4j.linalg.api.ops.custom.BarnesHutGains;
+import org.nd4j.linalg.api.ops.custom.BarnesHutSymmetrize;
+import org.nd4j.linalg.api.ops.custom.SpTreeCell;
 import org.nd4j.linalg.api.ops.impl.broadcast.bool.*;
+import org.nd4j.linalg.api.ops.impl.layers.ExternalErrorsFunction;
+import org.nd4j.linalg.api.ops.impl.loss.bp.*;
+import org.nd4j.linalg.api.ops.impl.meta.InvertedPredicateMetaOp;
+import org.nd4j.linalg.api.ops.impl.meta.PostulateMetaOp;
+import org.nd4j.linalg.api.ops.impl.meta.PredicateMetaOp;
+import org.nd4j.linalg.api.ops.impl.meta.ReduceMetaOp;
+import org.nd4j.linalg.api.ops.impl.nlp.CbowRound;
+import org.nd4j.linalg.api.ops.impl.nlp.SkipGramRound;
+import org.nd4j.linalg.api.ops.impl.reduce.MmulBp;
 import org.nd4j.linalg.api.ops.impl.reduce.bool.All;
 import org.nd4j.linalg.api.ops.impl.reduce.bool.Any;
+import org.nd4j.linalg.api.ops.impl.reduce.bool.IsInf;
+import org.nd4j.linalg.api.ops.impl.reduce.bool.IsNaN;
 import org.nd4j.linalg.api.ops.impl.reduce.longer.MatchCondition;
 import org.nd4j.linalg.api.ops.impl.reduce3.EqualsWithEps;
 import org.nd4j.linalg.api.ops.impl.reduce.NormalizeMoments;
@@ -49,21 +64,26 @@ import org.nd4j.linalg.api.ops.impl.layers.convolution.*;
 import org.nd4j.linalg.api.ops.impl.scalar.PowDerivative;
 import org.nd4j.linalg.api.ops.impl.scalar.ScalarRemainder;
 import org.nd4j.linalg.api.ops.impl.scalar.comparison.ScalarSetValue;
-import org.nd4j.linalg.api.ops.impl.shape.ConfusionMatrix;
-import org.nd4j.linalg.api.ops.impl.shape.Eye;
-import org.nd4j.linalg.api.ops.impl.shape.MergeSum;
-import org.nd4j.linalg.api.ops.impl.shape.OneHot;
+import org.nd4j.linalg.api.ops.impl.shape.*;
 import org.nd4j.linalg.api.ops.impl.shape.bp.ConcatBp;
 import org.nd4j.linalg.api.ops.impl.shape.bp.SliceBp;
 import org.nd4j.linalg.api.ops.impl.shape.bp.StridedSliceBp;
 import org.nd4j.linalg.api.ops.impl.shape.bp.TileBp;
-import org.nd4j.linalg.api.ops.impl.transforms.custom.InvertPermutation;
+import org.nd4j.linalg.api.ops.impl.transforms.Assert;
+import org.nd4j.linalg.api.ops.impl.transforms.bool.BooleanNot;
+import org.nd4j.linalg.api.ops.impl.transforms.bool.MatchConditionTransform;
+import org.nd4j.linalg.api.ops.impl.transforms.custom.*;
 import org.nd4j.linalg.api.ops.impl.transforms.floating.Histogram;
 import org.nd4j.linalg.api.ops.impl.transforms.pairwise.BinaryMinimalRelativeError;
 import org.nd4j.linalg.api.ops.impl.transforms.pairwise.arithmetic.bp.*;
 import org.nd4j.linalg.api.ops.impl.transforms.gradient.*;
 import org.nd4j.linalg.api.ops.impl.transforms.gradient.SigmoidDerivative;
 import org.nd4j.linalg.api.ops.impl.transforms.gradient.TanhDerivative;
+import org.nd4j.linalg.api.ops.impl.transforms.pairwise.bool.Not;
+import org.nd4j.linalg.api.ops.impl.transforms.segment.UnsortedSegmentMax;
+import org.nd4j.linalg.api.ops.impl.transforms.segment.bp.*;
+import org.nd4j.linalg.api.ops.impl.transforms.strict.GELUDerivative;
+import org.nd4j.linalg.api.ops.impl.transforms.strict.PreciseGELUDerivative;
 import org.nd4j.linalg.api.ops.impl.transforms.strict.SwishDerivative;
 import org.nd4j.linalg.api.ops.impl.transforms.strict.TanDerivative;
 import org.nd4j.linalg.api.ops.persistence.RestoreV2;
@@ -71,6 +91,7 @@ import org.nd4j.linalg.api.ops.persistence.SaveV2;
 import org.nd4j.linalg.api.ops.random.compat.RandomStandardNormal;
 import org.nd4j.linalg.api.ops.random.custom.DistributionUniform;
 import org.nd4j.linalg.api.ops.random.impl.*;
+import org.nd4j.linalg.api.ops.random.impl.Linspace;
 import org.nd4j.linalg.api.shape.LongShapeDescriptor;
 import org.nd4j.linalg.factory.Nd4j;
 import org.nd4j.linalg.function.Function;
@@ -681,10 +702,22 @@ public class OpValidation {
            }
        }
+       int countLibnd4jIgnored = 0;
        if(logUnmappedLibnd4jOps ){
+           Set<String> ignoreLibnd4j = excludeFromLibnd4jCustomOpMapping();
            log.info(" --- Libnd4j Ops Not Mapped ---");
            for(long l : nonMappedLibnd4jOps){
                Pair<List<String>,CustomOpDescriptor> p = dedupedCustomOps.get(l);
+               boolean foundIgnore = false;
+               for(String s : p.getFirst()){
+                   if(ignoreLibnd4j.contains(s)){
+                       foundIgnore = true;
+                       countLibnd4jIgnored++;
+                       break;
+                   }
+               }
+               if(foundIgnore)
+                   continue;
                log.info("Not mapped libnd4j custom op: {} (hash: {})", p.getFirst(), l);
            }
        }
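The new filter above is a plain loop over the op's aliases. For reference, the same membership test can be phrased in one line with streams — a sketch of an equivalent formulation, not the committed code:

               boolean foundIgnore = p.getFirst().stream().anyMatch(ignoreLibnd4j::contains); // any alias in the ignore set?
               if(foundIgnore){
                   countLibnd4jIgnored++;
                   continue;
               }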
@@ -712,6 +745,7 @@
        }
        if(logUnmappedTFOps){
+           log.info(" --- TF Ops Not Mapped for Import ---");
            Map<String,OpDef> allTFOps;
            try{
                allTFOps = TensorflowDescriptorParser.opDescs();
@@ -760,7 +794,7 @@
        String fracTfStr = String.format("%.2f", 100.0 * tfFrac);
        int countLibnd4jMapped = countTotalLibnd4jOps - nonMappedLibnd4jOps.size();
-       String fracLibnd4j = String.format("%.2f", 100.0 * (countLibnd4jMapped / (double)countTotalLibnd4jOps));
+       String fracLibnd4j = String.format("%.2f", 100.0 * (countLibnd4jMapped / (double)(countTotalLibnd4jOps - countLibnd4jIgnored)));
        String fracTFMappedTested = String.format("%.2f", 100.0 * tfOpsWithImportTests / (double)(totalTFMappedOps-tfImportIgnored));
@@ -772,7 +806,7 @@
        log.info("({} ops excluded from fwd+gradient tests)", excludedFromAllTestCoverage.size());
        log.info("TF mapped ops: {} of {} ({}%)", countTfMapped, countTf, fracTfStr);
        log.info("SD ops with TF import mapping + test {} of {} ({}%) - {} ignored for coverage", tfOpsWithImportTests, (totalTFMappedOps-tfImportIgnored), fracTFMappedTested, tfImportIgnored);
-       log.info("Libnd4j mapped ops: {} of {} ({}%)", countLibnd4jMapped, countTotalLibnd4jOps, fracLibnd4j);
+       log.info("Libnd4j mapped ops: {} of {} ({}%) - {} excluded for coverage", countLibnd4jMapped, countTotalLibnd4jOps, fracLibnd4j, countLibnd4jIgnored);
        log.info("*****************************************************");
    }
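To see what the denominator change does to the reported percentage, a quick sketch with made-up counts (the real values come from the op tables at runtime):

       // Hypothetical counts, for illustration only: 500 total libnd4j ops, 450 mapped, 13 ignored.
       // Before this change: 100.0 * 450 / 500 = 90.00%. After: 100.0 * 450 / (500 - 13) = 92.40%.
       int countTotalLibnd4jOps = 500, countLibnd4jMapped = 450, countLibnd4jIgnored = 13;
       String fracLibnd4j = String.format("%.2f",
               100.0 * (countLibnd4jMapped / (double)(countTotalLibnd4jOps - countLibnd4jIgnored)));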
@@ -832,9 +866,12 @@
                CumProdBp.class,
                DotBp.class,
                SquaredNormBp.class,
+               SoftmaxBp.class,
                CubeDerivative.class,
                ELUDerivative.class,
+               GELUDerivative.class,
+               PreciseGELUDerivative.class,
                HardSigmoidDerivative.class,
                HardTanhDerivative.class,
                LeakyReLUDerivative.class,
@@ -872,13 +909,54 @@
                SliceBp.class,
                StridedSliceBp.class,
-               //We can't use these dropout ops in SameDiff: https://github.com/deeplearning4j/deeplearning4j/issues/5650
-               DropOut.class,
-               DropOutInverted.class,
-               AlphaDropOut.class,
-               Choice.class,
-               ProbablisticMerge.class
+               MmulBp.class,
+               DotProductAttentionBp.class,
+               MultiHeadDotProductAttentionBp.class,
+               LayerNormBp.class,
+               StandardizeBp.class,
+               DynamicPartitionBp.class,
+               AbsoluteDifferenceLossBp.class,
+               CosineDistanceLossBp.class,
+               HingeLossBp.class,
+               HuberLossBp.class,
+               LogLossBp.class,
+               LogPoissonLossBp.class,
+               MeanPairwiseSquaredErrorLossBp.class,
+               MeanSquaredErrorLossBp.class,
+               SigmoidCrossEntropyLossBp.class,
+               SoftmaxCrossEntropyLossBp.class,
+               SparseSoftmaxCrossEntropyLossWithLogitsBp.class,
+               SegmentMaxBp.class,
+               SegmentMeanBp.class,
+               SegmentMinBp.class,
+               SegmentProdBp.class,
+               SegmentSumBp.class,
+               UnsortedSegmentMaxBp.class,
+               UnsortedSegmentMeanBp.class,
+               UnsortedSegmentMinBp.class,
+               UnsortedSegmentProdBp.class,
+               UnsortedSegmentSqrtNBp.class,
+               UnsortedSegmentSumBp.class,
+               //Not intended for general users; only used in DL4J SameDiff integration + tested adequately there
+               ExternalErrorsFunction.class,
+               //Meta-Ops: not available in SameDiff
+               InvertedPredicateMetaOp.class,
+               PostulateMetaOp.class,
+               PredicateMetaOp.class,
+               ReduceMetaOp.class,
+               //Ops not intended to be used in SameDiff:
+               BarnesEdgeForces.class,
+               BarnesHutGains.class,
+               BarnesHutSymmetrize.class,
+               SpTreeCell.class,
+               CbowRound.class,
+               SkipGramRound.class
        );
        return new HashSet<>(list);
@@ -907,9 +985,21 @@
                InvertPermutation.class, //Uses integer indices
                ConfusionMatrix.class, //Integer indices
                Linspace.class, //No input array
-               //Exclude boolean operations:
+               Assert.class,
+               //Exclude boolean operations, boolean reductions, etc:
                Any.class,
                All.class,
+               IsInf.class,
+               org.nd4j.linalg.api.ops.impl.transforms.bool.IsInf.class,
+               IsNaN.class,
+               org.nd4j.linalg.api.ops.impl.transforms.bool.IsNaN.class,
+               BooleanNot.class,
+               Not.class,
+               MatchConditionTransform.class,
+               InTopK.class,
+               IsNonDecreasing.class,
+               IsStrictlyIncreasing.class,
+               IsNumericTensor.class,
                //Exclude index accumulations (index out, not real-valued)
                FirstIndex.class,
                IAMax.class,
@@ -917,6 +1007,12 @@
                IMax.class,
                IMin.class,
                LastIndex.class,
+               //Exclude ops that output integer types only:
+               Shape.class,
+               ShapeN.class,
+               SizeAt.class,
                //Exclude Random ops
                RandomStandardNormal.class,
                DistributionUniform.class,
@@ -949,7 +1045,12 @@
                ProdBp.class,
                StandardDeviationBp.class,
                SumBp.class,
-               VarianceBp.class
+               VarianceBp.class,
+               LogicalAnd.class,
+               LogicalNot.class,
+               LogicalOr.class,
+               LogicalXor.class
        );
        return new HashSet<>(list);
@@ -981,6 +1082,72 @@
                "BatchSelfAdjointEigV2", //Deprecated in favor of "SelfAdjointEigV2"
                "BatchSvd", //Deprecated in favor of "Svd"
+               //These we will likely never support importing
+               "ExperimentalBytesProducedStatsDataset",
+               "ExperimentalCSVDataset",
+               "ExperimentalDatasetCardinality",
+               "ExperimentalDatasetToTFRecord",
+               "ExperimentalDenseToSparseBatchDataset",
+               "ExperimentalDirectedInterleaveDataset",
+               "ExperimentalGroupByReducerDataset",
+               "ExperimentalGroupByWindowDataset",
+               "ExperimentalIdentityIndexedDataset",
+               "ExperimentalIgnoreErrorsDataset",
+               "ExperimentalIndexedDatasetGet",
+               "ExperimentalIndexedDatasetMaterialize",
+               "ExperimentalIteratorGetDevice",
+               "ExperimentalLMDBDataset",
+               "ExperimentalLatencyStatsDataset",
+               "ExperimentalMapAndBatchDataset",
+               "ExperimentalMapDataset",
+               "ExperimentalMatchingFilesDataset",
+               "ExperimentalMaterializedIndexDatasetHandle",
+               "ExperimentalMaxIntraOpParallelismDataset",
+               "ExperimentalNonSerializableDataset",
+               "ExperimentalNumaMapAndBatchDataset",
+               "ExperimentalParallelInterleaveDataset",
+               "ExperimentalParseExampleDataset",
+               "ExperimentalPrivateThreadPoolDataset",
+               "ExperimentalRandomDataset",
+               "ExperimentalScanDataset",
+               "ExperimentalSetStatsAggregatorDataset",
+               "ExperimentalSleepDataset",
+               "ExperimentalSlidingWindowDataset",
+               "ExperimentalSqlDataset",
+               "ExperimentalStatsAggregatorHandle",
+               "ExperimentalStatsAggregatorSummary",
+               "ExperimentalThreadPoolDataset",
+               "ExperimentalThreadPoolHandle",
+               "ExperimentalUnbatchDataset",
+               "ExperimentalUniqueDataset",
+               "DebugIdentity",
+               "NcclAllReduce",
+               "NcclBroadcast",
+               "NcclReduce",
+               //Can't import these without embedding entire python runtime and dependencies
+               "PyFunc",
+               "PyFuncStateless",
+               //"QuantizedX" ops are deprecated / no longer supported ("standard" ops have quantized support in many cases)
+               "QuantizedAdd",
+               "QuantizedAvgPool",
+               "QuantizedBatchNormWithGlobalNormalization",
+               "QuantizedBiasAdd",
+               "QuantizedConcat",
+               "QuantizedConv2D",
+               "QuantizedInstanceNorm",
+               "QuantizedMatMul",
+               "QuantizedMaxPool",
+               "QuantizedMul",
+               "QuantizedRelu",
+               "QuantizedRelu6",
+               "QuantizedReluX",
+               "QuantizedReshape",
+               "QuantizedResizeBilinear",
                //All of the following ops - not available in TF (can't find them) - op mapping is wrong?
                //TODO: Check these and remove the import mapping from the Java classes if they are indeed bad
                "HardTanh",
@@ -993,12 +1160,37 @@
                "absargmin",
                "entropy_shannon", //This is a thing, but quite different from our op: https://www.tensorflow.org/versions/r1.2/api_docs/python/tf/contrib/bayesflow/entropy/entropy_shannon
                "count_zero"
        );
        return new HashSet<>(list);
    }
+
+   /**
+    * These ops are ones we will never map at Java level for one reason or another
+    */
+   private static Set<String> excludeFromLibnd4jCustomOpMapping(){
+       Set<String> out = new HashSet<>();
+       Collections.addAll(out,
+               //Test and misc ops:
+               "TestOp2i2o", "testop2i2o",
+               "firas_sparse",
+               "test_output_reshape",
+               "test_scalar",
+               "testcustom",
+               "testreduction",
+               //"to_x" ops - we'll use cast instead in SameDiff (which supports all dtypes)
+               "to_double",
+               "to_float16",
+               "to_float32",
+               "to_int32",
+               "to_int64",
+               "to_uint32",
+               "to_uint64"
+       );
+       return out;
+   }
 }
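The "to_x" exclusions lean on SameDiff's generic cast, which covers every dtype those ops handle individually. A hedged usage sketch (assuming the SDVariable.castTo API; the variable names are illustrative):

   SDVariable asDouble = input.castTo(DataType.DOUBLE);   // stands in for libnd4j's "to_double"
   SDVariable asInt    = input.castTo(DataType.INT);      // stands in for "to_int32"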

View File

@@ -702,7 +702,7 @@ public class Evaluation extends BaseEvaluation<Evaluation> {
            builder.append("\nCost array: ").append(Arrays.toString(costArray.dup().data().asFloat()));
        }
        //Note that we could report micro-averaged too - but these are the same as accuracy
-       //"Note that for “micro�?-averaging in a multiclass setting with all labels included will produce equal precision, recall and F,"
+       //"Note that for “micro-averaging in a multiclass setting with all labels included will produce equal precision, recall and F,"
        //http://scikit-learn.org/stable/modules/model_evaluation.html
        builder.append("\n\n");
@@ -884,7 +884,7 @@
    /**
     * When calculating the (macro) average precision, how many classes are excluded from the average due to
-    * no predictions i.e., precision would be the edge case of 0/0
+    * no predictions - i.e., precision would be the edge case of 0/0
     *
     * @return Number of classes excluded from the average precision
     */
@@ -894,7 +894,7 @@
    /**
     * When calculating the (macro) average Recall, how many classes are excluded from the average due to
-    * no predictions i.e., recall would be the edge case of 0/0
+    * no predictions - i.e., recall would be the edge case of 0/0
     *
     * @return Number of classes excluded from the average recall
     */
@@ -904,7 +904,7 @@
    /**
     * When calculating the (macro) average F1, how many classes are excluded from the average due to
-    * no predictions i.e., F1 would be calculated from a precision or recall of 0/0
+    * no predictions - i.e., F1 would be calculated from a precision or recall of 0/0
     *
     * @return Number of classes excluded from the average F1
     */
@@ -914,7 +914,7 @@
    /**
     * When calculating the (macro) average FBeta, how many classes are excluded from the average due to
-    * no predictions i.e., FBeta would be calculated from a precision or recall of 0/0
+    * no predictions - i.e., FBeta would be calculated from a precision or recall of 0/0
     *
     * @return Number of classes excluded from the average FBeta
     */
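For concreteness, "excluded from the average" in these javadocs means a class whose denominator would be zero simply contributes no term to the macro average. A minimal sketch of macro precision under that rule (illustrative only, not the Evaluation implementation; tp/fp are hypothetical per-class counts):

   // Macro-averaged precision, skipping classes with no predictions (the 0/0 edge case).
   static double macroPrecision(int[] tp, int[] fp) {
       double sum = 0;
       int included = 0;
       for (int i = 0; i < tp.length; i++) {
           if (tp[i] + fp[i] == 0)
               continue;                          // class i never predicted: excluded from the average
           sum += tp[i] / (double)(tp[i] + fp[i]);
           included++;
       }
       return included == 0 ? Double.NaN : sum / included;
   }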

View File

@@ -83,7 +83,7 @@ public class ScatterUpdate implements CustomOp {
     */
    @Override
    public String opName() {
-       return op.opName();
+       return "scatter_update";
    }

    /**

View File

@@ -28,7 +28,7 @@ public class SRUCellConfiguration {
    NDArray<T>* xt = INPUT_VARIABLE(0); // input [batchSize x inSize], batchSize - batch size, inSize - number of features
    NDArray<T>* ct_1 = INPUT_VARIABLE(1); // previous cell state ct [batchSize x inSize], that is at previous time step t-1
    NDArray<T>* w = INPUT_VARIABLE(2); // weights [inSize x 3*inSize]
-   NDArray<T>* b = INPUT_VARIABLE(3); // biases [1 × 2*inSize]
+   NDArray<T>* b = INPUT_VARIABLE(3); // biases [1 x 2*inSize]
    NDArray<T>* ht = OUTPUT_VARIABLE(0); // current cell output [batchSize x inSize], that is at current time step t
    NDArray<T>* ct = OUTPUT_VARIABLE(1); // current cell state [batchSize x inSize], that is at current time step t

View File

@@ -26,7 +26,7 @@ public class SRUConfiguration {
    /**
     * NDArray<T>* input = INPUT_VARIABLE(0); // X, input 3d tensor [bS x K x N], N - number of time steps, bS - batch size, K - number of features
       NDArray<T>* weights = INPUT_VARIABLE(1); // W, 2d tensor of weights [3K x K]
-      NDArray<T>* bias = INPUT_VARIABLE(2); // B, row of biases with twice length [1 × 2*K]
+      NDArray<T>* bias = INPUT_VARIABLE(2); // B, row of biases with twice length [1 x 2*K]
       NDArray<T>* init = INPUT_VARIABLE(3); // C_{0}, 2d tensor of initial state [bS x K] at time t=0
     */

View File

@@ -103,7 +103,7 @@ public class AdaDeltaUpdater implements GradientUpdater<AdaDelta> {
        double epsilon = config.getEpsilon();

        //Line 4 of Algorithm 1: https://arxiv.org/pdf/1212.5701v1.pdf
-       //E[g^2]_t = rho * E[g^2]_{t1} + (1-rho)*g^2_t
+       //E[g^2]_t = rho * E[g^2]_{t-1} + (1-rho)*g^2_t
        msg.muli(rho).addi(gradient.mul(gradient).muli(1 - rho));

        //Calculate update:
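The fixed comment is line 4 of Algorithm 1 in Zeiler's AdaDelta paper. For context, the whole recurrence in one place, as a minimal scalar sketch (method and variable names are illustrative, not the updater's actual fields):

   // One AdaDelta step for a single parameter x with gradient g.
   // State: msg = E[g^2], msdx = E[dx^2], both decayed with rho.
   static double[] adaDeltaStep(double x, double g, double msg, double msdx, double rho, double epsilon) {
       msg = rho * msg + (1 - rho) * g * g;                                    // line 4: accumulate squared gradients
       double dx = -Math.sqrt(msdx + epsilon) / Math.sqrt(msg + epsilon) * g;  // line 5: RMS-scaled update
       msdx = rho * msdx + (1 - rho) * dx * dx;                                // line 6: accumulate squared updates
       return new double[]{x + dx, msg, msdx};                                 // line 7: apply update, return new state
   }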