Op validation improvements + encoding fix (#49)
* Op validation updates
  Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Small logging fix
  Signed-off-by: AlexDBlack <blacka101@gmail.com>
* #7986 Fix minor character encoding issues
  Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Small ignore fix
  Signed-off-by: AlexDBlack <blacka101@gmail.com>

Branch: master
parent 88ea9a49eb
commit cc65c01118
@@ -145,7 +145,7 @@ DECLARE_SHAPE_FN(sru) {
 CUSTOM_OP_IMPL(sru_bp, 8, 4, true, 0, 0) {
     auto x = INPUT_VARIABLE(0); // X, input 3d tensor [bS x K x N], N - number of time steps, bS - batch size, K - number of features
     auto w = INPUT_VARIABLE(1); // W, 2d tensor of weights [3K x K]
-    auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 × 2*K]
+    auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 x 2*K]
     auto c0 = INPUT_VARIABLE(3); // C_{0}, 2d tensor of initial state [bS x K] at time t=0
     auto c = INPUT_VARIABLE(4); // C, [bS x K x N]
     auto inGradCt = INPUT_VARIABLE(5); // [bS x K]

@@ -331,7 +331,7 @@ CUSTOM_OP_IMPL(sru_bi, 5, 2, true, 0, 0) {

     auto x = INPUT_VARIABLE(0); // X, input 3d tensor [time x bS x 2*inSize], time - number of time steps, bS - batch size, inSize - number of features
     auto w = INPUT_VARIABLE(1); // W, 2d tensor of weights [2*inSize x 6*inSize]
-    auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 × 4*inSize]
+    auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 x 4*inSize]
     auto c0 = INPUT_VARIABLE(3); // C_{0}, 2d tensor of initial state [bS x 2*inSize] at time t=0
     NDArray* mask = block.width() > 4 ? INPUT_VARIABLE(4) : nullptr; // optional, 2d tensor of dropout mask [bS x 2*inSize]

@@ -431,7 +431,7 @@ CUSTOM_OP_IMPL(sru_bi_bp, 8, 4, true, 0, 0) {

     auto x = INPUT_VARIABLE(0); // X, input 3d tensor [time x bS x 2*inSize], time - number of time steps, bS - batch size, inSize - number of features
     auto w = INPUT_VARIABLE(1); // W, 2d tensor of weights [2*inSize x 6*inSize]
-    auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 × 4*inSize]
+    auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 x 4*inSize]
     auto c0 = INPUT_VARIABLE(3); // C_{0}, 2d tensor of initial state [bS x 2*inSize] at time t=0
     auto ct = INPUT_VARIABLE(4); // C, [time x bS x 2*inSize]
     auto inGradC0 = INPUT_VARIABLE(5); // [bS x 2*inSize]

@@ -553,7 +553,7 @@ DECLARE_SHAPE_FN(sru_bi_bp) {
 * Input arrays:
 * 0: input 3d tensor with shape [bS x K x N], N - number of time steps, bS - batch size, K - number of features
 * 1: 2d tensor of weights [3K x K]
-* 2: row of biases with twice length [1 × 2K]
+* 2: row of biases with twice length [1 x 2K]
 * 3: 2d tensor of previous cell state [bS x K]
 * 4: optional, 2d tensor of dropout mask [bS x K]
 *

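Note: the `×` → `x` change in the bias comment above is the substance of the character-encoding fix referenced in the commit message (#7986). As a usage aside, the documented input layout is exactly what a caller must supply; below is a minimal, hypothetical sketch of invoking the `sru` op from Java via nd4j's `DynamicCustomOp` builder (shapes follow the comment; the class name, sizes, and random data are illustrative only, and output handling may differ by nd4j version):

```java
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.ops.DynamicCustomOp;
import org.nd4j.linalg.factory.Nd4j;

public class SruInvocationSketch {
    public static void main(String[] args) {
        int bS = 2, K = 3, N = 5;                          // batch size, features, time steps

        INDArray x  = Nd4j.rand(DataType.FLOAT, bS, K, N); // 0: input [bS x K x N]
        INDArray w  = Nd4j.rand(DataType.FLOAT, 3 * K, K); // 1: weights [3K x K]
        INDArray b  = Nd4j.rand(DataType.FLOAT, 1, 2 * K); // 2: biases [1 x 2K]
        INDArray c0 = Nd4j.rand(DataType.FLOAT, bS, K);    // 3: previous cell state [bS x K]

        // Input 4 (dropout mask) is optional and omitted here
        DynamicCustomOp op = DynamicCustomOp.builder("sru")
                .addInputs(x, w, b, c0)
                .build();
        INDArray[] out = Nd4j.exec(op);                    // output shapes inferred by the op
    }
}
```
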
@@ -572,7 +572,7 @@ DECLARE_SHAPE_FN(sru_bi_bp) {
 * Input arrays:
 * 0: input 3d tensor with shape [bS x K x N], N - number of time steps, bS - batch size, K - number of features
 * 1: 2d tensor of weights [3K x K]
-* 2: row of biases with twice length [1 × 2K]
+* 2: row of biases with twice length [1 x 2K]
 * 3: 2d tensor of previous cell state [bS x K]
 * 4: optional, 2d tensor of dropout mask [bS x K]
 *

@@ -592,7 +592,7 @@ DECLARE_SHAPE_FN(sru_bi_bp) {
 * Input arrays:
 * 0: input 3d tensor with shape [bS x K x N], N - number of time steps, bS - batch size, K - number of features
 * 1: 2d tensor of weights [3K x K]
-* 2: row of biases with twice length [1 × 2K]
+* 2: row of biases with twice length [1 x 2K]
 * 3: 2d tensor of previous cell state [bS x K]
 * 4: 3d tensor of cell state [bS x K x N]
 * 5: 2d tensor of cell state gradients [bS x K]

@@ -622,7 +622,7 @@ DECLARE_SHAPE_FN(sru_bi_bp) {

 // auto input = INPUT_VARIABLE(0); // X, input 3d tensor [bS x K x N], N - number of time steps, bS - batch size, K - number of features
 // auto weights = INPUT_VARIABLE(1); // W, 2d tensor of weights [3K x K]
-// auto bias = INPUT_VARIABLE(2); // B, row of biases with twice length [1 × 2*K]
+// auto bias = INPUT_VARIABLE(2); // B, row of biases with twice length [1 x 2*K]
 // auto init = INPUT_VARIABLE(3); // C_{0}, 2d tensor of initial state [bS x K] at time t=0
 // NDArray* mask = nullptr; // optional, 2d tensor of dropout mask [bS x K]

@@ -710,7 +710,7 @@ DECLARE_SHAPE_FN(sru_bi_bp) {
 // CUSTOM_OP_IMPL(sru_old, 5, 2, false, 0, 0) {
 // auto x = INPUT_VARIABLE(0); // X, input 3d tensor [bS x inSize x time], time - number of time steps, bS - batch size, inSize - number of features
 // auto w = INPUT_VARIABLE(1); // W, 2d tensor of weights [3K x inSize]
-// auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 × 2*inSize]
+// auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 x 2*inSize]
 // auto c0 = INPUT_VARIABLE(3); // C_{0}, 2d tensor of initial state [bS x inSize] at time t=0
 // NDArray* mask = nullptr; // optional, 2d tensor of dropout mask [bS x inSize]

@@ -820,7 +820,7 @@ DECLARE_SHAPE_FN(sru_bi_bp) {

 // auto x = INPUT_VARIABLE(0); // X, input 3d tensor [bS x inSize x time], time - number of time steps, bS - batch size, inSize - number of features
 // auto w = INPUT_VARIABLE(1); // W, 2d tensor of weights [3*inSize x inSize]
-// auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 × 2*inSize]
+// auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 x 2*inSize]
 // auto c0 = INPUT_VARIABLE(3); // C_{0}, 2d tensor of initial state [bS x inSize] at time t=0
 // auto c = INPUT_VARIABLE(4); // C, [bS x inSize x time]
 // auto inGradCt = INPUT_VARIABLE(5); // [bS x inSize]

@@ -34,7 +34,7 @@ CUSTOM_OP_IMPL(sruCell, 4, 2, false, 0, 0) {
     auto xt = INPUT_VARIABLE(0); // input [bS x inSize], bS - batch size, inSize - number of features
     auto ct_1 = INPUT_VARIABLE(1); // previous cell state ct [bS x inSize], that is at previous time step t-1
     auto w = INPUT_VARIABLE(2); // weights [inSize x 3*inSize]
-    auto b = INPUT_VARIABLE(3); // biases [1 × 2*inSize]
+    auto b = INPUT_VARIABLE(3); // biases [1 x 2*inSize]

     auto ht = OUTPUT_VARIABLE(0); // current cell output [bS x inSize], that is at current time step t
     auto ct = OUTPUT_VARIABLE(1); // current cell state [bS x inSize], that is at current time step t

@@ -33,7 +33,7 @@ namespace ops {
 * Input arrays:
 * 0: input 3d tensor with shape [bS x K x N], N - number of time steps, bS - batch size, K - number of features
 * 1: 2d tensor of weights [3K x K]
-* 2: row of biases with twice length [1 × 2K]
+* 2: row of biases with twice length [1 x 2K]
 * 3: 2d tensor of previous cell state [bS x K]
 * 4: optional, 2d tensor of dropout mask [bS x K]
 *

@@ -52,7 +52,7 @@ namespace ops {
 * Input arrays:
 * 0: input 3d tensor with shape [N x bS x 2K], N - number of time steps, bS - batch size, K - number of features
 * 1: 2d tensor of weights [2K x 6K]
-* 2: row of biases with twice length [1 × 4K]
+* 2: row of biases with twice length [1 x 4K]
 * 3: 2d tensor of previous cell state [bS x 2K]
 * 4: optional, 2d tensor of dropout mask [bS x 2K]
 *

@@ -72,7 +72,7 @@ namespace ops {
 * Input arrays:
 * 0: input 3d tensor with shape [bS x K x N], N - number of time steps, bS - batch size, K - number of features
 * 1: 2d tensor of weights [3K x K]
-* 2: row of biases with twice length [1 × 2K]
+* 2: row of biases with twice length [1 x 2K]
 * 3: 2d tensor of previous cell state [bS x K]
 * 4: 3d tensor of cell state [bS x K x N]
 * 5: 2d tensor of cell state gradients [bS x K]

@@ -96,7 +96,7 @@ namespace ops {
 * Input arrays:
 * 0: input 3d tensor with shape [N x bS x 2K], N - number of time steps, bS - batch size, K - number of features
 * 1: 2d tensor of weights [2K x 6K]
-* 2: row of biases with twice length [1 × 4K]
+* 2: row of biases with twice length [1 x 4K]
 * 3: 2d tensor of previous cell state [bS x 2K]
 * 4: 3d tensor of cell state [N x bS x 2K]
 * 5: 2d tensor of cell state gradients [bS x 2K]

@@ -239,7 +239,7 @@ namespace ops {
 * 0: input with shape [batchSize x inSize], batchSize - batch size, inSize - number of features
 * 1: previous cell state [batchSize x inSize], that is at previous time step t-1
 * 2: weights [inSize x 3*inSize]
-* 3: biases [1 × 2*inSize]
+* 3: biases [1 x 2*inSize]
 *
 * Output arrays:
 * 0: current cell output [batchSize x inSize], that is at current time step t

@@ -110,7 +110,7 @@ static void sruBI_(NDArray* x, const NDArray* w, const NDArray* b, const NDArray

 // x     input 3d tensor [time x bS x 2*inSize], time - number of time steps, bS - batch size, inSize - number of features
 // w     2d tensor of weights [2*inSize x 6*inSize]
-// b     row of biases with twice length [1 × 4*inSize]
+// b     row of biases with twice length [1 x 4*inSize]
 // c0    2d tensor of initial state [bS x 2*inSize] at time t=0
 // mask  optional, 2d tensor of dropout mask [bS x 2*inSize]

@@ -193,7 +193,7 @@ static void sruBIBP_(NDArray* x, const NDArray* w, const NDArray* b, const NDArr

 // x        input 3d tensor [time x bS x 2*inSize], time - number of time steps, bS - batch size, inSize - number of features
 // w        2d tensor of weights [2*inSize x 6*inSize]
-// b        row of biases with twice length [1 × 4*inSize]
+// b        row of biases with twice length [1 x 4*inSize]
 // c0       2d tensor of initial state [bS x 2*inSize] at time t=0
 // ct       [time x bS x 2*inSize]
 // inGradC0 [bS x 2*inSize]

@@ -35,9 +35,24 @@ import org.nd4j.linalg.api.ndarray.INDArray;
 import org.nd4j.linalg.api.ops.CustomOpDescriptor;
 import org.nd4j.linalg.api.ops.DefaultOpConverter;
 import org.nd4j.linalg.api.ops.DynamicCustomOp;
+import org.nd4j.linalg.api.ops.custom.BarnesEdgeForces;
+import org.nd4j.linalg.api.ops.custom.BarnesHutGains;
+import org.nd4j.linalg.api.ops.custom.BarnesHutSymmetrize;
+import org.nd4j.linalg.api.ops.custom.SpTreeCell;
 import org.nd4j.linalg.api.ops.impl.broadcast.bool.*;
+import org.nd4j.linalg.api.ops.impl.layers.ExternalErrorsFunction;
+import org.nd4j.linalg.api.ops.impl.loss.bp.*;
+import org.nd4j.linalg.api.ops.impl.meta.InvertedPredicateMetaOp;
+import org.nd4j.linalg.api.ops.impl.meta.PostulateMetaOp;
+import org.nd4j.linalg.api.ops.impl.meta.PredicateMetaOp;
+import org.nd4j.linalg.api.ops.impl.meta.ReduceMetaOp;
+import org.nd4j.linalg.api.ops.impl.nlp.CbowRound;
+import org.nd4j.linalg.api.ops.impl.nlp.SkipGramRound;
+import org.nd4j.linalg.api.ops.impl.reduce.MmulBp;
 import org.nd4j.linalg.api.ops.impl.reduce.bool.All;
 import org.nd4j.linalg.api.ops.impl.reduce.bool.Any;
+import org.nd4j.linalg.api.ops.impl.reduce.bool.IsInf;
+import org.nd4j.linalg.api.ops.impl.reduce.bool.IsNaN;
 import org.nd4j.linalg.api.ops.impl.reduce.longer.MatchCondition;
 import org.nd4j.linalg.api.ops.impl.reduce3.EqualsWithEps;
 import org.nd4j.linalg.api.ops.impl.reduce.NormalizeMoments;

@@ -49,21 +64,26 @@ import org.nd4j.linalg.api.ops.impl.layers.convolution.*;
 import org.nd4j.linalg.api.ops.impl.scalar.PowDerivative;
 import org.nd4j.linalg.api.ops.impl.scalar.ScalarRemainder;
 import org.nd4j.linalg.api.ops.impl.scalar.comparison.ScalarSetValue;
-import org.nd4j.linalg.api.ops.impl.shape.ConfusionMatrix;
-import org.nd4j.linalg.api.ops.impl.shape.Eye;
-import org.nd4j.linalg.api.ops.impl.shape.MergeSum;
-import org.nd4j.linalg.api.ops.impl.shape.OneHot;
+import org.nd4j.linalg.api.ops.impl.shape.*;
 import org.nd4j.linalg.api.ops.impl.shape.bp.ConcatBp;
 import org.nd4j.linalg.api.ops.impl.shape.bp.SliceBp;
 import org.nd4j.linalg.api.ops.impl.shape.bp.StridedSliceBp;
 import org.nd4j.linalg.api.ops.impl.shape.bp.TileBp;
-import org.nd4j.linalg.api.ops.impl.transforms.custom.InvertPermutation;
+import org.nd4j.linalg.api.ops.impl.transforms.Assert;
+import org.nd4j.linalg.api.ops.impl.transforms.bool.BooleanNot;
+import org.nd4j.linalg.api.ops.impl.transforms.bool.MatchConditionTransform;
+import org.nd4j.linalg.api.ops.impl.transforms.custom.*;
 import org.nd4j.linalg.api.ops.impl.transforms.floating.Histogram;
 import org.nd4j.linalg.api.ops.impl.transforms.pairwise.BinaryMinimalRelativeError;
 import org.nd4j.linalg.api.ops.impl.transforms.pairwise.arithmetic.bp.*;
 import org.nd4j.linalg.api.ops.impl.transforms.gradient.*;
 import org.nd4j.linalg.api.ops.impl.transforms.gradient.SigmoidDerivative;
 import org.nd4j.linalg.api.ops.impl.transforms.gradient.TanhDerivative;
+import org.nd4j.linalg.api.ops.impl.transforms.pairwise.bool.Not;
+import org.nd4j.linalg.api.ops.impl.transforms.segment.UnsortedSegmentMax;
+import org.nd4j.linalg.api.ops.impl.transforms.segment.bp.*;
+import org.nd4j.linalg.api.ops.impl.transforms.strict.GELUDerivative;
+import org.nd4j.linalg.api.ops.impl.transforms.strict.PreciseGELUDerivative;
 import org.nd4j.linalg.api.ops.impl.transforms.strict.SwishDerivative;
 import org.nd4j.linalg.api.ops.impl.transforms.strict.TanDerivative;
 import org.nd4j.linalg.api.ops.persistence.RestoreV2;

@@ -71,6 +91,7 @@ import org.nd4j.linalg.api.ops.persistence.SaveV2;
 import org.nd4j.linalg.api.ops.random.compat.RandomStandardNormal;
 import org.nd4j.linalg.api.ops.random.custom.DistributionUniform;
 import org.nd4j.linalg.api.ops.random.impl.*;
+import org.nd4j.linalg.api.ops.random.impl.Linspace;
 import org.nd4j.linalg.api.shape.LongShapeDescriptor;
 import org.nd4j.linalg.factory.Nd4j;
 import org.nd4j.linalg.function.Function;

@@ -681,10 +702,22 @@ public class OpValidation {
             }
         }

+        int countLibnd4jIgnored = 0;
         if(logUnmappedLibnd4jOps ){
+            Set<String> ignoreLibnd4j = excludeFromLibnd4jCustomOpMapping();
             log.info(" --- Libnd4j Ops Not Mapped ---");
             for(long l : nonMappedLibnd4jOps){
                 Pair<List<String>,CustomOpDescriptor> p = dedupedCustomOps.get(l);
+                boolean foundIgnore = false;
+                for(String s : p.getFirst()){
+                    if(ignoreLibnd4j.contains(s)){
+                        foundIgnore = true;
+                        countLibnd4jIgnored++;
+                        break;
+                    }
+                }
+                if(foundIgnore)
+                    continue;
                 log.info("Not mapped libnd4j custom op: {} (hash: {})", p.getFirst(), l);
             }
         }

@@ -712,6 +745,7 @@ public class OpValidation {
         }

         if(logUnmappedTFOps){
+            log.info(" --- TF Ops Not Mapped for Import ---");
             Map<String,OpDef> allTFOps;
             try{
                 allTFOps = TensorflowDescriptorParser.opDescs();

@@ -760,7 +794,7 @@ public class OpValidation {
         String fracTfStr = String.format("%.2f", 100.0 * tfFrac);

         int countLibnd4jMapped = countTotalLibnd4jOps - nonMappedLibnd4jOps.size();
-        String fracLibnd4j = String.format("%.2f", 100.0 * (countLibnd4jMapped / (double)countTotalLibnd4jOps));
+        String fracLibnd4j = String.format("%.2f", 100.0 * (countLibnd4jMapped / (double)(countTotalLibnd4jOps - countLibnd4jIgnored)));

         String fracTFMappedTested = String.format("%.2f", 100.0 * tfOpsWithImportTests / (double)(totalTFMappedOps-tfImportIgnored));

@@ -772,7 +806,7 @@ public class OpValidation {
         log.info("({} ops excluded from fwd+gradient tests)", excludedFromAllTestCoverage.size());
         log.info("TF mapped ops: {} of {} ({}%)", countTfMapped, countTf, fracTfStr);
         log.info("SD ops with TF import mapping + test {} of {} ({}%) - {} ignored for coverage", tfOpsWithImportTests, (totalTFMappedOps-tfImportIgnored), fracTFMappedTested, tfImportIgnored);
-        log.info("Libnd4j mapped ops: {} of {} ({}%)", countLibnd4jMapped, countTotalLibnd4jOps, fracLibnd4j);
+        log.info("Libnd4j mapped ops: {} of {} ({}%) - {} excluded for coverage", countLibnd4jMapped, countTotalLibnd4jOps, fracLibnd4j, countLibnd4jIgnored);
         log.info("*****************************************************");
     }

@@ -832,9 +866,12 @@ public class OpValidation {
             CumProdBp.class,
             DotBp.class,
             SquaredNormBp.class,
+            SoftmaxBp.class,

             CubeDerivative.class,
             ELUDerivative.class,
+            GELUDerivative.class,
+            PreciseGELUDerivative.class,
             HardSigmoidDerivative.class,
             HardTanhDerivative.class,
             LeakyReLUDerivative.class,

@@ -872,13 +909,54 @@ public class OpValidation {

             SliceBp.class,
             StridedSliceBp.class,
+            MmulBp.class,
+            DotProductAttentionBp.class,
+            MultiHeadDotProductAttentionBp.class,
+            LayerNormBp.class,
+            StandardizeBp.class,
+            DynamicPartitionBp.class,

-            //We can't use these dropout ops in SameDiff: https://github.com/deeplearning4j/deeplearning4j/issues/5650
-            DropOut.class,
-            DropOutInverted.class,
-            AlphaDropOut.class,
-            Choice.class,
-            ProbablisticMerge.class
+            AbsoluteDifferenceLossBp.class,
+            CosineDistanceLossBp.class,
+            HingeLossBp.class,
+            HuberLossBp.class,
+            LogLossBp.class,
+            LogPoissonLossBp.class,
+            MeanPairwiseSquaredErrorLossBp.class,
+            MeanSquaredErrorLossBp.class,
+            SigmoidCrossEntropyLossBp.class,
+            SoftmaxCrossEntropyLossBp.class,
+            SparseSoftmaxCrossEntropyLossWithLogitsBp.class,
+
+            SegmentMaxBp.class,
+            SegmentMeanBp.class,
+            SegmentMinBp.class,
+            SegmentProdBp.class,
+            SegmentSumBp.class,
+            UnsortedSegmentMaxBp.class,
+            UnsortedSegmentMeanBp.class,
+            UnsortedSegmentMinBp.class,
+            UnsortedSegmentProdBp.class,
+            UnsortedSegmentSqrtNBp.class,
+            UnsortedSegmentSumBp.class,
+
+            //Not intended for general users; only used in DL4J SameDiff integration + tested adequately there
+            ExternalErrorsFunction.class,
+
+            //Meta-Ops: not available in SameDiff
+            InvertedPredicateMetaOp.class,
+            PostulateMetaOp.class,
+            PredicateMetaOp.class,
+            ReduceMetaOp.class,
+
+            //Ops not intended to be used in SameDiff:
+            BarnesEdgeForces.class,
+            BarnesHutGains.class,
+            BarnesHutSymmetrize.class,
+            SpTreeCell.class,
+            CbowRound.class,
+            SkipGramRound.class
         );

         return new HashSet<>(list);

@@ -907,9 +985,21 @@ public class OpValidation {
             InvertPermutation.class, //Uses integer indices
             ConfusionMatrix.class, //Integer indices
             Linspace.class, //No input array
-            //Exclude boolean operations:
+            Assert.class,
+            //Exclude boolean operations, boolean reductions, etc:
             Any.class,
             All.class,
+            IsInf.class,
+            org.nd4j.linalg.api.ops.impl.transforms.bool.IsInf.class,
+            IsNaN.class,
+            org.nd4j.linalg.api.ops.impl.transforms.bool.IsNaN.class,
+            BooleanNot.class,
+            Not.class,
+            MatchConditionTransform.class,
+            InTopK.class,
+            IsNonDecreasing.class,
+            IsStrictlyIncreasing.class,
+            IsNumericTensor.class,
             //Exclude index accumulations (index out, not real-valued)
             FirstIndex.class,
             IAMax.class,

@@ -917,6 +1007,12 @@ public class OpValidation {
             IMax.class,
             IMin.class,
             LastIndex.class,
+
+            //Exclude ops that output integer types only:
+            Shape.class,
+            ShapeN.class,
+            SizeAt.class,
+
             //Exclude Random ops
             RandomStandardNormal.class,
             DistributionUniform.class,

@@ -949,7 +1045,12 @@ public class OpValidation {
             ProdBp.class,
             StandardDeviationBp.class,
             SumBp.class,
-            VarianceBp.class
+            VarianceBp.class,
+
+            LogicalAnd.class,
+            LogicalNot.class,
+            LogicalOr.class,
+            LogicalXor.class
         );

         return new HashSet<>(list);

@@ -981,6 +1082,72 @@ public class OpValidation {
             "BatchSelfAdjointEigV2", //Deprecated in favor of "SelfAdjointEigV2"
             "BatchSvd", //Deprecated in favor of "Svd"
+
+            //These we will likely never support importing
+            "ExperimentalBytesProducedStatsDataset",
+            "ExperimentalCSVDataset",
+            "ExperimentalDatasetCardinality",
+            "ExperimentalDatasetToTFRecord",
+            "ExperimentalDenseToSparseBatchDataset",
+            "ExperimentalDirectedInterleaveDataset",
+            "ExperimentalGroupByReducerDataset",
+            "ExperimentalGroupByWindowDataset",
+            "ExperimentalIdentityIndexedDataset",
+            "ExperimentalIgnoreErrorsDataset",
+            "ExperimentalIndexedDatasetGet",
+            "ExperimentalIndexedDatasetMaterialize",
+            "ExperimentalIteratorGetDevice",
+            "ExperimentalLMDBDataset",
+            "ExperimentalLatencyStatsDataset",
+            "ExperimentalMapAndBatchDataset",
+            "ExperimentalMapDataset",
+            "ExperimentalMatchingFilesDataset",
+            "ExperimentalMaterializedIndexDatasetHandle",
+            "ExperimentalMaxIntraOpParallelismDataset",
+            "ExperimentalNonSerializableDataset",
+            "ExperimentalNumaMapAndBatchDataset",
+            "ExperimentalParallelInterleaveDataset",
+            "ExperimentalParseExampleDataset",
+            "ExperimentalPrivateThreadPoolDataset",
+            "ExperimentalRandomDataset",
+            "ExperimentalScanDataset",
+            "ExperimentalSetStatsAggregatorDataset",
+            "ExperimentalSleepDataset",
+            "ExperimentalSlidingWindowDataset",
+            "ExperimentalSqlDataset",
+            "ExperimentalStatsAggregatorHandle",
+            "ExperimentalStatsAggregatorSummary",
+            "ExperimentalThreadPoolDataset",
+            "ExperimentalThreadPoolHandle",
+            "ExperimentalUnbatchDataset",
+            "ExperimentalUniqueDataset",
+
+            "DebugIdentity",
+            "NcclAllReduce",
+            "NcclBroadcast",
+            "NcclReduce",
+
+            //Can't import these without embedding entire python runtime and dependencies
+            "PyFunc",
+            "PyFuncStateless",
+
+            //"QuantizedX" ops are deprecated / no longer supported ("standard" ops have quantized support in many cases)
+            "QuantizedAdd",
+            "QuantizedAvgPool",
+            "QuantizedBatchNormWithGlobalNormalization",
+            "QuantizedBiasAdd",
+            "QuantizedConcat",
+            "QuantizedConv2D",
+            "QuantizedInstanceNorm",
+            "QuantizedMatMul",
+            "QuantizedMaxPool",
+            "QuantizedMul",
+            "QuantizedRelu",
+            "QuantizedRelu6",
+            "QuantizedReluX",
+            "QuantizedReshape",
+            "QuantizedResizeBilinear",
+
+
             //All of the following ops - not available in TF (can't find them) - op mapping is wrong?
             //TODO: Check these and remove the import mapping from the Java classes if they are indeed bad
             "HardTanh",

@@ -993,12 +1160,37 @@ public class OpValidation {
             "absargmin",
             "entropy_shannon", //This is a thing, but quite different from our op: https://www.tensorflow.org/versions/r1.2/api_docs/python/tf/contrib/bayesflow/entropy/entropy_shannon
             "count_zero"



         );

         return new HashSet<>(list);
     }

+
+    /**
+     * These ops are ones we will never map at Java level for one reason or another
+     */
+    private static Set<String> excludeFromLibnd4jCustomOpMapping(){
+        Set<String> out = new HashSet<>();
+        Collections.addAll(out,
+                //Test and misc ops:
+                "TestOp2i2o", "testop2i2o",
+                "firas_sparse",
+                "test_output_reshape",
+                "test_scalar",
+                "testcustom",
+                "testreduction",
+
+                //"to_x" ops - we'll use cast instead in SameDiff (which supports all dtypes)
+                "to_double",
+                "to_float16",
+                "to_float32",
+                "to_int32",
+                "to_int64",
+                "to_uint32",
+                "to_uint64"
+        );
+
+        return out;
+    }
+
 }

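The `//"to_x" ops` comment in the new `excludeFromLibnd4jCustomOpMapping()` method records a design decision: rather than mapping one Java op per target dtype, conversions go through a generic cast. A hedged illustration using nd4j's `INDArray.castTo` (class and variable names illustrative only):

```java
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class CastSketch {
    public static void main(String[] args) {
        INDArray x = Nd4j.rand(DataType.FLOAT, 2, 2);
        INDArray asDouble = x.castTo(DataType.DOUBLE); // stands in for a dedicated "to_double" op
        INDArray asInt    = x.castTo(DataType.INT);    // likewise for "to_int32"
    }
}
```
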
@@ -702,7 +702,7 @@ public class Evaluation extends BaseEvaluation<Evaluation> {
             builder.append("\nCost array: ").append(Arrays.toString(costArray.dup().data().asFloat()));
         }
         //Note that we could report micro-averaged too - but these are the same as accuracy
-        //"Note that for “micro<EFBFBD>?-averaging in a multiclass setting with all labels included will produce equal precision, recall and F,"
+        //"Note that for “micro-averaging in a multiclass setting with all labels included will produce equal precision, recall and F,"
         //http://scikit-learn.org/stable/modules/model_evaluation.html

         builder.append("\n\n");

@@ -884,7 +884,7 @@ public class Evaluation extends BaseEvaluation<Evaluation> {

     /**
      * When calculating the (macro) average precision, how many classes are excluded from the average due to
-     * no predictions – i.e., precision would be the edge case of 0/0
+     * no predictions - i.e., precision would be the edge case of 0/0
      *
      * @return Number of classes excluded from the average precision
      */

@@ -894,7 +894,7 @@ public class Evaluation extends BaseEvaluation<Evaluation> {

     /**
      * When calculating the (macro) average Recall, how many classes are excluded from the average due to
-     * no predictions – i.e., recall would be the edge case of 0/0
+     * no predictions - i.e., recall would be the edge case of 0/0
      *
      * @return Number of classes excluded from the average recall
      */

@@ -904,7 +904,7 @@ public class Evaluation extends BaseEvaluation<Evaluation> {

     /**
      * When calculating the (macro) average F1, how many classes are excluded from the average due to
-     * no predictions – i.e., F1 would be calculated from a precision or recall of 0/0
+     * no predictions - i.e., F1 would be calculated from a precision or recall of 0/0
      *
      * @return Number of classes excluded from the average F1
      */

@@ -914,7 +914,7 @@ public class Evaluation extends BaseEvaluation<Evaluation> {

     /**
      * When calculating the (macro) average FBeta, how many classes are excluded from the average due to
-     * no predictions – i.e., FBeta would be calculated from a precision or recall of 0/0
+     * no predictions - i.e., FBeta would be calculated from a precision or recall of 0/0
      *
      * @return Number of classes excluded from the average FBeta
      */

@@ -83,7 +83,7 @@ public class ScatterUpdate implements CustomOp {
      */
     @Override
     public String opName() {
-        return op.opName();
+        return "scatter_update";
     }

     /**

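The one-line change above pins `ScatterUpdate.opName()` to the canonical libnd4j op name instead of delegating to the wrapped op, whose `opName()` reflects whatever the delegate happens to report. A minimal sketch of the delegation pattern involved (the class shape here is illustrative, not the actual source beyond the lines shown in the diff):

```java
import org.nd4j.linalg.api.ops.DynamicCustomOp;

public class ScatterUpdateSketch {
    private final DynamicCustomOp op; // wrapped op that performs the actual update

    public ScatterUpdateSketch(DynamicCustomOp op) {
        this.op = op;
    }

    public String opName() {
        // Return the canonical name directly; op.opName() would depend on the delegate
        return "scatter_update";
    }
}
```
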
@@ -28,7 +28,7 @@ public class SRUCellConfiguration {
     NDArray<T>* xt = INPUT_VARIABLE(0); // input [batchSize x inSize], batchSize - batch size, inSize - number of features
     NDArray<T>* ct_1 = INPUT_VARIABLE(1); // previous cell state ct [batchSize x inSize], that is at previous time step t-1
     NDArray<T>* w = INPUT_VARIABLE(2); // weights [inSize x 3*inSize]
-    NDArray<T>* b = INPUT_VARIABLE(3); // biases [1 × 2*inSize]
+    NDArray<T>* b = INPUT_VARIABLE(3); // biases [1 x 2*inSize]

     NDArray<T>* ht = OUTPUT_VARIABLE(0); // current cell output [batchSize x inSize], that is at current time step t
     NDArray<T>* ct = OUTPUT_VARIABLE(1); // current cell state [batchSize x inSize], that is at current time step t

@@ -26,7 +26,7 @@ public class SRUConfiguration {
     /**
      * NDArray<T>* input = INPUT_VARIABLE(0); // X, input 3d tensor [bS x K x N], N - number of time steps, bS - batch size, K - number of features
        NDArray<T>* weights = INPUT_VARIABLE(1); // W, 2d tensor of weights [3K x K]
-       NDArray<T>* bias = INPUT_VARIABLE(2); // B, row of biases with twice length [1 × 2*K]
+       NDArray<T>* bias = INPUT_VARIABLE(2); // B, row of biases with twice length [1 x 2*K]
        NDArray<T>* init = INPUT_VARIABLE(3); // C_{0}, 2d tensor of initial state [bS x K] at time t=0

      */

@@ -103,7 +103,7 @@ public class AdaDeltaUpdater implements GradientUpdater<AdaDelta> {
         double epsilon = config.getEpsilon();

         //Line 4 of Algorithm 1: https://arxiv.org/pdf/1212.5701v1.pdf
-        //E[g^2]_t = rho * E[g^2]_{t−1} + (1-rho)*g^2_t
+        //E[g^2]_t = rho * E[g^2]_{t-1} + (1-rho)*g^2_t
         msg.muli(rho).addi(gradient.mul(gradient).muli(1 - rho));

         //Calculate update:

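For reference, the corrected comment is line 4 of AdaDelta's Algorithm 1 (Zeiler 2012, linked above); the encoding fix replaces a Unicode minus with an ASCII `-`. The full per-step update, which the surrounding code implements term by term, is:

```latex
% rho = decay rate, epsilon = numerical-stability constant, g_t = gradient at step t
\begin{align}
E[g^2]_t &= \rho\,E[g^2]_{t-1} + (1-\rho)\,g_t^2
  && \text{line 4: accumulate squared gradients}\\
\Delta x_t &= -\frac{\sqrt{E[\Delta x^2]_{t-1} + \epsilon}}{\sqrt{E[g^2]_t + \epsilon}}\,g_t
  && \text{line 5: scale gradient by the two RMS terms}\\
E[\Delta x^2]_t &= \rho\,E[\Delta x^2]_{t-1} + (1-\rho)\,\Delta x_t^2
  && \text{line 6: accumulate squared updates}
\end{align}
```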