Op validation improvements + encoding fix (#49)
* Op validation updates

Signed-off-by: AlexDBlack <blacka101@gmail.com>

* Small logging fix

Signed-off-by: AlexDBlack <blacka101@gmail.com>

* #7986 Fix minor character encoding issues

Signed-off-by: AlexDBlack <blacka101@gmail.com>

* Small ignore fix

Signed-off-by: AlexDBlack <blacka101@gmail.com>
parent 88ea9a49eb
commit cc65c01118
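Most of what follows is the #7986 encoding cleanup: non-ASCII characters that had crept into source comments (the multiplication sign ×, typographic dashes and quotes) are replaced with plain ASCII, shown below as -/+ pairs. A scan for such characters is easy to automate; the sketch below is illustrative only - the class and method names are hypothetical and not part of this commit:

    import java.io.IOException;
    import java.nio.charset.StandardCharsets;
    import java.nio.file.Files;
    import java.nio.file.Path;
    import java.nio.file.Paths;

    public class NonAsciiScan {
        // Print every line of a file that contains a character outside the 7-bit
        // ASCII range (e.g. ×, –, ”). ISO-8859-1 maps each byte to a char 1:1,
        // so reading never fails on malformed UTF-8 - we only check byte > 127.
        public static void scan(Path file) throws IOException {
            int lineNo = 0;
            for (String line : Files.readAllLines(file, StandardCharsets.ISO_8859_1)) {
                lineNo++;
                for (int i = 0; i < line.length(); i++) {
                    if (line.charAt(i) > 127) {
                        System.out.println(file + ":" + lineNo + "  " + line.trim());
                        break;
                    }
                }
            }
        }

        public static void main(String[] args) throws IOException {
            scan(Paths.get(args[0]));
        }
    }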
@@ -145,7 +145,7 @@ DECLARE_SHAPE_FN(sru) {
CUSTOM_OP_IMPL(sru_bp, 8, 4, true, 0, 0) {
    auto x = INPUT_VARIABLE(0); // X, input 3d tensor [bS x K x N], N - number of time steps, bS - batch size, K - number of features
    auto w = INPUT_VARIABLE(1); // W, 2d tensor of weights [3K x K]
-   auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 × 2*K]
+   auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 x 2*K]
    auto c0 = INPUT_VARIABLE(3); // C_{0}, 2d tensor of initial state [bS x K] at time t=0
    auto c = INPUT_VARIABLE(4); // C, [bS x K x N]
    auto inGradCt = INPUT_VARIABLE(5); // [bS x K]
@@ -331,7 +331,7 @@ CUSTOM_OP_IMPL(sru_bi, 5, 2, true, 0, 0) {

    auto x = INPUT_VARIABLE(0); // X, input 3d tensor [time x bS x 2*inSize], time - number of time steps, bS - batch size, inSize - number of features
    auto w = INPUT_VARIABLE(1); // W, 2d tensor of weights [2*inSize x 6*inSize]
-   auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 × 4*inSize]
+   auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 x 4*inSize]
    auto c0 = INPUT_VARIABLE(3); // C_{0}, 2d tensor of initial state [bS x 2*inSize] at time t=0
    NDArray* mask = block.width() > 4 ? INPUT_VARIABLE(4) : nullptr; // optional, 2d tensor of dropout mask [bS x 2*inSize]
@@ -431,7 +431,7 @@ CUSTOM_OP_IMPL(sru_bi_bp, 8, 4, true, 0, 0) {

    auto x = INPUT_VARIABLE(0); // X, input 3d tensor [time x bS x 2*inSize], time - number of time steps, bS - batch size, inSize - number of features
    auto w = INPUT_VARIABLE(1); // W, 2d tensor of weights [2*inSize x 6*inSize]
-   auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 × 4*inSize]
+   auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 x 4*inSize]
    auto c0 = INPUT_VARIABLE(3); // C_{0}, 2d tensor of initial state [bS x 2*inSize] at time t=0
    auto ct = INPUT_VARIABLE(4); // C, [time x bS x 2*inSize]
    auto inGradC0 = INPUT_VARIABLE(5); // [bS x 2*inSize]
@@ -553,7 +553,7 @@ DECLARE_SHAPE_FN(sru_bi_bp) {
* Input arrays:
* 0: input 3d tensor with shape [bS x K x N], N - number of time steps, bS - batch size, K - number of features
* 1: 2d tensor of weights [3K x K]
- * 2: row of biases with twice length [1 × 2K]
+ * 2: row of biases with twice length [1 x 2K]
* 3: 2d tensor of previous cell state [bS x K]
* 4: optional, 2d tensor of dropout mask [bS x K]
*
@@ -572,7 +572,7 @@ DECLARE_SHAPE_FN(sru_bi_bp) {
* Input arrays:
* 0: input 3d tensor with shape [bS x K x N], N - number of time steps, bS - batch size, K - number of features
* 1: 2d tensor of weights [3K x K]
- * 2: row of biases with twice length [1 × 2K]
+ * 2: row of biases with twice length [1 x 2K]
* 3: 2d tensor of previous cell state [bS x K]
* 4: optional, 2d tensor of dropout mask [bS x K]
*
@@ -592,7 +592,7 @@ DECLARE_SHAPE_FN(sru_bi_bp) {
* Input arrays:
* 0: input 3d tensor with shape [bS x K x N], N - number of time steps, bS - batch size, K - number of features
* 1: 2d tensor of weights [3K x K]
- * 2: row of biases with twice length [1 × 2K]
+ * 2: row of biases with twice length [1 x 2K]
* 3: 2d tensor of previous cell state [bS x K]
* 4: 3d tensor of cell state [bS x K x N]
* 5: 2d tensor of cell state gradients [bS x K]
@@ -622,7 +622,7 @@ DECLARE_SHAPE_FN(sru_bi_bp) {

    // auto input = INPUT_VARIABLE(0); // X, input 3d tensor [bS x K x N], N - number of time steps, bS - batch size, K - number of features
    // auto weights = INPUT_VARIABLE(1); // W, 2d tensor of weights [3K x K]
-   // auto bias = INPUT_VARIABLE(2); // B, row of biases with twice length [1 × 2*K]
+   // auto bias = INPUT_VARIABLE(2); // B, row of biases with twice length [1 x 2*K]
    // auto init = INPUT_VARIABLE(3); // C_{0}, 2d tensor of initial state [bS x K] at time t=0
    // NDArray* mask = nullptr; // optional, 2d tensor of dropout mask [bS x K]
@@ -710,7 +710,7 @@ DECLARE_SHAPE_FN(sru_bi_bp) {
// CUSTOM_OP_IMPL(sru_old, 5, 2, false, 0, 0) {
    // auto x = INPUT_VARIABLE(0); // X, input 3d tensor [bS x inSize x time], time - number of time steps, bS - batch size, inSize - number of features
    // auto w = INPUT_VARIABLE(1); // W, 2d tensor of weights [3K x inSize]
-   // auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 × 2*inSize]
+   // auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 x 2*inSize]
    // auto c0 = INPUT_VARIABLE(3); // C_{0}, 2d tensor of initial state [bS x inSize] at time t=0
    // NDArray* mask = nullptr; // optional, 2d tensor of dropout mask [bS x inSize]
@@ -820,7 +820,7 @@ DECLARE_SHAPE_FN(sru_bi_bp) {

    // auto x = INPUT_VARIABLE(0); // X, input 3d tensor [bS x inSize x time], time - number of time steps, bS - batch size, inSize - number of features
    // auto w = INPUT_VARIABLE(1); // W, 2d tensor of weights [3*inSize x inSize]
-   // auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 × 2*inSize]
+   // auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 x 2*inSize]
    // auto c0 = INPUT_VARIABLE(3); // C_{0}, 2d tensor of initial state [bS x inSize] at time t=0
    // auto c = INPUT_VARIABLE(4); // C, [bS x inSize x time]
    // auto inGradCt = INPUT_VARIABLE(5); // [bS x inSize]
@@ -34,7 +34,7 @@ CUSTOM_OP_IMPL(sruCell, 4, 2, false, 0, 0) {
    auto xt = INPUT_VARIABLE(0); // input [bS x inSize], bS - batch size, inSize - number of features
    auto ct_1 = INPUT_VARIABLE(1); // previous cell state ct [bS x inSize], that is at previous time step t-1
    auto w = INPUT_VARIABLE(2); // weights [inSize x 3*inSize]
-   auto b = INPUT_VARIABLE(3); // biases [1 × 2*inSize]
+   auto b = INPUT_VARIABLE(3); // biases [1 x 2*inSize]

    auto ht = OUTPUT_VARIABLE(0); // current cell output [bS x inSize], that is at current time step t
    auto ct = OUTPUT_VARIABLE(1); // current cell state [bS x inSize], that is at current time step t
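Background on the [1 x 2*inSize] bias shape above: in an SRU cell (Lei et al., https://arxiv.org/abs/1709.02755) only the two gates carry biases, while the candidate transform is bias-free, which is why the bias row is two gate-widths long and the stacked weights are three. A sketch in the same notation as the comments, where .* is elementwise multiplication (background only, not code from this commit):

    f_t = sigmoid(x_t*Wf + bf)                    // forget gate, bf is [1 x inSize]
    r_t = sigmoid(x_t*Wr + br)                    // reset gate,  br is [1 x inSize]
    c_t = f_t .* c_{t-1} + (1 - f_t) .* (x_t*Wc)  // current cell state
    h_t = r_t .* g(c_t) + (1 - r_t) .* x_t        // current cell output

Concatenating [bf, br] gives the [1 x 2*inSize] bias row, and stacking [Wc, Wf, Wr] gives the [inSize x 3*inSize] weights seen above.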
@@ -33,7 +33,7 @@ namespace ops {
* Input arrays:
* 0: input 3d tensor with shape [bS x K x N], N - number of time steps, bS - batch size, K - number of features
* 1: 2d tensor of weights [3K x K]
- * 2: row of biases with twice length [1 × 2K]
+ * 2: row of biases with twice length [1 x 2K]
* 3: 2d tensor of previous cell state [bS x K]
* 4: optional, 2d tensor of dropout mask [bS x K]
*
@@ -52,7 +52,7 @@ namespace ops {
* Input arrays:
* 0: input 3d tensor with shape [N x bS x 2K], N - number of time steps, bS - batch size, K - number of features
* 1: 2d tensor of weights [2K x 6K]
- * 2: row of biases with twice length [1 × 4K]
+ * 2: row of biases with twice length [1 x 4K]
* 3: 2d tensor of previous cell state [bS x 2K]
* 4: optional, 2d tensor of dropout mask [bS x 2K]
*
@@ -72,7 +72,7 @@ namespace ops {
* Input arrays:
* 0: input 3d tensor with shape [bS x K x N], N - number of time steps, bS - batch size, K - number of features
* 1: 2d tensor of weights [3K x K]
- * 2: row of biases with twice length [1 × 2K]
+ * 2: row of biases with twice length [1 x 2K]
* 3: 2d tensor of previous cell state [bS x K]
* 4: 3d tensor of cell state [bS x K x N]
* 5: 2d tensor of cell state gradients [bS x K]
@@ -96,7 +96,7 @@ namespace ops {
* Input arrays:
* 0: input 3d tensor with shape [N x bS x 2K], N - number of time steps, bS - batch size, K - number of features
* 1: 2d tensor of weights [2K x 6K]
- * 2: row of biases with twice length [1 × 4K]
+ * 2: row of biases with twice length [1 x 4K]
* 3: 2d tensor of previous cell state [bS x 2K]
* 4: 3d tensor of cell state [N x bS x 2K]
* 5: 2d tensor of cell state gradients [bS x 2K]
@@ -239,7 +239,7 @@ namespace ops {
* 0: input with shape [batchSize x inSize], batchSize - batch size, inSize - number of features
* 1: previous cell state [batchSize x inSize], that is at previous time step t-1
* 2: weights [inSize x 3*inSize]
- * 3: biases [1 × 2*inSize]
+ * 3: biases [1 x 2*inSize]
*
* Output arrays:
* 0: current cell output [batchSize x inSize], that is at current time step t
@@ -110,7 +110,7 @@ static void sruBI_(NDArray* x, const NDArray* w, const NDArray* b, const NDArray

    // x input 3d tensor [time x bS x 2*inSize], time - number of time steps, bS - batch size, inSize - number of features
    // w 2d tensor of weights [2*inSize x 6*inSize]
-   // b row of biases with twice length [1 × 4*inSize]
+   // b row of biases with twice length [1 x 4*inSize]
    // c0 2d tensor of initial state [bS x 2*inSize] at time t=0
    // mask optional, 2d tensor of dropout mask [bS x 2*inSize]
@@ -193,7 +193,7 @@ static void sruBIBP_(NDArray* x, const NDArray* w, const NDArray* b, const NDArr

    // x input 3d tensor [time x bS x 2*inSize], time - number of time steps, bS - batch size, inSize - number of features
    // w 2d tensor of weights [2*inSize x 6*inSize]
-   // b row of biases with twice length [1 × 4*inSize]
+   // b row of biases with twice length [1 x 4*inSize]
    // c0 2d tensor of initial state [bS x 2*inSize] at time t=0
    // ct [time x bS x 2*inSize]
    // inGradC0 [bS x 2*inSize]
@@ -35,9 +35,24 @@ import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.ops.CustomOpDescriptor;
import org.nd4j.linalg.api.ops.DefaultOpConverter;
import org.nd4j.linalg.api.ops.DynamicCustomOp;
import org.nd4j.linalg.api.ops.custom.BarnesEdgeForces;
import org.nd4j.linalg.api.ops.custom.BarnesHutGains;
import org.nd4j.linalg.api.ops.custom.BarnesHutSymmetrize;
import org.nd4j.linalg.api.ops.custom.SpTreeCell;
import org.nd4j.linalg.api.ops.impl.broadcast.bool.*;
import org.nd4j.linalg.api.ops.impl.layers.ExternalErrorsFunction;
import org.nd4j.linalg.api.ops.impl.loss.bp.*;
import org.nd4j.linalg.api.ops.impl.meta.InvertedPredicateMetaOp;
import org.nd4j.linalg.api.ops.impl.meta.PostulateMetaOp;
import org.nd4j.linalg.api.ops.impl.meta.PredicateMetaOp;
import org.nd4j.linalg.api.ops.impl.meta.ReduceMetaOp;
import org.nd4j.linalg.api.ops.impl.nlp.CbowRound;
import org.nd4j.linalg.api.ops.impl.nlp.SkipGramRound;
import org.nd4j.linalg.api.ops.impl.reduce.MmulBp;
import org.nd4j.linalg.api.ops.impl.reduce.bool.All;
import org.nd4j.linalg.api.ops.impl.reduce.bool.Any;
import org.nd4j.linalg.api.ops.impl.reduce.bool.IsInf;
import org.nd4j.linalg.api.ops.impl.reduce.bool.IsNaN;
import org.nd4j.linalg.api.ops.impl.reduce.longer.MatchCondition;
import org.nd4j.linalg.api.ops.impl.reduce3.EqualsWithEps;
import org.nd4j.linalg.api.ops.impl.reduce.NormalizeMoments;
@@ -49,21 +64,26 @@ import org.nd4j.linalg.api.ops.impl.layers.convolution.*;
import org.nd4j.linalg.api.ops.impl.scalar.PowDerivative;
import org.nd4j.linalg.api.ops.impl.scalar.ScalarRemainder;
import org.nd4j.linalg.api.ops.impl.scalar.comparison.ScalarSetValue;
import org.nd4j.linalg.api.ops.impl.shape.ConfusionMatrix;
import org.nd4j.linalg.api.ops.impl.shape.Eye;
import org.nd4j.linalg.api.ops.impl.shape.MergeSum;
import org.nd4j.linalg.api.ops.impl.shape.OneHot;
import org.nd4j.linalg.api.ops.impl.shape.*;
import org.nd4j.linalg.api.ops.impl.shape.bp.ConcatBp;
import org.nd4j.linalg.api.ops.impl.shape.bp.SliceBp;
import org.nd4j.linalg.api.ops.impl.shape.bp.StridedSliceBp;
import org.nd4j.linalg.api.ops.impl.shape.bp.TileBp;
import org.nd4j.linalg.api.ops.impl.transforms.custom.InvertPermutation;
import org.nd4j.linalg.api.ops.impl.transforms.Assert;
import org.nd4j.linalg.api.ops.impl.transforms.bool.BooleanNot;
import org.nd4j.linalg.api.ops.impl.transforms.bool.MatchConditionTransform;
import org.nd4j.linalg.api.ops.impl.transforms.custom.*;
import org.nd4j.linalg.api.ops.impl.transforms.floating.Histogram;
import org.nd4j.linalg.api.ops.impl.transforms.pairwise.BinaryMinimalRelativeError;
import org.nd4j.linalg.api.ops.impl.transforms.pairwise.arithmetic.bp.*;
import org.nd4j.linalg.api.ops.impl.transforms.gradient.*;
import org.nd4j.linalg.api.ops.impl.transforms.gradient.SigmoidDerivative;
import org.nd4j.linalg.api.ops.impl.transforms.gradient.TanhDerivative;
import org.nd4j.linalg.api.ops.impl.transforms.pairwise.bool.Not;
import org.nd4j.linalg.api.ops.impl.transforms.segment.UnsortedSegmentMax;
import org.nd4j.linalg.api.ops.impl.transforms.segment.bp.*;
import org.nd4j.linalg.api.ops.impl.transforms.strict.GELUDerivative;
import org.nd4j.linalg.api.ops.impl.transforms.strict.PreciseGELUDerivative;
import org.nd4j.linalg.api.ops.impl.transforms.strict.SwishDerivative;
import org.nd4j.linalg.api.ops.impl.transforms.strict.TanDerivative;
import org.nd4j.linalg.api.ops.persistence.RestoreV2;
@@ -71,6 +91,7 @@ import org.nd4j.linalg.api.ops.persistence.SaveV2;
import org.nd4j.linalg.api.ops.random.compat.RandomStandardNormal;
import org.nd4j.linalg.api.ops.random.custom.DistributionUniform;
import org.nd4j.linalg.api.ops.random.impl.*;
import org.nd4j.linalg.api.ops.random.impl.Linspace;
import org.nd4j.linalg.api.shape.LongShapeDescriptor;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.function.Function;
@@ -681,10 +702,22 @@ public class OpValidation {
            }
        }

        int countLibnd4jIgnored = 0;
        if(logUnmappedLibnd4jOps ){
            Set<String> ignoreLibnd4j = excludeFromLibnd4jCustomOpMapping();
            log.info(" --- Libnd4j Ops Not Mapped ---");
            for(long l : nonMappedLibnd4jOps){
                Pair<List<String>,CustomOpDescriptor> p = dedupedCustomOps.get(l);
                boolean foundIgnore = false;
                for(String s : p.getFirst()){
                    if(ignoreLibnd4j.contains(s)){
                        foundIgnore = true;
                        countLibnd4jIgnored++;
                        break;
                    }
                }
                if(foundIgnore)
                    continue;
                log.info("Not mapped libnd4j custom op: {} (hash: {})", p.getFirst(), l);
            }
        }
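For context on the loop above: dedupedCustomOps groups libnd4j ops by hash, so a single entry can carry several registered aliases, and an op is skipped if any of its aliases appears in the ignore set. A toy illustration of that shape, with invented data rather than the real registry:

    import java.util.*;

    public class IgnoreCheckDemo {
        public static void main(String[] args) {
            // Invented data: one op hash mapping to two registered aliases.
            Map<Long, List<String>> dedupedCustomOps = new HashMap<>();
            dedupedCustomOps.put(42L, Arrays.asList("TestOp2i2o", "testop2i2o"));

            Set<String> ignore = new HashSet<>(Arrays.asList("testop2i2o"));

            for (Map.Entry<Long, List<String>> e : dedupedCustomOps.entrySet()) {
                // Ignore the op if ANY of its aliases is in the ignore set.
                boolean ignored = false;
                for (String alias : e.getValue()) {
                    if (ignore.contains(alias)) { ignored = true; break; }
                }
                System.out.println(e.getValue() + " ignored: " + ignored); // true
            }
        }
    }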
@@ -712,6 +745,7 @@ public class OpValidation {
        }

        if(logUnmappedTFOps){
            log.info(" --- TF Ops Not Mapped for Import ---");
            Map<String,OpDef> allTFOps;
            try{
                allTFOps = TensorflowDescriptorParser.opDescs();
@@ -760,7 +794,7 @@ public class OpValidation {
        String fracTfStr = String.format("%.2f", 100.0 * tfFrac);

        int countLibnd4jMapped = countTotalLibnd4jOps - nonMappedLibnd4jOps.size();
-       String fracLibnd4j = String.format("%.2f", 100.0 * (countLibnd4jMapped / (double)countTotalLibnd4jOps));
+       String fracLibnd4j = String.format("%.2f", 100.0 * (countLibnd4jMapped / (double)(countTotalLibnd4jOps - countLibnd4jIgnored)));

        String fracTFMappedTested = String.format("%.2f", 100.0 * tfOpsWithImportTests / (double)(totalTFMappedOps-tfImportIgnored));
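The effect of the changed denominator, with invented numbers purely for illustration: ops that are deliberately excluded from mapping no longer drag the reported coverage down.

    public class CoverageFractionDemo {
        public static void main(String[] args) {
            // Invented numbers, for illustration only.
            int countTotalLibnd4jOps = 500; // all custom ops libnd4j reports
            int nonMapped = 30;             // ops with no Java mapping, 25 of them deliberately ignored
            int countLibnd4jIgnored = 25;   // test ops, "to_x" casts, etc.

            int countLibnd4jMapped = countTotalLibnd4jOps - nonMapped; // 470

            double before = 100.0 * countLibnd4jMapped / countTotalLibnd4jOps;                         // 94.00
            double after  = 100.0 * countLibnd4jMapped / (countTotalLibnd4jOps - countLibnd4jIgnored); // ~98.95
            System.out.printf("before: %.2f%%, after: %.2f%%%n", before, after);
        }
    }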
@@ -772,7 +806,7 @@ public class OpValidation {
        log.info("({} ops excluded from fwd+gradient tests)", excludedFromAllTestCoverage.size());
        log.info("TF mapped ops: {} of {} ({}%)", countTfMapped, countTf, fracTfStr);
        log.info("SD ops with TF import mapping + test {} of {} ({}%) - {} ignored for coverage", tfOpsWithImportTests, (totalTFMappedOps-tfImportIgnored), fracTFMappedTested, tfImportIgnored);
-       log.info("Libnd4j mapped ops: {} of {} ({}%)", countLibnd4jMapped, countTotalLibnd4jOps, fracLibnd4j);
+       log.info("Libnd4j mapped ops: {} of {} ({}%) - {} excluded for coverage", countLibnd4jMapped, countTotalLibnd4jOps, fracLibnd4j, countLibnd4jIgnored);
        log.info("*****************************************************");
    }
@@ -832,9 +866,12 @@ public class OpValidation {
        CumProdBp.class,
        DotBp.class,
        SquaredNormBp.class,
        SoftmaxBp.class,

        CubeDerivative.class,
        ELUDerivative.class,
        GELUDerivative.class,
        PreciseGELUDerivative.class,
        HardSigmoidDerivative.class,
        HardTanhDerivative.class,
        LeakyReLUDerivative.class,
@@ -872,13 +909,54 @@ public class OpValidation {

        SliceBp.class,
        StridedSliceBp.class,
        MmulBp.class,
        DotProductAttentionBp.class,
        MultiHeadDotProductAttentionBp.class,
        LayerNormBp.class,
        StandardizeBp.class,
        DynamicPartitionBp.class,

        //We can't use these dropout ops in SameDiff: https://github.com/deeplearning4j/deeplearning4j/issues/5650
        DropOut.class,
        DropOutInverted.class,
        AlphaDropOut.class,
        Choice.class,
-       ProbablisticMerge.class
+       ProbablisticMerge.class,
+       AbsoluteDifferenceLossBp.class,
+       CosineDistanceLossBp.class,
+       HingeLossBp.class,
+       HuberLossBp.class,
+       LogLossBp.class,
+       LogPoissonLossBp.class,
+       MeanPairwiseSquaredErrorLossBp.class,
+       MeanSquaredErrorLossBp.class,
+       SigmoidCrossEntropyLossBp.class,
+       SoftmaxCrossEntropyLossBp.class,
+       SparseSoftmaxCrossEntropyLossWithLogitsBp.class,
+
+       SegmentMaxBp.class,
+       SegmentMeanBp.class,
+       SegmentMinBp.class,
+       SegmentProdBp.class,
+       SegmentSumBp.class,
+       UnsortedSegmentMaxBp.class,
+       UnsortedSegmentMeanBp.class,
+       UnsortedSegmentMinBp.class,
+       UnsortedSegmentProdBp.class,
+       UnsortedSegmentSqrtNBp.class,
+       UnsortedSegmentSumBp.class,
+
+       //Not intended for general users; only used in DL4J SameDiff integration + tested adequately there
+       ExternalErrorsFunction.class,
+
+       //Meta-Ops: not available in SameDiff
+       InvertedPredicateMetaOp.class,
+       PostulateMetaOp.class,
+       PredicateMetaOp.class,
+       ReduceMetaOp.class,
+
+
+       //Ops not intended to be used in SameDiff:
+       BarnesEdgeForces.class,
+       BarnesHutGains.class,
+       BarnesHutSymmetrize.class,
+       SpTreeCell.class,
+       CbowRound.class,
+       SkipGramRound.class
        );

        return new HashSet<>(list);
@@ -907,9 +985,21 @@ public class OpValidation {
        InvertPermutation.class, //Uses integer indices
        ConfusionMatrix.class, //Integer indices
        Linspace.class, //No input array
-       //Exclude boolean operations:
        Assert.class,
+       //Exclude boolean operations, boolean reductions, etc:
        Any.class,
        All.class,
        IsInf.class,
        org.nd4j.linalg.api.ops.impl.transforms.bool.IsInf.class,
        IsNaN.class,
        org.nd4j.linalg.api.ops.impl.transforms.bool.IsNaN.class,
        BooleanNot.class,
        Not.class,
        MatchConditionTransform.class,
        InTopK.class,
        IsNonDecreasing.class,
        IsStrictlyIncreasing.class,
        IsNumericTensor.class,
        //Exclude index accumulations (index out, not real-valued)
        FirstIndex.class,
        IAMax.class,
@@ -917,6 +1007,12 @@ public class OpValidation {
        IMax.class,
        IMin.class,
        LastIndex.class,

        //Exclude ops that output integer types only:
        Shape.class,
        ShapeN.class,
        SizeAt.class,

        //Exclude Random ops
        RandomStandardNormal.class,
        DistributionUniform.class,
@@ -949,7 +1045,12 @@ public class OpValidation {
        ProdBp.class,
        StandardDeviationBp.class,
        SumBp.class,
-       VarianceBp.class
+       VarianceBp.class,
+
+       LogicalAnd.class,
+       LogicalNot.class,
+       LogicalOr.class,
+       LogicalXor.class
        );

        return new HashSet<>(list);
@@ -981,6 +1082,72 @@ public class OpValidation {
        "BatchSelfAdjointEigV2", //Deprecated in favor of "SelfAdjointEigV2"
        "BatchSvd", //Deprecated in favor of "Svd"
+
+       //These we will likely neven support importing
+       "ExperimentalBytesProducedStatsDataset",
+       "ExperimentalCSVDataset",
+       "ExperimentalDatasetCardinality",
+       "ExperimentalDatasetToTFRecord",
+       "ExperimentalDenseToSparseBatchDataset",
+       "ExperimentalDirectedInterleaveDataset",
+       "ExperimentalGroupByReducerDataset",
+       "ExperimentalGroupByWindowDataset",
+       "ExperimentalIdentityIndexedDataset",
+       "ExperimentalIgnoreErrorsDataset",
+       "ExperimentalIndexedDatasetGet",
+       "ExperimentalIndexedDatasetMaterialize",
+       "ExperimentalIteratorGetDevice",
+       "ExperimentalLMDBDataset",
+       "ExperimentalLatencyStatsDataset",
+       "ExperimentalMapAndBatchDataset",
+       "ExperimentalMapDataset",
+       "ExperimentalMatchingFilesDataset",
+       "ExperimentalMaterializedIndexDatasetHandle",
+       "ExperimentalMaxIntraOpParallelismDataset",
+       "ExperimentalNonSerializableDataset",
+       "ExperimentalNumaMapAndBatchDataset",
+       "ExperimentalParallelInterleaveDataset",
+       "ExperimentalParseExampleDataset",
+       "ExperimentalPrivateThreadPoolDataset",
+       "ExperimentalRandomDataset",
+       "ExperimentalScanDataset",
+       "ExperimentalSetStatsAggregatorDataset",
+       "ExperimentalSleepDataset",
+       "ExperimentalSlidingWindowDataset",
+       "ExperimentalSqlDataset",
+       "ExperimentalStatsAggregatorHandle",
+       "ExperimentalStatsAggregatorSummary",
+       "ExperimentalThreadPoolDataset",
+       "ExperimentalThreadPoolHandle",
+       "ExperimentalUnbatchDataset",
+       "ExperimentalUniqueDataset",
+
+       "DebugIdentity",
+       "NcclAllReduce",
+       "NcclBroadcast",
+       "NcclReduce",
+
+       //Can't import these without embedding entire python runtime and dependencies
+       "PyFunc",
+       "PyFuncStateless",
+
+       //"QuantizedX" ops are deprecated / no longer supported ("standard" ops have quantized support in many cases)
+       "QuantizedAdd",
+       "QuantizedAvgPool",
+       "QuantizedBatchNormWithGlobalNormalization",
+       "QuantizedBiasAdd",
+       "QuantizedConcat",
+       "QuantizedConv2D",
+       "QuantizedInstanceNorm",
+       "QuantizedMatMul",
+       "QuantizedMaxPool",
+       "QuantizedMul",
+       "QuantizedRelu",
+       "QuantizedRelu6",
+       "QuantizedReluX",
+       "QuantizedReshape",
+       "QuantizedResizeBilinear",
+

        //All of the following ops - not available in TF (can't find them) - op mapping is wrong?
        //TODO: Check these and remove the import mapping from the Java classes if they are indeed bad
        "HardTanh",
@@ -993,12 +1160,37 @@ public class OpValidation {
                "absargmin",
                "entropy_shannon", //This is a thing, but quite different from our op: https://www.tensorflow.org/versions/r1.2/api_docs/python/tf/contrib/bayesflow/entropy/entropy_shannon
                "count_zero"
        );

        return new HashSet<>(list);
    }

+
+   /**
+    * These ops are ones we will never map at Java level for one reason or another
+    */
+   private static Set<String> excludeFromLibnd4jCustomOpMapping(){
+       Set<String> out = new HashSet<>();
+       Collections.addAll(out,
+               //Test and misc ops:
+               "TestOp2i2o", "testop2i2o",
+               "firas_sparse",
+               "test_output_reshape",
+               "test_scalar",
+               "testcustom",
+               "testreduction",
+
+               //"to_x" ops - we'll use cast instead in SameDiff (which supports all dtypes)
+               "to_double",
+               "to_float16",
+               "to_float32",
+               "to_int32",
+               "to_int64",
+               "to_uint32",
+               "to_uint64"
+       );
+
+       return out;
+   }

}
@@ -702,7 +702,7 @@ public class Evaluation extends BaseEvaluation<Evaluation> {
            builder.append("\nCost array: ").append(Arrays.toString(costArray.dup().data().asFloat()));
        }
        //Note that we could report micro-averaged too - but these are the same as accuracy
-       //"Note that for “micro<EFBFBD>?-averaging in a multiclass setting with all labels included will produce equal precision, recall and F,"
+       //"Note that for “micro-averaging in a multiclass setting with all labels included will produce equal precision, recall and F,"
        //http://scikit-learn.org/stable/modules/model_evaluation.html

        builder.append("\n\n");
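Background on the micro-averaging comment above: in a single-label multiclass setting every misclassified sample is simultaneously one false negative (for its true class) and one false positive (for the predicted class), so summed over classes

    micro-precision = sum_c TP_c / (sum_c TP_c + sum_c FP_c) = correct / total = accuracy

and likewise sum_c FN_c = total - correct, so micro-recall and micro-F1 also equal accuracy, which is why only macro averages are worth reporting separately.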
@@ -884,7 +884,7 @@ public class Evaluation extends BaseEvaluation<Evaluation> {

    /**
     * When calculating the (macro) average precision, how many classes are excluded from the average due to
-    * no predictions – i.e., precision would be the edge case of 0/0
+    * no predictions - i.e., precision would be the edge case of 0/0
     *
     * @return Number of classes excluded from the average precision
     */
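A minimal sketch of the exclusion convention this javadoc describes (hypothetical helper, not the actual Evaluation implementation): a class that was never predicted has precision TP/(TP+FP) = 0/0, and it is left out of the macro average rather than counted as zero.

    // Macro-average precision over classes, skipping the 0/0 edge case
    // (classes with no predictions, i.e. tp + fp == 0). Arrays indexed by class.
    public static double macroPrecision(int[] tp, int[] fp) {
        double sum = 0.0;
        int included = 0;
        for (int c = 0; c < tp.length; c++) {
            if (tp[c] + fp[c] == 0) {
                continue; // excluded from the average: precision undefined
            }
            sum += tp[c] / (double) (tp[c] + fp[c]);
            included++;
        }
        return included == 0 ? Double.NaN : sum / included;
    }

The same convention applies to the recall, F1, and FBeta averages documented in the hunks that follow.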
@@ -894,7 +894,7 @@ public class Evaluation extends BaseEvaluation<Evaluation> {

    /**
     * When calculating the (macro) average Recall, how many classes are excluded from the average due to
-    * no predictions – i.e., recall would be the edge case of 0/0
+    * no predictions - i.e., recall would be the edge case of 0/0
     *
     * @return Number of classes excluded from the average recall
     */
@@ -904,7 +904,7 @@ public class Evaluation extends BaseEvaluation<Evaluation> {

    /**
     * When calculating the (macro) average F1, how many classes are excluded from the average due to
-    * no predictions – i.e., F1 would be calculated from a precision or recall of 0/0
+    * no predictions - i.e., F1 would be calculated from a precision or recall of 0/0
     *
     * @return Number of classes excluded from the average F1
     */
@@ -914,7 +914,7 @@ public class Evaluation extends BaseEvaluation<Evaluation> {

    /**
     * When calculating the (macro) average FBeta, how many classes are excluded from the average due to
-    * no predictions – i.e., FBeta would be calculated from a precision or recall of 0/0
+    * no predictions - i.e., FBeta would be calculated from a precision or recall of 0/0
     *
     * @return Number of classes excluded from the average FBeta
     */
|
|
@ -83,7 +83,7 @@ public class ScatterUpdate implements CustomOp {
|
|||
*/
|
||||
@Override
|
||||
public String opName() {
|
||||
return op.opName();
|
||||
return "scatter_update";
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -28,7 +28,7 @@ public class SRUCellConfiguration {
|
|||
NDArray<T>* xt = INPUT_VARIABLE(0); // input [batchSize x inSize], batchSize - batch size, inSize - number of features
|
||||
NDArray<T>* ct_1 = INPUT_VARIABLE(1); // previous cell state ct [batchSize x inSize], that is at previous time step t-1
|
||||
NDArray<T>* w = INPUT_VARIABLE(2); // weights [inSize x 3*inSize]
|
||||
NDArray<T>* b = INPUT_VARIABLE(3); // biases [1 × 2*inSize]
|
||||
NDArray<T>* b = INPUT_VARIABLE(3); // biases [1 x 2*inSize]
|
||||
|
||||
NDArray<T>* ht = OUTPUT_VARIABLE(0); // current cell output [batchSize x inSize], that is at current time step t
|
||||
NDArray<T>* ct = OUTPUT_VARIABLE(1); // current cell state [batchSize x inSize], that is at current time step t
|
||||
|
|
|
@ -26,7 +26,7 @@ public class SRUConfiguration {
|
|||
/**
|
||||
* NDArray<T>* input = INPUT_VARIABLE(0); // X, input 3d tensor [bS x K x N], N - number of time steps, bS - batch size, K - number of features
|
||||
NDArray<T>* weights = INPUT_VARIABLE(1); // W, 2d tensor of weights [3K x K]
|
||||
NDArray<T>* bias = INPUT_VARIABLE(2); // B, row of biases with twice length [1 × 2*K]
|
||||
NDArray<T>* bias = INPUT_VARIABLE(2); // B, row of biases with twice length [1 x 2*K]
|
||||
NDArray<T>* init = INPUT_VARIABLE(3); // C_{0}, 2d tensor of initial state [bS x K] at time t=0
|
||||
|
||||
*/
|
||||
|
|
|
@ -103,7 +103,7 @@ public class AdaDeltaUpdater implements GradientUpdater<AdaDelta> {
|
|||
double epsilon = config.getEpsilon();
|
||||
|
||||
//Line 4 of Algorithm 1: https://arxiv.org/pdf/1212.5701v1.pdf
|
||||
//E[g^2]_t = rho * E[g^2]_{t−1} + (1-rho)*g^2_t
|
||||
//E[g^2]_t = rho * E[g^2]_{t-1} + (1-rho)*g^2_t
|
||||
msg.muli(rho).addi(gradient.mul(gradient).muli(1 - rho));
|
||||
|
||||
//Calculate update:
|
||||
|
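For reference, the neighbouring steps of AdaDelta (Algorithm 1 of the paper linked above), in the same comment notation; rho is the decay rate, epsilon the stability constant, and RMS[y]_t = sqrt(E[y^2]_t + epsilon):

    E[g^2]_t  = rho * E[g^2]_{t-1} + (1-rho)*g^2_t    // line 4: accumulate gradient
    dx_t      = -(RMS[dx]_{t-1} / RMS[g]_t) * g_t     // line 5: compute update
    E[dx^2]_t = rho * E[dx^2]_{t-1} + (1-rho)*dx^2_t  // line 6: accumulate updates
    x_{t+1}   = x_t + dx_t                            // line 7: apply update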