Op validation improvements + encoding fix (#49)

* Op validation updates

Signed-off-by: AlexDBlack <blacka101@gmail.com>

* Small logging fix

Signed-off-by: AlexDBlack <blacka101@gmail.com>

* #7986 Fix minor character encoding issues

Signed-off-by: AlexDBlack <blacka101@gmail.com>

* Small ignore fix

Signed-off-by: AlexDBlack <blacka101@gmail.com>
master
Alex Black 2019-07-08 17:58:48 +10:00 committed by AlexDBlack
parent 88ea9a49eb
commit cc65c01118
10 changed files with 236 additions and 44 deletions

View File

@@ -145,7 +145,7 @@ DECLARE_SHAPE_FN(sru) {
CUSTOM_OP_IMPL(sru_bp, 8, 4, true, 0, 0) {
auto x = INPUT_VARIABLE(0); // X, input 3d tensor [bS x K x N], N - number of time steps, bS - batch size, K - number of features
auto w = INPUT_VARIABLE(1); // W, 2d tensor of weights [3K x K]
auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 × 2*K]
auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 x 2*K]
auto c0 = INPUT_VARIABLE(3); // C_{0}, 2d tensor of initial state [bS x K] at time t=0
auto c = INPUT_VARIABLE(4); // C, [bS x K x N]
auto inGradCt = INPUT_VARIABLE(5); // [bS x K]
@@ -331,7 +331,7 @@ CUSTOM_OP_IMPL(sru_bi, 5, 2, true, 0, 0) {
auto x = INPUT_VARIABLE(0); // X, input 3d tensor [time x bS x 2*inSize], time - number of time steps, bS - batch size, inSize - number of features
auto w = INPUT_VARIABLE(1); // W, 2d tensor of weights [2*inSize x 6*inSize]
auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 × 4*inSize]
auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 x 4*inSize]
auto c0 = INPUT_VARIABLE(3); // C_{0}, 2d tensor of initial state [bS x 2*inSize] at time t=0
NDArray* mask = block.width() > 4 ? INPUT_VARIABLE(4) : nullptr; // optional, 2d tensor of dropout mask [bS x 2*inSize]
@@ -431,7 +431,7 @@ CUSTOM_OP_IMPL(sru_bi_bp, 8, 4, true, 0, 0) {
auto x = INPUT_VARIABLE(0); // X, input 3d tensor [time x bS x 2*inSize], time - number of time steps, bS - batch size, inSize - number of features
auto w = INPUT_VARIABLE(1); // W, 2d tensor of weights [2*inSize x 6*inSize]
auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 × 4*inSize]
auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 x 4*inSize]
auto c0 = INPUT_VARIABLE(3); // C_{0}, 2d tensor of initial state [bS x 2*inSize] at time t=0
auto ct = INPUT_VARIABLE(4); // C, [time x bS x 2*inSize]
auto inGradC0 = INPUT_VARIABLE(5); // [bS x 2*inSize]
@@ -553,7 +553,7 @@ DECLARE_SHAPE_FN(sru_bi_bp) {
* Input arrays:
* 0: input 3d tensor with shape [bS x K x N], N - number of time steps, bS - batch size, K - number of features
* 1: 2d tensor of weights [3K x K]
* 2: row of biases with twice length [1 × 2K]
* 2: row of biases with twice length [1 x 2K]
* 3: 2d tensor of previous cell state [bS x K]
* 4: optional, 2d tensor of dropout mask [bS x K]
*
@@ -572,7 +572,7 @@ DECLARE_SHAPE_FN(sru_bi_bp) {
* Input arrays:
* 0: input 3d tensor with shape [bS x K x N], N - number of time steps, bS - batch size, K - number of features
* 1: 2d tensor of weights [3K x K]
* 2: row of biases with twice length [1 × 2K]
* 2: row of biases with twice length [1 x 2K]
* 3: 2d tensor of previous cell state [bS x K]
* 4: optional, 2d tensor of dropout mask [bS x K]
*
@@ -592,7 +592,7 @@ DECLARE_SHAPE_FN(sru_bi_bp) {
* Input arrays:
* 0: input 3d tensor with shape [bS x K x N], N - number of time steps, bS - batch size, K - number of features
* 1: 2d tensor of weights [3K x K]
* 2: row of biases with twice length [1 × 2K]
* 2: row of biases with twice length [1 x 2K]
* 3: 2d tensor of previous cell state [bS x K]
* 4: 3d tensor of cell state [bS x K x N]
* 5: 2d tensor of cell state gradients [bS x K]
@@ -622,7 +622,7 @@ DECLARE_SHAPE_FN(sru_bi_bp) {
// auto input = INPUT_VARIABLE(0); // X, input 3d tensor [bS x K x N], N - number of time steps, bS - batch size, K - number of features
// auto weights = INPUT_VARIABLE(1); // W, 2d tensor of weights [3K x K]
// auto bias = INPUT_VARIABLE(2); // B, row of biases with twice length [1 × 2*K]
// auto bias = INPUT_VARIABLE(2); // B, row of biases with twice length [1 x 2*K]
// auto init = INPUT_VARIABLE(3); // C_{0}, 2d tensor of initial state [bS x K] at time t=0
// NDArray* mask = nullptr; // optional, 2d tensor of dropout mask [bS x K]
@@ -710,7 +710,7 @@ DECLARE_SHAPE_FN(sru_bi_bp) {
// CUSTOM_OP_IMPL(sru_old, 5, 2, false, 0, 0) {
// auto x = INPUT_VARIABLE(0); // X, input 3d tensor [bS x inSize x time], time - number of time steps, bS - batch size, inSize - number of features
// auto w = INPUT_VARIABLE(1); // W, 2d tensor of weights [3K x inSize]
// auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 × 2*inSize]
// auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 x 2*inSize]
// auto c0 = INPUT_VARIABLE(3); // C_{0}, 2d tensor of initial state [bS x inSize] at time t=0
// NDArray* mask = nullptr; // optional, 2d tensor of dropout mask [bS x inSize]
@@ -820,7 +820,7 @@ DECLARE_SHAPE_FN(sru_bi_bp) {
// auto x = INPUT_VARIABLE(0); // X, input 3d tensor [bS x inSize x time], time - number of time steps, bS - batch size, inSize - number of features
// auto w = INPUT_VARIABLE(1); // W, 2d tensor of weights [3*inSize x inSize]
// auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 × 2*inSize]
// auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 x 2*inSize]
// auto c0 = INPUT_VARIABLE(3); // C_{0}, 2d tensor of initial state [bS x inSize] at time t=0
// auto c = INPUT_VARIABLE(4); // C, [bS x inSize x time]
// auto inGradCt = INPUT_VARIABLE(5); // [bS x inSize]

View File

@@ -34,7 +34,7 @@ CUSTOM_OP_IMPL(sruCell, 4, 2, false, 0, 0) {
auto xt = INPUT_VARIABLE(0); // input [bS x inSize], bS - batch size, inSize - number of features
auto ct_1 = INPUT_VARIABLE(1); // previous cell state ct [bS x inSize], that is at previous time step t-1
auto w = INPUT_VARIABLE(2); // weights [inSize x 3*inSize]
auto b = INPUT_VARIABLE(3); // biases [1 × 2*inSize]
auto b = INPUT_VARIABLE(3); // biases [1 x 2*inSize]
auto ht = OUTPUT_VARIABLE(0); // current cell output [bS x inSize], that is at current time step t
auto ct = OUTPUT_VARIABLE(1); // current cell state [bS x inSize], that is at current time step t
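
The weight and bias shapes above follow from the SRU recurrence (Lei et al., "Training RNNs as Fast as CNNs"): a single matmul of xt with w yields three inSize-wide blocks (the candidate x~ and the pre-activations of the forget and reset gates), and only the two gates carry biases, hence [inSize x 3*inSize] weights and [1 x 2*inSize] biases. A minimal ND4J sketch of one cell step under that standard formulation (the class name SruCellSketch and the slicing layout are illustrative, not the library's implementation):

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.NDArrayIndex;
import org.nd4j.linalg.ops.transforms.Transforms;

public class SruCellSketch {
    public static void main(String[] args) {
        int bS = 2, inSize = 3;
        INDArray xt   = Nd4j.rand(bS, inSize);          // input          [bS x inSize]
        INDArray ct_1 = Nd4j.rand(bS, inSize);          // previous state [bS x inSize]
        INDArray w    = Nd4j.rand(inSize, 3 * inSize);  // weights        [inSize x 3*inSize]
        INDArray b    = Nd4j.rand(1, 2 * inSize);       // gate biases    [1 x 2*inSize]

        INDArray z = xt.mmul(w);                        // [bS x 3*inSize]: x~ | forget gate | reset gate
        INDArray xTilde = z.get(NDArrayIndex.all(), NDArrayIndex.interval(0, inSize));
        INDArray f = Transforms.sigmoid(z.get(NDArrayIndex.all(), NDArrayIndex.interval(inSize, 2 * inSize))
                .addRowVector(b.get(NDArrayIndex.all(), NDArrayIndex.interval(0, inSize))));
        INDArray r = Transforms.sigmoid(z.get(NDArrayIndex.all(), NDArrayIndex.interval(2 * inSize, 3 * inSize))
                .addRowVector(b.get(NDArrayIndex.all(), NDArrayIndex.interval(inSize, 2 * inSize))));

        INDArray ct = f.mul(ct_1).add(f.rsub(1.0).mul(xTilde));            // c_t = f*c_{t-1} + (1-f)*x~
        INDArray ht = r.mul(Transforms.tanh(ct)).add(r.rsub(1.0).mul(xt)); // h_t = r*tanh(c_t) + (1-r)*x_t

        System.out.println(java.util.Arrays.toString(ht.shape()));         // -> [2, 3], i.e. [bS x inSize]
    }
}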

View File

@@ -33,7 +33,7 @@ namespace ops {
* Input arrays:
* 0: input 3d tensor with shape [bS x K x N], N - number of time steps, bS - batch size, K - number of features
* 1: 2d tensor of weights [3K x K]
* 2: row of biases with twice length [1 × 2K]
* 2: row of biases with twice length [1 x 2K]
* 3: 2d tensor of previous cell state [bS x K]
* 4: optional, 2d tensor of dropout mask [bS x K]
*
@@ -52,7 +52,7 @@ namespace ops {
* Input arrays:
* 0: input 3d tensor with shape [N x bS x 2K], N - number of time steps, bS - batch size, K - number of features
* 1: 2d tensor of weights [2K x 6K]
* 2: row of biases with twice length [1 × 4K]
* 2: row of biases with twice length [1 x 4K]
* 3: 2d tensor of previous cell state [bS x 2K]
* 4: optional, 2d tensor of dropout mask [bS x 2K]
*
@@ -72,7 +72,7 @@ namespace ops {
* Input arrays:
* 0: input 3d tensor with shape [bS x K x N], N - number of time steps, bS - batch size, K - number of features
* 1: 2d tensor of weights [3K x K]
* 2: row of biases with twice length [1 × 2K]
* 2: row of biases with twice length [1 x 2K]
* 3: 2d tensor of previous cell state [bS x K]
* 4: 3d tensor of cell state [bS x K x N]
* 5: 2d tensor of cell state gradients [bS x K]
@@ -96,7 +96,7 @@ namespace ops {
* Input arrays:
* 0: input 3d tensor with shape [N x bS x 2K], N - number of time steps, bS - batch size, K - number of features
* 1: 2d tensor of weights [2K x 6K]
* 2: row of biases with twice length [1 × 4K]
* 2: row of biases with twice length [1 x 4K]
* 3: 2d tensor of previous cell state [bS x 2K]
* 4: 3d tensor of cell state [N x bS x 2K]
* 5: 2d tensor of cell state gradients [bS x 2K]
@@ -239,7 +239,7 @@ namespace ops {
* 0: input with shape [batchSize x inSize], batchSize - batch size, inSize - number of features
* 1: previous cell state [batchSize x inSize], that is at previous time step t-1
* 2: weights [inSize x 3*inSize]
* 3: biases [1 × 2*inSize]
* 3: biases [1 x 2*inSize]
*
* Output arrays:
* 0: current cell output [batchSize x inSize], that is at current time step t

View File

@@ -110,7 +110,7 @@ static void sruBI_(NDArray* x, const NDArray* w, const NDArray* b, const NDArray
// x input 3d tensor [time x bS x 2*inSize], time - number of time steps, bS - batch size, inSize - number of features
// w 2d tensor of weights [2*inSize x 6*inSize]
// b row of biases with twice length [1 × 4*inSize]
// b row of biases with twice length [1 x 4*inSize]
// c0 2d tensor of initial state [bS x 2*inSize] at time t=0
// mask optional, 2d tensor of dropout mask [bS x 2*inSize]
@@ -193,7 +193,7 @@ static void sruBIBP_(NDArray* x, const NDArray* w, const NDArray* b, const NDArr
// x input 3d tensor [time x bS x 2*inSize], time - number of time steps, bS - batch size, inSize - number of features
// w 2d tensor of weights [2*inSize x 6*inSize]
// b row of biases with twice length [1 × 4*inSize]
// b row of biases with twice length [1 x 4*inSize]
// c0 2d tensor of initial state [bS x 2*inSize] at time t=0
// ct [time x bS x 2*inSize]
// inGradC0 [bS x 2*inSize]
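
In the bidirectional helpers above, every trailing dimension is doubled because the forward and backward directions are handled together: x carries 2*inSize features per time step, the matmul with w maps them to 6*inSize pre-activations (the three inSize-wide SRU blocks for each direction), and b holds the two gate bias rows per direction, hence [1 x 4*inSize]. With purely illustrative numbers, time = 5, bS = 2, inSize = 3 gives x: [5 x 2 x 6], w: [6 x 18], b: [1 x 12] and c0: [2 x 6].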

View File

@@ -35,9 +35,24 @@ import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.ops.CustomOpDescriptor;
import org.nd4j.linalg.api.ops.DefaultOpConverter;
import org.nd4j.linalg.api.ops.DynamicCustomOp;
import org.nd4j.linalg.api.ops.custom.BarnesEdgeForces;
import org.nd4j.linalg.api.ops.custom.BarnesHutGains;
import org.nd4j.linalg.api.ops.custom.BarnesHutSymmetrize;
import org.nd4j.linalg.api.ops.custom.SpTreeCell;
import org.nd4j.linalg.api.ops.impl.broadcast.bool.*;
import org.nd4j.linalg.api.ops.impl.layers.ExternalErrorsFunction;
import org.nd4j.linalg.api.ops.impl.loss.bp.*;
import org.nd4j.linalg.api.ops.impl.meta.InvertedPredicateMetaOp;
import org.nd4j.linalg.api.ops.impl.meta.PostulateMetaOp;
import org.nd4j.linalg.api.ops.impl.meta.PredicateMetaOp;
import org.nd4j.linalg.api.ops.impl.meta.ReduceMetaOp;
import org.nd4j.linalg.api.ops.impl.nlp.CbowRound;
import org.nd4j.linalg.api.ops.impl.nlp.SkipGramRound;
import org.nd4j.linalg.api.ops.impl.reduce.MmulBp;
import org.nd4j.linalg.api.ops.impl.reduce.bool.All;
import org.nd4j.linalg.api.ops.impl.reduce.bool.Any;
import org.nd4j.linalg.api.ops.impl.reduce.bool.IsInf;
import org.nd4j.linalg.api.ops.impl.reduce.bool.IsNaN;
import org.nd4j.linalg.api.ops.impl.reduce.longer.MatchCondition;
import org.nd4j.linalg.api.ops.impl.reduce3.EqualsWithEps;
import org.nd4j.linalg.api.ops.impl.reduce.NormalizeMoments;
@@ -49,21 +64,26 @@ import org.nd4j.linalg.api.ops.impl.layers.convolution.*;
import org.nd4j.linalg.api.ops.impl.scalar.PowDerivative;
import org.nd4j.linalg.api.ops.impl.scalar.ScalarRemainder;
import org.nd4j.linalg.api.ops.impl.scalar.comparison.ScalarSetValue;
import org.nd4j.linalg.api.ops.impl.shape.ConfusionMatrix;
import org.nd4j.linalg.api.ops.impl.shape.Eye;
import org.nd4j.linalg.api.ops.impl.shape.MergeSum;
import org.nd4j.linalg.api.ops.impl.shape.OneHot;
import org.nd4j.linalg.api.ops.impl.shape.*;
import org.nd4j.linalg.api.ops.impl.shape.bp.ConcatBp;
import org.nd4j.linalg.api.ops.impl.shape.bp.SliceBp;
import org.nd4j.linalg.api.ops.impl.shape.bp.StridedSliceBp;
import org.nd4j.linalg.api.ops.impl.shape.bp.TileBp;
import org.nd4j.linalg.api.ops.impl.transforms.custom.InvertPermutation;
import org.nd4j.linalg.api.ops.impl.transforms.Assert;
import org.nd4j.linalg.api.ops.impl.transforms.bool.BooleanNot;
import org.nd4j.linalg.api.ops.impl.transforms.bool.MatchConditionTransform;
import org.nd4j.linalg.api.ops.impl.transforms.custom.*;
import org.nd4j.linalg.api.ops.impl.transforms.floating.Histogram;
import org.nd4j.linalg.api.ops.impl.transforms.pairwise.BinaryMinimalRelativeError;
import org.nd4j.linalg.api.ops.impl.transforms.pairwise.arithmetic.bp.*;
import org.nd4j.linalg.api.ops.impl.transforms.gradient.*;
import org.nd4j.linalg.api.ops.impl.transforms.gradient.SigmoidDerivative;
import org.nd4j.linalg.api.ops.impl.transforms.gradient.TanhDerivative;
import org.nd4j.linalg.api.ops.impl.transforms.pairwise.bool.Not;
import org.nd4j.linalg.api.ops.impl.transforms.segment.UnsortedSegmentMax;
import org.nd4j.linalg.api.ops.impl.transforms.segment.bp.*;
import org.nd4j.linalg.api.ops.impl.transforms.strict.GELUDerivative;
import org.nd4j.linalg.api.ops.impl.transforms.strict.PreciseGELUDerivative;
import org.nd4j.linalg.api.ops.impl.transforms.strict.SwishDerivative;
import org.nd4j.linalg.api.ops.impl.transforms.strict.TanDerivative;
import org.nd4j.linalg.api.ops.persistence.RestoreV2;
@@ -71,6 +91,7 @@ import org.nd4j.linalg.api.ops.persistence.SaveV2;
import org.nd4j.linalg.api.ops.random.compat.RandomStandardNormal;
import org.nd4j.linalg.api.ops.random.custom.DistributionUniform;
import org.nd4j.linalg.api.ops.random.impl.*;
import org.nd4j.linalg.api.ops.random.impl.Linspace;
import org.nd4j.linalg.api.shape.LongShapeDescriptor;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.function.Function;
@@ -681,10 +702,22 @@ public class OpValidation {
}
}
int countLibnd4jIgnored = 0;
if(logUnmappedLibnd4jOps ){
Set<String> ignoreLibnd4j = excludeFromLibnd4jCustomOpMapping();
log.info(" --- Libnd4j Ops Not Mapped ---");
for(long l : nonMappedLibnd4jOps){
Pair<List<String>,CustomOpDescriptor> p = dedupedCustomOps.get(l);
boolean foundIgnore = false;
for(String s : p.getFirst()){
if(ignoreLibnd4j.contains(s)){
foundIgnore = true;
countLibnd4jIgnored++;
break;
}
}
if(foundIgnore)
continue;
log.info("Not mapped libnd4j custom op: {} (hash: {})", p.getFirst(), l);
}
}
@@ -712,6 +745,7 @@ public class OpValidation {
}
if(logUnmappedTFOps){
log.info(" --- TF Ops Not Mapped for Import ---");
Map<String,OpDef> allTFOps;
try{
allTFOps = TensorflowDescriptorParser.opDescs();
@@ -760,7 +794,7 @@ public class OpValidation {
String fracTfStr = String.format("%.2f", 100.0 * tfFrac);
int countLibnd4jMapped = countTotalLibnd4jOps - nonMappedLibnd4jOps.size();
String fracLibnd4j = String.format("%.2f", 100.0 * (countLibnd4jMapped / (double)countTotalLibnd4jOps));
String fracLibnd4j = String.format("%.2f", 100.0 * (countLibnd4jMapped / (double)(countTotalLibnd4jOps - countLibnd4jIgnored)));
String fracTFMappedTested = String.format("%.2f", 100.0 * tfOpsWithImportTests / (double)(totalTFMappedOps-tfImportIgnored));
@@ -772,7 +806,7 @@ public class OpValidation {
log.info("({} ops excluded from fwd+gradient tests)", excludedFromAllTestCoverage.size());
log.info("TF mapped ops: {} of {} ({}%)", countTfMapped, countTf, fracTfStr);
log.info("SD ops with TF import mapping + test {} of {} ({}%) - {} ignored for coverage", tfOpsWithImportTests, (totalTFMappedOps-tfImportIgnored), fracTFMappedTested, tfImportIgnored);
log.info("Libnd4j mapped ops: {} of {} ({}%)", countLibnd4jMapped, countTotalLibnd4jOps, fracLibnd4j);
log.info("Libnd4j mapped ops: {} of {} ({}%) - {} excluded for coverage", countLibnd4jMapped, countTotalLibnd4jOps, fracLibnd4j, countLibnd4jIgnored);
log.info("*****************************************************");
}
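
The revised fracLibnd4j drops the deliberately ignored ops from the denominator, so the logged percentage measures coverage of the ops that are actually intended to be mapped. With purely illustrative numbers, countTotalLibnd4jOps = 400, countLibnd4jIgnored = 13 and countLibnd4jMapped = 350 would report 350 / 400 = 87.50% under the old expression and 350 / (400 - 13) = 90.44% under the new one.
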
@@ -832,9 +866,12 @@ public class OpValidation {
CumProdBp.class,
DotBp.class,
SquaredNormBp.class,
SoftmaxBp.class,
CubeDerivative.class,
ELUDerivative.class,
GELUDerivative.class,
PreciseGELUDerivative.class,
HardSigmoidDerivative.class,
HardTanhDerivative.class,
LeakyReLUDerivative.class,
@@ -872,13 +909,54 @@ public class OpValidation {
SliceBp.class,
StridedSliceBp.class,
MmulBp.class,
DotProductAttentionBp.class,
MultiHeadDotProductAttentionBp.class,
LayerNormBp.class,
StandardizeBp.class,
DynamicPartitionBp.class,
//We can't use these dropout ops in SameDiff: https://github.com/deeplearning4j/deeplearning4j/issues/5650
DropOut.class,
DropOutInverted.class,
AlphaDropOut.class,
Choice.class,
ProbablisticMerge.class
AbsoluteDifferenceLossBp.class,
CosineDistanceLossBp.class,
HingeLossBp.class,
HuberLossBp.class,
LogLossBp.class,
LogPoissonLossBp.class,
MeanPairwiseSquaredErrorLossBp.class,
MeanSquaredErrorLossBp.class,
SigmoidCrossEntropyLossBp.class,
SoftmaxCrossEntropyLossBp.class,
SparseSoftmaxCrossEntropyLossWithLogitsBp.class,
SegmentMaxBp.class,
SegmentMeanBp.class,
SegmentMinBp.class,
SegmentProdBp.class,
SegmentSumBp.class,
UnsortedSegmentMaxBp.class,
UnsortedSegmentMeanBp.class,
UnsortedSegmentMinBp.class,
UnsortedSegmentProdBp.class,
UnsortedSegmentSqrtNBp.class,
UnsortedSegmentSumBp.class,
//Not intended for general users; only used in DL4J SameDiff integration + tested adequately there
ExternalErrorsFunction.class,
//Meta-Ops: not available in SameDiff
InvertedPredicateMetaOp.class,
PostulateMetaOp.class,
PredicateMetaOp.class,
ReduceMetaOp.class,
//Ops not intended to be used in SameDiff:
BarnesEdgeForces.class,
BarnesHutGains.class,
BarnesHutSymmetrize.class,
SpTreeCell.class,
CbowRound.class,
SkipGramRound.class
);
return new HashSet<>(list);
@@ -907,9 +985,21 @@ public class OpValidation {
InvertPermutation.class, //Uses integer indices
ConfusionMatrix.class, //Integer indices
Linspace.class, //No input array
//Exclude boolean operations:
Assert.class,
//Exclude boolean operations, boolean reductions, etc:
Any.class,
All.class,
IsInf.class,
org.nd4j.linalg.api.ops.impl.transforms.bool.IsInf.class,
IsNaN.class,
org.nd4j.linalg.api.ops.impl.transforms.bool.IsNaN.class,
BooleanNot.class,
Not.class,
MatchConditionTransform.class,
InTopK.class,
IsNonDecreasing.class,
IsStrictlyIncreasing.class,
IsNumericTensor.class,
//Exclude index accumulations (index out, not real-valued)
FirstIndex.class,
IAMax.class,
@@ -917,6 +1007,12 @@ public class OpValidation {
IMax.class,
IMin.class,
LastIndex.class,
//Exclude ops that output integer types only:
Shape.class,
ShapeN.class,
SizeAt.class,
//Exclude Random ops
RandomStandardNormal.class,
DistributionUniform.class,
@@ -949,7 +1045,12 @@ public class OpValidation {
ProdBp.class,
StandardDeviationBp.class,
SumBp.class,
VarianceBp.class
VarianceBp.class,
LogicalAnd.class,
LogicalNot.class,
LogicalOr.class,
LogicalXor.class
);
return new HashSet<>(list);
@@ -981,6 +1082,72 @@ public class OpValidation {
"BatchSelfAdjointEigV2", //Deprecated in favor of "SelfAdjointEigV2"
"BatchSvd", //Deprecated in favor of "Svd"
//These we will likely never support importing
"ExperimentalBytesProducedStatsDataset",
"ExperimentalCSVDataset",
"ExperimentalDatasetCardinality",
"ExperimentalDatasetToTFRecord",
"ExperimentalDenseToSparseBatchDataset",
"ExperimentalDirectedInterleaveDataset",
"ExperimentalGroupByReducerDataset",
"ExperimentalGroupByWindowDataset",
"ExperimentalIdentityIndexedDataset",
"ExperimentalIgnoreErrorsDataset",
"ExperimentalIndexedDatasetGet",
"ExperimentalIndexedDatasetMaterialize",
"ExperimentalIteratorGetDevice",
"ExperimentalLMDBDataset",
"ExperimentalLatencyStatsDataset",
"ExperimentalMapAndBatchDataset",
"ExperimentalMapDataset",
"ExperimentalMatchingFilesDataset",
"ExperimentalMaterializedIndexDatasetHandle",
"ExperimentalMaxIntraOpParallelismDataset",
"ExperimentalNonSerializableDataset",
"ExperimentalNumaMapAndBatchDataset",
"ExperimentalParallelInterleaveDataset",
"ExperimentalParseExampleDataset",
"ExperimentalPrivateThreadPoolDataset",
"ExperimentalRandomDataset",
"ExperimentalScanDataset",
"ExperimentalSetStatsAggregatorDataset",
"ExperimentalSleepDataset",
"ExperimentalSlidingWindowDataset",
"ExperimentalSqlDataset",
"ExperimentalStatsAggregatorHandle",
"ExperimentalStatsAggregatorSummary",
"ExperimentalThreadPoolDataset",
"ExperimentalThreadPoolHandle",
"ExperimentalUnbatchDataset",
"ExperimentalUniqueDataset",
"DebugIdentity",
"NcclAllReduce",
"NcclBroadcast",
"NcclReduce",
//Can't import these without embedding entire python runtime and dependencies
"PyFunc",
"PyFuncStateless",
//"QuantizedX" ops are deprecated / no longer supported ("standard" ops have quantized support in many cases)
"QuantizedAdd",
"QuantizedAvgPool",
"QuantizedBatchNormWithGlobalNormalization",
"QuantizedBiasAdd",
"QuantizedConcat",
"QuantizedConv2D",
"QuantizedInstanceNorm",
"QuantizedMatMul",
"QuantizedMaxPool",
"QuantizedMul",
"QuantizedRelu",
"QuantizedRelu6",
"QuantizedReluX",
"QuantizedReshape",
"QuantizedResizeBilinear",
//All of the following ops - not available in TF (can't find them) - op mapping is wrong?
//TODO: Check these and remove the import mapping from the Java classes if they are indeed bad
"HardTanh",
@@ -993,12 +1160,37 @@ public class OpValidation {
"absargmin",
"entropy_shannon", //This is a thing, but quite different from our op: https://www.tensorflow.org/versions/r1.2/api_docs/python/tf/contrib/bayesflow/entropy/entropy_shannon
"count_zero"
);
return new HashSet<>(list);
}
/**
* These ops are ones we will never map at Java level for one reason or another
*/
private static Set<String> excludeFromLibnd4jCustomOpMapping(){
Set<String> out = new HashSet<>();
Collections.addAll(out,
//Test and misc ops:
"TestOp2i2o", "testop2i2o",
"firas_sparse",
"test_output_reshape",
"test_scalar",
"testcustom",
"testreduction",
//"to_x" ops - we'll use cast instead in SameDiff (which supports all dtypes)
"to_double",
"to_float16",
"to_float32",
"to_int32",
"to_int64",
"to_uint32",
"to_uint64"
);
return out;
}
}

View File

@@ -702,7 +702,7 @@ public class Evaluation extends BaseEvaluation<Evaluation> {
builder.append("\nCost array: ").append(Arrays.toString(costArray.dup().data().asFloat()));
}
//Note that we could report micro-averaged too - but these are the same as accuracy
//"Note that for “micro<EFBFBD>?-averaging in a multiclass setting with all labels included will produce equal precision, recall and F,"
//"Note that for “micro-averaging in a multiclass setting with all labels included will produce equal precision, recall and F,"
//http://scikit-learn.org/stable/modules/model_evaluation.html
builder.append("\n\n");
@@ -884,7 +884,7 @@ public class Evaluation extends BaseEvaluation<Evaluation> {
/**
* When calculating the (macro) average precision, how many classes are excluded from the average due to
* no predictions i.e., precision would be the edge case of 0/0
* no predictions - i.e., precision would be the edge case of 0/0
*
* @return Number of classes excluded from the average precision
*/
@@ -894,7 +894,7 @@ public class Evaluation extends BaseEvaluation<Evaluation> {
/**
* When calculating the (macro) average Recall, how many classes are excluded from the average due to
* no predictions i.e., recall would be the edge case of 0/0
* no predictions - i.e., recall would be the edge case of 0/0
*
* @return Number of classes excluded from the average recall
*/
@@ -904,7 +904,7 @@ public class Evaluation extends BaseEvaluation<Evaluation> {
/**
* When calculating the (macro) average F1, how many classes are excluded from the average due to
* no predictions i.e., F1 would be calculated from a precision or recall of 0/0
* no predictions - i.e., F1 would be calculated from a precision or recall of 0/0
*
* @return Number of classes excluded from the average F1
*/
@@ -914,7 +914,7 @@ public class Evaluation extends BaseEvaluation<Evaluation> {
/**
* When calculating the (macro) average FBeta, how many classes are excluded from the average due to
* no predictions i.e., FBeta would be calculated from a precision or recall of 0/0
* no predictions - i.e., FBeta would be calculated from a precision or recall of 0/0
*
* @return Number of classes excluded from the average FBeta
*/

View File

@@ -83,7 +83,7 @@ public class ScatterUpdate implements CustomOp {
*/
@Override
public String opName() {
return op.opName();
return "scatter_update";
}
/**

View File

@@ -28,7 +28,7 @@ public class SRUCellConfiguration {
NDArray<T>* xt = INPUT_VARIABLE(0); // input [batchSize x inSize], batchSize - batch size, inSize - number of features
NDArray<T>* ct_1 = INPUT_VARIABLE(1); // previous cell state ct [batchSize x inSize], that is at previous time step t-1
NDArray<T>* w = INPUT_VARIABLE(2); // weights [inSize x 3*inSize]
NDArray<T>* b = INPUT_VARIABLE(3); // biases [1 × 2*inSize]
NDArray<T>* b = INPUT_VARIABLE(3); // biases [1 x 2*inSize]
NDArray<T>* ht = OUTPUT_VARIABLE(0); // current cell output [batchSize x inSize], that is at current time step t
NDArray<T>* ct = OUTPUT_VARIABLE(1); // current cell state [batchSize x inSize], that is at current time step t

View File

@@ -26,7 +26,7 @@ public class SRUConfiguration {
/**
* NDArray<T>* input = INPUT_VARIABLE(0); // X, input 3d tensor [bS x K x N], N - number of time steps, bS - batch size, K - number of features
NDArray<T>* weights = INPUT_VARIABLE(1); // W, 2d tensor of weights [3K x K]
NDArray<T>* bias = INPUT_VARIABLE(2); // B, row of biases with twice length [1 × 2*K]
NDArray<T>* bias = INPUT_VARIABLE(2); // B, row of biases with twice length [1 x 2*K]
NDArray<T>* init = INPUT_VARIABLE(3); // C_{0}, 2d tensor of initial state [bS x K] at time t=0
*/

View File

@@ -103,7 +103,7 @@ public class AdaDeltaUpdater implements GradientUpdater<AdaDelta> {
double epsilon = config.getEpsilon();
//Line 4 of Algorithm 1: https://arxiv.org/pdf/1212.5701v1.pdf
//E[g^2]_t = rho * E[g^2]_{t1} + (1-rho)*g^2_t
//E[g^2]_t = rho * E[g^2]_{t-1} + (1-rho)*g^2_t
msg.muli(rho).addi(gradient.mul(gradient).muli(1 - rho));
//Calculate update:
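
The corrected comment is the gradient-accumulation step (Line 4 of Algorithm 1 in the linked AdaDelta paper); the update itself is then formed from the two running averages. A minimal scalar sketch of the full rule, assuming the standard formulation from the paper (the class and field names are illustrative, not the updater's actual internals):

public class AdaDeltaScalarSketch {
    private double msg = 0.0;   // E[g^2]:  running average of squared gradients
    private double msdx = 0.0;  // E[dx^2]: running average of squared updates

    /** One AdaDelta step for a scalar gradient g; returns the delta to add to the parameter. */
    public double step(double g, double rho, double epsilon) {
        msg = rho * msg + (1.0 - rho) * g * g;                                    // Line 4: accumulate gradient
        double dx = -(Math.sqrt(msdx + epsilon) / Math.sqrt(msg + epsilon)) * g;  // Line 5: compute update
        msdx = rho * msdx + (1.0 - rho) * dx * dx;                                // Line 6: accumulate updates
        return dx;                                                                // Line 7: x_{t+1} = x_t + dx
    }
}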