Op validation improvements + encoding fix (#49)
* Op validation updates
  Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Small logging fix
  Signed-off-by: AlexDBlack <blacka101@gmail.com>
* #7986 Fix minor character encoding issues
  Signed-off-by: AlexDBlack <blacka101@gmail.com>
* Small ignore fix
  Signed-off-by: AlexDBlack <blacka101@gmail.com>

Branch: master
parent 88ea9a49eb
commit cc65c01118
@@ -145,7 +145,7 @@ DECLARE_SHAPE_FN(sru) {
 CUSTOM_OP_IMPL(sru_bp, 8, 4, true, 0, 0) {
     auto x = INPUT_VARIABLE(0); // X, input 3d tensor [bS x K x N], N - number of time steps, bS - batch size, K - number of features
     auto w = INPUT_VARIABLE(1); // W, 2d tensor of weights [3K x K]
-    auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 × 2*K]
+    auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 x 2*K]
     auto c0 = INPUT_VARIABLE(3); // C_{0}, 2d tensor of initial state [bS x K] at time t=0
     auto c = INPUT_VARIABLE(4); // C, [bS x K x N]
     auto inGradCt = INPUT_VARIABLE(5); // [bS x K]

@@ -331,7 +331,7 @@ CUSTOM_OP_IMPL(sru_bi, 5, 2, true, 0, 0) {

     auto x = INPUT_VARIABLE(0); // X, input 3d tensor [time x bS x 2*inSize], time - number of time steps, bS - batch size, inSize - number of features
     auto w = INPUT_VARIABLE(1); // W, 2d tensor of weights [2*inSize x 6*inSize]
-    auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 × 4*inSize]
+    auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 x 4*inSize]
     auto c0 = INPUT_VARIABLE(3); // C_{0}, 2d tensor of initial state [bS x 2*inSize] at time t=0
     NDArray* mask = block.width() > 4 ? INPUT_VARIABLE(4) : nullptr; // optional, 2d tensor of dropout mask [bS x 2*inSize]

@@ -431,7 +431,7 @@ CUSTOM_OP_IMPL(sru_bi_bp, 8, 4, true, 0, 0) {

     auto x = INPUT_VARIABLE(0); // X, input 3d tensor [time x bS x 2*inSize], time - number of time steps, bS - batch size, inSize - number of features
     auto w = INPUT_VARIABLE(1); // W, 2d tensor of weights [2*inSize x 6*inSize]
-    auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 × 4*inSize]
+    auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 x 4*inSize]
     auto c0 = INPUT_VARIABLE(3); // C_{0}, 2d tensor of initial state [bS x 2*inSize] at time t=0
     auto ct = INPUT_VARIABLE(4); // C, [time x bS x 2*inSize]
     auto inGradC0 = INPUT_VARIABLE(5); // [bS x 2*inSize]

@@ -553,7 +553,7 @@ DECLARE_SHAPE_FN(sru_bi_bp) {
 * Input arrays:
 * 0: input 3d tensor with shape [bS x K x N], N - number of time steps, bS - batch size, K - number of features
 * 1: 2d tensor of weights [3K x K]
-* 2: row of biases with twice length [1 × 2K]
+* 2: row of biases with twice length [1 x 2K]
 * 3: 2d tensor of previous cell state [bS x K]
 * 4: optional, 2d tensor of dropout mask [bS x K]
 *

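Note: the `×` → `x` change in the bias comment above is the substance of the character-encoding fix referenced in the commit message (#7986). As a usage aside, the documented input layout is exactly what a caller must supply; below is a minimal, hypothetical sketch of invoking the `sru` op from Java via nd4j's `DynamicCustomOp` builder (shapes follow the comment; the class name, sizes, and random data are illustrative only, and output handling may differ by nd4j version):

```java
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.ops.DynamicCustomOp;
import org.nd4j.linalg.factory.Nd4j;

public class SruInvocationSketch {
    public static void main(String[] args) {
        int bS = 2, K = 3, N = 5;                          // batch size, features, time steps

        INDArray x  = Nd4j.rand(DataType.FLOAT, bS, K, N); // 0: input [bS x K x N]
        INDArray w  = Nd4j.rand(DataType.FLOAT, 3 * K, K); // 1: weights [3K x K]
        INDArray b  = Nd4j.rand(DataType.FLOAT, 1, 2 * K); // 2: biases [1 x 2K]
        INDArray c0 = Nd4j.rand(DataType.FLOAT, bS, K);    // 3: previous cell state [bS x K]

        // Input 4 (dropout mask) is optional and omitted here
        DynamicCustomOp op = DynamicCustomOp.builder("sru")
                .addInputs(x, w, b, c0)
                .build();
        INDArray[] out = Nd4j.exec(op);                    // output shapes inferred by the op
    }
}
```
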
@@ -572,7 +572,7 @@ DECLARE_SHAPE_FN(sru_bi_bp) {
 * Input arrays:
 * 0: input 3d tensor with shape [bS x K x N], N - number of time steps, bS - batch size, K - number of features
 * 1: 2d tensor of weights [3K x K]
-* 2: row of biases with twice length [1 × 2K]
+* 2: row of biases with twice length [1 x 2K]
 * 3: 2d tensor of previous cell state [bS x K]
 * 4: optional, 2d tensor of dropout mask [bS x K]
 *

@@ -592,7 +592,7 @@ DECLARE_SHAPE_FN(sru_bi_bp) {
 * Input arrays:
 * 0: input 3d tensor with shape [bS x K x N], N - number of time steps, bS - batch size, K - number of features
 * 1: 2d tensor of weights [3K x K]
-* 2: row of biases with twice length [1 × 2K]
+* 2: row of biases with twice length [1 x 2K]
 * 3: 2d tensor of previous cell state [bS x K]
 * 4: 3d tensor of cell state [bS x K x N]
 * 5: 2d tensor of cell state gradients [bS x K]

@@ -622,7 +622,7 @@ DECLARE_SHAPE_FN(sru_bi_bp) {

 // auto input = INPUT_VARIABLE(0); // X, input 3d tensor [bS x K x N], N - number of time steps, bS - batch size, K - number of features
 // auto weights = INPUT_VARIABLE(1); // W, 2d tensor of weights [3K x K]
-// auto bias = INPUT_VARIABLE(2); // B, row of biases with twice length [1 × 2*K]
+// auto bias = INPUT_VARIABLE(2); // B, row of biases with twice length [1 x 2*K]
 // auto init = INPUT_VARIABLE(3); // C_{0}, 2d tensor of initial state [bS x K] at time t=0
 // NDArray* mask = nullptr; // optional, 2d tensor of dropout mask [bS x K]

@@ -710,7 +710,7 @@ DECLARE_SHAPE_FN(sru_bi_bp) {
 // CUSTOM_OP_IMPL(sru_old, 5, 2, false, 0, 0) {
 // auto x = INPUT_VARIABLE(0); // X, input 3d tensor [bS x inSize x time], time - number of time steps, bS - batch size, inSize - number of features
 // auto w = INPUT_VARIABLE(1); // W, 2d tensor of weights [3K x inSize]
-// auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 × 2*inSize]
+// auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 x 2*inSize]
 // auto c0 = INPUT_VARIABLE(3); // C_{0}, 2d tensor of initial state [bS x inSize] at time t=0
 // NDArray* mask = nullptr; // optional, 2d tensor of dropout mask [bS x inSize]

@@ -820,7 +820,7 @@ DECLARE_SHAPE_FN(sru_bi_bp) {

 // auto x = INPUT_VARIABLE(0); // X, input 3d tensor [bS x inSize x time], time - number of time steps, bS - batch size, inSize - number of features
 // auto w = INPUT_VARIABLE(1); // W, 2d tensor of weights [3*inSize x inSize]
-// auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 × 2*inSize]
+// auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 x 2*inSize]
 // auto c0 = INPUT_VARIABLE(3); // C_{0}, 2d tensor of initial state [bS x inSize] at time t=0
 // auto c = INPUT_VARIABLE(4); // C, [bS x inSize x time]
 // auto inGradCt = INPUT_VARIABLE(5); // [bS x inSize]

@@ -34,7 +34,7 @@ CUSTOM_OP_IMPL(sruCell, 4, 2, false, 0, 0) {
     auto xt = INPUT_VARIABLE(0); // input [bS x inSize], bS - batch size, inSize - number of features
     auto ct_1 = INPUT_VARIABLE(1); // previous cell state ct [bS x inSize], that is at previous time step t-1
     auto w = INPUT_VARIABLE(2); // weights [inSize x 3*inSize]
-    auto b = INPUT_VARIABLE(3); // biases [1 × 2*inSize]
+    auto b = INPUT_VARIABLE(3); // biases [1 x 2*inSize]

     auto ht = OUTPUT_VARIABLE(0); // current cell output [bS x inSize], that is at current time step t
     auto ct = OUTPUT_VARIABLE(1); // current cell state [bS x inSize], that is at current time step t

@@ -33,7 +33,7 @@ namespace ops {
 * Input arrays:
 * 0: input 3d tensor with shape [bS x K x N], N - number of time steps, bS - batch size, K - number of features
 * 1: 2d tensor of weights [3K x K]
-* 2: row of biases with twice length [1 × 2K]
+* 2: row of biases with twice length [1 x 2K]
 * 3: 2d tensor of previous cell state [bS x K]
 * 4: optional, 2d tensor of dropout mask [bS x K]
 *

@@ -52,7 +52,7 @@ namespace ops {
 * Input arrays:
 * 0: input 3d tensor with shape [N x bS x 2K], N - number of time steps, bS - batch size, K - number of features
 * 1: 2d tensor of weights [2K x 6K]
-* 2: row of biases with twice length [1 × 4K]
+* 2: row of biases with twice length [1 x 4K]
 * 3: 2d tensor of previous cell state [bS x 2K]
 * 4: optional, 2d tensor of dropout mask [bS x 2K]
 *

@@ -72,7 +72,7 @@ namespace ops {
 * Input arrays:
 * 0: input 3d tensor with shape [bS x K x N], N - number of time steps, bS - batch size, K - number of features
 * 1: 2d tensor of weights [3K x K]
-* 2: row of biases with twice length [1 × 2K]
+* 2: row of biases with twice length [1 x 2K]
 * 3: 2d tensor of previous cell state [bS x K]
 * 4: 3d tensor of cell state [bS x K x N]
 * 5: 2d tensor of cell state gradients [bS x K]

@@ -96,7 +96,7 @@ namespace ops {
 * Input arrays:
 * 0: input 3d tensor with shape [N x bS x 2K], N - number of time steps, bS - batch size, K - number of features
 * 1: 2d tensor of weights [2K x 6K]
-* 2: row of biases with twice length [1 × 4K]
+* 2: row of biases with twice length [1 x 4K]
 * 3: 2d tensor of previous cell state [bS x 2K]
 * 4: 3d tensor of cell state [N x bS x 2K]
 * 5: 2d tensor of cell state gradients [bS x 2K]

@@ -239,7 +239,7 @@ namespace ops {
 * 0: input with shape [batchSize x inSize], batchSize - batch size, inSize - number of features
 * 1: previous cell state [batchSize x inSize], that is at previous time step t-1
 * 2: weights [inSize x 3*inSize]
-* 3: biases [1 × 2*inSize]
+* 3: biases [1 x 2*inSize]
 *
 * Output arrays:
 * 0: current cell output [batchSize x inSize], that is at current time step t

@@ -110,7 +110,7 @@ static void sruBI_(NDArray* x, const NDArray* w, const NDArray* b, const NDArray

 // x     input 3d tensor [time x bS x 2*inSize], time - number of time steps, bS - batch size, inSize - number of features
 // w     2d tensor of weights [2*inSize x 6*inSize]
-// b     row of biases with twice length [1 × 4*inSize]
+// b     row of biases with twice length [1 x 4*inSize]
 // c0    2d tensor of initial state [bS x 2*inSize] at time t=0
 // mask  optional, 2d tensor of dropout mask [bS x 2*inSize]

@@ -193,7 +193,7 @@ static void sruBIBP_(NDArray* x, const NDArray* w, const NDArray* b, const NDArr

 // x        input 3d tensor [time x bS x 2*inSize], time - number of time steps, bS - batch size, inSize - number of features
 // w        2d tensor of weights [2*inSize x 6*inSize]
-// b        row of biases with twice length [1 × 4*inSize]
+// b        row of biases with twice length [1 x 4*inSize]
 // c0       2d tensor of initial state [bS x 2*inSize] at time t=0
 // ct       [time x bS x 2*inSize]
 // inGradC0 [bS x 2*inSize]

@@ -35,9 +35,24 @@ import org.nd4j.linalg.api.ndarray.INDArray;
 import org.nd4j.linalg.api.ops.CustomOpDescriptor;
 import org.nd4j.linalg.api.ops.DefaultOpConverter;
 import org.nd4j.linalg.api.ops.DynamicCustomOp;
+import org.nd4j.linalg.api.ops.custom.BarnesEdgeForces;
+import org.nd4j.linalg.api.ops.custom.BarnesHutGains;
+import org.nd4j.linalg.api.ops.custom.BarnesHutSymmetrize;
+import org.nd4j.linalg.api.ops.custom.SpTreeCell;
 import org.nd4j.linalg.api.ops.impl.broadcast.bool.*;
+import org.nd4j.linalg.api.ops.impl.layers.ExternalErrorsFunction;
+import org.nd4j.linalg.api.ops.impl.loss.bp.*;
+import org.nd4j.linalg.api.ops.impl.meta.InvertedPredicateMetaOp;
+import org.nd4j.linalg.api.ops.impl.meta.PostulateMetaOp;
+import org.nd4j.linalg.api.ops.impl.meta.PredicateMetaOp;
+import org.nd4j.linalg.api.ops.impl.meta.ReduceMetaOp;
+import org.nd4j.linalg.api.ops.impl.nlp.CbowRound;
+import org.nd4j.linalg.api.ops.impl.nlp.SkipGramRound;
+import org.nd4j.linalg.api.ops.impl.reduce.MmulBp;
 import org.nd4j.linalg.api.ops.impl.reduce.bool.All;
 import org.nd4j.linalg.api.ops.impl.reduce.bool.Any;
+import org.nd4j.linalg.api.ops.impl.reduce.bool.IsInf;
+import org.nd4j.linalg.api.ops.impl.reduce.bool.IsNaN;
 import org.nd4j.linalg.api.ops.impl.reduce.longer.MatchCondition;
 import org.nd4j.linalg.api.ops.impl.reduce3.EqualsWithEps;
 import org.nd4j.linalg.api.ops.impl.reduce.NormalizeMoments;

@@ -49,21 +64,26 @@ import org.nd4j.linalg.api.ops.impl.layers.convolution.*;
 import org.nd4j.linalg.api.ops.impl.scalar.PowDerivative;
 import org.nd4j.linalg.api.ops.impl.scalar.ScalarRemainder;
 import org.nd4j.linalg.api.ops.impl.scalar.comparison.ScalarSetValue;
-import org.nd4j.linalg.api.ops.impl.shape.ConfusionMatrix;
-import org.nd4j.linalg.api.ops.impl.shape.Eye;
-import org.nd4j.linalg.api.ops.impl.shape.MergeSum;
-import org.nd4j.linalg.api.ops.impl.shape.OneHot;
+import org.nd4j.linalg.api.ops.impl.shape.*;
 import org.nd4j.linalg.api.ops.impl.shape.bp.ConcatBp;
 import org.nd4j.linalg.api.ops.impl.shape.bp.SliceBp;
 import org.nd4j.linalg.api.ops.impl.shape.bp.StridedSliceBp;
 import org.nd4j.linalg.api.ops.impl.shape.bp.TileBp;
-import org.nd4j.linalg.api.ops.impl.transforms.custom.InvertPermutation;
+import org.nd4j.linalg.api.ops.impl.transforms.Assert;
+import org.nd4j.linalg.api.ops.impl.transforms.bool.BooleanNot;
+import org.nd4j.linalg.api.ops.impl.transforms.bool.MatchConditionTransform;
+import org.nd4j.linalg.api.ops.impl.transforms.custom.*;
 import org.nd4j.linalg.api.ops.impl.transforms.floating.Histogram;
 import org.nd4j.linalg.api.ops.impl.transforms.pairwise.BinaryMinimalRelativeError;
 import org.nd4j.linalg.api.ops.impl.transforms.pairwise.arithmetic.bp.*;
 import org.nd4j.linalg.api.ops.impl.transforms.gradient.*;
 import org.nd4j.linalg.api.ops.impl.transforms.gradient.SigmoidDerivative;
 import org.nd4j.linalg.api.ops.impl.transforms.gradient.TanhDerivative;
+import org.nd4j.linalg.api.ops.impl.transforms.pairwise.bool.Not;
+import org.nd4j.linalg.api.ops.impl.transforms.segment.UnsortedSegmentMax;
+import org.nd4j.linalg.api.ops.impl.transforms.segment.bp.*;
+import org.nd4j.linalg.api.ops.impl.transforms.strict.GELUDerivative;
+import org.nd4j.linalg.api.ops.impl.transforms.strict.PreciseGELUDerivative;
 import org.nd4j.linalg.api.ops.impl.transforms.strict.SwishDerivative;
 import org.nd4j.linalg.api.ops.impl.transforms.strict.TanDerivative;
 import org.nd4j.linalg.api.ops.persistence.RestoreV2;

@@ -71,6 +91,7 @@ import org.nd4j.linalg.api.ops.persistence.SaveV2;
 import org.nd4j.linalg.api.ops.random.compat.RandomStandardNormal;
 import org.nd4j.linalg.api.ops.random.custom.DistributionUniform;
 import org.nd4j.linalg.api.ops.random.impl.*;
+import org.nd4j.linalg.api.ops.random.impl.Linspace;
 import org.nd4j.linalg.api.shape.LongShapeDescriptor;
 import org.nd4j.linalg.factory.Nd4j;
 import org.nd4j.linalg.function.Function;

@@ -681,10 +702,22 @@ public class OpValidation {
             }
         }

+        int countLibnd4jIgnored = 0;
         if(logUnmappedLibnd4jOps ){
+            Set<String> ignoreLibnd4j = excludeFromLibnd4jCustomOpMapping();
             log.info(" --- Libnd4j Ops Not Mapped ---");
             for(long l : nonMappedLibnd4jOps){
                 Pair<List<String>,CustomOpDescriptor> p = dedupedCustomOps.get(l);
+                boolean foundIgnore = false;
+                for(String s : p.getFirst()){
+                    if(ignoreLibnd4j.contains(s)){
+                        foundIgnore = true;
+                        countLibnd4jIgnored++;
+                        break;
+                    }
+                }
+                if(foundIgnore)
+                    continue;
                 log.info("Not mapped libnd4j custom op: {} (hash: {})", p.getFirst(), l);
             }
         }

@@ -712,6 +745,7 @@ public class OpValidation {
         }

         if(logUnmappedTFOps){
+            log.info(" --- TF Ops Not Mapped for Import ---");
             Map<String,OpDef> allTFOps;
             try{
                 allTFOps = TensorflowDescriptorParser.opDescs();

@@ -760,7 +794,7 @@ public class OpValidation {
         String fracTfStr = String.format("%.2f", 100.0 * tfFrac);

         int countLibnd4jMapped = countTotalLibnd4jOps - nonMappedLibnd4jOps.size();
-        String fracLibnd4j = String.format("%.2f", 100.0 * (countLibnd4jMapped / (double)countTotalLibnd4jOps));
+        String fracLibnd4j = String.format("%.2f", 100.0 * (countLibnd4jMapped / (double)(countTotalLibnd4jOps - countLibnd4jIgnored)));

         String fracTFMappedTested = String.format("%.2f", 100.0 * tfOpsWithImportTests / (double)(totalTFMappedOps-tfImportIgnored));

@@ -772,7 +806,7 @@ public class OpValidation {
         log.info("({} ops excluded from fwd+gradient tests)", excludedFromAllTestCoverage.size());
         log.info("TF mapped ops: {} of {} ({}%)", countTfMapped, countTf, fracTfStr);
         log.info("SD ops with TF import mapping + test {} of {} ({}%) - {} ignored for coverage", tfOpsWithImportTests, (totalTFMappedOps-tfImportIgnored), fracTFMappedTested, tfImportIgnored);
-        log.info("Libnd4j mapped ops: {} of {} ({}%)", countLibnd4jMapped, countTotalLibnd4jOps, fracLibnd4j);
+        log.info("Libnd4j mapped ops: {} of {} ({}%) - {} excluded for coverage", countLibnd4jMapped, countTotalLibnd4jOps, fracLibnd4j, countLibnd4jIgnored);
         log.info("*****************************************************");
     }

@@ -832,9 +866,12 @@ public class OpValidation {
             CumProdBp.class,
             DotBp.class,
             SquaredNormBp.class,
+            SoftmaxBp.class,

             CubeDerivative.class,
             ELUDerivative.class,
+            GELUDerivative.class,
+            PreciseGELUDerivative.class,
             HardSigmoidDerivative.class,
             HardTanhDerivative.class,
             LeakyReLUDerivative.class,

@@ -872,13 +909,54 @@ public class OpValidation {

             SliceBp.class,
             StridedSliceBp.class,
+            MmulBp.class,
+            DotProductAttentionBp.class,
+            MultiHeadDotProductAttentionBp.class,
+            LayerNormBp.class,
+            StandardizeBp.class,
+            DynamicPartitionBp.class,

-            //We can't use these dropout ops in SameDiff: https://github.com/deeplearning4j/deeplearning4j/issues/5650
-            DropOut.class,
-            DropOutInverted.class,
-            AlphaDropOut.class,
-            Choice.class,
-            ProbablisticMerge.class
+            AbsoluteDifferenceLossBp.class,
+            CosineDistanceLossBp.class,
+            HingeLossBp.class,
+            HuberLossBp.class,
+            LogLossBp.class,
+            LogPoissonLossBp.class,
+            MeanPairwiseSquaredErrorLossBp.class,
+            MeanSquaredErrorLossBp.class,
+            SigmoidCrossEntropyLossBp.class,
+            SoftmaxCrossEntropyLossBp.class,
+            SparseSoftmaxCrossEntropyLossWithLogitsBp.class,
+
+            SegmentMaxBp.class,
+            SegmentMeanBp.class,
+            SegmentMinBp.class,
+            SegmentProdBp.class,
+            SegmentSumBp.class,
+            UnsortedSegmentMaxBp.class,
+            UnsortedSegmentMeanBp.class,
+            UnsortedSegmentMinBp.class,
+            UnsortedSegmentProdBp.class,
+            UnsortedSegmentSqrtNBp.class,
+            UnsortedSegmentSumBp.class,
+
+            //Not intended for general users; only used in DL4J SameDiff integration + tested adequately there
+            ExternalErrorsFunction.class,
+
+            //Meta-Ops: not available in SameDiff
+            InvertedPredicateMetaOp.class,
+            PostulateMetaOp.class,
+            PredicateMetaOp.class,
+            ReduceMetaOp.class,
+
+            //Ops not intended to be used in SameDiff:
+            BarnesEdgeForces.class,
+            BarnesHutGains.class,
+            BarnesHutSymmetrize.class,
+            SpTreeCell.class,
+            CbowRound.class,
+            SkipGramRound.class
         );

         return new HashSet<>(list);

@@ -907,9 +985,21 @@ public class OpValidation {
             InvertPermutation.class, //Uses integer indices
             ConfusionMatrix.class, //Integer indices
             Linspace.class, //No input array
-            //Exclude boolean operations:
+            Assert.class,
+            //Exclude boolean operations, boolean reductions, etc:
             Any.class,
             All.class,
+            IsInf.class,
+            org.nd4j.linalg.api.ops.impl.transforms.bool.IsInf.class,
+            IsNaN.class,
+            org.nd4j.linalg.api.ops.impl.transforms.bool.IsNaN.class,
+            BooleanNot.class,
+            Not.class,
+            MatchConditionTransform.class,
+            InTopK.class,
+            IsNonDecreasing.class,
+            IsStrictlyIncreasing.class,
+            IsNumericTensor.class,
             //Exclude index accumulations (index out, not real-valued)
             FirstIndex.class,
             IAMax.class,

@@ -917,6 +1007,12 @@ public class OpValidation {
             IMax.class,
             IMin.class,
             LastIndex.class,
+
+            //Exclude ops that output integer types only:
+            Shape.class,
+            ShapeN.class,
+            SizeAt.class,
+
             //Exclude Random ops
             RandomStandardNormal.class,
             DistributionUniform.class,

@@ -949,7 +1045,12 @@ public class OpValidation {
             ProdBp.class,
             StandardDeviationBp.class,
             SumBp.class,
-            VarianceBp.class
+            VarianceBp.class,
+
+            LogicalAnd.class,
+            LogicalNot.class,
+            LogicalOr.class,
+            LogicalXor.class
         );

         return new HashSet<>(list);

@@ -981,6 +1082,72 @@ public class OpValidation {
             "BatchSelfAdjointEigV2", //Deprecated in favor of "SelfAdjointEigV2"
             "BatchSvd", //Deprecated in favor of "Svd"
+
+            //These we will likely never support importing
+            "ExperimentalBytesProducedStatsDataset",
+            "ExperimentalCSVDataset",
+            "ExperimentalDatasetCardinality",
+            "ExperimentalDatasetToTFRecord",
+            "ExperimentalDenseToSparseBatchDataset",
+            "ExperimentalDirectedInterleaveDataset",
+            "ExperimentalGroupByReducerDataset",
+            "ExperimentalGroupByWindowDataset",
+            "ExperimentalIdentityIndexedDataset",
+            "ExperimentalIgnoreErrorsDataset",
+            "ExperimentalIndexedDatasetGet",
+            "ExperimentalIndexedDatasetMaterialize",
+            "ExperimentalIteratorGetDevice",
+            "ExperimentalLMDBDataset",
+            "ExperimentalLatencyStatsDataset",
+            "ExperimentalMapAndBatchDataset",
+            "ExperimentalMapDataset",
+            "ExperimentalMatchingFilesDataset",
+            "ExperimentalMaterializedIndexDatasetHandle",
+            "ExperimentalMaxIntraOpParallelismDataset",
+            "ExperimentalNonSerializableDataset",
+            "ExperimentalNumaMapAndBatchDataset",
+            "ExperimentalParallelInterleaveDataset",
+            "ExperimentalParseExampleDataset",
+            "ExperimentalPrivateThreadPoolDataset",
+            "ExperimentalRandomDataset",
+            "ExperimentalScanDataset",
+            "ExperimentalSetStatsAggregatorDataset",
+            "ExperimentalSleepDataset",
+            "ExperimentalSlidingWindowDataset",
+            "ExperimentalSqlDataset",
+            "ExperimentalStatsAggregatorHandle",
+            "ExperimentalStatsAggregatorSummary",
+            "ExperimentalThreadPoolDataset",
+            "ExperimentalThreadPoolHandle",
+            "ExperimentalUnbatchDataset",
+            "ExperimentalUniqueDataset",
+
+            "DebugIdentity",
+            "NcclAllReduce",
+            "NcclBroadcast",
+            "NcclReduce",
+
+            //Can't import these without embedding entire python runtime and dependencies
+            "PyFunc",
+            "PyFuncStateless",
+
+            //"QuantizedX" ops are deprecated / no longer supported ("standard" ops have quantized support in many cases)
+            "QuantizedAdd",
+            "QuantizedAvgPool",
+            "QuantizedBatchNormWithGlobalNormalization",
+            "QuantizedBiasAdd",
+            "QuantizedConcat",
+            "QuantizedConv2D",
+            "QuantizedInstanceNorm",
+            "QuantizedMatMul",
+            "QuantizedMaxPool",
+            "QuantizedMul",
+            "QuantizedRelu",
+            "QuantizedRelu6",
+            "QuantizedReluX",
+            "QuantizedReshape",
+            "QuantizedResizeBilinear",
+
+
             //All of the following ops - not available in TF (can't find them) - op mapping is wrong?
             //TODO: Check these and remove the import mapping from the Java classes if they are indeed bad
             "HardTanh",

@@ -993,12 +1160,37 @@ public class OpValidation {
             "absargmin",
             "entropy_shannon", //This is a thing, but quite different from our op: https://www.tensorflow.org/versions/r1.2/api_docs/python/tf/contrib/bayesflow/entropy/entropy_shannon
             "count_zero"



         );

         return new HashSet<>(list);
     }

+
+    /**
+     * These ops are ones we will never map at Java level for one reason or another
+     */
+    private static Set<String> excludeFromLibnd4jCustomOpMapping(){
+        Set<String> out = new HashSet<>();
+        Collections.addAll(out,
+                //Test and misc ops:
+                "TestOp2i2o", "testop2i2o",
+                "firas_sparse",
+                "test_output_reshape",
+                "test_scalar",
+                "testcustom",
+                "testreduction",
+
+                //"to_x" ops - we'll use cast instead in SameDiff (which supports all dtypes)
+                "to_double",
+                "to_float16",
+                "to_float32",
+                "to_int32",
+                "to_int64",
+                "to_uint32",
+                "to_uint64"
+        );
+
+        return out;
+    }
+
 }

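The `//"to_x" ops` comment in the new `excludeFromLibnd4jCustomOpMapping()` method records a design decision: rather than mapping one Java op per target dtype, conversions go through a generic cast. A hedged illustration using nd4j's `INDArray.castTo` (class and variable names illustrative only):

```java
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class CastSketch {
    public static void main(String[] args) {
        INDArray x = Nd4j.rand(DataType.FLOAT, 2, 2);
        INDArray asDouble = x.castTo(DataType.DOUBLE); // stands in for a dedicated "to_double" op
        INDArray asInt    = x.castTo(DataType.INT);    // likewise for "to_int32"
    }
}
```
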
@@ -702,7 +702,7 @@ public class Evaluation extends BaseEvaluation<Evaluation> {
             builder.append("\nCost array: ").append(Arrays.toString(costArray.dup().data().asFloat()));
         }
         //Note that we could report micro-averaged too - but these are the same as accuracy
-        //"Note that for “micro<EFBFBD>?-averaging in a multiclass setting with all labels included will produce equal precision, recall and F,"
+        //"Note that for “micro-averaging in a multiclass setting with all labels included will produce equal precision, recall and F,"
         //http://scikit-learn.org/stable/modules/model_evaluation.html

         builder.append("\n\n");

@@ -884,7 +884,7 @@ public class Evaluation extends BaseEvaluation<Evaluation> {

     /**
      * When calculating the (macro) average precision, how many classes are excluded from the average due to
-     * no predictions – i.e., precision would be the edge case of 0/0
+     * no predictions - i.e., precision would be the edge case of 0/0
      *
      * @return Number of classes excluded from the average precision
      */

@@ -894,7 +894,7 @@ public class Evaluation extends BaseEvaluation<Evaluation> {

     /**
      * When calculating the (macro) average Recall, how many classes are excluded from the average due to
-     * no predictions – i.e., recall would be the edge case of 0/0
+     * no predictions - i.e., recall would be the edge case of 0/0
      *
      * @return Number of classes excluded from the average recall
      */

@@ -904,7 +904,7 @@ public class Evaluation extends BaseEvaluation<Evaluation> {

     /**
      * When calculating the (macro) average F1, how many classes are excluded from the average due to
-     * no predictions – i.e., F1 would be calculated from a precision or recall of 0/0
+     * no predictions - i.e., F1 would be calculated from a precision or recall of 0/0
      *
      * @return Number of classes excluded from the average F1
      */

@@ -914,7 +914,7 @@ public class Evaluation extends BaseEvaluation<Evaluation> {

     /**
      * When calculating the (macro) average FBeta, how many classes are excluded from the average due to
-     * no predictions – i.e., FBeta would be calculated from a precision or recall of 0/0
+     * no predictions - i.e., FBeta would be calculated from a precision or recall of 0/0
      *
      * @return Number of classes excluded from the average FBeta
      */

@@ -83,7 +83,7 @@ public class ScatterUpdate implements CustomOp {
      */
     @Override
     public String opName() {
-        return op.opName();
+        return "scatter_update";
     }

     /**

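The one-line change above pins `ScatterUpdate.opName()` to the canonical libnd4j op name instead of delegating to the wrapped op, whose `opName()` reflects whatever the delegate happens to report. A minimal sketch of the delegation pattern involved (the class shape here is illustrative, not the actual source beyond the lines shown in the diff):

```java
import org.nd4j.linalg.api.ops.DynamicCustomOp;

public class ScatterUpdateSketch {
    private final DynamicCustomOp op; // wrapped op that performs the actual update

    public ScatterUpdateSketch(DynamicCustomOp op) {
        this.op = op;
    }

    public String opName() {
        // Return the canonical name directly; op.opName() would depend on the delegate
        return "scatter_update";
    }
}
```
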
@@ -28,7 +28,7 @@ public class SRUCellConfiguration {
     NDArray<T>* xt = INPUT_VARIABLE(0); // input [batchSize x inSize], batchSize - batch size, inSize - number of features
     NDArray<T>* ct_1 = INPUT_VARIABLE(1); // previous cell state ct [batchSize x inSize], that is at previous time step t-1
     NDArray<T>* w = INPUT_VARIABLE(2); // weights [inSize x 3*inSize]
-    NDArray<T>* b = INPUT_VARIABLE(3); // biases [1 × 2*inSize]
+    NDArray<T>* b = INPUT_VARIABLE(3); // biases [1 x 2*inSize]

     NDArray<T>* ht = OUTPUT_VARIABLE(0); // current cell output [batchSize x inSize], that is at current time step t
     NDArray<T>* ct = OUTPUT_VARIABLE(1); // current cell state [batchSize x inSize], that is at current time step t

@@ -26,7 +26,7 @@ public class SRUConfiguration {
     /**
      * NDArray<T>* input = INPUT_VARIABLE(0); // X, input 3d tensor [bS x K x N], N - number of time steps, bS - batch size, K - number of features
        NDArray<T>* weights = INPUT_VARIABLE(1); // W, 2d tensor of weights [3K x K]
-       NDArray<T>* bias = INPUT_VARIABLE(2); // B, row of biases with twice length [1 × 2*K]
+       NDArray<T>* bias = INPUT_VARIABLE(2); // B, row of biases with twice length [1 x 2*K]
        NDArray<T>* init = INPUT_VARIABLE(3); // C_{0}, 2d tensor of initial state [bS x K] at time t=0

      */

@@ -103,7 +103,7 @@ public class AdaDeltaUpdater implements GradientUpdater<AdaDelta> {
         double epsilon = config.getEpsilon();

         //Line 4 of Algorithm 1: https://arxiv.org/pdf/1212.5701v1.pdf
-        //E[g^2]_t = rho * E[g^2]_{t−1} + (1-rho)*g^2_t
+        //E[g^2]_t = rho * E[g^2]_{t-1} + (1-rho)*g^2_t
         msg.muli(rho).addi(gradient.mul(gradient).muli(1 - rho));

         //Calculate update:

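For reference, the corrected comment is line 4 of AdaDelta's Algorithm 1 (Zeiler 2012, linked above); the encoding fix replaces a Unicode minus with an ASCII `-`. The full per-step update, which the surrounding code implements term by term, is:

```latex
% rho = decay rate, epsilon = numerical-stability constant, g_t = gradient at step t
\begin{align}
E[g^2]_t &= \rho\,E[g^2]_{t-1} + (1-\rho)\,g_t^2
  && \text{line 4: accumulate squared gradients}\\
\Delta x_t &= -\frac{\sqrt{E[\Delta x^2]_{t-1} + \epsilon}}{\sqrt{E[g^2]_t + \epsilon}}\,g_t
  && \text{line 5: scale gradient by the two RMS terms}\\
E[\Delta x^2]_t &= \rho\,E[\Delta x^2]_{t-1} + (1-\rho)\,\Delta x_t^2
  && \text{line 6: accumulate squared updates}
\end{align}
```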