Op validation improvements + encoding fix (#49)
* Op validation updates

Signed-off-by: AlexDBlack <blacka101@gmail.com>

* Small logging fix

Signed-off-by: AlexDBlack <blacka101@gmail.com>

* #7986 Fix minor character encoding issues

Signed-off-by: AlexDBlack <blacka101@gmail.com>

* Small ignore fix

Signed-off-by: AlexDBlack <blacka101@gmail.com>
parent 88ea9a49eb
commit cc65c01118
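Most of what follows is the #7986 encoding cleanup: non-ASCII characters that had crept into source comments (the multiplication sign ×, typographic dashes and quotes) are replaced with plain ASCII, shown below as -/+ pairs. A scan for such characters is easy to automate; the sketch below is illustrative only - the class and method names are hypothetical and not part of this commit:

    import java.io.IOException;
    import java.nio.charset.StandardCharsets;
    import java.nio.file.Files;
    import java.nio.file.Path;
    import java.nio.file.Paths;

    public class NonAsciiScan {
        // Print every line of a file that contains a character outside the 7-bit
        // ASCII range (e.g. ×, –, ”). ISO-8859-1 maps each byte to a char 1:1,
        // so reading never fails on malformed UTF-8 - we only check byte > 127.
        public static void scan(Path file) throws IOException {
            int lineNo = 0;
            for (String line : Files.readAllLines(file, StandardCharsets.ISO_8859_1)) {
                lineNo++;
                for (int i = 0; i < line.length(); i++) {
                    if (line.charAt(i) > 127) {
                        System.out.println(file + ":" + lineNo + "  " + line.trim());
                        break;
                    }
                }
            }
        }

        public static void main(String[] args) throws IOException {
            scan(Paths.get(args[0]));
        }
    }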
@@ -145,7 +145,7 @@ DECLARE_SHAPE_FN(sru) {
CUSTOM_OP_IMPL(sru_bp, 8, 4, true, 0, 0) {
    auto x = INPUT_VARIABLE(0); // X, input 3d tensor [bS x K x N], N - number of time steps, bS - batch size, K - number of features
    auto w = INPUT_VARIABLE(1); // W, 2d tensor of weights [3K x K]
-   auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 × 2*K]
+   auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 x 2*K]
    auto c0 = INPUT_VARIABLE(3); // C_{0}, 2d tensor of initial state [bS x K] at time t=0
    auto c = INPUT_VARIABLE(4); // C, [bS x K x N]
    auto inGradCt = INPUT_VARIABLE(5); // [bS x K]
@@ -331,7 +331,7 @@ CUSTOM_OP_IMPL(sru_bi, 5, 2, true, 0, 0) {

    auto x = INPUT_VARIABLE(0); // X, input 3d tensor [time x bS x 2*inSize], time - number of time steps, bS - batch size, inSize - number of features
    auto w = INPUT_VARIABLE(1); // W, 2d tensor of weights [2*inSize x 6*inSize]
-   auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 × 4*inSize]
+   auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 x 4*inSize]
    auto c0 = INPUT_VARIABLE(3); // C_{0}, 2d tensor of initial state [bS x 2*inSize] at time t=0
    NDArray* mask = block.width() > 4 ? INPUT_VARIABLE(4) : nullptr; // optional, 2d tensor of dropout mask [bS x 2*inSize]
@@ -431,7 +431,7 @@ CUSTOM_OP_IMPL(sru_bi_bp, 8, 4, true, 0, 0) {

    auto x = INPUT_VARIABLE(0); // X, input 3d tensor [time x bS x 2*inSize], time - number of time steps, bS - batch size, inSize - number of features
    auto w = INPUT_VARIABLE(1); // W, 2d tensor of weights [2*inSize x 6*inSize]
-   auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 × 4*inSize]
+   auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 x 4*inSize]
    auto c0 = INPUT_VARIABLE(3); // C_{0}, 2d tensor of initial state [bS x 2*inSize] at time t=0
    auto ct = INPUT_VARIABLE(4); // C, [time x bS x 2*inSize]
    auto inGradC0 = INPUT_VARIABLE(5); // [bS x 2*inSize]
@@ -553,7 +553,7 @@ DECLARE_SHAPE_FN(sru_bi_bp) {
* Input arrays:
* 0: input 3d tensor with shape [bS x K x N], N - number of time steps, bS - batch size, K - number of features
* 1: 2d tensor of weights [3K x K]
- * 2: row of biases with twice length [1 × 2K]
+ * 2: row of biases with twice length [1 x 2K]
* 3: 2d tensor of previous cell state [bS x K]
* 4: optional, 2d tensor of dropout mask [bS x K]
*
@@ -572,7 +572,7 @@ DECLARE_SHAPE_FN(sru_bi_bp) {
* Input arrays:
* 0: input 3d tensor with shape [bS x K x N], N - number of time steps, bS - batch size, K - number of features
* 1: 2d tensor of weights [3K x K]
- * 2: row of biases with twice length [1 × 2K]
+ * 2: row of biases with twice length [1 x 2K]
* 3: 2d tensor of previous cell state [bS x K]
* 4: optional, 2d tensor of dropout mask [bS x K]
*
@@ -592,7 +592,7 @@ DECLARE_SHAPE_FN(sru_bi_bp) {
* Input arrays:
* 0: input 3d tensor with shape [bS x K x N], N - number of time steps, bS - batch size, K - number of features
* 1: 2d tensor of weights [3K x K]
- * 2: row of biases with twice length [1 × 2K]
+ * 2: row of biases with twice length [1 x 2K]
* 3: 2d tensor of previous cell state [bS x K]
* 4: 3d tensor of cell state [bS x K x N]
* 5: 2d tensor of cell state gradients [bS x K]
@@ -622,7 +622,7 @@ DECLARE_SHAPE_FN(sru_bi_bp) {

    // auto input = INPUT_VARIABLE(0); // X, input 3d tensor [bS x K x N], N - number of time steps, bS - batch size, K - number of features
    // auto weights = INPUT_VARIABLE(1); // W, 2d tensor of weights [3K x K]
-   // auto bias = INPUT_VARIABLE(2); // B, row of biases with twice length [1 × 2*K]
+   // auto bias = INPUT_VARIABLE(2); // B, row of biases with twice length [1 x 2*K]
    // auto init = INPUT_VARIABLE(3); // C_{0}, 2d tensor of initial state [bS x K] at time t=0
    // NDArray* mask = nullptr; // optional, 2d tensor of dropout mask [bS x K]
@@ -710,7 +710,7 @@ DECLARE_SHAPE_FN(sru_bi_bp) {
// CUSTOM_OP_IMPL(sru_old, 5, 2, false, 0, 0) {
    // auto x = INPUT_VARIABLE(0); // X, input 3d tensor [bS x inSize x time], time - number of time steps, bS - batch size, inSize - number of features
    // auto w = INPUT_VARIABLE(1); // W, 2d tensor of weights [3K x inSize]
-   // auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 × 2*inSize]
+   // auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 x 2*inSize]
    // auto c0 = INPUT_VARIABLE(3); // C_{0}, 2d tensor of initial state [bS x inSize] at time t=0
    // NDArray* mask = nullptr; // optional, 2d tensor of dropout mask [bS x inSize]
@@ -820,7 +820,7 @@ DECLARE_SHAPE_FN(sru_bi_bp) {

    // auto x = INPUT_VARIABLE(0); // X, input 3d tensor [bS x inSize x time], time - number of time steps, bS - batch size, inSize - number of features
    // auto w = INPUT_VARIABLE(1); // W, 2d tensor of weights [3*inSize x inSize]
-   // auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 × 2*inSize]
+   // auto b = INPUT_VARIABLE(2); // B, row of biases with twice length [1 x 2*inSize]
    // auto c0 = INPUT_VARIABLE(3); // C_{0}, 2d tensor of initial state [bS x inSize] at time t=0
    // auto c = INPUT_VARIABLE(4); // C, [bS x inSize x time]
    // auto inGradCt = INPUT_VARIABLE(5); // [bS x inSize]
@@ -34,7 +34,7 @@ CUSTOM_OP_IMPL(sruCell, 4, 2, false, 0, 0) {
    auto xt = INPUT_VARIABLE(0); // input [bS x inSize], bS - batch size, inSize - number of features
    auto ct_1 = INPUT_VARIABLE(1); // previous cell state ct [bS x inSize], that is at previous time step t-1
    auto w = INPUT_VARIABLE(2); // weights [inSize x 3*inSize]
-   auto b = INPUT_VARIABLE(3); // biases [1 × 2*inSize]
+   auto b = INPUT_VARIABLE(3); // biases [1 x 2*inSize]

    auto ht = OUTPUT_VARIABLE(0); // current cell output [bS x inSize], that is at current time step t
    auto ct = OUTPUT_VARIABLE(1); // current cell state [bS x inSize], that is at current time step t
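Background on the [1 x 2*inSize] bias shape above: in an SRU cell (Lei et al., https://arxiv.org/abs/1709.02755) only the two gates carry biases, while the candidate transform is bias-free, which is why the bias row is two gate-widths long and the stacked weights are three. A sketch in the same notation as the comments, where .* is elementwise multiplication (background only, not code from this commit):

    f_t = sigmoid(x_t*Wf + bf)                    // forget gate, bf is [1 x inSize]
    r_t = sigmoid(x_t*Wr + br)                    // reset gate,  br is [1 x inSize]
    c_t = f_t .* c_{t-1} + (1 - f_t) .* (x_t*Wc)  // current cell state
    h_t = r_t .* g(c_t) + (1 - r_t) .* x_t        // current cell output

Concatenating [bf, br] gives the [1 x 2*inSize] bias row, and stacking [Wc, Wf, Wr] gives the [inSize x 3*inSize] weights seen above.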
@@ -33,7 +33,7 @@ namespace ops {
* Input arrays:
* 0: input 3d tensor with shape [bS x K x N], N - number of time steps, bS - batch size, K - number of features
* 1: 2d tensor of weights [3K x K]
- * 2: row of biases with twice length [1 × 2K]
+ * 2: row of biases with twice length [1 x 2K]
* 3: 2d tensor of previous cell state [bS x K]
* 4: optional, 2d tensor of dropout mask [bS x K]
*
@@ -52,7 +52,7 @@ namespace ops {
* Input arrays:
* 0: input 3d tensor with shape [N x bS x 2K], N - number of time steps, bS - batch size, K - number of features
* 1: 2d tensor of weights [2K x 6K]
- * 2: row of biases with twice length [1 × 4K]
+ * 2: row of biases with twice length [1 x 4K]
* 3: 2d tensor of previous cell state [bS x 2K]
* 4: optional, 2d tensor of dropout mask [bS x 2K]
*
@@ -72,7 +72,7 @@ namespace ops {
* Input arrays:
* 0: input 3d tensor with shape [bS x K x N], N - number of time steps, bS - batch size, K - number of features
* 1: 2d tensor of weights [3K x K]
- * 2: row of biases with twice length [1 × 2K]
+ * 2: row of biases with twice length [1 x 2K]
* 3: 2d tensor of previous cell state [bS x K]
* 4: 3d tensor of cell state [bS x K x N]
* 5: 2d tensor of cell state gradients [bS x K]
@@ -96,7 +96,7 @@ namespace ops {
* Input arrays:
* 0: input 3d tensor with shape [N x bS x 2K], N - number of time steps, bS - batch size, K - number of features
* 1: 2d tensor of weights [2K x 6K]
- * 2: row of biases with twice length [1 × 4K]
+ * 2: row of biases with twice length [1 x 4K]
* 3: 2d tensor of previous cell state [bS x 2K]
* 4: 3d tensor of cell state [N x bS x 2K]
* 5: 2d tensor of cell state gradients [bS x 2K]
@@ -239,7 +239,7 @@ namespace ops {
* 0: input with shape [batchSize x inSize], batchSize - batch size, inSize - number of features
* 1: previous cell state [batchSize x inSize], that is at previous time step t-1
* 2: weights [inSize x 3*inSize]
- * 3: biases [1 × 2*inSize]
+ * 3: biases [1 x 2*inSize]
*
* Output arrays:
* 0: current cell output [batchSize x inSize], that is at current time step t
@@ -110,7 +110,7 @@ static void sruBI_(NDArray* x, const NDArray* w, const NDArray* b, const NDArray

    // x input 3d tensor [time x bS x 2*inSize], time - number of time steps, bS - batch size, inSize - number of features
    // w 2d tensor of weights [2*inSize x 6*inSize]
-   // b row of biases with twice length [1 × 4*inSize]
+   // b row of biases with twice length [1 x 4*inSize]
    // c0 2d tensor of initial state [bS x 2*inSize] at time t=0
    // mask optional, 2d tensor of dropout mask [bS x 2*inSize]
@@ -193,7 +193,7 @@ static void sruBIBP_(NDArray* x, const NDArray* w, const NDArray* b, const NDArr

    // x input 3d tensor [time x bS x 2*inSize], time - number of time steps, bS - batch size, inSize - number of features
    // w 2d tensor of weights [2*inSize x 6*inSize]
-   // b row of biases with twice length [1 × 4*inSize]
+   // b row of biases with twice length [1 x 4*inSize]
    // c0 2d tensor of initial state [bS x 2*inSize] at time t=0
    // ct [time x bS x 2*inSize]
    // inGradC0 [bS x 2*inSize]
@@ -35,9 +35,24 @@ import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.ops.CustomOpDescriptor;
import org.nd4j.linalg.api.ops.DefaultOpConverter;
import org.nd4j.linalg.api.ops.DynamicCustomOp;
import org.nd4j.linalg.api.ops.custom.BarnesEdgeForces;
import org.nd4j.linalg.api.ops.custom.BarnesHutGains;
import org.nd4j.linalg.api.ops.custom.BarnesHutSymmetrize;
import org.nd4j.linalg.api.ops.custom.SpTreeCell;
import org.nd4j.linalg.api.ops.impl.broadcast.bool.*;
import org.nd4j.linalg.api.ops.impl.layers.ExternalErrorsFunction;
import org.nd4j.linalg.api.ops.impl.loss.bp.*;
import org.nd4j.linalg.api.ops.impl.meta.InvertedPredicateMetaOp;
import org.nd4j.linalg.api.ops.impl.meta.PostulateMetaOp;
import org.nd4j.linalg.api.ops.impl.meta.PredicateMetaOp;
import org.nd4j.linalg.api.ops.impl.meta.ReduceMetaOp;
import org.nd4j.linalg.api.ops.impl.nlp.CbowRound;
import org.nd4j.linalg.api.ops.impl.nlp.SkipGramRound;
import org.nd4j.linalg.api.ops.impl.reduce.MmulBp;
import org.nd4j.linalg.api.ops.impl.reduce.bool.All;
import org.nd4j.linalg.api.ops.impl.reduce.bool.Any;
import org.nd4j.linalg.api.ops.impl.reduce.bool.IsInf;
import org.nd4j.linalg.api.ops.impl.reduce.bool.IsNaN;
import org.nd4j.linalg.api.ops.impl.reduce.longer.MatchCondition;
import org.nd4j.linalg.api.ops.impl.reduce3.EqualsWithEps;
import org.nd4j.linalg.api.ops.impl.reduce.NormalizeMoments;
@@ -49,21 +64,26 @@ import org.nd4j.linalg.api.ops.impl.layers.convolution.*;
import org.nd4j.linalg.api.ops.impl.scalar.PowDerivative;
import org.nd4j.linalg.api.ops.impl.scalar.ScalarRemainder;
import org.nd4j.linalg.api.ops.impl.scalar.comparison.ScalarSetValue;
import org.nd4j.linalg.api.ops.impl.shape.ConfusionMatrix;
import org.nd4j.linalg.api.ops.impl.shape.Eye;
import org.nd4j.linalg.api.ops.impl.shape.MergeSum;
import org.nd4j.linalg.api.ops.impl.shape.OneHot;
import org.nd4j.linalg.api.ops.impl.shape.*;
import org.nd4j.linalg.api.ops.impl.shape.bp.ConcatBp;
import org.nd4j.linalg.api.ops.impl.shape.bp.SliceBp;
import org.nd4j.linalg.api.ops.impl.shape.bp.StridedSliceBp;
import org.nd4j.linalg.api.ops.impl.shape.bp.TileBp;
import org.nd4j.linalg.api.ops.impl.transforms.custom.InvertPermutation;
import org.nd4j.linalg.api.ops.impl.transforms.Assert;
import org.nd4j.linalg.api.ops.impl.transforms.bool.BooleanNot;
import org.nd4j.linalg.api.ops.impl.transforms.bool.MatchConditionTransform;
import org.nd4j.linalg.api.ops.impl.transforms.custom.*;
import org.nd4j.linalg.api.ops.impl.transforms.floating.Histogram;
import org.nd4j.linalg.api.ops.impl.transforms.pairwise.BinaryMinimalRelativeError;
import org.nd4j.linalg.api.ops.impl.transforms.pairwise.arithmetic.bp.*;
import org.nd4j.linalg.api.ops.impl.transforms.gradient.*;
import org.nd4j.linalg.api.ops.impl.transforms.gradient.SigmoidDerivative;
import org.nd4j.linalg.api.ops.impl.transforms.gradient.TanhDerivative;
import org.nd4j.linalg.api.ops.impl.transforms.pairwise.bool.Not;
import org.nd4j.linalg.api.ops.impl.transforms.segment.UnsortedSegmentMax;
import org.nd4j.linalg.api.ops.impl.transforms.segment.bp.*;
import org.nd4j.linalg.api.ops.impl.transforms.strict.GELUDerivative;
import org.nd4j.linalg.api.ops.impl.transforms.strict.PreciseGELUDerivative;
import org.nd4j.linalg.api.ops.impl.transforms.strict.SwishDerivative;
import org.nd4j.linalg.api.ops.impl.transforms.strict.TanDerivative;
import org.nd4j.linalg.api.ops.persistence.RestoreV2;
@@ -71,6 +91,7 @@ import org.nd4j.linalg.api.ops.persistence.SaveV2;
import org.nd4j.linalg.api.ops.random.compat.RandomStandardNormal;
import org.nd4j.linalg.api.ops.random.custom.DistributionUniform;
import org.nd4j.linalg.api.ops.random.impl.*;
import org.nd4j.linalg.api.ops.random.impl.Linspace;
import org.nd4j.linalg.api.shape.LongShapeDescriptor;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.function.Function;
@@ -681,10 +702,22 @@ public class OpValidation {
            }
        }

        int countLibnd4jIgnored = 0;
        if(logUnmappedLibnd4jOps ){
            Set<String> ignoreLibnd4j = excludeFromLibnd4jCustomOpMapping();
            log.info(" --- Libnd4j Ops Not Mapped ---");
            for(long l : nonMappedLibnd4jOps){
                Pair<List<String>,CustomOpDescriptor> p = dedupedCustomOps.get(l);
                boolean foundIgnore = false;
                for(String s : p.getFirst()){
                    if(ignoreLibnd4j.contains(s)){
                        foundIgnore = true;
                        countLibnd4jIgnored++;
                        break;
                    }
                }
                if(foundIgnore)
                    continue;
                log.info("Not mapped libnd4j custom op: {} (hash: {})", p.getFirst(), l);
            }
        }
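For context on the loop above: dedupedCustomOps groups libnd4j ops by hash, so a single entry can carry several registered aliases, and an op is skipped if any of its aliases appears in the ignore set. A toy illustration of that shape, with invented data rather than the real registry:

    import java.util.*;

    public class IgnoreCheckDemo {
        public static void main(String[] args) {
            // Invented data: one op hash mapping to two registered aliases.
            Map<Long, List<String>> dedupedCustomOps = new HashMap<>();
            dedupedCustomOps.put(42L, Arrays.asList("TestOp2i2o", "testop2i2o"));

            Set<String> ignore = new HashSet<>(Arrays.asList("testop2i2o"));

            for (Map.Entry<Long, List<String>> e : dedupedCustomOps.entrySet()) {
                // Ignore the op if ANY of its aliases is in the ignore set.
                boolean ignored = false;
                for (String alias : e.getValue()) {
                    if (ignore.contains(alias)) { ignored = true; break; }
                }
                System.out.println(e.getValue() + " ignored: " + ignored); // true
            }
        }
    }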
@@ -712,6 +745,7 @@ public class OpValidation {
        }

        if(logUnmappedTFOps){
            log.info(" --- TF Ops Not Mapped for Import ---");
            Map<String,OpDef> allTFOps;
            try{
                allTFOps = TensorflowDescriptorParser.opDescs();
@@ -760,7 +794,7 @@ public class OpValidation {
        String fracTfStr = String.format("%.2f", 100.0 * tfFrac);

        int countLibnd4jMapped = countTotalLibnd4jOps - nonMappedLibnd4jOps.size();
-       String fracLibnd4j = String.format("%.2f", 100.0 * (countLibnd4jMapped / (double)countTotalLibnd4jOps));
+       String fracLibnd4j = String.format("%.2f", 100.0 * (countLibnd4jMapped / (double)(countTotalLibnd4jOps - countLibnd4jIgnored)));

        String fracTFMappedTested = String.format("%.2f", 100.0 * tfOpsWithImportTests / (double)(totalTFMappedOps-tfImportIgnored));
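The effect of the changed denominator, with invented numbers purely for illustration: ops that are deliberately excluded from mapping no longer drag the reported coverage down.

    public class CoverageFractionDemo {
        public static void main(String[] args) {
            // Invented numbers, for illustration only.
            int countTotalLibnd4jOps = 500; // all custom ops libnd4j reports
            int nonMapped = 30;             // ops with no Java mapping, 25 of them deliberately ignored
            int countLibnd4jIgnored = 25;   // test ops, "to_x" casts, etc.

            int countLibnd4jMapped = countTotalLibnd4jOps - nonMapped; // 470

            double before = 100.0 * countLibnd4jMapped / countTotalLibnd4jOps;                         // 94.00
            double after  = 100.0 * countLibnd4jMapped / (countTotalLibnd4jOps - countLibnd4jIgnored); // ~98.95
            System.out.printf("before: %.2f%%, after: %.2f%%%n", before, after);
        }
    }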
@@ -772,7 +806,7 @@ public class OpValidation {
        log.info("({} ops excluded from fwd+gradient tests)", excludedFromAllTestCoverage.size());
        log.info("TF mapped ops: {} of {} ({}%)", countTfMapped, countTf, fracTfStr);
        log.info("SD ops with TF import mapping + test {} of {} ({}%) - {} ignored for coverage", tfOpsWithImportTests, (totalTFMappedOps-tfImportIgnored), fracTFMappedTested, tfImportIgnored);
-       log.info("Libnd4j mapped ops: {} of {} ({}%)", countLibnd4jMapped, countTotalLibnd4jOps, fracLibnd4j);
+       log.info("Libnd4j mapped ops: {} of {} ({}%) - {} excluded for coverage", countLibnd4jMapped, countTotalLibnd4jOps, fracLibnd4j, countLibnd4jIgnored);
        log.info("*****************************************************");
    }
@@ -832,9 +866,12 @@ public class OpValidation {
        CumProdBp.class,
        DotBp.class,
        SquaredNormBp.class,
        SoftmaxBp.class,

        CubeDerivative.class,
        ELUDerivative.class,
        GELUDerivative.class,
        PreciseGELUDerivative.class,
        HardSigmoidDerivative.class,
        HardTanhDerivative.class,
        LeakyReLUDerivative.class,
@@ -872,13 +909,54 @@ public class OpValidation {

        SliceBp.class,
        StridedSliceBp.class,
        MmulBp.class,
        DotProductAttentionBp.class,
        MultiHeadDotProductAttentionBp.class,
        LayerNormBp.class,
        StandardizeBp.class,
        DynamicPartitionBp.class,

        //We can't use these dropout ops in SameDiff: https://github.com/deeplearning4j/deeplearning4j/issues/5650
        DropOut.class,
        DropOutInverted.class,
        AlphaDropOut.class,
        Choice.class,
-       ProbablisticMerge.class
+       ProbablisticMerge.class,
+       AbsoluteDifferenceLossBp.class,
+       CosineDistanceLossBp.class,
+       HingeLossBp.class,
+       HuberLossBp.class,
+       LogLossBp.class,
+       LogPoissonLossBp.class,
+       MeanPairwiseSquaredErrorLossBp.class,
+       MeanSquaredErrorLossBp.class,
+       SigmoidCrossEntropyLossBp.class,
+       SoftmaxCrossEntropyLossBp.class,
+       SparseSoftmaxCrossEntropyLossWithLogitsBp.class,
+
+       SegmentMaxBp.class,
+       SegmentMeanBp.class,
+       SegmentMinBp.class,
+       SegmentProdBp.class,
+       SegmentSumBp.class,
+       UnsortedSegmentMaxBp.class,
+       UnsortedSegmentMeanBp.class,
+       UnsortedSegmentMinBp.class,
+       UnsortedSegmentProdBp.class,
+       UnsortedSegmentSqrtNBp.class,
+       UnsortedSegmentSumBp.class,
+
+       //Not intended for general users; only used in DL4J SameDiff integration + tested adequately there
+       ExternalErrorsFunction.class,
+
+       //Meta-Ops: not available in SameDiff
+       InvertedPredicateMetaOp.class,
+       PostulateMetaOp.class,
+       PredicateMetaOp.class,
+       ReduceMetaOp.class,
+
+
+       //Ops not intended to be used in SameDiff:
+       BarnesEdgeForces.class,
+       BarnesHutGains.class,
+       BarnesHutSymmetrize.class,
+       SpTreeCell.class,
+       CbowRound.class,
+       SkipGramRound.class
        );

        return new HashSet<>(list);
@@ -907,9 +985,21 @@ public class OpValidation {
        InvertPermutation.class, //Uses integer indices
        ConfusionMatrix.class, //Integer indices
        Linspace.class, //No input array
-       //Exclude boolean operations:
        Assert.class,
+       //Exclude boolean operations, boolean reductions, etc:
        Any.class,
        All.class,
        IsInf.class,
        org.nd4j.linalg.api.ops.impl.transforms.bool.IsInf.class,
        IsNaN.class,
        org.nd4j.linalg.api.ops.impl.transforms.bool.IsNaN.class,
        BooleanNot.class,
        Not.class,
        MatchConditionTransform.class,
        InTopK.class,
        IsNonDecreasing.class,
        IsStrictlyIncreasing.class,
        IsNumericTensor.class,
        //Exclude index accumulations (index out, not real-valued)
        FirstIndex.class,
        IAMax.class,
@@ -917,6 +1007,12 @@ public class OpValidation {
        IMax.class,
        IMin.class,
        LastIndex.class,

        //Exclude ops that output integer types only:
        Shape.class,
        ShapeN.class,
        SizeAt.class,

        //Exclude Random ops
        RandomStandardNormal.class,
        DistributionUniform.class,
@@ -949,7 +1045,12 @@ public class OpValidation {
        ProdBp.class,
        StandardDeviationBp.class,
        SumBp.class,
-       VarianceBp.class
+       VarianceBp.class,
+
+       LogicalAnd.class,
+       LogicalNot.class,
+       LogicalOr.class,
+       LogicalXor.class
        );

        return new HashSet<>(list);
@@ -981,6 +1082,72 @@ public class OpValidation {
        "BatchSelfAdjointEigV2", //Deprecated in favor of "SelfAdjointEigV2"
        "BatchSvd", //Deprecated in favor of "Svd"
+
+       //These we will likely neven support importing
+       "ExperimentalBytesProducedStatsDataset",
+       "ExperimentalCSVDataset",
+       "ExperimentalDatasetCardinality",
+       "ExperimentalDatasetToTFRecord",
+       "ExperimentalDenseToSparseBatchDataset",
+       "ExperimentalDirectedInterleaveDataset",
+       "ExperimentalGroupByReducerDataset",
+       "ExperimentalGroupByWindowDataset",
+       "ExperimentalIdentityIndexedDataset",
+       "ExperimentalIgnoreErrorsDataset",
+       "ExperimentalIndexedDatasetGet",
+       "ExperimentalIndexedDatasetMaterialize",
+       "ExperimentalIteratorGetDevice",
+       "ExperimentalLMDBDataset",
+       "ExperimentalLatencyStatsDataset",
+       "ExperimentalMapAndBatchDataset",
+       "ExperimentalMapDataset",
+       "ExperimentalMatchingFilesDataset",
+       "ExperimentalMaterializedIndexDatasetHandle",
+       "ExperimentalMaxIntraOpParallelismDataset",
+       "ExperimentalNonSerializableDataset",
+       "ExperimentalNumaMapAndBatchDataset",
+       "ExperimentalParallelInterleaveDataset",
+       "ExperimentalParseExampleDataset",
+       "ExperimentalPrivateThreadPoolDataset",
+       "ExperimentalRandomDataset",
+       "ExperimentalScanDataset",
+       "ExperimentalSetStatsAggregatorDataset",
+       "ExperimentalSleepDataset",
+       "ExperimentalSlidingWindowDataset",
+       "ExperimentalSqlDataset",
+       "ExperimentalStatsAggregatorHandle",
+       "ExperimentalStatsAggregatorSummary",
+       "ExperimentalThreadPoolDataset",
+       "ExperimentalThreadPoolHandle",
+       "ExperimentalUnbatchDataset",
+       "ExperimentalUniqueDataset",
+
+       "DebugIdentity",
+       "NcclAllReduce",
+       "NcclBroadcast",
+       "NcclReduce",
+
+       //Can't import these without embedding entire python runtime and dependencies
+       "PyFunc",
+       "PyFuncStateless",
+
+       //"QuantizedX" ops are deprecated / no longer supported ("standard" ops have quantized support in many cases)
+       "QuantizedAdd",
+       "QuantizedAvgPool",
+       "QuantizedBatchNormWithGlobalNormalization",
+       "QuantizedBiasAdd",
+       "QuantizedConcat",
+       "QuantizedConv2D",
+       "QuantizedInstanceNorm",
+       "QuantizedMatMul",
+       "QuantizedMaxPool",
+       "QuantizedMul",
+       "QuantizedRelu",
+       "QuantizedRelu6",
+       "QuantizedReluX",
+       "QuantizedReshape",
+       "QuantizedResizeBilinear",
+

        //All of the following ops - not available in TF (can't find them) - op mapping is wrong?
        //TODO: Check these and remove the import mapping from the Java classes if they are indeed bad
        "HardTanh",
@@ -993,12 +1160,37 @@ public class OpValidation {
                "absargmin",
                "entropy_shannon", //This is a thing, but quite different from our op: https://www.tensorflow.org/versions/r1.2/api_docs/python/tf/contrib/bayesflow/entropy/entropy_shannon
                "count_zero"
        );

        return new HashSet<>(list);
    }

+
+   /**
+    * These ops are ones we will never map at Java level for one reason or another
+    */
+   private static Set<String> excludeFromLibnd4jCustomOpMapping(){
+       Set<String> out = new HashSet<>();
+       Collections.addAll(out,
+               //Test and misc ops:
+               "TestOp2i2o", "testop2i2o",
+               "firas_sparse",
+               "test_output_reshape",
+               "test_scalar",
+               "testcustom",
+               "testreduction",
+
+               //"to_x" ops - we'll use cast instead in SameDiff (which supports all dtypes)
+               "to_double",
+               "to_float16",
+               "to_float32",
+               "to_int32",
+               "to_int64",
+               "to_uint32",
+               "to_uint64"
+       );
+
+       return out;
+   }

}
@@ -702,7 +702,7 @@ public class Evaluation extends BaseEvaluation<Evaluation> {
            builder.append("\nCost array: ").append(Arrays.toString(costArray.dup().data().asFloat()));
        }
        //Note that we could report micro-averaged too - but these are the same as accuracy
-       //"Note that for “micro<EFBFBD>?-averaging in a multiclass setting with all labels included will produce equal precision, recall and F,"
+       //"Note that for “micro-averaging in a multiclass setting with all labels included will produce equal precision, recall and F,"
        //http://scikit-learn.org/stable/modules/model_evaluation.html

        builder.append("\n\n");
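Background on the micro-averaging comment above: in a single-label multiclass setting every misclassified sample is simultaneously one false negative (for its true class) and one false positive (for the predicted class), so summed over classes

    micro-precision = sum_c TP_c / (sum_c TP_c + sum_c FP_c) = correct / total = accuracy

and likewise sum_c FN_c = total - correct, so micro-recall and micro-F1 also equal accuracy, which is why only macro averages are worth reporting separately.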
@@ -884,7 +884,7 @@ public class Evaluation extends BaseEvaluation<Evaluation> {

    /**
     * When calculating the (macro) average precision, how many classes are excluded from the average due to
-    * no predictions – i.e., precision would be the edge case of 0/0
+    * no predictions - i.e., precision would be the edge case of 0/0
     *
     * @return Number of classes excluded from the average precision
     */
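A minimal sketch of the exclusion convention this javadoc describes (hypothetical helper, not the actual Evaluation implementation): a class that was never predicted has precision TP/(TP+FP) = 0/0, and it is left out of the macro average rather than counted as zero.

    // Macro-average precision over classes, skipping the 0/0 edge case
    // (classes with no predictions, i.e. tp + fp == 0). Arrays indexed by class.
    public static double macroPrecision(int[] tp, int[] fp) {
        double sum = 0.0;
        int included = 0;
        for (int c = 0; c < tp.length; c++) {
            if (tp[c] + fp[c] == 0) {
                continue; // excluded from the average: precision undefined
            }
            sum += tp[c] / (double) (tp[c] + fp[c]);
            included++;
        }
        return included == 0 ? Double.NaN : sum / included;
    }

The same convention applies to the recall, F1, and FBeta averages documented in the hunks that follow.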
@@ -894,7 +894,7 @@ public class Evaluation extends BaseEvaluation<Evaluation> {

    /**
     * When calculating the (macro) average Recall, how many classes are excluded from the average due to
-    * no predictions – i.e., recall would be the edge case of 0/0
+    * no predictions - i.e., recall would be the edge case of 0/0
     *
     * @return Number of classes excluded from the average recall
     */
@@ -904,7 +904,7 @@ public class Evaluation extends BaseEvaluation<Evaluation> {

    /**
     * When calculating the (macro) average F1, how many classes are excluded from the average due to
-    * no predictions – i.e., F1 would be calculated from a precision or recall of 0/0
+    * no predictions - i.e., F1 would be calculated from a precision or recall of 0/0
     *
     * @return Number of classes excluded from the average F1
     */
@@ -914,7 +914,7 @@ public class Evaluation extends BaseEvaluation<Evaluation> {

    /**
     * When calculating the (macro) average FBeta, how many classes are excluded from the average due to
-    * no predictions – i.e., FBeta would be calculated from a precision or recall of 0/0
+    * no predictions - i.e., FBeta would be calculated from a precision or recall of 0/0
     *
     * @return Number of classes excluded from the average FBeta
     */
|
|
@ -83,7 +83,7 @@ public class ScatterUpdate implements CustomOp {
|
|||
*/
|
||||
@Override
|
||||
public String opName() {
|
||||
return op.opName();
|
||||
return "scatter_update";
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -28,7 +28,7 @@ public class SRUCellConfiguration {
|
|||
NDArray<T>* xt = INPUT_VARIABLE(0); // input [batchSize x inSize], batchSize - batch size, inSize - number of features
|
||||
NDArray<T>* ct_1 = INPUT_VARIABLE(1); // previous cell state ct [batchSize x inSize], that is at previous time step t-1
|
||||
NDArray<T>* w = INPUT_VARIABLE(2); // weights [inSize x 3*inSize]
|
||||
NDArray<T>* b = INPUT_VARIABLE(3); // biases [1 × 2*inSize]
|
||||
NDArray<T>* b = INPUT_VARIABLE(3); // biases [1 x 2*inSize]
|
||||
|
||||
NDArray<T>* ht = OUTPUT_VARIABLE(0); // current cell output [batchSize x inSize], that is at current time step t
|
||||
NDArray<T>* ct = OUTPUT_VARIABLE(1); // current cell state [batchSize x inSize], that is at current time step t
|
||||
|
|
|
@ -26,7 +26,7 @@ public class SRUConfiguration {
|
|||
/**
|
||||
* NDArray<T>* input = INPUT_VARIABLE(0); // X, input 3d tensor [bS x K x N], N - number of time steps, bS - batch size, K - number of features
|
||||
NDArray<T>* weights = INPUT_VARIABLE(1); // W, 2d tensor of weights [3K x K]
|
||||
NDArray<T>* bias = INPUT_VARIABLE(2); // B, row of biases with twice length [1 × 2*K]
|
||||
NDArray<T>* bias = INPUT_VARIABLE(2); // B, row of biases with twice length [1 x 2*K]
|
||||
NDArray<T>* init = INPUT_VARIABLE(3); // C_{0}, 2d tensor of initial state [bS x K] at time t=0
|
||||
|
||||
*/
|
||||
|
|
|
@ -103,7 +103,7 @@ public class AdaDeltaUpdater implements GradientUpdater<AdaDelta> {
|
|||
double epsilon = config.getEpsilon();
|
||||
|
||||
//Line 4 of Algorithm 1: https://arxiv.org/pdf/1212.5701v1.pdf
|
||||
//E[g^2]_t = rho * E[g^2]_{t−1} + (1-rho)*g^2_t
|
||||
//E[g^2]_t = rho * E[g^2]_{t-1} + (1-rho)*g^2_t
|
||||
msg.muli(rho).addi(gradient.mul(gradient).muli(1 - rho));
|
||||
|
||||
//Calculate update:
|
||||
|
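For reference, the neighbouring steps of AdaDelta (Algorithm 1 of the paper linked above), in the same comment notation; rho is the decay rate, epsilon the stability constant, and RMS[y]_t = sqrt(E[y^2]_t + epsilon):

    E[g^2]_t  = rho * E[g^2]_{t-1} + (1-rho)*g^2_t    // line 4: accumulate gradient
    dx_t      = -(RMS[dx]_{t-1} / RMS[g]_t) * g_t     // line 5: compute update
    E[dx^2]_t = rho * E[dx^2]_{t-1} + (1-rho)*dx^2_t  // line 6: accumulate updates
    x_{t+1}   = x_t + dx_t                            // line 7: apply update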