Add new CLion rules, fix batch norm

2021-02-09 07:44:23 +09:00 · 2021-02-09 07:44:23 +09:00 · 5bd386a4f9
commit 5bd386a4f9
parent 968eaad2dd
8 changed files with 232 additions and 186 deletions
--- a/libnd4j/blas/CMakeLists.txt
+++ b/libnd4j/blas/CMakeLists.txt
@ -373,7 +373,11 @@ elseif(SD_CPU)
    foreach (_variableName ${_variableNames})
        message(STATUS "${_variableName}=${${_variableName}}")
    endforeach()
+
+    # Linking these libraries breaks the build when CLION_IDE is set, so skip it in that case. Normally you want to run tests anyway.
+    if(NOT "$ENV{CLION_IDE}")
        target_link_libraries(${SD_LIBRARY_NAME} ${MKLDNN}  ${MKLDNN_LIBRARIES} ${ARMCOMPUTE_LIBRARIES} ${OPENBLAS_LIBRARIES} ${BLAS_LIBRARIES} ${CPU_FEATURES})
+    endif()

    if ("${SD_ALL_OPS}" AND "${SD_BUILD_MINIFIER}")
        message(STATUS "Building minifier...")
--- a/libnd4j/include/ops/declarable/generic/nn/fusedBatchNorm.cpp
+++ b/libnd4j/include/ops/declarable/generic/nn/fusedBatchNorm.cpp
@ -26,7 +26,7 @@
 #include <ops/declarable/CustomOperations.h>

 namespace sd {
-namespace ops {
+    namespace ops {

        DECLARE_TYPES(fused_batch_norm) {
            getOpDescriptor()
@ -34,7 +34,7 @@ namespace ops {
                    ->setAllowedOutputTypes({ALL_FLOATS});
        }

-CUSTOM_OP_IMPL(fused_batch_norm, 3, 3, false, 0, 2) {
+        CUSTOM_OP_IMPL(fused_batch_norm, 3, 3, false, 0, 2) {
            auto x      = INPUT_VARIABLE(0);                 // [bS,iH,iW,iD] (NHWC) or [bS,iD,iH,iW] (NCHW)
            auto scale  = INPUT_VARIABLE(1);                 // [iD]
            auto offset = INPUT_VARIABLE(2);                 // [iD]
@ -61,11 +61,14 @@ CUSTOM_OP_IMPL(fused_batch_norm, 3, 3, false, 0, 2) {
                iW = x->sizeAt(2);
            }

+            auto xCast = x->cast(sd::DataType::FLOAT32);
+
+
            REQUIRE_TRUE(scale->rankOf() == 1  && scale->sizeAt(0)  == iD, 0, "CUSTOM_OP fused_batch_norm: wrong shape of input scale array, expected is [%i], but got %s instead", iD, ShapeUtils::shapeAsString(scale).c_str());
            REQUIRE_TRUE(offset->rankOf() == 1 && offset->sizeAt(0) == iD, 0, "CUSTOM_OP fused_batch_norm: wrong shape of input offset array, expected is [%i], but got %s instead", iD, ShapeUtils::shapeAsString(offset).c_str());

            NDArray *mean(nullptr), *variance(nullptr);
-    if(!isTraining){
+            if(!isTraining) {
                mean     = INPUT_VARIABLE(3);
                variance = INPUT_VARIABLE(4);
                REQUIRE_TRUE(mean->rankOf() == 1     && mean->sizeAt(0) == iD,     0, "CUSTOM_OP fused_batch_norm: wrong shape of input mean array, expected is [%i], but got %s instead", iD, ShapeUtils::shapeAsString(mean).c_str());
@ -74,36 +77,38 @@ CUSTOM_OP_IMPL(fused_batch_norm, 3, 3, false, 0, 2) {
            else {
                //REQUIRE_TRUE(block.width() == 3, 0, "CUSTOM_OP fused_batch_norm: when isTraining=true then number of input arrays must be equal to 3, but got %i instead !", block.width());
                std::vector<Nd4jLong> shape = {iD};
-        mean = NDArrayFactory::create_(scale->ordering(), shape, scale->dataType(), block.launchContext());
-        variance = NDArrayFactory::create_(scale->ordering(), shape, scale->dataType(), block.launchContext());
+                mean = NDArrayFactory::create_(scale->ordering(), shape, sd::DataType::FLOAT32, block.launchContext());
+                variance = NDArrayFactory::create_(scale->ordering(), shape, sd::DataType::FLOAT32, block.launchContext());
            }

-    // FIXME: double?
-    double epsilon;
-    if(block.getTArguments()->size() > 0)
-        epsilon = T_ARG(0) > 1.001e-5 ? T_ARG(0) : 1.001e-5;
-    else
-        epsilon = 0.001;
+
+            float epsilon;
+            if(block.getTArguments()->size() > 0) {
+                epsilon = (float) (T_ARG(0) > 1.001e-5 ? T_ARG(0) : 1.001e-5);
+            }
+            else {
+                epsilon = 0.001f;
+            }

            const int restSize = x->lengthOf() / iD;
-    auto xAffected = NDArrayFactory::create(x->ordering(), {restSize, iD}, mean->dataType(), block.launchContext());
-    xAffected.assign(x);
+
+            auto xAffected = NDArrayFactory::create(x->ordering(), {restSize, iD}, sd::DataType::FLOAT32, block.launchContext());
+            xAffected.assign(xCast);

            const int restSizeMinusOne = (restSize > 1) ? (restSize - 1) : 1;
-    // FIXME: float?
-    const double restSizeInv = 1.0 / restSize;
-    const double restSizeAdjust = (double)restSize / restSizeMinusOne;
+            const float restSizeInv = 1.0f / restSize;
+            const float restSizeAdjust = (float)restSize / restSizeMinusOne;

            if(isTraining) {
                auto sum = xAffected.reduceAlongDimension(reduce::Sum, {0});
                sum *=  restSizeInv;
                mean->assign(sum);
                *batchMean = *mean;
-        //delete sum;
            }
            else
                *batchMean = 0.;

+            auto xCentered = xAffected - *mean;
            xAffected -= *mean;

            if(isTraining) {
@ -112,13 +117,17 @@ CUSTOM_OP_IMPL(fused_batch_norm, 3, 3, false, 0, 2) {
                auto sum = xAffected.reduceAlongDimension(reduce::Sum, {0});
                sum *=  restSizeInv;
                variance->assign(sum);
-        *batchVar  = (*variance) * restSizeAdjust;
-        //delete sum;
+                auto varOutput   = (*variance) * restSizeAdjust;
+                batchVar->assign(varOutput);
            }
            else
                *batchVar  = 0.;
-    xAffected *= (*variance + epsilon).transform(transform::RSqrt) * (*scale) + (*offset);
-    y->assign( xAffected );
+
+            auto scaledVariance =  ((*variance + epsilon).transform(transform::RSqrt) * (*scale)).cast(xAffected.dataType());
+            auto xScaled1 = xCentered * scaledVariance;
+            auto xShifted1 = xScaled1 + *offset;
+
+            y->assign(xShifted1);

            if(isTraining) {
                delete mean;
@ -126,11 +135,11 @@ CUSTOM_OP_IMPL(fused_batch_norm, 3, 3, false, 0, 2) {
            }

            return Status::OK();
-}
+        }



-DECLARE_SHAPE_FN(fused_batch_norm) {
+        DECLARE_SHAPE_FN(fused_batch_norm) {
            auto xShapeInfo     = inputShape->at(0);
            auto scaleShapeInfo = inputShape->at(1);

@ -146,12 +155,9 @@ DECLARE_SHAPE_FN(fused_batch_norm) {
            COPY_SHAPE(scaleShapeInfo, batchVarShapeInfo);

            return SHAPELIST(CONSTANT(outShapeInfo), CONSTANT(batchMeanShapeInfo), CONSTANT(batchVarShapeInfo));
-}
+        }
        
-
-
-
-}
+    }
 }

 #endif
--- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/FusedBatchNorm.java
+++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/FusedBatchNorm.java
@ -87,9 +87,12 @@ public class FusedBatchNorm extends DynamicCustomOp {
    }

    @Override
-    public List<DataType> calculateOutputDataTypes(List<DataType> inputDataTypes){
+    public List<DataType> calculateOutputDataTypes(List<DataType> inputDataTypes) {
        int n = args().length;
        Preconditions.checkState(inputDataTypes != null && inputDataTypes.size() == n, "Expected %s input data types for %s, got %s", n, getClass(), inputDataTypes);
+        if(!dArguments.isEmpty()) {
+            return Arrays.asList(dArguments.get(0),dArguments.get(0),dArguments.get(0));
+        }
        return Arrays.asList(outputDataType == null ? DataType.FLOAT : outputDataType,
                outputDataType == null ? DataType.FLOAT : outputDataType,
                outputDataType == null ? DataType.FLOAT : outputDataType);
--- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/imports/TFGraphs/TFGraphTestAllSameDiff.java
+++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/imports/TFGraphs/TFGraphTestAllSameDiff.java
@ -69,10 +69,8 @@ public class TFGraphTestAllSameDiff {   //Note: Can't extend BaseNd4jTest here a
     * the status of the test failing. No tests will run.
     */
    public final static List<String> EXECUTE_ONLY_MODELS = Arrays.asList(
-               "max_pool_with_argmax/int32_int64_padding_SAME",
-            //      "fused_batch_norm/float32_nhwc",
-                 "max_pool_with_argmax/int64_int64_padding_SAME"
-            //      "fused_batch_norm/float16_nhwc",
+                  "fused_batch_norm/float32_nhwc"
+           // , "fused_batch_norm/float16_nhwc"

    );

@ -86,9 +84,6 @@ public class TFGraphTestAllSameDiff {   //Note: Can't extend BaseNd4jTest here a
            // Still failing 2020/04/27 java.lang.IllegalStateException: Could not find class for TF Ops: TruncateMod
            "truncatemod/.*",

-            //Still failing as of 2019/09/11 - https://github.com/deeplearning4j/deeplearning4j/issues/6464 - not sure if related to: https://github.com/deeplearning4j/deeplearning4j/issues/6447
-            "cnn2d_nn/nhwc_b1_k12_s12_d12_SAME",
-
            //2019/09/11 - No tensorflow op found for SparseTensorDenseAdd
            // 2020/04/27 java.lang.IllegalStateException: Could not find class for TF Ops: SparseTensorDenseAdd
            "confusion/.*",
--- a/nd4j/samediff-import/samediff-import-tensorflow/src/main/kotlin/org/nd4j/samediff/frameworkimport/tensorflow/definitions/TensorflowOpDeclarations.kt
+++ b/nd4j/samediff-import/samediff-import-tensorflow/src/main/kotlin/org/nd4j/samediff/frameworkimport/tensorflow/definitions/TensorflowOpDeclarations.kt
@ -958,7 +958,7 @@ val fusedBatchnormV1 = TensorflowMappingProcess(
                "offset" to "offset","mean" to "mean","variance" to "variance"))),
        inputFrameworkOpName = "FusedBatchNorm",
        opMappingRegistry = tensorflowOpRegistry,
-        attributeMappingRules = listOf(valueMapping(mutableMapOf("epsilon" to "epsilon")),
+        attributeMappingRules = listOf(valueMapping(mutableMapOf("epsilon" to "epsilon","dtype" to "T")),
                invertBooleanNumber(mutableMapOf("isTraining" to "is_training")),
                stringEqualsRule(outputAttribute = "dataFormat",inputFrameworkAttributeName = "data_format",valueToTest = "NCHW",argumentIndex = 0))
 )
@ -971,7 +971,7 @@ val fusedBatchnormV2 = TensorflowMappingProcess(
                "offset" to "offset","mean" to "mean","variance" to "variance"))),
        inputFrameworkOpName = "FusedBatchNormV2",
        opMappingRegistry = tensorflowOpRegistry,
-        attributeMappingRules = listOf(valueMapping(mutableMapOf("epsilon" to "epsilon")),
+        attributeMappingRules = listOf(valueMapping(mutableMapOf("epsilon" to "epsilon","dtype" to "T")),
                invertBooleanNumber(mutableMapOf("isTraining" to "is_training")),
                stringEqualsRule(outputAttribute = "dataFormat",inputFrameworkAttributeName = "data_format",valueToTest = "NCHW",argumentIndex = 0))
 )
@ -983,7 +983,7 @@ val fusedBatchnormV3 = TensorflowMappingProcess(
                "offset" to "offset","mean" to "mean","variance" to "variance"))),
        inputFrameworkOpName = "FusedBatchNormV3",
        opMappingRegistry = tensorflowOpRegistry,
-        attributeMappingRules = listOf(valueMapping(mutableMapOf("epsilon" to "epsilon")),
+        attributeMappingRules = listOf(valueMapping(mutableMapOf("epsilon" to "epsilon","dtype" to "T")),
                invertBooleanNumber(mutableMapOf("isTraining" to "is_training")),
                stringEqualsRule(outputAttribute = "dataFormat",inputFrameworkAttributeName = "data_format",valueToTest = "NCHW",argumentIndex = 0))
 )
--- a/nd4j/samediff-import/samediff-import-tensorflow/src/main/resources/tensorflow-mapping-ruleset.pbtxt
+++ b/nd4j/samediff-import/samediff-import-tensorflow/src/main/resources/tensorflow-mapping-ruleset.pbtxt
@ -8367,10 +8367,16 @@ mappings {
    functionName: "valuemapping"
    inputFloatName: "epsilon"
    outputDoubleName: "epsilon"
+    inputDataTypeName: "T"
+    outputDataTypeName: "dtype"
    inputToOutput {
      key: "epsilon"
      value: "epsilon"
    }
+    inputToOutput {
+      key: "dtype"
+      value: "T"
+    }
    ruleType: "attribute"
    inputFrameworkOpName: "FusedBatchNorm"
  }
@ -12480,10 +12486,16 @@ mappings {
    functionName: "valuemapping"
    inputFloatName: "epsilon"
    outputDoubleName: "epsilon"
+    inputDataTypeName: "T"
+    outputDataTypeName: "dtype"
    inputToOutput {
      key: "epsilon"
      value: "epsilon"
    }
+    inputToOutput {
+      key: "dtype"
+      value: "T"
+    }
    ruleType: "attribute"
    inputFrameworkOpName: "FusedBatchNormV3"
  }
@ -13056,10 +13068,16 @@ mappings {
    functionName: "valuemapping"
    inputFloatName: "epsilon"
    outputDoubleName: "epsilon"
+    inputDataTypeName: "T"
+    outputDataTypeName: "dtype"
    inputToOutput {
      key: "epsilon"
      value: "epsilon"
    }
+    inputToOutput {
+      key: "dtype"
+      value: "T"
+    }
    ruleType: "attribute"
    inputFrameworkOpName: "FusedBatchNormV2"
  }
--- a/nd4j/samediff-import/samediff-import-tensorflow/src/test/kotlin/org/nd4j/samediff/frameworkimport/tensorflow/TestTensorflowIR.kt
+++ b/nd4j/samediff-import/samediff-import-tensorflow/src/test/kotlin/org/nd4j/samediff/frameworkimport/tensorflow/TestTensorflowIR.kt
@ -90,7 +90,9 @@ class TestTensorflowIR {
        //val inputMap = mapOf("image" to Nd4j.ones(1,128,128,4))
        val inputMap = emptyMap<String,INDArray>()
        val tensorflowIRGraph = TensorflowIRGraph(textGraph,tensorflowOps,tfImporter.registry)
-        val outputList = tensorflowIRGraph.nodeList().map { input -> input.nodeName() }.toSet()
+        val outputList = tensorflowIRGraph.nodeList().map { input -> input.nodeName() }.toMutableSet()
+        outputList.add("FusedBatchNormV3:1")
+        outputList.add("FusedBatchNormV3:2")
        val tfGraphRunner = TensorflowIRGraphRunner(tensorflowIRGraph, inputMap.keys.toList(), outputList.toList())
        val importedGraph = TFGraphMapper.importGraph(textGraph)
        val graph = tfImporter.importFromGraph(textGraph,inputMap)
@ -104,7 +106,7 @@ class TestTensorflowIR {
        val names = tensorflowIRGraph.nodeList().map { input -> input.nodeName() }
        val skipValidation = setOf("parallel_stack/ExpandDims/dim")
        //assertEquals(output.keys,output2.keys)
-        val notEquals = HashSet<String>()
+    /*    val notEquals = HashSet<String>()
        names.forEach {
            val value = output[it]
            val value2 = output2[it]
@ -115,9 +117,9 @@ class TestTensorflowIR {
                val newVar = graph.variables[it]
                notEquals.add(it)
            }
-        }
+        }*/

-        println(notEquals)
+        //println(notEquals)

        // assertEquals(output,output2)
        //assertEquals(tfOutput,output)
--- a/nd4j/samediff-import/samediff-import-tensorflow/tensorflow-processes.pbtxt
+++ b/nd4j/samediff-import/samediff-import-tensorflow/tensorflow-processes.pbtxt
@ -8367,10 +8367,16 @@ mappings {
    functionName: "valuemapping"
    inputFloatName: "epsilon"
    outputDoubleName: "epsilon"
+    inputDataTypeName: "T"
+    outputDataTypeName: "dtype"
    inputToOutput {
      key: "epsilon"
      value: "epsilon"
    }
+    inputToOutput {
+      key: "dtype"
+      value: "T"
+    }
    ruleType: "attribute"
    inputFrameworkOpName: "FusedBatchNorm"
  }
@ -12480,10 +12486,16 @@ mappings {
    functionName: "valuemapping"
    inputFloatName: "epsilon"
    outputDoubleName: "epsilon"
+    inputDataTypeName: "T"
+    outputDataTypeName: "dtype"
    inputToOutput {
      key: "epsilon"
      value: "epsilon"
    }
+    inputToOutput {
+      key: "dtype"
+      value: "T"
+    }
    ruleType: "attribute"
    inputFrameworkOpName: "FusedBatchNormV3"
  }
@ -13056,10 +13068,16 @@ mappings {
    functionName: "valuemapping"
    inputFloatName: "epsilon"
    outputDoubleName: "epsilon"
+    inputDataTypeName: "T"
+    outputDataTypeName: "dtype"
    inputToOutput {
      key: "epsilon"
      value: "epsilon"
    }
+    inputToOutput {
+      key: "dtype"
+      value: "T"
+    }
    ruleType: "attribute"
    inputFrameworkOpName: "FusedBatchNormV2"
  }