From 352f1eee802475e88ba8b30a757f4b965875afa3 Mon Sep 17 00:00:00 2001
From: shugeo <sgazeos@gmail.com>
Date: Wed, 9 Oct 2019 21:39:59 +0300
Subject: [PATCH] Implemented fake_quant_with_min_max_vars_per_channel helper
 for cpu platform. The first approach.

---
 .../helpers/cpu/fake_quantization.cpp         | 75 +++++++++++++++----
 .../layers_tests/DeclarableOpsTests10.cpp     | 19 +++--
 2 files changed, 73 insertions(+), 21 deletions(-)
diff --git a/libnd4j/include/ops/declarable/helpers/cpu/fake_quantization.cpp b/libnd4j/include/ops/declarable/helpers/cpu/fake_quantization.cpp
index 88c451ffb..a2d0c3c59 100644
--- a/libnd4j/include/ops/declarable/helpers/cpu/fake_quantization.cpp
+++ b/libnd4j/include/ops/declarable/helpers/cpu/fake_quantization.cpp
@@ -25,6 +25,55 @@ namespace nd4j {
 namespace ops {
 namespace helpers {
 
+    // Computes the quantization step (*scale) and a [*nudged_min, *nudged_max] range in which zero falls exactly on a level.
+    template <typename T>
+    static void Nudge(T min, T max, T quant_min, T quant_max, T* scale, T* nudged_min, T* nudged_max) {
+        *scale = (max - min) / (quant_max - quant_min);
+        const auto zeroPointFromMin = quant_min - min / *scale;
+        // Keep the zero point inside [quant_min, quant_max]; otherwise convert it with nd4j_round.
+        uint16_t nudgedZeroPoint;
+        if (zeroPointFromMin < quant_min)
+            nudgedZeroPoint = static_cast<uint16_t>(quant_min);
+        else if (zeroPointFromMin > quant_max)
+            nudgedZeroPoint = static_cast<uint16_t>(quant_max);
+        else
+            nudgedZeroPoint = nd4j::math::nd4j_round<T,uint16_t>(zeroPointFromMin);
+        *nudged_min = (quant_min - nudgedZeroPoint) * (*scale);
+        *nudged_max = (quant_max - nudgedZeroPoint) * (*scale);
+    }
+
+    // Fake-quantizes input into output, using a separate [min, max] range for every channel.
+    // min and max hold one value per channel; element e of input is treated as belonging to
+    // channel (e % channels), i.e. channels are assumed to be the innermost dimension.
+    // NOTE(review): relies on t<T>(e) enumerating elements in row-major order - confirm.
+    template <typename T>
+    void fakeQuantWithMinMaxVarsPerChannel_(NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) {
+        const int lowIntBound = narrowed ? 1 : 0;
+        // Parenthesised on purpose: `1 << numBits - 1` parses as `1 << (numBits - 1)`.
+        const int upperIntBound = (1 << numBits) - 1;
+
+        const float quant_min_float = static_cast<float>(lowIntBound);
+        const float quant_max_float = static_cast<float>(upperIntBound);
+        auto channels = min->lengthOf();
+        auto total = input->lengthOf();
+        for (decltype(channels) c = 0; c < channels; c++) {
+            // Nudged range and step for this channel only.
+            T scale, nudged_min, nudged_max;
+            Nudge<T>(min->t<T>(c), max->t<T>(c), quant_min_float, quant_max_float, &scale, &nudged_min, &nudged_max);
+            // Visit only the elements of channel c, so results of other channels are not overwritten.
+            for (decltype(total) e = c; e < total; e += channels) {
+                T val = input->t<T>(e);
+                if (val < nudged_min)
+                    val = nudged_min;
+                else if (val > nudged_max)
+                    val = nudged_max;
+                // Snap to the nearest level via floor(x + 0.5), then map the level back to a real value.
+                const T level = nd4j::math::nd4j_floor<T,T>((val - nudged_min) / scale + T(0.5f));
+                output->t<T>(e) = level * scale + nudged_min;
+            }
+        }
+    }
+
     template <typename T>
     void fakeQuantWithMinMaxVars_(NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) {
         int lowIntBound = narrowed ? 1 : 0;
@@ -35,15 +84,15 @@ namespace helpers {
         T scale = (max->t<T>(0) - min->t<T>(0)) / (quant_max_float - quant_min_float);
         const T zero_point_from_min = quant_min_float - min->e<T>(0) / scale;
         const uint16_t nudged_zero_point = [zero_point_from_min, lowIntBound,
-                                        quant_min_float, upperIntBound,
-                                        quant_max_float] {
-        if (zero_point_from_min < quant_min_float) {
-          return static_cast<uint16_t>(lowIntBound);
-        }
-        if (zero_point_from_min > quant_max_float) {
-          return static_cast<uint16_t>(upperIntBound);
-        }
-        return static_cast<uint16_t>(roundf(zero_point_from_min));
+                quant_min_float, upperIntBound,
+                quant_max_float] {
+            if (zero_point_from_min < quant_min_float) {
+                return static_cast<uint16_t>(lowIntBound);
+            }
+            if (zero_point_from_min > quant_max_float) {
+                return static_cast<uint16_t>(upperIntBound);
+            }
+            return static_cast<uint16_t>(roundf(zero_point_from_min));
         }();
 
         auto nudged_min = (quant_min_float - nudged_zero_point) * (scale);
@@ -71,10 +120,10 @@ namespace helpers {
         clamped.applyLambda<T>(wiseMax, output);
 //        const auto clamped_shifted = clamped - nudged_min;
         *output -= nudged_min;
-       // auto nudgedScale = scale;
+        // auto nudgedScale = scale;
         (*output) /= scaleTensor;
-        (*output) += T(0.5f);
-        output->applyTransform(transform::Floor, nullptr, nullptr);
+//        (*output) += T(0.5f);
+        output->applyTransform(transform::Round, nullptr, nullptr);
         (*output) *= scaleTensor;
         (*output) += nudged_min;
         //output->printIndexedBuffer("FAKE QUANTED");
@@ -94,7 +143,7 @@ namespace helpers {
         BUILD_SINGLE_SELECTOR(input->dataType(), fakeQuantWithMinMaxVars_, (input, min, max, numBits, narrowed, output), FLOAT_TYPES);
     }
     void fakeQuantWithMinMaxVarsPerChannel(NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) {
-        BUILD_SINGLE_SELECTOR(input->dataType(), fakeQuantWithMinMaxVars_, (input, min, max, numBits, narrowed, output), FLOAT_TYPES);
+        BUILD_SINGLE_SELECTOR(input->dataType(), fakeQuantWithMinMaxVarsPerChannel_, (input, min, max, numBits, narrowed, output), FLOAT_TYPES); // forwards to the per-channel kernel; the macro presumably picks the T instantiation from input->dataType()
     }
 
     BUILD_SINGLE_TEMPLATE(template void fakeQuantWithMinMaxVars_, (NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output), FLOAT_TYPES);
diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp
index 191ee8524..a6edb23c7 100644
--- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp
+++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp
@@ -2159,8 +2159,8 @@ TEST_F(DeclarableOpsTests10, FakeQuantWithMinMaxVars_Test_3) {
 
     NDArray x = NDArrayFactory::create<double>('c', {1,2,3,1}, {-63.80, -63.75, -63.4, -63.5, 0.0, 0.1});
     NDArray exp = NDArrayFactory::create<double>('c', {1,2,3,1},  {-63.75, -63.75, -63.251953, -63.251953, 0.0, 0.0});
-    NDArray min = NDArrayFactory::create<double>(-63.65);
-    NDArray max = NDArrayFactory::create<double>(0.1);
+    NDArray min = NDArrayFactory::create<double>('c', {1},{-63.65});
+    NDArray max = NDArrayFactory::create<double>('c', {1}, {0.1});
 
     nd4j::ops::fake_quant_with_min_max_vars_per_channel op;
     auto results = op.execute({&x, &min, &max}, {}, {});
@@ -2178,8 +2178,8 @@ TEST_F(DeclarableOpsTests10, FakeQuantWithMinMaxVars_Test_3) {
 ////////////////////////////////////////////////////////////////////
 TEST_F(DeclarableOpsTests10, FakeQuantWithMinMaxVars_Test_4) {
 
-    NDArray x = NDArrayFactory::create<double>('c', {2,4,5,3});
-    NDArray exp = NDArrayFactory::create<double>('c', {2,4,5,3},
+    NDArray x = NDArrayFactory::create<float>('c', {2,4,5,3});
+    NDArray exp = NDArrayFactory::create<float>('c', {2,4,5,3},
             {1.0588236,  1.9607843,  3.019608,  4.0588236,  5.098039,  6.039216,  7.0588236,  8.039216,  9.058824,
                  10.058824,  10.980392,  12.078432, 13.058824,  13.921569, 15.09804,  16.058825,  17.058825, 18.117647,
                  19.058825,  20.,        21.137257, 22.058825,  22.941177, 23.882355, 25.058825,  26.078432, 26.901962,
@@ -2194,16 +2194,19 @@ TEST_F(DeclarableOpsTests10, FakeQuantWithMinMaxVars_Test_4) {
                  45.,       50.,         70.,       45.,        50.,       70.,       45.,        50.,       70.,
                  45.,       50.,         70.,       45.,        50.,       70.,       45.,        50.,       70.,
                  45.,       50.,        70.});
-    NDArray min = NDArrayFactory::create<double>({20., 20., 20.});
-    NDArray max = NDArrayFactory::create<double>({65., 70., 90.});
-
+    NDArray min = NDArrayFactory::create<float>({20., 20., 20.});
+    NDArray max = NDArrayFactory::create<float>({65., 70., 90.});
+    x.linspace(1.);
     nd4j::ops::fake_quant_with_min_max_vars_per_channel op;
     auto results = op.execute({&x, &min, &max}, {}, {});
 
     ASSERT_EQ(ND4J_STATUS_OK, results->status());
 
     auto result = results->at(0);
-    // result->printIndexedBuffer("Quantized2");
+    result->printBuffer("Quantized per channels 4");
+    exp.printBuffer("Quantized per channest E");
+    auto diff = *result - exp;
+    diff.printIndexedBuffer("Difference");
     ASSERT_TRUE(exp.isSameShapeStrict(result));
     ASSERT_TRUE(exp.equalsTo(result));