Merge pull request #10 from KonduitAI/shugeo_fake_quant2

Shugeo fake quant2
2019-10-10 18:40:02 +03:00 · 2019-10-10 18:40:02 +03:00 · d5568df7b3
commit d5568df7b3
parent fa8105bf0f ace65355c5
6 changed files with 360 additions and 100 deletions
--- a/libnd4j/include/ops/declarable/generic/parity_ops/fake_quant_with_min_max_vars_per_channel.cpp
+++ b/libnd4j/include/ops/declarable/generic/parity_ops/fake_quant_with_min_max_vars_per_channel.cpp
@ -0,0 +1,71 @@
 /*******************************************************************************
 * Copyright (c) 2015-2018 Skymind, Inc.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Apache License, Version 2.0 which is available at
 * https://www.apache.org/licenses/LICENSE-2.0.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ******************************************************************************/
 //
 // @author George Shulinok <sgazeos@gmail.com>, created on 08.10.2019
 //
 #include <op_boilerplate.h>
 #if NOT_EXCLUDED(OP_fake_quant_with_min_max_vars_per_channel)
 #include <ops/declarable/CustomOperations.h>
 #include <ops/declarable/helpers/fake_quantization.h>
 namespace nd4j {
    namespace ops {
        CONFIGURABLE_OP_IMPL(fake_quant_with_min_max_vars_per_channel, 1, 1, true, 0, 0) {
            auto x = INPUT_VARIABLE(0);
            auto min = INPUT_VARIABLE(1);
            auto max = INPUT_VARIABLE(2);
            REQUIRE_TRUE(block.width() == 3 || block.getTArguments()->size() == 2, 0, "fake_quant_with_min_max_vars_per_channel: No minimum/maximum values provided by either input arrays or TArgs");
            auto depth = x->sizeAt(-1);
            REQUIRE_TRUE(min->rankOf() == 1 && max->rankOf() == 1 && min->lengthOf() == max->lengthOf(), 0,
                    "fake_quant_with_min_max_vars_per_channel: Min and Max should be 1D tensors with the same length");
            REQUIRE_TRUE(depth == min->lengthOf(), 0, "fake_quant_with_min_max_vars_per_channel: Min length should be"
                                                      " %lld, but %lld occurs.", depth, min->lengthOf());
            REQUIRE_TRUE(depth == max->lengthOf(), 0, "fake_quant_with_min_max_vars_per_channel: Max length should be"
                                                      "%lld, but %lld occurs.", depth, max->lengthOf());
            auto output  = OUTPUT_VARIABLE(0);
            int numBits = 8;
            if (block.getIArguments() && block.getIArguments()->size())
                numBits = INT_ARG(0);
            bool narrowed = false;
            //INT_ARG(1);
            if (block.getIArguments()->size() == 2) {
                numBits = INT_ARG(0);
                narrowed = INT_ARG(1);
                REQUIRE_TRUE(numBits > 1 && numBits < 17, 0, "fake_quant_with_min_max_vars_per_channel: Number of bits"
                                                             " for quatization should be in between 2 and 16, but %i "
                                                             "was given.", numBits);
            }
            helpers::fakeQuantWithMinMaxVarsPerChannel(block.launchContext(), x, min, max, numBits, narrowed, output);
            return ND4J_STATUS_OK;
        }
        DECLARE_TYPES(fake_quant_with_min_max_vars_per_channel) {
            getOpDescriptor()
            -> setAllowedOutputTypes({ALL_FLOATS})
            -> setAllowedInputTypes({ALL_INTS, ALL_FLOATS});
        }
        DECLARE_SYN(fake_quant_with_min_max_args_per_channel, fake_quant_with_min_max_vars_per_channel);
    }
 }
 #endif
--- a/libnd4j/include/ops/declarable/headers/parity_ops.h
+++ b/libnd4j/include/ops/declarable/headers/parity_ops.h
@ -1748,6 +1748,25 @@ namespace nd4j {
        DECLARE_CONFIGURABLE_OP(fake_quant_with_min_max_vars, 3, 1, true, 0, -2);
        #endif
 /**
         * fake_quant_with_min_max_vals_per_channel - tf.quantization.fake_quant_with_min_max_vars_per_channel
         *
         * input params:
         *    0 - NDArray (input) - at least 2D.
         *    1 - 1D Tensor - min values (min length equals to last dim of input)
         *    2 - 1D Tensor - max value (length equals to min)
         *
         * int params (optional):
         *    0 - num_bits (allowed interval [2, 16], default 8)
         *    1 - narrow_range (default False)
         *
         * output:
         *    0 - NDArray with the same shape as input
         */
        #if NOT_EXCLUDED(OP_fake_quant_with_min_max_vars_per_channel)
                DECLARE_CONFIGURABLE_OP(fake_quant_with_min_max_vars_per_channel, 3, 1, true, 0, -2);
        #endif
        /**
         * compare_and_bitpack - compare with greater and pack result with uint8 
         *
--- a/libnd4j/include/ops/declarable/helpers/cpu/fake_quantization.cpp
+++ b/libnd4j/include/ops/declarable/helpers/cpu/fake_quantization.cpp
@ -25,74 +25,89 @@ namespace nd4j {
 namespace ops {
 namespace helpers {
    //
    // nudge - nudged min max over scale
    // scale = (Max - Min) / (quantMax - quantMin)
    // quantMin = 0 or 1, quantMax = 2^b - 1 == (1 << b) - 1
    //
    template <typename T>
    static void nudge(T min, T max, int quantMin, int quantMax, T* scale, T* nudgedMin, T* nudgedMax) {
        // floating point instead integers
        T quantMaxF = static_cast<T>(quantMax);
        T quantMinF = static_cast<T>(quantMin);
        // compute scale
        *scale = (max - min) / (quantMaxF - quantMinF);
        // compute left bound point
        auto zeroPointFromMin = quantMinF - min / *scale;
        // bound zero point to conform with range [0 or 1, 2^b - 1]
        uint16_t const nudged_zero_point = [zeroPointFromMin, quantMin, quantMax, quantMaxF, quantMinF] {
                if (zeroPointFromMin < quantMinF) {
                    return static_cast<uint16_t>(quantMin);
                }
                if (zeroPointFromMin > quantMaxF) {
                    return static_cast<uint16_t>(quantMax);
                }
                return nd4j::math::nd4j_round<T,uint16_t>(zeroPointFromMin);
        }();
        // compute nudged min and max with computed nudged zero point
        *nudgedMin = (quantMinF - nudged_zero_point) * (*scale);
        *nudgedMax = (quantMaxF - nudged_zero_point) * (*scale);
    }
    template <typename T>
    void fakeQuantWithMinMaxVarsPerChannel_(NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) {
        int lowIntBound = narrowed ? 1 : 0; // 0 or 1
        int upperIntBound = (1 << numBits) - 1; // 2^b - 1
        auto channels = input->sizeAt(-1); // last dimension
        PRAGMA_OMP_PARALLEL_FOR
        for (auto i = 0; i < channels; i++) {
            T scale, nudged_min, nudged_max;
            // nudge min and max first, with scale computing
            nudge<T>(min->t<T>(i), max->t<T>(i), lowIntBound, upperIntBound, &scale, &nudged_min, &nudged_max);
            // slide using last dimension and process all for given channel
            for (auto e = 0; e < input->lengthOf(); e += channels) {
                T val = input->t<T>(e + i);
                if ( val <= nudged_min)
                    val = nudged_min;
                else if (val >= nudged_max)
                    val = nudged_max;
                // quantization itself
                output->t<T>(e + i) = math::nd4j_floor<T,T>((val - nudged_min)/scale + T(0.5)) * scale + nudged_min;
            }
        }
    }
    template <typename T>
    void fakeQuantWithMinMaxVars_(NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) {
        int lowIntBound = narrowed ? 1 : 0;
-        int upperIntBound = 1 << numBits - 1;
+        int upperIntBound = (1 << numBits) - 1;
-        const float quant_min_float = static_cast<float>(lowIntBound);
+        T nudgedMin, nudgedMax, scale;
-        const float quant_max_float = static_cast<float>(upperIntBound);
+        // nudge with given min and max and compute scale and nudged min and max
-        T scale = (max->t<T>(0) - min->t<T>(0)) / (quant_max_float - quant_min_float);
+        nudge<T>(min->t<T>(0), max->t<T>(0), lowIntBound, upperIntBound, &scale, &nudgedMin, &nudgedMax);
-        const T zero_point_from_min = quant_min_float - min->e<T>(0) / scale;
+        // quantization as one
-        const uint16_t nudged_zero_point = [zero_point_from_min, lowIntBound,
+        auto fakeQuantizationWithMinMax = LAMBDA_T(x, nudgedMin, nudgedMax, scale) {
-                                        quant_min_float, upperIntBound,
+            T val = x; // boundign value between nudged min and max
-                                        quant_max_float] {
+            if (val < nudgedMin) {
-        if (zero_point_from_min < quant_min_float) {
+                val = nudgedMin;
          return static_cast<uint16_t>(lowIntBound);
            }
-        if (zero_point_from_min > quant_max_float) {
+            else if (val > nudgedMax)
-          return static_cast<uint16_t>(upperIntBound);
+                val = nudgedMax;
-        }
+            // converse value with scale and shifted with nudged min
-        return static_cast<uint16_t>(roundf(zero_point_from_min));
+            return (nd4j::math::nd4j_floor<T,T>((val - nudgedMin)/scale + T(0.5)) * scale + nudgedMin);
        }();
        auto nudged_min = (quant_min_float - nudged_zero_point) * (scale);
        auto nudged_max = (quant_max_float - nudged_zero_point) * (scale);
        //input->applyScalar(scalar::CompareAndSet, nudged_max, clamped, nullptr); //.cwiseMin(nudged_max).cwiseMax(nudged_min);
        //input->applyScalar(scalar::CompareAndSet, nudged_min, clamped, nullptr); //.cwiseMin(nudged_max).cwiseMax(nudged_min);
        auto wiseMax = LAMBDA_T(x, nudged_min) {
            if (x < nudged_min) {
                return nudged_min;
            }
            return x;
        };
        auto wiseMin = LAMBDA_T(x, nudged_max) {
            if (x > nudged_max) {
                return nudged_max;
            }
            return x;
        };
        auto scaleTensor(*input); // = NDArrayFactory::create(input->ordering(), input->getShapeAsVector(), input->getWorkspace());
        auto clamped(*input); // = NDArrayFactory::create(input->ordering(), input->getShapeAsVector(), input->getWorkspace());
        scaleTensor.assign(scale);
        input->applyLambda<T>(wiseMin, &clamped);
 //        const auto clamped = inputs.cwiseMin(nudged_max).cwiseMax(nudged_min);
        clamped.applyLambda<T>(wiseMax, output);
 //        const auto clamped_shifted = clamped - nudged_min;
        *output -= nudged_min;
       // auto nudgedScale = scale;
        (*output) /= scaleTensor;
        (*output) += T(0.5f);
        output->applyTransform(transform::Floor, nullptr, nullptr);
        (*output) *= scaleTensor;
        (*output) += nudged_min;
        //output->printIndexedBuffer("FAKE QUANTED");
        /*
    const auto nudged_scale_repl = inputs.constant(nudged_scale);
    const auto clamped = inputs.cwiseMin(nudged_max).cwiseMax(nudged_min);
    const auto clamped_shifted = clamped - nudged_min;
    *output = (clamped_shifted / nudged_scale_repl + 0.5f).floor() *
                            nudged_scale_repl +
                        nudged_min;
 */
        input->applyLambda<T>(fakeQuantizationWithMinMax, output);
    }
    void fakeQuantWithMinMaxVars(NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) {
        BUILD_SINGLE_SELECTOR(input->dataType(), fakeQuantWithMinMaxVars_, (input, min, max, numBits, narrowed, output), FLOAT_TYPES);
    }
    void fakeQuantWithMinMaxVarsPerChannel(LaunchContext* context, NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) {
        BUILD_SINGLE_SELECTOR(input->dataType(), fakeQuantWithMinMaxVarsPerChannel_, (input, min, max, numBits, narrowed, output), FLOAT_TYPES);
    }
    BUILD_SINGLE_TEMPLATE(template void fakeQuantWithMinMaxVars_, (NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output), FLOAT_TYPES);
 }
--- a/libnd4j/include/ops/declarable/helpers/cuda/fake_quantization.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/fake_quantization.cu
@ -33,65 +33,105 @@ namespace helpers {
 // narrowed - shrink is true
 // output - output tensor
 //
    template <typename T>
    static __host__ __device__ void
    nudge(T min, T max, int quantMin, int quantMax, T* scale, T* nudgedMin, T* nudgedMax) {
        T quantMaxF = static_cast<T>(quantMax);
        T quantMinF = static_cast<T>(quantMin);
        *scale = (max - min) / (quantMaxF - quantMinF);
        auto zeroPointFromMin = quantMinF - min / *scale;
        uint16_t const nudgedZeroPoint = [zeroPointFromMin, quantMin, quantMax, quantMaxF, quantMinF] {
            if (zeroPointFromMin < quantMinF) {
                return static_cast<uint16_t>(quantMin);
            }
            if (zeroPointFromMin > quantMaxF) {
                return static_cast<uint16_t>(quantMax);
            }
            return nd4j::math::nd4j_round<T,uint16_t>(zeroPointFromMin);
        }();
        *nudgedMin = (quantMinF - nudgedZeroPoint) * (*scale);
        *nudgedMax = (quantMaxF - nudgedZeroPoint) * (*scale);
    }
    template <typename T>
    void fakeQuantWithMinMaxVars_(NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) {
        int lowIntBound = narrowed?1:0;
-        int upperIntBound = 1 << numBits - 1;
+        int upperIntBound = (1 << numBits) - 1;
-        min->syncToHost();
+        min->syncToHost(); // these are scalars, so nothing much happened
        max->syncToHost();
-        const float quant_min_float = static_cast<float>(lowIntBound);
+        T scale, nudgedMin, nudgedMax;
-        const float quant_max_float = static_cast<float>(upperIntBound);
+        nudge(min->t<T>(0), max->t<T>(0), lowIntBound, upperIntBound, &scale, &nudgedMin, &nudgedMax);
        T scale = (max->t<T>(0) - min->t<T>(0)) / (quant_max_float - quant_min_float);
        const T zero_point_from_min = quant_min_float - min->t<T>(0) / scale;
-        const uint16_t nudged_zero_point = [zero_point_from_min, lowIntBound,
+        auto wiseMinMaxAndSoOn = LAMBDA_T(x, nudgedMin, nudgedMax, scale) {
-                                        quant_min_float, upperIntBound,
+            T val = x;
-                                        quant_max_float] {
+            if (x < nudgedMin) {
-            if (zero_point_from_min < quant_min_float) {
+                val = nudgedMin;
                return static_cast<uint16_t>(lowIntBound);
            }
-            if (zero_point_from_min > quant_max_float) {
+            else if (x > nudgedMax) {
-                return static_cast<uint16_t>(upperIntBound);
+                val = nudgedMax;
            }
-            return static_cast<uint16_t>(roundf(zero_point_from_min));
+            else
-        }();
+                val = x;
-
+            return (math::nd4j_floor<T,T>((val - nudgedMin) / scale + T(0.5)) * scale + nudgedMin);
        auto nudged_min = (quant_min_float - nudged_zero_point) * (scale);
        auto nudged_max = (quant_max_float - nudged_zero_point) * (scale);
        auto wiseMax = LAMBDA_T(x, nudged_min) {
            if (x < nudged_min) {
                return nudged_min;
            }
            return x;
        };
-        auto wiseMin = LAMBDA_T(x, nudged_max) {
+        input->applyLambda(wiseMinMaxAndSoOn, output);
            if (x > nudged_max) {
                return nudged_max;
    }
-            return x;
+
    template <typename T>
    static __global__ void fakeQuantWithMinMaxKernel(T* input, Nd4jLong* inputShape, T* min, T* max,
            int lowIntBound, int upperIntBound, Nd4jLong channels,
            T* output, Nd4jLong* outputShape, Nd4jLong length) {
        __shared__ int block;
        if (threadIdx.x == 0) {
            block = length / channels; // to loop with last dimension as block
        }
        __syncthreads();
        for (auto i = blockIdx.x; i < (int)channels; i += gridDim.x) {
            T scale, nudgedMin, nudgedMax;
            nudge(min[i], max[i], lowIntBound, upperIntBound, &scale, &nudgedMin, &nudgedMax);
            // loop over blocks to quantization between nudged min and max
            for (auto b = threadIdx.x; b < block; b += blockDim.x) {
                T val = input[shape::getIndexOffset(b * channels + i, inputShape)];
                if (val < nudgedMin) {
                    val = nudgedMin;
                } else if (val > nudgedMax) {
                    val = nudgedMax;
                }
                output[shape::getIndexOffset(b * channels + i, outputShape)] =
                        (math::nd4j_floor<T, T>((val - nudgedMin) / scale + T(0.5)) * scale + nudgedMin);
            };
        }
    }
-        auto scaleTensor(*input);
+    template <typename T>
-        auto clamped(*input);
+    void fakeQuantWithMinMaxVarsPerChannel_(LaunchContext* context, NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) {
-        scaleTensor.assign(scale);
+        int lowIntBound = narrowed?1:0;
-        input->applyLambda(wiseMin, &clamped);
+        int upperIntBound = (1 << numBits) - 1;
        auto channels = min->lengthOf();
        auto length = input->lengthOf();
        NDArray::prepareSpecialUse({output}, {min, max, input});
        auto stream = context->getCudaStream();
        T* inputBuf = input->dataBuffer()->specialAsT<T>();
        T* outputBuf = output->dataBuffer()->specialAsT<T>();
        T* minBuf = min->dataBuffer()->specialAsT<T>();
        T* maxBuf = max->dataBuffer()->specialAsT<T>();
        fakeQuantWithMinMaxKernel<<<128, 256, 256, *stream>>>(inputBuf, input->specialShapeInfo(),
                minBuf, maxBuf, lowIntBound, upperIntBound, channels, outputBuf, output->specialShapeInfo(), length);
        NDArray::registerSpecialUse({output}, {min, max, input});
        clamped.applyLambda(wiseMax, output);
        *output -= nudged_min;
        (*output) /= scaleTensor;
        (*output) += T(0.5f);
        output->applyTransform(transform::Floor, nullptr, nullptr);
        (*output) *= scaleTensor;
        (*output) += nudged_min;
    }
    void fakeQuantWithMinMaxVars(NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) {
        BUILD_SINGLE_SELECTOR(input->dataType(), fakeQuantWithMinMaxVars_, (input, min, max, numBits, narrowed, output), FLOAT_TYPES);
    }
    void fakeQuantWithMinMaxVarsPerChannel(LaunchContext* context, NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) {
        BUILD_SINGLE_SELECTOR(input->dataType(), fakeQuantWithMinMaxVarsPerChannel_, (context, input, min, max, numBits, narrowed, output), FLOAT_TYPES);
    }
    BUILD_SINGLE_TEMPLATE(template void fakeQuantWithMinMaxVars_, (NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output), FLOAT_TYPES);
    BUILD_SINGLE_TEMPLATE(template void fakeQuantWithMinMaxVarsPerChannel_, (LaunchContext* context, NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output), FLOAT_TYPES);
 }
 }
--- a/libnd4j/include/ops/declarable/helpers/fake_quantization.h
+++ b/libnd4j/include/ops/declarable/helpers/fake_quantization.h
@ -27,6 +27,7 @@ namespace ops {
 namespace helpers {
    void fakeQuantWithMinMaxVars(NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output);
    void fakeQuantWithMinMaxVarsPerChannel(LaunchContext* context, NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output);
 }
 }
 }
--- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp
+++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp
@ -2117,7 +2117,7 @@ TEST_F(DeclarableOpsTests10, Image_DrawBoundingBoxes_2) {
 TEST_F(DeclarableOpsTests10, FakeQuantWithMinMaxVars_Test_1) {
    NDArray x('c', {2,3}, {-63.80f, -63.75f, -63.70f, -63.5f, 0.0f, 0.1f}, nd4j::DataType::FLOAT32);
-    NDArray exp('c', {2,3},  {-63.75f, -63.75f, -63.75f, -63.251953f, 0.0f, 0.0f}, nd4j::DataType::FLOAT32);
+    NDArray exp('c', {2,3},  {-63.75, -63.75, -63.75, -63.5, 0., 0.}, nd4j::DataType::FLOAT32);
    NDArray min('c', {},  {-63.65f}, nd4j::DataType::FLOAT32);
    NDArray max('c', {},  {0.1f}, nd4j::DataType::FLOAT32);
@ -2127,7 +2127,8 @@ TEST_F(DeclarableOpsTests10, FakeQuantWithMinMaxVars_Test_1) {
    ASSERT_EQ(ND4J_STATUS_OK, results->status());
    auto result = results->at(0);
-    // result->printIndexedBuffer("Quantized");
+//    result->printBuffer("Quantized");
 //    exp.printBuffer("Expected");
    ASSERT_TRUE(exp.isSameShapeStrict(result));
    ASSERT_TRUE(exp.equalsTo(result));
@ -2137,7 +2138,7 @@ TEST_F(DeclarableOpsTests10, FakeQuantWithMinMaxVars_Test_1) {
 TEST_F(DeclarableOpsTests10, FakeQuantWithMinMaxVars_Test_2) {
    NDArray x = NDArrayFactory::create<double>('c', {2,3}, {-63.80, -63.75, -63.4, -63.5, 0.0, 0.1});
-    NDArray exp = NDArrayFactory::create<double>('c', {2,3},  {-63.75, -63.75, -63.251953, -63.251953, 0.0, 0.0});
+    NDArray exp = NDArrayFactory::create<double>('c', {2,3},  {-63.75, -63.75, -63.5 , -63.5 ,   0.  ,   0. });
    NDArray min = NDArrayFactory::create<double>(-63.65);
    NDArray max = NDArrayFactory::create<double>(0.1);
@ -2154,6 +2155,119 @@ TEST_F(DeclarableOpsTests10, FakeQuantWithMinMaxVars_Test_2) {
    delete results;
 }
 ////////////////////////////////////////////////////////////////////
 TEST_F(DeclarableOpsTests10, FakeQuantWithMinMaxVars_Test_3) {
    NDArray x = NDArrayFactory::create<double>('c', {1,2,3,1}, {-63.80, -63.75, -63.4, -63.5, 0.0, 0.1});
    NDArray exp = NDArrayFactory::create<double>('c', {1,2,3,1},  {-63.75, -63.75, -63.5 , -63.5 ,   0.  ,   0. });
    NDArray min = NDArrayFactory::create<double>('c', {1},{-63.65});
    NDArray max = NDArrayFactory::create<double>('c', {1}, {0.1});
    nd4j::ops::fake_quant_with_min_max_vars_per_channel op;
    auto results = op.execute({&x, &min, &max}, {}, {});
    ASSERT_EQ(ND4J_STATUS_OK, results->status());
    auto result = results->at(0);
    // result->printIndexedBuffer("Quantized2");
    ASSERT_TRUE(exp.isSameShapeStrict(result));
    ASSERT_TRUE(exp.equalsTo(result));
    delete results;
 }
 ////////////////////////////////////////////////////////////////////
 TEST_F(DeclarableOpsTests10, FakeQuantWithMinMaxVars_Test_4) {
    NDArray x = NDArrayFactory::create<float>('c', {2,4,5,3});
    NDArray exp = NDArrayFactory::create<float>('c', {2,4,5,3},{
                  1.0588236,  1.9607843,  3.019608,  4.0588236,  5.098039,  6.039216,  7.0588236,  8.039216,  9.058824,
                 10.058824,  10.980392,  12.078432, 13.058824,  13.921569, 15.09804,  16.058825,  17.058825, 18.117647,
                 19.058825,  20.,        21.137257, 22.058825,  22.941177, 23.882355, 25.058825,  26.078432, 26.901962,
                 28.058825, 29.019608,   29.92157,  31.058825,  31.960785, 32.941177, 34.058823,  35.09804,  35.960785,
                 37.058823, 38.039215,   38.980392, 40.058823,  40.980392, 42.000004, 43.058826,  43.92157,  45.01961,
                 45.,       47.058823,   48.03922,  45.,        50.,       51.058826, 45.,        50.,       54.078434,
                 45.,       50.,         57.09804,  45.,        50.,       60.11765,  45.,        50.,       62.862747,
                 45.,       50.,         65.882355, 45.,        50.,       68.90196,  45.,        50.,       70.,
                 45.,       50.,         70.,       45.,        50.,       70.,       45.,        50.,       70.,
                 45.,       50.,         70.,       45.,        50.,       70.,       45.,        50.,       70.,
                 45.,       50.,         70.,       45.,        50.,       70.,       45.,        50.,       70.,
                 45.,       50.,         70.,       45.,        50.,       70.,       45.,        50.,       70.,
                 45.,       50.,         70.,       45.,        50.,       70.,       45.,        50.,       70.,
                 45.,       50.,        70.});
    NDArray min = NDArrayFactory::create<float>({20., 20., 20.});
    NDArray max = NDArrayFactory::create<float>({65., 70., 90.});
    x.linspace(1.);
    nd4j::ops::fake_quant_with_min_max_vars_per_channel op;
    auto results = op.execute({&x, &min, &max}, {}, {});
    ASSERT_EQ(ND4J_STATUS_OK, results->status());
    auto result = results->at(0);
 //    result->printBuffer("Quantized per channels 4");
 //    exp.printBuffer("Quantized per channest E");
 //    auto diff = *result - exp;
 //    diff.printIndexedBuffer("Difference");
    ASSERT_TRUE(exp.isSameShapeStrict(result));
    ASSERT_TRUE(exp.equalsTo(result));
    delete results;
 }
 TEST_F(DeclarableOpsTests10, FakeQuantWithMinMaxVars_Test_5) {
    NDArray x = NDArrayFactory::create<float>('c', {2, 3, 5, 4});
    NDArray exp = NDArrayFactory::create<float>('c', {2, 3, 5, 4},{
            -19.92157   , -18.980392  , -18.039217  , -16.941177  ,
            -19.92157   , -18.980392  , -18.039217  , -16.941177  ,
            -19.92157   , -18.980392  , -18.039217  , -16.941177  ,
            -19.92157   , -18.980392  , -18.039217  , -16.941177  ,
            -19.92157   , -18.980392  , -18.039217  , -16.941177  ,
            -19.92157   , -18.980392  , -18.039217  , -16.941177  ,
            -19.92157   , -18.980392  , -18.039217  , -16.941177  ,
            -19.92157   , -18.980392  , -18.039217  , -16.941177  ,
            -19.92157   , -18.980392  , -18.039217  , -16.941177  ,
            -19.92157   , -18.980392  , -18.039217  , -16.941177  ,
            -19.92157   , -18.980392  , -18.039217  , -16.941177  ,
            -16.        , -15.058824  , -13.960785  , -13.0196085 ,
            -11.92157   , -10.980392  , -10.039217  ,  -8.941177  ,
            -8.000001  ,  -7.0588236 ,  -5.960785  ,  -5.0196085 ,
            -3.9215698 ,  -2.9803925 ,  -2.039217  ,  -0.94117737,
            0.        ,   0.94117737,   2.039215  ,   2.9803925 ,
            4.07843   ,   5.0196075 ,   5.960783  ,   7.0588226 ,
            8.        ,   8.941177  ,  10.039215  ,  10.980392  ,
            12.07843   ,  13.019608  ,  13.960783  ,  15.058823  ,
            16.        ,  16.941177  ,  18.039217  ,  18.980392  ,
            20.07843   ,  21.019608  ,  21.960783  ,  23.058823  ,
            20.07843   ,  21.019608  ,  21.960783  ,  23.058823  ,
            20.07843   ,  21.019608  ,  21.960783  ,  23.058823  ,
            20.07843   ,  21.019608  ,  21.960783  ,  23.058823  ,
            20.07843   ,  21.019608  ,  21.960783  ,  23.058823  ,
            20.07843   ,  21.019608  ,  21.960783  ,  23.058823  ,
            20.07843   ,  21.019608  ,  21.960783  ,  23.058823  ,
            20.07843   ,  21.019608  ,  21.960783  ,  23.058823  ,
            20.07843   ,  21.019608  ,  21.960783  ,  23.058823  ,
            20.07843   ,  21.019608  ,  21.960783  ,  23.058823
    });
    NDArray min = NDArrayFactory::create<float>({-20., -19., -18., -17});
    NDArray max = NDArrayFactory::create<float>({20., 21., 22., 23});
    x.linspace(-60.);
    nd4j::ops::fake_quant_with_min_max_vars_per_channel op;
    auto results = op.execute({&x, &min, &max}, {}, {});
    ASSERT_EQ(ND4J_STATUS_OK, results->status());
    auto result = results->at(0);
 //    result->printBuffer("Quantized per channels 5");
 //    exp.printBuffer("Quantized per channest E");
 //    auto diff = *result - exp;
 //    diff.printIndexedBuffer("Difference");
    ASSERT_TRUE(exp.isSameShapeStrict(result));
    ASSERT_TRUE(exp.equalsTo(result));
    delete results;
 }
 ////////////////////////////////////////////////////////////////////
 TYPED_TEST(TypedDeclarableOpsTests10, batchnorm_new_test1) {