From cb56b0b06a3b47d0507da1255df694f3b26d01db Mon Sep 17 00:00:00 2001
From: shugeo
Date: Tue, 8 Oct 2019 19:00:41 +0300
Subject: [PATCH 01/15] The first approach for
 fake_quant_with_min_max_vars_per_channel op implementation.

---
 ...ke_quant_with_min_max_vars_per_channel.cpp | 73 +++++++++++++++++++
 .../ops/declarable/headers/parity_ops.h       |  3 +
 .../helpers/cpu/fake_quantization.cpp         |  4 +
 .../helpers/cuda/fake_quantization.cu         |  4 +
 .../declarable/helpers/fake_quantization.h    |  1 +
 5 files changed, 85 insertions(+)
 create mode 100644 libnd4j/include/ops/declarable/generic/parity_ops/fake_quant_with_min_max_vars_per_channel.cpp

diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/fake_quant_with_min_max_vars_per_channel.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/fake_quant_with_min_max_vars_per_channel.cpp
new file mode 100644
index 000000000..ba8eb9e7b
--- /dev/null
+++ b/libnd4j/include/ops/declarable/generic/parity_ops/fake_quant_with_min_max_vars_per_channel.cpp
@@ -0,0 +1,73 @@
+/*******************************************************************************
+ * Copyright (c) 2015-2018 Skymind, Inc.
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Apache License, Version 2.0 which is available at
+ * https://www.apache.org/licenses/LICENSE-2.0.
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ ******************************************************************************/
+
+//
+// @author George Shulinok , created on 08.10.2019
+//
+
+#include <op_boilerplate.h>
+#if NOT_EXCLUDED(OP_fake_quant_with_min_max_vars_per_channel)
+
+#include <ops/declarable/CustomOperations.h>
+#include <ops/declarable/helpers/fake_quantization.h>
+namespace nd4j {
+    namespace ops {
+        CONFIGURABLE_OP_IMPL(fake_quant_with_min_max_vars_per_channel, 1, 1, true, 0, 0) {
+
+            auto x = INPUT_VARIABLE(0);
+
+            NDArray* min;
+            NDArray* max;
+
+            REQUIRE_TRUE(block.width() == 3 || block.getTArguments()->size() == 2, 0, "fake_quant_with_min_max_vars_per_channel: No minimum/maximum values provided by either input arrays or TArgs");
+
+            NDArray m;
+            NDArray m2;
+            if (block.width() == 3) {
+                min = INPUT_VARIABLE(1);
+                max = INPUT_VARIABLE(2);
+            } else if (block.getTArguments()->size() == 2) {
+                m  = NDArrayFactory::create(x->dataType(), T_ARG(0), block.launchContext());
+                m2 = NDArrayFactory::create(x->dataType(), T_ARG(1), block.launchContext());
+                min = &m;
+                max = &m2;
+            }
+            auto output = OUTPUT_VARIABLE(0);
+            int numBits = 8;
+            if (block.getIArguments() && block.getIArguments()->size())
+                numBits = INT_ARG(0);
+            bool narrowed = false;
+            //INT_ARG(1);
+            if (block.getIArguments()->size() == 2) {
+                numBits = INT_ARG(0);
+                narrowed = INT_ARG(1);
+                REQUIRE_TRUE(numBits > 1 && numBits < 17, 0, "fake_quant_with_min_max_vars_per_channel: Number of bits for quatization should be in between 2 and 16, but %i was given.", numBits);
+            }
+            helpers::fakeQuantWithMinMaxVarsPerChannel(x, min, max, numBits, narrowed, output);
+            return ND4J_STATUS_OK;
+        }
+
+        DECLARE_TYPES(fake_quant_with_min_max_vars_per_channel) {
+            getOpDescriptor()
+                    -> setAllowedOutputTypes({ALL_FLOATS})
+                    -> setAllowedInputTypes({ALL_INTS, ALL_FLOATS});
+        }
+
+        DECLARE_SYN(fake_quant_with_min_max_args_per_channel, fake_quant_with_min_max_vars_per_channel);
+    }
+}
+
+#endif
\ No newline at end of file
diff --git a/libnd4j/include/ops/declarable/headers/parity_ops.h b/libnd4j/include/ops/declarable/headers/parity_ops.h
index 8f6849ef7..e0dc55937 100644
--- a/libnd4j/include/ops/declarable/headers/parity_ops.h
+++ b/libnd4j/include/ops/declarable/headers/parity_ops.h
@@ -1747,6 +1747,9 @@ namespace nd4j {
         #if NOT_EXCLUDED(OP_fake_quant_with_min_max_vars)
         DECLARE_CONFIGURABLE_OP(fake_quant_with_min_max_vars, 3, 1, true, 0, -2);
         #endif
+        #if NOT_EXCLUDED(OP_fake_quant_with_min_max_vars_per_channel)
+        DECLARE_CONFIGURABLE_OP(fake_quant_with_min_max_vars_per_channel, 3, 1, true, 0, -2);
+        #endif
 
     /**
      * compare_and_bitpack - compare with greater and pack result with uint8
diff --git a/libnd4j/include/ops/declarable/helpers/cpu/fake_quantization.cpp b/libnd4j/include/ops/declarable/helpers/cpu/fake_quantization.cpp
index df162474f..88c451ffb 100644
--- a/libnd4j/include/ops/declarable/helpers/cpu/fake_quantization.cpp
+++ b/libnd4j/include/ops/declarable/helpers/cpu/fake_quantization.cpp
@@ -93,6 +93,10 @@ namespace helpers {
     void fakeQuantWithMinMaxVars(NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) {
         BUILD_SINGLE_SELECTOR(input->dataType(), fakeQuantWithMinMaxVars_, (input, min, max, numBits, narrowed, output), FLOAT_TYPES);
     }
+    void fakeQuantWithMinMaxVarsPerChannel(NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) {
+        BUILD_SINGLE_SELECTOR(input->dataType(), fakeQuantWithMinMaxVars_, (input, min, max, numBits, narrowed, output), FLOAT_TYPES);
+    }
+
     BUILD_SINGLE_TEMPLATE(template void fakeQuantWithMinMaxVars_, (NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output), FLOAT_TYPES);
 
 }
diff --git a/libnd4j/include/ops/declarable/helpers/cuda/fake_quantization.cu b/libnd4j/include/ops/declarable/helpers/cuda/fake_quantization.cu
index 9bb331685..4e62aafa8 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/fake_quantization.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/fake_quantization.cu
@@ -91,6 +91,10 @@ namespace helpers {
     void fakeQuantWithMinMaxVars(NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) {
         BUILD_SINGLE_SELECTOR(input->dataType(), fakeQuantWithMinMaxVars_, (input, min, max, numBits, narrowed, output), FLOAT_TYPES);
     }
+    void fakeQuantWithMinMaxVarsPerChannel(NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) {
+        BUILD_SINGLE_SELECTOR(input->dataType(), fakeQuantWithMinMaxVars_, (input, min, max, numBits, narrowed, output), FLOAT_TYPES);
+    }
+
     BUILD_SINGLE_TEMPLATE(template void fakeQuantWithMinMaxVars_, (NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output), FLOAT_TYPES);
 
 }
diff --git a/libnd4j/include/ops/declarable/helpers/fake_quantization.h b/libnd4j/include/ops/declarable/helpers/fake_quantization.h
index aa0941db4..7a43a15cb 100644
--- a/libnd4j/include/ops/declarable/helpers/fake_quantization.h
+++ b/libnd4j/include/ops/declarable/helpers/fake_quantization.h
@@ -27,6 +27,7 @@ namespace ops {
 namespace helpers {
 
     void fakeQuantWithMinMaxVars(NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output);
+    void fakeQuantWithMinMaxVarsPerChannel(NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output);
 }
 }
 }

From 3a89e518115b4123fb44ff014a14624b3fa84c71 Mon Sep 17 00:00:00 2001
From: shugeo
Date: Wed, 9 Oct 2019 13:38:18 +0300
Subject: [PATCH 02/15] Added tests
for fake_quant_with_min_max_vars_per_channel op. --- .../layers_tests/DeclarableOpsTests10.cpp | 56 +++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp index 446763096..191ee8524 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp @@ -2154,6 +2154,62 @@ TEST_F(DeclarableOpsTests10, FakeQuantWithMinMaxVars_Test_2) { delete results; } +//////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests10, FakeQuantWithMinMaxVars_Test_3) { + + NDArray x = NDArrayFactory::create('c', {1,2,3,1}, {-63.80, -63.75, -63.4, -63.5, 0.0, 0.1}); + NDArray exp = NDArrayFactory::create('c', {1,2,3,1}, {-63.75, -63.75, -63.251953, -63.251953, 0.0, 0.0}); + NDArray min = NDArrayFactory::create(-63.65); + NDArray max = NDArrayFactory::create(0.1); + + nd4j::ops::fake_quant_with_min_max_vars_per_channel op; + auto results = op.execute({&x, &min, &max}, {}, {}); + + ASSERT_EQ(ND4J_STATUS_OK, results->status()); + + auto result = results->at(0); + // result->printIndexedBuffer("Quantized2"); + ASSERT_TRUE(exp.isSameShapeStrict(result)); + ASSERT_TRUE(exp.equalsTo(result)); + + delete results; +} + +//////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests10, FakeQuantWithMinMaxVars_Test_4) { + + NDArray x = NDArrayFactory::create('c', {2,4,5,3}); + NDArray exp = NDArrayFactory::create('c', {2,4,5,3}, + {1.0588236, 1.9607843, 3.019608, 4.0588236, 5.098039, 6.039216, 7.0588236, 8.039216, 9.058824, + 10.058824, 10.980392, 12.078432, 13.058824, 13.921569, 15.09804, 16.058825, 17.058825, 18.117647, + 19.058825, 20., 21.137257, 22.058825, 22.941177, 23.882355, 25.058825, 26.078432, 26.901962, + 28.058825, 29.019608, 29.92157, 31.058825, 31.960785, 32.941177, 34.058823, 35.09804, 35.960785, + 37.058823, 38.039215, 38.980392, 40.058823, 40.980392, 42.000004, 43.058826, 43.92157, 45.01961, + 45., 47.058823, 48.03922, 45., 50., 51.058826, 45., 50., 54.078434, + 45., 50., 57.09804, 45., 50., 60.11765, 45., 50., 62.862747, + 45., 50., 65.882355, 45., 50., 68.90196, 45., 50., 70., + 45., 50., 70., 45., 50., 70., 45., 50., 70., + 45., 50., 70., 45., 50., 70., 45., 50., 70., + 45., 50., 70., 45., 50., 70., 45., 50., 70., + 45., 50., 70., 45., 50., 70., 45., 50., 70., + 45., 50., 70., 45., 50., 70., 45., 50., 70., + 45., 50., 70.}); + NDArray min = NDArrayFactory::create({20., 20., 20.}); + NDArray max = NDArrayFactory::create({65., 70., 90.}); + + nd4j::ops::fake_quant_with_min_max_vars_per_channel op; + auto results = op.execute({&x, &min, &max}, {}, {}); + + ASSERT_EQ(ND4J_STATUS_OK, results->status()); + + auto result = results->at(0); + // result->printIndexedBuffer("Quantized2"); + ASSERT_TRUE(exp.isSameShapeStrict(result)); + ASSERT_TRUE(exp.equalsTo(result)); + + delete results; +} + //////////////////////////////////////////////////////////////////// TYPED_TEST(TypedDeclarableOpsTests10, batchnorm_new_test1) { From d0cbd33b0e7cff44807a56b9514c23a1247093d5 Mon Sep 17 00:00:00 2001 From: shugeo Date: Wed, 9 Oct 2019 15:52:13 +0300 Subject: [PATCH 03/15] Added input checks for op. 
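
For reference, this is the kind of call the new checks are meant to accept. The snippet mirrors the FakeQuantWithMinMaxVars_Test_4 case added earlier in this series; the <double> template arguments and the surrounding test scaffolding are assumed here, not part of this patch:

    // x has last dimension 3, so min and max must be rank-1 arrays of length 3
    NDArray x   = NDArrayFactory::create<double>('c', {2, 4, 5, 3});
    NDArray min = NDArrayFactory::create<double>({20., 20., 20.});
    NDArray max = NDArrayFactory::create<double>({65., 70., 90.});
    x.linspace(1.);

    nd4j::ops::fake_quant_with_min_max_vars_per_channel op;
    auto results = op.execute({&x, &min, &max}, {}, {});  // passes the new rank/length checks
    delete results;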
--- ...ke_quant_with_min_max_vars_per_channel.cpp | 27 +++++++++---------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/fake_quant_with_min_max_vars_per_channel.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/fake_quant_with_min_max_vars_per_channel.cpp index ba8eb9e7b..4d719b38b 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/fake_quant_with_min_max_vars_per_channel.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/fake_quant_with_min_max_vars_per_channel.cpp @@ -28,22 +28,19 @@ namespace nd4j { CONFIGURABLE_OP_IMPL(fake_quant_with_min_max_vars_per_channel, 1, 1, true, 0, 0) { auto x = INPUT_VARIABLE(0); - - NDArray* min; - NDArray* max; + auto min = INPUT_VARIABLE(1); + auto max = INPUT_VARIABLE(2); REQUIRE_TRUE(block.width() == 3 || block.getTArguments()->size() == 2, 0, "fake_quant_with_min_max_vars_per_channel: No minimum/maximum values provided by either input arrays or TArgs"); + auto depth = x->sizeAt(-1); + REQUIRE_TRUE(min->rankOf() == 1 && max->rankOf() == 1 && min->lengthOf() == max->lengthOf(), 0, + "fake_quant_with_min_max_vars_per_channel: Min and Max should be 1D tensors with the same length"); + REQUIRE_TRUE(depth == min->lengthOf(), 0, "fake_quant_with_min_max_vars_per_channel: Min length should be" + " %lld, but %lld occurs.", depth, min->lengthOf()); - NDArray m; - NDArray m2; - if(block.width() == 3){ - min = INPUT_VARIABLE(1); - max = INPUT_VARIABLE(2); - } else if(block.getTArguments()->size() == 2){ - m = NDArrayFactory::create(x->dataType(), T_ARG(0), block.launchContext()); - m2 = NDArrayFactory::create(x->dataType(), T_ARG(1), block.launchContext()); - min = &m; - max = &m2; + REQUIRE_TRUE(depth == max->lengthOf(), 0, "fake_quant_with_min_max_vars_per_channel: Max length should be" + "%lld, but %lld occurs.", depth, max->lengthOf()); + if(block.width() == 3) { } auto output = OUTPUT_VARIABLE(0); int numBits = 8; @@ -54,7 +51,9 @@ namespace nd4j { if (block.getIArguments()->size() == 2) { numBits = INT_ARG(0); narrowed = INT_ARG(1); - REQUIRE_TRUE(numBits > 1 && numBits < 17, 0, "fake_quant_with_min_max_vars_per_channel: Number of bits for quatization should be in between 2 and 16, but %i was given.", numBits); + REQUIRE_TRUE(numBits > 1 && numBits < 17, 0, "fake_quant_with_min_max_vars_per_channel: Number of bits" + " for quatization should be in between 2 and 16, but %i " + "was given.", numBits); } helpers::fakeQuantWithMinMaxVarsPerChannel(x, min, max, numBits, narrowed, output); return ND4J_STATUS_OK; From 352f1eee802475e88ba8b30a757f4b965875afa3 Mon Sep 17 00:00:00 2001 From: shugeo Date: Wed, 9 Oct 2019 21:39:59 +0300 Subject: [PATCH 04/15] Implemented fake_quant_with_min_max_per_channel helper for cpu platform. The first approach. 
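
The element-wise math the new helper implements, written out as a self-contained scalar sketch (function and variable names here are illustrative, not the helper's actual signature; the real code is templated and works on NDArrays):

    #include <cmath>

    // Nudge [min, max] onto the integer grid [quantMin, quantMax], clamp x into the
    // nudged range, then snap it to the nearest multiple of scale.
    static float fakeQuantScalar(float x, float min, float max, int quantMin, int quantMax) {
        float scale            = (max - min) / float(quantMax - quantMin);
        float zeroPointFromMin = quantMin - min / scale;
        float nudgedZeroPoint  = zeroPointFromMin < quantMin ? float(quantMin)
                               : zeroPointFromMin > quantMax ? float(quantMax)
                               : std::round(zeroPointFromMin);
        float nudgedMin = (quantMin - nudgedZeroPoint) * scale;
        float nudgedMax = (quantMax - nudgedZeroPoint) * scale;
        float clamped   = x < nudgedMin ? nudgedMin : (x > nudgedMax ? nudgedMax : x);
        return std::floor((clamped - nudgedMin) / scale + 0.5f) * scale + nudgedMin;
    }

The per-channel helper applies essentially this formula, with min/max taken per channel along the last input dimension.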
--- .../helpers/cpu/fake_quantization.cpp | 75 +++++++++++++++---- .../layers_tests/DeclarableOpsTests10.cpp | 19 +++-- 2 files changed, 73 insertions(+), 21 deletions(-) diff --git a/libnd4j/include/ops/declarable/helpers/cpu/fake_quantization.cpp b/libnd4j/include/ops/declarable/helpers/cpu/fake_quantization.cpp index 88c451ffb..a2d0c3c59 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/fake_quantization.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/fake_quantization.cpp @@ -25,6 +25,55 @@ namespace nd4j { namespace ops { namespace helpers { + template + static void Nudge(T min, T max, T quant_min, T quant_max, T* scale, T* nudged_min, T* nudged_max) { + *scale = (max - min) / (quant_max - quant_min); + auto zero_point_from_min = quant_min - min / *scale; + uint16_t const nudged_zero_point = [zero_point_from_min, quant_min, quant_max] { + if (zero_point_from_min < quant_min) { + return static_cast(quant_min); + } + if (zero_point_from_min > quant_max) { + return static_cast(quant_max); + } + return nd4j::math::nd4j_round(zero_point_from_min); + }(); + *nudged_min = (quant_min - nudged_zero_point) * (*scale); + *nudged_max = (quant_max - nudged_zero_point) * (*scale); + } + + template + void fakeQuantWithMinMaxVarsPerChannel_(NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) { + int lowIntBound = narrowed ? 1 : 0; + int upperIntBound = 1 << numBits - 1; + + const float quant_min_float = static_cast(lowIntBound); + const float quant_max_float = static_cast(upperIntBound); +// auto scaleTensor(*input); // = NDArrayFactory::create(input->ordering(), input->getShapeAsVector(), input->getWorkspace()); + auto clamped(*input); // = NDArrayFactory::create(input->ordering(), input->getShapeAsVector(), input->getWorkspace()); + for (auto i = 0; i < min->lengthOf(); i++) { + T scale, nudged_min, nudged_max; + Nudge(min->t(i), max->t(i), quant_min_float, quant_max_float, &scale, &nudged_min, &nudged_max); + auto wiseMinMax = LAMBDA_T(x, nudged_min, nudged_max) { + if (x < nudged_min) { + return nudged_min; + } + else if (x > nudged_max) + return nudged_max; + return x; + }; +// scaleTensor.assign(scale); + input->applyLambda(wiseMinMax, &clamped); + clamped -= nudged_min; + // auto nudgedScale = scale; + clamped /= scale; + clamped += T(0.5f); + clamped.applyTransform(transform::Floor, output, nullptr); + (*output) *= scale; + (*output) += nudged_min; + } + } + template void fakeQuantWithMinMaxVars_(NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) { int lowIntBound = narrowed ? 
1 : 0; @@ -35,15 +84,15 @@ namespace helpers { T scale = (max->t(0) - min->t(0)) / (quant_max_float - quant_min_float); const T zero_point_from_min = quant_min_float - min->e(0) / scale; const uint16_t nudged_zero_point = [zero_point_from_min, lowIntBound, - quant_min_float, upperIntBound, - quant_max_float] { - if (zero_point_from_min < quant_min_float) { - return static_cast(lowIntBound); - } - if (zero_point_from_min > quant_max_float) { - return static_cast(upperIntBound); - } - return static_cast(roundf(zero_point_from_min)); + quant_min_float, upperIntBound, + quant_max_float] { + if (zero_point_from_min < quant_min_float) { + return static_cast(lowIntBound); + } + if (zero_point_from_min > quant_max_float) { + return static_cast(upperIntBound); + } + return static_cast(roundf(zero_point_from_min)); }(); auto nudged_min = (quant_min_float - nudged_zero_point) * (scale); @@ -71,10 +120,10 @@ namespace helpers { clamped.applyLambda(wiseMax, output); // const auto clamped_shifted = clamped - nudged_min; *output -= nudged_min; - // auto nudgedScale = scale; + // auto nudgedScale = scale; (*output) /= scaleTensor; - (*output) += T(0.5f); - output->applyTransform(transform::Floor, nullptr, nullptr); +// (*output) += T(0.5f); + output->applyTransform(transform::Round, nullptr, nullptr); (*output) *= scaleTensor; (*output) += nudged_min; //output->printIndexedBuffer("FAKE QUANTED"); @@ -94,7 +143,7 @@ namespace helpers { BUILD_SINGLE_SELECTOR(input->dataType(), fakeQuantWithMinMaxVars_, (input, min, max, numBits, narrowed, output), FLOAT_TYPES); } void fakeQuantWithMinMaxVarsPerChannel(NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) { - BUILD_SINGLE_SELECTOR(input->dataType(), fakeQuantWithMinMaxVars_, (input, min, max, numBits, narrowed, output), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(input->dataType(), fakeQuantWithMinMaxVarsPerChannel_, (input, min, max, numBits, narrowed, output), FLOAT_TYPES); } BUILD_SINGLE_TEMPLATE(template void fakeQuantWithMinMaxVars_, (NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output), FLOAT_TYPES); diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp index 191ee8524..a6edb23c7 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp @@ -2159,8 +2159,8 @@ TEST_F(DeclarableOpsTests10, FakeQuantWithMinMaxVars_Test_3) { NDArray x = NDArrayFactory::create('c', {1,2,3,1}, {-63.80, -63.75, -63.4, -63.5, 0.0, 0.1}); NDArray exp = NDArrayFactory::create('c', {1,2,3,1}, {-63.75, -63.75, -63.251953, -63.251953, 0.0, 0.0}); - NDArray min = NDArrayFactory::create(-63.65); - NDArray max = NDArrayFactory::create(0.1); + NDArray min = NDArrayFactory::create('c', {1},{-63.65}); + NDArray max = NDArrayFactory::create('c', {1}, {0.1}); nd4j::ops::fake_quant_with_min_max_vars_per_channel op; auto results = op.execute({&x, &min, &max}, {}, {}); @@ -2178,8 +2178,8 @@ TEST_F(DeclarableOpsTests10, FakeQuantWithMinMaxVars_Test_3) { //////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests10, FakeQuantWithMinMaxVars_Test_4) { - NDArray x = NDArrayFactory::create('c', {2,4,5,3}); - NDArray exp = NDArrayFactory::create('c', {2,4,5,3}, + NDArray x = NDArrayFactory::create('c', {2,4,5,3}); + NDArray exp = NDArrayFactory::create('c', {2,4,5,3}, {1.0588236, 1.9607843, 3.019608, 4.0588236, 5.098039, 6.039216, 7.0588236, 8.039216, 
9.058824, 10.058824, 10.980392, 12.078432, 13.058824, 13.921569, 15.09804, 16.058825, 17.058825, 18.117647, 19.058825, 20., 21.137257, 22.058825, 22.941177, 23.882355, 25.058825, 26.078432, 26.901962, @@ -2194,16 +2194,19 @@ TEST_F(DeclarableOpsTests10, FakeQuantWithMinMaxVars_Test_4) { 45., 50., 70., 45., 50., 70., 45., 50., 70., 45., 50., 70., 45., 50., 70., 45., 50., 70., 45., 50., 70.}); - NDArray min = NDArrayFactory::create({20., 20., 20.}); - NDArray max = NDArrayFactory::create({65., 70., 90.}); - + NDArray min = NDArrayFactory::create({20., 20., 20.}); + NDArray max = NDArrayFactory::create({65., 70., 90.}); + x.linspace(1.); nd4j::ops::fake_quant_with_min_max_vars_per_channel op; auto results = op.execute({&x, &min, &max}, {}, {}); ASSERT_EQ(ND4J_STATUS_OK, results->status()); auto result = results->at(0); - // result->printIndexedBuffer("Quantized2"); + result->printBuffer("Quantized per channels 4"); + exp.printBuffer("Quantized per channest E"); + auto diff = *result - exp; + diff.printIndexedBuffer("Difference"); ASSERT_TRUE(exp.isSameShapeStrict(result)); ASSERT_TRUE(exp.equalsTo(result)); From 3c0c59ab889875bd0d0d6f542569490c4e1d5ddb Mon Sep 17 00:00:00 2001 From: shugeo Date: Wed, 9 Oct 2019 22:09:33 +0300 Subject: [PATCH 05/15] Refactored fake_quant_with_min_max_vars op. --- .../helpers/cpu/fake_quantization.cpp | 74 ++++++------------- 1 file changed, 21 insertions(+), 53 deletions(-) diff --git a/libnd4j/include/ops/declarable/helpers/cpu/fake_quantization.cpp b/libnd4j/include/ops/declarable/helpers/cpu/fake_quantization.cpp index a2d0c3c59..28437359e 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/fake_quantization.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/fake_quantization.cpp @@ -74,6 +74,20 @@ namespace helpers { } } + template + static void WiseMinMax(NDArray* input, T min, T max, NDArray* output) { + auto wiseMinMax = LAMBDA_T(x, min, max) { + if (x < min) { + return min; + } + else if (x > max) + return max; + return x; + }; + + input->applyLambda(wiseMinMax, output); + } + template void fakeQuantWithMinMaxVars_(NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) { int lowIntBound = narrowed ? 
1 : 0; @@ -81,62 +95,16 @@ namespace helpers { const float quant_min_float = static_cast(lowIntBound); const float quant_max_float = static_cast(upperIntBound); - T scale = (max->t(0) - min->t(0)) / (quant_max_float - quant_min_float); - const T zero_point_from_min = quant_min_float - min->e(0) / scale; - const uint16_t nudged_zero_point = [zero_point_from_min, lowIntBound, - quant_min_float, upperIntBound, - quant_max_float] { - if (zero_point_from_min < quant_min_float) { - return static_cast(lowIntBound); - } - if (zero_point_from_min > quant_max_float) { - return static_cast(upperIntBound); - } - return static_cast(roundf(zero_point_from_min)); - }(); + T nudged_min, nudged_max, scale; - auto nudged_min = (quant_min_float - nudged_zero_point) * (scale); - auto nudged_max = (quant_max_float - nudged_zero_point) * (scale); - //input->applyScalar(scalar::CompareAndSet, nudged_max, clamped, nullptr); //.cwiseMin(nudged_max).cwiseMax(nudged_min); - //input->applyScalar(scalar::CompareAndSet, nudged_min, clamped, nullptr); //.cwiseMin(nudged_max).cwiseMax(nudged_min); - auto wiseMax = LAMBDA_T(x, nudged_min) { - if (x < nudged_min) { - return nudged_min; - } - return x; - - }; - auto wiseMin = LAMBDA_T(x, nudged_max) { - if (x > nudged_max) { - return nudged_max; - } - return x; - }; - auto scaleTensor(*input); // = NDArrayFactory::create(input->ordering(), input->getShapeAsVector(), input->getWorkspace()); - auto clamped(*input); // = NDArrayFactory::create(input->ordering(), input->getShapeAsVector(), input->getWorkspace()); - scaleTensor.assign(scale); - input->applyLambda(wiseMin, &clamped); -// const auto clamped = inputs.cwiseMin(nudged_max).cwiseMax(nudged_min); - clamped.applyLambda(wiseMax, output); -// const auto clamped_shifted = clamped - nudged_min; + Nudge(min->t(0), max->t(0), quant_min_float, quant_max_float, &scale, &nudged_min, &nudged_max); + WiseMinMax(input, nudged_min, nudged_max, output); *output -= nudged_min; - // auto nudgedScale = scale; - (*output) /= scaleTensor; -// (*output) += T(0.5f); - output->applyTransform(transform::Round, nullptr, nullptr); - (*output) *= scaleTensor; + (*output) /= scale; + (*output) += T(0.5f); + output->applyTransform(transform::Floor, nullptr, nullptr); + (*output) *= scale; (*output) += nudged_min; - //output->printIndexedBuffer("FAKE QUANTED"); - /* - const auto nudged_scale_repl = inputs.constant(nudged_scale); - - const auto clamped = inputs.cwiseMin(nudged_max).cwiseMax(nudged_min); - const auto clamped_shifted = clamped - nudged_min; - *output = (clamped_shifted / nudged_scale_repl + 0.5f).floor() * - nudged_scale_repl + - nudged_min; -*/ - } void fakeQuantWithMinMaxVars(NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) { From c13e945a96491b1d4451a5fccbc1c594d5c9c1e4 Mon Sep 17 00:00:00 2001 From: shugeo Date: Thu, 10 Oct 2019 13:23:11 +0300 Subject: [PATCH 06/15] Fixed fake_quant_with_min_max_vars op and tests. 
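
One of the corrections is the quantization upper bound: 1 << numBits - 1 parses as 1 << (numBits - 1), because subtraction binds tighter than the shift, so 8 bits gave an upper bound of 128 instead of 255. The expected test values change accordingly (for example -63.4 now quantizes to -63.5 rather than -63.251953):

    int numBits = 8;
    int wrong   = 1 << numBits - 1;    // parsed as 1 << (numBits - 1) -> 128
    int fixed   = (1 << numBits) - 1;  // intended upper bound         -> 255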
--- .../helpers/cpu/fake_quantization.cpp | 59 +++++++-------- .../layers_tests/DeclarableOpsTests10.cpp | 75 ++++++++++++++++--- 2 files changed, 91 insertions(+), 43 deletions(-) diff --git a/libnd4j/include/ops/declarable/helpers/cpu/fake_quantization.cpp b/libnd4j/include/ops/declarable/helpers/cpu/fake_quantization.cpp index 28437359e..622934407 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/fake_quantization.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/fake_quantization.cpp @@ -26,51 +26,44 @@ namespace ops { namespace helpers { template - static void Nudge(T min, T max, T quant_min, T quant_max, T* scale, T* nudged_min, T* nudged_max) { - *scale = (max - min) / (quant_max - quant_min); - auto zero_point_from_min = quant_min - min / *scale; - uint16_t const nudged_zero_point = [zero_point_from_min, quant_min, quant_max] { - if (zero_point_from_min < quant_min) { + static void Nudge(T min, T max, int quant_min, int quant_max, T* scale, T* nudged_min, T* nudged_max) { + T quant_max_float = static_cast(quant_max); + T quant_min_float = static_cast(quant_min); + *scale = (max - min) / (quant_max_float - quant_min_float); + auto zero_point_from_min = quant_min_float - min / *scale; + uint16_t const nudged_zero_point = [zero_point_from_min, quant_min, quant_max, quant_max_float, quant_min_float] { + if (zero_point_from_min < quant_min_float) { return static_cast(quant_min); } - if (zero_point_from_min > quant_max) { + if (zero_point_from_min > quant_max_float) { return static_cast(quant_max); } return nd4j::math::nd4j_round(zero_point_from_min); }(); - *nudged_min = (quant_min - nudged_zero_point) * (*scale); - *nudged_max = (quant_max - nudged_zero_point) * (*scale); + *nudged_min = (quant_min_float - nudged_zero_point) * (*scale); + *nudged_max = (quant_max_float - nudged_zero_point) * (*scale); } template void fakeQuantWithMinMaxVarsPerChannel_(NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) { int lowIntBound = narrowed ? 
1 : 0; - int upperIntBound = 1 << numBits - 1; + int upperIntBound = (1 << numBits) - 1; + auto channels = input->sizeAt(-1); - const float quant_min_float = static_cast(lowIntBound); - const float quant_max_float = static_cast(upperIntBound); -// auto scaleTensor(*input); // = NDArrayFactory::create(input->ordering(), input->getShapeAsVector(), input->getWorkspace()); - auto clamped(*input); // = NDArrayFactory::create(input->ordering(), input->getShapeAsVector(), input->getWorkspace()); - for (auto i = 0; i < min->lengthOf(); i++) { + PRAGMA_OMP_PARALLEL_FOR + for (auto i = 0; i < channels; i++) { T scale, nudged_min, nudged_max; - Nudge(min->t(i), max->t(i), quant_min_float, quant_max_float, &scale, &nudged_min, &nudged_max); - auto wiseMinMax = LAMBDA_T(x, nudged_min, nudged_max) { - if (x < nudged_min) { - return nudged_min; - } - else if (x > nudged_max) - return nudged_max; - return x; - }; -// scaleTensor.assign(scale); - input->applyLambda(wiseMinMax, &clamped); - clamped -= nudged_min; - // auto nudgedScale = scale; - clamped /= scale; - clamped += T(0.5f); - clamped.applyTransform(transform::Floor, output, nullptr); - (*output) *= scale; - (*output) += nudged_min; + Nudge(min->t(i), max->t(i), lowIntBound, upperIntBound, &scale, &nudged_min, &nudged_max); + + for (auto e = 0; e < input->lengthOf(); e += channels) { + T val = input->t(e + i); + if ( val <= nudged_min) + val = nudged_min; + else if (val >= nudged_max) + val = nudged_max; + + output->t(e + i) = math::nd4j_floor((val - nudged_min)/scale + T(0.5)) * scale + nudged_min; + } } } @@ -91,7 +84,7 @@ namespace helpers { template void fakeQuantWithMinMaxVars_(NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) { int lowIntBound = narrowed ? 1 : 0; - int upperIntBound = 1 << numBits - 1; + int upperIntBound = (1 << numBits) - 1; const float quant_min_float = static_cast(lowIntBound); const float quant_max_float = static_cast(upperIntBound); diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp index a6edb23c7..6ae982cf8 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp @@ -2117,7 +2117,7 @@ TEST_F(DeclarableOpsTests10, Image_DrawBoundingBoxes_2) { TEST_F(DeclarableOpsTests10, FakeQuantWithMinMaxVars_Test_1) { NDArray x('c', {2,3}, {-63.80f, -63.75f, -63.70f, -63.5f, 0.0f, 0.1f}, nd4j::DataType::FLOAT32); - NDArray exp('c', {2,3}, {-63.75f, -63.75f, -63.75f, -63.251953f, 0.0f, 0.0f}, nd4j::DataType::FLOAT32); + NDArray exp('c', {2,3}, {-63.75, -63.75, -63.75, -63.5, 0., 0.}, nd4j::DataType::FLOAT32); NDArray min('c', {}, {-63.65f}, nd4j::DataType::FLOAT32); NDArray max('c', {}, {0.1f}, nd4j::DataType::FLOAT32); @@ -2127,7 +2127,8 @@ TEST_F(DeclarableOpsTests10, FakeQuantWithMinMaxVars_Test_1) { ASSERT_EQ(ND4J_STATUS_OK, results->status()); auto result = results->at(0); - // result->printIndexedBuffer("Quantized"); + result->printBuffer("Quantized"); + exp.printBuffer("Expected"); ASSERT_TRUE(exp.isSameShapeStrict(result)); ASSERT_TRUE(exp.equalsTo(result)); @@ -2137,7 +2138,7 @@ TEST_F(DeclarableOpsTests10, FakeQuantWithMinMaxVars_Test_1) { TEST_F(DeclarableOpsTests10, FakeQuantWithMinMaxVars_Test_2) { NDArray x = NDArrayFactory::create('c', {2,3}, {-63.80, -63.75, -63.4, -63.5, 0.0, 0.1}); - NDArray exp = NDArrayFactory::create('c', {2,3}, {-63.75, -63.75, -63.251953, -63.251953, 0.0, 0.0}); + NDArray exp = NDArrayFactory::create('c', 
{2,3}, {-63.75, -63.75, -63.5 , -63.5 , 0. , 0. }); NDArray min = NDArrayFactory::create(-63.65); NDArray max = NDArrayFactory::create(0.1); @@ -2158,7 +2159,7 @@ TEST_F(DeclarableOpsTests10, FakeQuantWithMinMaxVars_Test_2) { TEST_F(DeclarableOpsTests10, FakeQuantWithMinMaxVars_Test_3) { NDArray x = NDArrayFactory::create('c', {1,2,3,1}, {-63.80, -63.75, -63.4, -63.5, 0.0, 0.1}); - NDArray exp = NDArrayFactory::create('c', {1,2,3,1}, {-63.75, -63.75, -63.251953, -63.251953, 0.0, 0.0}); + NDArray exp = NDArrayFactory::create('c', {1,2,3,1}, {-63.75, -63.75, -63.5 , -63.5 , 0. , 0. }); NDArray min = NDArrayFactory::create('c', {1},{-63.65}); NDArray max = NDArrayFactory::create('c', {1}, {0.1}); @@ -2179,8 +2180,8 @@ TEST_F(DeclarableOpsTests10, FakeQuantWithMinMaxVars_Test_3) { TEST_F(DeclarableOpsTests10, FakeQuantWithMinMaxVars_Test_4) { NDArray x = NDArrayFactory::create('c', {2,4,5,3}); - NDArray exp = NDArrayFactory::create('c', {2,4,5,3}, - {1.0588236, 1.9607843, 3.019608, 4.0588236, 5.098039, 6.039216, 7.0588236, 8.039216, 9.058824, + NDArray exp = NDArrayFactory::create('c', {2,4,5,3},{ + 1.0588236, 1.9607843, 3.019608, 4.0588236, 5.098039, 6.039216, 7.0588236, 8.039216, 9.058824, 10.058824, 10.980392, 12.078432, 13.058824, 13.921569, 15.09804, 16.058825, 17.058825, 18.117647, 19.058825, 20., 21.137257, 22.058825, 22.941177, 23.882355, 25.058825, 26.078432, 26.901962, 28.058825, 29.019608, 29.92157, 31.058825, 31.960785, 32.941177, 34.058823, 35.09804, 35.960785, @@ -2203,10 +2204,64 @@ TEST_F(DeclarableOpsTests10, FakeQuantWithMinMaxVars_Test_4) { ASSERT_EQ(ND4J_STATUS_OK, results->status()); auto result = results->at(0); - result->printBuffer("Quantized per channels 4"); - exp.printBuffer("Quantized per channest E"); - auto diff = *result - exp; - diff.printIndexedBuffer("Difference"); +// result->printBuffer("Quantized per channels 4"); +// exp.printBuffer("Quantized per channest E"); +// auto diff = *result - exp; +// diff.printIndexedBuffer("Difference"); + ASSERT_TRUE(exp.isSameShapeStrict(result)); + ASSERT_TRUE(exp.equalsTo(result)); + + delete results; +} + +TEST_F(DeclarableOpsTests10, FakeQuantWithMinMaxVars_Test_5) { + NDArray x = NDArrayFactory::create('c', {2, 3, 5, 4}); + NDArray exp = NDArrayFactory::create('c', {2, 3, 5, 4},{ + -19.92157 , -18.980392 , -18.039217 , -16.941177 , + -19.92157 , -18.980392 , -18.039217 , -16.941177 , + -19.92157 , -18.980392 , -18.039217 , -16.941177 , + -19.92157 , -18.980392 , -18.039217 , -16.941177 , + -19.92157 , -18.980392 , -18.039217 , -16.941177 , + -19.92157 , -18.980392 , -18.039217 , -16.941177 , + -19.92157 , -18.980392 , -18.039217 , -16.941177 , + -19.92157 , -18.980392 , -18.039217 , -16.941177 , + -19.92157 , -18.980392 , -18.039217 , -16.941177 , + -19.92157 , -18.980392 , -18.039217 , -16.941177 , + -19.92157 , -18.980392 , -18.039217 , -16.941177 , + -16. , -15.058824 , -13.960785 , -13.0196085 , + -11.92157 , -10.980392 , -10.039217 , -8.941177 , + -8.000001 , -7.0588236 , -5.960785 , -5.0196085 , + -3.9215698 , -2.9803925 , -2.039217 , -0.94117737, + 0. , 0.94117737, 2.039215 , 2.9803925 , + 4.07843 , 5.0196075 , 5.960783 , 7.0588226 , + 8. , 8.941177 , 10.039215 , 10.980392 , + 12.07843 , 13.019608 , 13.960783 , 15.058823 , + 16. 
, 16.941177 , 18.039217 , 18.980392 , + 20.07843 , 21.019608 , 21.960783 , 23.058823 , + 20.07843 , 21.019608 , 21.960783 , 23.058823 , + 20.07843 , 21.019608 , 21.960783 , 23.058823 , + 20.07843 , 21.019608 , 21.960783 , 23.058823 , + 20.07843 , 21.019608 , 21.960783 , 23.058823 , + 20.07843 , 21.019608 , 21.960783 , 23.058823 , + 20.07843 , 21.019608 , 21.960783 , 23.058823 , + 20.07843 , 21.019608 , 21.960783 , 23.058823 , + 20.07843 , 21.019608 , 21.960783 , 23.058823 , + 20.07843 , 21.019608 , 21.960783 , 23.058823 + }); + NDArray min = NDArrayFactory::create({-20., -19., -18., -17}); + NDArray max = NDArrayFactory::create({20., 21., 22., 23}); + x.linspace(-60.); + nd4j::ops::fake_quant_with_min_max_vars_per_channel op; + auto results = op.execute({&x, &min, &max}, {}, {}); + + ASSERT_EQ(ND4J_STATUS_OK, results->status()); + + auto result = results->at(0); +// result->printBuffer("Quantized per channels 5"); +// exp.printBuffer("Quantized per channest E"); +// auto diff = *result - exp; +// diff.printIndexedBuffer("Difference"); + ASSERT_TRUE(exp.isSameShapeStrict(result)); ASSERT_TRUE(exp.equalsTo(result)); From 753565145c7c4d1cdbfe3380a26e003083c29821 Mon Sep 17 00:00:00 2001 From: shugeo Date: Thu, 10 Oct 2019 14:00:49 +0300 Subject: [PATCH 07/15] Refactored fake_quant_with_min_max_vars op cuda implementation. --- .../helpers/cuda/fake_quantization.cu | 71 ++++++++----------- 1 file changed, 30 insertions(+), 41 deletions(-) diff --git a/libnd4j/include/ops/declarable/helpers/cuda/fake_quantization.cu b/libnd4j/include/ops/declarable/helpers/cuda/fake_quantization.cu index 4e62aafa8..9def35152 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/fake_quantization.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/fake_quantization.cu @@ -34,58 +34,47 @@ namespace helpers { // output - output tensor // template - void fakeQuantWithMinMaxVars_(NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) { - int lowIntBound = narrowed?1:0; - int upperIntBound = 1 << numBits - 1; - min->syncToHost(); - max->syncToHost(); - const float quant_min_float = static_cast(lowIntBound); - const float quant_max_float = static_cast(upperIntBound); - T scale = (max->t(0) - min->t(0)) / (quant_max_float - quant_min_float); - const T zero_point_from_min = quant_min_float - min->t(0) / scale; - - const uint16_t nudged_zero_point = [zero_point_from_min, lowIntBound, - quant_min_float, upperIntBound, - quant_max_float] { + static void Nudge(T min, T max, int quant_min, int quant_max, T* scale, T* nudged_min, T* nudged_max) { + T quant_max_float = static_cast(quant_max); + T quant_min_float = static_cast(quant_min); + *scale = (max - min) / (quant_max_float - quant_min_float); + auto zero_point_from_min = quant_min_float - min / *scale; + uint16_t const nudged_zero_point = [zero_point_from_min, quant_min, quant_max, quant_max_float, quant_min_float] { if (zero_point_from_min < quant_min_float) { - return static_cast(lowIntBound); + return static_cast(quant_min); } if (zero_point_from_min > quant_max_float) { - return static_cast(upperIntBound); + return static_cast(quant_max); } - return static_cast(roundf(zero_point_from_min)); + return nd4j::math::nd4j_round(zero_point_from_min); }(); + *nudged_min = (quant_min_float - nudged_zero_point) * (*scale); + *nudged_max = (quant_max_float - nudged_zero_point) * (*scale); + } - auto nudged_min = (quant_min_float - nudged_zero_point) * (scale); - auto nudged_max = (quant_max_float - nudged_zero_point) * (scale); + template + 
void fakeQuantWithMinMaxVars_(NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) { + int lowIntBound = narrowed?1:0; + int upperIntBound = (1 << numBits) - 1; + min->syncToHost(); + max->syncToHost(); + T scale, nudged_min, nudged_max; + Nudge(min->t(0), max->t(0), lowIntBound, upperIntBound, &scale, &nudged_min, &nudged_max); - auto wiseMax = LAMBDA_T(x, nudged_min) { + auto wiseMinMaxAndSoOn = LAMBDA_T(x, nudged_min, nudged_max, scale) { + T val = x; if (x < nudged_min) { - return nudged_min; + val = nudged_min; } - return x; + else if (x > nudged_max) { + val = nudged_max; + } + else + val = x; + return (math::nd4j_floor((val - nudged_min) / scale + T(0.5)) * scale + nudged_min); }; - auto wiseMin = LAMBDA_T(x, nudged_max) { - if (x > nudged_max) { - return nudged_max; - } - return x; - }; - - auto scaleTensor(*input); - auto clamped(*input); - scaleTensor.assign(scale); - input->applyLambda(wiseMin, &clamped); - - clamped.applyLambda(wiseMax, output); - *output -= nudged_min; - - (*output) /= scaleTensor; - (*output) += T(0.5f); - output->applyTransform(transform::Floor, nullptr, nullptr); - (*output) *= scaleTensor; - (*output) += nudged_min; + input->applyLambda(wiseMinMaxAndSoOn, output); } void fakeQuantWithMinMaxVars(NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) { From 3504b0cda99fbb31fb2735a90c7b1d82fc4ae25c Mon Sep 17 00:00:00 2001 From: shugeo Date: Thu, 10 Oct 2019 15:44:50 +0300 Subject: [PATCH 08/15] Implemented fake_quant_with_min_max_vars_per_channel fop cuda helper. The first working revision. --- .../helpers/cuda/fake_quantization.cu | 33 ++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/libnd4j/include/ops/declarable/helpers/cuda/fake_quantization.cu b/libnd4j/include/ops/declarable/helpers/cuda/fake_quantization.cu index 9def35152..d491d056a 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/fake_quantization.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/fake_quantization.cu @@ -77,14 +77,45 @@ namespace helpers { input->applyLambda(wiseMinMaxAndSoOn, output); } + template + void fakeQuantWithMinMaxVarsPerChannel_(NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) { + int lowIntBound = narrowed?1:0; + int upperIntBound = (1 << numBits) - 1; + min->syncToHost(); + max->syncToHost(); + T scale, nudged_min, nudged_max; + auto channels = min->lengthOf(); + input->syncToHost(); + input->syncToDevice(); + output->syncToHost(); + for (auto i = 0; i < channels; i++) { + Nudge(min->t(i), max->t(i), lowIntBound, upperIntBound, &scale, &nudged_min, &nudged_max); + + //auto wiseMinMaxAndSoOn = LAMBDA_T(x, nudged_min, nudged_max, scale) { + for (auto e = 0; e < input->lengthOf(); e += channels) { + T val = input->t(e + i); + if (val < nudged_min) { + val = nudged_min; + } else if (val > nudged_max) { + val = nudged_max; + } + + output->t(e + i) = (math::nd4j_floor((val - nudged_min) / scale + T(0.5)) * scale + nudged_min); + }; + } + output->syncToDevice(); + output->tickWriteDevice(); + } + void fakeQuantWithMinMaxVars(NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) { BUILD_SINGLE_SELECTOR(input->dataType(), fakeQuantWithMinMaxVars_, (input, min, max, numBits, narrowed, output), FLOAT_TYPES); } void fakeQuantWithMinMaxVarsPerChannel(NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) { - BUILD_SINGLE_SELECTOR(input->dataType(), 
fakeQuantWithMinMaxVars_, (input, min, max, numBits, narrowed, output), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(input->dataType(), fakeQuantWithMinMaxVarsPerChannel_, (input, min, max, numBits, narrowed, output), FLOAT_TYPES); } BUILD_SINGLE_TEMPLATE(template void fakeQuantWithMinMaxVars_, (NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output), FLOAT_TYPES); + BUILD_SINGLE_TEMPLATE(template void fakeQuantWithMinMaxVarsPerChannel_, (NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output), FLOAT_TYPES); } } From 02d86166927cc804f2e74ab6f761fc92e4e491a6 Mon Sep 17 00:00:00 2001 From: shugeo Date: Thu, 10 Oct 2019 16:40:56 +0300 Subject: [PATCH 09/15] Implementation of cuda kernel for fake_quant_with_min_max_vars_per_channels op. --- ...ke_quant_with_min_max_vars_per_channel.cpp | 2 +- .../helpers/cpu/fake_quantization.cpp | 2 +- .../helpers/cuda/fake_quantization.cu | 54 +++++++++++-------- .../declarable/helpers/fake_quantization.h | 2 +- 4 files changed, 35 insertions(+), 25 deletions(-) diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/fake_quant_with_min_max_vars_per_channel.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/fake_quant_with_min_max_vars_per_channel.cpp index 4d719b38b..e9bab7a1a 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/fake_quant_with_min_max_vars_per_channel.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/fake_quant_with_min_max_vars_per_channel.cpp @@ -55,7 +55,7 @@ namespace nd4j { " for quatization should be in between 2 and 16, but %i " "was given.", numBits); } - helpers::fakeQuantWithMinMaxVarsPerChannel(x, min, max, numBits, narrowed, output); + helpers::fakeQuantWithMinMaxVarsPerChannel(block.launchContext(), x, min, max, numBits, narrowed, output); return ND4J_STATUS_OK; } diff --git a/libnd4j/include/ops/declarable/helpers/cpu/fake_quantization.cpp b/libnd4j/include/ops/declarable/helpers/cpu/fake_quantization.cpp index 622934407..b09587cf7 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/fake_quantization.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/fake_quantization.cpp @@ -103,7 +103,7 @@ namespace helpers { void fakeQuantWithMinMaxVars(NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) { BUILD_SINGLE_SELECTOR(input->dataType(), fakeQuantWithMinMaxVars_, (input, min, max, numBits, narrowed, output), FLOAT_TYPES); } - void fakeQuantWithMinMaxVarsPerChannel(NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) { + void fakeQuantWithMinMaxVarsPerChannel(LaunchContext* context, NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) { BUILD_SINGLE_SELECTOR(input->dataType(), fakeQuantWithMinMaxVarsPerChannel_, (input, min, max, numBits, narrowed, output), FLOAT_TYPES); } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/fake_quantization.cu b/libnd4j/include/ops/declarable/helpers/cuda/fake_quantization.cu index d491d056a..75a81f75a 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/fake_quantization.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/fake_quantization.cu @@ -34,7 +34,7 @@ namespace helpers { // output - output tensor // template - static void Nudge(T min, T max, int quant_min, int quant_max, T* scale, T* nudged_min, T* nudged_max) { + static __host__ __device__ void Nudge(T min, T max, int quant_min, int quant_max, T* scale, T* nudged_min, T* nudged_max) { T quant_max_float = 
static_cast(quant_max); T quant_min_float = static_cast(quant_min); *scale = (max - min) / (quant_max_float - quant_min_float); @@ -78,44 +78,54 @@ namespace helpers { } template - void fakeQuantWithMinMaxVarsPerChannel_(NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) { - int lowIntBound = narrowed?1:0; - int upperIntBound = (1 << numBits) - 1; - min->syncToHost(); - max->syncToHost(); - T scale, nudged_min, nudged_max; - auto channels = min->lengthOf(); - input->syncToHost(); - input->syncToDevice(); - output->syncToHost(); - for (auto i = 0; i < channels; i++) { - Nudge(min->t(i), max->t(i), lowIntBound, upperIntBound, &scale, &nudged_min, &nudged_max); + static __global__ void fakeQuantWithMinMaxKernel(T* input, Nd4jLong* inputShape, T* min, T* max, + int lowIntBound, int upperIntBound, Nd4jLong channels, + T* output, Nd4jLong* outputShape, Nd4jLong length) { + for (auto i = blockIdx.x; i < (int)channels; i += gridDim.x) { + T scale, nudged_min, nudged_max; + Nudge(min[i], max[i], lowIntBound, upperIntBound, &scale, &nudged_min, &nudged_max); //auto wiseMinMaxAndSoOn = LAMBDA_T(x, nudged_min, nudged_max, scale) { - for (auto e = 0; e < input->lengthOf(); e += channels) { - T val = input->t(e + i); + for (auto e = threadIdx.x; e < (int)length; e += (int)channels) { + T val = input[shape::getIndexOffset(e + i, inputShape)]; if (val < nudged_min) { val = nudged_min; } else if (val > nudged_max) { val = nudged_max; } - - output->t(e + i) = (math::nd4j_floor((val - nudged_min) / scale + T(0.5)) * scale + nudged_min); + output[shape::getIndexOffset(e + i, outputShape)] = (math::nd4j_floor((val - nudged_min) / scale + T(0.5)) * scale + nudged_min); }; } - output->syncToDevice(); - output->tickWriteDevice(); + + } + + template + void fakeQuantWithMinMaxVarsPerChannel_(LaunchContext* context, NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) { + int lowIntBound = narrowed?1:0; + int upperIntBound = (1 << numBits) - 1; + auto channels = min->lengthOf(); + auto length = input->lengthOf(); + NDArray::prepareSpecialUse({output}, {min, max, input}); + auto stream = context->getCudaStream(); + T* inputBuf = input->dataBuffer()->specialAsT(); + T* outputBuf = output->dataBuffer()->specialAsT(); + T* minBuf = min->dataBuffer()->specialAsT(); + T* maxBuf = max->dataBuffer()->specialAsT(); + fakeQuantWithMinMaxKernel<<<1, 1, 256, *stream>>>(inputBuf, input->specialShapeInfo(), + minBuf, maxBuf, lowIntBound, upperIntBound, channels, outputBuf, output->specialShapeInfo(), length); + NDArray::registerSpecialUse({output}, {min, max, input}); + } void fakeQuantWithMinMaxVars(NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) { BUILD_SINGLE_SELECTOR(input->dataType(), fakeQuantWithMinMaxVars_, (input, min, max, numBits, narrowed, output), FLOAT_TYPES); } - void fakeQuantWithMinMaxVarsPerChannel(NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) { - BUILD_SINGLE_SELECTOR(input->dataType(), fakeQuantWithMinMaxVarsPerChannel_, (input, min, max, numBits, narrowed, output), FLOAT_TYPES); + void fakeQuantWithMinMaxVarsPerChannel(LaunchContext* context, NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) { + BUILD_SINGLE_SELECTOR(input->dataType(), fakeQuantWithMinMaxVarsPerChannel_, (context, input, min, max, numBits, narrowed, output), FLOAT_TYPES); } BUILD_SINGLE_TEMPLATE(template void fakeQuantWithMinMaxVars_, (NDArray* 
input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output), FLOAT_TYPES); - BUILD_SINGLE_TEMPLATE(template void fakeQuantWithMinMaxVarsPerChannel_, (NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output), FLOAT_TYPES); + BUILD_SINGLE_TEMPLATE(template void fakeQuantWithMinMaxVarsPerChannel_, (LaunchContext* context, NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output), FLOAT_TYPES); } } diff --git a/libnd4j/include/ops/declarable/helpers/fake_quantization.h b/libnd4j/include/ops/declarable/helpers/fake_quantization.h index 7a43a15cb..cadd8be7c 100644 --- a/libnd4j/include/ops/declarable/helpers/fake_quantization.h +++ b/libnd4j/include/ops/declarable/helpers/fake_quantization.h @@ -27,7 +27,7 @@ namespace ops { namespace helpers { void fakeQuantWithMinMaxVars(NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output); - void fakeQuantWithMinMaxVarsPerChannel(NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output); + void fakeQuantWithMinMaxVarsPerChannel(LaunchContext* context, NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output); } } } From d5b352273dc17aa28bceeaf9707201274c6d8518 Mon Sep 17 00:00:00 2001 From: shugeo Date: Thu, 10 Oct 2019 16:51:29 +0300 Subject: [PATCH 10/15] Implementation of cuda kernel for fake_quant_with_min_max_vars_per_channels op. Final revision. --- .../declarable/helpers/cuda/fake_quantization.cu | 13 +++++++++---- .../tests_cpu/layers_tests/DeclarableOpsTests10.cpp | 4 ++-- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/libnd4j/include/ops/declarable/helpers/cuda/fake_quantization.cu b/libnd4j/include/ops/declarable/helpers/cuda/fake_quantization.cu index 75a81f75a..893e016ed 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/fake_quantization.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/fake_quantization.cu @@ -81,19 +81,24 @@ namespace helpers { static __global__ void fakeQuantWithMinMaxKernel(T* input, Nd4jLong* inputShape, T* min, T* max, int lowIntBound, int upperIntBound, Nd4jLong channels, T* output, Nd4jLong* outputShape, Nd4jLong length) { + __shared__ int block; + if (threadIdx.x == 0) { + block = length / channels; + } + __syncthreads(); for (auto i = blockIdx.x; i < (int)channels; i += gridDim.x) { T scale, nudged_min, nudged_max; Nudge(min[i], max[i], lowIntBound, upperIntBound, &scale, &nudged_min, &nudged_max); //auto wiseMinMaxAndSoOn = LAMBDA_T(x, nudged_min, nudged_max, scale) { - for (auto e = threadIdx.x; e < (int)length; e += (int)channels) { - T val = input[shape::getIndexOffset(e + i, inputShape)]; + for (auto e = threadIdx.x; e < block; e += blockDim.x) { + T val = input[shape::getIndexOffset(e * channels + i, inputShape)]; if (val < nudged_min) { val = nudged_min; } else if (val > nudged_max) { val = nudged_max; } - output[shape::getIndexOffset(e + i, outputShape)] = (math::nd4j_floor((val - nudged_min) / scale + T(0.5)) * scale + nudged_min); + output[shape::getIndexOffset(e* channels + i, outputShape)] = (math::nd4j_floor((val - nudged_min) / scale + T(0.5)) * scale + nudged_min); }; } @@ -111,7 +116,7 @@ namespace helpers { T* outputBuf = output->dataBuffer()->specialAsT(); T* minBuf = min->dataBuffer()->specialAsT(); T* maxBuf = max->dataBuffer()->specialAsT(); - fakeQuantWithMinMaxKernel<<<1, 1, 256, *stream>>>(inputBuf, input->specialShapeInfo(), + fakeQuantWithMinMaxKernel<<<128, 256, 256, *stream>>>(inputBuf, 
input->specialShapeInfo(), minBuf, maxBuf, lowIntBound, upperIntBound, channels, outputBuf, output->specialShapeInfo(), length); NDArray::registerSpecialUse({output}, {min, max, input}); diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp index 6ae982cf8..0652a398e 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp @@ -2127,8 +2127,8 @@ TEST_F(DeclarableOpsTests10, FakeQuantWithMinMaxVars_Test_1) { ASSERT_EQ(ND4J_STATUS_OK, results->status()); auto result = results->at(0); - result->printBuffer("Quantized"); - exp.printBuffer("Expected"); +// result->printBuffer("Quantized"); +// exp.printBuffer("Expected"); ASSERT_TRUE(exp.isSameShapeStrict(result)); ASSERT_TRUE(exp.equalsTo(result)); From 92636b0b864ea877184f008813972606a32981b4 Mon Sep 17 00:00:00 2001 From: shugeo Date: Thu, 10 Oct 2019 17:08:59 +0300 Subject: [PATCH 11/15] Eliminated waste operator. --- .../parity_ops/fake_quant_with_min_max_vars_per_channel.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/fake_quant_with_min_max_vars_per_channel.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/fake_quant_with_min_max_vars_per_channel.cpp index e9bab7a1a..e5873d9dd 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/fake_quant_with_min_max_vars_per_channel.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/fake_quant_with_min_max_vars_per_channel.cpp @@ -40,8 +40,7 @@ namespace nd4j { REQUIRE_TRUE(depth == max->lengthOf(), 0, "fake_quant_with_min_max_vars_per_channel: Max length should be" "%lld, but %lld occurs.", depth, max->lengthOf()); - if(block.width() == 3) { - } + auto output = OUTPUT_VARIABLE(0); int numBits = 8; if (block.getIArguments() && block.getIArguments()->size()) From a09cb5e2be064e6aae44add1d55fbbe158dfe89e Mon Sep 17 00:00:00 2001 From: shugeo Date: Thu, 10 Oct 2019 17:13:33 +0300 Subject: [PATCH 12/15] Added doc for fake_quant_with_min_max_per_channel op declaration. --- .../include/ops/declarable/headers/parity_ops.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/libnd4j/include/ops/declarable/headers/parity_ops.h b/libnd4j/include/ops/declarable/headers/parity_ops.h index e0dc55937..cbc7e56da 100644 --- a/libnd4j/include/ops/declarable/headers/parity_ops.h +++ b/libnd4j/include/ops/declarable/headers/parity_ops.h @@ -1747,6 +1747,22 @@ namespace nd4j { #if NOT_EXCLUDED(OP_fake_quant_with_min_max_vars) DECLARE_CONFIGURABLE_OP(fake_quant_with_min_max_vars, 3, 1, true, 0, -2); #endif + +/** + * fake_quant_with_min_max_vals_per_channel - tf.quantization.fake_quant_with_min_max_vars_per_channel + * + * input params: + * 0 - NDArray (input) - at least 2D. + * 1 - 1D Tensor - min values (min length equals to last dim of input) + * 2 - 1D Tensor - max value (length equals to min) + * + * int params (optional): + * 0 - num_bits (allowed interval [2, 16], default 8) + * 1 - narrow_range (default False) + * + * output: + * 0 - NDArray with the same shape as input + */ #if NOT_EXCLUDED(OP_fake_quant_with_min_max_vars_per_channel) DECLARE_CONFIGURABLE_OP(fake_quant_with_min_max_vars_per_channel, 3, 1, true, 0, -2); #endif From c3f755d975d884a123ca25ecfb9bbc5b30be5e8d Mon Sep 17 00:00:00 2001 From: shugeo Date: Thu, 10 Oct 2019 18:02:49 +0300 Subject: [PATCH 13/15] Refactored helpers both for cuda and cpu platforms. 
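
After this cleanup both backends evaluate the same per-element formula. For the per-channel case the structure is roughly the following plain C++ sketch (float buffers and names are illustrative; the real code is templated over FLOAT_TYPES and uses the shared nudge() helper on NDArray/CUDA buffers):

    #include <cmath>

    // For a C-ordered tensor whose last dimension is the channel axis, the elements of
    // channel i sit at flat indices i, i + channels, i + 2*channels, ...
    // nudge(...) is the range-nudging helper factored out in this series.
    void fakeQuantPerChannelSketch(const float* input, float* output, long length, long channels,
                                   const float* min, const float* max, int lowIntBound, int upperIntBound) {
        for (long i = 0; i < channels; i++) {
            float scale, nudgedMin, nudgedMax;
            nudge(min[i], max[i], lowIntBound, upperIntBound, &scale, &nudgedMin, &nudgedMax);
            for (long e = 0; e < length; e += channels) {
                float val = input[e + i];
                if (val < nudgedMin)      val = nudgedMin;
                else if (val > nudgedMax) val = nudgedMax;
                output[e + i] = std::floor((val - nudgedMin) / scale + 0.5f) * scale + nudgedMin;
            }
        }
    }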
--- .../helpers/cpu/fake_quantization.cpp | 43 +++++-------- .../helpers/cuda/fake_quantization.cu | 63 ++++++++++--------- 2 files changed, 48 insertions(+), 58 deletions(-) diff --git a/libnd4j/include/ops/declarable/helpers/cpu/fake_quantization.cpp b/libnd4j/include/ops/declarable/helpers/cpu/fake_quantization.cpp index b09587cf7..21163d44c 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/fake_quantization.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/fake_quantization.cpp @@ -26,7 +26,7 @@ namespace ops { namespace helpers { template - static void Nudge(T min, T max, int quant_min, int quant_max, T* scale, T* nudged_min, T* nudged_max) { + static void nudge(T min, T max, int quant_min, int quant_max, T* scale, T* nudged_min, T* nudged_max) { T quant_max_float = static_cast(quant_max); T quant_min_float = static_cast(quant_min); *scale = (max - min) / (quant_max_float - quant_min_float); @@ -53,7 +53,7 @@ namespace helpers { PRAGMA_OMP_PARALLEL_FOR for (auto i = 0; i < channels; i++) { T scale, nudged_min, nudged_max; - Nudge(min->t(i), max->t(i), lowIntBound, upperIntBound, &scale, &nudged_min, &nudged_max); + nudge(min->t(i), max->t(i), lowIntBound, upperIntBound, &scale, &nudged_min, &nudged_max); for (auto e = 0; e < input->lengthOf(); e += channels) { T val = input->t(e + i); @@ -67,37 +67,26 @@ namespace helpers { } } - template - static void WiseMinMax(NDArray* input, T min, T max, NDArray* output) { - auto wiseMinMax = LAMBDA_T(x, min, max) { - if (x < min) { - return min; - } - else if (x > max) - return max; - return x; - }; - - input->applyLambda(wiseMinMax, output); - } - template void fakeQuantWithMinMaxVars_(NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) { int lowIntBound = narrowed ? 
1 : 0; int upperIntBound = (1 << numBits) - 1; - const float quant_min_float = static_cast(lowIntBound); - const float quant_max_float = static_cast(upperIntBound); - T nudged_min, nudged_max, scale; + T nudgedMin, nudgedMax, scale; - Nudge(min->t(0), max->t(0), quant_min_float, quant_max_float, &scale, &nudged_min, &nudged_max); - WiseMinMax(input, nudged_min, nudged_max, output); - *output -= nudged_min; - (*output) /= scale; - (*output) += T(0.5f); - output->applyTransform(transform::Floor, nullptr, nullptr); - (*output) *= scale; - (*output) += nudged_min; + nudge(min->t(0), max->t(0), lowIntBound, upperIntBound, &scale, &nudgedMin, &nudgedMax); + + auto fakeQuantizationWithMinMax = LAMBDA_T(x, nudgedMin, nudgedMax, scale) { + T val = x; + if (val < nudgedMin) { + val = nudgedMin; + } + else if (val > nudgedMax) + val = nudgedMax; + return (nd4j::math::nd4j_floor((val - nudgedMin)/scale + T(0.5)) * scale + nudgedMin); + }; + + input->applyLambda(fakeQuantizationWithMinMax, output); } void fakeQuantWithMinMaxVars(NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) { diff --git a/libnd4j/include/ops/declarable/helpers/cuda/fake_quantization.cu b/libnd4j/include/ops/declarable/helpers/cuda/fake_quantization.cu index 893e016ed..70eaac67b 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/fake_quantization.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/fake_quantization.cu @@ -34,44 +34,45 @@ namespace helpers { // output - output tensor // template - static __host__ __device__ void Nudge(T min, T max, int quant_min, int quant_max, T* scale, T* nudged_min, T* nudged_max) { - T quant_max_float = static_cast(quant_max); - T quant_min_float = static_cast(quant_min); - *scale = (max - min) / (quant_max_float - quant_min_float); - auto zero_point_from_min = quant_min_float - min / *scale; - uint16_t const nudged_zero_point = [zero_point_from_min, quant_min, quant_max, quant_max_float, quant_min_float] { - if (zero_point_from_min < quant_min_float) { - return static_cast(quant_min); + static __host__ __device__ void + nudge(T min, T max, int quantMin, int quantMax, T* scale, T* nudgedMin, T* nudgedMax) { + T quantMaxF = static_cast(quantMax); + T quantMinF = static_cast(quantMin); + *scale = (max - min) / (quantMaxF - quantMinF); + auto zeroPointFromMin = quantMinF - min / *scale; + uint16_t const nudgedZeroPoint = [zeroPointFromMin, quantMin, quantMax, quantMaxF, quantMinF] { + if (zeroPointFromMin < quantMinF) { + return static_cast(quantMin); } - if (zero_point_from_min > quant_max_float) { - return static_cast(quant_max); + if (zeroPointFromMin > quantMaxF) { + return static_cast(quantMax); } - return nd4j::math::nd4j_round(zero_point_from_min); + return nd4j::math::nd4j_round(zeroPointFromMin); }(); - *nudged_min = (quant_min_float - nudged_zero_point) * (*scale); - *nudged_max = (quant_max_float - nudged_zero_point) * (*scale); + *nudgedMin = (quantMinF - nudgedZeroPoint) * (*scale); + *nudgedMax = (quantMaxF - nudgedZeroPoint) * (*scale); } template void fakeQuantWithMinMaxVars_(NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) { int lowIntBound = narrowed?1:0; int upperIntBound = (1 << numBits) - 1; - min->syncToHost(); + min->syncToHost(); // these are scalars, so nothing much happened max->syncToHost(); - T scale, nudged_min, nudged_max; - Nudge(min->t(0), max->t(0), lowIntBound, upperIntBound, &scale, &nudged_min, &nudged_max); + T scale, nudgedMin, nudgedMax; + nudge(min->t(0), max->t(0), 
diff --git a/libnd4j/include/ops/declarable/helpers/cuda/fake_quantization.cu b/libnd4j/include/ops/declarable/helpers/cuda/fake_quantization.cu
index 893e016ed..70eaac67b 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/fake_quantization.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/fake_quantization.cu
@@ -34,44 +34,45 @@ namespace helpers {
     //      output - output tensor
     //
     template <typename T>
-    static __host__ __device__ void Nudge(T min, T max, int quant_min, int quant_max, T* scale, T* nudged_min, T* nudged_max) {
-        T quant_max_float = static_cast<T>(quant_max);
-        T quant_min_float = static_cast<T>(quant_min);
-        *scale = (max - min) / (quant_max_float - quant_min_float);
-        auto zero_point_from_min = quant_min_float - min / *scale;
-        uint16_t const nudged_zero_point = [zero_point_from_min, quant_min, quant_max, quant_max_float, quant_min_float] {
-            if (zero_point_from_min < quant_min_float) {
-                return static_cast<uint16_t>(quant_min);
+    static __host__ __device__ void
+    nudge(T min, T max, int quantMin, int quantMax, T* scale, T* nudgedMin, T* nudgedMax) {
+        T quantMaxF = static_cast<T>(quantMax);
+        T quantMinF = static_cast<T>(quantMin);
+        *scale = (max - min) / (quantMaxF - quantMinF);
+        auto zeroPointFromMin = quantMinF - min / *scale;
+        uint16_t const nudgedZeroPoint = [zeroPointFromMin, quantMin, quantMax, quantMaxF, quantMinF] {
+            if (zeroPointFromMin < quantMinF) {
+                return static_cast<uint16_t>(quantMin);
             }
-            if (zero_point_from_min > quant_max_float) {
-                return static_cast<uint16_t>(quant_max);
+            if (zeroPointFromMin > quantMaxF) {
+                return static_cast<uint16_t>(quantMax);
             }
-            return nd4j::math::nd4j_round(zero_point_from_min);
+            return nd4j::math::nd4j_round(zeroPointFromMin);
         }();
-        *nudged_min = (quant_min_float - nudged_zero_point) * (*scale);
-        *nudged_max = (quant_max_float - nudged_zero_point) * (*scale);
+        *nudgedMin = (quantMinF - nudgedZeroPoint) * (*scale);
+        *nudgedMax = (quantMaxF - nudgedZeroPoint) * (*scale);
     }
 
     template <typename T>
     void fakeQuantWithMinMaxVars_(NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) {
         int lowIntBound = narrowed?1:0;
         int upperIntBound = (1 << numBits) - 1;
-        min->syncToHost();
+        min->syncToHost(); // these are scalars, so the sync is cheap
         max->syncToHost();
-        T scale, nudged_min, nudged_max;
-        Nudge(min->t(0), max->t(0), lowIntBound, upperIntBound, &scale, &nudged_min, &nudged_max);
+        T scale, nudgedMin, nudgedMax;
+        nudge(min->t(0), max->t(0), lowIntBound, upperIntBound, &scale, &nudgedMin, &nudgedMax);
 
-        auto wiseMinMaxAndSoOn = LAMBDA_T(x, nudged_min, nudged_max, scale) {
+        auto wiseMinMaxAndSoOn = LAMBDA_T(x, nudgedMin, nudgedMax, scale) {
             T val = x;
-            if (x < nudged_min) {
-                val = nudged_min;
+            if (x < nudgedMin) {
+                val = nudgedMin;
             }
-            else if (x > nudged_max) {
-                val = nudged_max;
+            else if (x > nudgedMax) {
+                val = nudgedMax;
             }
             else
                 val = x;
-            return (math::nd4j_floor((val - nudged_min) / scale + T(0.5)) * scale + nudged_min);
+            return (math::nd4j_floor((val - nudgedMin) / scale + T(0.5)) * scale + nudgedMin);
         };
 
         input->applyLambda(wiseMinMaxAndSoOn, output);
@@ -88,20 +89,20 @@ namespace helpers {
         __syncthreads();
 
         for (auto i = blockIdx.x; i < (int)channels; i += gridDim.x) {
-            T scale, nudged_min, nudged_max;
-            Nudge(min[i], max[i], lowIntBound, upperIntBound, &scale, &nudged_min, &nudged_max);
-            //auto wiseMinMaxAndSoOn = LAMBDA_T(x, nudged_min, nudged_max, scale) {
+            T scale, nudgedMin, nudgedMax;
+            nudge(min[i], max[i], lowIntBound, upperIntBound, &scale, &nudgedMin, &nudgedMax);
+
             for (auto e = threadIdx.x; e < block; e += blockDim.x) {
                 T val = input[shape::getIndexOffset(e * channels + i, inputShape)];
-                if (val < nudged_min) {
-                    val = nudged_min;
-                } else if (val > nudged_max) {
-                    val = nudged_max;
+                if (val < nudgedMin) {
+                    val = nudgedMin;
+                } else if (val > nudgedMax) {
+                    val = nudgedMax;
                 }
-                output[shape::getIndexOffset(e* channels + i, outputShape)] = (math::nd4j_floor((val - nudged_min) / scale + T(0.5)) * scale + nudged_min);
+                output[shape::getIndexOffset(e* channels + i, outputShape)] =
+                        (math::nd4j_floor((val - nudgedMin) / scale + T(0.5)) * scale + nudgedMin);
             };
         }
-
     }
 
     template <typename T>

From c890de5a7b30925b77991c68c62c2ad611c51aae Mon Sep 17 00:00:00 2001
From: shugeo
Date: Thu, 10 Oct 2019 18:31:17 +0300
Subject: [PATCH 14/15] Added doc for fake_quant_with_min_max* op helpers
 implementations.
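The patch below documents the nudge math used throughout these helpers. As a rough numeric sanity check of those formulas (not part of the patch; std::round stands in for nd4j_round, whose tie-breaking may differ), numBits = 8 with narrowed = false, min = -1 and max = 1 gives:

    #include <cmath>
    #include <cstdio>

    int main() {
        // assumed inputs: numBits = 8, narrowed = false, min = -1, max = 1
        const float quantMinF = 0.f, quantMaxF = 255.f;
        const float min = -1.f, max = 1.f;

        float scale            = (max - min) / (quantMaxF - quantMinF);  // 2 / 255, about 0.00784
        float zeroPointFromMin = quantMinF - min / scale;                // 127.5
        float nudgedZeroPoint  = std::round(zeroPointFromMin);           // 128 (inside [0, 255], so no clamping)
        float nudgedMin        = (quantMinF - nudgedZeroPoint) * scale;  // about -1.0039
        float nudgedMax        = (quantMaxF - nudgedZeroPoint) * scale;  // about  0.9961

        printf("scale=%g nudgedMin=%g nudgedMax=%g\n", scale, nudgedMin, nudgedMax);
        return 0;
    }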
---
 .../helpers/cpu/fake_quantization.cpp | 56 +++++++++++--------
 1 file changed, 34 insertions(+), 22 deletions(-)

diff --git a/libnd4j/include/ops/declarable/helpers/cpu/fake_quantization.cpp b/libnd4j/include/ops/declarable/helpers/cpu/fake_quantization.cpp
index 21163d44c..6ea2992b9 100644
--- a/libnd4j/include/ops/declarable/helpers/cpu/fake_quantization.cpp
+++ b/libnd4j/include/ops/declarable/helpers/cpu/fake_quantization.cpp
@@ -25,43 +25,54 @@ namespace nd4j {
 namespace ops {
 namespace helpers {
 
+    //
+    // nudge - compute nudged min/max and the quantization scale
+    //     scale = (Max - Min) / (quantMax - quantMin)
+    //     quantMin = 0 or 1, quantMax = 2^b - 1 == (1 << b) - 1
+    //
     template <typename T>
-    static void nudge(T min, T max, int quant_min, int quant_max, T* scale, T* nudged_min, T* nudged_max) {
-        T quant_max_float = static_cast<T>(quant_max);
-        T quant_min_float = static_cast<T>(quant_min);
-        *scale = (max - min) / (quant_max_float - quant_min_float);
-        auto zero_point_from_min = quant_min_float - min / *scale;
-        uint16_t const nudged_zero_point = [zero_point_from_min, quant_min, quant_max, quant_max_float, quant_min_float] {
-            if (zero_point_from_min < quant_min_float) {
-                return static_cast<uint16_t>(quant_min);
+    static void nudge(T min, T max, int quantMin, int quantMax, T* scale, T* nudgedMin, T* nudgedMax) {
+        // floating point versions of the integer bounds
+        T quantMaxF = static_cast<T>(quantMax);
+        T quantMinF = static_cast<T>(quantMin);
+        // compute scale
+        *scale = (max - min) / (quantMaxF - quantMinF);
+        // compute zero point from the left bound
+        auto zeroPointFromMin = quantMinF - min / *scale;
+        // bound zero point to conform with range [0 or 1, 2^b - 1]
+        uint16_t const nudged_zero_point = [zeroPointFromMin, quantMin, quantMax, quantMaxF, quantMinF] {
+            if (zeroPointFromMin < quantMinF) {
+                return static_cast<uint16_t>(quantMin);
             }
-            if (zero_point_from_min > quant_max_float) {
-                return static_cast<uint16_t>(quant_max);
+            if (zeroPointFromMin > quantMaxF) {
+                return static_cast<uint16_t>(quantMax);
             }
-            return nd4j::math::nd4j_round(zero_point_from_min);
-        }();
-        *nudged_min = (quant_min_float - nudged_zero_point) * (*scale);
-        *nudged_max = (quant_max_float - nudged_zero_point) * (*scale);
+            return nd4j::math::nd4j_round(zeroPointFromMin);
+        }();
+        // compute nudged min and max from the nudged zero point
+        *nudgedMin = (quantMinF - nudged_zero_point) * (*scale);
+        *nudgedMax = (quantMaxF - nudged_zero_point) * (*scale);
     }
 
     template <typename T>
     void fakeQuantWithMinMaxVarsPerChannel_(NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) {
-        int lowIntBound = narrowed ? 1 : 0;
-        int upperIntBound = (1 << numBits) - 1;
-        auto channels = input->sizeAt(-1);
+        int lowIntBound = narrowed ? 1 : 0; // 0 or 1
+        int upperIntBound = (1 << numBits) - 1; // 2^b - 1
+        auto channels = input->sizeAt(-1); // last dimension
 
         PRAGMA_OMP_PARALLEL_FOR
         for (auto i = 0; i < channels; i++) {
             T scale, nudged_min, nudged_max;
+            // nudge min and max first, computing the scale along the way
             nudge(min->t(i), max->t(i), lowIntBound, upperIntBound, &scale, &nudged_min, &nudged_max);
-
+            // stride by the last dimension and process all values for the given channel
             for (auto e = 0; e < input->lengthOf(); e += channels) {
                 T val = input->t(e + i);
                 if ( val <= nudged_min)
                     val = nudged_min;
                 else if (val >= nudged_max)
                     val = nudged_max;
-
+                // quantization itself
                 output->t(e + i) = math::nd4j_floor((val - nudged_min)/scale + T(0.5)) * scale + nudged_min;
             }
         }
@@ -73,16 +84,17 @@ namespace helpers {
         int upperIntBound = (1 << numBits) - 1;
 
         T nudgedMin, nudgedMax, scale;
-
+        // nudge the given min and max, computing the scale and the nudged bounds
         nudge(min->t(0), max->t(0), lowIntBound, upperIntBound, &scale, &nudgedMin, &nudgedMax);
-
+        // quantization as a single lambda
         auto fakeQuantizationWithMinMax = LAMBDA_T(x, nudgedMin, nudgedMax, scale) {
-            T val = x;
+            T val = x; // bound value between nudged min and max
             if (val < nudgedMin) {
                 val = nudgedMin;
             }
             else if (val > nudgedMax)
                 val = nudgedMax;
+            // convert value with scale and shift by nudged min
             return (nd4j::math::nd4j_floor((val - nudgedMin)/scale + T(0.5)) * scale + nudgedMin);
         };

From ace65355c5afb551b596bb5bc3d8d19f07ac67ac Mon Sep 17 00:00:00 2001
From: shugeo
Date: Thu, 10 Oct 2019 18:35:28 +0300
Subject: [PATCH 15/15] Added doc for fake_quant_with_min_max* op helpers cuda
 implementations.

---
 .../ops/declarable/helpers/cuda/fake_quantization.cu | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/libnd4j/include/ops/declarable/helpers/cuda/fake_quantization.cu b/libnd4j/include/ops/declarable/helpers/cuda/fake_quantization.cu
index 70eaac67b..292b7e1c6 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/fake_quantization.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/fake_quantization.cu
@@ -84,22 +84,22 @@ namespace helpers {
                T* output, Nd4jLong* outputShape, Nd4jLong length) {
         __shared__ int block;
         if (threadIdx.x == 0) {
-            block = length / channels;
+            block = length / channels; // to loop with the last dimension as a block
         }
         __syncthreads();
 
         for (auto i = blockIdx.x; i < (int)channels; i += gridDim.x) {
             T scale, nudgedMin, nudgedMax;
             nudge(min[i], max[i], lowIntBound, upperIntBound, &scale, &nudgedMin, &nudgedMax);
-
-            for (auto e = threadIdx.x; e < block; e += blockDim.x) {
-                T val = input[shape::getIndexOffset(e * channels + i, inputShape)];
+            // loop over the block to quantize between nudged min and max
+            for (auto b = threadIdx.x; b < block; b += blockDim.x) {
+                T val = input[shape::getIndexOffset(b * channels + i, inputShape)];
                 if (val < nudgedMin) {
                     val = nudgedMin;
                 } else if (val > nudgedMax) {
                     val = nudgedMax;
                 }
-                output[shape::getIndexOffset(e* channels + i, outputShape)] =
+                output[shape::getIndexOffset(b * channels + i, outputShape)] =
                         (math::nd4j_floor((val - nudgedMin) / scale + T(0.5)) * scale + nudgedMin);
             };
         }
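For completeness, a hypothetical C++ invocation of the per-channel op in the style libnd4j tests usually follow; the include paths and the execute()/ResultSet usage are assumptions and may differ between library versions:

    #include <NDArrayFactory.h>
    #include <ops/declarable/CustomOperations.h>

    using namespace nd4j;

    int main() {
        // 2 rows x 3 channels (channels last); one min/max pair per channel
        auto x   = NDArrayFactory::create<float>('c', {2, 3}, {-0.5f, 0.1f, 1.5f, 0.3f, 0.7f, 2.0f});
        auto min = NDArrayFactory::create<float>('c', {3}, {-1.f, 0.f, 0.f});
        auto max = NDArrayFactory::create<float>('c', {3}, { 1.f, 1.f, 2.f});

        nd4j::ops::fake_quant_with_min_max_vars_per_channel op;
        auto result = op.execute({&x, &min, &max}, {}, {});   // defaults: numBits = 8, narrowed = false
        if (result->status() == ND4J_STATUS_OK)
            result->at(0)->printIndexedBuffer("per-channel fake-quantized");
        delete result;
        return 0;
    }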