Refactored helpers both for cuda and cpu platforms.
parent
a09cb5e2be
commit
c3f755d975
|
@ -26,7 +26,7 @@ namespace ops {
|
||||||
namespace helpers {
|
namespace helpers {
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static void Nudge(T min, T max, int quant_min, int quant_max, T* scale, T* nudged_min, T* nudged_max) {
|
static void nudge(T min, T max, int quant_min, int quant_max, T* scale, T* nudged_min, T* nudged_max) {
|
||||||
T quant_max_float = static_cast<T>(quant_max);
|
T quant_max_float = static_cast<T>(quant_max);
|
||||||
T quant_min_float = static_cast<T>(quant_min);
|
T quant_min_float = static_cast<T>(quant_min);
|
||||||
*scale = (max - min) / (quant_max_float - quant_min_float);
|
*scale = (max - min) / (quant_max_float - quant_min_float);
|
||||||
|
@ -53,7 +53,7 @@ namespace helpers {
|
||||||
PRAGMA_OMP_PARALLEL_FOR
|
PRAGMA_OMP_PARALLEL_FOR
|
||||||
for (auto i = 0; i < channels; i++) {
|
for (auto i = 0; i < channels; i++) {
|
||||||
T scale, nudged_min, nudged_max;
|
T scale, nudged_min, nudged_max;
|
||||||
Nudge<T>(min->t<T>(i), max->t<T>(i), lowIntBound, upperIntBound, &scale, &nudged_min, &nudged_max);
|
nudge<T>(min->t<T>(i), max->t<T>(i), lowIntBound, upperIntBound, &scale, &nudged_min, &nudged_max);
|
||||||
|
|
||||||
for (auto e = 0; e < input->lengthOf(); e += channels) {
|
for (auto e = 0; e < input->lengthOf(); e += channels) {
|
||||||
T val = input->t<T>(e + i);
|
T val = input->t<T>(e + i);
|
||||||
|
@ -67,37 +67,26 @@ namespace helpers {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
static void WiseMinMax(NDArray* input, T min, T max, NDArray* output) {
|
|
||||||
auto wiseMinMax = LAMBDA_T(x, min, max) {
|
|
||||||
if (x < min) {
|
|
||||||
return min;
|
|
||||||
}
|
|
||||||
else if (x > max)
|
|
||||||
return max;
|
|
||||||
return x;
|
|
||||||
};
|
|
||||||
|
|
||||||
input->applyLambda<T>(wiseMinMax, output);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void fakeQuantWithMinMaxVars_(NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) {
|
void fakeQuantWithMinMaxVars_(NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) {
|
||||||
int lowIntBound = narrowed ? 1 : 0;
|
int lowIntBound = narrowed ? 1 : 0;
|
||||||
int upperIntBound = (1 << numBits) - 1;
|
int upperIntBound = (1 << numBits) - 1;
|
||||||
|
|
||||||
const float quant_min_float = static_cast<float>(lowIntBound);
|
T nudgedMin, nudgedMax, scale;
|
||||||
const float quant_max_float = static_cast<float>(upperIntBound);
|
|
||||||
T nudged_min, nudged_max, scale;
|
|
||||||
|
|
||||||
Nudge<T>(min->t<T>(0), max->t<T>(0), quant_min_float, quant_max_float, &scale, &nudged_min, &nudged_max);
|
nudge<T>(min->t<T>(0), max->t<T>(0), lowIntBound, upperIntBound, &scale, &nudgedMin, &nudgedMax);
|
||||||
WiseMinMax<T>(input, nudged_min, nudged_max, output);
|
|
||||||
*output -= nudged_min;
|
auto fakeQuantizationWithMinMax = LAMBDA_T(x, nudgedMin, nudgedMax, scale) {
|
||||||
(*output) /= scale;
|
T val = x;
|
||||||
(*output) += T(0.5f);
|
if (val < nudgedMin) {
|
||||||
output->applyTransform(transform::Floor, nullptr, nullptr);
|
val = nudgedMin;
|
||||||
(*output) *= scale;
|
}
|
||||||
(*output) += nudged_min;
|
else if (val > nudgedMax)
|
||||||
|
val = nudgedMax;
|
||||||
|
return (nd4j::math::nd4j_floor<T,T>((val - nudgedMin)/scale + T(0.5)) * scale + nudgedMin);
|
||||||
|
};
|
||||||
|
|
||||||
|
input->applyLambda<T>(fakeQuantizationWithMinMax, output);
|
||||||
}
|
}
|
||||||
|
|
||||||
void fakeQuantWithMinMaxVars(NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) {
|
void fakeQuantWithMinMaxVars(NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) {
|
||||||
|
|
|
@ -34,44 +34,45 @@ namespace helpers {
|
||||||
// output - output tensor
|
// output - output tensor
|
||||||
//
|
//
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static __host__ __device__ void Nudge(T min, T max, int quant_min, int quant_max, T* scale, T* nudged_min, T* nudged_max) {
|
static __host__ __device__ void
|
||||||
T quant_max_float = static_cast<T>(quant_max);
|
nudge(T min, T max, int quantMin, int quantMax, T* scale, T* nudgedMin, T* nudgedMax) {
|
||||||
T quant_min_float = static_cast<T>(quant_min);
|
T quantMaxF = static_cast<T>(quantMax);
|
||||||
*scale = (max - min) / (quant_max_float - quant_min_float);
|
T quantMinF = static_cast<T>(quantMin);
|
||||||
auto zero_point_from_min = quant_min_float - min / *scale;
|
*scale = (max - min) / (quantMaxF - quantMinF);
|
||||||
uint16_t const nudged_zero_point = [zero_point_from_min, quant_min, quant_max, quant_max_float, quant_min_float] {
|
auto zeroPointFromMin = quantMinF - min / *scale;
|
||||||
if (zero_point_from_min < quant_min_float) {
|
uint16_t const nudgedZeroPoint = [zeroPointFromMin, quantMin, quantMax, quantMaxF, quantMinF] {
|
||||||
return static_cast<uint16_t>(quant_min);
|
if (zeroPointFromMin < quantMinF) {
|
||||||
|
return static_cast<uint16_t>(quantMin);
|
||||||
}
|
}
|
||||||
if (zero_point_from_min > quant_max_float) {
|
if (zeroPointFromMin > quantMaxF) {
|
||||||
return static_cast<uint16_t>(quant_max);
|
return static_cast<uint16_t>(quantMax);
|
||||||
}
|
}
|
||||||
return nd4j::math::nd4j_round<T,uint16_t>(zero_point_from_min);
|
return nd4j::math::nd4j_round<T,uint16_t>(zeroPointFromMin);
|
||||||
}();
|
}();
|
||||||
*nudged_min = (quant_min_float - nudged_zero_point) * (*scale);
|
*nudgedMin = (quantMinF - nudgedZeroPoint) * (*scale);
|
||||||
*nudged_max = (quant_max_float - nudged_zero_point) * (*scale);
|
*nudgedMax = (quantMaxF - nudgedZeroPoint) * (*scale);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void fakeQuantWithMinMaxVars_(NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) {
|
void fakeQuantWithMinMaxVars_(NDArray* input, NDArray* min, NDArray* max, int numBits, bool narrowed, NDArray* output) {
|
||||||
int lowIntBound = narrowed?1:0;
|
int lowIntBound = narrowed?1:0;
|
||||||
int upperIntBound = (1 << numBits) - 1;
|
int upperIntBound = (1 << numBits) - 1;
|
||||||
min->syncToHost();
|
min->syncToHost(); // these are scalars, so nothing much happened
|
||||||
max->syncToHost();
|
max->syncToHost();
|
||||||
T scale, nudged_min, nudged_max;
|
T scale, nudgedMin, nudgedMax;
|
||||||
Nudge(min->t<T>(0), max->t<T>(0), lowIntBound, upperIntBound, &scale, &nudged_min, &nudged_max);
|
nudge(min->t<T>(0), max->t<T>(0), lowIntBound, upperIntBound, &scale, &nudgedMin, &nudgedMax);
|
||||||
|
|
||||||
auto wiseMinMaxAndSoOn = LAMBDA_T(x, nudged_min, nudged_max, scale) {
|
auto wiseMinMaxAndSoOn = LAMBDA_T(x, nudgedMin, nudgedMax, scale) {
|
||||||
T val = x;
|
T val = x;
|
||||||
if (x < nudged_min) {
|
if (x < nudgedMin) {
|
||||||
val = nudged_min;
|
val = nudgedMin;
|
||||||
}
|
}
|
||||||
else if (x > nudged_max) {
|
else if (x > nudgedMax) {
|
||||||
val = nudged_max;
|
val = nudgedMax;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
val = x;
|
val = x;
|
||||||
return (math::nd4j_floor<T,T>((val - nudged_min) / scale + T(0.5)) * scale + nudged_min);
|
return (math::nd4j_floor<T,T>((val - nudgedMin) / scale + T(0.5)) * scale + nudgedMin);
|
||||||
};
|
};
|
||||||
|
|
||||||
input->applyLambda(wiseMinMaxAndSoOn, output);
|
input->applyLambda(wiseMinMaxAndSoOn, output);
|
||||||
|
@ -88,20 +89,20 @@ namespace helpers {
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
for (auto i = blockIdx.x; i < (int)channels; i += gridDim.x) {
|
for (auto i = blockIdx.x; i < (int)channels; i += gridDim.x) {
|
||||||
T scale, nudged_min, nudged_max;
|
T scale, nudgedMin, nudgedMax;
|
||||||
Nudge(min[i], max[i], lowIntBound, upperIntBound, &scale, &nudged_min, &nudged_max);
|
nudge(min[i], max[i], lowIntBound, upperIntBound, &scale, &nudgedMin, &nudgedMax);
|
||||||
//auto wiseMinMaxAndSoOn = LAMBDA_T(x, nudged_min, nudged_max, scale) {
|
|
||||||
for (auto e = threadIdx.x; e < block; e += blockDim.x) {
|
for (auto e = threadIdx.x; e < block; e += blockDim.x) {
|
||||||
T val = input[shape::getIndexOffset(e * channels + i, inputShape)];
|
T val = input[shape::getIndexOffset(e * channels + i, inputShape)];
|
||||||
if (val < nudged_min) {
|
if (val < nudgedMin) {
|
||||||
val = nudged_min;
|
val = nudgedMin;
|
||||||
} else if (val > nudged_max) {
|
} else if (val > nudgedMax) {
|
||||||
val = nudged_max;
|
val = nudgedMax;
|
||||||
}
|
}
|
||||||
output[shape::getIndexOffset(e* channels + i, outputShape)] = (math::nd4j_floor<T, T>((val - nudged_min) / scale + T(0.5)) * scale + nudged_min);
|
output[shape::getIndexOffset(e* channels + i, outputShape)] =
|
||||||
|
(math::nd4j_floor<T, T>((val - nudgedMin) / scale + T(0.5)) * scale + nudgedMin);
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
|
|
Loading…
Reference in New Issue