/*******************************************************************************
* Copyright (c) 2015-2018 Skymind, Inc.
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* SPDX-License-Identifier: Apache-2.0
******************************************************************************/
//
// @author raver119@gmail.com
//
#include <helpers/TAD.h>
#include <helpers/ConstantTadHelper.h>
#include <execution/Threads.h>
#include "../one_hot.h"

namespace nd4j {
    namespace ops {
        namespace helpers {
            // Fills `output` with one-hot vectors: for each element of `indices`, the
            // matching tensor-along-dimension (TAD) of `output` over `axis` is set to
            // `off` everywhere except position `indices[e]`, which is set to `on`.
            // Out-of-range indices produce an all-`off` TAD.
            template <typename Z, typename I>
            static void onehot_(void *voutput, Nd4jLong *zShapeInfo, void *vindices, Nd4jLong *iShapeInfo, int axis, double on, double off) {
                auto output = reinterpret_cast<Z*>(voutput);
                auto indices = reinterpret_cast<I*>(vindices);

                // one TAD of the output per index value
                auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(zShapeInfo, {axis});

                auto iLen = static_cast<unsigned int>(shape::length(iShapeInfo));
                auto tLen = static_cast<unsigned int>(shape::length(tadPack.primaryShapeInfo()));
                auto numTads = static_cast<unsigned int>(tadPack.numberOfTads());
                auto tadEws = shape::elementWiseStride(tadPack.primaryShapeInfo());

                if (iLen != numTads)
                    throw std::runtime_error("OneHot: number of TADs should be equal to number of indices");

                if (shape::elementWiseStride(zShapeInfo) != 1 || shape::elementWiseStride(iShapeInfo) != 1)
                    throw std::runtime_error("OneHot: op expects output and indices to have elementWiseStride equal to 1");

                Z zero = static_cast<Z>(off);
                Z one = static_cast<Z>(on);

                if (tadEws >= 1) {
                    // fast path: TADs have a positive element-wise stride, so plain
                    // pointer arithmetic is enough
                    auto func = PRAGMA_THREADS_FOR {
                        for (auto e = start; e < stop; e += increment) {
                            auto cO = output + tadPack.primaryOffsets()[e];

                            auto idx = static_cast<int>(indices[e]);
                            if (idx < 0 || idx >= static_cast<int>(tLen)) {
                                // out-of-range index: write the "off" value everywhere
                                PRAGMA_OMP_SIMD
                                for (unsigned int t = 0; t < tLen; t++) {
                                    cO[t * tadEws] = zero;
                                }
                            } else {
                                PRAGMA_OMP_SIMD
                                for (unsigned int t = 0; t < tLen; t++) {
                                    cO[t * tadEws] = idx == static_cast<int>(t) ? one : zero;
                                }
                            }
                        }
                    };

                    samediff::Threads::parallel_tad(func, 0, numTads);
                } else {
                    // generic path: no usable element-wise stride, so resolve each
                    // element's offset through the TAD shape info
                    auto func = PRAGMA_THREADS_FOR {
                        for (auto e = start; e < stop; e += increment) {
                            auto cO = output + tadPack.primaryOffsets()[e];

                            auto idx = static_cast<int>(indices[e]);
                            if (idx < 0 || idx >= static_cast<int>(tLen)) {
                                PRAGMA_OMP_SIMD
                                for (unsigned int t = 0; t < tLen; t++) {
                                    cO[shape::getIndexOffset(t, tadPack.primaryShapeInfo())] = zero;
                                }
                            } else {
                                PRAGMA_OMP_SIMD
                                for (unsigned int t = 0; t < tLen; t++) {
                                    cO[shape::getIndexOffset(t, tadPack.primaryShapeInfo())] = idx == static_cast<int>(t) ? one : zero;
                                }
                            }
                        }
                    };

                    samediff::Threads::parallel_tad(func, 0, numTads);
                }
            }

            void onehot(const nd4j::LaunchContext* context, const NDArray *indices, NDArray *output, const uint axis, const uint depth, const double on, const double off) {
                auto zType = output->dataType();
                auto iType = indices->dataType();

                // dispatch on both output and index data types; `depth` is already
                // implicit in the output shape, so the kernel doesn't need it
                BUILD_DOUBLE_SELECTOR(zType, iType, onehot_, (output->buffer(), output->shapeInfo(), indices->getBuffer(), indices->getShapeInfo(), axis, on, off), LIBND4J_TYPES, LIBND4J_TYPES);
            }
        }
    }
}
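
// Usage sketch (not part of the library build): one way this helper might be
// invoked directly. Assumes libnd4j's NDArrayFactory::create overloads,
// LaunchContext::defaultContext() and NDArray::printIndexedBuffer; the shapes
// and values below are illustrative only.
//
//     auto indices = nd4j::NDArrayFactory::create<int>('c', {3}, {0, 2, 1});
//     auto output  = nd4j::NDArrayFactory::create<float>('c', {3, 4});
//     // one 4-class one-hot row per index, written along axis 1:
//     nd4j::ops::helpers::onehot(nd4j::LaunchContext::defaultContext(),
//                                &indices, &output, 1, 4, 1.0, 0.0);
//     output.printIndexedBuffer("one-hot");
//     // -> 1,0,0,0,  0,0,1,0,  0,1,0,0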