Loops auto-vectorization problem fix (#274)

* libnd4j cast loop types

Signed-off-by: Oleg <oleg.semeniv@gmail.com>

* libnd4j more type casting added to loops

Signed-off-by: Oleg <oleg.semeniv@gmail.com>

* libnd4j sync casting types of iterated variable in loops

Signed-off-by: Oleg <oleg.semeniv@gmail.com>

* libnd4j more loops reviewed for vectorization problem fix

Signed-off-by: Oleg <oleg.semeniv@gmail.com>

* libnd4j fixed several typos

Signed-off-by: Oleg <oleg.semeniv@gmail.com>

* libnd4j several more files reviewed to fix auto-vectorization problem in loops

Signed-off-by: Oleg <oleg.semeniv@gmail.com>

* libnd4j merged master and reviewed more files to fix auto-vectorization problem in loops

Signed-off-by: Oleg <oleg.semeniv@gmail.com>

* libnd4j several type casts added in broadcasting that were missed, fixed mac builds

Signed-off-by: Oleg <oleg.semeniv@gmail.com>

* libnd4j double check all files and fix several more places in loops

Signed-off-by: Oleg <oleg.semeniv@gmail.com>

* libnd4j fixed builds

Signed-off-by: Oleg <oleg.semeniv@gmail.com>

* libnd4j revert changes for lup.cpp

Signed-off-by: Oleg <oleg.semeniv@gmail.com>
master
Oleh 2020-02-26 20:12:19 +02:00 committed by GitHub
parent 5c806d2fb5
commit b4575d11e9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
48 changed files with 1084 additions and 1084 deletions

View File

@ -14,9 +14,9 @@
* SPDX-License-Identifier: Apache-2.0
******************************************************************************/
//
// @author Yurii Shyrma (iuriish@yahoo.com), created on 14.03.2019
//
//
// @author Yurii Shyrma (iuriish@yahoo.com), created on 14.03.2019
//
#ifndef LIBND4J_LOOPS_H
#define LIBND4J_LOOPS_H
@ -45,7 +45,7 @@ namespace nd4j {
};
template <typename X, typename Z>
class ReductionFloatLoops : public ReductionLoops<X,Z,Z> {
class ReductionFloatLoops : public ReductionLoops<X, Z, Z> {
public:
static void wrapper(const int opNum, X* x, Nd4jLong* xShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, Z* extraParams, int64_t start, int64_t stop);
@ -54,7 +54,7 @@ namespace nd4j {
};
template <typename X, typename Z>
class ND4J_EXPORT ReductionBoolLoops : public ReductionLoops<X,Z,X> {
class ND4J_EXPORT ReductionBoolLoops : public ReductionLoops<X, Z, X> {
public:
static void wrapper(const int opNum, X* x, Nd4jLong* xShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, X* extraParams, int64_t start, int64_t stop);
@ -63,7 +63,7 @@ namespace nd4j {
};
template <typename X, typename Z>
class ND4J_EXPORT ReductionLongLoops : public ReductionLoops<X,Z,X> {
class ND4J_EXPORT ReductionLongLoops : public ReductionLoops<X, Z, X> {
public:
static void wrapper(const int opNum, X* x, Nd4jLong* xShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, X* extraParams, int64_t start, int64_t stop);
@ -72,7 +72,7 @@ namespace nd4j {
};
template <typename X>
class ND4J_EXPORT ReductionSameLoops : public ReductionLoops<X,X,X> {
class ND4J_EXPORT ReductionSameLoops : public ReductionLoops<X, X, X> {
public:
static void wrapper(const int opNum, X* x, Nd4jLong* xShapeInfo, X* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, X* extraParams, int64_t start, int64_t stop);
@ -125,10 +125,10 @@ namespace nd4j {
/*
//////////////////////////////////////////////////////////////////////////////
template<typename X, typename Y, typename Z>
void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
/*
//////////////////////////////////////////////////////////////////////////////
template<typename X, typename Y, typename Z>
void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
const Y* y, const Nd4jLong* yShapeInfo,
Z* z, const Nd4jLong* zShapeInfo,
Z* extraParams,
@ -255,12 +255,12 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
}
}
}
}
*/
}
*/
//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
template<typename X, typename Z, typename E>
template <typename OpType>
void nd4j::ReductionLoops<X, Z, E>::loopReduce(X* x, Nd4jLong* xShapeInfo,
@ -324,7 +324,7 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
auto tad = x + tadOffsets[i];
auto s = OpType::startingValue(tad);
for (uint j = 0; j < tadLen; j++)
for (Nd4jLong j = 0; j < tadLen; j++)
s = OpType::update(s, OpType::op(tad[j], extraParams), extraParams);
z[i] = OpType::postProcess(s, tadLen, extraParams);
@ -338,7 +338,7 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
auto tad = x + tadOffsets[i];
auto s = OpType::startingValue(tad);
for (uint j = 0; j < tadLen; j++)
for (Nd4jLong j = 0; j < tadLen; j++)
s = OpType::update(s, OpType::op(tad[j * tadEws], extraParams), extraParams);
z[i * zEws] = OpType::postProcess(s, tadLen, extraParams);
@ -352,7 +352,7 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
auto tad = x + tadOffsets[i];
auto s = OpType::startingValue(tad);
for (uint i0 = 0; i0 < tadLen; ++i0)
for (Nd4jLong i0 = 0; i0 < tadLen; ++i0)
s = OpType::update(s, OpType::op(tad[i0 * tadStride[0]], extraParams), extraParams);
z[i] = OpType::postProcess(s, tadLen, extraParams);
@ -366,8 +366,8 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
auto tad = x + tadOffsets[i];
auto s = OpType::startingValue(tad);
for (uint i0 = 0; i0 < tadShape[0]; ++i0)
for (uint i1 = 0; i1 < tadShape[1]; ++i1)
for (Nd4jLong i0 = 0; i0 < tadShape[0]; ++i0)
for (Nd4jLong i1 = 0; i1 < tadShape[1]; ++i1)
s = OpType::update(s, OpType::op(tad[i0 * tadStride[0] + i1 * tadStride[1]], extraParams), extraParams);
z[i] = OpType::postProcess(s, tadLen, extraParams);
@ -381,9 +381,9 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
auto tad = x + tadOffsets[i];
auto s = OpType::startingValue(tad);
for (uint i0 = 0; i0 < tadShape[0]; ++i0)
for (uint i1 = 0; i1 < tadShape[1]; ++i1)
for (uint i2 = 0; i2 < tadShape[2]; ++i2)
for (Nd4jLong i0 = 0; i0 < tadShape[0]; ++i0)
for (Nd4jLong i1 = 0; i1 < tadShape[1]; ++i1)
for (Nd4jLong i2 = 0; i2 < tadShape[2]; ++i2)
s = OpType::update(s, OpType::op(tad[i0 * tadStride[0] + i1 * tadStride[1] + i2 * tadStride[2]], extraParams), extraParams);
z[i] = OpType::postProcess(s, tadLen, extraParams);
@ -397,10 +397,10 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
auto tad = x + tadOffsets[i];
auto s = OpType::startingValue(tad);
for (uint i0 = 0; i0 < tadShape[0]; ++i0)
for (uint i1 = 0; i1 < tadShape[1]; ++i1)
for (uint i2 = 0; i2 < tadShape[2]; ++i2)
for (uint i3 = 0; i3 < tadShape[3]; ++i3)
for (Nd4jLong i0 = 0; i0 < tadShape[0]; ++i0)
for (Nd4jLong i1 = 0; i1 < tadShape[1]; ++i1)
for (Nd4jLong i2 = 0; i2 < tadShape[2]; ++i2)
for (Nd4jLong i3 = 0; i3 < tadShape[3]; ++i3)
s = OpType::update(s, OpType::op(tad[i0 * tadStride[0] + i1 * tadStride[1] + i2 * tadStride[2] + i3 * tadStride[3]], extraParams), extraParams);
z[i] = OpType::postProcess(s, tadLen, extraParams);
@ -414,11 +414,11 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
auto tad = x + tadOffsets[i];
auto s = OpType::startingValue(tad);
for (uint i0 = 0; i0 < tadShape[0]; ++i0)
for (uint i1 = 0; i1 < tadShape[1]; ++i1)
for (uint i2 = 0; i2 < tadShape[2]; ++i2)
for (uint i3 = 0; i3 < tadShape[3]; ++i3)
for (uint i4 = 0; i4 < tadShape[4]; ++i4)
for (Nd4jLong i0 = 0; i0 < tadShape[0]; ++i0)
for (Nd4jLong i1 = 0; i1 < tadShape[1]; ++i1)
for (Nd4jLong i2 = 0; i2 < tadShape[2]; ++i2)
for (Nd4jLong i3 = 0; i3 < tadShape[3]; ++i3)
for (Nd4jLong i4 = 0; i4 < tadShape[4]; ++i4)
s = OpType::update(s, OpType::op(tad[i0 * tadStride[0] + i1 * tadStride[1] + i2 * tadStride[2] + i3 * tadStride[3] + i4 * tadStride[4]], extraParams), extraParams);
z[i] = OpType::postProcess(s, tadLen, extraParams);
@ -435,7 +435,7 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
auto tad = x + tadOffsets[i];
auto s = OpType::startingValue(tad);
for (uint j = 0; j < tadLen; j++)
for (Nd4jLong j = 0; j < tadLen; j++)
s = OpType::update(s, OpType::op(tad[j * tadEws], extraParams), extraParams);
auto zOffset = shape::indexOffset(i, zShapeInfo, castZShapeInfo, canCastZ);
@ -453,7 +453,7 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
auto tad = x + tadOffsets[i];
auto s = OpType::startingValue(tad);
for (uint j = 0; j < tadLen; j++) {
for (Nd4jLong j = 0; j < tadLen; j++) {
auto tadOffset = shape::indexOffset(j, tadShapeInfo, castTadShapeInfo, canCastTad);
s = OpType::update(s, OpType::op(tad[tadOffset], extraParams), extraParams);
}
@ -475,7 +475,7 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
auto tad = x + tadOffsets[i];
auto s = OpType::startingValue(tad);
for (uint j = 0; j < tadLen; j++)
for (Nd4jLong j = 0; j < tadLen; j++)
s = OpType::update(s, OpType::op(tad[innertadOffsets[j]], extraParams), extraParams);
auto zOffset = shape::indexOffset(i, zShapeInfo, castZShapeInfo, canCastZ);
@ -492,7 +492,7 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
//////////////////////////////////////////////////////////////////////////////
template <typename X, typename Z, typename E>
template <typename OpType>
void nd4j::TransformLoops<X,Z,E>::loopTransform(X* x, Nd4jLong* xShapeInfo,
void nd4j::TransformLoops<X, Z, E>::loopTransform(X* x, Nd4jLong* xShapeInfo,
Z* z, Nd4jLong* zShapeInfo,
E* extraParams, uint64_t threadId, uint64_t numThreads) {
@ -528,7 +528,7 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
int64_t start = span.startX(), stop = span.stopX();
for (auto i = start; i < stop; i++)
z[i*zEws] = OpType::op(x[i*xEws], extraParams);
z[i * zEws] = OpType::op(x[i * xEws], extraParams);
}
break;
@ -546,7 +546,8 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
const auto xOffset = shape::indexOffset(i, xShapeInfo, castXShapeInfo, canCastX);
z[i * zEws] = OpType::op(x[xOffset], extraParams);
}
} else {
}
else {
for (auto i = start; i < stop; i++) {
const auto xOffset = shape::indexOffset(i, xShapeInfo, castXShapeInfo, canCastX);
z[i] = OpType::op(x[xOffset], extraParams);
@ -576,7 +577,7 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
auto z0 = i0 * zStride[0];
auto x0 = i0 * xStride[0];
for (uint i1 = span.startY(); i1 < span.stopY(); ++i1)
for (auto i1 = span.startY(); i1 < span.stopY(); ++i1)
z[z0 + i1 * zStride[1]] = OpType::op(x[x0 + i1 * xStride[1]], extraParams);
}
}
@ -584,9 +585,9 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
//*********************************************//
case LoopKind::RANK3: {
auto uXShape0 = static_cast<uint>(xShape[0]);
auto uXShape1 = static_cast<uint>(xShape[1]);
auto uXShape2 = static_cast<uint>(xShape[2]);
auto uXShape0 = xShape[0];
auto uXShape1 = xShape[1];
auto uXShape2 = xShape[2];
auto loop = samediff::ThreadsHelper::pickLoop2d(numThreads, uXShape0, uXShape1);
auto span = samediff::Span2::build(loop, threadId, numThreads, 0, uXShape0, 1, 0, uXShape1, 1);
@ -597,7 +598,7 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
auto z0 = i0 * zStride[0] + i1 * zStride[1];
auto x0 = i0 * xStride[0] + i1 * xStride[1];
for (uint i2 = 0; i2 < uXShape2; ++i2)
for (Nd4jLong i2 = 0; i2 < uXShape2; ++i2)
z[z0 + i2 * zStride[2]] = OpType::op(x[x0 + i2 * xStride[2]], extraParams);
}
}
@ -605,10 +606,10 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
//*********************************************//
case LoopKind::RANK4: {
auto uXShape0 = static_cast<uint>(xShape[0]);
auto uXShape1 = static_cast<uint>(xShape[1]);
auto uXShape2 = static_cast<uint>(xShape[2]);
auto uXShape3 = static_cast<uint>(xShape[3]);
auto uXShape0 = xShape[0];
auto uXShape1 = xShape[1];
auto uXShape2 = xShape[2];
auto uXShape3 = xShape[3];
auto loop = samediff::ThreadsHelper::pickLoop3d(numThreads, uXShape0, uXShape1, uXShape2);
auto span = samediff::Span3::build(loop, threadId, numThreads, 0, uXShape0, 1, 0, uXShape1, 1, 0, uXShape2, 1);
@ -619,7 +620,7 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
auto x0 = i0 * xStride[0] + i1 * xStride[1] + i2 * xStride[2];
auto z0 = i0 * zStride[0] + i1 * zStride[1] + i2 * zStride[2];
for (uint i3 = 0; i3 < uXShape3; ++i3)
for (Nd4jLong i3 = 0; i3 < uXShape3; ++i3)
z[z0 + i3 * zStride[3]] = OpType::op(x[x0 + i3 * xStride[3]], extraParams);
}
}
@ -627,11 +628,11 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
//*********************************************//
case LoopKind::RANK5: {
auto uXShape0 = static_cast<uint>(xShape[0]);
auto uXShape1 = static_cast<uint>(xShape[1]);
auto uXShape2 = static_cast<uint>(xShape[2]);
auto uXShape3 = static_cast<uint>(xShape[3]);
auto uXShape4 = static_cast<uint>(xShape[4]);
auto uXShape0 = xShape[0];
auto uXShape1 = xShape[1];
auto uXShape2 = xShape[2];
auto uXShape3 = xShape[3];
auto uXShape4 = xShape[4];
auto loop = samediff::ThreadsHelper::pickLoop3d(numThreads, uXShape0, uXShape1, uXShape2);
auto span = samediff::Span3::build(loop, threadId, numThreads, 0, uXShape0, 1, 0, uXShape1, 1, 0, uXShape2, 1);
@ -643,12 +644,12 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
auto z0 = i0 * zStride[0] + i1 * zStride[1] + i2 * zStride[2];
auto x0 = i0 * xStride[0] + i1 * xStride[1] + i2 * xStride[2];
for (uint i3 = 0; i3 < uXShape3; ++i3) {
for (Nd4jLong i3 = 0; i3 < uXShape3; ++i3) {
auto z1 = z0 + i3 * zStride[3];
auto x1 = x0 + i3 * xStride[3];
for (uint i4 = 0; i4 < uXShape4; ++i4)
for (Nd4jLong i4 = 0; i4 < uXShape4; ++i4)
z[z1 + i4 * zStride[4]] = OpType::op(x[x1 + i4 * xStride[4]], extraParams);
}
@ -678,7 +679,7 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
}
//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
template<typename X, typename Z>
template <typename OpType>
void nd4j::Reduction3Loops<X, Z>::loopReduce3(X* x, Nd4jLong* xShapeInfo,
@ -694,11 +695,11 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
const Nd4jLong xLen = shape::length(xShapeInfo);
const Nd4jLong yLen = shape::length(yShapeInfo);
Nd4jLong *xTadShapeInfo = nullptr, *yTadShapeInfo = nullptr, *xTadOffsets = nullptr, *yTadOffsets = nullptr;
Nd4jLong* xTadShapeInfo = nullptr, * yTadShapeInfo = nullptr, * xTadOffsets = nullptr, * yTadOffsets = nullptr;
TadPack tadPackX, tadPackY;
std::vector<Nd4jLong> zeroOffsets;
if(xLen == yLen) {
if (xLen == yLen) {
tadPackX = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dims, dimsLen);
tadPackY = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(yShapeInfo, dims, dimsLen);
xTadShapeInfo = tadPackX.primaryShapeInfo();
@ -706,7 +707,7 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
xTadOffsets = tadPackX.primaryOffsets();
yTadOffsets = tadPackY.primaryOffsets();
}
else if(yLen > xLen) {
else if (yLen > xLen) {
tadPackY = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(yShapeInfo, dims, dimsLen);
xTadShapeInfo = xShapeInfo;
yTadShapeInfo = tadPackY.primaryShapeInfo();
@ -749,7 +750,7 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
const auto yTad = yTadOffsets ? y + yTadOffsets[i] : y;
auto s = OpType::startingValue(xTad);
for (uint j = 0; j < tadLen; ++j)
for (Nd4jLong j = 0; j < tadLen; ++j)
s = OpType::update(s, OpType::op(xTad[j], yTad[j], extraParams), extraParams);
z[i] = OpType::postProcess(s, tadLen, extraParams);
@ -769,7 +770,7 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
const auto yTad = yTadOffsets ? y + yTadOffsets[i] : y;
auto s = OpType::startingValue(xTad);
for (uint j = 0; j < tadLen; ++j)
for (Nd4jLong j = 0; j < tadLen; ++j)
s = OpType::update(s, OpType::op(xTad[j * xTadEws], yTad[j * yTadEws], extraParams), extraParams);
z[i * zEws] = OpType::postProcess(s, tadLen, extraParams);
@ -789,7 +790,7 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
const auto yTad = yTadOffsets ? y + yTadOffsets[i] : y;
auto s = OpType::startingValue(xTad);
for (uint i0 = 0; i0 < tadLen; ++i0) {
for (Nd4jLong i0 = 0; i0 < tadLen; ++i0) {
const auto xTadOffset = i0 * xTadStride[0];
const auto yTadOffset = i0 * yTadStride[0];
s = OpType::update(s, OpType::op(xTad[xTadOffset], yTad[yTadOffset], extraParams), extraParams);
@ -812,8 +813,8 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
const auto yTad = yTadOffsets ? y + yTadOffsets[i] : y;
auto s = OpType::startingValue(xTad);
for (uint i0 = 0; i0 < tadShape[0]; ++i0) {
for (uint i1 = 0; i1 < tadShape[1]; ++i1) {
for (Nd4jLong i0 = 0; i0 < tadShape[0]; ++i0) {
for (Nd4jLong i1 = 0; i1 < tadShape[1]; ++i1) {
const auto xTadOffset = i0 * xTadStride[0] + i1 * xTadStride[1];
const auto yTadOffset = i0 * yTadStride[0] + i1 * yTadStride[1];
s = OpType::update(s, OpType::op(xTad[xTadOffset], yTad[yTadOffset], extraParams), extraParams);
@ -836,9 +837,9 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
const auto yTad = yTadOffsets ? y + yTadOffsets[i] : y;
auto s = OpType::startingValue(xTad);
for (uint i0 = 0; i0 < tadShape[0]; ++i0) {
for (uint i1 = 0; i1 < tadShape[1]; ++i1) {
for (uint i2 = 0; i2 < tadShape[2]; ++i2) {
for (Nd4jLong i0 = 0; i0 < tadShape[0]; ++i0) {
for (Nd4jLong i1 = 0; i1 < tadShape[1]; ++i1) {
for (Nd4jLong i2 = 0; i2 < tadShape[2]; ++i2) {
const auto xTadOffset = i0 * xTadStride[0] + i1 * xTadStride[1] + i2 * xTadStride[2];
const auto yTadOffset = i0 * yTadStride[0] + i1 * yTadStride[1] + i2 * yTadStride[2];
s = OpType::update(s, OpType::op(xTad[xTadOffset], yTad[yTadOffset], extraParams), extraParams);
@ -862,10 +863,10 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
const auto yTad = yTadOffsets ? y + yTadOffsets[i] : y;
auto s = OpType::startingValue(xTad);
for (uint i0 = 0; i0 < tadShape[0]; ++i0) {
for (uint i1 = 0; i1 < tadShape[1]; ++i1) {
for (uint i2 = 0; i2 < tadShape[2]; ++i2) {
for (uint i3 = 0; i3 < tadShape[3]; ++i3) {
for (Nd4jLong i0 = 0; i0 < tadShape[0]; ++i0) {
for (Nd4jLong i1 = 0; i1 < tadShape[1]; ++i1) {
for (Nd4jLong i2 = 0; i2 < tadShape[2]; ++i2) {
for (Nd4jLong i3 = 0; i3 < tadShape[3]; ++i3) {
const auto xTadOffset = i0 * xTadStride[0] + i1 * xTadStride[1] + i2 * xTadStride[2] + i3 * xTadStride[3];
const auto yTadOffset = i0 * yTadStride[0] + i1 * yTadStride[1] + i2 * yTadStride[2] + i3 * yTadStride[3];
s = OpType::update(s, OpType::op(xTad[xTadOffset], yTad[yTadOffset], extraParams), extraParams);
@ -890,11 +891,11 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
const auto yTad = yTadOffsets ? y + yTadOffsets[i] : y;
auto s = OpType::startingValue(xTad);
for (uint i0 = 0; i0 < tadShape[0]; ++i0) {
for (uint i1 = 0; i1 < tadShape[1]; ++i1) {
for (uint i2 = 0; i2 < tadShape[2]; ++i2) {
for (uint i3 = 0; i3 < tadShape[3]; ++i3) {
for (uint i4 = 0; i4 < tadShape[4]; ++i4) {
for (Nd4jLong i0 = 0; i0 < tadShape[0]; ++i0) {
for (Nd4jLong i1 = 0; i1 < tadShape[1]; ++i1) {
for (Nd4jLong i2 = 0; i2 < tadShape[2]; ++i2) {
for (Nd4jLong i3 = 0; i3 < tadShape[3]; ++i3) {
for (Nd4jLong i4 = 0; i4 < tadShape[4]; ++i4) {
const auto xTadOffset = i0 * xTadStride[0] + i1 * xTadStride[1] + i2 * xTadStride[2] + i3 * xTadStride[3] + i4 * xTadStride[4];
const auto yTadOffset = i0 * yTadStride[0] + i1 * yTadStride[1] + i2 * yTadStride[2] + i3 * yTadStride[3] + i4 * yTadStride[4];
s = OpType::update(s, OpType::op(xTad[xTadOffset], yTad[yTadOffset], extraParams), extraParams);
@ -913,7 +914,7 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
uint castXTadShapeInfo[MAX_RANK];
const bool canCastXTad = nd4j::DataTypeUtils::castShapeInfo<uint>(xTadShapeInfo, castXTadShapeInfo);
if(shape::haveSameShapeAndStrides(xTadShapeInfo, yTadShapeInfo)) {
if (shape::haveSameShapeAndStrides(xTadShapeInfo, yTadShapeInfo)) {
Z extraParams[3];
for (auto i = start; i < stop; i++) {
extraParams[0] = param0;
@ -924,7 +925,7 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
const auto yTad = yTadOffsets ? y + yTadOffsets[i] : y;
auto s = OpType::startingValue(xTad);
for (uint j = 0; j < tadLen; ++j) {
for (Nd4jLong j = 0; j < tadLen; ++j) {
const auto tadOffset = shape::indexOffset(j, xTadShapeInfo, castXTadShapeInfo, canCastXTad);
s = OpType::update(s, OpType::op(xTad[tadOffset], yTad[tadOffset], extraParams), extraParams);
}
@ -946,7 +947,7 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
const auto yTad = yTadOffsets ? y + yTadOffsets[i] : y;
auto s = OpType::startingValue(xTad);
for (uint j = 0; j < tadLen; ++j) {
for (Nd4jLong j = 0; j < tadLen; ++j) {
const auto xTadOffset = shape::indexOffset(j, xTadShapeInfo, castXTadShapeInfo, canCastXTad);
const auto yTadOffset = shape::indexOffset(j, yTadShapeInfo, castYTadShapeInfo, canCastYTad);
s = OpType::update(s, OpType::op(xTad[xTadOffset], yTad[yTadOffset], extraParams), extraParams);
@ -958,7 +959,7 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
}
}
//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
template<typename X, typename Z>
template <typename OpType>
void nd4j::Reduction3Loops<X, Z>::loopReduce3All(X* x, Nd4jLong* xShapeInfo,
@ -990,14 +991,14 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
const auto startVal = OpType::startingValue(x);
int numThreads = OmpLaunchHelper::tadThreads(tadLen, numXTads*numYTads);
int numThreads = OmpLaunchHelper::tadThreads(tadLen, numXTads * numYTads);
switch (kindOfLoop) {
//*********************************************//
case LoopKind::EWS1: {
Z extraParams[3];
for (auto ix = 0; ix < numXTads; ix++) {
for (auto iy = 0; iy < numYTads; iy++) {
for (Nd4jLong ix = 0; ix < numXTads; ix++) {
for (Nd4jLong iy = 0; iy < numYTads; iy++) {
extraParams[0] = param0;
extraParams[1] = param1;
extraParams[2] = param2;
@ -1007,7 +1008,7 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
const auto zInd = ix * numYTads + iy;
auto s = startVal;
for (uint j = 0; j < tadLen; ++j)
for (Nd4jLong j = 0; j < tadLen; ++j)
s = OpType::update(s, OpType::op(xTad[j], yTad[j], extraParams), extraParams);
z[zInd] = OpType::postProcess(s, tadLen, extraParams);
@ -1019,8 +1020,8 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
//*********************************************//
case LoopKind::EWSNONZERO: {
Z extraParams[3];
for (auto ix = 0; ix < numXTads; ix++) {
for (auto iy = 0; iy < numYTads; iy++) {
for (Nd4jLong ix = 0; ix < numXTads; ix++) {
for (Nd4jLong iy = 0; iy < numYTads; iy++) {
extraParams[0] = param0;
extraParams[1] = param1;
extraParams[2] = param2;
@ -1030,7 +1031,7 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
const auto zInd = ix * numYTads + iy;
auto s = startVal;
for (uint j = 0; j < tadLen; ++j)
for (Nd4jLong j = 0; j < tadLen; ++j)
s = OpType::update(s, OpType::op(xTad[j * xTadEws], yTad[j * yTadEws], extraParams), extraParams);
z[zInd * zEws] = OpType::postProcess(s, tadLen, extraParams);
@ -1042,8 +1043,8 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
//*********************************************//
case LoopKind::RANK1: {
Z extraParams[3];
for (auto ix = 0; ix < numXTads; ix++) {
for (auto iy = 0; iy < numYTads; iy++) {
for (Nd4jLong ix = 0; ix < numXTads; ix++) {
for (Nd4jLong iy = 0; iy < numYTads; iy++) {
extraParams[0] = param0;
extraParams[1] = param1;
extraParams[2] = param2;
@ -1053,7 +1054,7 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
const auto zInd = ix * numYTads + iy;
auto s = startVal;
for (uint i0 = 0; i0 < tadLen; ++i0) {
for (Nd4jLong i0 = 0; i0 < tadLen; ++i0) {
const auto xTadOffset = i0 * xTadStride[0];
const auto yTadOffset = i0 * yTadStride[0];
s = OpType::update(s, OpType::op(xTad[xTadOffset], yTad[yTadOffset], extraParams), extraParams);
@ -1067,8 +1068,8 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
//*********************************************//
case LoopKind::RANK2: {
Z extraParams[3];
for (auto ix = 0; ix < numXTads; ix++) {
for (auto iy = 0; iy < numYTads; iy++) {
for (Nd4jLong ix = 0; ix < numXTads; ix++) {
for (Nd4jLong iy = 0; iy < numYTads; iy++) {
extraParams[0] = param0;
extraParams[1] = param1;
extraParams[2] = param2;
@ -1078,8 +1079,8 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
const auto zInd = ix * numYTads + iy;
auto s = startVal;
for (uint i0 = 0; i0 < tadShape[0]; ++i0) {
for (uint i1 = 0; i1 < tadShape[1]; ++i1) {
for (Nd4jLong i0 = 0; i0 < tadShape[0]; ++i0) {
for (Nd4jLong i1 = 0; i1 < tadShape[1]; ++i1) {
const auto xTadOffset = i0 * xTadStride[0] + i1 * xTadStride[1];
const auto yTadOffset = i0 * yTadStride[0] + i1 * yTadStride[1];
s = OpType::update(s, OpType::op(xTad[xTadOffset], yTad[yTadOffset], extraParams), extraParams);
@ -1094,8 +1095,8 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
//*********************************************//
case LoopKind::RANK3: {
Z extraParams[3];
for (auto ix = 0; ix < numXTads; ix++) {
for (auto iy = 0; iy < numYTads; iy++) {
for (Nd4jLong ix = 0; ix < numXTads; ix++) {
for (Nd4jLong iy = 0; iy < numYTads; iy++) {
extraParams[0] = param0;
extraParams[1] = param1;
extraParams[2] = param2;
@ -1105,9 +1106,9 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
const auto zInd = ix * numYTads + iy;
auto s = startVal;
for (uint i0 = 0; i0 < tadShape[0]; ++i0) {
for (uint i1 = 0; i1 < tadShape[1]; ++i1) {
for (uint i2 = 0; i2 < tadShape[2]; ++i2) {
for (Nd4jLong i0 = 0; i0 < tadShape[0]; ++i0) {
for (Nd4jLong i1 = 0; i1 < tadShape[1]; ++i1) {
for (Nd4jLong i2 = 0; i2 < tadShape[2]; ++i2) {
const auto xTadOffset = i0 * xTadStride[0] + i1 * xTadStride[1] + i2 * xTadStride[2];
const auto yTadOffset = i0 * yTadStride[0] + i1 * yTadStride[1] + i2 * yTadStride[2];
s = OpType::update(s, OpType::op(xTad[xTadOffset], yTad[yTadOffset], extraParams), extraParams);
@ -1123,8 +1124,8 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
//*********************************************//
case LoopKind::RANK4: {
Z extraParams[3];
for (auto ix = 0; ix < numXTads; ix++) {
for (auto iy = 0; iy < numYTads; iy++) {
for (Nd4jLong ix = 0; ix < numXTads; ix++) {
for (Nd4jLong iy = 0; iy < numYTads; iy++) {
extraParams[0] = param0;
extraParams[1] = param1;
extraParams[2] = param2;
@ -1134,10 +1135,10 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
const auto zInd = ix * numYTads + iy;
auto s = startVal;
for (uint i0 = 0; i0 < tadShape[0]; ++i0) {
for (uint i1 = 0; i1 < tadShape[1]; ++i1) {
for (uint i2 = 0; i2 < tadShape[2]; ++i2) {
for (uint i3 = 0; i3 < tadShape[3]; ++i3) {
for (Nd4jLong i0 = 0; i0 < tadShape[0]; ++i0) {
for (Nd4jLong i1 = 0; i1 < tadShape[1]; ++i1) {
for (Nd4jLong i2 = 0; i2 < tadShape[2]; ++i2) {
for (Nd4jLong i3 = 0; i3 < tadShape[3]; ++i3) {
const auto xTadOffset = i0 * xTadStride[0] + i1 * xTadStride[1] + i2 * xTadStride[2] + i3 * xTadStride[3];
const auto yTadOffset = i0 * yTadStride[0] + i1 * yTadStride[1] + i2 * yTadStride[2] + i3 * yTadStride[3];
s = OpType::update(s, OpType::op(xTad[xTadOffset], yTad[yTadOffset], extraParams), extraParams);
@ -1154,8 +1155,8 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
//*********************************************//
case LoopKind::RANK5: {
Z extraParams[3];
for (auto ix = 0; ix < numXTads; ix++) {
for (auto iy = 0; iy < numYTads; iy++) {
for (Nd4jLong ix = 0; ix < numXTads; ix++) {
for (Nd4jLong iy = 0; iy < numYTads; iy++) {
extraParams[0] = param0;
extraParams[1] = param1;
extraParams[2] = param2;
@ -1165,11 +1166,11 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
const auto zInd = ix * numYTads + iy;
auto s = startVal;
for (uint i0 = 0; i0 < tadShape[0]; ++i0) {
for (uint i1 = 0; i1 < tadShape[1]; ++i1) {
for (uint i2 = 0; i2 < tadShape[2]; ++i2) {
for (uint i3 = 0; i3 < tadShape[3]; ++i3) {
for (uint i4 = 0; i4 < tadShape[4]; ++i4) {
for (Nd4jLong i0 = 0; i0 < tadShape[0]; ++i0) {
for (Nd4jLong i1 = 0; i1 < tadShape[1]; ++i1) {
for (Nd4jLong i2 = 0; i2 < tadShape[2]; ++i2) {
for (Nd4jLong i3 = 0; i3 < tadShape[3]; ++i3) {
for (Nd4jLong i4 = 0; i4 < tadShape[4]; ++i4) {
const auto xTadOffset = i0 * xTadStride[0] + i1 * xTadStride[1] + i2 * xTadStride[2] + i3 * xTadStride[3] + i4 * xTadStride[4];
const auto yTadOffset = i0 * yTadStride[0] + i1 * yTadStride[1] + i2 * yTadStride[2] + i3 * yTadStride[3] + i4 * yTadStride[4];
s = OpType::update(s, OpType::op(xTad[xTadOffset], yTad[yTadOffset], extraParams), extraParams);
@ -1189,10 +1190,10 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
uint castXTadShapeInfo[MAX_RANK];
const bool canCastXTad = nd4j::DataTypeUtils::castShapeInfo<uint>(xTadShapeInfo, castXTadShapeInfo);
if(shape::haveSameShapeAndStrides(xTadShapeInfo, yTadShapeInfo)) {
if (shape::haveSameShapeAndStrides(xTadShapeInfo, yTadShapeInfo)) {
Z extraParams[3];
for (auto ix = 0; ix < numXTads; ix++) {
for (auto iy = 0; iy < numYTads; iy++) {
for (Nd4jLong ix = 0; ix < numXTads; ix++) {
for (Nd4jLong iy = 0; iy < numYTads; iy++) {
extraParams[0] = param0;
extraParams[1] = param1;
extraParams[2] = param2;
@ -1202,7 +1203,7 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
const auto zInd = ix * numYTads + iy;
auto s = startVal;
for (uint j = 0; j < tadLen; ++j) {
for (Nd4jLong j = 0; j < tadLen; ++j) {
const auto tadOffset = shape::indexOffset(j, xTadShapeInfo, castXTadShapeInfo, canCastXTad);
s = OpType::update(s, OpType::op(xTad[tadOffset], yTad[tadOffset], extraParams), extraParams);
}
@ -1215,8 +1216,8 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
const bool canCastYTad = nd4j::DataTypeUtils::castShapeInfo<uint>(yTadShapeInfo, castYTadShapeInfo);
Z extraParams[3];
for (auto ix = 0; ix < numXTads; ix++) {
for (auto iy = 0; iy < numYTads; iy++) {
for (Nd4jLong ix = 0; ix < numXTads; ix++) {
for (Nd4jLong iy = 0; iy < numYTads; iy++) {
extraParams[0] = param0;
extraParams[1] = param1;
extraParams[2] = param2;
@ -1226,7 +1227,7 @@ void Loops::loopXYZ(const X* x, const Nd4jLong* xShapeInfo,
const auto zInd = ix * numYTads + iy;
auto s = startVal;
for (uint j = 0; j < tadLen; ++j) {
for (Nd4jLong j = 0; j < tadLen; ++j) {
const auto xTadOffset = shape::indexOffset(j, xTadShapeInfo, castXTadShapeInfo, canCastXTad);
const auto yTadOffset = shape::indexOffset(j, yTadShapeInfo, castYTadShapeInfo, canCastYTad);
s = OpType::update(s, OpType::op(xTad[xTadOffset], yTad[yTadOffset], extraParams), extraParams);

View File

@ -50,12 +50,12 @@ namespace nd4j {
1 == zArr.ews() && 'c' == zArr.ordering());
if (bSpecialCase && yArr.isColumnVector() && 1 == xArr.sizeAt(-1) ) {
auto yLen = (uint32_t)yArr.lengthOf();
auto yLen = yArr.lengthOf();
auto func = PRAGMA_THREADS_FOR{
for (uint32_t i = start; i < stop; i++) {
for (auto i = start; i < stop; i++) {
auto rZ = z + (i * yLen);
auto v = x[i];
for (uint32_t j = 0; j < yLen; j++) {
for (Nd4jLong j = 0; j < yLen; j++) {
rZ[j] = OpType::op(v, y[j]);
}
}
@ -74,13 +74,13 @@ namespace nd4j {
if (bSpecialCase && bSpecialCase2) {
int zDim1 = zArr.sizeAt(-2);
int zDim2 = zArr.sizeAt(-1);
uint32_t zDim1 = zArr.sizeAt(-2);
uint32_t zDim2 = zArr.sizeAt(-1);
int nLen = zArr.lengthOf() / yArr.sizeAt(-1);
uint32_t nLen = zArr.lengthOf() / yArr.sizeAt(-1);
auto func = PRAGMA_THREADS_FOR{
for (uint32_t total = start; total < stop; total++) {
for (auto total = start; total < stop; total++) {
uint32_t i = total / zDim1;
uint32_t j = total % zDim1;

View File

@ -184,7 +184,7 @@ namespace functions {
const auto oX = x[i];
PRAGMA_OMP_SIMD
for (unsigned int f = 0; f < loopLength; f++)
for (Nd4jLong f = 0; f < loopLength; f++)
oZ[f] = OpType::op(oX, oY[f]);
}
} else if(kindOfLoop == nd4j::LoopKind::BROADCAST_SCALAR_Y){
@ -198,7 +198,7 @@ namespace functions {
const auto oY = y[i];
PRAGMA_OMP_SIMD
for (unsigned int f = 0; f < loopLength; f++)
for (Nd4jLong f = 0; f < loopLength; f++)
oZ[f] = OpType::op(oX[f], oY);
}
}
@ -213,14 +213,14 @@ namespace functions {
Nd4jLong yStrides[3] = { 0,0,0 };
nd4j::ShapeUtils::copyCertainStridesFromShapeInfo(yShapeInfo, xRank, dimensionLength, dimension, yStrides);
uint32_t nSize1 = shape::sizeAt(zShapeInfo, 1);
uint32_t nSize2 = shape::sizeAt(zShapeInfo, 2);
uint64_t nSize1 = shape::sizeAt(zShapeInfo, 1);
uint64_t nSize2 = shape::sizeAt(zShapeInfo, 2);
for (uint32_t index0 = start; index0 < stop; index0++) {
for (auto index0 = start; index0 < stop; index0++) {
PRAGMA_OMP_SIMD
for (uint32_t index1 = 0; index1 < nSize1; index1++) {
for (uint32_t index2 = 0; index2 < nSize2; index2++) {
for (uint64_t index1 = 0; index1 < nSize1; index1++) {
for (uint64_t index2 = 0; index2 < nSize2; index2++) {
auto rX = x + (xStrides[0] * index0 + xStrides[1] * index1 + xStrides[2] * index2);
auto rY = y + (yStrides[0] * index0 + yStrides[1] * index1 + yStrides[2] * index2);
auto rZ = z + (zStrides[0] * index0 + zStrides[1] * index1 + zStrides[2] * index2);
@ -242,18 +242,18 @@ namespace functions {
Nd4jLong yStrides[4] = { 0,0,0,0 };
nd4j::ShapeUtils::copyCertainStridesFromShapeInfo(yShapeInfo, xRank, dimensionLength, dimension, yStrides);
uint32_t nSize1 = shape::sizeAt(zShapeInfo, 1);
uint32_t nSize2 = shape::sizeAt(zShapeInfo, 2);
uint32_t nSize3 = shape::sizeAt(zShapeInfo, 3);
uint64_t nSize1 = shape::sizeAt(zShapeInfo, 1);
uint64_t nSize2 = shape::sizeAt(zShapeInfo, 2);
uint64_t nSize3 = shape::sizeAt(zShapeInfo, 3);
for (uint32_t i = start; i < stop; i++) {
for (auto i = start; i < stop; i++) {
uint32_t index0 = i / nSize1;
uint32_t index1 = i % nSize1;
uint64_t index0 = i / nSize1;
uint64_t index1 = i % nSize1;
PRAGMA_OMP_SIMD
for (uint32_t index2 = 0; index2 < nSize2; index2++) {
for (uint32_t index3 = 0; index3 < nSize3; index3++) {
for (uint64_t index2 = 0; index2 < nSize2; index2++) {
for (uint64_t index3 = 0; index3 < nSize3; index3++) {
auto rX = x + (xStrides[0] * index0 + xStrides[1] * index1 + xStrides[2] * index2 + xStrides[3] * index3);
auto rY = y + (yStrides[0] * index0 + yStrides[1] * index1 + yStrides[2] * index2 + yStrides[3] * index3);
auto rZ = z + (zStrides[0] * index0 + zStrides[1] * index1 + zStrides[2] * index2 + zStrides[3] * index3);
@ -279,7 +279,7 @@ namespace functions {
uint32_t nSize3 = shape::sizeAt(zShapeInfo, 3);
uint32_t nSize4 = shape::sizeAt(zShapeInfo, 4);
for (uint32_t i = start; i < stop; i++) {
for (auto i = start; i < stop; i++) {
uint32_t index0 = i / nSize1;
uint32_t index1 = i % nSize1;
@ -326,7 +326,7 @@ namespace functions {
auto oX = x + tadOffsets[i];
PRAGMA_OMP_SIMD
for (int f = 0; f < tadLength; f++) {
for (unsigned int f = 0; f < tadLength; f++) {
auto offset = shape::indexOffset(f, xTadShapeShapeInfo, tadShapeShapeInfoCast, canCastX);
auto zOffset = shape::indexOffset(f, zTadShapeInfo, tadShapeInfoZCast, canCastZ);
oZ[zOffset] = OpType::op(oX[offset], y[offset]);
@ -344,7 +344,7 @@ namespace functions {
auto oX = x + tadOffsets[i];
PRAGMA_OMP_SIMD
for (int f = 0; f < tadLength; f++) {
for (unsigned int f = 0; f < tadLength; f++) {
auto offset = shape::indexOffset(f, xTadShapeShapeInfo, tadShapeShapeInfoCast, canCastX);
auto yOffset = shape::indexOffset(f, yShapeInfo, yShapeInfoCast, canCastY);
oZ[offset] = OpType::op(oX[offset], y[yOffset]);
@ -362,7 +362,7 @@ namespace functions {
auto oX = x + tadOffsets[i];
PRAGMA_OMP_SIMD
for (int f = 0; f < tadLength; f++) {
for (unsigned int f = 0; f < tadLength; f++) {
auto xOffset = shape::indexOffset(f, xTadShapeShapeInfo, tadShapeShapeInfoCast, canCastX);
auto offset = shape::indexOffset(f, yShapeInfo, yShapeInfoCast, canCastY);
oZ[offset] = OpType::op(oX[xOffset], y[offset]);
@ -382,7 +382,7 @@ namespace functions {
auto oX = x + tadOffsets[i];
PRAGMA_OMP_SIMD
for (int f = 0; f < tadLength; f++) {
for (unsigned int f = 0; f < tadLength; f++) {
auto xOffset = shape::indexOffset(f, xTadShapeShapeInfo, tadShapeShapeInfoCast, canCastX);
auto yOffset = shape::indexOffset(f, yShapeInfo, yShapeInfoCast, canCastY);
auto zOffset = shape::indexOffset(f, zTadShapeInfo, tadShapeInfoZCast, canCastZ);
@ -497,7 +497,7 @@ namespace functions {
auto oY = y + tadOffsets[i];
PRAGMA_OMP_SIMD
for (int f = 0; f < tadLength; f++) {
for (unsigned int f = 0; f < tadLength; f++) {
auto offset = shape::indexOffset(f, yTadShapeShapeInfo, tadShapeShapeInfoCast, canCastY);
auto zOffset = shape::indexOffset(f, zTadShapeInfo, tadShapeInfoZCast, canCastZ);
oZ[zOffset] = OpType::op(x[offset], oY[offset]);
@ -515,7 +515,7 @@ namespace functions {
auto oY = y + tadOffsets[i];
PRAGMA_OMP_SIMD
for (int f = 0; f < tadLength; f++) {
for (unsigned int f = 0; f < tadLength; f++) {
auto offset = shape::indexOffset(f, yTadShapeShapeInfo, tadShapeShapeInfoCast, canCastY);
auto xOffset = shape::indexOffset(f, yShapeInfo, xShapeInfoCast, canCastX);
oZ[offset] = OpType::op(x[xOffset], oY[offset]);
@ -533,7 +533,7 @@ namespace functions {
auto oY = y + tadOffsets[i];
PRAGMA_OMP_SIMD
for (int f = 0; f < tadLength; f++) {
for (unsigned int f = 0; f < tadLength; f++) {
auto yOffset = shape::indexOffset(f, yTadShapeShapeInfo, tadShapeShapeInfoCast, canCastY);
auto offset = shape::indexOffset(f, xShapeInfo, xShapeInfoCast, canCastX);
oZ[offset] = OpType::op(x[offset], oY[yOffset]);
@ -553,7 +553,7 @@ namespace functions {
auto oY = y + tadOffsets[i];
PRAGMA_OMP_SIMD
for (int f = 0; f < tadLength; f++) {
for (unsigned int f = 0; f < tadLength; f++) {
auto xOffset = shape::indexOffset(f, xShapeInfo, xShapeInfoCast, canCastX);
auto yOffset = shape::indexOffset(f, yTadShapeShapeInfo, tadShapeShapeInfoCast, canCastY);
auto zOffset = shape::indexOffset(f, zTadShapeInfo, tadShapeInfoZCast, canCastZ);

View File

@ -183,7 +183,7 @@ namespace functions {
auto oX = x + tadOffsets[i];
PRAGMA_OMP_SIMD
for (int f = 0; f < tadLength; f++) {
for (unsigned int f = 0; f < tadLength; f++) {
auto offset = shape::indexOffset(f, xTadShapeShapeInfo, tadShapeShapeInfoCast, canCastX);
oZ[offset] = OpType::op(oX[offset], y[offset], extraParams);
}
@ -200,7 +200,7 @@ namespace functions {
auto oX = x + tadOffsets[i];
PRAGMA_OMP_SIMD
for (int f = 0; f < tadLength; f++) {
for (unsigned int f = 0; f < tadLength; f++) {
auto offset = shape::indexOffset(f, xTadShapeShapeInfo, tadShapeShapeInfoCast, canCastX);
auto zOffset = shape::indexOffset(f, zTadShapeInfo, tadShapeInfoZCast, canCastZ);
oZ[zOffset] = OpType::op(oX[offset], y[offset], extraParams);
@ -218,7 +218,7 @@ namespace functions {
auto oX = x + tadOffsets[i];
PRAGMA_OMP_SIMD
for (int f = 0; f < tadLength; f++) {
for (unsigned int f = 0; f < tadLength; f++) {
auto offset = shape::indexOffset(f, xTadShapeShapeInfo, tadShapeShapeInfoCast, canCastX);
auto yOffset = shape::indexOffset(f, yShapeInfo, yShapeInfoCast, canCastY);
oZ[offset] = OpType::op(oX[offset], y[yOffset], extraParams);
@ -237,7 +237,7 @@ namespace functions {
auto oX = x + tadOffsets[i];
PRAGMA_OMP_SIMD
for (int f = 0; f < tadLength; f++) {
for (unsigned int f = 0; f < tadLength; f++) {
auto xOffset = shape::indexOffset(f, xTadShapeShapeInfo, tadShapeShapeInfoCast, canCastX);
auto offset = shape::indexOffset(f, yShapeInfo, yShapeInfoCast, canCastY);
oZ[offset] = OpType::op(oX[xOffset], y[offset], extraParams);
@ -257,7 +257,7 @@ namespace functions {
auto oX = x + tadOffsets[i];
PRAGMA_OMP_SIMD
for (int f = 0; f < tadLength; f++) {
for (unsigned int f = 0; f < tadLength; f++) {
auto xOffset = shape::indexOffset(f, xTadShapeShapeInfo, tadShapeShapeInfoCast, canCastX);
auto yOffset = shape::indexOffset(f, yShapeInfo, yShapeInfoCast, canCastY);
auto zOffset = shape::indexOffset(f, zTadShapeInfo, tadShapeInfoZCast, canCastZ);
@ -357,7 +357,7 @@ namespace functions {
auto oZ = z + zTadOffset[i];
PRAGMA_OMP_SIMD
for (int f = 0; f < tadLength; f++) {
for (unsigned int f = 0; f < tadLength; f++) {
auto offset = shape::indexOffset(f, yTadShapeShapeInfo, tadShapeShapeInfoCast, canCastY);
oZ[offset] = OpType::op(x[offset], oY[offset], extraParams);
}
@ -375,7 +375,7 @@ namespace functions {
auto oY = y + tadOffsets[i];
PRAGMA_OMP_SIMD
for (int f = 0; f < tadLength; f++) {
for (unsigned int f = 0; f < tadLength; f++) {
auto offset = shape::indexOffset(f, yTadShapeShapeInfo, tadShapeShapeInfoCast, canCastY);
auto zOffset = shape::indexOffset(f, zTadShapeInfo, tadShapeInfoZCast, canCastZ);
oZ[zOffset] = OpType::op(x[offset], oY[offset], extraParams);
@ -394,7 +394,7 @@ namespace functions {
auto oY = y + tadOffsets[i];
PRAGMA_OMP_SIMD
for (int f = 0; f < tadLength; f++) {
for (unsigned int f = 0; f < tadLength; f++) {
auto offset = shape::indexOffset(f, yTadShapeShapeInfo, tadShapeShapeInfoCast, canCastY);
auto xOffset = shape::indexOffset(f, xShapeInfo, xShapeInfoCast, canCastX);
oZ[offset] = OpType::op(x[xOffset], oY[offset], extraParams);
@ -413,7 +413,7 @@ namespace functions {
auto oY = y + tadOffsets[i];
PRAGMA_OMP_SIMD
for (int f = 0; f < tadLength; f++) {
for (unsigned int f = 0; f < tadLength; f++) {
auto yOffset = shape::indexOffset(f, yTadShapeShapeInfo, tadShapeShapeInfoCast, canCastY);
auto offset = shape::indexOffset(f, xShapeInfo, xShapeInfoCast, canCastX);
oZ[offset] = OpType::op(x[offset], oY[yOffset], extraParams);
@ -434,7 +434,7 @@ namespace functions {
auto oY = y + tadOffsets[i];
PRAGMA_OMP_SIMD
for (int f = 0; f < tadLength; f++) {
for (unsigned int f = 0; f < tadLength; f++) {
auto xOffset = shape::indexOffset(f, xShapeInfo, xShapeInfoCast, canCastX);
auto yOffset = shape::indexOffset(f, yTadShapeShapeInfo, tadShapeShapeInfoCast, canCastY);
auto zOffset = shape::indexOffset(f, zTadShapeInfo, tadShapeInfoZCast, canCastZ);

View File

@ -177,7 +177,7 @@ namespace functions {
auto oX = x + tadOffsets[i];
PRAGMA_OMP_SIMD
for (int f = 0; f < tadLength; f++) {
for (unsigned int f = 0; f < tadLength; f++) {
auto offset = shape::indexOffset(f, xTadShapeShapeInfo, tadShapeShapeInfoCast, canCastX);
oZ[offset] = OpType::op(oX[offset], y[offset]);
}
@ -194,7 +194,7 @@ namespace functions {
auto oX = x + tadOffsets[i];
PRAGMA_OMP_SIMD
for (int f = 0; f < tadLength; f++) {
for (unsigned int f = 0; f < tadLength; f++) {
auto offset = shape::indexOffset(f, xTadShapeShapeInfo, tadShapeShapeInfoCast, canCastX);
auto zOffset = shape::indexOffset(f, zTadShapeInfo, tadShapeInfoZCast, canCastZ);
oZ[zOffset] = OpType::op(oX[offset], y[offset]);
@ -212,7 +212,7 @@ namespace functions {
auto oX = x + tadOffsets[i];
PRAGMA_OMP_SIMD
for (int f = 0; f < tadLength; f++) {
for (unsigned int f = 0; f < tadLength; f++) {
auto offset = shape::indexOffset(f, xTadShapeShapeInfo, tadShapeShapeInfoCast, canCastX);
auto yOffset = shape::indexOffset(f, yShapeInfo, yShapeInfoCast, canCastY);
oZ[offset] = OpType::op(oX[offset], y[yOffset]);
@ -230,7 +230,7 @@ namespace functions {
auto oX = x + tadOffsets[i];
PRAGMA_OMP_SIMD
for (int f = 0; f < tadLength; f++) {
for (unsigned int f = 0; f < tadLength; f++) {
auto xOffset = shape::indexOffset(f, xTadShapeShapeInfo, tadShapeShapeInfoCast, canCastX);
auto offset = shape::indexOffset(f, yShapeInfo, yShapeInfoCast, canCastY);
oZ[offset] = OpType::op(oX[xOffset], y[offset]);
@ -250,7 +250,7 @@ namespace functions {
auto oX = x + tadOffsets[i];
PRAGMA_OMP_SIMD
for (int f = 0; f < tadLength; f++) {
for (unsigned int f = 0; f < tadLength; f++) {
auto xOffset = shape::indexOffset(f, xTadShapeShapeInfo, tadShapeShapeInfoCast, canCastX);
auto yOffset = shape::indexOffset(f, yShapeInfo, yShapeInfoCast, canCastY);
auto zOffset = shape::indexOffset(f, zTadShapeInfo, tadShapeInfoZCast, canCastZ);
@ -347,7 +347,7 @@ namespace functions {
auto oZ = z + zTadOffset[i];
PRAGMA_OMP_SIMD
for (int f = 0; f < tadLength; f++) {
for (uint f = 0; f < tadLength; f++) {
auto offset = shape::indexOffset(f, yTadShapeShapeInfo, tadShapeShapeInfoCast, canCastY);
oZ[offset] = OpType::op(x[offset], oY[offset]);
}
@ -364,7 +364,7 @@ namespace functions {
auto oZ = z + zTadOffset[i];
auto oY = y + tadOffsets[i];
for (int f = 0; f < tadLength; f++) {
for (uint f = 0; f < tadLength; f++) {
auto offset = shape::indexOffset(f, yTadShapeShapeInfo, tadShapeShapeInfoCast, canCastY);
auto zOffset = shape::indexOffset(f, zTadShapeInfo, tadShapeInfoZCast, canCastZ);
oZ[zOffset] = OpType::op(x[offset], oY[offset]);
@ -382,7 +382,7 @@ namespace functions {
auto oY = y + tadOffsets[i];
PRAGMA_OMP_SIMD
for (int f = 0; f < tadLength; f++) {
for (uint f = 0; f < tadLength; f++) {
auto offset = shape::indexOffset(f, yTadShapeShapeInfo, tadShapeShapeInfoCast, canCastY);
auto xOffset = shape::indexOffset(f, xShapeInfo, xShapeInfoCast, canCastX);
oZ[offset] = OpType::op(x[xOffset], oY[offset]);
@ -400,7 +400,7 @@ namespace functions {
auto oY = y + tadOffsets[i];
PRAGMA_OMP_SIMD
for (int f = 0; f < tadLength; f++) {
for (uint f = 0; f < tadLength; f++) {
auto yOffset = shape::indexOffset(f, yTadShapeShapeInfo, tadShapeShapeInfoCast, canCastY);
auto offset = shape::indexOffset(f, xShapeInfo, xShapeInfoCast, canCastX);
oZ[offset] = OpType::op(x[offset], oY[yOffset]);
@ -420,7 +420,7 @@ namespace functions {
auto oY = y + tadOffsets[i];
PRAGMA_OMP_SIMD
for (int f = 0; f < tadLength; f++) {
for (uint f = 0; f < tadLength; f++) {
auto xOffset = shape::indexOffset(f, xShapeInfo, xShapeInfoCast, canCastX);
auto yOffset = shape::indexOffset(f, yTadShapeShapeInfo, tadShapeShapeInfoCast, canCastY);
auto zOffset = shape::indexOffset(f, zTadShapeInfo, tadShapeInfoZCast, canCastZ);

View File

@ -124,7 +124,7 @@ void IndexReduce<X, Z>::exec(void *vx, Nd4jLong *xShapeInfo,
return;
const auto indexValue = OpType::startingIndexValue(x);
for (uint i = 0; i < zLen; i++)
for (Nd4jLong i = 0; i < zLen; i++)
z[i] = (Z) indexValue.index;
return;

View File

@ -93,7 +93,7 @@ namespace functions {
auto func = PRAGMA_THREADS_FOR {
PRAGMA_OMP_SIMD
for (uint64_t i = start; i < stop; i++) {
for (auto i = start; i < stop; i++) {
auto offset = shape::indexOffset(i, xShapeInfo, xShapeInfoCast, canCastX);
auto zOffset = shape::indexOffset(i, zShapeInfo, zShapeInfoCast, canCastZ);
z[zOffset] = OpClass::op(x[offset], y[offset], i, length, rng, extraArguments);
@ -111,7 +111,7 @@ namespace functions {
auto func = PRAGMA_THREADS_FOR {
PRAGMA_OMP_SIMD
for (uint64_t i = start; i < stop; i++) {
for (auto i = start; i < stop; i++) {
auto offset = shape::indexOffset(i, xShapeInfo, xShapeInfoCast, canCastX);
auto yOffset = shape::indexOffset(i, yShapeInfo, yShapeInfoCast, canCastY);
z[offset] = OpClass::op(x[offset], y[yOffset], i, length, rng, extraArguments);
@ -129,7 +129,7 @@ namespace functions {
auto func = PRAGMA_THREADS_FOR {
PRAGMA_OMP_SIMD
for (uint64_t i = start; i < stop; i++) {
for (auto i = start; i < stop; i++) {
auto xOffset = shape::indexOffset(i, xShapeInfo, xShapeInfoCast, canCastX);
auto offset = shape::indexOffset(i, yShapeInfo, yShapeInfoCast, canCastY);
z[offset] = OpClass::op(x[xOffset], y[offset], i, length, rng, extraArguments);
@ -149,7 +149,7 @@ namespace functions {
auto func = PRAGMA_THREADS_FOR {
PRAGMA_OMP_SIMD
for (uint64_t i = start; i < stop; i++) {
for (auto i = start; i < stop; i++) {
auto xOffset = shape::indexOffset(i, xShapeInfo, xShapeInfoCast, canCastX);
auto yOffset = shape::indexOffset(i, yShapeInfo, yShapeInfoCast, canCastY);
auto zOffset = shape::indexOffset(i, zShapeInfo, zShapeInfoCast, canCastZ);
@ -197,7 +197,7 @@ namespace functions {
else{
auto func = PRAGMA_THREADS_FOR {
PRAGMA_OMP_SIMD
for (uint64_t i = start; i < stop; i++) {
for (auto i = start; i < stop; i++) {
auto offset = shape::indexOffset(i, xShapeInfo, xShapeInfoCast, canCastX);
z[offset] = OpClass::op(x[offset], i, length, rng, extraArguments);
}
@ -213,7 +213,7 @@ namespace functions {
auto func = PRAGMA_THREADS_FOR {
PRAGMA_OMP_SIMD
for (uint64_t i = start; i < stop; i++) {
for (auto i = start; i < stop; i++) {
auto xOffset = shape::indexOffset(i, xShapeInfo, xShapeInfoCast, canCastX);
auto zOffset = shape::indexOffset(i, zShapeInfo, zShapeInfoCast, canCastZ);
z[zOffset] = OpClass::op(x[xOffset], i, length, rng, extraArguments);
@ -255,7 +255,7 @@ namespace functions {
auto func = PRAGMA_THREADS_FOR {
PRAGMA_OMP_SIMD
for (uint64_t i = start; i < stop; i++) {
for (auto i = start; i < stop; i++) {
auto offset = shape::indexOffset(i, zShapeInfo, zShapeInfoCast, canCastZ);
z[offset] = OpClass::op(i, length, rng, extraArguments);
}

View File

@ -55,7 +55,7 @@ namespace functions {
return;
const auto startingVal = OpType::startingValue(x);
for (uint i = 0; i < length; i++)
for (Nd4jLong i = 0; i < length; i++)
z[i] = startingVal;
return;
}
@ -68,7 +68,7 @@ namespace functions {
uint xShapeInfoCast[MAX_RANK];
const bool canCastX = nd4j::DataTypeUtils::castShapeInfo(xShapeInfo, xShapeInfoCast);
for (auto i = 0; i < length; i++)
for (Nd4jLong i = 0; i < length; i++)
startingValue = OpType::update(startingValue, OpType::op(x[shape::indexOffset(i, xShapeInfo, xShapeInfoCast, canCastX)], extraParams), extraParams);
z[0] = OpType::postProcess(startingValue, length, extraParams);
@ -94,7 +94,7 @@ namespace functions {
uint xShapeInfoCast[MAX_RANK];
bool canCastX = nd4j::DataTypeUtils::castShapeInfo(xShapeInfo, xShapeInfoCast);
for (auto i = 0; i < length; i++)
for (Nd4jLong i = 0; i < length; i++)
startingValue = OpType::update(startingValue, OpType::op(x[shape::indexOffset(i, xShapeInfo, xShapeInfoCast, canCastX)], extraParams), extraParams);
return OpType::postProcess(startingValue, length, extraParams);
@ -156,7 +156,7 @@ namespace functions {
return;
const auto startingVal = OpType::startingValue(x);
for (uint i = 0; i < resultLength; i++)
for (Nd4jLong i = 0; i < resultLength; i++)
z[i] = startingVal;
return;
}

View File

@ -59,7 +59,7 @@ namespace functions {
return;
const auto startingVal = OpType::startingValue(x);
for (uint i = 0; i < length; i++)
for (Nd4jLong i = 0; i < length; i++)
z[i] = startingVal;
return;
@ -113,7 +113,7 @@ namespace functions {
uint xShapeInfoCast[MAX_RANK];
bool canCastX = nd4j::DataTypeUtils::castShapeInfo(xShapeInfo, xShapeInfoCast);
for (auto i = 0; i < length; i++)
for (Nd4jLong i = 0; i < length; i++)
startingValue = OpType::update(startingValue, OpType::op(x[shape::indexOffset(i, xShapeInfo, xShapeInfoCast, canCastX)], extraParams), extraParams);
return OpType::postProcess(startingValue, length, extraParams);
@ -184,7 +184,7 @@ namespace functions {
return;
const auto startingVal = std::is_same<OpType, simdOps::Mean<X,Z>>::value ? nd4j::DataTypeUtils::nanOrZero<Z>() : static_cast<Z>(OpType::startingValue(x));
for (uint i = 0; i < resultLength; i++)
for (Nd4jLong i = 0; i < resultLength; i++)
z[i] = startingVal;
return;
}

View File

@ -55,7 +55,7 @@ namespace functions {
return;
const auto startingVal = OpType::startingValue(x);
for (uint i = 0; i < length; i++)
for (Nd4jLong i = 0; i < length; i++)
z[i] = startingVal;
return;
}
@ -110,7 +110,7 @@ namespace functions {
uint xShapeInfoCast[MAX_RANK];
bool canCastX = nd4j::DataTypeUtils::castShapeInfo(xShapeInfo, xShapeInfoCast);
for (auto i = 0; i < length; i++)
for (Nd4jLong i = 0; i < length; i++)
startingValue = OpType::update(startingValue, OpType::op(x[shape::indexOffset(i, xShapeInfo, xShapeInfoCast, canCastX)], extraParams), extraParams);
return OpType::postProcess(startingValue, length, extraParams);
@ -173,7 +173,7 @@ namespace functions {
return;
const auto startingVal = OpType::startingValue(x);
for (uint i = 0; i < resultLength; i++)
for (Nd4jLong i = 0; i < resultLength; i++)
z[i] = startingVal;
return;
}

View File

@ -57,7 +57,7 @@ namespace functions {
return;
const auto startingVal = OpType::startingValue(x);
for (uint i = 0; i < length; i++)
for (Nd4jLong i = 0; i < length; i++)
z[i] = startingVal;
return;
}
@ -111,7 +111,7 @@ namespace functions {
uint xShapeInfoCast[MAX_RANK];
bool canCastX = nd4j::DataTypeUtils::castShapeInfo(xShapeInfo, xShapeInfoCast);
for (auto i = 0; i < length; i++)
for (Nd4jLong i = 0; i < length; i++)
startingValue = OpType::update(startingValue, OpType::op(x[shape::indexOffset(i, xShapeInfo, xShapeInfoCast, canCastX)], extraParams), extraParams);
return OpType::postProcess(startingValue, length, extraParams);
@ -182,7 +182,7 @@ namespace functions {
return;
const auto startingVal = OpType::startingValue(x);
for (uint i = 0; i < zLength; i++)
for (Nd4jLong i = 0; i < zLength; i++)
z[i] = startingVal;
return;
}

View File

@ -53,7 +53,7 @@ void Reduce3<X,Z>::execScalar(void *vx, Nd4jLong *xShapeInfo,
return;
const auto startingVal = OpType::startingValue(x);
for (uint i = 0; i < length; i++)
for (Nd4jLong i = 0; i < length; i++)
z[i] = startingVal;
return;

View File

@ -73,7 +73,7 @@ void ScalarTransform<X, Y, Z>::transform(void *vx, Nd4jLong *xShapeInfo,
auto oX = x + xTadOffsets[r];
PRAGMA_OMP_SIMD
for (unsigned int f = 0; f < tadLength; f++)
for (int f = 0; f < tadLength; f++)
oZ[f] = OpType::op(oX[f], scalars[r], extraParams);
};
}
@ -83,7 +83,7 @@ void ScalarTransform<X, Y, Z>::transform(void *vx, Nd4jLong *xShapeInfo,
auto oX = x + xTadOffsets[r];
PRAGMA_OMP_SIMD
for (unsigned int f = 0; f < tadLength; f++)
for (int f = 0; f < tadLength; f++)
oZ[f * zTadEws] = OpType::op(oX[f * xTadEws], scalars[r], extraParams);
};
}

View File

@ -74,7 +74,7 @@ namespace functions {
auto oX = x + xTadOffsets[r];
PRAGMA_OMP_SIMD
for (unsigned int f = 0; f < tadLength; f++)
for (int f = 0; f < tadLength; f++)
oZ[f] = OpType::op(oX[f], scalars[r], extraParams);
};
}
@ -84,7 +84,7 @@ namespace functions {
auto oX = x + xTadOffsets[r];
PRAGMA_OMP_SIMD
for (unsigned int f = 0; f < tadLength; f++)
for (int f = 0; f < tadLength; f++)
oZ[f * zTadEws] = OpType::op(oX[f * xTadEws], scalars[r], extraParams);
};
}

View File

@ -74,7 +74,7 @@ namespace functions {
auto oX = x + xTadOffsets[r];
PRAGMA_OMP_SIMD
for (unsigned int f = 0; f < tadLength; f++)
for (int f = 0; f < tadLength; f++)
oZ[f] = OpType::op(oX[f], scalars[r], extraParams);
};
}
@ -84,7 +84,7 @@ namespace functions {
auto oX = x + xTadOffsets[r];
PRAGMA_OMP_SIMD
for (unsigned int f = 0; f < tadLength; f++)
for (int f = 0; f < tadLength; f++)
oZ[f * zTadEws] = OpType::op(oX[f * xTadEws], scalars[r], extraParams);
};
}

View File

@ -91,7 +91,7 @@ namespace functions {
uint xShapeInfoCast[MAX_RANK];
const bool canCast = nd4j::DataTypeUtils::castShapeInfo<uint>(xShapeInfo, xShapeInfoCast);
for (uint64_t i = 0; i < length; i++) {
for (Nd4jLong i = 0; i < length; i++) {
auto xOffset = shape::indexOffset(i, xShapeInfo, xShapeInfoCast, canCast);
SummaryStatsData<X> curr;
@ -116,7 +116,7 @@ namespace functions {
auto x = reinterpret_cast<X *>(vx);
auto z = reinterpret_cast<Z *>(vz);
auto extraParams = reinterpret_cast<Z *>(vextraParams);
int resultLength = shape::length(zShapeInfo);
auto resultLength = shape::length(zShapeInfo);
if(nd4j::ArrayOptions::arrayType(xShapeInfo) == nd4j::ArrayType::EMPTY) {
if(nd4j::ArrayOptions::arrayType(zShapeInfo) == nd4j::ArrayType::EMPTY)
@ -124,7 +124,7 @@ namespace functions {
SummaryStatsData<X> comp;
comp.initWithValue(x[0]);
for (uint i = 0; i < resultLength; i++)
for (Nd4jLong i = 0; i < resultLength; i++)
z[i] = OpType::getValue(biasCorrected, comp);
return;
}
@ -166,14 +166,14 @@ namespace functions {
comp.initWithValue(tx[0]);
if (tadEWS == 1 && tadOrder == 'c') {
for (int i = 1; i < tadLength; i++) {
for (Nd4jLong i = 1; i < tadLength; i++) {
SummaryStatsData <X> indexVal2;
indexVal2.initWithValue(tx[i]);
comp = update(comp, OpType::op(indexVal2, extraParams), extraParams);
}
} else {
for (int i = 1; i < tadLength; i++) {
for (Nd4jLong i = 1; i < tadLength; i++) {
auto xOffset = shape::indexOffset(i, tadShapeShapeInfo, tadShapeShapeInfoCast, canCast);
SummaryStatsData <X> indexVal2;

View File

@ -61,7 +61,7 @@ CUSTOM_OP_IMPL(batchnorm, 3, 1, false, 1, 2) {
else
axes.push_back(inRank-1); // default dimension to reduce along is last dimension
const int numOfAxes = axes.size();
const uint numOfAxes = axes.size();
REQUIRE_TRUE(numOfAxes <= inRank, 0, "BATCHNORM op: too big number of input axes to normalize over, expected number should be less or equal to rank of input array, but got %i and %i correspondingly !", numOfAxes, inRank);
// evaluate expected shape for mean, variance and gamma. These 3 arrays should have identical shapes
@ -83,7 +83,7 @@ CUSTOM_OP_IMPL(batchnorm, 3, 1, false, 1, 2) {
REQUIRE_TRUE(beta->isSameShape(expShape), 0, "BATCHNORM op: wrong shape of beta array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(expShape).c_str(), ShapeUtils::shapeAsString(beta).c_str());
// types of all input arrays should be the same
for(int i = 1; i < block.width(); ++i)
for(unsigned long i = 1; i < block.width(); ++i)
REQUIRE_TRUE(INPUT_VARIABLE(0)->dataType() == INPUT_VARIABLE(i)->dataType(), 0, "BATCHNORM op: types of all input arrays should be the same !");
nd4j_debug("MKL-DNN is not used for batchnorm!\n", 0);
@ -167,7 +167,7 @@ CUSTOM_OP_IMPL(batchnorm_bp, 4, 3, false, 1, 2) {
else
axes.push_back(inRank-1); // default dimension to reduce along is last dimension
const int numOfAxes = axes.size();
const uint numOfAxes = axes.size();
REQUIRE_TRUE(numOfAxes <= inRank, 0, "BATCHNORM_BP op: too big number of input axes to normalize over, expected number should be less or equal to rank of input array, but got %i and %i correspondingly !", numOfAxes, inRank);
// evaluate expected shape for mean, variance and gamma. These 3 arrays should have identical shapes
@ -191,7 +191,7 @@ CUSTOM_OP_IMPL(batchnorm_bp, 4, 3, false, 1, 2) {
REQUIRE_TRUE(input->isSameShape(dLdO), 0, "BATCHNORM_BP op: wrong shape of output gradients array, expected is %s, but got %s instead !", ShapeUtils::shapeAsString(input).c_str(), ShapeUtils::shapeAsString(dLdO).c_str());
// types of all input arrays should be the same (except dLdO)
for(int i = 1; i < block.width() - 2; ++i)
for(unsigned long i = 1; i < block.width() - 2; ++i)
REQUIRE_TRUE(INPUT_VARIABLE(0)->dataType() == INPUT_VARIABLE(i)->dataType(), 0, "BATCHNORM_BP op: types of arrays (input, mean, variance, gamma, beta) should be the same !");
// ***** calculations ***** //

View File

@ -30,7 +30,7 @@ namespace helpers {
int* pRowCounts = reinterpret_cast<int*>(rowCounts.buffer());
int const* pRows = reinterpret_cast<int const*>(rowP->getBuffer());
int const* pCols = reinterpret_cast<int const*>(colP->getBuffer());
for (int n = 0; n < N; n++) {
for (Nd4jLong n = 0; n < N; n++) {
int begin = pRows[n];//->e<int>(n);
int end = pRows[n + 1];//rowP->e<int>(n + 1);
for (int i = begin; i < end; i++) {
@ -72,7 +72,7 @@ namespace helpers {
int const* pRows = reinterpret_cast<int const*>(rowP->getBuffer());
int* symRowP = reinterpret_cast<int*>(outputRows->buffer());
symRowP[0] = 0;
for (int n = 0; n < N; n++)
for (Nd4jLong n = 0; n < N; n++)
symRowP[n + 1] = symRowP[n] + rowCounts->e<int>(n);
// outputRows->printBuffer("output rows");
@ -86,7 +86,7 @@ namespace helpers {
std::vector<int> offset(N);// = NDArrayFactory::create<int>('c', {N});
//PRAGMA_OMP_PARALLEL_FOR_SIMD_ARGS(schedule(guided) shared(offset))
for (int n = 0; n < N; n++) {
for (Nd4jLong n = 0; n < N; n++) {
int begin = pRows[n];
int bound = pRows[n + 1];

View File

@ -146,17 +146,17 @@ void softMaxForVector(nd4j::LaunchContext * context, const NDArray& input, NDArr
auto length = shape::length(inShapeInfo);
if (inEWS == 1) {
for (int i = 0; i < length; i++)
for (Nd4jLong i = 0; i < length; i++)
max = nd4j::math::nd4j_max<T>(max, inBuff[i]);
PRAGMA_OMP_SIMD_SUM(sum)
for (int i = 0; i < length; i++) {
for (Nd4jLong i = 0; i < length; i++) {
outBuff[i] = nd4j::math::nd4j_exp<T,T>(inBuff[i] - max);
sum += outBuff[i];
}
PRAGMA_OMP_SIMD
for (int i = 0; i < length; i++) {
for (Nd4jLong i = 0; i < length; i++) {
outBuff[i] /= sum;
outBuff[i] = nd4j::math::nd4j_log<T,T>(outBuff[i]);
}
@ -164,17 +164,17 @@ void softMaxForVector(nd4j::LaunchContext * context, const NDArray& input, NDArr
else if (inEWS > 1) {
PRAGMA_OMP_SIMD_MAX(max)
for (int i = 0; i < length; i++)
for (Nd4jLong i = 0; i < length; i++)
max = nd4j::math::nd4j_max<T>(max, inBuff[i * inEWS]);
PRAGMA_OMP_SIMD_SUM(sum)
for (int i = 0; i < length; i++) {
for (Nd4jLong i = 0; i < length; i++) {
outBuff[i * inEWS] = nd4j::math::nd4j_exp<T,T>(inBuff[i * inEWS] - max);
sum += outBuff[i * inEWS];
}
PRAGMA_OMP_SIMD
for (int i = 0; i < length; i++) {
for (Nd4jLong i = 0; i < length; i++) {
outBuff[i * inEWS] /= sum;
outBuff[i * inEWS] = nd4j::math::nd4j_log<T, T>(outBuff[i * inEWS]);
}

View File

@ -443,7 +443,7 @@ namespace nd4j {
const X* bias_new;
X* bias_extra = nullptr;
size_t total_num = 1;
for (size_t i = 0; i < rank; i++) {
for (Nd4jLong i = 0; i < rank; i++) {
total_num *= bases[i];
}
Nd4jLong inc;
@ -574,7 +574,7 @@ namespace nd4j {
for (size_t i = 0; i < 2; i++) {
numNC *= bases[i];
}
for (size_t i = 2; i < rank; i++) {
for (Nd4jLong i = 2; i < rank; i++) {
numHW *= bases[i];
}
Nd4jLong total_num = numNC * numHW;

View File

@ -27,7 +27,7 @@ namespace helpers {
void adjustAxis(Nd4jLong rank, NDArray* axisVector, std::vector<int>& output) {
output.resize(axisVector->lengthOf());
for (int e = 0; e < axisVector->lengthOf(); e++) {
for (Nd4jLong e = 0; e < axisVector->lengthOf(); e++) {
auto ca = axisVector->e<int>(e);
if (ca < 0)
ca += rank;
@ -37,7 +37,7 @@ namespace helpers {
}
void adjustAxis(Nd4jLong rank, std::vector<int> &axisVector) {
for (int e = 0; e < axisVector.size(); e++) {
for (size_t e = 0; e < axisVector.size(); e++) {
auto a = axisVector[e];
if (a < 0)
axisVector[e] = a + rank;

View File

@ -66,7 +66,7 @@ static void batchnorm_(const NDArray* input, const NDArray* mean, const NDArray*
Nd4jLong* zOffsets = xzSameOffset ? xOffsets : new Nd4jLong[steps];
Nd4jLong* auxBuff = new Nd4jLong[2 * input->rankOf()];
for (int j = 0; j < lenSmall; ++j) {
for (Nd4jLong j = 0; j < lenSmall; ++j) {
const bool isOwner = (j < info._numThreads) ? thread_id == j : thread_id == (j % info._numThreads);
@ -96,7 +96,7 @@ static void batchnorm_(const NDArray* input, const NDArray* mean, const NDArray*
shape::outerArrayOffsets(zOffsets, j, output->getShapeInfo(), mean->getShapeInfo(), auxBuff, dimsToExclude.data());
PRAGMA_OMP_SIMD
for (uint i = 0; i < steps; ++i)
for (Nd4jLong i = 0; i < steps; ++i)
z[zOffsets[i]] = (x[xOffsets[i]] - meanVal) * sigmaInvGam + betaVal;
}

View File

@ -65,8 +65,8 @@ void col2im_(nd4j::LaunchContext & context, const NDArray& input, NDArray& outp
T *col, *im;
int imRow, imCol;
for (uint b = start_x; b < stop_x; b += inc_x) {
for (uint c = start_y; c < stop_y; c += inc_y) {
for (auto b = start_x; b < stop_x; b += inc_x) {
for (auto c = start_y; c < stop_y; c += inc_y) {
for (int kRow = 0; kRow < kH; ++kRow) {
for (int kCol = 0; kCol < kW; ++kCol) {
for (int colH = 0; colH < oH; ++colH) {
@ -96,7 +96,7 @@ void col2im_(nd4j::LaunchContext & context, const NDArray& input, NDArray& outp
auto func = PRAGMA_THREADS_FOR {
T *col, *im;
for (uint b = start; b < stop; b++) {
for (auto b = start; b < stop; b++) {
T *im0 = imBuff + b * imStride0;
T *col4 = colBuff + b * colStride0;
for (int colH = 0; colH < oH; ++colH, col4 += colStride4) {

View File

@ -55,8 +55,8 @@ static void dilation2d_(NDArray *input, NDArray *weights, NDArray *output, const
auto func = PRAGMA_THREADS_FOR_2D {
for (uint b = start_x; b < stop_x; b += inc_x) {
for (uint oh = start_y; oh < stop_y; oh += inc_y) {
for (auto b = start_x; b < stop_x; b += inc_x) {
for (auto oh = start_y; oh < stop_y; oh += inc_y) {
for (uint ow = 0; ow < oW; ++ow) {
for (uint c = 0; c < iC; ++c) {
@ -70,7 +70,7 @@ static void dilation2d_(NDArray *input, NDArray *weights, NDArray *output, const
const int iw = ow * sW - pW + kw * dW;
if (iw < 0 || iw >= iW) continue;
uint xCoords[4] = {b, (uint)ih, (uint)iw, c};
uint xCoords[4] = { static_cast<uint>(b), static_cast<uint>(ih), static_cast<uint>(iw), c};
uint yCoords[3] = {kh, kw, c};
const X val = x[shape::getOffset(xShapeInfo, xCoords)] + y[shape::getOffset(yShapeInfo, yCoords)];
@ -79,7 +79,7 @@ static void dilation2d_(NDArray *input, NDArray *weights, NDArray *output, const
}
}
uint zCoords[4] = {b, oh, ow, c};
uint zCoords[4] = { static_cast<uint>(b), static_cast<uint>(oh), ow, c};
z[shape::getOffset(zShapeInfo, zCoords)] = static_cast<Z>(max);
}
}

View File

@ -63,7 +63,7 @@ namespace helpers {
std::vector<Nd4jLong> dims(reduceShape->lengthOf());
bool fit = true;
for( int i = 0; i < dims.size(); i++ ) {
for(auto i = 0; i < dims.size(); i++ ) {
if (fit) {
dims[i] = reduceShape->e<Nd4jLong>(i);
for (int e = 0; e < input->rankOf(); ++e)

View File

@ -53,7 +53,7 @@ namespace nd4j {
outputs[i].second = 0;
//PRAGMA_OMP_PARALLEL_FOR_IF(indices->lengthOf() > Environment::getInstance()->elementwiseThreshold())
for (int e = 0; e < indices->lengthOf(); ++e)
for (Nd4jLong e = 0; e < indices->lengthOf(); ++e)
if ((*indices).e<Nd4jLong>(e) == i)
listOutForCurrent.at(outputs[i].second++)->assign(listOfTensors.at(e));
}
@ -65,7 +65,7 @@ namespace nd4j {
for (auto i = start; i < stop; i++) {
outputs[i].first = outputList[i];
outputs[i].second = 0;
for (int e = 0; e < indices->lengthOf(); ++e)
for (Nd4jLong e = 0; e < indices->lengthOf(); ++e)
if (indices->e<Nd4jLong>(e) == i)
outputs[i].first->p(outputs[i].second++, input->e<T>(e));
}
@ -83,7 +83,7 @@ namespace nd4j {
for (int e = 0; e < numOfData; e++) {
auto data = inputs[e];
auto index = indices[e];
for (int i = 0; i < index->lengthOf(); i++) {
for (Nd4jLong i = 0; i < index->lengthOf(); i++) {
Nd4jLong pos = index->e<Nd4jLong>(i);
if (pos < 0) {
nd4j_printf("dynamic_stitch: Index value should be non-negative. But %i was given", pos);
@ -100,7 +100,7 @@ namespace nd4j {
}
else {
std::vector<int> restDims(output->rankOf() - 1);
for (int i = restDims.size(); i > 0; i--)
for (auto i = restDims.size(); i > 0; i--)
restDims[restDims.size() - i] = output->rankOf() - i;
ResultSet listOfOutTensors = output->allTensorsAlongDimension(restDims);
@ -109,12 +109,12 @@ namespace nd4j {
auto data = inputs[e];
auto index = indices[e];
std::vector<int> sourceDims(data->rankOf() - index->rankOf());
for (int i = sourceDims.size(); i > 0; i--)
for (auto i = sourceDims.size(); i > 0; i--)
sourceDims[sourceDims.size() - i] = data->rankOf() - i;
ResultSet listOfTensors = data->allTensorsAlongDimension(sourceDims) ;
for (int i = 0; i < index->lengthOf(); i++) {
for (Nd4jLong i = 0; i < index->lengthOf(); i++) {
auto pos = index->e<Nd4jLong>(i);
if (pos < 0) {
nd4j_printf("dynamic_stitch: Index value should be non-negative. But %i was given", pos);
@ -146,7 +146,7 @@ namespace nd4j {
ResultSet listOfTensors = outputList[0]->allTensorsAlongDimension(sourceDims);
for (unsigned int i = 0; i < inputGradientList.size(); i++) {
for (auto i = 0; i < inputGradientList.size(); i++) {
outputs[i].first = inputGradientList[i];
if (outputs[i].first->rankOf() < 1) continue; // skip empty gradient outs
std::vector<int> outDims(outputs[i].first->rankOf() - 1);
@ -158,7 +158,7 @@ namespace nd4j {
outputs[i].second = 0;
for (int e = 0; e < indices->lengthOf(); ++e)
for (Nd4jLong e = 0; e < indices->lengthOf(); ++e)
if (indices->e<Nd4jLong>(e) == i)
listOfTensors.at(e)->assign(listOutForCurrent.at(outputs[i].second++));
}
@ -171,7 +171,7 @@ namespace nd4j {
for (auto i = start; i < stop; i++) {
outputs[i].first = inputGradientList[i];
outputs[i].second = 0;
for (int e = 0; e < indices->lengthOf(); ++e)
for (Nd4jLong e = 0; e < indices->lengthOf(); ++e)
if (indices->e<Nd4jLong>(e) == i)
output->p<T>(e, outputs[i].first->e<T>(outputs[i].second++));
}

View File

@ -45,7 +45,7 @@ namespace nd4j {
auto xShapeInfo = inputs[e]->shapeInfo();
auto xLength = inputs[e]->lengthOf();
for (uint i = 0; i < xLength; i++)
for (Nd4jLong i = 0; i < xLength; i++)
z[i] = xBuffer[getIndexOffsetOrdered(i, xShapeInfo, order)];
}
}

View File

@ -26,7 +26,7 @@ namespace nd4j {
namespace helpers {
template <typename T>
static void hashCode_(LaunchContext *context, NDArray &array, NDArray &result) {
auto blockSize = 32;
Nd4jLong blockSize = 32;
auto length = array.lengthOf();
int numBlocks = length / blockSize + ((length % blockSize == 0) ? 0 : 1);
auto tempA = NDArrayFactory::create<Nd4jLong>('c', {numBlocks}, context);
@ -42,11 +42,11 @@ namespace nd4j {
// we divide array into 32 element chunks, and store intermediate results once
auto func = PRAGMA_THREADS_FOR {
for (auto b = 0; b < stop; b++) {
for (auto b = start; b < stop; b++) {
auto blockBuffer = buffer + b * numBlocks;
Nd4jLong r = 1;
for (int e = 0; e < blockSize && e + (b * numBlocks) < length; e++) {
for (Nd4jLong e = 0; e < blockSize && e + (b * numBlocks) < length; e++) {
auto v = longBytes<T>(blockBuffer[e]);
r = 31 * r + v;
}
@ -68,7 +68,7 @@ namespace nd4j {
auto blockBuffer = tempBuffer + b * numBlocks;
Nd4jLong r = 1;
for (int e = 0; e < blockSize && e + (b * numBlocks) < lastLength; e++) {
for (Nd4jLong e = 0; e < blockSize && e + (b * numBlocks) < lastLength; e++) {
auto v = longBytes<T>(blockBuffer[e]);
r = 31 * r + v;
}
@ -103,4 +103,3 @@ namespace nd4j {
}
}
}

View File

@ -49,7 +49,7 @@ namespace nd4j {
}
PRAGMA_OMP_SIMD
for (int x = 0; x < numBins; x++) {
for (Nd4jLong x = 0; x < numBins; x++) {
result[x] += bins[x];
}

View File

@ -64,8 +64,8 @@ static void im2col_(nd4j::LaunchContext & context, const NDArray& input, NDArra
if (shape::order(imShapeBuffer) == 'c' && shape::order(colShapeBuffer) == 'c' && shape::strideDescendingCAscendingF(imShapeBuffer) && shape::strideDescendingCAscendingF(colShapeBuffer)) {
auto func = PRAGMA_THREADS_FOR_2D {
for (int b = start_x; b < stop_x; b++) {
for (int c = start_y; c < stop_y; c++) {
for (auto b = start_x; b < stop_x; b++) {
for (auto c = start_y; c < stop_y; c++) {
for (int kRow = 0; kRow < kH; ++kRow) {
for (int kCol = 0; kCol < kW; ++kCol) {
for (int colH = 0; colH < oH; ++colH) {
@ -98,8 +98,8 @@ static void im2col_(nd4j::LaunchContext & context, const NDArray& input, NDArra
T *col, *im;
int imRow, imCol;
for (int b = start_x; b < stop_x; b += inc_x) {
for (int colH = start_y; colH < stop_y; colH += inc_y) {
for (auto b = start_x; b < stop_x; b += inc_x) {
for (auto colH = start_y; colH < stop_y; colH += inc_y) {
for (int colW = 0; colW < oW; ++colW) {
for (int c = 0; c < iC; ++c) {
for (int kRow = 0; kRow < kH; ++kRow) {

View File

@ -219,16 +219,16 @@ namespace helpers {
auto func = PRAGMA_THREADS_FOR {
for (auto batch = start; batch < stop; ++batch) {
auto pInput = pInputBuf + batch * inBatchNumValues;
for (auto y = 0; y < outHeight; ++y) {
for (Nd4jLong y = 0; y < outHeight; ++y) {
auto pOutput = pOutputBuf + (batch * outHeight + y) * outRowSize;
const T* ysInputLowerPtr = pInput + ys[y]._bottomIndex * inRowSize;
const T* ysInputUpperPtr = pInput + ys[y]._topIndex * inRowSize;
double yVal = ys[y]._interpolarValue;
for (auto x = 0; x < outWidth; ++x) {
for (Nd4jLong x = 0; x < outWidth; ++x) {
auto xsBottom = xsPtr[x]._bottomIndex;
auto xsTop = xsPtr[x]._topIndex;
auto xVal = xsPtr[x]._interpolarValue;
for (auto c = 0; c < channels; ++c) {
for (Nd4jLong c = 0; c < channels; ++c) {
double topLeft(ysInputLowerPtr[xsBottom + c]);
double topRight(ysInputLowerPtr[xsTop + c]);
double bottomLeft(ysInputUpperPtr[xsBottom + c]);
@ -310,14 +310,14 @@ namespace helpers {
if (halfPixelCenter) {
inY = nd4j::math::nd4j_max(0LL, inY);
}
for (auto x = 0; x < outWidth; ++x) {
for (Nd4jLong x = 0; x < outWidth; ++x) {
auto posX = alignCorners ? static_cast<Nd4jLong>(nd4j::math::p_round<float>(scaler(x, st.widthScale))) : static_cast<Nd4jLong>(nd4j::math::p_floor<float>(scaler(x, st.widthScale)));
Nd4jLong inX = nd4j::math::nd4j_min(posX,inWidth - 1);
if (halfPixelCenter) {
inX = nd4j::math::nd4j_max(0LL, inX);
}
// copy pixel over all channels
for (auto e = 0; e < channels; e++)
for (Nd4jLong e = 0; e < channels; e++)
output->t<T>(b, y, x, e) = images->t<T>(b, inY, inX, e);
}
}
@ -613,7 +613,7 @@ namespace helpers {
for (auto b = start; b < stop; ++b) {
auto pInput = inputPtr + b * inBatchWidth;
for (auto y = 0; y < outHeight; ++y) {
for (Nd4jLong y = 0; y < outHeight; ++y) {
auto pOutput = &pOutputY[(b * outHeight + y) * outWidth * numChannels];
WeightsAndIndices yWai;
@ -635,7 +635,7 @@ namespace helpers {
F cached_value_0[4] = {0};
F cached_value_1[4] = {0};
F cached_value_2[4] = {0};
for (auto x = 0; x < resizerState.outWidth; ++x) {
for (Nd4jLong x = 0; x < resizerState.outWidth; ++x) {
const WeightsAndIndices &xWai = xWais[x];
// Shift values in cached_value_* to fill first '_advance' values.
switch (xWai._advance) {
@ -712,7 +712,7 @@ namespace helpers {
xWai._weight2, xWai._weight3);
}
} else {
for (auto x = 0; x < resizerState.outWidth; ++x) {
for (Nd4jLong x = 0; x < resizerState.outWidth; ++x) {
const WeightsAndIndices &xWai = xWais[x];
// Shift values in cachedValue to fill first '_advance' values.
switch (xWai._advance) {
@ -828,7 +828,7 @@ namespace helpers {
float sum_0 = 0;
float sum_1 = 0;
float sum_2 = 0;
for (int i = 0; i < yPtrs.size(); ++i) {
for (size_t i = 0; i < yPtrs.size(); ++i) {
const T* ptr = yPtrs[i].yPtr;
float scaleX = xCache.startScale;
Nd4jLong offset = 3 * boundIfNeeded(xCache.start, st.inWidth);
@ -879,7 +879,7 @@ namespace helpers {
const auto numChannels = st.channels;
for (Nd4jLong c = 0; c < numChannels; ++c) {
float sum = 0;
for (int i = 0; i < yPtrs.size(); ++i) {
for (size_t i = 0; i < yPtrs.size(); ++i) {
T const* ptr = yPtrs[i].yPtr;
float scaleX = xCache.startScale;
float sumY = static_cast<float>(ptr[numChannels * boundIfNeeded(xCache.start, st.inWidth) + c]) * scaleX;

View File

@ -62,7 +62,7 @@ static int lrnFunctor_(nd4j::graph::Context& block, NDArray* input, NDArray* out
if(inTadEws == 1 && outTadEws == 1) {
auto func = PRAGMA_THREADS_FOR {
for (uint i = start; i < stop; i++) {
for (auto i = start; i < stop; i++) {
const T *x = inBuff + inTadOffsets[i];
T *y = outBuff + outTadOffsets[i];
@ -70,7 +70,7 @@ static int lrnFunctor_(nd4j::graph::Context& block, NDArray* input, NDArray* out
// calculate squared sum of elements per each j-th element range [j - depth, j + depth + 1]
// we store each squared sum in corresponding element of y array
for (uint j = 0; j < tadLen; ++j) {
for (Nd4jLong j = 0; j < tadLen; ++j) {
const uint begin = nd4j::math::nd4j_max<int>(0, j - depth);
const uint last = depth + j + 1;
const uint end = nd4j::math::nd4j_min<int>(last, tadLen);
@ -100,7 +100,7 @@ static int lrnFunctor_(nd4j::graph::Context& block, NDArray* input, NDArray* out
}
else {
auto func = PRAGMA_THREADS_FOR {
for (uint i = 0; i < numOfTads; ++i) {
for (Nd4jLong i = 0; i < numOfTads; ++i) {
const T *x = inBuff + inTadOffsets[i];
T *y = outBuff + outTadOffsets[i];
@ -108,7 +108,7 @@ static int lrnFunctor_(nd4j::graph::Context& block, NDArray* input, NDArray* out
// calculate squared sum of elements per each j-th element range [j - depth, j + depth + 1]
// we store each squared sum in corresponding element of y array
for (uint j = 0; j < tadLen; ++j) {
for (Nd4jLong j = 0; j < tadLen; ++j) {
const uint begin = nd4j::math::nd4j_max<int>(0, j - depth);
const uint last = depth + j + 1;
const uint end = nd4j::math::nd4j_min<int>(last, tadLen);
@ -179,13 +179,13 @@ static void lrnBP_(const NDArray& input, const NDArray& gradO, NDArray& gradI, c
if(inTadEws == 1 && gradITadEws == 1) {
auto func = PRAGMA_THREADS_FOR {
for (uint i = start; i < stop; i++) {
for (auto i = start; i < stop; i++) {
const X *x = inBuff + inTadOffsets[i];
Y *y = gradIBuff + gradITadOffsets[i];
// this loop calculates squared sum of elements per each j-th element range [j - depth, j + depth + 1]
// we store each squared sum in corresponding element of y array
for (uint j = 0; j < tadLen; ++j) {
for (Nd4jLong j = 0; j < tadLen; ++j) {
const uint begin = nd4j::math::nd4j_max<int>(0, j - depth);
const uint last = depth + j + 1;
const uint end = nd4j::math::nd4j_min<int>(last, tadLen);
@ -208,7 +208,7 @@ static void lrnBP_(const NDArray& input, const NDArray& gradO, NDArray& gradI, c
Y prev = 0;
// second loop calculates derivatives using information gained in first loop above
for (uint j = 0; j < tadLen; ++j) {
for (Nd4jLong j = 0; j < tadLen; ++j) {
const uint begin = nd4j::math::nd4j_max<int>(0, j - depth);
const uint last = depth + j + 1;
const uint end = nd4j::math::nd4j_min<int>(last, tadLen);
@ -247,13 +247,13 @@ static void lrnBP_(const NDArray& input, const NDArray& gradO, NDArray& gradI, c
else {
auto func = PRAGMA_THREADS_FOR {
for (uint i = start; i < stop; i++) {
for (auto i = start; i < stop; i++) {
const X *x = inBuff + inTadOffsets[i];
Y *y = gradIBuff + gradITadOffsets[i];
// this loop calculates squared sum of elements per each j-th element range [j - depth, j + depth + 1]
// we store each squared sum in corresponding element of y array
for (uint j = 0; j < tadLen; ++j) {
for (Nd4jLong j = 0; j < tadLen; ++j) {
const uint begin = nd4j::math::nd4j_max<int>(0, j - depth);
const uint last = depth + j + 1;
const uint end = nd4j::math::nd4j_min<int>(last, tadLen);
@ -280,7 +280,7 @@ static void lrnBP_(const NDArray& input, const NDArray& gradO, NDArray& gradI, c
Y prev = 0;
// second loop calculates derivatives using information gained in first loop above
for (uint j = 0; j < tadLen; ++j) {
for (Nd4jLong j = 0; j < tadLen; ++j) {
const uint begin = nd4j::math::nd4j_max<int>(0, j - depth);
const uint last = depth + j + 1;
const uint end = nd4j::math::nd4j_min<int>(last, tadLen);

View File

@ -124,7 +124,7 @@ static void fusedTanh(NDArray *z, NDArray *i, NDArray *c, const NDArray *cLast,
auto h_ = h->bufferAsT<T>();
auto func = PRAGMA_THREADS_FOR {
for (uint e = start; e < stop; e++) {
for (auto e = start; e < stop; e++) {
c_[e] = z_[e] * i_[e] + (f_[e] * cLast_[e]);
h_[e] = nd4j::math::nd4j_tanh<T, T>(c_[e]);
}

View File

@ -32,7 +32,7 @@ namespace helpers {
Nd4jLong preLastDim = input->rankOf() - 2;
ResultSet listOut = output->allTensorsAlongDimension({(int)preLastDim, (int)lastDim});
ResultSet listDiag = input->allTensorsAlongDimension({(int)preLastDim, (int)lastDim});
for (Nd4jLong e = 0; e < listOut.size(); ++e) {
for (Nd4jLong e = 0; e < static_cast<Nd4jLong>(listOut.size()); ++e) {
NDArray* inputMatrix = listDiag.at(e);
NDArray* outputMatrix = listOut.at(e);
if (outputMatrix != inputMatrix) // if not inplace

View File

@ -68,7 +68,7 @@ namespace nd4j {
if (shape::elementWiseStride(xShapeInfo) == 1 && shape::elementWiseStride(zShapeInfo) == 1 &&
shape::order(xShapeInfo) == 'c' && shape::order(zShapeInfo) == 'c') {
for (int e = 0; e < length; e++) {
for (Nd4jLong e = 0; e < length; e++) {
sum = op == scalar::Add ? simdOps::Add<T, T, T>::op(sum, x[e]) : simdOps::Multiply<T, T, T>::op(sum, x[e]);
if (!exclusive)
@ -81,7 +81,7 @@ namespace nd4j {
}
else {
for (int e = 0; e < length; e++) {
for (Nd4jLong e = 0; e < length; e++) {
auto xOffset = shape::getIndexOffset(e, xShapeInfo);
auto zOffset = shape::getIndexOffset(e, zShapeInfo);

View File

@ -43,8 +43,8 @@ namespace helpers {
T const* vBuf = v.getDataBuffer()->primaryAsT<T>();
T* resBuf = res.dataBuffer()->primaryAsT<T>();
auto interloop = PRAGMA_THREADS_FOR_2D {
for (int i = start_x; i < n; i += inc_x)
for (int j = start_y; j < n; j += inc_y)
for (auto i = start_x; i < n; i += inc_x)
for (auto j = start_y; j < n; j += inc_y)
resBuf[i * n + j] = -2 * vBuf[i] * vBuf[j] + (i == j ? T(1) : T(0));
};
@ -63,7 +63,7 @@ namespace helpers {
NDArray z = *matrix;
NDArray e('c', {M}, DataTypeUtils::fromT<T>()); // two internal buffers and scalar for squared norm
for (auto k = 0; k < N && k < M - 1; k++) { // loop for columns, but not further then row number
for (Nd4jLong k = 0; k < N && k < M - 1; k++) { // loop for columns, but not further then row number
e.nullify();
z = matrixMinor<T>(z, k); // minor computing for current column with given matrix z (initally is a input matrix)
// z.printIndexedBuffer("Minor!!!");
@ -87,7 +87,7 @@ namespace helpers {
}
resQ.assign(q[0]); //
// MmulHelper::matmul(&q[0], matrix, &resR, false, false);
for (int i = 1; i < N && i < M - 1; i++) {
for (Nd4jLong i = 1; i < N && i < M - 1; i++) {
auto tempResQ = resQ;
MmulHelper::matmul(&q[i], &resQ, &tempResQ, false, false); // use mmulMxM?
resQ = std::move(tempResQ);

View File

@ -57,10 +57,10 @@ namespace helpers {
T* outputBuf = output->dataBuffer()->primaryAsT<T>();
PRAGMA_OMP_PARALLEL_FOR
for (auto k = 0; k < shift; k++) {
for (Nd4jLong k = 0; k < shift; k++) {
auto pos = k * step;
auto u = rng.relativeT<T>(k, 0., 1.);
for (auto e = 0; e < step; e++)
for (Nd4jLong e = 0; e < step; e++)
if (directOutput) {
outputBuf[pos + e] = math::nd4j_igamma<T, T, T>(copyAlpha->t<T>(e),
beta != nullptr ? copyBeta->t<T>(e) * u : u);
@ -104,10 +104,10 @@ namespace helpers {
bool directLa = lambda->ews() == 1 && lambda->ordering() == 'c';
bool directOut = output->ews() == 1 && output->ordering() == 'c';
PRAGMA_OMP_PARALLEL_FOR
for (auto k = 0; k < shift; k++) {
for (Nd4jLong k = 0; k < shift; k++) {
auto pos = k * step;
auto u = rng.relativeT<T>(k, 0., 1.);
for (auto e = 0; e < step; e++) {
for (Nd4jLong e = 0; e < step; e++) {
auto p = math::nd4j_exp<T, T>(-lambda->t<T>(e));
auto s = p;
auto x = T(0.f);
@ -143,7 +143,7 @@ namespace helpers {
RandomLauncher::fillUniform(context, rng, output, minVal, maxVal);
else {
PRAGMA_OMP_PARALLEL_FOR
for (auto i = 0; i < output->lengthOf(); i++) {
for (Nd4jLong i = 0; i < output->lengthOf(); i++) {
output->t<T>(i) = rng.relativeT<T>(i, minVal, maxVal);
}
}
@ -184,7 +184,7 @@ namespace helpers {
auto nSamplesPerBatch = nBatchIndex * numOfClassX * numOfSamples;
auto nClassesPerSample = nSampleIndexInBatch * numOfClassX;
for (auto nClass = 0; nClass < numOfClassX; nClass += 1) {
for (Nd4jLong nClass = 0; nClass < numOfClassX; nClass += 1) {
auto nIndex = nSamplesPerBatch + nClassesPerSample + nClass;
auto unifornLog = nd4j::math::nd4j_log<Tx, Tx>(-nd4j::math::nd4j_log<Tx, Tx>(rng.relativeT<Tx>(nIndex, minVal, maxVal)));
Tx tValue = (xTad[nClass * xDimAstride] - unifornLog);

View File

@ -50,7 +50,7 @@ namespace helpers {
width = lastDim;
}
for (int i = 0; i < input->lengthOf(); i += lastDim) {
for (Nd4jLong i = 0; i < input->lengthOf(); i += lastDim) {
for (Nd4jLong k = startPos; k < width && pos < output->lengthOf(); k++) {
output->p(pos++, input->e<T>(i + k));
}

View File

@ -110,7 +110,7 @@ namespace helpers {
}
else {
std::vector<int> dims(source->rankOf() - axe - 1);
for (int i = 0; i < dims.size(); ++i)
for (size_t i = 0; i < dims.size(); ++i)
dims[i] = axe + 1 + i;
ResultSet listOfTensors = source->allTensorsAlongDimension({dims});

View File

@ -55,9 +55,9 @@ static void batchToSpace_(const NDArray& input, NDArray& output, const uint crop
// loop through output array
auto func = PRAGMA_THREADS_FOR_3D {
for (uint b = start_x; b < stop_x; b += inc_x) {
for (uint h = start_y; h < stop_y; h += inc_y) {
for (uint w = start_z; w < stop_z; w += inc_z) {
for (auto b = start_x; b < stop_x; b += inc_x) {
for (auto h = start_y; h < stop_y; h += inc_y) {
for (auto w = start_z; w < stop_z; w += inc_z) {
for (uint c = 0; c < iC; ++c) {
const Nd4jLong xOffset = b * xShapeInfo[5] + h * xShapeInfo[6] + w * xShapeInfo[7] + c * xShapeInfo[8];
const Nd4jLong zOffset = b * zShapeInfo[5] + (h - cropBottom) * zShapeInfo[6] + (w - cropLeft) * zShapeInfo[7] + c * zShapeInfo[8];
@ -146,11 +146,11 @@ void batchToSpaceND(nd4j::LaunchContext* context, const NDArray& input, const ND
std::vector<Nd4jLong> temp(numOfSpatialDims + rank);
int i;
uint i;
for(i = 0; i < numOfSpatialDims; ++i)
temp[i] = blockShape.e<Nd4jLong>(i);
temp[i++] = output.sizeAt(0);
for(int j = 1; j < rank; ++i, ++j)
for(uint j = 1; j < rank; ++i, ++j)
temp[i] = input.sizeAt(j);
NDArray inputRearranged0 = input.reshape(input.ordering(), temp);
@ -163,7 +163,7 @@ void batchToSpaceND(nd4j::LaunchContext* context, const NDArray& input, const ND
temp[2*i - 1] = numOfSpatialDims + i;
temp[2*i] = i - 1;
}
for(i = 2 * numOfSpatialDims + 1; i < temp.size(); ++i)
for(i = 2 * numOfSpatialDims + 1; i < static_cast<uint>(temp.size()); ++i)
temp[i] = i;
inputRearranged0.permutei(temp);
@ -216,8 +216,8 @@ static void spaceToBatch_(const NDArray& input, NDArray& output, const uint padB
// loop through output array
auto func = PRAGMA_THREADS_FOR_2D {
for (uint b = start_x; b < stop_x; b += inc_x) {
for (uint h = start_y; h < stop_y; h += inc_y) {
for (auto b = start_x; b < stop_x; b += inc_x) {
for (auto h = start_y; h < stop_y; h += inc_y) {
for (uint w = 0; w < oW; ++w) {
for (uint c = 0; c < iC; ++c) {

View File

@ -87,7 +87,7 @@ namespace helpers {
if (input->isVector()) {
T val = input->e<T>(0);
for (int e = 1; e < indices->lengthOf(); e++) {
for (Nd4jLong e = 1; e < indices->lengthOf(); e++) {
if (idx == indices->e<Nd4jLong>(e)) {
// min
val = nd4j::math::nd4j_min<T>(val, input->t<T>(e));
@ -115,7 +115,7 @@ namespace helpers {
for (Nd4jLong i = 1; i < indices->lengthOf(); i++) {
if (indices->e<Nd4jLong>(i) == idx) {
for (int e = 0; e < minT->lengthOf(); e++) {
for (Nd4jLong e = 0; e < minT->lengthOf(); e++) {
minT->p(e, nd4j::math::nd4j_min(minT->e<T>(e), listOfTensors.at(i)->e<T>(e)));
}
}
@ -138,7 +138,7 @@ namespace helpers {
T val = T(0.f);
int count = 0;
for (int e = 0; e < indices->lengthOf(); e++) {
for (Nd4jLong e = 0; e < indices->lengthOf(); e++) {
if (idx == indices->e<int>(e)) {
// mean
val += input->e<T>(e);
@ -166,7 +166,7 @@ namespace helpers {
auto meanV = meanT->dup();
meanV.assign(listOfTensors.at(0));
for (int i = 1; i < indices->lengthOf(); i++) {
for (Nd4jLong i = 1; i < indices->lengthOf(); i++) {
if (indices->e<int>(i) == idx) {
auto func = PRAGMA_THREADS_FOR {
for (auto e = start; e < stop; e++) {
@ -198,7 +198,7 @@ namespace helpers {
if (input->isVector()) {
T val = T(0.f);
int count = 0;
for (int e = 0; e < indices->lengthOf(); e++) {
for (Nd4jLong e = 0; e < indices->lengthOf(); e++) {
if (idx == indices->e<int>(e)) {
// sum
val += input->t<T>(e);
@ -220,7 +220,7 @@ namespace helpers {
std::vector<std::pair<NDArray*, int>> outputs(numOfClasses);
auto sumT = listOfOutTensors.at(idx);
for (int i = 0; i < indices->lengthOf(); i++) {
for (Nd4jLong i = 0; i < indices->lengthOf(); i++) {
if (indices->e<int>(i) == idx) {
auto func = PRAGMA_THREADS_FOR {
for (auto e = start; e < stop; e++) {
@ -248,7 +248,7 @@ namespace helpers {
T val = input->e<T>(0);
int count = 0;
for (int e = 1; e < indices->lengthOf(); e++) {
for (Nd4jLong e = 1; e < indices->lengthOf(); e++) {
if (idx == indices->e<int>(e)) {
// product
val *= input->e<T>(e);
@ -269,7 +269,7 @@ namespace helpers {
int numOfClasses = output->sizeAt(0); // number of classes
auto sumT = listOfOutTensors.at(idx);
sumT->assign(listOfTensors.at(0));
for (int i = 1; i < indices->lengthOf(); i++) {
for (Nd4jLong i = 1; i < indices->lengthOf(); i++) {
if (indices->e<int>(i) == idx) {
auto func = PRAGMA_THREADS_FOR {
for (auto e = start; e < stop; e++) {
@ -313,7 +313,7 @@ namespace helpers {
bool segmentIndicesValidate(nd4j::LaunchContext * context, NDArray* indices, NDArray& expected, NDArray& output) {
auto val = indices->e(0);
for (int e = 1; e < indices->lengthOf(); e++) {
for (Nd4jLong e = 1; e < indices->lengthOf(); e++) {
output = indices->e(e);
if (val.e<Nd4jLong>(0) > output.e<Nd4jLong>(0))
return false;
@ -362,7 +362,7 @@ namespace helpers {
for (auto fi = idxs.begin(); fi != idxs.end(); ++fi) {
T val = input->e<T>(fi->second.at(0));
for (Nd4jLong idx = 1; idx < fi->second.size(); ++idx) {
for (Nd4jLong idx = 1; idx < static_cast<Nd4jLong>(fi->second.size()); ++idx) {
val = nd4j::math::nd4j_max(val, input->e<T>(fi->second.at(idx)));
}
output->p(fi->first, val);
@ -380,7 +380,7 @@ namespace helpers {
for (auto fi = idxs.begin(); fi != idxs.end(); ++fi) {
auto outputT = listOfOutTensors.at(fi->first);
outputT->assign(listOfTensors.at(fi->second.at(0)));
for (Nd4jLong idx = 1; idx < fi->second.size(); ++idx) {
for (Nd4jLong idx = 1; idx < static_cast<Nd4jLong>(fi->second.size()); ++idx) {
auto maxT = listOfTensors.at(fi->second.at(idx));
for (Nd4jLong e = 0; e < outputT->lengthOf(); ++e) {
T val = nd4j::math::nd4j_max(maxT->e<T>(e), outputT->e<T>(e));
@ -432,7 +432,7 @@ namespace helpers {
for (auto fi = idxs.begin(); fi != idxs.end(); ++fi) {
auto outputT = listOfOutTensors.at(fi->first);
outputT->assign(listOfTensors.at(fi->second.at(0)));
for (Nd4jLong idx = 1; idx < fi->second.size(); ++idx) {
for (size_t idx = 1; idx < fi->second.size(); ++idx) {
auto minT = listOfTensors.at(fi->second.at(idx));
for (Nd4jLong e = 0; e < outputT->lengthOf(); ++e) {
@ -560,7 +560,7 @@ namespace helpers {
for (auto fi = idxs.begin(); fi != idxs.end(); ++fi) {
auto outputT = listOfOutTensors.at(fi->first);
outputT->assign(listOfTensors.at(fi->second.at(0)));
for (Nd4jLong idx = 1; idx < fi->second.size(); ++idx) {
for (size_t idx = 1; idx < fi->second.size(); ++idx) {
auto current = listOfTensors.at(fi->second.at(idx));
*outputT *= *current;
@ -584,7 +584,7 @@ namespace helpers {
if (input->isVector()) { // 1D case
for (auto fi = idxs.begin(); fi != idxs.end(); ++fi) {
double sumValue = input->e<double>(fi->second.at(0));
for (Nd4jLong idx = 1; idx < fi->second.size(); ++idx) {
for (size_t idx = 1; idx < fi->second.size(); ++idx) {
sumValue += input->e<double>(fi->second.at(idx));
}
output->p(fi->first, sumValue / nd4j::math::nd4j_sqrt<Nd4jLong, double>(fi->second.size()));
@ -599,7 +599,7 @@ namespace helpers {
for (auto fi = idxs.begin(); fi != idxs.end(); ++fi) {
auto outputT = listOfOutTensors.at(fi->first);
outputT->assign(listOfTensors.at(fi->second.at(0)));
for (Nd4jLong idx = 1; idx < fi->second.size(); ++idx) {
for (size_t idx = 1; idx < fi->second.size(); ++idx) {
auto current = listOfTensors.at(fi->second.at(idx));
*outputT += *current;
}
@ -651,7 +651,7 @@ namespace helpers {
auto currentOut = listOfOutTensors.at(i);
auto currentGradOut = listOfGradOuts.at(classNum);
for (uint64_t e = 0; e < current->lengthOf(); e++) {
for (Nd4jLong e = 0; e < current->lengthOf(); e++) {
if (nd4j::math::nd4j_abs(listOfBPTensors.at(classNum)->e<T>(e) - current->e<T>(e)) <= T(1.e-6))
currentOut->p(e, currentGradOut->e<T>(e));
}
@ -703,7 +703,7 @@ namespace helpers {
auto currentOut = listOfOutTensors.at(i);
auto currentGradOut = listOfGradOuts.at(classNum);
for (int e = 0; e < current->lengthOf(); e++) {
for (Nd4jLong e = 0; e < current->lengthOf(); e++) {
if (nd4j::math::nd4j_abs(listOfBPTensors.at(classNum)->e<double>(e) - current->e<double>(e)) <
1.e-5)
currentOut->p(e, currentGradOut->e<double>(e));
@ -746,13 +746,13 @@ namespace helpers {
int pos = 0;
//auto func = [&](uint64_t thread_id, uint64_t start, uint64_t stop, uint64_t increment) -> void {
for (auto i = 0; i < indices->lengthOf(); i++) {
for (Nd4jLong i = 0; i < indices->lengthOf(); i++) {
auto classNum = indices->e<Nd4jLong>(i);
auto current = listOfTensors.at(i);
auto currentOut = listOfOutTensors.at(i);
auto currentGradOut = listOfGradOuts.at(classNum);
for (int e = 0; e < current->lengthOf(); e++) {
for (Nd4jLong e = 0; e < current->lengthOf(); e++) {
currentOut->p(e, currentGradOut->e<double>(e) / classCount.at(classNum));
}
}
@ -781,7 +781,7 @@ namespace helpers {
ResultSet listOfOutTensors = output->allTensorsAlongDimension(restDims);
//auto func = PRAGMA_THREADS_FOR {
for (auto i = 0; i < indices->lengthOf(); i++) {
for (Nd4jLong i = 0; i < indices->lengthOf(); i++) {
auto classNum = indices->e<Nd4jLong>(i);
auto current = listOfTensors.at(i);
auto currentOut = listOfOutTensors.at(i);
@ -817,7 +817,7 @@ namespace helpers {
//std::vector<std::pair<NDArray*, int>> outputs(numOfClasses);
//auto func = PRAGMA_THREADS_FOR {
for (auto i = 0; i < indices->lengthOf(); i++) {
for (Nd4jLong i = 0; i < indices->lengthOf(); i++) {
auto classNum = indices->e<Nd4jLong>(i);
auto current = listOfTensors.at(i);
auto currentOut = listOfOutTensors.at(i);
@ -860,7 +860,7 @@ namespace helpers {
ResultSet listOfTensors = input->allTensorsAlongDimension(restDims);
ResultSet listOfOutTensors = output->allTensorsAlongDimension(restDims);
for (int i = 0; i < indices->lengthOf(); i++) {
for (Nd4jLong i = 0; i < indices->lengthOf(); i++) {
Nd4jLong classNum = indices->e<Nd4jLong>(i);
NDArray* current = listOfTensors.at(i);
NDArray* currentOut = listOfOutTensors.at(i);
@ -905,13 +905,13 @@ namespace helpers {
ResultSet listOfOutTensors = output->allTensorsAlongDimension(restDims);
//auto func = PRAGMA_THREADS_FOR {
for (auto i = 0; i < indices->lengthOf(); i++) {
for (Nd4jLong i = 0; i < indices->lengthOf(); i++) {
auto classNum = indices->e<Nd4jLong>(i);
auto current = listOfTensors.at(i);
auto currentOut = listOfOutTensors.at(i);
auto currentGradOut = listOfGradOuts.at(classNum);
for (int e = 0; e < current->lengthOf(); e++) {
for (Nd4jLong e = 0; e < current->lengthOf(); e++) {
if (nd4j::math::nd4j_abs(listOfBPTensors.at(classNum)->t<T>(e) - current->t<T>(e)) < 1.e-6)
currentOut->t<T>(e) = currentGradOut->t<T>(e);
}
@ -955,7 +955,7 @@ namespace helpers {
ResultSet listOfTensors = input->allTensorsAlongDimension(restDims);
ResultSet listOfOutTensors = output->allTensorsAlongDimension(restDims);
for (int i = 0; i < indices->lengthOf(); i++) {
for (Nd4jLong i = 0; i < indices->lengthOf(); i++) {
Nd4jLong classNum = indices->e<Nd4jLong>(i);
NDArray* current = listOfTensors.at(i);
NDArray* currentOut = listOfOutTensors.at(i);
@ -984,7 +984,7 @@ namespace helpers {
ResultSet listOfOutTensors = output->allTensorsAlongDimension(restDims);
//auto func = PRAGMA_THREADS_FOR {
for (auto i = 0; i < indices->lengthOf(); i++) {
for (Nd4jLong i = 0; i < indices->lengthOf(); i++) {
auto classNum = indices->e<Nd4jLong>(i);
auto currentOut = listOfOutTensors.at(i);
auto currentGradOut = listOfGradOuts.at(classNum);
@ -1021,7 +1021,7 @@ namespace helpers {
ResultSet listOfOutTensors = output->allTensorsAlongDimension(restDims);
//auto func = PRAGMA_THREADS_FOR {
for (auto i = 0; i < indices->lengthOf(); i++) {
for (Nd4jLong i = 0; i < indices->lengthOf(); i++) {
auto classNum = indices->e<Nd4jLong>(i);
auto current = listOfTensors.at(i);
auto currentOut = listOfOutTensors.at(i);
@ -1053,7 +1053,7 @@ namespace helpers {
// if input is a vector (as in the doc sample)
if (input->isVector()) {
//auto func = PRAGMA_THREADS_FOR {
for (auto e = 0; e < indices->lengthOf(); e++) {
for (Nd4jLong e = 0; e < indices->lengthOf(); e++) {
auto classNum = indices->e<Nd4jLong>(e);
output->p(e, gradOut->e<double>(classNum) / nd4j::math::nd4j_sqrt<double, double>(classCount[classNum]));
}
@ -1069,7 +1069,7 @@ namespace helpers {
ResultSet listOfOutTensors =output->allTensorsAlongDimension(restDims);
//auto func = PRAGMA_THREADS_FOR {
for (auto i = 0; i < indices->lengthOf(); i++) {
for (Nd4jLong i = 0; i < indices->lengthOf(); i++) {
auto classNum = indices->e<Nd4jLong>(i);
auto current = listOfTensors.at(i);
auto currentOut = listOfOutTensors.at(i);

View File

@ -378,7 +378,7 @@ namespace nd4j {
int irow = 0;
auto cShift = t * idxShift;
for (int e = 0; e < hsRounds; e++) {
for (Nd4jLong e = 0; e < hsRounds; e++) {
irow = bIndices[e + cShift];
if (irow < 0 || irow >= vocabSize)
continue;
@ -457,7 +457,7 @@ namespace nd4j {
T sneu1[600];
T sneu1e[600];
for (int e = start; e < stop; e++) {
for (auto e = start; e < stop; e++) {
T *neu1 = vectorLength <= 600 ? sneu1 : new T[vectorLength];
T *neu1e = vectorLength <= 600 ? sneu1e : new T[vectorLength];
@ -500,7 +500,7 @@ namespace nd4j {
// hierarchic softmax step
if (!indices.isEmpty()) {
for (int i = 0; i < numIndices; i++) {
for (Nd4jLong i = 0; i < numIndices; i++) {
const int cIndex = bIndices[(e * numIndices) + i];
const int cCode = bCodes[(e * numIndices) + i];

View File

@ -41,8 +41,8 @@ namespace helpers {
auto batchLoop = PRAGMA_THREADS_FOR {
for (auto batch = start; batch < stop; batch++) {
for (auto r = 0; r < rows; r++) {
for (auto c = 0; c < r; c++) {
for (Nd4jLong r = 0; r < rows; r++) {
for (Nd4jLong c = 0; c < r; c++) {
math::nd4j_swap(outputPart[batch]->t<T>(r, c) , outputPart[batch]->t<T>(c, r));
}
}
@ -66,7 +66,7 @@ namespace helpers {
auto permutationsPart = permutations.allTensorsAlongDimension({-1});
for (auto batch = 0; batch < permutationsPart.size(); ++batch) {
for (auto row = 0; row < PPart[batch]->rows(); ++row) {
for (Nd4jLong row = 0; row < PPart[batch]->rows(); ++row) {
PPart[batch]->t<T>(row, permutationsPart[batch]->t<int>(row)) = T(1.f);
}
}
@ -77,7 +77,7 @@ namespace helpers {
MmulHelper::matmul(&P, rightInput, &rightPermuted, 0, 0);
ResultSet leftLowerPart = leftLower.allTensorsAlongDimension({-2, -1});
for (auto i = 0; i < leftLowerPart.size(); i++) {
for (auto r = 0; r < leftLowerPart[i]->rows(); r++)
for (Nd4jLong r = 0; r < leftLowerPart[i]->rows(); r++)
leftLowerPart[i]->t<T>(r,r) = (T)1.f;
}
// stage 2: triangularSolveFunctor for Lower with given b

View File

@ -29,7 +29,7 @@ namespace helpers {
//////////////////////////////////////////////////////////////////////////
template <typename T>
static void split_(const NDArray& input, const std::vector<NDArray*>& outArrs, const int axis) {
int numSplits = outArrs.size();
uint numSplits = outArrs.size();
const auto sizeofT = input.sizeOfT();
@ -73,9 +73,9 @@ namespace helpers {
if (luckCase2) {
const uint xDim = input.sizeAt(axis);
const auto xDim = input.sizeAt(axis);
for (uint i = 0; i < input.lengthOf() / xDim; ++i) {
for (Nd4jLong i = 0; i < input.lengthOf() / xDim; ++i) {
T* x = xBuff + xDim * i;

View File

@ -39,7 +39,7 @@ namespace helpers {
// }
// ----------------------------------------------------------------------------------------------- //
std::vector<int> dimsToExclude(input->rankOf() - 1);
for (int d = 0; d < dimsToExclude.size(); ++d)
for (size_t d = 0; d < dimsToExclude.size(); ++d)
dimsToExclude[d] = d;
const Nd4jLong numOfSubArrs = ShapeUtils::getNumOfSubArrs(input->getShapeInfo(), dimsToExclude);
@ -72,7 +72,7 @@ namespace helpers {
NDArray topValues = NDArrayFactory::create<T>('c', {k});
NDArray sortedVals = NDArrayFactory::create<T>('c', {k});
NDArray topIndices = NDArrayFactory::create<Nd4jLong>('c', {k});
for (Nd4jLong pos = 0; pos < k; ++pos) {
for (uint pos = 0; pos < k; ++pos) {
topIndices.t<Nd4jLong>(pos) = pos;
topValues.t<T>(pos) = trial.t<T>(pos);
}
@ -80,7 +80,7 @@ namespace helpers {
sortedVals.assign(topValues);// = NDArrayFactory::create<T>('c', {k});
//std::sort(sortedVals.begin(), sortedVals.end()); // sorted in ascending order
SpecialMethods<T>::sortGeneric(sortedVals.buffer(), sortedVals.shapeInfo(), false);
for (int i = k; i < width; ++i) {
for (Nd4jLong i = static_cast<Nd4jLong>(k); i < width; ++i) {
T val = trial.e<T>(i);
T minTopVal = sortedVals.t<T>(0);
if (minTopVal < val) { // value should be inserted to top k
@ -104,15 +104,15 @@ namespace helpers {
if (needSort) {
SpecialMethods<T>::sortGeneric(topValues.buffer(), topValues.shapeInfo(), true);
for (int j = 0; j < width; j++)
for (int pos = 0; pos < k; ++pos)
for (Nd4jLong j = 0; j < width; j++)
for (uint pos = 0; pos < k; ++pos)
if (topValues.t<T>(pos) == trial.t<T>(j))
topIndices.t<Nd4jLong>(pos) = j;
}
else { // else sort by indices
std::map<Nd4jLong, T> sortValsMap;
//std::vector<std::pair<int, T>> data(topValues.lengthOf());
for (size_t e = 0; e < topValues.lengthOf(); ++e) {
for (Nd4jLong e = 0; e < topValues.lengthOf(); ++e) {
sortValsMap[topIndices.t<Nd4jLong>(e)] = topValues.t<T>(e);
}
@ -152,7 +152,7 @@ namespace helpers {
auto func = PRAGMA_THREADS_FOR {
for (auto e = start; e < stop; e++) {
bool found = false;
for (int j = 0; j < k; j++) {
for (uint j = 0; j < k; j++) {
if (target->e<Nd4jLong>(e) == indices->e<Nd4jLong>(e * k + j)) {
found = true;
break;

View File

@ -597,7 +597,7 @@ static void gatherND_(NDArray& input, NDArray& indices, NDArray& output) {
zCoordStart[yRank - 1] = coordToRestore;
// construct coordinates for x
for (uint j = 0; j < yLastDim; ++j)
for (int j = 0; j < yLastDim; ++j)
xCoordStart[j] = y[yOffset + j * indices.stridesOf()[yRank - 1]]; // last stride
const auto xOffset = shape::getOffset(input.getShapeInfo(), xCoordStart);
@ -628,7 +628,7 @@ static void gather_(NDArray* input, const NDArray* indices, NDArray* output, con
if (indices != nullptr) {
for(int i = 0; i < indices->lengthOf(); ++i)
for(Nd4jLong i = 0; i < indices->lengthOf(); ++i)
if(indices->e<Nd4jLong>(i) >= input->sizeAt(axis))
throw std::runtime_error("helpers::gather function: indices array contains wrong elements, each element must be smaller than corresponding dimension of input array !");
@ -733,7 +733,7 @@ void scatterUpdate(nd4j::LaunchContext * context, NDArray& input, NDArray& updat
// increasing counter to skip numIndices
e++;
std::vector<int> indices;
for (; e < intArgs->size(); e++)
for (; e < static_cast<Nd4jLong>(intArgs->size()); e++)
indices.push_back((*intArgs)[e]);
auto func = PRAGMA_THREADS_FOR {
@ -813,7 +813,7 @@ static void mergeMaxIndex_(const std::vector<NDArray*>& inArrs, NDArray& output)
T max = -DataTypeUtils::max<T>();
Nd4jLong idx = 0;
for (int i = 0; i < numArgs; i++) {
for (Nd4jLong i = 0; i < numArgs; i++) {
T v = inArrs[i]->e<T>(e);
if (v > max) {
max = v;
@ -841,7 +841,7 @@ static void mergeMax_(const std::vector<NDArray*>& inArrs, NDArray& output) {
auto func = PRAGMA_THREADS_FOR {
for (auto e = start; e < stop; e++) {
T max = -DataTypeUtils::max<T>();
for (int i = 0; i < numArgs; i++) {
for (Nd4jLong i = 0; i < numArgs; i++) {
T v = inArrs[i]->e<T>(e);
if (v > max)
max = v;
@ -867,7 +867,7 @@ static void mergeAvg_(const std::vector<NDArray*>& inArrs, NDArray& output) {
auto func = PRAGMA_THREADS_FOR {
for (auto e = start; e < stop; e++) {
T sum = 0.;
for (int i = 0; i < numArgs; i++) {
for (Nd4jLong i = 0; i < numArgs; i++) {
T v = inArrs[i]->e<T>(e);
sum += v;
}
@ -893,7 +893,7 @@ static void mergeAdd_(const std::vector<NDArray*>& inArrs, NDArray& output) {
auto func = PRAGMA_THREADS_FOR {
for (auto e = start; e < stop; e++) {
T sum = (T) 0.f;
for (int i = 0; i < numArgs; i++)
for (Nd4jLong i = 0; i < numArgs; i++)
sum += inArrs[i]->e<T>(e);
output.p(e, sum);
@ -1242,7 +1242,7 @@ static void tileBP_(const NDArray& gradO /*input*/, NDArray& gradI /*output*/, c
memset(gradIBuff, 0, gradILen * sizeof(T));
else {
//PRAGMA_OMP_PARALLEL_FOR_SIMD
for (int i = 0; i < gradILen * gradIEWS; i += gradIEWS)
for (Nd4jLong i = 0; i < gradILen * gradIEWS; i += gradIEWS)
gradIBuff[i] = static_cast<T>(0.f);
}

View File

@ -43,10 +43,10 @@ namespace helpers {
auto rows = leftInput->rows();
auto cols = rightInput->columns();
//output->t<T>(0,0) = rightInput->t<T>(0,0) / leftInput->t<T>(0,0);
for (auto r = 0; r < rows; r++) {
for (auto j = 0; j < cols; j++) {
for (Nd4jLong r = 0; r < rows; r++) {
for (Nd4jLong j = 0; j < cols; j++) {
auto sum = rightInput->t<T>(r, j);
for (auto c = 0; c < r; c++) {
for (Nd4jLong c = 0; c < r; c++) {
sum -= leftInput->t<T>(r, c) * output->t<T>(c, j);
}
output->t<T>(r, j) = sum / leftInput->t<T>(r, r);
@ -72,10 +72,10 @@ namespace helpers {
static void upperTriangularSolve(nd4j::LaunchContext * context, NDArray* leftInput, NDArray* rightInput, bool adjoint, NDArray* output) {
auto rows = leftInput->rows();
auto cols = rightInput->columns();
for (auto r = rows; r > 0; r--) {
for (auto j = 0; j < cols; j++) {
for (Nd4jLong r = rows; r > 0; r--) {
for (Nd4jLong j = 0; j < cols; j++) {
auto sum = rightInput->t<T>(r - 1, j);
for (auto c = r; c < rows; c++) {
for (Nd4jLong c = r; c < rows; c++) {
sum -= leftInput->t<T>(r - 1, c) * output->t<T>(c, j);
}
output->t<T>(r - 1, j) = sum / leftInput->t<T>(r - 1, r - 1);
@ -114,14 +114,14 @@ namespace helpers {
auto batchLoop = PRAGMA_THREADS_FOR {
for (auto batch = start; batch < stop; batch++) {
if (!lower) {
for (auto r = 0; r < rows; r++) {
for (auto c = 0; c <= r; c++) {
for (Nd4jLong r = 0; r < rows; r++) {
for (Nd4jLong c = 0; c <= r; c++) {
outputPart[batch]->t<T>(r, c) = inputPart[batch]->t<T>(c, r);
}
}
} else {
for (auto r = 0; r < rows; r++) {
for (auto c = r; c < cols; c++) {
for (Nd4jLong r = 0; r < rows; r++) {
for (Nd4jLong c = r; c < cols; c++) {
outputPart[batch]->t<T>(r, c) = inputPart[batch]->t<T>(c, r);
}
}

View File

@ -26,7 +26,7 @@ namespace helpers {
template <typename T>
static void adjustWeights_(NDArray* input, NDArray* weights, NDArray* output, int minLength, int maxLength) {
for (int e = 0; e < input->lengthOf(); e++) {
for (Nd4jLong e = 0; e < input->lengthOf(); e++) {
int val = input->e<int>(e);
if (val < maxLength) {
if (weights != nullptr)