Loops auto-vectorization problem fix (#277)

* libnd4j cast loop types

Signed-off-by: Oleg <oleg.semeniv@gmail.com>

* libnd4j more type casting added to loops

Signed-off-by: Oleg <oleg.semeniv@gmail.com>

* libnd4j sync casting types of iterated variable in loops

Signed-off-by: Oleg <oleg.semeniv@gmail.com>

* libnd4j more loops reviewed for vectorization problem fix

Signed-off-by: Oleg <oleg.semeniv@gmail.com>

* libnd4j fixed several typos

Signed-off-by: Oleg <oleg.semeniv@gmail.com>

* libnd4j several more files reviewed to fix auto-vectorization problem in loops

Signed-off-by: Oleg <oleg.semeniv@gmail.com>

* libnd4j merged master and reviewed more files to fix auto-vectorization problem in loops

Signed-off-by: Oleg <oleg.semeniv@gmail.com>

* libnd4j several type casting added in broadcasting that were missed, fixed mac builds

Signed-off-by: Oleg <oleg.semeniv@gmail.com>

* libnd4j double check all files and fix several more places in loops

Signed-off-by: Oleg <oleg.semeniv@gmail.com>

* libnd4j fixed builds

Signed-off-by: Oleg <oleg.semeniv@gmail.com>

* libnd4j revert changes for lup.cpp

Signed-off-by: Oleg <oleg.semeniv@gmail.com>

* libnd4j more files reviewed for auto-vectorization problem fix

Signed-off-by: Oleg <oleg.semeniv@gmail.com>
master
Oleh 2020-02-28 16:04:45 +02:00 committed by GitHub
parent 5332ace32b
commit f116f53d61
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 60 additions and 61 deletions

View File

@ -1702,7 +1702,7 @@ bool NDArray::isSameShape(const std::vector<Nd4jLong>& shape) const{
if (this->rankOf() != (int) shape.size()) if (this->rankOf() != (int) shape.size())
return false; return false;
for (int e = 0; e < this->rankOf(); e++) { for (int e = 0; e < this->rankOf(); e++) {
if (this->shapeOf()[e] != shape.at(e) && shape.at(e) != -1) if (this->shapeOf()[e] != shape[e] && shape[e] != -1)
return false; return false;
} }
return true; return true;

View File

@ -980,7 +980,7 @@ std::string NDArray::asString(Nd4jLong limit) {
template<typename T> template<typename T>
std::vector<T> NDArray::getBufferAsVector() { std::vector<T> NDArray::getBufferAsVector() {
std::vector<T> vector(lengthOf()); std::vector<T> vector(lengthOf());
for (int e = 0; e < lengthOf(); e++) for (Nd4jLong e = 0; e < lengthOf(); e++)
vector[e] = this->e<T>(e); vector[e] = this->e<T>(e);
return vector; return vector;
} }
@ -2128,12 +2128,12 @@ bool NDArray::isIdentityMatrix() {
throw std::runtime_error("isIdentityMatrix method: matrix must be square and have rank = 2 !"); throw std::runtime_error("isIdentityMatrix method: matrix must be square and have rank = 2 !");
const double eps = 1e-5f; const double eps = 1e-5f;
for(int i=0; i<rows(); ++i) for(Nd4jLong i=0; i<rows(); ++i)
if(nd4j::math::nd4j_abs(e<double>(i,i) - 1.f) > eps) if(nd4j::math::nd4j_abs(e<double>(i,i) - 1.f) > eps)
return false; return false;
for(int i=0; i<rows(); ++i) { for(Nd4jLong i=0; i<rows(); ++i) {
for(int j=0; j<columns(); ++j) { for(Nd4jLong j=0; j<columns(); ++j) {
if (i == j) if (i == j)
continue; continue;
if(nd4j::math::nd4j_abs(e<double>(i,j)) > eps) if(nd4j::math::nd4j_abs(e<double>(i,j)) > eps)
@ -2335,7 +2335,7 @@ NDArray NDArray::asS() const {
Nd4jLong dataLength = 0; Nd4jLong dataLength = 0;
auto data = bufferAsT<int8_t>() + offsetsLength; auto data = bufferAsT<int8_t>() + offsetsLength;
for (int e = 0; e < lengthOf(); e++) { for (Nd4jLong e = 0; e < lengthOf(); e++) {
offsets[e] = dataLength; offsets[e] = dataLength;
start = nInputoffsets[e]; start = nInputoffsets[e];
stop = nInputoffsets[e + 1]; stop = nInputoffsets[e + 1];
@ -3524,7 +3524,7 @@ bool NDArray::equalsTo(const NDArray *other, double eps) const {
// string is special case, we'll compare them one by one, considering both arrays are guaranteed to have the same length // string is special case, we'll compare them one by one, considering both arrays are guaranteed to have the same length
if (dataType() == DataType::UTF8) { if (dataType() == DataType::UTF8) {
for (int e = 0; e < this->lengthOf(); e++) { for (Nd4jLong e = 0; e < this->lengthOf(); e++) {
auto s1 = this->e<std::string>(e); auto s1 = this->e<std::string>(e);
auto s2 = other->e<std::string>(e); auto s2 = other->e<std::string>(e);
@ -3533,7 +3533,7 @@ bool NDArray::equalsTo(const NDArray *other, double eps) const {
} }
} }
else if (dataType() == DataType::UTF16) { else if (dataType() == DataType::UTF16) {
for (int e = 0; e < this->lengthOf(); e++) { for (Nd4jLong e = 0; e < this->lengthOf(); e++) {
auto s1 = this->e<std::u16string>(e); auto s1 = this->e<std::u16string>(e);
auto s2 = other->e<std::u16string>(e); auto s2 = other->e<std::u16string>(e);
@ -3542,7 +3542,7 @@ bool NDArray::equalsTo(const NDArray *other, double eps) const {
} }
} }
else { else {
for (int e = 0; e < this->lengthOf(); e++) { for (Nd4jLong e = 0; e < this->lengthOf(); e++) {
auto s1 = this->e<std::u32string>(e); auto s1 = this->e<std::u32string>(e);
auto s2 = other->e<std::u32string>(e); auto s2 = other->e<std::u32string>(e);
@ -4801,7 +4801,7 @@ ResultSet NDArray::allTensorsAlongDimension(const std::vector<int> &dimensions)
auto pack = ConstantTadHelper::getInstance()->tadForDimensions(_shapeInfo, const_cast<int*>(dimensions.data()), dimensions.size()); auto pack = ConstantTadHelper::getInstance()->tadForDimensions(_shapeInfo, const_cast<int*>(dimensions.data()), dimensions.size());
auto numTads = pack.numberOfTads(); auto numTads = pack.numberOfTads();
for (int idx = 0; idx < numTads; idx++ ) { for (Nd4jLong idx = 0; idx < numTads; idx++ ) {
auto array = new NDArray(_buffer, ShapeDescriptor(pack.primaryShapeInfo()), getContext(), pack.primaryOffsets()[idx] + getBufferOffset()); auto array = new NDArray(_buffer, ShapeDescriptor(pack.primaryShapeInfo()), getContext(), pack.primaryOffsets()[idx] + getBufferOffset());
array->_isView = true; array->_isView = true;
result.push_back(array); result.push_back(array);
@ -4872,7 +4872,7 @@ NDArray NDArray::operator()(const std::vector<Nd4jLong>& idx, const bool keepUni
std::vector<int> dimsWithUnities; std::vector<int> dimsWithUnities;
for (uint d = 0; d < rank; ++d) for (int d = 0; d < rank; ++d)
if(idx[n*d] != idx[n*d+1] && shapeOf[d] == 1) if(idx[n*d] != idx[n*d+1] && shapeOf[d] == 1)
dimsWithUnities.push_back(d); dimsWithUnities.push_back(d);

View File

@ -308,7 +308,7 @@ void NDArray::tile(const std::vector<Nd4jLong>& reps, NDArray& target) const {
// fill newBuff, loop through all elements of newBuff // fill newBuff, loop through all elements of newBuff
// looping through _buffer goes automatically by means of getSubArrayIndex applying // looping through _buffer goes automatically by means of getSubArrayIndex applying
const int ews = target.ews(); const int ews = target.ews();
const int targetLen = target.lengthOf(); const auto targetLen = target.lengthOf();
if(target.ordering() == 'c' && ews == 1) { // ews == 1 always here if(target.ordering() == 'c' && ews == 1) { // ews == 1 always here
//#pragma omp parallel for simd if(targetLen > Environment::getInstance()->elementwiseThreshold()) schedule(guided) //#pragma omp parallel for simd if(targetLen > Environment::getInstance()->elementwiseThreshold()) schedule(guided)
for(Nd4jLong i=0; i<targetLen; ++i) { for(Nd4jLong i=0; i<targetLen; ++i) {
@ -372,7 +372,7 @@ static void repeat_(const NDArray& input, NDArray& output, const std::vector<int
const int rank = input.rankOf(); // xRank = zRank const int rank = input.rankOf(); // xRank = zRank
const int zLen = output.lengthOf(); // xLen <= zLen const int zLen = output.lengthOf(); // xLen <= zLen
const int repSize = repeats.size(); const uint repSize = repeats.size();
// loop through input array // loop through input array
auto func = PRAGMA_THREADS_FOR { auto func = PRAGMA_THREADS_FOR {

View File

@ -1300,16 +1300,16 @@ void pullRowsGeneric(void *vx,
if (xEWS == 1 && zEWS == 1) { if (xEWS == 1 && zEWS == 1) {
PRAGMA_OMP_SIMD PRAGMA_OMP_SIMD
for (int i = 0; i < tadLength; i++) { for (Nd4jLong i = 0; i < tadLength; i++) {
rZ[i] = rX[i]; rZ[i] = rX[i];
} }
} else if (xEWS >= 1 && zEWS >= 1) { } else if (xEWS >= 1 && zEWS >= 1) {
PRAGMA_OMP_SIMD PRAGMA_OMP_SIMD
for (int i = 0; i < tadLength; i++) { for (Nd4jLong i = 0; i < tadLength; i++) {
rZ[i * zEWS] = rX[i * xEWS]; rZ[i * zEWS] = rX[i * xEWS];
} }
} else { } else {
for (int i = 0; i < tadLength; i++) { for (Nd4jLong i = 0; i < tadLength; i++) {
auto xOffset = xTadOffsetForBlock + shape::getIndexOffset(i, tadShapeInfo); auto xOffset = xTadOffsetForBlock + shape::getIndexOffset(i, tadShapeInfo);
auto zOffset = zTadOffsetForBlock + shape::getIndexOffset(i, zTadShapeInfo); auto zOffset = zTadOffsetForBlock + shape::getIndexOffset(i, zTadShapeInfo);
hZ[zOffset] = hX[xOffset]; hZ[zOffset] = hX[xOffset];

View File

@ -78,7 +78,7 @@ static void usualGemm(const NDArray* vA, const NDArray* vB, NDArray* vC,
T3 val = A[aOffset] * B[bOffset]; // first iteration T3 val = A[aOffset] * B[bOffset]; // first iteration
for (uint j = 1; j < K; ++j) { // rest iterations for (int j = 1; j < K; ++j) { // rest iterations
aOffset += shape::stride(aShapeInfo)[aKaxis]; aOffset += shape::stride(aShapeInfo)[aKaxis];
bOffset += shape::stride(bShapeInfo)[bKaxis]; bOffset += shape::stride(bShapeInfo)[bKaxis];
val = val + A[aOffset] * B[bOffset]; val = val + A[aOffset] * B[bOffset];
@ -131,7 +131,7 @@ static void usualGemv(const NDArray* vA, const NDArray* vX, NDArray* vY, const
T3 val = A[aOffset] * X[xOffset]; // first iteration T3 val = A[aOffset] * X[xOffset]; // first iteration
for (uint j = 1; j < N; ++j) { // rest iterations for (int j = 1; j < N; ++j) { // rest iterations
aOffset += aNstride; aOffset += aNstride;
xOffset += incx; xOffset += incx;
val = val + A[aOffset] * X[xOffset]; val = val + A[aOffset] * X[xOffset];
@ -163,7 +163,7 @@ static void usualDot(const Nd4jLong length, const double alpha, const void* vX,
T3 sum = 0; T3 sum = 0;
PRAGMA_OMP_PARALLEL_FOR_ARGS(OMP_IF(length > Environment::getInstance()->elementwiseThreshold()) schedule(guided) reduction(OMP_SUMT:sum)) PRAGMA_OMP_PARALLEL_FOR_ARGS(OMP_IF(length > Environment::getInstance()->elementwiseThreshold()) schedule(guided) reduction(OMP_SUMT:sum))
for(int i = 0; i < length; ++i) for(Nd4jLong i = 0; i < length; ++i)
sum += X[i * incx] * Y[i * incy]; sum += X[i * incx] * Y[i * incy];
if(betaPersent) if(betaPersent)
@ -462,7 +462,7 @@ static void batchedGemm(const NDArray* vA, const NDArray* vB, NDArray* vC,
T3 val = A[aOffset] * B[bOffset]; // first iteration T3 val = A[aOffset] * B[bOffset]; // first iteration
for (uint j = 1; j < K; ++j) { // rest iterations for (int j = 1; j < K; ++j) { // rest iterations
aOffset += shape::stride(aShapeInfo)[aKaxis]; aOffset += shape::stride(aShapeInfo)[aKaxis];
bOffset += shape::stride(bShapeInfo)[bKaxis]; bOffset += shape::stride(bShapeInfo)[bKaxis];
val = val + A[aOffset] * B[bOffset]; val = val + A[aOffset] * B[bOffset];

View File

@ -58,7 +58,7 @@ BiDiagonalUp::BiDiagonalUp(const NDArray& matrix): _HHmatrix(nd4j::NDArrayFactor
T _x, _y; T _x, _y;
for(int i = 0; i < cols-1; ++i ) { for(Nd4jLong i = 0; i < cols-1; ++i ) {
// evaluate Householder matrix nullifying columns // evaluate Householder matrix nullifying columns
column = new NDArray(_HHmatrix({i,rows, i,i+1}, true)); column = new NDArray(_HHmatrix({i,rows, i,i+1}, true));

View File

@ -53,7 +53,7 @@ void nd4j::IndexReductionLoops<X,Z>::loopIndexReduce(X* x, Nd4jLong* xShapeInfo,
auto tad = const_cast<X *>(x) + tadOffsets[i]; auto tad = const_cast<X *>(x) + tadOffsets[i];
auto indexValue = OpType::startingIndexValue(tad); auto indexValue = OpType::startingIndexValue(tad);
for (uint j = 0; j < tadLen; j++) { for (Nd4jLong j = 0; j < tadLen; j++) {
functions::indexreduce::IndexValue<X> comp(tad[j], j); functions::indexreduce::IndexValue<X> comp(tad[j], j);
indexValue = OpType::update(indexValue, comp, extraParams); indexValue = OpType::update(indexValue, comp, extraParams);
} }
@ -74,7 +74,7 @@ void nd4j::IndexReductionLoops<X,Z>::loopIndexReduce(X* x, Nd4jLong* xShapeInfo,
auto tad = const_cast<X *>(x) + tadOffsets[i]; auto tad = const_cast<X *>(x) + tadOffsets[i];
auto indexValue = OpType::startingIndexValue(tad); auto indexValue = OpType::startingIndexValue(tad);
for (uint j = 0; j < tadLen; j++) { for (Nd4jLong j = 0; j < tadLen; j++) {
functions::indexreduce::IndexValue<X> comp(tad[j * tadEws], j); functions::indexreduce::IndexValue<X> comp(tad[j * tadEws], j);
indexValue = OpType::update(indexValue, comp, extraParams); indexValue = OpType::update(indexValue, comp, extraParams);
} }
@ -95,7 +95,7 @@ void nd4j::IndexReductionLoops<X,Z>::loopIndexReduce(X* x, Nd4jLong* xShapeInfo,
auto tad = const_cast<X *>(x) + tadOffsets[i]; auto tad = const_cast<X *>(x) + tadOffsets[i];
auto indexValue = OpType::startingIndexValue(tad); auto indexValue = OpType::startingIndexValue(tad);
for (uint i0 = 0; i0 < tadLen; ++i0) { for (Nd4jLong i0 = 0; i0 < tadLen; ++i0) {
functions::indexreduce::IndexValue<X> comp(tad[i0 * tadStride[0]], i0); functions::indexreduce::IndexValue<X> comp(tad[i0 * tadStride[0]], i0);
indexValue = OpType::update(indexValue, comp, extraParams); indexValue = OpType::update(indexValue, comp, extraParams);
} }
@ -118,8 +118,8 @@ void nd4j::IndexReductionLoops<X,Z>::loopIndexReduce(X* x, Nd4jLong* xShapeInfo,
auto tad = const_cast<X *>(x) + tadOffsets[i]; auto tad = const_cast<X *>(x) + tadOffsets[i];
auto indexValue = OpType::startingIndexValue(tad); auto indexValue = OpType::startingIndexValue(tad);
for (uint i0 = 0; i0 < tadShape[0]; ++i0) { for (Nd4jLong i0 = 0; i0 < tadShape[0]; ++i0) {
for (uint i1 = 0; i1 < tadShape[1]; ++i1) { for (Nd4jLong i1 = 0; i1 < tadShape[1]; ++i1) {
const auto tadOffset = i0 * tadStride[0] + i1 * tadStride[1]; const auto tadOffset = i0 * tadStride[0] + i1 * tadStride[1];
const auto tadIndex = i0 * newStride[0] + i1; const auto tadIndex = i0 * newStride[0] + i1;
functions::indexreduce::IndexValue<X> comp(tad[tadOffset], tadIndex); functions::indexreduce::IndexValue<X> comp(tad[tadOffset], tadIndex);
@ -145,9 +145,9 @@ void nd4j::IndexReductionLoops<X,Z>::loopIndexReduce(X* x, Nd4jLong* xShapeInfo,
auto tad = const_cast<X *>(x) + tadOffsets[i]; auto tad = const_cast<X *>(x) + tadOffsets[i];
auto indexValue = OpType::startingIndexValue(tad); auto indexValue = OpType::startingIndexValue(tad);
for (uint i0 = 0; i0 < tadShape[0]; ++i0) { for (Nd4jLong i0 = 0; i0 < tadShape[0]; ++i0) {
for (uint i1 = 0; i1 < tadShape[1]; ++i1) { for (Nd4jLong i1 = 0; i1 < tadShape[1]; ++i1) {
for (uint i2 = 0; i2 < tadShape[2]; ++i2) { for (Nd4jLong i2 = 0; i2 < tadShape[2]; ++i2) {
const auto tadOffset = i0 * tadStride[0] + i1 * tadStride[1] + i2 * tadStride[2]; const auto tadOffset = i0 * tadStride[0] + i1 * tadStride[1] + i2 * tadStride[2];
const auto tadIndex = i0 * newStride[0] + i1 * newStride[1] + i2; const auto tadIndex = i0 * newStride[0] + i1 * newStride[1] + i2;
functions::indexreduce::IndexValue<X> comp(tad[tadOffset], tadIndex); functions::indexreduce::IndexValue<X> comp(tad[tadOffset], tadIndex);
@ -174,10 +174,10 @@ void nd4j::IndexReductionLoops<X,Z>::loopIndexReduce(X* x, Nd4jLong* xShapeInfo,
auto tad = const_cast<X *>(x) + tadOffsets[i]; auto tad = const_cast<X *>(x) + tadOffsets[i];
auto indexValue = OpType::startingIndexValue(tad); auto indexValue = OpType::startingIndexValue(tad);
for (uint i0 = 0; i0 < tadShape[0]; ++i0) { for (Nd4jLong i0 = 0; i0 < tadShape[0]; ++i0) {
for (uint i1 = 0; i1 < tadShape[1]; ++i1) { for (Nd4jLong i1 = 0; i1 < tadShape[1]; ++i1) {
for (uint i2 = 0; i2 < tadShape[2]; ++i2) { for (Nd4jLong i2 = 0; i2 < tadShape[2]; ++i2) {
for (uint i3 = 0; i3 < tadShape[3]; ++i3) { for (Nd4jLong i3 = 0; i3 < tadShape[3]; ++i3) {
const auto tadOffset = i0 * tadStride[0] + i1 * tadStride[1] + i2 * tadStride[2] + i3 * tadStride[3]; const auto tadOffset = i0 * tadStride[0] + i1 * tadStride[1] + i2 * tadStride[2] + i3 * tadStride[3];
const auto tadIndex = i0 * newStride[0] + i1 * newStride[1] + i2 * newStride[2] + i3; const auto tadIndex = i0 * newStride[0] + i1 * newStride[1] + i2 * newStride[2] + i3;
functions::indexreduce::IndexValue<X> comp(tad[tadOffset], tadIndex); functions::indexreduce::IndexValue<X> comp(tad[tadOffset], tadIndex);
@ -205,11 +205,11 @@ void nd4j::IndexReductionLoops<X,Z>::loopIndexReduce(X* x, Nd4jLong* xShapeInfo,
auto tad = const_cast<X *>(x) + tadOffsets[i]; auto tad = const_cast<X *>(x) + tadOffsets[i];
auto indexValue = OpType::startingIndexValue(tad); auto indexValue = OpType::startingIndexValue(tad);
for (uint i0 = 0; i0 < tadShape[0]; ++i0) { for (Nd4jLong i0 = 0; i0 < tadShape[0]; ++i0) {
for (uint i1 = 0; i1 < tadShape[1]; ++i1) { for (Nd4jLong i1 = 0; i1 < tadShape[1]; ++i1) {
for (uint i2 = 0; i2 < tadShape[2]; ++i2) { for (Nd4jLong i2 = 0; i2 < tadShape[2]; ++i2) {
for (uint i3 = 0; i3 < tadShape[3]; ++i3) { for (Nd4jLong i3 = 0; i3 < tadShape[3]; ++i3) {
for (uint i4 = 0; i4 < tadShape[4]; ++i4) { for (Nd4jLong i4 = 0; i4 < tadShape[4]; ++i4) {
const auto tadOffset = i0 * tadStride[0] + i1 * tadStride[1] + i2 * tadStride[2] + i3 * tadStride[3] + i4 * tadStride[4]; const auto tadOffset = i0 * tadStride[0] + i1 * tadStride[1] + i2 * tadStride[2] + i3 * tadStride[3] + i4 * tadStride[4];
const auto tadIndex = i0 * newStride[0] + i1 * newStride[1] + i2 * newStride[2] + i3 * newStride[3] + i4; const auto tadIndex = i0 * newStride[0] + i1 * newStride[1] + i2 * newStride[2] + i3 * newStride[3] + i4;
functions::indexreduce::IndexValue<X> comp(tad[tadOffset], tadIndex); functions::indexreduce::IndexValue<X> comp(tad[tadOffset], tadIndex);
@ -238,7 +238,7 @@ void nd4j::IndexReductionLoops<X,Z>::loopIndexReduce(X* x, Nd4jLong* xShapeInfo,
auto tad = const_cast<X *>(x) + tadOffsets[i]; auto tad = const_cast<X *>(x) + tadOffsets[i];
auto indexValue = OpType::startingIndexValue(tad); auto indexValue = OpType::startingIndexValue(tad);
for (uint j = 0; j < tadLen; j++) { for (Nd4jLong j = 0; j < tadLen; j++) {
functions::indexreduce::IndexValue<X> comp(tad[j * tadEws], j); functions::indexreduce::IndexValue<X> comp(tad[j * tadEws], j);
indexValue = OpType::update(indexValue, comp, extraParams); indexValue = OpType::update(indexValue, comp, extraParams);
} }
@ -262,7 +262,7 @@ void nd4j::IndexReductionLoops<X,Z>::loopIndexReduce(X* x, Nd4jLong* xShapeInfo,
auto tad = const_cast<X *>(x) + tadOffsets[i]; auto tad = const_cast<X *>(x) + tadOffsets[i];
auto indexValue = OpType::startingIndexValue(tad); auto indexValue = OpType::startingIndexValue(tad);
for (uint j = 0; j < tadLen; j++) { for (Nd4jLong j = 0; j < tadLen; j++) {
auto tadOffset = shape::indexOffset(j, tadShapeInfo, castTadShapeInfo, canCastTad); auto tadOffset = shape::indexOffset(j, tadShapeInfo, castTadShapeInfo, canCastTad);
functions::indexreduce::IndexValue<X> comp(tad[tadOffset], j); functions::indexreduce::IndexValue<X> comp(tad[tadOffset], j);
indexValue = OpType::update(indexValue, comp, extraParams); indexValue = OpType::update(indexValue, comp, extraParams);
@ -288,7 +288,7 @@ void nd4j::IndexReductionLoops<X,Z>::loopIndexReduce(X* x, Nd4jLong* xShapeInfo,
auto tad = const_cast<X *>(x) + tadOffsets[i]; auto tad = const_cast<X *>(x) + tadOffsets[i];
auto indexValue = OpType::startingIndexValue(tad); auto indexValue = OpType::startingIndexValue(tad);
for (uint j = 0; j < tadLen; j++) { for (Nd4jLong j = 0; j < tadLen; j++) {
auto tadOffset = shape::indexOffset(j, tadShapeInfo, castTadShapeInfo, canCastTad); auto tadOffset = shape::indexOffset(j, tadShapeInfo, castTadShapeInfo, canCastTad);
functions::indexreduce::IndexValue<X> comp(tad[tadOffset], j); functions::indexreduce::IndexValue<X> comp(tad[tadOffset], j);
indexValue = OpType::update(indexValue, comp, extraParams); indexValue = OpType::update(indexValue, comp, extraParams);

View File

@ -374,7 +374,7 @@ T SVD<T>::secularEq(const T diff, const NDArray& col0, const NDArray& diag, cons
auto len = permut.lengthOf(); auto len = permut.lengthOf();
T res = 1.; T res = 1.;
T item; T item;
for(int i=0; i<len; ++i) { for(Nd4jLong i=0; i<len; ++i) {
auto j = permut.e<int>(i); auto j = permut.e<int>(i);
item = col0.e<T>(j) / ((diagShifted.e<T>(j) - diff) * (diag.e<T>(j) + shift + diff)); item = col0.e<T>(j) / ((diagShifted.e<T>(j) - diff) * (diag.e<T>(j) + shift + diff));
res += item * col0.e<T>(j); res += item * col0.e<T>(j);
@ -383,7 +383,6 @@ T SVD<T>::secularEq(const T diff, const NDArray& col0, const NDArray& diag, cons
return res; return res;
} }
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
template <typename T> template <typename T>
void SVD<T>::calcSingVals(const NDArray& col0, const NDArray& diag, const NDArray& permut, NDArray& singVals, NDArray& shifts, NDArray& mus) { void SVD<T>::calcSingVals(const NDArray& col0, const NDArray& diag, const NDArray& permut, NDArray& singVals, NDArray& shifts, NDArray& mus) {
@ -394,7 +393,7 @@ void SVD<T>::calcSingVals(const NDArray& col0, const NDArray& diag, const NDArra
while(curLen > 1 && col0.e<T>(curLen-1) == (T)0.f) while(curLen > 1 && col0.e<T>(curLen-1) == (T)0.f)
--curLen; --curLen;
for (int k = 0; k < len; ++k) { for (Nd4jLong k = 0; k < len; ++k) {
if (col0.e<T>(k) == (T)0.f || curLen==1) { if (col0.e<T>(k) == (T)0.f || curLen==1) {

View File

@ -232,13 +232,13 @@ void lstmLayerTimeLoop(const NDArray* x, const NDArray* Wx, const NDArray* Wr,
if(!h) { // seqLen and h are absent if(!h) { // seqLen and h are absent
lstmLayerCell(xSet->at(0), Wx, Wr, b, h0, c0, Wp, params, ht, ct); // first time step lstmLayerCell(xSet->at(0), Wx, Wr, b, h0, c0, Wp, params, ht, ct); // first time step
for (int t = 1; t < sL; ++t) for (Nd4jLong t = 1; t < sL; ++t)
lstmLayerCell(xSet->at(t), Wx, Wr, b, ht, ct, Wp, params, ht, ct); // rest time steps lstmLayerCell(xSet->at(t), Wx, Wr, b, ht, ct, Wp, params, ht, ct); // rest time steps
} }
else { // seqLen is absent and h is present else { // seqLen is absent and h is present
lstmLayerCell(xSet->at(0), Wx, Wr, b, h0, c0, Wp, params, hSet->at(0), ct); // first time step lstmLayerCell(xSet->at(0), Wx, Wr, b, h0, c0, Wp, params, hSet->at(0), ct); // first time step
for (int t = 1; t < sL; ++t) for (Nd4jLong t = 1; t < sL; ++t)
lstmLayerCell(xSet->at(t), Wx, Wr, b, hSet->at(t - 1), ct, Wp, params, hSet->at(t), ct); // rest time steps lstmLayerCell(xSet->at(t), Wx, Wr, b, hSet->at(t - 1), ct, Wp, params, hSet->at(t), ct); // rest time steps
if(hL) if(hL)
@ -249,7 +249,7 @@ void lstmLayerTimeLoop(const NDArray* x, const NDArray* Wx, const NDArray* Wr,
if(!h) { // seqLen is present and h is absent if(!h) { // seqLen is present and h is absent
for (int e = 0; e < bS; ++e) { for (Nd4jLong e = 0; e < bS; ++e) {
const int limit = seqLen->e<int>(e); const int limit = seqLen->e<int>(e);
@ -272,7 +272,7 @@ void lstmLayerTimeLoop(const NDArray* x, const NDArray* Wx, const NDArray* Wr,
} }
else { // seqLen and h are present else { // seqLen and h are present
for (int e = 0; e < bS; ++e) { for (Nd4jLong e = 0; e < bS; ++e) {
int limit = seqLen->e<int>(e); int limit = seqLen->e<int>(e);
@ -312,13 +312,13 @@ void lstmLayerTimeLoop(const NDArray* x, const NDArray* Wx, const NDArray* Wr,
if(!h) { // seqLen and h are absent if(!h) { // seqLen and h are absent
lstmLayerCell(xSet->at(sL - 1), Wx, Wr, b, h0, c0, Wp, params, ht, ct); // first time step lstmLayerCell(xSet->at(sL - 1), Wx, Wr, b, h0, c0, Wp, params, ht, ct); // first time step
for (int t = sL - 2; t >= 0; --t) for (Nd4jLong t = sL - 2; t >= 0; --t)
lstmLayerCell(xSet->at(t), Wx, Wr, b, ht, ct, Wp, params, ht, ct); // rest time steps lstmLayerCell(xSet->at(t), Wx, Wr, b, ht, ct, Wp, params, ht, ct); // rest time steps
} }
else { // seqLen is absent and h is present else { // seqLen is absent and h is present
lstmLayerCell(xSet->at(sL - 1), Wx, Wr, b, h0, c0, Wp, params, hSet->at(sL - 1), ct); // first time step lstmLayerCell(xSet->at(sL - 1), Wx, Wr, b, h0, c0, Wp, params, hSet->at(sL - 1), ct); // first time step
for (int t = sL - 2; t >= 0; --t) for (Nd4jLong t = sL - 2; t >= 0; --t)
lstmLayerCell(xSet->at(t), Wx, Wr, b, hSet->at(t + 1), ct, Wp, params, hSet->at(t), ct); // rest time steps lstmLayerCell(xSet->at(t), Wx, Wr, b, hSet->at(t + 1), ct, Wp, params, hSet->at(t), ct); // rest time steps
if(hL) if(hL)
@ -329,7 +329,7 @@ void lstmLayerTimeLoop(const NDArray* x, const NDArray* Wx, const NDArray* Wr,
if(!h) { // h is absent and seqLen is present if(!h) { // h is absent and seqLen is present
for (int e = 0; e < bS; ++e) { for (Nd4jLong e = 0; e < bS; ++e) {
const int limit = seqLen->e<int>(e); const int limit = seqLen->e<int>(e);
@ -344,7 +344,7 @@ void lstmLayerTimeLoop(const NDArray* x, const NDArray* Wx, const NDArray* Wr,
auto ind = getBatchTimeTotalIndex(dataFormat, sL, bS, sL - 1, e); auto ind = getBatchTimeTotalIndex(dataFormat, sL, bS, sL - 1, e);
lstmLayerCell(xSet->at(ind), Wx, Wr, b, h0Set->at(e), c0Set->at(e), Wp, params, htSet->at(e), ctSet->at(e)); // first time step lstmLayerCell(xSet->at(ind), Wx, Wr, b, h0Set->at(e), c0Set->at(e), Wp, params, htSet->at(e), ctSet->at(e)); // first time step
for (int t = sL - 2; t >= sL - limit; --t) { for (Nd4jLong t = sL - 2; t >= sL - limit; --t) {
ind = getBatchTimeTotalIndex(dataFormat, sL, bS, t, e); ind = getBatchTimeTotalIndex(dataFormat, sL, bS, t, e);
lstmLayerCell(xSet->at(ind), Wx, Wr, b, htSet->at(e), ctSet->at(e), Wp, params, htSet->at(e), ctSet->at(e)); // rest time steps lstmLayerCell(xSet->at(ind), Wx, Wr, b, htSet->at(e), ctSet->at(e), Wp, params, htSet->at(e), ctSet->at(e)); // rest time steps
} }
@ -352,7 +352,7 @@ void lstmLayerTimeLoop(const NDArray* x, const NDArray* Wx, const NDArray* Wr,
} }
else { // seqLen and h are present else { // seqLen and h are present
for (int e = 0; e < bS; ++e) { for (Nd4jLong e = 0; e < bS; ++e) {
int limit = seqLen->e<int>(e); int limit = seqLen->e<int>(e);
@ -371,7 +371,7 @@ void lstmLayerTimeLoop(const NDArray* x, const NDArray* Wx, const NDArray* Wr,
auto indPrev = getBatchTimeTotalIndex(dataFormat, sL, bS, sL - 1, e); auto indPrev = getBatchTimeTotalIndex(dataFormat, sL, bS, sL - 1, e);
lstmLayerCell(xSet->at(indPrev), Wx, Wr, b, h0Set->at(e), c0Set->at(e), Wp, params, hSet->at(indPrev), ctSet->at(e)); // first time step lstmLayerCell(xSet->at(indPrev), Wx, Wr, b, h0Set->at(e), c0Set->at(e), Wp, params, hSet->at(indPrev), ctSet->at(e)); // first time step
for (int t = sL - 2; t >= sL - limit; --t) { for (Nd4jLong t = sL - 2; t >= sL - limit; --t) {
auto indCurr = getBatchTimeTotalIndex(dataFormat, sL, bS, t, e); auto indCurr = getBatchTimeTotalIndex(dataFormat, sL, bS, t, e);
lstmLayerCell(xSet->at(indCurr), Wx, Wr, b, hSet->at(indPrev), ctSet->at(e), Wp, params, hSet->at(indCurr), ctSet->at(e)); // rest time steps lstmLayerCell(xSet->at(indCurr), Wx, Wr, b, hSet->at(indPrev), ctSet->at(e), Wp, params, hSet->at(indCurr), ctSet->at(e)); // rest time steps
indPrev = indCurr; indPrev = indCurr;
@ -388,7 +388,7 @@ void lstmLayerTimeLoop(const NDArray* x, const NDArray* Wx, const NDArray* Wr,
if(!h) { // h is absent and seqLen is present if(!h) { // h is absent and seqLen is present
for (int e = 0; e < bS; ++e) { for (Nd4jLong e = 0; e < bS; ++e) {
const int limit = seqLen->e<int>(e); const int limit = seqLen->e<int>(e);
@ -411,7 +411,7 @@ void lstmLayerTimeLoop(const NDArray* x, const NDArray* Wx, const NDArray* Wr,
} }
else { // seqLen and h are present else { // seqLen and h are present
for (int e = 0; e < bS; ++e) { for (Nd4jLong e = 0; e < bS; ++e) {
int limit = seqLen->e<int>(e); int limit = seqLen->e<int>(e);

View File

@ -80,7 +80,7 @@ namespace nd4j {
valueCoords[e] = indices.e<Nd4jLong>(e); valueCoords[e] = indices.e<Nd4jLong>(e);
// write results individually // write results individually
for (uint64_t e = 0; e < numElements; e++) { for (Nd4jLong e = 0; e < numElements; e++) {
auto vIndex = shape::coords2index(output.shapeInfo(), valueCoords.data()); auto vIndex = shape::coords2index(output.shapeInfo(), valueCoords.data());
auto cLength = 0L; auto cLength = 0L;
std::string str; std::string str;

View File

@ -33,7 +33,7 @@ namespace helpers {
std::vector<T> values; std::vector<T> values;
for (int e = 0; e < input->lengthOf(); e++) { for (Nd4jLong e = 0; e < input->lengthOf(); e++) {
T v = input->e<T>(e); T v = input->e<T>(e);
if (std::find(values.begin(), values.end(), v) == values.end()) { if (std::find(values.begin(), values.end(), v) == values.end()) {
values.push_back(v); values.push_back(v);
@ -56,7 +56,7 @@ namespace helpers {
MAP_IMPL<T, int> indicesMap; MAP_IMPL<T, int> indicesMap;
MAP_IMPL<T, int> countsMap; MAP_IMPL<T, int> countsMap;
for (int e = 0; e < input->lengthOf(); e++) { for (Nd4jLong e = 0; e < input->lengthOf(); e++) {
T v = input->e<T>(e); T v = input->e<T>(e);
if (std::find(valuesVector.begin(), valuesVector.end(), v) == valuesVector.end()) { if (std::find(valuesVector.begin(), valuesVector.end(), v) == valuesVector.end()) {
valuesVector.push_back(v); valuesVector.push_back(v);
@ -77,7 +77,7 @@ namespace helpers {
}; };
samediff::Threads::parallel_for(func, 0, values->lengthOf()); samediff::Threads::parallel_for(func, 0, values->lengthOf());
for (int e = 0; e < indices->lengthOf(); e++) { for (Nd4jLong e = 0; e < indices->lengthOf(); e++) {
auto posI = std::find(valuesVector.begin(), valuesVector.end(), input->e<T>(e)); auto posI = std::find(valuesVector.begin(), valuesVector.end(), input->e<T>(e));
auto dist = std::distance(valuesVector.begin(), posI); auto dist = std::distance(valuesVector.begin(), posI);
indices->p(e, Nd4jLong(dist));//indicesMap[(*input)(e)]; indices->p(e, Nd4jLong(dist));//indicesMap[(*input)(e)];

View File

@ -30,7 +30,7 @@ namespace nd4j {
int cnt = 0; int cnt = 0;
Nd4jLong idx[MAX_RANK]; Nd4jLong idx[MAX_RANK];
for (int e = 0; e < condition.lengthOf(); e++) { for (Nd4jLong e = 0; e < condition.lengthOf(); e++) {
shape::index2coords(e, condition.getShapeInfo(), idx); shape::index2coords(e, condition.getShapeInfo(), idx);
auto offset = shape::getOffset(condition.getShapeInfo(), idx); auto offset = shape::getOffset(condition.getShapeInfo(), idx);