Loops auto-vectorization problem fix (#277)
* libnd4j cast loop types
  Signed-off-by: Oleg <oleg.semeniv@gmail.com>
* libnd4j more type casting added to loops
  Signed-off-by: Oleg <oleg.semeniv@gmail.com>
* libnd4j sync casting types of iterated variable in loops
  Signed-off-by: Oleg <oleg.semeniv@gmail.com>
* libnd4j more loops reviewed for vectorization problem fix
  Signed-off-by: Oleg <oleg.semeniv@gmail.com>
* libnd4j fixed several typos
  Signed-off-by: Oleg <oleg.semeniv@gmail.com>
* libnd4j several more files reviewed to fix auto-vectorization problem in loops
  Signed-off-by: Oleg <oleg.semeniv@gmail.com>
* libnd4j merge master and reviewed more files to fix auto-vectorization problem in loops
  Signed-off-by: Oleg <oleg.semeniv@gmail.com>
* libnd4j several type casts added in broadcasting that were missed, fixed mac builds
  Signed-off-by: Oleg <oleg.semeniv@gmail.com>
* libnd4j double check all files and fix several more places in loops
  Signed-off-by: Oleg <oleg.semeniv@gmail.com>
* libnd4j fixed builds
  Signed-off-by: Oleg <oleg.semeniv@gmail.com>
* libnd4j revert changes for lup.cpp
  Signed-off-by: Oleg <oleg.semeniv@gmail.com>
* libnd4j more files reviewed for auto-vectorization problem fix
  Signed-off-by: Oleg <oleg.semeniv@gmail.com>
master
parent
5332ace32b
commit
f116f53d61
|
@ -1702,7 +1702,7 @@ bool NDArray::isSameShape(const std::vector<Nd4jLong>& shape) const{
|
|||
if (this->rankOf() != (int) shape.size())
|
||||
return false;
|
||||
for (int e = 0; e < this->rankOf(); e++) {
|
||||
if (this->shapeOf()[e] != shape.at(e) && shape.at(e) != -1)
|
||||
if (this->shapeOf()[e] != shape[e] && shape[e] != -1)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
|
|
|
@ -980,7 +980,7 @@ std::string NDArray::asString(Nd4jLong limit) {
|
|||
template<typename T>
|
||||
std::vector<T> NDArray::getBufferAsVector() {
|
||||
std::vector<T> vector(lengthOf());
|
||||
for (int e = 0; e < lengthOf(); e++)
|
||||
for (Nd4jLong e = 0; e < lengthOf(); e++)
|
||||
vector[e] = this->e<T>(e);
|
||||
return vector;
|
||||
}
|
||||
|
@ -2128,12 +2128,12 @@ bool NDArray::isIdentityMatrix() {
|
|||
throw std::runtime_error("isIdentityMatrix method: matrix must be square and have rank = 2 !");
|
||||
|
||||
const double eps = 1e-5f;
|
||||
for(int i=0; i<rows(); ++i)
|
||||
for(Nd4jLong i=0; i<rows(); ++i)
|
||||
if(nd4j::math::nd4j_abs(e<double>(i,i) - 1.f) > eps)
|
||||
return false;
|
||||
|
||||
for(int i=0; i<rows(); ++i) {
|
||||
for(int j=0; j<columns(); ++j) {
|
||||
for(Nd4jLong i=0; i<rows(); ++i) {
|
||||
for(Nd4jLong j=0; j<columns(); ++j) {
|
||||
if (i == j)
|
||||
continue;
|
||||
if(nd4j::math::nd4j_abs(e<double>(i,j)) > eps)
|
||||
|
@ -2335,7 +2335,7 @@ NDArray NDArray::asS() const {
|
|||
Nd4jLong dataLength = 0;
|
||||
|
||||
auto data = bufferAsT<int8_t>() + offsetsLength;
|
||||
for (int e = 0; e < lengthOf(); e++) {
|
||||
for (Nd4jLong e = 0; e < lengthOf(); e++) {
|
||||
offsets[e] = dataLength;
|
||||
start = nInputoffsets[e];
|
||||
stop = nInputoffsets[e + 1];
|
||||
|
@ -3524,7 +3524,7 @@ bool NDArray::equalsTo(const NDArray *other, double eps) const {
|
|||
// string is special case, we'll compare them one by one, considering both arrays are guaranteed to have the same length
|
||||
|
||||
if (dataType() == DataType::UTF8) {
|
||||
for (int e = 0; e < this->lengthOf(); e++) {
|
||||
for (Nd4jLong e = 0; e < this->lengthOf(); e++) {
|
||||
auto s1 = this->e<std::string>(e);
|
||||
auto s2 = other->e<std::string>(e);
|
||||
|
||||
|
@ -3533,7 +3533,7 @@ bool NDArray::equalsTo(const NDArray *other, double eps) const {
|
|||
}
|
||||
}
|
||||
else if (dataType() == DataType::UTF16) {
|
||||
for (int e = 0; e < this->lengthOf(); e++) {
|
||||
for (Nd4jLong e = 0; e < this->lengthOf(); e++) {
|
||||
auto s1 = this->e<std::u16string>(e);
|
||||
auto s2 = other->e<std::u16string>(e);
|
||||
|
||||
|
@ -3542,7 +3542,7 @@ bool NDArray::equalsTo(const NDArray *other, double eps) const {
|
|||
}
|
||||
}
|
||||
else {
|
||||
for (int e = 0; e < this->lengthOf(); e++) {
|
||||
for (Nd4jLong e = 0; e < this->lengthOf(); e++) {
|
||||
auto s1 = this->e<std::u32string>(e);
|
||||
auto s2 = other->e<std::u32string>(e);
|
||||
|
||||
|
@ -4801,7 +4801,7 @@ ResultSet NDArray::allTensorsAlongDimension(const std::vector<int> &dimensions)
|
|||
auto pack = ConstantTadHelper::getInstance()->tadForDimensions(_shapeInfo, const_cast<int*>(dimensions.data()), dimensions.size());
|
||||
auto numTads = pack.numberOfTads();
|
||||
|
||||
for (int idx = 0; idx < numTads; idx++ ) {
|
||||
for (Nd4jLong idx = 0; idx < numTads; idx++ ) {
|
||||
auto array = new NDArray(_buffer, ShapeDescriptor(pack.primaryShapeInfo()), getContext(), pack.primaryOffsets()[idx] + getBufferOffset());
|
||||
array->_isView = true;
|
||||
result.push_back(array);
|
||||
|
@ -4872,7 +4872,7 @@ NDArray NDArray::operator()(const std::vector<Nd4jLong>& idx, const bool keepUni
|
|||
|
||||
std::vector<int> dimsWithUnities;
|
||||
|
||||
for (uint d = 0; d < rank; ++d)
|
||||
for (int d = 0; d < rank; ++d)
|
||||
if(idx[n*d] != idx[n*d+1] && shapeOf[d] == 1)
|
||||
dimsWithUnities.push_back(d);
|
||||
|
||||
|
|
|
@ -308,7 +308,7 @@ void NDArray::tile(const std::vector<Nd4jLong>& reps, NDArray& target) const {
|
|||
// fill newBuff, loop through all elements of newBuff
|
||||
// looping through _buffer goes automatically by means of getSubArrayIndex applying
|
||||
const int ews = target.ews();
|
||||
const int targetLen = target.lengthOf();
|
||||
const auto targetLen = target.lengthOf();
|
||||
if(target.ordering() == 'c' && ews == 1) { // ews == 1 always here
|
||||
//#pragma omp parallel for simd if(targetLen > Environment::getInstance()->elementwiseThreshold()) schedule(guided)
|
||||
for(Nd4jLong i=0; i<targetLen; ++i) {
|
||||
|
@ -372,7 +372,7 @@ static void repeat_(const NDArray& input, NDArray& output, const std::vector<int
|
|||
|
||||
const int rank = input.rankOf(); // xRank = zRank
|
||||
const int zLen = output.lengthOf(); // xLen <= zLen
|
||||
const int repSize = repeats.size();
|
||||
const uint repSize = repeats.size();
|
||||
|
||||
// loop through input array
|
||||
auto func = PRAGMA_THREADS_FOR {
|
||||
|
|
|
@ -1300,16 +1300,16 @@ void pullRowsGeneric(void *vx,
|
|||
|
||||
if (xEWS == 1 && zEWS == 1) {
|
||||
PRAGMA_OMP_SIMD
|
||||
for (int i = 0; i < tadLength; i++) {
|
||||
for (Nd4jLong i = 0; i < tadLength; i++) {
|
||||
rZ[i] = rX[i];
|
||||
}
|
||||
} else if (xEWS >= 1 && zEWS >= 1) {
|
||||
PRAGMA_OMP_SIMD
|
||||
for (int i = 0; i < tadLength; i++) {
|
||||
for (Nd4jLong i = 0; i < tadLength; i++) {
|
||||
rZ[i * zEWS] = rX[i * xEWS];
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < tadLength; i++) {
|
||||
for (Nd4jLong i = 0; i < tadLength; i++) {
|
||||
auto xOffset = xTadOffsetForBlock + shape::getIndexOffset(i, tadShapeInfo);
|
||||
auto zOffset = zTadOffsetForBlock + shape::getIndexOffset(i, zTadShapeInfo);
|
||||
hZ[zOffset] = hX[xOffset];
|
||||
|
|
|
@ -78,7 +78,7 @@ static void usualGemm(const NDArray* vA, const NDArray* vB, NDArray* vC,
|
|||
|
||||
T3 val = A[aOffset] * B[bOffset]; // first iteration
|
||||
|
||||
for (uint j = 1; j < K; ++j) { // rest iterations
|
||||
for (int j = 1; j < K; ++j) { // rest iterations
|
||||
aOffset += shape::stride(aShapeInfo)[aKaxis];
|
||||
bOffset += shape::stride(bShapeInfo)[bKaxis];
|
||||
val = val + A[aOffset] * B[bOffset];
|
||||
|
@ -131,7 +131,7 @@ static void usualGemv(const NDArray* vA, const NDArray* vX, NDArray* vY, const
|
|||
|
||||
T3 val = A[aOffset] * X[xOffset]; // first iteration
|
||||
|
||||
for (uint j = 1; j < N; ++j) { // rest iterations
|
||||
for (int j = 1; j < N; ++j) { // rest iterations
|
||||
aOffset += aNstride;
|
||||
xOffset += incx;
|
||||
val = val + A[aOffset] * X[xOffset];
|
||||
|
@ -163,7 +163,7 @@ static void usualDot(const Nd4jLong length, const double alpha, const void* vX,
|
|||
|
||||
T3 sum = 0;
|
||||
PRAGMA_OMP_PARALLEL_FOR_ARGS(OMP_IF(length > Environment::getInstance()->elementwiseThreshold()) schedule(guided) reduction(OMP_SUMT:sum))
|
||||
for(int i = 0; i < length; ++i)
|
||||
for(Nd4jLong i = 0; i < length; ++i)
|
||||
sum += X[i * incx] * Y[i * incy];
|
||||
|
||||
if(betaPersent)
|
||||
|
@ -462,7 +462,7 @@ static void batchedGemm(const NDArray* vA, const NDArray* vB, NDArray* vC,
|
|||
|
||||
T3 val = A[aOffset] * B[bOffset]; // first iteration
|
||||
|
||||
for (uint j = 1; j < K; ++j) { // rest iterations
|
||||
for (int j = 1; j < K; ++j) { // rest iterations
|
||||
aOffset += shape::stride(aShapeInfo)[aKaxis];
|
||||
bOffset += shape::stride(bShapeInfo)[bKaxis];
|
||||
val = val + A[aOffset] * B[bOffset];
|
||||
|
|
|
@ -58,7 +58,7 @@ BiDiagonalUp::BiDiagonalUp(const NDArray& matrix): _HHmatrix(nd4j::NDArrayFactor
|
|||
|
||||
T _x, _y;
|
||||
|
||||
for(int i = 0; i < cols-1; ++i ) {
|
||||
for(Nd4jLong i = 0; i < cols-1; ++i ) {
|
||||
|
||||
// evaluate Householder matrix nullifying columns
|
||||
column = new NDArray(_HHmatrix({i,rows, i,i+1}, true));
|
||||
|
|
|
@ -53,7 +53,7 @@ void nd4j::IndexReductionLoops<X,Z>::loopIndexReduce(X* x, Nd4jLong* xShapeInfo,
|
|||
auto tad = const_cast<X *>(x) + tadOffsets[i];
|
||||
auto indexValue = OpType::startingIndexValue(tad);
|
||||
|
||||
for (uint j = 0; j < tadLen; j++) {
|
||||
for (Nd4jLong j = 0; j < tadLen; j++) {
|
||||
functions::indexreduce::IndexValue<X> comp(tad[j], j);
|
||||
indexValue = OpType::update(indexValue, comp, extraParams);
|
||||
}
|
||||
|
@ -74,7 +74,7 @@ void nd4j::IndexReductionLoops<X,Z>::loopIndexReduce(X* x, Nd4jLong* xShapeInfo,
|
|||
auto tad = const_cast<X *>(x) + tadOffsets[i];
|
||||
auto indexValue = OpType::startingIndexValue(tad);
|
||||
|
||||
for (uint j = 0; j < tadLen; j++) {
|
||||
for (Nd4jLong j = 0; j < tadLen; j++) {
|
||||
functions::indexreduce::IndexValue<X> comp(tad[j * tadEws], j);
|
||||
indexValue = OpType::update(indexValue, comp, extraParams);
|
||||
}
|
||||
|
@ -95,7 +95,7 @@ void nd4j::IndexReductionLoops<X,Z>::loopIndexReduce(X* x, Nd4jLong* xShapeInfo,
|
|||
auto tad = const_cast<X *>(x) + tadOffsets[i];
|
||||
auto indexValue = OpType::startingIndexValue(tad);
|
||||
|
||||
for (uint i0 = 0; i0 < tadLen; ++i0) {
|
||||
for (Nd4jLong i0 = 0; i0 < tadLen; ++i0) {
|
||||
functions::indexreduce::IndexValue<X> comp(tad[i0 * tadStride[0]], i0);
|
||||
indexValue = OpType::update(indexValue, comp, extraParams);
|
||||
}
|
||||
|
@ -118,8 +118,8 @@ void nd4j::IndexReductionLoops<X,Z>::loopIndexReduce(X* x, Nd4jLong* xShapeInfo,
|
|||
auto tad = const_cast<X *>(x) + tadOffsets[i];
|
||||
auto indexValue = OpType::startingIndexValue(tad);
|
||||
|
||||
for (uint i0 = 0; i0 < tadShape[0]; ++i0) {
|
||||
for (uint i1 = 0; i1 < tadShape[1]; ++i1) {
|
||||
for (Nd4jLong i0 = 0; i0 < tadShape[0]; ++i0) {
|
||||
for (Nd4jLong i1 = 0; i1 < tadShape[1]; ++i1) {
|
||||
const auto tadOffset = i0 * tadStride[0] + i1 * tadStride[1];
|
||||
const auto tadIndex = i0 * newStride[0] + i1;
|
||||
functions::indexreduce::IndexValue<X> comp(tad[tadOffset], tadIndex);
|
||||
|
@ -145,9 +145,9 @@ void nd4j::IndexReductionLoops<X,Z>::loopIndexReduce(X* x, Nd4jLong* xShapeInfo,
|
|||
auto tad = const_cast<X *>(x) + tadOffsets[i];
|
||||
auto indexValue = OpType::startingIndexValue(tad);
|
||||
|
||||
for (uint i0 = 0; i0 < tadShape[0]; ++i0) {
|
||||
for (uint i1 = 0; i1 < tadShape[1]; ++i1) {
|
||||
for (uint i2 = 0; i2 < tadShape[2]; ++i2) {
|
||||
for (Nd4jLong i0 = 0; i0 < tadShape[0]; ++i0) {
|
||||
for (Nd4jLong i1 = 0; i1 < tadShape[1]; ++i1) {
|
||||
for (Nd4jLong i2 = 0; i2 < tadShape[2]; ++i2) {
|
||||
const auto tadOffset = i0 * tadStride[0] + i1 * tadStride[1] + i2 * tadStride[2];
|
||||
const auto tadIndex = i0 * newStride[0] + i1 * newStride[1] + i2;
|
||||
functions::indexreduce::IndexValue<X> comp(tad[tadOffset], tadIndex);
|
||||
|
@ -174,10 +174,10 @@ void nd4j::IndexReductionLoops<X,Z>::loopIndexReduce(X* x, Nd4jLong* xShapeInfo,
|
|||
auto tad = const_cast<X *>(x) + tadOffsets[i];
|
||||
auto indexValue = OpType::startingIndexValue(tad);
|
||||
|
||||
for (uint i0 = 0; i0 < tadShape[0]; ++i0) {
|
||||
for (uint i1 = 0; i1 < tadShape[1]; ++i1) {
|
||||
for (uint i2 = 0; i2 < tadShape[2]; ++i2) {
|
||||
for (uint i3 = 0; i3 < tadShape[3]; ++i3) {
|
||||
for (Nd4jLong i0 = 0; i0 < tadShape[0]; ++i0) {
|
||||
for (Nd4jLong i1 = 0; i1 < tadShape[1]; ++i1) {
|
||||
for (Nd4jLong i2 = 0; i2 < tadShape[2]; ++i2) {
|
||||
for (Nd4jLong i3 = 0; i3 < tadShape[3]; ++i3) {
|
||||
const auto tadOffset = i0 * tadStride[0] + i1 * tadStride[1] + i2 * tadStride[2] + i3 * tadStride[3];
|
||||
const auto tadIndex = i0 * newStride[0] + i1 * newStride[1] + i2 * newStride[2] + i3;
|
||||
functions::indexreduce::IndexValue<X> comp(tad[tadOffset], tadIndex);
|
||||
|
@ -205,11 +205,11 @@ void nd4j::IndexReductionLoops<X,Z>::loopIndexReduce(X* x, Nd4jLong* xShapeInfo,
|
|||
auto tad = const_cast<X *>(x) + tadOffsets[i];
|
||||
auto indexValue = OpType::startingIndexValue(tad);
|
||||
|
||||
for (uint i0 = 0; i0 < tadShape[0]; ++i0) {
|
||||
for (uint i1 = 0; i1 < tadShape[1]; ++i1) {
|
||||
for (uint i2 = 0; i2 < tadShape[2]; ++i2) {
|
||||
for (uint i3 = 0; i3 < tadShape[3]; ++i3) {
|
||||
for (uint i4 = 0; i4 < tadShape[4]; ++i4) {
|
||||
for (Nd4jLong i0 = 0; i0 < tadShape[0]; ++i0) {
|
||||
for (Nd4jLong i1 = 0; i1 < tadShape[1]; ++i1) {
|
||||
for (Nd4jLong i2 = 0; i2 < tadShape[2]; ++i2) {
|
||||
for (Nd4jLong i3 = 0; i3 < tadShape[3]; ++i3) {
|
||||
for (Nd4jLong i4 = 0; i4 < tadShape[4]; ++i4) {
|
||||
const auto tadOffset = i0 * tadStride[0] + i1 * tadStride[1] + i2 * tadStride[2] + i3 * tadStride[3] + i4 * tadStride[4];
|
||||
const auto tadIndex = i0 * newStride[0] + i1 * newStride[1] + i2 * newStride[2] + i3 * newStride[3] + i4;
|
||||
functions::indexreduce::IndexValue<X> comp(tad[tadOffset], tadIndex);
|
||||
|
@ -238,7 +238,7 @@ void nd4j::IndexReductionLoops<X,Z>::loopIndexReduce(X* x, Nd4jLong* xShapeInfo,
|
|||
auto tad = const_cast<X *>(x) + tadOffsets[i];
|
||||
auto indexValue = OpType::startingIndexValue(tad);
|
||||
|
||||
for (uint j = 0; j < tadLen; j++) {
|
||||
for (Nd4jLong j = 0; j < tadLen; j++) {
|
||||
functions::indexreduce::IndexValue<X> comp(tad[j * tadEws], j);
|
||||
indexValue = OpType::update(indexValue, comp, extraParams);
|
||||
}
|
||||
|
@ -262,7 +262,7 @@ void nd4j::IndexReductionLoops<X,Z>::loopIndexReduce(X* x, Nd4jLong* xShapeInfo,
|
|||
auto tad = const_cast<X *>(x) + tadOffsets[i];
|
||||
auto indexValue = OpType::startingIndexValue(tad);
|
||||
|
||||
for (uint j = 0; j < tadLen; j++) {
|
||||
for (Nd4jLong j = 0; j < tadLen; j++) {
|
||||
auto tadOffset = shape::indexOffset(j, tadShapeInfo, castTadShapeInfo, canCastTad);
|
||||
functions::indexreduce::IndexValue<X> comp(tad[tadOffset], j);
|
||||
indexValue = OpType::update(indexValue, comp, extraParams);
|
||||
|
@ -288,7 +288,7 @@ void nd4j::IndexReductionLoops<X,Z>::loopIndexReduce(X* x, Nd4jLong* xShapeInfo,
|
|||
auto tad = const_cast<X *>(x) + tadOffsets[i];
|
||||
auto indexValue = OpType::startingIndexValue(tad);
|
||||
|
||||
for (uint j = 0; j < tadLen; j++) {
|
||||
for (Nd4jLong j = 0; j < tadLen; j++) {
|
||||
auto tadOffset = shape::indexOffset(j, tadShapeInfo, castTadShapeInfo, canCastTad);
|
||||
functions::indexreduce::IndexValue<X> comp(tad[tadOffset], j);
|
||||
indexValue = OpType::update(indexValue, comp, extraParams);
|
||||
|
|
|
@ -374,7 +374,7 @@ T SVD<T>::secularEq(const T diff, const NDArray& col0, const NDArray& diag, cons
|
|||
auto len = permut.lengthOf();
|
||||
T res = 1.;
|
||||
T item;
|
||||
for(int i=0; i<len; ++i) {
|
||||
for(Nd4jLong i=0; i<len; ++i) {
|
||||
auto j = permut.e<int>(i);
|
||||
item = col0.e<T>(j) / ((diagShifted.e<T>(j) - diff) * (diag.e<T>(j) + shift + diff));
|
||||
res += item * col0.e<T>(j);
|
||||
|
@ -383,7 +383,6 @@ T SVD<T>::secularEq(const T diff, const NDArray& col0, const NDArray& diag, cons
|
|||
return res;
|
||||
}
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
template <typename T>
|
||||
void SVD<T>::calcSingVals(const NDArray& col0, const NDArray& diag, const NDArray& permut, NDArray& singVals, NDArray& shifts, NDArray& mus) {
|
||||
|
@ -394,7 +393,7 @@ void SVD<T>::calcSingVals(const NDArray& col0, const NDArray& diag, const NDArra
|
|||
while(curLen > 1 && col0.e<T>(curLen-1) == (T)0.f)
|
||||
--curLen;
|
||||
|
||||
for (int k = 0; k < len; ++k) {
|
||||
for (Nd4jLong k = 0; k < len; ++k) {
|
||||
|
||||
if (col0.e<T>(k) == (T)0.f || curLen==1) {
|
||||
|
||||
|
|
|
@ -232,13 +232,13 @@ void lstmLayerTimeLoop(const NDArray* x, const NDArray* Wx, const NDArray* Wr,
|
|||
if(!h) { // seqLen and h are absent
|
||||
|
||||
lstmLayerCell(xSet->at(0), Wx, Wr, b, h0, c0, Wp, params, ht, ct); // first time step
|
||||
for (int t = 1; t < sL; ++t)
|
||||
for (Nd4jLong t = 1; t < sL; ++t)
|
||||
lstmLayerCell(xSet->at(t), Wx, Wr, b, ht, ct, Wp, params, ht, ct); // rest time steps
|
||||
}
|
||||
else { // seqLen is absent and h is present
|
||||
|
||||
lstmLayerCell(xSet->at(0), Wx, Wr, b, h0, c0, Wp, params, hSet->at(0), ct); // first time step
|
||||
for (int t = 1; t < sL; ++t)
|
||||
for (Nd4jLong t = 1; t < sL; ++t)
|
||||
lstmLayerCell(xSet->at(t), Wx, Wr, b, hSet->at(t - 1), ct, Wp, params, hSet->at(t), ct); // rest time steps
|
||||
|
||||
if(hL)
|
||||
|
@ -249,7 +249,7 @@ void lstmLayerTimeLoop(const NDArray* x, const NDArray* Wx, const NDArray* Wr,
|
|||
|
||||
if(!h) { // seqLen is present and h is absent
|
||||
|
||||
for (int e = 0; e < bS; ++e) {
|
||||
for (Nd4jLong e = 0; e < bS; ++e) {
|
||||
|
||||
const int limit = seqLen->e<int>(e);
|
||||
|
||||
|
@ -272,7 +272,7 @@ void lstmLayerTimeLoop(const NDArray* x, const NDArray* Wx, const NDArray* Wr,
|
|||
}
|
||||
else { // seqLen and h are present
|
||||
|
||||
for (int e = 0; e < bS; ++e) {
|
||||
for (Nd4jLong e = 0; e < bS; ++e) {
|
||||
|
||||
int limit = seqLen->e<int>(e);
|
||||
|
||||
|
@ -312,13 +312,13 @@ void lstmLayerTimeLoop(const NDArray* x, const NDArray* Wx, const NDArray* Wr,
|
|||
if(!h) { // seqLen and h are absent
|
||||
|
||||
lstmLayerCell(xSet->at(sL - 1), Wx, Wr, b, h0, c0, Wp, params, ht, ct); // first time step
|
||||
for (int t = sL - 2; t >= 0; --t)
|
||||
for (Nd4jLong t = sL - 2; t >= 0; --t)
|
||||
lstmLayerCell(xSet->at(t), Wx, Wr, b, ht, ct, Wp, params, ht, ct); // rest time steps
|
||||
}
|
||||
else { // seqLen is absent and h is present
|
||||
|
||||
lstmLayerCell(xSet->at(sL - 1), Wx, Wr, b, h0, c0, Wp, params, hSet->at(sL - 1), ct); // first time step
|
||||
for (int t = sL - 2; t >= 0; --t)
|
||||
for (Nd4jLong t = sL - 2; t >= 0; --t)
|
||||
lstmLayerCell(xSet->at(t), Wx, Wr, b, hSet->at(t + 1), ct, Wp, params, hSet->at(t), ct); // rest time steps
|
||||
|
||||
if(hL)
|
||||
|
@ -329,7 +329,7 @@ void lstmLayerTimeLoop(const NDArray* x, const NDArray* Wx, const NDArray* Wr,
|
|||
|
||||
if(!h) { // h is absent and seqLen is present
|
||||
|
||||
for (int e = 0; e < bS; ++e) {
|
||||
for (Nd4jLong e = 0; e < bS; ++e) {
|
||||
|
||||
const int limit = seqLen->e<int>(e);
|
||||
|
||||
|
@ -344,7 +344,7 @@ void lstmLayerTimeLoop(const NDArray* x, const NDArray* Wx, const NDArray* Wr,
|
|||
auto ind = getBatchTimeTotalIndex(dataFormat, sL, bS, sL - 1, e);
|
||||
lstmLayerCell(xSet->at(ind), Wx, Wr, b, h0Set->at(e), c0Set->at(e), Wp, params, htSet->at(e), ctSet->at(e)); // first time step
|
||||
|
||||
for (int t = sL - 2; t >= sL - limit; --t) {
|
||||
for (Nd4jLong t = sL - 2; t >= sL - limit; --t) {
|
||||
ind = getBatchTimeTotalIndex(dataFormat, sL, bS, t, e);
|
||||
lstmLayerCell(xSet->at(ind), Wx, Wr, b, htSet->at(e), ctSet->at(e), Wp, params, htSet->at(e), ctSet->at(e)); // rest time steps
|
||||
}
|
||||
|
@ -352,7 +352,7 @@ void lstmLayerTimeLoop(const NDArray* x, const NDArray* Wx, const NDArray* Wr,
|
|||
}
|
||||
else { // seqLen and h are present
|
||||
|
||||
for (int e = 0; e < bS; ++e) {
|
||||
for (Nd4jLong e = 0; e < bS; ++e) {
|
||||
|
||||
int limit = seqLen->e<int>(e);
|
||||
|
||||
|
@ -371,7 +371,7 @@ void lstmLayerTimeLoop(const NDArray* x, const NDArray* Wx, const NDArray* Wr,
|
|||
auto indPrev = getBatchTimeTotalIndex(dataFormat, sL, bS, sL - 1, e);
|
||||
lstmLayerCell(xSet->at(indPrev), Wx, Wr, b, h0Set->at(e), c0Set->at(e), Wp, params, hSet->at(indPrev), ctSet->at(e)); // first time step
|
||||
|
||||
for (int t = sL - 2; t >= sL - limit; --t) {
|
||||
for (Nd4jLong t = sL - 2; t >= sL - limit; --t) {
|
||||
auto indCurr = getBatchTimeTotalIndex(dataFormat, sL, bS, t, e);
|
||||
lstmLayerCell(xSet->at(indCurr), Wx, Wr, b, hSet->at(indPrev), ctSet->at(e), Wp, params, hSet->at(indCurr), ctSet->at(e)); // rest time steps
|
||||
indPrev = indCurr;
|
||||
|
@ -388,7 +388,7 @@ void lstmLayerTimeLoop(const NDArray* x, const NDArray* Wx, const NDArray* Wr,
|
|||
|
||||
if(!h) { // h is absent and seqLen is present
|
||||
|
||||
for (int e = 0; e < bS; ++e) {
|
||||
for (Nd4jLong e = 0; e < bS; ++e) {
|
||||
|
||||
const int limit = seqLen->e<int>(e);
|
||||
|
||||
|
@ -411,7 +411,7 @@ void lstmLayerTimeLoop(const NDArray* x, const NDArray* Wx, const NDArray* Wr,
|
|||
}
|
||||
else { // seqLen and h are present
|
||||
|
||||
for (int e = 0; e < bS; ++e) {
|
||||
for (Nd4jLong e = 0; e < bS; ++e) {
|
||||
|
||||
int limit = seqLen->e<int>(e);
|
||||
|
||||
|
|
|
@ -80,7 +80,7 @@ namespace nd4j {
|
|||
valueCoords[e] = indices.e<Nd4jLong>(e);
|
||||
|
||||
// write results individually
|
||||
for (uint64_t e = 0; e < numElements; e++) {
|
||||
for (Nd4jLong e = 0; e < numElements; e++) {
|
||||
auto vIndex = shape::coords2index(output.shapeInfo(), valueCoords.data());
|
||||
auto cLength = 0L;
|
||||
std::string str;
|
||||
|
|
|
@ -33,7 +33,7 @@ namespace helpers {
|
|||
|
||||
std::vector<T> values;
|
||||
|
||||
for (int e = 0; e < input->lengthOf(); e++) {
|
||||
for (Nd4jLong e = 0; e < input->lengthOf(); e++) {
|
||||
T v = input->e<T>(e);
|
||||
if (std::find(values.begin(), values.end(), v) == values.end()) {
|
||||
values.push_back(v);
|
||||
|
@ -56,7 +56,7 @@ namespace helpers {
|
|||
MAP_IMPL<T, int> indicesMap;
|
||||
MAP_IMPL<T, int> countsMap;
|
||||
|
||||
for (int e = 0; e < input->lengthOf(); e++) {
|
||||
for (Nd4jLong e = 0; e < input->lengthOf(); e++) {
|
||||
T v = input->e<T>(e);
|
||||
if (std::find(valuesVector.begin(), valuesVector.end(), v) == valuesVector.end()) {
|
||||
valuesVector.push_back(v);
|
||||
|
@ -77,7 +77,7 @@ namespace helpers {
|
|||
};
|
||||
samediff::Threads::parallel_for(func, 0, values->lengthOf());
|
||||
|
||||
for (int e = 0; e < indices->lengthOf(); e++) {
|
||||
for (Nd4jLong e = 0; e < indices->lengthOf(); e++) {
|
||||
auto posI = std::find(valuesVector.begin(), valuesVector.end(), input->e<T>(e));
|
||||
auto dist = std::distance(valuesVector.begin(), posI);
|
||||
indices->p(e, Nd4jLong(dist));//indicesMap[(*input)(e)];
|
||||
|
|
|
@ -30,7 +30,7 @@ namespace nd4j {
|
|||
int cnt = 0;
|
||||
|
||||
Nd4jLong idx[MAX_RANK];
|
||||
for (int e = 0; e < condition.lengthOf(); e++) {
|
||||
for (Nd4jLong e = 0; e < condition.lengthOf(); e++) {
|
||||
shape::index2coords(e, condition.getShapeInfo(), idx);
|
||||
|
||||
auto offset = shape::getOffset(condition.getShapeInfo(), idx);
|
||||
|
|
Loading…
Reference in New Issue