parent
							
								
									11bddb3825
								
							
						
					
					
						commit
						f49c4ea9d0
					
				@ -2938,7 +2938,7 @@ bool NDArray::reshapei(const char order, const std::vector<Nd4jLong>& cshape) {
 | 
				
			|||||||
    if (numberNegativesOnes > 0)
 | 
					    if (numberNegativesOnes > 0)
 | 
				
			||||||
        delete[] shape_;
 | 
					        delete[] shape_;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    int arrLength = 1;
 | 
					    Nd4jLong arrLength = 1;
 | 
				
			||||||
    for(const auto& item : shape)
 | 
					    for(const auto& item : shape)
 | 
				
			||||||
        arrLength *= item;
 | 
					        arrLength *= item;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -153,7 +153,7 @@ static void templatedSwap(void *xBuffer, void *yBuffer, Nd4jLong length) {
 | 
				
			|||||||
    auto y = reinterpret_cast<T *>(yBuffer);
 | 
					    auto y = reinterpret_cast<T *>(yBuffer);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    PRAGMA_OMP_PARALLEL_FOR_SIMD_ARGS(schedule(static))
 | 
					    PRAGMA_OMP_PARALLEL_FOR_SIMD_ARGS(schedule(static))
 | 
				
			||||||
    for (int i = 0; i < length; ++i) {
 | 
					    for (Nd4jLong i = 0; i < length; ++i) {
 | 
				
			||||||
        auto temp = x[i];
 | 
					        auto temp = x[i];
 | 
				
			||||||
        x[i] = y[i];
 | 
					        x[i] = y[i];
 | 
				
			||||||
        y[i] = temp;
 | 
					        y[i] = temp;
 | 
				
			||||||
@ -272,7 +272,7 @@ NDArray NDArray::tile(const std::vector<Nd4jLong>& reps) const {
 | 
				
			|||||||
    else {
 | 
					    else {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        PRAGMA_OMP_PARALLEL_FOR_SIMD
 | 
					        PRAGMA_OMP_PARALLEL_FOR_SIMD
 | 
				
			||||||
        for(int i=0;  i<resultLen; ++i) {
 | 
					        for(Nd4jLong i=0;  i<resultLen; ++i) {
 | 
				
			||||||
            auto xOffset = result.getOffset(i);
 | 
					            auto xOffset = result.getOffset(i);
 | 
				
			||||||
            auto yOffset = shape::subArrayOffset(i, newShapeInfo, getShapeInfo());
 | 
					            auto yOffset = shape::subArrayOffset(i, newShapeInfo, getShapeInfo());
 | 
				
			||||||
            BUILD_SINGLE_SELECTOR(xType, this->template templatedAssign, (result.getBuffer(), xOffset, this->getBuffer(), yOffset), LIBND4J_TYPES);
 | 
					            BUILD_SINGLE_SELECTOR(xType, this->template templatedAssign, (result.getBuffer(), xOffset, this->getBuffer(), yOffset), LIBND4J_TYPES);
 | 
				
			||||||
@ -305,15 +305,14 @@ void NDArray::tile(const std::vector<Nd4jLong>& reps, NDArray& target) const {
 | 
				
			|||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    else if(target.ordering() == 'c' && ews > 1) {
 | 
					    else if(target.ordering() == 'c' && ews > 1) {
 | 
				
			||||||
//#pragma omp parallel for simd if(targetLen > Environment::getInstance()->elementwiseThreshold()) schedule(guided)
 | 
					        for(Nd4jLong i=0;  i<targetLen; ++i) {
 | 
				
			||||||
        for(int i=0;  i<targetLen; ++i) {
 | 
					 | 
				
			||||||
            auto yOffset = shape::subArrayOffset(i, target.getShapeInfo(), getShapeInfo());
 | 
					            auto yOffset = shape::subArrayOffset(i, target.getShapeInfo(), getShapeInfo());
 | 
				
			||||||
            BUILD_DOUBLE_SELECTOR(target.dataType(), dataType(), templatedDoubleAssign, (target.getBuffer(), i*ews, getBuffer(), yOffset), LIBND4J_TYPES, LIBND4J_TYPES);
 | 
					            BUILD_DOUBLE_SELECTOR(target.dataType(), dataType(), templatedDoubleAssign, (target.getBuffer(), i*ews, getBuffer(), yOffset), LIBND4J_TYPES, LIBND4J_TYPES);
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    else {
 | 
					    else {
 | 
				
			||||||
//#pragma omp parallel for simd if(targetLen > Environment::getInstance()->elementwiseThreshold()) schedule(guided)
 | 
					
 | 
				
			||||||
        for(int i=0;  i<targetLen; ++i) {
 | 
					        for(Nd4jLong i=0;  i<targetLen; ++i) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            auto xOffset = target.getOffset(i);
 | 
					            auto xOffset = target.getOffset(i);
 | 
				
			||||||
            auto yOffset = shape::subArrayOffset(i, target.getShapeInfo(), getShapeInfo());
 | 
					            auto yOffset = shape::subArrayOffset(i, target.getShapeInfo(), getShapeInfo());
 | 
				
			||||||
@ -335,23 +334,22 @@ void NDArray::tile(NDArray& target) const {
 | 
				
			|||||||
    const auto ews = target.ews();
 | 
					    const auto ews = target.ews();
 | 
				
			||||||
    const auto targetLen = target.lengthOf();
 | 
					    const auto targetLen = target.lengthOf();
 | 
				
			||||||
    if(target.ordering() == 'c' && ews == 1) {           //  ews == 1 always here
 | 
					    if(target.ordering() == 'c' && ews == 1) {           //  ews == 1 always here
 | 
				
			||||||
//#pragma omp parallel for simd if(targetLen > Environment::getInstance()->elementwiseThreshold()) schedule(guided)
 | 
					
 | 
				
			||||||
        for (int i = 0; i < targetLen; ++i) {
 | 
					        for (Nd4jLong i = 0; i < targetLen; ++i) {
 | 
				
			||||||
            auto yOffset = shape::subArrayOffset(i, target.getShapeInfo(), getShapeInfo());
 | 
					            auto yOffset = shape::subArrayOffset(i, target.getShapeInfo(), getShapeInfo());
 | 
				
			||||||
            BUILD_DOUBLE_SELECTOR(target.dataType(), dataType(), templatedDoubleAssign, (target.getBuffer(), i, getBuffer(), yOffset), LIBND4J_TYPES, LIBND4J_TYPES);
 | 
					            BUILD_DOUBLE_SELECTOR(target.dataType(), dataType(), templatedDoubleAssign, (target.getBuffer(), i, getBuffer(), yOffset), LIBND4J_TYPES, LIBND4J_TYPES);
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    else if(target.ordering() == 'c' && ews > 1) {
 | 
					    else if(target.ordering() == 'c' && ews > 1) {
 | 
				
			||||||
//#pragma omp parallel for simd if(targetLen > Environment::getInstance()->elementwiseThreshold()) schedule(guided)
 | 
					
 | 
				
			||||||
        for(int i=0;  i<targetLen; ++i) {
 | 
					        for(Nd4jLong i=0;  i<targetLen; ++i) {
 | 
				
			||||||
            auto yOffset = shape::subArrayOffset(i, target.getShapeInfo(), getShapeInfo());
 | 
					            auto yOffset = shape::subArrayOffset(i, target.getShapeInfo(), getShapeInfo());
 | 
				
			||||||
            BUILD_DOUBLE_SELECTOR(target.dataType(), dataType(), templatedDoubleAssign, (target.getBuffer(), i*ews, getBuffer(), yOffset), LIBND4J_TYPES, LIBND4J_TYPES);
 | 
					            BUILD_DOUBLE_SELECTOR(target.dataType(), dataType(), templatedDoubleAssign, (target.getBuffer(), i*ews, getBuffer(), yOffset), LIBND4J_TYPES, LIBND4J_TYPES);
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    else {
 | 
					    else {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
//#pragma omp parallel for simd if(targetLen > Environment::getInstance()->elementwiseThreshold()) schedule(guided)
 | 
					        for(Nd4jLong i=0;  i<targetLen; ++i) {
 | 
				
			||||||
        for(int i=0;  i<targetLen; ++i) {
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
            auto xOffset = target.getOffset(i);
 | 
					            auto xOffset = target.getOffset(i);
 | 
				
			||||||
            auto yOffset = shape::subArrayOffset(i, target.getShapeInfo(), getShapeInfo());
 | 
					            auto yOffset = shape::subArrayOffset(i, target.getShapeInfo(), getShapeInfo());
 | 
				
			||||||
 | 
				
			|||||||
@ -39,7 +39,7 @@ void NDArray::applyTriplewiseLambda(NDArray* second, NDArray *third, const std::
 | 
				
			|||||||
        if (f == z) {
 | 
					        if (f == z) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            PRAGMA_OMP_PARALLEL_FOR_SIMD
 | 
					            PRAGMA_OMP_PARALLEL_FOR_SIMD
 | 
				
			||||||
            for (int e = 0; e < _length; e++) {
 | 
					            for (Nd4jLong e = 0; e < _length; e++) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                auto tOffset = this->getOffset(e);
 | 
					                auto tOffset = this->getOffset(e);
 | 
				
			||||||
                auto uOffset = second->getOffset(e);
 | 
					                auto uOffset = second->getOffset(e);
 | 
				
			||||||
@ -50,7 +50,7 @@ void NDArray::applyTriplewiseLambda(NDArray* second, NDArray *third, const std::
 | 
				
			|||||||
        } else {
 | 
					        } else {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            PRAGMA_OMP_PARALLEL_FOR_SIMD
 | 
					            PRAGMA_OMP_PARALLEL_FOR_SIMD
 | 
				
			||||||
            for (int e = 0; e < _length; e++) {
 | 
					            for (Nd4jLong e = 0; e < _length; e++) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                auto tOffset = this->getOffset(e);
 | 
					                auto tOffset = this->getOffset(e);
 | 
				
			||||||
                auto uOffset = second->getOffset(e);
 | 
					                auto uOffset = second->getOffset(e);
 | 
				
			||||||
@ -104,13 +104,13 @@ void NDArray::applyPairwiseLambda(const NDArray* other, const std::function<T(T,
 | 
				
			|||||||
    if (this->ordering() == other->ordering() && this->ordering() == target->ordering() && (this->ews() == 1 && target->ews() == 1) && this->ews() == other->ews()) {
 | 
					    if (this->ordering() == other->ordering() && this->ordering() == target->ordering() && (this->ews() == 1 && target->ews() == 1) && this->ews() == other->ews()) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        PRAGMA_OMP_PARALLEL_FOR_SIMD
 | 
					        PRAGMA_OMP_PARALLEL_FOR_SIMD
 | 
				
			||||||
        for (int e = 0; e < _length; e++)
 | 
					        for (Nd4jLong e = 0; e < _length; e++)
 | 
				
			||||||
            z[e] = func(f[e], s[e]);
 | 
					            z[e] = func(f[e], s[e]);
 | 
				
			||||||
    } else {
 | 
					    } else {
 | 
				
			||||||
        if (f == z) {
 | 
					        if (f == z) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            PRAGMA_OMP_PARALLEL_FOR_SIMD
 | 
					            PRAGMA_OMP_PARALLEL_FOR_SIMD
 | 
				
			||||||
            for (int e = 0; e < _length; e++) {
 | 
					            for (Nd4jLong e = 0; e < _length; e++) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                auto xOffset = this->getOffset(e);
 | 
					                auto xOffset = this->getOffset(e);
 | 
				
			||||||
                auto yOffset = other->getOffset(e);
 | 
					                auto yOffset = other->getOffset(e);
 | 
				
			||||||
@ -120,7 +120,7 @@ void NDArray::applyPairwiseLambda(const NDArray* other, const std::function<T(T,
 | 
				
			|||||||
        } else {
 | 
					        } else {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            PRAGMA_OMP_PARALLEL_FOR_SIMD
 | 
					            PRAGMA_OMP_PARALLEL_FOR_SIMD
 | 
				
			||||||
            for (int e = 0; e < _length; e++) {
 | 
					            for (Nd4jLong e = 0; e < _length; e++) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                auto xOffset = this->getOffset(e);
 | 
					                auto xOffset = this->getOffset(e);
 | 
				
			||||||
                auto yOffset = other->getOffset(e);
 | 
					                auto yOffset = other->getOffset(e);
 | 
				
			||||||
 | 
				
			|||||||
@ -1014,21 +1014,21 @@ void flattenGeneric(Nd4jPointer *extraPointers,
 | 
				
			|||||||
            if (len < ELEMENT_THRESHOLD) {
 | 
					            if (len < ELEMENT_THRESHOLD) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                PRAGMA_OMP_SIMD
 | 
					                PRAGMA_OMP_SIMD
 | 
				
			||||||
                for (int i = 0; i < len; i++) {
 | 
					                for (Nd4jLong i = 0; i < len; i++) {
 | 
				
			||||||
                    hZ[i * resultEleStride] = input[i * inputEleStride];
 | 
					                    hZ[i * resultEleStride] = input[i * inputEleStride];
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
            else {
 | 
					            else {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                PRAGMA_OMP_PARALLEL_FOR_SIMD
 | 
					                PRAGMA_OMP_PARALLEL_FOR_SIMD
 | 
				
			||||||
                for (int i = 0; i < len; i++) {
 | 
					                for (Nd4jLong i = 0; i < len; i++) {
 | 
				
			||||||
                    hZ[i * resultEleStride] = input[i * inputEleStride];
 | 
					                    hZ[i * resultEleStride] = input[i * inputEleStride];
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        else {
 | 
					        else {
 | 
				
			||||||
            int idx = 0;
 | 
					            int idx = 0;
 | 
				
			||||||
            for(int i = 0; i < len; i++)
 | 
					            for(Nd4jLong i = 0; i < len; i++)
 | 
				
			||||||
                    hZ[idx++] = input[shape::getIndexOffset(i, inputShapeInfo, len)];
 | 
					                    hZ[idx++] = input[shape::getIndexOffset(i, inputShapeInfo, len)];
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@ -1047,7 +1047,7 @@ void flattenGeneric(Nd4jPointer *extraPointers,
 | 
				
			|||||||
            if (order == 'f') {
 | 
					            if (order == 'f') {
 | 
				
			||||||
                // 1. get c ordering coordinates
 | 
					                // 1. get c ordering coordinates
 | 
				
			||||||
                auto cIndexCoordinates = new Nd4jLong[rank - 1];
 | 
					                auto cIndexCoordinates = new Nd4jLong[rank - 1];
 | 
				
			||||||
                int divisor = 1;
 | 
					                Nd4jLong divisor = 1;
 | 
				
			||||||
                for (int dim = rank - 1; dim > 0; dim--) {
 | 
					                for (int dim = rank - 1; dim > 0; dim--) {
 | 
				
			||||||
                    cIndexCoordinates[dim - 1] = (i / divisor) % xShape[dim];
 | 
					                    cIndexCoordinates[dim - 1] = (i / divisor) % xShape[dim];
 | 
				
			||||||
                    divisor *= xShape[dim];
 | 
					                    divisor *= xShape[dim];
 | 
				
			||||||
@ -1056,7 +1056,7 @@ void flattenGeneric(Nd4jPointer *extraPointers,
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
                // 2. convert to f ordering index
 | 
					                // 2. convert to f ordering index
 | 
				
			||||||
                int fIndex = 0;
 | 
					                int fIndex = 0;
 | 
				
			||||||
                int multiplier = 1;
 | 
					                Nd4jLong multiplier = 1;
 | 
				
			||||||
                for (int dim = 1; dim <= rank - 1; dim++) {
 | 
					                for (int dim = 1; dim <= rank - 1; dim++) {
 | 
				
			||||||
                    fIndex += cIndexCoordinates[dim - 1] * multiplier;
 | 
					                    fIndex += cIndexCoordinates[dim - 1] * multiplier;
 | 
				
			||||||
                    multiplier *= xShape[dim];
 | 
					                    multiplier *= xShape[dim];
 | 
				
			||||||
 | 
				
			|||||||
@ -301,7 +301,7 @@ void* NDArray::specialBufferWithOffset(Nd4jLong offset) const {
 | 
				
			|||||||
// change an array by repeating it the number of times given by reps.
 | 
					// change an array by repeating it the number of times given by reps.
 | 
				
			||||||
NDArray NDArray::tile(const std::vector<Nd4jLong>& reps) const {
 | 
					NDArray NDArray::tile(const std::vector<Nd4jLong>& reps) const {
 | 
				
			||||||
    int dim = reps.size();
 | 
					    int dim = reps.size();
 | 
				
			||||||
    int product = 1;
 | 
					    Nd4jLong product = 1;
 | 
				
			||||||
    for(const auto& item : reps)
 | 
					    for(const auto& item : reps)
 | 
				
			||||||
        product *= item;
 | 
					        product *= item;
 | 
				
			||||||
    if(product == 0)
 | 
					    if(product == 0)
 | 
				
			||||||
 | 
				
			|||||||
@ -670,7 +670,7 @@ void cnpy::npy_save(std::string fname,
 | 
				
			|||||||
        fwrite(&header[0],sizeof(char),header.size(),fp);
 | 
					        fwrite(&header[0],sizeof(char),header.size(),fp);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    unsigned int nels = 1;
 | 
					    unsigned long long nels = 1;
 | 
				
			||||||
    for(int i = 0;i < ndims;i++) nels *= shape[i];
 | 
					    for(int i = 0;i < ndims;i++) nels *= shape[i];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    fwrite(data,sizeof(T),nels,fp);
 | 
					    fwrite(data,sizeof(T),nels,fp);
 | 
				
			||||||
 | 
				
			|||||||
@ -64,14 +64,14 @@ namespace nd4j {
 | 
				
			|||||||
                auto longPtr = reinterpret_cast<Nd4jLong *>(rawPtr);
 | 
					                auto longPtr = reinterpret_cast<Nd4jLong *>(rawPtr);
 | 
				
			||||||
                auto charPtr = reinterpret_cast<char *>(longPtr + length + 1);
 | 
					                auto charPtr = reinterpret_cast<char *>(longPtr + length + 1);
 | 
				
			||||||
                auto offsets = new Nd4jLong[length+1];
 | 
					                auto offsets = new Nd4jLong[length+1];
 | 
				
			||||||
                for (int e = 0; e <= length; e++) {
 | 
					                for (Nd4jLong e = 0; e <= length; e++) {
 | 
				
			||||||
                    auto o = longPtr[e];
 | 
					                    auto o = longPtr[e];
 | 
				
			||||||
                    // FIXME: BE vs LE on partials
 | 
					                    // FIXME: BE vs LE on partials
 | 
				
			||||||
                    //auto v = canKeep ?  o : BitwiseUtils::swap_bytes<Nd4jLong>(o);
 | 
					                    //auto v = canKeep ?  o : BitwiseUtils::swap_bytes<Nd4jLong>(o);
 | 
				
			||||||
                    offsets[e] = o;
 | 
					                    offsets[e] = o;
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                for (int e = 0; e < length; e++) {
 | 
					                for (Nd4jLong e = 0; e < length; e++) {
 | 
				
			||||||
                    auto start = offsets[e];
 | 
					                    auto start = offsets[e];
 | 
				
			||||||
                    auto end = offsets[e+1];
 | 
					                    auto end = offsets[e+1];
 | 
				
			||||||
                    auto len = end - start;
 | 
					                    auto len = end - start;
 | 
				
			||||||
 | 
				
			|||||||
@ -492,7 +492,7 @@ namespace shape {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        //find the length of the elements we
 | 
					        //find the length of the elements we
 | 
				
			||||||
        //are iterating over
 | 
					        //are iterating over
 | 
				
			||||||
        int len = 1;
 | 
					        Nd4jLong len = 1;
 | 
				
			||||||
        //left over index cursor for initializing elements
 | 
					        //left over index cursor for initializing elements
 | 
				
			||||||
        int leftOverIndex = 0;
 | 
					        int leftOverIndex = 0;
 | 
				
			||||||
        for(int i = 0; i < rank; i++) {
 | 
					        for(int i = 0; i < rank; i++) {
 | 
				
			||||||
@ -669,7 +669,7 @@ namespace shape {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        //find the length of the elements we
 | 
					        //find the length of the elements we
 | 
				
			||||||
        //are iterating over
 | 
					        //are iterating over
 | 
				
			||||||
        int len = 1;
 | 
					        Nd4jLong len = 1;
 | 
				
			||||||
        //left over index cursor for initializing elements
 | 
					        //left over index cursor for initializing elements
 | 
				
			||||||
        int leftOverIndex = 0;
 | 
					        int leftOverIndex = 0;
 | 
				
			||||||
        for(int i = 0; i < rank; i++) {
 | 
					        for(int i = 0; i < rank; i++) {
 | 
				
			||||||
@ -787,7 +787,7 @@ namespace shape {
 | 
				
			|||||||
        Nd4jLong *ret2 = shape::sliceOfShapeBuffer(sliceIndex, permuted);
 | 
					        Nd4jLong *ret2 = shape::sliceOfShapeBuffer(sliceIndex, permuted);
 | 
				
			||||||
        Nd4jLong tensorLength = shape::prodLong(tensorShape,tadRank);
 | 
					        Nd4jLong tensorLength = shape::prodLong(tensorShape,tadRank);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        int compLength = shape::isVector(ret2) ? shape::length(ret2) : shape::prod(tensorShape,tadRank);
 | 
					        Nd4jLong compLength = shape::isVector(ret2) ? shape::length(ret2) : shape::prodLong(tensorShape,tadRank);
 | 
				
			||||||
        // int temp;
 | 
					        // int temp;
 | 
				
			||||||
        // const bool isLikeVector = shape::isLikeVector(ret2, temp);
 | 
					        // const bool isLikeVector = shape::isLikeVector(ret2, temp);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -75,7 +75,7 @@ std::vector<Nd4jLong> ShapeUtils::evalShapeForTensorDot(const Nd4jLong* aShapeIn
 | 
				
			|||||||
    permutBt = axesB;
 | 
					    permutBt = axesB;
 | 
				
			||||||
    permutBt.insert(permutBt.end(), list_B.begin(), list_B.end());
 | 
					    permutBt.insert(permutBt.end(), list_B.begin(), list_B.end());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    int n2 = 1;
 | 
					    Nd4jLong n2 = 1;
 | 
				
			||||||
    for (int i = 0; i < axeAsize; i++)
 | 
					    for (int i = 0; i < axeAsize; i++)
 | 
				
			||||||
        n2 *= aShapeInfo[axesA[i] + 1];
 | 
					        n2 *= aShapeInfo[axesA[i] + 1];
 | 
				
			||||||
    shapeAt = {-1, n2};
 | 
					    shapeAt = {-1, n2};
 | 
				
			||||||
@ -86,7 +86,7 @@ std::vector<Nd4jLong> ShapeUtils::evalShapeForTensorDot(const Nd4jLong* aShapeIn
 | 
				
			|||||||
        oldShapeA[i] = aShapeInfo[list_A[i] + 1];
 | 
					        oldShapeA[i] = aShapeInfo[list_A[i] + 1];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    int n3 = 1;
 | 
					    Nd4jLong n3 = 1;
 | 
				
			||||||
    for (int i = 0; i < axeBsize; i++)
 | 
					    for (int i = 0; i < axeBsize; i++)
 | 
				
			||||||
        n3 *= bShapeInfo[axesB[i] + 1];
 | 
					        n3 *= bShapeInfo[axesB[i] + 1];
 | 
				
			||||||
    shapeBt = {n3, -1};
 | 
					    shapeBt = {n3, -1};
 | 
				
			||||||
@ -553,7 +553,7 @@ std::vector<int> ShapeUtils::getDimsWithSameShape(const NDArray& max, const NDAr
 | 
				
			|||||||
Nd4jLong* ShapeUtils::evalTileShapeInfo(const NDArray& arr, const std::vector<Nd4jLong>& reps, nd4j::memory::Workspace* workspace) {
 | 
					Nd4jLong* ShapeUtils::evalTileShapeInfo(const NDArray& arr, const std::vector<Nd4jLong>& reps, nd4j::memory::Workspace* workspace) {
 | 
				
			||||||
    // check whether reps contains at least one zero (then throw exception) or whether all elements in reps are unities (then simply reshape or do nothing)
 | 
					    // check whether reps contains at least one zero (then throw exception) or whether all elements in reps are unities (then simply reshape or do nothing)
 | 
				
			||||||
    int repsSize = reps.size();
 | 
					    int repsSize = reps.size();
 | 
				
			||||||
    int product = 1;
 | 
					    Nd4jLong product = 1;
 | 
				
			||||||
    for(const auto& item : reps)
 | 
					    for(const auto& item : reps)
 | 
				
			||||||
        product *= item;
 | 
					        product *= item;
 | 
				
			||||||
    if(product == 0)
 | 
					    if(product == 0)
 | 
				
			||||||
 | 
				
			|||||||
@ -127,7 +127,7 @@ namespace shape {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    ND4J_EXPORT _CUDA_HD int tadIndexForLinear(int linearIndex, int tadLength);
 | 
					    ND4J_EXPORT _CUDA_HD int tadIndexForLinear(int linearIndex, int tadLength);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    ND4J_EXPORT _CUDA_HD int tadLength(Nd4jLong *shapeInfo, int *dimension, int dimensionLength);
 | 
					    ND4J_EXPORT _CUDA_HD Nd4jLong tadLength(Nd4jLong *shapeInfo, int *dimension, int dimensionLength);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    ND4J_EXPORT _CUDA_HD bool canReshape(const int oldRank, Nd4jLong* oldShape, const int newRank, Nd4jLong* newShape, bool isFOrder);
 | 
					    ND4J_EXPORT _CUDA_HD bool canReshape(const int oldRank, Nd4jLong* oldShape, const int newRank, Nd4jLong* newShape, bool isFOrder);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -856,8 +856,6 @@ namespace shape {
 | 
				
			|||||||
 * Returns the prod of the data
 | 
					 * Returns the prod of the data
 | 
				
			||||||
 * up to the given length
 | 
					 * up to the given length
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
    ND4J_EXPORT _CUDA_HD int prod(Nd4jLong *data, int length);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    ND4J_EXPORT _CUDA_HD Nd4jLong prodLong(const Nd4jLong *data, int length);
 | 
					    ND4J_EXPORT _CUDA_HD Nd4jLong prodLong(const Nd4jLong *data, int length);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /**
 | 
					    /**
 | 
				
			||||||
@ -1055,12 +1053,12 @@ __device__ INLINEDEF Nd4jLong *cuMalloc(Nd4jLong *buffer, long size) {
 | 
				
			|||||||
* Length of a tad given
 | 
					* Length of a tad given
 | 
				
			||||||
* the shape information
 | 
					* the shape information
 | 
				
			||||||
*/
 | 
					*/
 | 
				
			||||||
    INLINEDEF _CUDA_HD int tadLength(Nd4jLong *shapeInfo, int *dimension, int dimensionLength) {
 | 
					    INLINEDEF _CUDA_HD Nd4jLong tadLength(Nd4jLong *shapeInfo, int *dimension, int dimensionLength) {
 | 
				
			||||||
        if(dimensionLength == 1) {
 | 
					        if(dimensionLength == 1) {
 | 
				
			||||||
            return shape::shapeOf(shapeInfo)[dimension[0]];
 | 
					            return shape::shapeOf(shapeInfo)[dimension[0]];
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        else {
 | 
					        else {
 | 
				
			||||||
            int ret = 1;
 | 
					            Nd4jLong ret = 1;
 | 
				
			||||||
            for(int i = 0; i < shape::rank(shapeInfo); i++) {
 | 
					            for(int i = 0; i < shape::rank(shapeInfo); i++) {
 | 
				
			||||||
                for(int j = 0; j < dimensionLength; j++) {
 | 
					                for(int j = 0; j < dimensionLength; j++) {
 | 
				
			||||||
                    if(i == dimension[j])
 | 
					                    if(i == dimension[j])
 | 
				
			||||||
@ -1307,7 +1305,7 @@ __device__ INLINEDEF Nd4jLong *cuMalloc(Nd4jLong *buffer, long size) {
 | 
				
			|||||||
        traceNew(6);
 | 
					        traceNew(6);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        Nd4jLong *stride = new Nd4jLong[dimensions];
 | 
					        Nd4jLong *stride = new Nd4jLong[dimensions];
 | 
				
			||||||
        int st = startNum;
 | 
					        Nd4jLong st = startNum;
 | 
				
			||||||
        for (int j = 0; j < rank; j++) {
 | 
					        for (int j = 0; j < rank; j++) {
 | 
				
			||||||
            stride[j] = st;
 | 
					            stride[j] = st;
 | 
				
			||||||
            st *= shape[j];
 | 
					            st *= shape[j];
 | 
				
			||||||
@ -1326,7 +1324,7 @@ __device__ INLINEDEF Nd4jLong *cuMalloc(Nd4jLong *buffer, long size) {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        //int dimensions = rank;
 | 
					        //int dimensions = rank;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        int st = startNum;
 | 
					        Nd4jLong st = startNum;
 | 
				
			||||||
        for (int j = 0; j < rank; j++) {
 | 
					        for (int j = 0; j < rank; j++) {
 | 
				
			||||||
            ret[j] = st;
 | 
					            ret[j] = st;
 | 
				
			||||||
            st *= shape[j];
 | 
					            st *= shape[j];
 | 
				
			||||||
@ -1361,7 +1359,7 @@ __device__ INLINEDEF Nd4jLong *cuMalloc(Nd4jLong *buffer, long size) {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        // }
 | 
					        // }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        int st = startNum;
 | 
					        Nd4jLong st = startNum;
 | 
				
			||||||
        for (int j = rank - 1; j >= 0; j--) {
 | 
					        for (int j = rank - 1; j >= 0; j--) {
 | 
				
			||||||
            stride[j] = st;
 | 
					            stride[j] = st;
 | 
				
			||||||
            st *= shape[j];
 | 
					            st *= shape[j];
 | 
				
			||||||
@ -1383,7 +1381,7 @@ __device__ INLINEDEF Nd4jLong *cuMalloc(Nd4jLong *buffer, long size) {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        // }
 | 
					        // }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        int st = startNum;
 | 
					        Nd4jLong st = startNum;
 | 
				
			||||||
        for (int j = rank - 1; j >= 0; j--) {
 | 
					        for (int j = rank - 1; j >= 0; j--) {
 | 
				
			||||||
            ret[j] = st;
 | 
					            ret[j] = st;
 | 
				
			||||||
            st *= shape[j];
 | 
					            st *= shape[j];
 | 
				
			||||||
@ -1513,8 +1511,8 @@ __device__ INLINEDEF Nd4jLong *cuMalloc(Nd4jLong *buffer, long size) {
 | 
				
			|||||||
            int oldnd;
 | 
					            int oldnd;
 | 
				
			||||||
            Nd4jLong *oldDims = shape::copyOf(rank, shape);
 | 
					            Nd4jLong *oldDims = shape::copyOf(rank, shape);
 | 
				
			||||||
            Nd4jLong *oldStrides = shape::copyOf(rank, stride);
 | 
					            Nd4jLong *oldStrides = shape::copyOf(rank, stride);
 | 
				
			||||||
            int np, op, last_stride;
 | 
					            Nd4jLong np, op, last_stride;
 | 
				
			||||||
            int oldStart, oldStop, ok, newStart, newStop, nk;
 | 
					            Nd4jLong oldStart, oldStop, ok, newStart, newStop, nk;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            traceNew(10);
 | 
					            traceNew(10);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -2042,13 +2040,12 @@ template <typename T>
 | 
				
			|||||||
 * @return
 | 
					 * @return
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
    INLINEDEF _CUDA_HD char getOrder(int length, Nd4jLong *shape, Nd4jLong *stride, int elementStride) {
 | 
					    INLINEDEF _CUDA_HD char getOrder(int length, Nd4jLong *shape, Nd4jLong *stride, int elementStride) {
 | 
				
			||||||
        int sd = -1;
 | 
					        Nd4jLong sd = 1;
 | 
				
			||||||
        int dim = -1;
 | 
					        int dim = -1;
 | 
				
			||||||
        int i = -1;
 | 
					        int i = -1;
 | 
				
			||||||
        int cContiguous = 1;
 | 
					        int cContiguous = 1;
 | 
				
			||||||
        int isFortran = 1;
 | 
					        int isFortran = 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        sd = 1;
 | 
					 | 
				
			||||||
        for (i = length - 1; i >= 0; --i) {
 | 
					        for (i = length - 1; i >= 0; --i) {
 | 
				
			||||||
            dim = shape[i];
 | 
					            dim = shape[i];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -2235,7 +2232,7 @@ template <typename T>
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    INLINEDEF _CUDA_HD int oneDimEqualToLength(Nd4jLong *shape, int rank) {
 | 
					    INLINEDEF _CUDA_HD int oneDimEqualToLength(Nd4jLong *shape, int rank) {
 | 
				
			||||||
        for(int i = 0; i < rank; i++) {
 | 
					        for(int i = 0; i < rank; i++) {
 | 
				
			||||||
            if(shape[i] == shape::prod(shape,rank))
 | 
					            if(shape[i] == shape::prodLong(shape,rank))
 | 
				
			||||||
                return 1;
 | 
					                return 1;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -3103,11 +3100,11 @@ INLINEDEF _CUDA_HD bool haveSameShapeAndStrides(const Nd4jLong *shapeInfo1, cons
 | 
				
			|||||||
        if(shape::isVector(shape,rank)) {
 | 
					        if(shape::isVector(shape,rank)) {
 | 
				
			||||||
            //return total length for row vectors
 | 
					            //return total length for row vectors
 | 
				
			||||||
            if(dimensionLength == 1 && shape[0] == 1) {
 | 
					            if(dimensionLength == 1 && shape[0] == 1) {
 | 
				
			||||||
                return shape::prod(shape,rank);
 | 
					                return shape::prodLong(shape,rank);
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        else if(rank == dimensionLength)
 | 
					        else if(rank == dimensionLength)
 | 
				
			||||||
            return shape::prod(shape,rank);
 | 
					            return shape::prodLong(shape,rank);
 | 
				
			||||||
        int absSelta = nd4j::math::nd4j_abs<int>(rank - dimensionLength);
 | 
					        int absSelta = nd4j::math::nd4j_abs<int>(rank - dimensionLength);
 | 
				
			||||||
        traceNew(27);
 | 
					        traceNew(27);
 | 
				
			||||||
        auto ret2 = shape::removeIndex<Nd4jLong>(shape, dimension, rank, dimensionLength);
 | 
					        auto ret2 = shape::removeIndex<Nd4jLong>(shape, dimension, rank, dimensionLength);
 | 
				
			||||||
@ -3554,18 +3551,6 @@ INLINEDEF _CUDA_HD bool haveSameShapeAndStrides(const Nd4jLong *shapeInfo1, cons
 | 
				
			|||||||
        return ret;
 | 
					        return ret;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/**
 | 
					 | 
				
			||||||
 * Returns the prod of the data
 | 
					 | 
				
			||||||
 * up to the given length
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
    INLINEDEF _CUDA_HD int prod(Nd4jLong *data, int length) {
 | 
					 | 
				
			||||||
        int prod = 1;
 | 
					 | 
				
			||||||
        for (int i = 0; i < length; i++) {
 | 
					 | 
				
			||||||
            prod *= data[i];
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        return prod;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
/**
 | 
					/**
 | 
				
			||||||
 * Returns the prod of the data
 | 
					 * Returns the prod of the data
 | 
				
			||||||
@ -3956,7 +3941,7 @@ INLINEDEF _CUDA_H bool reshapeC(const int oldRank, const Nd4jLong* oldShapeInfo,
 | 
				
			|||||||
        Nd4jLong* newStrides       = shape::stride(newShapeInfo);
 | 
					        Nd4jLong* newStrides       = shape::stride(newShapeInfo);
 | 
				
			||||||
        const Nd4jLong* oldShape   = shape::shapeOf(const_cast<Nd4jLong*>(oldShapeInfo));
 | 
					        const Nd4jLong* oldShape   = shape::shapeOf(const_cast<Nd4jLong*>(oldShapeInfo));
 | 
				
			||||||
        const Nd4jLong* oldStrides = shape::stride(const_cast<Nd4jLong*>(oldShapeInfo));
 | 
					        const Nd4jLong* oldStrides = shape::stride(const_cast<Nd4jLong*>(oldShapeInfo));
 | 
				
			||||||
        int oldStart(0), oldStop(1), newStart(0), newStop(1), newDim, oldDim;
 | 
					        Nd4jLong oldStart(0), oldStop(1), newStart(0), newStop(1), newDim, oldDim;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        while (newStart < newRank && oldStart < oldRank) {
 | 
					        while (newStart < newRank && oldStart < oldRank) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -3995,11 +3980,11 @@ INLINEDEF _CUDA_H bool reshapeC(const int oldRank, const Nd4jLong* oldShapeInfo,
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    INLINEDEF _CUDA_H bool canReshape(const int oldRank, Nd4jLong* oldShape, const int newRank, Nd4jLong* newShapeOf, bool isFOrder) {
 | 
					    INLINEDEF _CUDA_H bool canReshape(const int oldRank, Nd4jLong* oldShape, const int newRank, Nd4jLong* newShapeOf, bool isFOrder) {
 | 
				
			||||||
        int oldnd;
 | 
					        Nd4jLong oldnd;
 | 
				
			||||||
        Nd4jLong* oldDims = shape::copyOf(oldRank, shape::shapeOf(oldShape));
 | 
					        Nd4jLong* oldDims = shape::copyOf(oldRank, shape::shapeOf(oldShape));
 | 
				
			||||||
        Nd4jLong* oldStrides = shape::copyOf(oldRank, shape::stride(oldShape));
 | 
					        Nd4jLong* oldStrides = shape::copyOf(oldRank, shape::stride(oldShape));
 | 
				
			||||||
        int np, op, last_stride;
 | 
					        Nd4jLong np, op, last_stride;
 | 
				
			||||||
        int oldStart, oldStop, ok, newStart, newStop, nk;
 | 
					        Nd4jLong oldStart, oldStop, ok, newStart, newStop, nk;
 | 
				
			||||||
        auto newStrides = new Nd4jLong[newRank];
 | 
					        auto newStrides = new Nd4jLong[newRank];
 | 
				
			||||||
        oldnd = 0;
 | 
					        oldnd = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -229,7 +229,7 @@ CUSTOM_OP_IMPL(deconv2d_bp, 3, 2, false, 0, 9) {
 | 
				
			|||||||
    // ----- calculation of gradB ----- //
 | 
					    // ----- calculation of gradB ----- //
 | 
				
			||||||
    if(gradB) {
 | 
					    if(gradB) {
 | 
				
			||||||
        if(gradB->rankOf() == 2)
 | 
					        if(gradB->rankOf() == 2)
 | 
				
			||||||
            gradB = new NDArray(gradB->reshape(gradB->ordering(), {(int)gradB->lengthOf()}));
 | 
					            gradB = new NDArray(gradB->reshape(gradB->ordering(), {gradB->lengthOf()}));
 | 
				
			||||||
        gradO->reduceAlongDimension(reduce::Sum, gradB, {0, 2, 3});                                // sum over bS, oH, oW
 | 
					        gradO->reduceAlongDimension(reduce::Sum, gradB, {0, 2, 3});                                // sum over bS, oH, oW
 | 
				
			||||||
        if(gradB != OUTPUT_VARIABLE(2))
 | 
					        if(gradB != OUTPUT_VARIABLE(2))
 | 
				
			||||||
            delete gradB;
 | 
					            delete gradB;
 | 
				
			||||||
 | 
				
			|||||||
@ -53,7 +53,7 @@ namespace nd4j {
 | 
				
			|||||||
            // now let's build subarrays
 | 
					            // now let's build subarrays
 | 
				
			||||||
            int cnt = 0;
 | 
					            int cnt = 0;
 | 
				
			||||||
            std::vector<Nd4jLong> indices(2 * array->rankOf(), 0);
 | 
					            std::vector<Nd4jLong> indices(2 * array->rankOf(), 0);
 | 
				
			||||||
            for (int e = 0; e < sizes->lengthOf(); e++) {
 | 
					            for (Nd4jLong e = 0; e < sizes->lengthOf(); e++) {
 | 
				
			||||||
                int c_size = sizes->e<int>(e);
 | 
					                int c_size = sizes->e<int>(e);
 | 
				
			||||||
                
 | 
					                
 | 
				
			||||||
                REQUIRE_TRUE(c_size > 0, 0, "Slice size should have postive value, but got %i instead", c_size);
 | 
					                REQUIRE_TRUE(c_size > 0, 0, "Slice size should have postive value, but got %i instead", c_size);
 | 
				
			||||||
 | 
				
			|||||||
@ -42,10 +42,10 @@ CONFIGURABLE_OP_IMPL(betainc, 3, 1, false, 0, 0) {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    REQUIRE_TRUE(a->isSameShape(b) && a->isSameShape(x), 0, "CONFIGURABLE_OP betainc: all three input arrays must have the same shapes, bit got a=%s, b=%s and x=%s instead !", ShapeUtils::shapeAsString(a).c_str(), ShapeUtils::shapeAsString(b).c_str(), ShapeUtils::shapeAsString(x).c_str());
 | 
					    REQUIRE_TRUE(a->isSameShape(b) && a->isSameShape(x), 0, "CONFIGURABLE_OP betainc: all three input arrays must have the same shapes, bit got a=%s, b=%s and x=%s instead !", ShapeUtils::shapeAsString(a).c_str(), ShapeUtils::shapeAsString(b).c_str(), ShapeUtils::shapeAsString(x).c_str());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    int arrLen = a->lengthOf();
 | 
					    Nd4jLong arrLen = a->lengthOf();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // FIXME: this stuff should be single op call. No sense rolling over couple of arrays twice
 | 
					    // FIXME: this stuff should be single op call. No sense rolling over couple of arrays twice
 | 
				
			||||||
    for(int i = 0; i < arrLen; ++i ) {
 | 
					    for(Nd4jLong i = 0; i < arrLen; ++i ) {
 | 
				
			||||||
        REQUIRE_TRUE(a->e<float>(i) > 0.f,   0, "BETAINC op: arrays a array must contain only elements > 0 !");
 | 
					        REQUIRE_TRUE(a->e<float>(i) > 0.f,   0, "BETAINC op: arrays a array must contain only elements > 0 !");
 | 
				
			||||||
        REQUIRE_TRUE(b->e<float>(i) > 0.f,   0, "BETAINC op: arrays b array must contain only elements > 0 !");
 | 
					        REQUIRE_TRUE(b->e<float>(i) > 0.f,   0, "BETAINC op: arrays b array must contain only elements > 0 !");
 | 
				
			||||||
        REQUIRE_TRUE(0.f <= x->e<float>(i) && x->e<float>(i) <= 1.f, 0, "BETAINC op: all elements of x array must be within [0, 1] range!");
 | 
					        REQUIRE_TRUE(0.f <= x->e<float>(i) && x->e<float>(i) <= 1.f, 0, "BETAINC op: all elements of x array must be within [0, 1] range!");
 | 
				
			||||||
 | 
				
			|||||||
@ -35,7 +35,7 @@ CONFIGURABLE_OP_IMPL(polygamma, 2, 1, false, 0, 0) {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    REQUIRE_TRUE(n->isSameShape(x), 0, "POLYGAMMA op: two input arrays n and x must have the same shapes, but got n=%s and x=%s instead !", ShapeUtils::shapeAsString(n).c_str(), ShapeUtils::shapeAsString(x).c_str());
 | 
					    REQUIRE_TRUE(n->isSameShape(x), 0, "POLYGAMMA op: two input arrays n and x must have the same shapes, but got n=%s and x=%s instead !", ShapeUtils::shapeAsString(n).c_str(), ShapeUtils::shapeAsString(x).c_str());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    int arrLen = n->lengthOf();
 | 
					    Nd4jLong arrLen = n->lengthOf();
 | 
				
			||||||
    // FIXME: this shit should be single op call, not a loop!
 | 
					    // FIXME: this shit should be single op call, not a loop!
 | 
				
			||||||
    auto nPositive =  n->reduceNumber(nd4j::reduce::IsPositive, nullptr);
 | 
					    auto nPositive =  n->reduceNumber(nd4j::reduce::IsPositive, nullptr);
 | 
				
			||||||
    auto xPositive =  x->reduceNumber(nd4j::reduce::IsPositive, nullptr);
 | 
					    auto xPositive =  x->reduceNumber(nd4j::reduce::IsPositive, nullptr);
 | 
				
			||||||
 | 
				
			|||||||
@ -46,7 +46,7 @@ namespace ops {
 | 
				
			|||||||
        int pos = 0;
 | 
					        int pos = 0;
 | 
				
			||||||
        std::vector<Nd4jLong> indices(2 * input->rankOf());
 | 
					        std::vector<Nd4jLong> indices(2 * input->rankOf());
 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
        for (int e = 0; e < sizes->lengthOf(); e++) {
 | 
					        for (Nd4jLong e = 0; e < sizes->lengthOf(); e++) {
 | 
				
			||||||
            int c_size = sizes->e<int>(e);
 | 
					            int c_size = sizes->e<int>(e);
 | 
				
			||||||
            
 | 
					            
 | 
				
			||||||
            for (int d = 0; d < input->rankOf(); d++) {
 | 
					            for (int d = 0; d < input->rankOf(); d++) {
 | 
				
			||||||
@ -103,7 +103,7 @@ namespace ops {
 | 
				
			|||||||
        
 | 
					        
 | 
				
			||||||
        auto length = sizes->lengthOf();
 | 
					        auto length = sizes->lengthOf();
 | 
				
			||||||
        int pos = 0;
 | 
					        int pos = 0;
 | 
				
			||||||
        for (int e = 0; e < length; e++) {
 | 
					        for (Nd4jLong e = 0; e < length; e++) {
 | 
				
			||||||
            int c_size = sizes->e<int>(e);
 | 
					            int c_size = sizes->e<int>(e);
 | 
				
			||||||
            
 | 
					            
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -38,7 +38,7 @@ namespace nd4j {
 | 
				
			|||||||
                REQUIRE_TRUE(v >= 0 && v < input->rankOf(), 0, "Tear dimensions should be non-negative values, and lower then input rank. Got %i instead", v);
 | 
					                REQUIRE_TRUE(v >= 0 && v < input->rankOf(), 0, "Tear dimensions should be non-negative values, and lower then input rank. Got %i instead", v);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            auto tads = input->allTensorsAlongDimension(dims);
 | 
					            auto tads = input->allTensorsAlongDimension(dims);
 | 
				
			||||||
            for (int e = 0; e < tads->size(); e++) {
 | 
					            for (Nd4jLong e = 0; e < tads->size(); e++) {
 | 
				
			||||||
                auto outE = OUTPUT_VARIABLE(e);
 | 
					                auto outE = OUTPUT_VARIABLE(e);
 | 
				
			||||||
                outE->assign(tads->at(e));
 | 
					                outE->assign(tads->at(e));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -62,7 +62,7 @@ namespace nd4j {
 | 
				
			|||||||
            auto numTads = tadPack.numberOfTads();
 | 
					            auto numTads = tadPack.numberOfTads();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            auto result = SHAPELIST();
 | 
					            auto result = SHAPELIST();
 | 
				
			||||||
            for (int e = 0; e < numTads; e++) {
 | 
					            for (Nd4jLong e = 0; e < numTads; e++) {
 | 
				
			||||||
                auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(block.dataType(), shape::order(inShape), shape::rank(tadPack.primaryShapeInfo()), shape::shapeOf(tadPack.primaryShapeInfo()));
 | 
					                auto newShape = ConstantShapeHelper::getInstance()->createShapeInfo(block.dataType(), shape::order(inShape), shape::rank(tadPack.primaryShapeInfo()), shape::shapeOf(tadPack.primaryShapeInfo()));
 | 
				
			||||||
                result->push_back(newShape);
 | 
					                result->push_back(newShape);
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
 | 
				
			|||||||
@ -34,10 +34,10 @@ namespace nd4j {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
            REQUIRE_TRUE(x->isSameShape(q), 0, "ZETA op: two input arrays must have the same shapes, bot got x=%s and q=%s !", ShapeUtils::shapeAsString(x).c_str(), ShapeUtils::shapeAsString(q).c_str());
 | 
					            REQUIRE_TRUE(x->isSameShape(q), 0, "ZETA op: two input arrays must have the same shapes, bot got x=%s and q=%s !", ShapeUtils::shapeAsString(x).c_str(), ShapeUtils::shapeAsString(q).c_str());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            int arrLen = x->lengthOf();
 | 
					            Nd4jLong arrLen = x->lengthOf();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            // FIXME: this should NOT be loop.
 | 
					            // FIXME: this should NOT be loop.
 | 
				
			||||||
            for(int i = 0; i < arrLen; ++i ) {
 | 
					            for(Nd4jLong i = 0; i < arrLen; ++i ) {
 | 
				
			||||||
                REQUIRE_TRUE(x->e<float>(i) > 1.f, 0, "ZETA op: all elements of x array must be > 1 !");
 | 
					                REQUIRE_TRUE(x->e<float>(i) > 1.f, 0, "ZETA op: all elements of x array must be > 1 !");
 | 
				
			||||||
                REQUIRE_TRUE(q->e<float>(i) > 0.f, 0, "ZETA op: all elements of q array must be > 0 !");
 | 
					                REQUIRE_TRUE(q->e<float>(i) > 0.f, 0, "ZETA op: all elements of q array must be > 0 !");
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
 | 
				
			|||||||
@ -54,14 +54,14 @@ namespace nd4j {
 | 
				
			|||||||
                int e2 = e;
 | 
					                int e2 = e;
 | 
				
			||||||
                for (; e < (int) arguments->size(); e++) {
 | 
					                for (; e < (int) arguments->size(); e++) {
 | 
				
			||||||
                    if (arguments->at(e) == -1){
 | 
					                    if (arguments->at(e) == -1){
 | 
				
			||||||
                        long shapeLength = 1;
 | 
					                        Nd4jLong shapeLength = 1;
 | 
				
			||||||
                        for(; e2 < e; e2++){
 | 
					                        for(; e2 < e; e2++){
 | 
				
			||||||
                            shapeLength *= arguments->at(e2);
 | 
					                            shapeLength *= arguments->at(e2);
 | 
				
			||||||
                        }
 | 
					                        }
 | 
				
			||||||
                        for(e2 = e + 1; e2 < arguments->size(); e2++){
 | 
					                        for(e2 = e + 1; e2 < arguments->size(); e2++){
 | 
				
			||||||
                            shapeLength *= arguments->at(e2);
 | 
					                            shapeLength *= arguments->at(e2);
 | 
				
			||||||
                        }
 | 
					                        }
 | 
				
			||||||
                        long realShape = x->lengthOf() / shapeLength;
 | 
					                        Nd4jLong realShape = x->lengthOf() / shapeLength;
 | 
				
			||||||
                        shapeNew.push_back(realShape);
 | 
					                        shapeNew.push_back(realShape);
 | 
				
			||||||
                    }
 | 
					                    }
 | 
				
			||||||
                    else{
 | 
					                    else{
 | 
				
			||||||
@ -109,16 +109,15 @@ namespace nd4j {
 | 
				
			|||||||
                for (int e = 0; e < (int) s->lengthOf(); e++) {
 | 
					                for (int e = 0; e < (int) s->lengthOf(); e++) {
 | 
				
			||||||
                    auto dim = s->e<Nd4jLong >(e);
 | 
					                    auto dim = s->e<Nd4jLong >(e);
 | 
				
			||||||
                    if (dim == -1){
 | 
					                    if (dim == -1){
 | 
				
			||||||
                        long shapeLength = 1;
 | 
					                        Nd4jLong shapeLength = 1;
 | 
				
			||||||
                        for(int e2 = 0; e2 < e; e2++){
 | 
					                        for(int e2 = 0; e2 < e; e2++){
 | 
				
			||||||
                            shapeLength *= s->e<Nd4jLong>(e2);
 | 
					                            shapeLength *= s->e<Nd4jLong>(e2);
 | 
				
			||||||
                        }
 | 
					                        }
 | 
				
			||||||
                        for(int e2 = e + 1; e2 < (int) s->lengthOf(); e2++){
 | 
					                        for(int e2 = e + 1; e2 < (int) s->lengthOf(); e2++){
 | 
				
			||||||
                            REQUIRE_TRUE(s->e<Nd4jLong>(e2) != -1, 0, "Reshape : Only one unknown dimension (-1) is allowed.");
 | 
					                            REQUIRE_TRUE(s->e<Nd4jLong>(e2) != -1, 0, "Reshape : Only one unknown dimension (-1) is allowed.");
 | 
				
			||||||
                            shapeLength *=
 | 
					                            shapeLength *= s->e<Nd4jLong>(e2);
 | 
				
			||||||
                                    s->e<Nd4jLong>(e2);
 | 
					 | 
				
			||||||
                        }
 | 
					                        }
 | 
				
			||||||
                        long realShape = x->lengthOf() / shapeLength;
 | 
					                        Nd4jLong realShape = x->lengthOf() / shapeLength;
 | 
				
			||||||
                        shapeNew[e] = realShape;
 | 
					                        shapeNew[e] = realShape;
 | 
				
			||||||
                    }
 | 
					                    }
 | 
				
			||||||
                    else{
 | 
					                    else{
 | 
				
			||||||
@ -187,7 +186,7 @@ namespace nd4j {
 | 
				
			|||||||
                for (; e < (int) arguments->size(); e++) {
 | 
					                for (; e < (int) arguments->size(); e++) {
 | 
				
			||||||
                    if ((int) arguments->at(e) == -1){
 | 
					                    if ((int) arguments->at(e) == -1){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                        long shapeLength = 1;
 | 
					                        Nd4jLong shapeLength = 1;
 | 
				
			||||||
                        for(; e2 < e; e2 ++){
 | 
					                        for(; e2 < e; e2 ++){
 | 
				
			||||||
                            shapeLength *= arguments->at(e2);
 | 
					                            shapeLength *= arguments->at(e2);
 | 
				
			||||||
                        }
 | 
					                        }
 | 
				
			||||||
@ -201,7 +200,7 @@ namespace nd4j {
 | 
				
			|||||||
                            shapeNew.push_back(0);
 | 
					                            shapeNew.push_back(0);
 | 
				
			||||||
                        } else {
 | 
					                        } else {
 | 
				
			||||||
                            //Standard case
 | 
					                            //Standard case
 | 
				
			||||||
                            long realShape = shape::length(inp) / shapeLength;
 | 
					                            Nd4jLong realShape = shape::length(inp) / shapeLength;
 | 
				
			||||||
                            shapeNew.push_back(realShape);
 | 
					                            shapeNew.push_back(realShape);
 | 
				
			||||||
                        }
 | 
					                        }
 | 
				
			||||||
                    }
 | 
					                    }
 | 
				
			||||||
@ -240,7 +239,7 @@ namespace nd4j {
 | 
				
			|||||||
                for (int e = 0; e < (int) y->lengthOf(); e++) {
 | 
					                for (int e = 0; e < (int) y->lengthOf(); e++) {
 | 
				
			||||||
                    auto dim = y->e<Nd4jLong>(e);
 | 
					                    auto dim = y->e<Nd4jLong>(e);
 | 
				
			||||||
                    if (dim == -1){
 | 
					                    if (dim == -1){
 | 
				
			||||||
                        long shapeLength = 1;
 | 
					                        Nd4jLong shapeLength = 1;
 | 
				
			||||||
                        for(int e2 = 0; e2 < e; e2++){
 | 
					                        for(int e2 = 0; e2 < e; e2++){
 | 
				
			||||||
                            shapeLength *= y->e<Nd4jLong>(e2);
 | 
					                            shapeLength *= y->e<Nd4jLong>(e2);
 | 
				
			||||||
                        }
 | 
					                        }
 | 
				
			||||||
@ -253,7 +252,7 @@ namespace nd4j {
 | 
				
			|||||||
                            //Edge case for empty:
 | 
					                            //Edge case for empty:
 | 
				
			||||||
                            shapeNew[e] = 0;
 | 
					                            shapeNew[e] = 0;
 | 
				
			||||||
                        } else {
 | 
					                        } else {
 | 
				
			||||||
                            long realShape = shape::length(inp) / shapeLength;
 | 
					                            Nd4jLong realShape = shape::length(inp) / shapeLength;
 | 
				
			||||||
                            shapeNew[e] = realShape;
 | 
					                            shapeNew[e] = realShape;
 | 
				
			||||||
                        }
 | 
					                        }
 | 
				
			||||||
                    }else {
 | 
					                    }else {
 | 
				
			||||||
 | 
				
			|||||||
@ -41,7 +41,7 @@ namespace nd4j {
 | 
				
			|||||||
                }
 | 
					                }
 | 
				
			||||||
            else if (block.width() > 1) {
 | 
					            else if (block.width() > 1) {
 | 
				
			||||||
                auto a = INPUT_VARIABLE(1);
 | 
					                auto a = INPUT_VARIABLE(1);
 | 
				
			||||||
                for (int e = 0; e < a->lengthOf(); e++) {
 | 
					                for (Nd4jLong e = 0; e < a->lengthOf(); e++) {
 | 
				
			||||||
                    int _a = a->e<int>(e);
 | 
					                    int _a = a->e<int>(e);
 | 
				
			||||||
                    
 | 
					                    
 | 
				
			||||||
                    if (_a < 0)
 | 
					                    if (_a < 0)
 | 
				
			||||||
 | 
				
			|||||||
@ -31,14 +31,14 @@ namespace nd4j {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
                if(isStrictlyIncreasing) {
 | 
					                if(isStrictlyIncreasing) {
 | 
				
			||||||
                    PRAGMA_OMP_PARALLEL_FOR_SIMD_REDUCTION(+:sum)
 | 
					                    PRAGMA_OMP_PARALLEL_FOR_SIMD_REDUCTION(+:sum)
 | 
				
			||||||
                    for (int i = 0; i < length - 1; i++) {
 | 
					                    for (Nd4jLong i = 0; i < length - 1; i++) {
 | 
				
			||||||
                        auto val0 = input->t<T>(i);
 | 
					                        auto val0 = input->t<T>(i);
 | 
				
			||||||
                        auto val1 = input->t<T>(i + 1);
 | 
					                        auto val1 = input->t<T>(i + 1);
 | 
				
			||||||
                        sum += val0 >= val1 ? -1 : 0;
 | 
					                        sum += val0 >= val1 ? -1 : 0;
 | 
				
			||||||
                    }
 | 
					                    }
 | 
				
			||||||
                } else {
 | 
					                } else {
 | 
				
			||||||
                    PRAGMA_OMP_PARALLEL_FOR_SIMD_REDUCTION(+:sum)
 | 
					                    PRAGMA_OMP_PARALLEL_FOR_SIMD_REDUCTION(+:sum)
 | 
				
			||||||
                    for (int i = 0; i < length - 1; i++) {
 | 
					                    for (Nd4jLong i = 0; i < length - 1; i++) {
 | 
				
			||||||
                        auto val0 = input->t<T>(i);
 | 
					                        auto val0 = input->t<T>(i);
 | 
				
			||||||
                        auto val1 = input->t<T>(i + 1);
 | 
					                        auto val1 = input->t<T>(i + 1);
 | 
				
			||||||
                        sum += val0 > val1 ? -1 : 0;
 | 
					                        sum += val0 > val1 ? -1 : 0;
 | 
				
			||||||
 | 
				
			|||||||
@ -39,7 +39,7 @@ void crossBatched(nd4j::LaunchContext * context, NDArray *a, NDArray *b, NDArray
 | 
				
			|||||||
    int tads = tadsA->size();
 | 
					    int tads = tadsA->size();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    PRAGMA_OMP_PARALLEL_FOR_SIMD
 | 
					    PRAGMA_OMP_PARALLEL_FOR_SIMD
 | 
				
			||||||
    for (int e = 0; e < tads; e++) {
 | 
					    for (Nd4jLong e = 0; e < tads; e++) {
 | 
				
			||||||
        auto a_ = tadsA->at(e);
 | 
					        auto a_ = tadsA->at(e);
 | 
				
			||||||
        auto b_ = tadsB->at(e);
 | 
					        auto b_ = tadsB->at(e);
 | 
				
			||||||
        auto o_ = tadsO->at(e);
 | 
					        auto o_ = tadsO->at(e);
 | 
				
			||||||
 | 
				
			|||||||
@ -43,7 +43,7 @@ static void triuBP_(nd4j::LaunchContext * context, const NDArray& input, const N
 | 
				
			|||||||
    int dLen = dOdI.lengthOf();
 | 
					    int dLen = dOdI.lengthOf();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    PRAGMA_OMP_PARALLEL_FOR_IF(dLen > Environment::getInstance()->elementwiseThreshold())
 | 
					    PRAGMA_OMP_PARALLEL_FOR_IF(dLen > Environment::getInstance()->elementwiseThreshold())
 | 
				
			||||||
    for(int i = 0; i < dLen; ++i) {
 | 
					    for(Nd4jLong i = 0; i < dLen; ++i) {
 | 
				
			||||||
        if(dOdI.t<T>(i) != static_cast<T>(0.f))
 | 
					        if(dOdI.t<T>(i) != static_cast<T>(0.f))
 | 
				
			||||||
            dOdI.t<T>(i) = static_cast<T>(1.f);
 | 
					            dOdI.t<T>(i) = static_cast<T>(1.f);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
				
			|||||||
@ -94,7 +94,7 @@ namespace nd4j {
 | 
				
			|||||||
                //indices->printIndexedBuffer("indices");
 | 
					                //indices->printIndexedBuffer("indices");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                std::vector<int> dims(indices->lengthOf());
 | 
					                std::vector<int> dims(indices->lengthOf());
 | 
				
			||||||
                for (int e = 0; e < indices->lengthOf(); e++) {
 | 
					                for (Nd4jLong e = 0; e < indices->lengthOf(); e++) {
 | 
				
			||||||
                    // lol otherwise we segfault on macOS
 | 
					                    // lol otherwise we segfault on macOS
 | 
				
			||||||
                    int f = indices->e<int>(e);
 | 
					                    int f = indices->e<int>(e);
 | 
				
			||||||
                    dims[e] = f >= 0 ? f : f += x->rankOf();
 | 
					                    dims[e] = f >= 0 ? f : f += x->rankOf();
 | 
				
			||||||
 | 
				
			|||||||
@ -66,17 +66,17 @@ void SpecialMethods<T>::concatCpuGeneric(const std::vector<NDArray*>& inArrs, ND
 | 
				
			|||||||
                PRAGMA_OMP_PARALLEL_FOR_SIMD
 | 
					                PRAGMA_OMP_PARALLEL_FOR_SIMD
 | 
				
			||||||
                for (uint r = 0; r < numOfArrs; r++) {
 | 
					                for (uint r = 0; r < numOfArrs; r++) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                    const uint arrLen = inArrs[r]->lengthOf();
 | 
					                    const Nd4jLong arrLen = inArrs[r]->lengthOf();
 | 
				
			||||||
                    const uint xEws    = (arrLen == 1) ? 1 : inArrs[r]->stridesOf()[nonUnityDim[r]];
 | 
					                    const uint xEws    = (arrLen == 1) ? 1 : inArrs[r]->stridesOf()[nonUnityDim[r]];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                    T *z = outBuff + zOffset[r];
 | 
					                    T *z = outBuff + zOffset[r];
 | 
				
			||||||
                    T *x = inArrs[r]->bufferAsT<T>();
 | 
					                    T *x = inArrs[r]->bufferAsT<T>();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                    if(outEws == 1 && xEws == 1)
 | 
					                    if(outEws == 1 && xEws == 1)
 | 
				
			||||||
                        for (uint e = 0; e < arrLen; e++)
 | 
					                        for (Nd4jLong e = 0; e < arrLen; e++)
 | 
				
			||||||
                            z[e] = x[e];
 | 
					                            z[e] = x[e];
 | 
				
			||||||
                    else
 | 
					                    else
 | 
				
			||||||
                        for (uint e = 0; e < arrLen; e++)
 | 
					                        for (Nd4jLong e = 0; e < arrLen; e++)
 | 
				
			||||||
                            z[e * outEws] = x[e * xEws];
 | 
					                            z[e * outEws] = x[e * xEws];
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
                return;
 | 
					                return;
 | 
				
			||||||
 | 
				
			|||||||
@ -382,7 +382,7 @@ TEST_F(LegacyOpsTests, Test_IsMax_1) {
 | 
				
			|||||||
            z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), extra, nullptr, nullptr);
 | 
					            z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), extra, nullptr, nullptr);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // z.printIndexedBuffer("z");
 | 
					    // z.printIndexedBuffer("z");
 | 
				
			||||||
    for (int e = 0; e < z.lengthOf(); e++) {
 | 
					    for (Nd4jLong e = 0; e < z.lengthOf(); e++) {
 | 
				
			||||||
        ASSERT_TRUE(z.e<double>(e) >= 0);
 | 
					        ASSERT_TRUE(z.e<double>(e) >= 0);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@ -402,7 +402,7 @@ TEST_F(LegacyOpsTests, Test_IsMax_2) {
 | 
				
			|||||||
            z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), extra, nullptr, nullptr);
 | 
					            z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), extra, nullptr, nullptr);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // z.printIndexedBuffer("z");
 | 
					    // z.printIndexedBuffer("z");
 | 
				
			||||||
 for (int e = 0; e < z.lengthOf(); e++) {
 | 
					 for (Nd4jLong e = 0; e < z.lengthOf(); e++) {
 | 
				
			||||||
     if (e >= z.lengthOf() / 2)
 | 
					     if (e >= z.lengthOf() / 2)
 | 
				
			||||||
         ASSERT_TRUE(z.e<bool>(e));
 | 
					         ASSERT_TRUE(z.e<bool>(e));
 | 
				
			||||||
     else
 | 
					     else
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user