/******************************************************************************* * Copyright (c) 2015-2018 Skymind, Inc. * * This program and the accompanying materials are made available under the * terms of the Apache License, Version 2.0 which is available at * https://www.apache.org/licenses/LICENSE-2.0. * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations * under the License. * * SPDX-License-Identifier: Apache-2.0 ******************************************************************************/ #ifndef NDARRAY_CPP #define NDARRAY_CPP #include "../NDArray.h" #include "../NDArrayFactory.h" #include "NativeOpExecutioner.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace nd4j { //////////////////////////////////////////////////////////////////////// void* NDArray::platformBuffer() { return buffer(); } void* NDArray::getPlatformBuffer() const { return getBuffer(); } Nd4jLong* NDArray::getPlatformShapeInfo() const { return getShapeInfo(); } Nd4jLong* NDArray::platformShapeInfo() { return shapeInfo(); } void NDArray::syncToDevice() const { } void NDArray::syncToHost() const { } void NDArray::tickWriteHost() const { } void NDArray::tickWriteDevice() const { } void NDArray::tickReadHost() const { } void NDArray::tickReadDevice() const { } void NDArray::tickBothActual() const { } bool NDArray::isActualOnHostSide() const { return true; } bool NDArray::isActualOnDeviceSide() const { return true; } void NDArray::makeBothBuffersActual() const { } //////////////////////////////////////////////////////////////////////// template void 
NDArray::fillAsTriangular(const float val, int lower, int upper, const char direction, NDArray* target) { if (isS()) throw std::runtime_error("NDArray::fillArrayAsTriangular: you can't use this method on String array!"); if(target == nullptr) target = this; if(!isSameShape(target) && !(rankOf() == 1 && target->rankOf() == 2 && sizeAt(0) == target->sizeAt(0) && sizeAt(0) == target->sizeAt(1))) throw std::string("NDArray::fillArrayAsTriangular method: wrong shape of target array !"); if (direction == 'u') lower = -target->sizeAt(-2); else if (direction == 'l') upper = target->sizeAt(-1); const T value = static_cast(val); const auto x = reinterpret_cast(getBuffer()); auto z = reinterpret_cast(target->getBuffer()); const int xRank = rankOf(); const int zRank = target->rankOf(); const auto zLen = target->lengthOf(); const bool areSameOffsets = shape::haveSameShapeAndStrides(getShapeInfo(), target->getShapeInfo()); std::vector coords(zRank); PRAGMA_OMP_PARALLEL_FOR_ARGS(OMP_IF(zLen > Environment::getInstance()->elementwiseThreshold()) firstprivate(coords)) for (Nd4jLong i = 0; i < zLen; ++i) { shape::index2coords(zRank, target->shapeOf(), i, zLen, coords.data()); const auto zOffset = shape::getOffset(0, target->shapeOf(), target->stridesOf(), coords.data(), zRank); // if( (row + upper < col) || (row + lower > col) ) if((coords[zRank - 2] + upper < coords[zRank - 1]) || (coords[zRank - 2] + lower > coords[zRank - 1])) z[zOffset] = value; else if(this != target) { // when this and target are different arrays if(xRank != zRank) coords[0] = coords[1]; const auto xOffset = areSameOffsets ? 
zOffset : shape::getOffset(0, shapeOf(), stridesOf(), coords.data(), xRank); z[zOffset] = x[xOffset]; } } } BUILD_SINGLE_TEMPLATE(template void NDArray::fillAsTriangular, (const float val, int lower, int upper, const char direction, NDArray* target), LIBND4J_TYPES); //////////////////////////////////////////////////////////////////////// void NDArray::setIdentity() { if (isS()) throw std::runtime_error("NDArray::setIdentity: you can't use this method on String array!"); this->nullify(); int rank = rankOf(); auto shape = shapeOf(); auto strides = stridesOf(); int minDim = MAX_INT; Nd4jLong indices[MAX_RANK]; for(int j = 0; j < rank; ++j) indices[j] = 1; Nd4jLong offset = shape::getOffset(0, shape, strides, indices, rank); for(int i = 0; i < rank; ++i) if(minDim > shape[i]) minDim = shape[i]; float v = 1.0f; PRAGMA_OMP_PARALLEL_FOR_ARGS(OMP_IF(minDim > Environment::getInstance()->elementwiseThreshold()) schedule(guided)) for(int i = 0; i < minDim; ++i) templatedSet(buffer(), i*offset, this->dataType(), &v); } //////////////////////////////////////////////////////////////////////// template static void templatedSwap(void *xBuffer, void *yBuffer, Nd4jLong length) { auto x = reinterpret_cast(xBuffer); auto y = reinterpret_cast(yBuffer); PRAGMA_OMP_PARALLEL_FOR_SIMD_ARGS(schedule(static)) for (Nd4jLong i = 0; i < length; ++i) { auto temp = x[i]; x[i] = y[i]; y[i] = temp; } } BUILD_SINGLE_TEMPLATE(template void templatedSwap, (void *xBuffer, void *yBuffer, Nd4jLong length), LIBND4J_TYPES); //////////////////////////////////////////////////////////////////////// void NDArray::swapUnsafe(NDArray& other) { auto xType = this->dataType(); if (xType != other.dataType()) throw std::runtime_error("NDArray::swapUnsage method: both arrays must have the same data type"); if(buffer() == nullptr || other.buffer() == nullptr) throw std::runtime_error("NDArray::swapUnsafe method: input array should not be empty!"); if(lengthOf() != other.lengthOf()) throw 
std::runtime_error("NDArray::swapUnsafe method: input arrays should have the same length!"); BUILD_SINGLE_SELECTOR(xType, templatedSwap, (buffer(), other.buffer(), this->lengthOf()), LIBND4J_TYPES); } //////////////////////////////////////////////////////////////////////// void NDArray::synchronize(const char* msg) const { // no-op } void NDArray::prepareSpecialUse(const std::initializer_list& writeList, const std::initializer_list& readList, bool synchronizeWritables) { // no-op } void NDArray::registerSpecialUse(const std::initializer_list& writeList, const std::initializer_list& readList) { // no-op } void NDArray::preparePrimaryUse(const std::initializer_list& writeList, const std::initializer_list& readList, bool synchronizeWritables) { // no-op } void NDArray::registerPrimaryUse(const std::initializer_list& writeList, const std::initializer_list& readList) { // no-op } void NDArray::syncShape() const { // no-op } ////////////////////////////////////////////////////////////////////////// template void NDArray::printCurrentBuffer(const bool host, const char* msg, const int precision) const { } //////////////////////////////////////////////////////////////////////// void* NDArray::specialBufferWithOffset(Nd4jLong offset) const { return nullptr; } //////////////////////////////////////////////////////////////////////// void* NDArray::specialBuffer() { if (_buffer->special() == nullptr) return getBuffer(); // FIXME: this should be fixed once CUDA backend added return static_cast(_buffer->special()) + (_offset * sizeOfT()); } //////////////////////////////////////////////////////////////////////// void* NDArray::getSpecialBuffer() const { if (_buffer->special() == nullptr) return getBuffer(); // FIXME: this should be fixed once CUDA backend added return static_cast(_buffer->special()) + (_offset * sizeOfT()); } ////////////////////////////////////////////////////////////////////////// // change an array by repeating it the number of times given by reps. 
// Builds a new array by tiling this array according to `reps`.
//  - throws std::runtime_error when the product of all entries of `reps` is zero
//  - when that product is 1 the result is a copy of this array, reshaped (reshapei) only if
//    `reps` has more entries than this array has dimensions
//  - otherwise a new buffer sized from the evaluated tile shapeInfo is allocated and filled
// NOTE(review): throughout this block SOURCE shows `std::vector`, `std::shared_ptr`,
// `std::make_shared`, `bufferAsT` and the `repeat_` template head without any template
// arguments, and whole statements are missing at the places marked below. The code tokens are
// kept exactly as found; restore the missing text from the repository before building.
NDArray NDArray::tile(const std::vector& reps) const {
    const int repsSize = reps.size();

    Nd4jLong product = 1;
    for(const auto& item : reps)
        product *= item;
    if(product == 0)
        throw std::runtime_error("NDArray::tile method: one of the elements in reps array is zero !");

    int rankOld = rankOf();
    // negative diff means reps has more entries than this array has dimensions
    int diff = rankOld - repsSize;
    if(product==1) { // in this case 2 possibilities are present: just reshape or nothing to do
        NDArray result(*this);
        if(diff < 0) { // reshape to higher dimension
            std::vector shapeNew = reps; // there is requirement to have unities at first "diff" positions of new shape
            memcpy(&shapeNew[-diff], result.getShapeInfo()+1, rankOld * sizeof(Nd4jLong)); // put old shape numbers at rest of positions
            result.reshapei(ordering(), shapeNew);
        }
        return result; // nothing to do, if diff >= 0 -> identity tile
    }

    // evaluate shapeInfo for resulting array
    auto newShapeInfo = ShapeUtils::evalTileShapeInfo(*this, reps, getContext()->getWorkspace());
    // create new buffer, in any case the memory amount new buffer points to is bigger then those for old _buffer
    std::shared_ptr newBuff = std::make_shared(shape::length(newShapeInfo) * sizeOfT(), dataType(), getContext()->getWorkspace());
    // assign new shape and new buffer to resulting array
    NDArray result(newBuff, ShapeDescriptor(newShapeInfo), getContext());

    // fill newBuff, loop through all elements of newBuff
    // looping through _buffer goes automatically by means of getSubArrayIndex applying
    const auto resultLen = result.lengthOf();
    auto xType = this->dataType();
    if(result.ordering() == 'c') { // ews == 1 always here
        // linear index i is used directly as the offset into the result buffer
        PRAGMA_OMP_PARALLEL_FOR_SIMD
        for(Nd4jLong i = 0; i < resultLen; ++i) {
            auto yOffset = shape::subArrayOffset(i, newShapeInfo, getShapeInfo());
            BUILD_SINGLE_SELECTOR(xType, this->template templatedAssign, (result.getBuffer(), i, this->getBuffer(), yOffset), LIBND4J_TYPES);
        }
    }
    else {
        PRAGMA_OMP_PARALLEL_FOR_SIMD
        // NOTE(review): SOURCE is damaged on the next line - the text between `i` and `template`
        // is missing (it presumably held the loop condition, the xOffset/yOffset computations and
        // the start of the selector macro). Left exactly as found.
        for(Nd4jLong i=0; itemplate templatedAssign, (result.getBuffer(), xOffset, this->getBuffer(), yOffset), LIBND4J_TYPES);
        }
    }
    result.tickWriteHost();
    return result;
}

//////////////////////////////////////////////////////////////////////////
// change an array by repeating it the number of times given by reps.
// Variant that writes into a caller-supplied `target`.
//  - throws std::runtime_error when the product of `reps` is below 1
//  - throws std::runtime_error when the evaluated tile shapeInfo does not softly equal the
//    shapeInfo of `target`
void NDArray::tile(const std::vector& reps, NDArray& target) const {
    auto repProd = shape::prodLong(reps.data(), reps.size());
    if (repProd < 1)
        throw std::runtime_error("NDArray::tile: reps can't contain 0s");

    // evaluate true tile shapeInfo for comparison with target shapeInfo
    auto newShapeInfo = ShapeUtils::evalTileShapeInfo(*this, reps, getContext()->getWorkspace());
    if(!shape::equalsSoft(newShapeInfo, target.getShapeInfo())) {
        // NOTE(review): newShapeInfo is deleted only on this error path; the visible code never
        // releases it otherwise, and the sibling tile(reps) overload does not delete it at all.
        // Who owns the pointer returned by evalTileShapeInfo is not visible here - confirm that
        // delete[] is valid for it.
        delete []newShapeInfo;
        throw std::runtime_error("NDArray::tile method - shapeInfo of target array is not suitable for tile operation !");
    }

    // fill newBuff, loop through all elements of newBuff
    // looping through _buffer goes automatically by means of getSubArrayIndex applying
    const int ews = target.ews();
    // NOTE(review): stored as int while the loops use Nd4jLong counters - possible narrowing
    // for very long targets; confirm the return type of lengthOf().
    const int targetLen = target.lengthOf();
    if(target.ordering() == 'c' && ews == 1) { // ews == 1 always here
//#pragma omp parallel for simd if(targetLen > Environment::getInstance()->elementwiseThreshold()) schedule(guided)
        // NOTE(review): SOURCE is damaged from here on. The body of this loop, the remaining
        // branches and the end of this function are missing; what follows `i` on the next line
        // looks like the tail of an `else if (... ews > 1) {` condition.
        for(Nd4jLong i=0; i 1) {
        // NOTE(review): SOURCE is damaged again on the next line. The rest of tile(reps, target)
        // and the head of the following definition are missing; the surviving text resumes in
        // the middle of a rank check. Judging by its error messages the code below belongs to a
        // tile overload that takes only `target` - confirm against the repository.
        for(Nd4jLong i=0; i target.rankOf())
        throw std::runtime_error("NDArray::tile method - rank of target array must be bigger or equal to the rank of this array !");

    if(!ShapeUtils::areShapesBroadcastable(*this, target))
        throw std::runtime_error("NDArray::tile method - shapeInfo of target array is not suitable for tile operation !");

    // fill newBuff, loop through all elements of newBuff
    // looping through _buffer goes automatically by means of getSubArrayIndex applying
    const auto ews = target.ews();
    const auto targetLen = target.lengthOf();
    if(target.ordering() == 'c' && ews == 1) { // ews == 1 always here
        // the double selector dispatches on both data types, so source and target types may differ
        for (Nd4jLong i = 0; i < targetLen; ++i) {
            auto yOffset = shape::subArrayOffset(i, target.getShapeInfo(), getShapeInfo());
            BUILD_DOUBLE_SELECTOR(target.dataType(), dataType(), templatedDoubleAssign, (target.getBuffer(), i, getBuffer(), yOffset), LIBND4J_TYPES, LIBND4J_TYPES);
        }
    }
    else if(target.ordering() == 'c' && ews > 1) {
        // NOTE(review): SOURCE is damaged on the next line. The body of this loop, the end of
        // the enclosing function and the `template <...>` head of repeat_ are missing; only
        // `static void repeat_(...)` survives. The body below uses type names X (input element)
        // and Z (output element).
        for(Nd4jLong i=0; i static void repeat_(const NDArray& input, NDArray& output, const std::vector& repeats, const int axis) {
    // Helper for repeat(): for every element of `output`, works out which element of `input` it
    // repeats along `axis` and copies it.
    const X* x = input.bufferAsT();
    Z* z = output.bufferAsT();

    const int rank = input.rankOf(); // xRank = zRank
    const int zLen = output.lengthOf(); // xLen <= zLen
    const int repSize = repeats.size();

    // scratch coordinates; firstprivate below gives each thread its own copy
    std::vector coords(rank);

    // loop through input array
    PRAGMA_OMP_PARALLEL_FOR_ARGS(schedule(guided) firstprivate(coords))
    for (Nd4jLong i = 0; i < zLen; ++i) {
        shape::index2coords(rank, output.shapeOf(), i, zLen, coords.data());
        const auto zOffset = shape::getOffset(0, output.shapeOf(), output.stridesOf(), coords.data(), rank);

        if(repSize > 1) {
            // per-position repeat counts: subtract them one by one until the coordinate goes
            // negative; the position j reached at that point is the input coordinate on `axis`
            for (uint j = 0; j < repSize; ++j) {
                coords[axis] -= repeats[j];
                if (coords[axis] < 0) {
                    coords[axis] = j;
                    break;
                }
            }
        }
        else
            // a single repeat count applies to every position along `axis`
            coords[axis] /= repeats[0];

        // coords now addresses the source element in `input`
        z[zOffset] = x[shape::getOffset(0, input.shapeOf(), input.stridesOf(), coords.data(), rank)];
    }
}

//////////////////////////////////////////////////////////////////////////
// create new array by repeating it the number of times given by repeats
// The result is allocated with `new`, in 'c' order, with this array's data type and context,
// and returned as a raw pointer.
// NOTE(review): the caller presumably takes ownership and must delete it - confirm.
NDArray* NDArray::repeat(const int axis, const std::vector& repeats) const {
    auto output = new NDArray('c', ShapeUtils::evalRepeatShape(axis, repeats, *this), dataType(), getContext());
    BUILD_SINGLE_SELECTOR_TWICE(dataType(), repeat_, (*this, *output, repeats, axis), LIBND4J_TYPES);
    return output;
}

//////////////////////////////////////////////////////////////////////////
// fill array by repeating it the number of times given by reps
// Writes into `target`, whose data type may differ from this array's (double selector).
// Throws std::invalid_argument when the shape of `target` differs from the evaluated repeat shape.
void NDArray::repeat(const int axis, const std::vector& repeats, NDArray& target) const {
    if(!target.isSameShape(ShapeUtils::evalRepeatShape(axis, repeats, *this)))
        throw std::invalid_argument("NDArray::repeat(const int axis, const std::vector& repeats, NDArray& target) method: wrong shape of target array!");

    BUILD_DOUBLE_SELECTOR(dataType(), target.dataType(), repeat_, (*this, target, repeats, axis), LIBND4J_TYPES, LIBND4J_TYPES);
}

//////////////////////////////////////////////////////////////////////////
// The lambda-related definitions are pulled in textually, inside namespace nd4j, unless
// __JAVACPP_HACK__ is defined.
#ifndef __JAVACPP_HACK__
#include "NDArrayLambda.hpp"
#endif

/*
#ifndef __CLION_IDE__
#include "NDArray.macro"
#endif
 */
}

#endif