cavis/libnd4j/include/helpers/cuda/PointersManager.cu

125 lines
5.3 KiB
Plaintext
Raw Normal View History

2019-06-06 14:21:15 +02:00
/*******************************************************************************
* Copyright (c) 2015-2018 Skymind, Inc.
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* SPDX-License-Identifier: Apache-2.0
******************************************************************************/
//
// @author Yurii Shyrma (iuriish@yahoo.com), created on 06.02.2019
// @author raver119@gmail.com
//
#include <PointersManager.h>
#include <exceptions/cuda_exception.h>
#include <StringUtils.h>
#include <logger.h>
#include <memory/Workspace.h>
namespace nd4j {
//////////////////////////////////////////////////////////////////////////
// Binds the manager to a launch context and to a caller-supplied label.
//
// context  - launch context consulted by the other methods; constness is cast
//            away because the member is stored as a non-const pointer
// funcName - label copied into the object; it prefixes the messages produced by
//            replicatePointer() and synchronize()
PointersManager::PointersManager(const nd4j::LaunchContext* context, const std::string& funcName) {
    // the two members are independent of each other, so the order of assignment is irrelevant
    this->_funcName = funcName;
    this->_context  = const_cast<nd4j::LaunchContext*>(context);
}
//////////////////////////////////////////////////////////////////////////
// Copies a host buffer into newly obtained device memory and records the device
// pointer in _pOnGlobMem so that the destructor can release it.
//
// Device memory is taken from the context's workspace when a context with a
// workspace is attached, otherwise it is obtained with cudaMalloc. With a
// context the copy is enqueued on the context's stream via cudaMemcpyAsync;
// without one a blocking cudaMemcpy is used.
//
// src           - host buffer to copy from
// numberOfBytes - number of bytes to allocate on the device and to copy
// returns       - device pointer that receives the copy
// throws        - cuda_exception when the allocation or the copy is rejected by the CUDA runtime
void* PointersManager::replicatePointer(const void* src, const size_t numberOfBytes) {
    void* dst = nullptr;

    // A null context is a state this method handles (see the copy below), so it
    // must be tested before the workspace is queried through it.
    const bool hasWorkspace = _context != nullptr && _context->getWorkspace() != nullptr;

    if (!hasWorkspace) {
        cudaError_t cudaResult = cudaMalloc(&dst, numberOfBytes);
        if (cudaResult != cudaSuccess)
            throw cuda_exception::build(_funcName + ": cannot allocate global memory on device!", cudaResult);
    } else {
        dst = _context->getWorkspace()->allocateBytes(nd4j::memory::MemoryType::DEVICE, numberOfBytes);
    }

    cudaError_t copyResult = _context != nullptr
        ? cudaMemcpyAsync(dst, src, numberOfBytes, cudaMemcpyHostToDevice, *_context->getCudaStream())
        : cudaMemcpy(dst, src, numberOfBytes, cudaMemcpyHostToDevice);

    if (copyResult != cudaSuccess) {
        // dst is not tracked yet; release a cudaMalloc'ed buffer here so it does not leak
        if (!hasWorkspace)
            cudaFree(dst);
        throw cuda_exception::build(_funcName + ": cannot copy data from host to device!", copyResult);
    }

    // NOTE(review): workspace-allocated pointers are tracked as well and are therefore
    // passed to cudaFree by the destructor - confirm that this is intended for
    // memory owned by the workspace.
    _pOnGlobMem.emplace_back(dst);

    return dst;
}
//////////////////////////////////////////////////////////////////////////
// Blocks until the stream of the attached context has finished its work.
//
// Without a context there is no stream to wait on; a message is printed through
// nd4j_printf and the call returns without doing anything else.
//
// throws - cuda_exception when cudaStreamSynchronize reports a failure
void PointersManager::synchronize() const {
    // guard clause: nothing to synchronize when no context is attached
    if (_context == nullptr) {
        nd4j_printf("<%s> syncStream isn't possible: no stream set!", _funcName.c_str());
        return;
    }

    cudaError_t syncStatus = cudaStreamSynchronize(*_context->getCudaStream());
    if (syncStatus != 0)
        throw cuda_exception::build(_funcName + ": cuda stream synchronization failed !", syncStatus);
}
//////////////////////////////////////////////////////////////////////////
// Hands every pointer recorded in _pOnGlobMem to cudaFree.
// The status returned by cudaFree is not inspected.
PointersManager::~PointersManager() {
    for (auto it = _pOnGlobMem.begin(); it != _pOnGlobMem.end(); ++it)
        cudaFree(*it);
}
////////////////////////////////////////////////////////////////////////
// File-local kernel whose only job is to forward its arguments, unchanged and in
// the same order, to PointersManager::printDevContentOnDev<T>.
// It adds no indexing or bounds logic of its own; the callee is not defined in this file.
template <typename T>
static __global__ void printDevContentOnDev_(const void* devicePtr, const Nd4jLong length, const int tid) {
    PointersManager::printDevContentOnDev<T>(devicePtr, length, tid);
}
////////////////////////////////////////////////////////////////////////
// Launches the printDevContentOnDev_<T> kernel on the stream of the default
// launch context and waits for that stream to finish.
//
// Launch configuration: 512 blocks of 512 threads with 1024 bytes of dynamic
// shared memory.
//
// pDev, len, tid - forwarded unchanged to the kernel
// throws         - std::runtime_error when the launch is rejected or the stream
//                  synchronization fails
template<typename T>
void PointersManager::printDevContentOnDevFromHost(const void* pDev, const Nd4jLong len, const int tid) {
    // look the stream up once and use the same one for the launch and for the wait
    auto stream = nd4j::LaunchContext::defaultContext()->getCudaStream();

    printDevContentOnDev_<T><<<512, 512, 1024, *stream>>>(pDev, len, tid);

    // a kernel launch returns no status; launch errors are only visible through cudaGetLastError()
    if (cudaGetLastError() != cudaSuccess)
        throw std::runtime_error("PointersManager::printDevContentOnDevFromHost: kernel launch failed!");

    auto res = cudaStreamSynchronize(*stream);
    if (res != cudaSuccess)
        throw std::runtime_error("PointersManager::printDevContentOnDevFromHost: cudaStreamSynchronize failed!");
}
// explicit instantiations for the element types supported by this helper
template void PointersManager::printDevContentOnDevFromHost<Nd4jLong>(const void* pDev, const Nd4jLong len, const int tid);
template void PointersManager::printDevContentOnDevFromHost<int>(const void* pDev, const Nd4jLong len, const int tid);
template void PointersManager::printDevContentOnDevFromHost<float>(const void* pDev, const Nd4jLong len, const int tid);
template void PointersManager::printDevContentOnDevFromHost<double>(const void* pDev, const Nd4jLong len, const int tid);
//BUILD_SINGLE_TEMPLATE(template void PointersManager::printDevContentOnDevFromHost, (void* pDev, Nd4jLong len, int tid), LIBND4J_TYPES);
////////////////////////////////////////////////////////////////////////
// Copies len elements of type T from device memory into a temporary host buffer
// and prints each of them, converted to double, with printf("%f, ").
//
// With a context the copy is enqueued on the context's stream and the stream is
// synchronized before printing; without a context a blocking cudaMemcpy is used.
//
// pDev    - device pointer to read from
// len     - number of elements of type T to print; a negative value prints nothing
// throws  - std::runtime_error when the copy or the stream synchronization fails
template<typename T>
void PointersManager::printDevContentOnHost(const void* pDev, const Nd4jLong len) const {
    printf("host print out\n");

    // a negative length would wrap to a huge unsigned byte count in the allocation below
    if (len < 0) {
        printf("\n");
        return;
    }

    const size_t numBytes = sizeof(T) * static_cast<size_t>(len);
    void* pHost = operator new(numBytes);

    const char* failure = nullptr;
    if (_context != nullptr) {
        if (cudaMemcpyAsync(pHost, pDev, numBytes, cudaMemcpyDeviceToHost, *_context->getCudaStream()) != cudaSuccess)
            failure = "PointersManager::printDevContentOnHost: cudaMemcpyAsync failed!";
        else if (cudaStreamSynchronize(*_context->getCudaStream()) != cudaSuccess)
            failure = "PointersManager::printDevContentOnHost: cudaStreamSynchronize failed!";
    } else if (cudaMemcpy(pHost, pDev, numBytes, cudaMemcpyDeviceToHost) != cudaSuccess) {
        failure = "PointersManager::printDevContentOnHost: cudaMemcpy failed!";
    }

    if (failure != nullptr) {
        // release the host buffer before leaving through the exception
        operator delete(pHost);
        throw std::runtime_error(failure);
    }

    const T* values = reinterpret_cast<const T*>(pHost);
    for (Nd4jLong i = 0; i < len; ++i)
        printf("%f, ", (double)values[i]);
    printf("\n");

    operator delete(pHost);
}
// explicit instantiations for the element types supported by this helper
template void PointersManager::printDevContentOnHost<Nd4jLong>(const void* pDev, const Nd4jLong len) const;
template void PointersManager::printDevContentOnHost<int>(const void* pDev, const Nd4jLong len) const;
template void PointersManager::printDevContentOnHost<float>(const void* pDev, const Nd4jLong len) const;
template void PointersManager::printDevContentOnHost<double>(const void* pDev, const Nd4jLong len) const;
}