189 lines
6.6 KiB
Plaintext
189 lines
6.6 KiB
Plaintext
/*******************************************************************************
 * Copyright (c) 2015-2018 Skymind, Inc.
 * Copyright (c) 2019 Konduit K.K.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Apache License, Version 2.0 which is available at
 * https://www.apache.org/licenses/LICENSE-2.0.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ******************************************************************************/
|
|
|
|
//
// @author raver119@gmail.com
//
|
|
|
|
#include <exceptions/cuda_exception.h>
|
|
#include <ConstantHelper.h>
|
|
#include <DataTypeUtils.h>
|
|
#include <shape.h>
|
|
#include <execution/LaunchContext.h>
|
|
#include <specials.h>
|
|
#include <logger.h>
|
|
#include <cuda_runtime.h>
|
|
#include <cuda.h>
|
|
#include <execution/AffinityManager.h>
|
|
|
|
// Size in bytes of the deviceConstantMemory buffer (49152 bytes = 48 KiB);
// also the threshold replicatePointer() checks before falling back to a regular device allocation.
#define CONSTANT_LIMIT 49152
|
|
|
|
// Statically sized __constant__ byte buffer; replicatePointer() copies host data into it
// with cudaMemcpyToSymbol and hands out addresses inside it.
__constant__ char deviceConstantMemory[CONSTANT_LIMIT];
|
|
|
|
namespace nd4j {
|
|
/**
 * Looks up the address of the deviceConstantMemory symbol through cudaGetSymbolAddress.
 *
 * @return the address reported by cudaGetSymbolAddress
 * @throws cuda_exception when cudaGetSymbolAddress returns a non-zero status
 */
static void* getConstantSpace() {
    Nd4jPointer symbolAddress;
    const auto status = cudaGetSymbolAddress(reinterpret_cast<void **>(&symbolAddress), deviceConstantMemory);

    // zero status means success: hand the resolved address back right away
    if (status == 0)
        return symbolAddress;

    throw cuda_exception::build("cudaGetSymbolAddress(...) failed", status);
}
|
|
|
|
/**
 * @return the device id reported by AffinityManager::currentDeviceId()
 */
int ConstantHelper::getCurrentDevice() {
    const auto activeDevice = AffinityManager::currentDeviceId();
    return activeDevice;
}
|
|
|
|
/**
 * @return the device count reported by AffinityManager::numberOfDevices()
 */
int ConstantHelper::getNumberOfDevices() {
    const auto deviceCount = AffinityManager::numberOfDevices();
    return deviceCount;
}
|
|
|
|
|
|
/**
 * Sizes the per-device bookkeeping (constant-space pointers, offsets, caches, byte counters)
 * and initialises one slot for every device. Each device is selected in turn with
 * cudaSetDevice so getConstantSpace() can be queried for it; afterwards the device that was
 * current on entry is selected again.
 *
 * @throws cuda_exception when any cudaSetDevice call returns a non-zero status
 *         (getConstantSpace() may throw as well)
 */
ConstantHelper::ConstantHelper() {
    // remember which device was active so it can be re-selected at the end
    const auto originalDevice = getCurrentDevice();
    const auto deviceCount = getNumberOfDevices();

    _devicePointers.resize(deviceCount);
    _deviceOffsets.resize(deviceCount);
    _cache.resize(deviceCount);
    _counters.resize(deviceCount);

    // fill the slot of every device
    for (int device = 0; device < deviceCount; ++device) {
        const auto status = cudaSetDevice(device);
        if (status != 0)
            throw cuda_exception::build("cudaSetDevice failed", status);

        _devicePointers[device] = getConstantSpace();
        _deviceOffsets[device] = 0;
        _cache[device] = MAP_IMPL<ConstantDescriptor, ConstantHolder*>();
        _counters[device] = 0L;
    }

    // switch back to the device that was active on entry
    const auto restoreStatus = cudaSetDevice(originalDevice);
    if (restoreStatus != 0)
        throw cuda_exception::build("Final cudaSetDevice failed", restoreStatus);
}
|
|
|
|
/**
 * Returns the process-wide ConstantHelper, creating it on first use.
 *
 * Creation goes through a function-local static, whose initialisation C++11 guarantees to
 * run exactly once even when several threads call getInstance() concurrently. The previous
 * unsynchronised "if (!_INSTANCE) _INSTANCE = new ..." could construct two instances under
 * a race. _INSTANCE is still populated, so any code reading it keeps working.
 *
 * If the constructor throws, the static stays uninitialised and the next call retries.
 *
 * @return pointer to the singleton; never freed by this function
 */
ConstantHelper* ConstantHelper::getInstance() {
    static ConstantHelper* const instance =
            _INSTANCE ? _INSTANCE : (_INSTANCE = new nd4j::ConstantHelper());

    return instance;
}
|
|
|
|
/**
 * Copies numBytes from host memory at src to the device and returns the device-side address.
 *
 * While the data still fits below CONSTANT_LIMIT it is written into deviceConstantMemory at
 * the current per-device offset, and that offset is advanced by numBytes rounded up to a
 * multiple of 8. Otherwise a buffer is obtained through ALLOCATE_SPECIAL and filled with
 * cudaMemcpy. The whole call runs under _mutex.
 *
 * @param src        host pointer to the bytes to replicate
 * @param numBytes   number of bytes to copy
 * @param workspace  workspace handed to ALLOCATE_SPECIAL on the fallback path
 * @return device pointer to the replicated bytes
 * @throws cuda_exception when cudaMemcpy or cudaMemcpyToSymbol returns a non-zero status
 */
void* ConstantHelper::replicatePointer(void *src, size_t numBytes, memory::Workspace *workspace) {
    std::lock_guard<std::mutex> guard(_mutex);

    const auto device = getCurrentDevice();

    // resolve the constant-space address lazily if this device has none recorded yet
    if (_devicePointers[device] == 0) {
        _devicePointers[device] = getConstantSpace();
        _deviceOffsets[device] = 0;
    }

    Nd4jPointer base = _devicePointers[device];
    Nd4jLong offset = _deviceOffsets[device];

    // not enough room left in constant space: use a regular device allocation instead
    if (offset + numBytes >= CONSTANT_LIMIT) {
        int8_t *devicePtr = nullptr;
        ALLOCATE_SPECIAL(devicePtr, workspace, numBytes, int8_t);

        const auto copyStatus = cudaMemcpy(devicePtr, src, numBytes, cudaMemcpyHostToDevice);
        if (copyStatus != 0)
            throw cuda_exception::build("cudaMemcpy failed", copyStatus);

        return devicePtr;
    }

    // reserve the size rounded up to a multiple of 8 bytes; only the requested bytes are copied
    const auto remainder = numBytes % 8;
    const auto reservedBytes = remainder == 0 ? numBytes : numBytes + (8 - remainder);
    _deviceOffsets[device] += reservedBytes;

    const auto symbolStatus = cudaMemcpyToSymbol(deviceConstantMemory, const_cast<const void *>(src), numBytes, offset, cudaMemcpyHostToDevice);
    if (symbolStatus != 0)
        throw cuda_exception::build("cudaMemcpyToSymbol failed", symbolStatus);

    return reinterpret_cast<int8_t *>(base) + offset;
}
|
|
|
|
/**
 * Returns the cached constant buffer for the given descriptor and data type on the current
 * device, creating it on first request.
 *
 * On a miss the descriptor's values are converted into a freshly allocated host buffer,
 * replicated to the device through replicatePointer(), wrapped in a ConstantDataBuffer and
 * stored in the descriptor's ConstantHolder.
 *
 * Locking: the per-device cache lookup/insert runs under _mutexHolder, and buffer creation
 * runs under the holder's own mutex. Both are taken through scoped guards so they are
 * released even if an allocation or conversion throws.
 *
 * @param descriptor  describes the constant values (float or integer) and their count
 * @param dataType    element type the buffer should be stored as
 * @return pointer to the buffer owned by the cache holder
 * @throws whatever replicatePointer() or the type conversion throws; in that case the
 *         temporary host buffer is freed and the byte counter is left untouched
 */
ConstantDataBuffer* ConstantHelper::constantBuffer(const ConstantDescriptor &descriptor, nd4j::DataType dataType) {
    const auto deviceId = getCurrentDevice();

    ConstantHolder* holder = nullptr;
    {
        // all cache modifications are synchronous; the guard unlocks on every exit path
        std::lock_guard<decltype(_mutexHolder)> cacheLock(_mutexHolder);

        auto &deviceCache = _cache[deviceId];
        if (deviceCache.count(descriptor) == 0)
            deviceCache[descriptor] = new ConstantHolder();

        holder = deviceCache[descriptor];
    }

    // access to this holder instance is synchronous
    std::lock_guard<std::mutex> holderLock(*holder->mutex());

    if (holder->hasBuffer(dataType))
        return holder->getConstantDataBuffer(dataType);

    const auto elementSize = DataTypeUtils::sizeOf(dataType);
    const auto numBytes = descriptor.length() * elementSize;
    auto cbuff = new int8_t[numBytes];

    void *dbuff = nullptr;
    try {
        // create buffer with this dtype
        // NOTE(review): a descriptor that is neither float nor integer leaves cbuff unfilled
        // before it is uploaded -- confirm that such descriptors cannot occur.
        if (descriptor.isFloat()) {
            BUILD_DOUBLE_SELECTOR(nd4j::DataType::DOUBLE, dataType, nd4j::SpecialTypeConverter::convertGeneric, (nullptr, const_cast<double *>(descriptor.floatValues().data()), descriptor.length(), cbuff), (nd4j::DataType::DOUBLE, double), LIBND4J_TYPES);
        } else if (descriptor.isInteger()) {
            BUILD_DOUBLE_SELECTOR(nd4j::DataType::INT64, dataType, nd4j::SpecialTypeConverter::convertGeneric, (nullptr, const_cast<Nd4jLong *>(descriptor.integerValues().data()), descriptor.length(), cbuff), (nd4j::DataType::INT64, Nd4jLong), LIBND4J_TYPES);
        }

        dbuff = replicatePointer(cbuff, numBytes);
    } catch (...) {
        // nothing else references the host buffer yet, so release it before propagating
        delete[] cbuff;
        throw;
    }

    // count the bytes only once the buffer really exists
    // NOTE(review): this update is guarded by the holder mutex only; confirm whether
    // _counters needs protection against concurrent updates from other holders.
    _counters[deviceId] += numBytes;

    ConstantDataBuffer dataBuffer(cbuff, dbuff, descriptor.length(), elementSize);
    holder->addBuffer(dataBuffer, dataType);

    return holder->getConstantDataBuffer(dataType);
}
|
|
|
|
/**
 * Reports how many bytes of constant buffers have been created for a device.
 *
 * @param deviceId  device index; valid values are 0 .. getNumberOfDevices() - 1
 * @return the byte counter for that device, or 0 for an out-of-range id
 */
Nd4jLong ConstantHelper::getCachedAmount(int deviceId) {
    const int numDevices = getNumberOfDevices();

    // _counters holds exactly numDevices entries, so deviceId == numDevices is already
    // out of range (the previous '>' comparison let it through).
    if (deviceId < 0 || deviceId >= numDevices)
        return 0L;

    return _counters[deviceId];
}
|
|
|
|
// Storage for the singleton pointer; stays null until getInstance() creates the helper.
nd4j::ConstantHelper* nd4j::ConstantHelper::_INSTANCE = nullptr;
|
|
} |