cavis/libnd4j/include/memory/MemoryCounter.h

147 lines
4.8 KiB
C
Raw Normal View History

2021-02-01 13:31:45 +01:00
/* ******************************************************************************
*
[WIP] Memory limits (#167) * initial commit Signed-off-by: raver119 <raver119@gmail.com> * one more initial commit Signed-off-by: raver119 <raver119@gmail.com> * additional initial commit Signed-off-by: raver119 <raver119@gmail.com> * subsequent initial commit Signed-off-by: raver119 <raver119@gmail.com> * initial commit testing Signed-off-by: raver119 <raver119@gmail.com> * initial commit per device Signed-off-by: raver119 <raver119@gmail.com> * initial commit per group Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda + few missed lines Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda + missed includes Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda + one more missed include Signed-off-by: raver119 <raver119@gmail.com> * initial commit shouldn't count host mem as dev0 in cuda Signed-off-by: raver119 <raver119@gmail.com> * initial commit that tracks HOST group limits for CUDA Signed-off-by: raver119 <raver119@gmail.com> * initial commit with some Environment changes Signed-off-by: raver119 <raver119@gmail.com> * initial commit with more Environment changes Signed-off-by: raver119 <raver119@gmail.com> * initial commit with maxMasterThreads fix Signed-off-by: raver119 <raver119@gmail.com> * initial commit with maxMasterThreads fix Signed-off-by: raver119 <raver119@gmail.com> * initial commit without maxMasterThreads exception Signed-off-by: raver119 <raver119@gmail.com> * initial commit without Nd4jULong in Environment Signed-off-by: raver119 <raver119@gmail.com> * add sleep and more iterations for OOM cases Signed-off-by: raver119 <raver119@gmail.com> * limits propagation from java side Signed-off-by: raver119 <raver119@gmail.com> * - consume ErrorCode every time - one test for memory limits Signed-off-by: raver119 <raver119@gmail.com> * unordered_map Signed-off-by: raver119 <raver119@gmail.com> * unordered_map Signed-off-by: raver119 <raver119@gmail.com> * unordered_map Signed-off-by: raver119 <raver119@gmail.com> * RSub op mapping fixed Signed-off-by: raver119 <raver119@gmail.com> * typo fixed Signed-off-by: raver119 <raver119@gmail.com> * one bad test fixed Signed-off-by: raver119 <raver119@gmail.com>
2020-01-24 08:11:09 +01:00
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0.
*
2021-02-01 13:31:45 +01:00
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
[WIP] Memory limits (#167) * initial commit Signed-off-by: raver119 <raver119@gmail.com> * one more initial commit Signed-off-by: raver119 <raver119@gmail.com> * additional initial commit Signed-off-by: raver119 <raver119@gmail.com> * subsequent initial commit Signed-off-by: raver119 <raver119@gmail.com> * initial commit testing Signed-off-by: raver119 <raver119@gmail.com> * initial commit per device Signed-off-by: raver119 <raver119@gmail.com> * initial commit per group Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda + few missed lines Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda + missed includes Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda + one more missed include Signed-off-by: raver119 <raver119@gmail.com> * initial commit shouldn't count host mem as dev0 in cuda Signed-off-by: raver119 <raver119@gmail.com> * initial commit that tracks HOST group limits for CUDA Signed-off-by: raver119 <raver119@gmail.com> * initial commit with some Environment changes Signed-off-by: raver119 <raver119@gmail.com> * initial commit with more Environment changes Signed-off-by: raver119 <raver119@gmail.com> * initial commit with maxMasterThreads fix Signed-off-by: raver119 <raver119@gmail.com> * initial commit with maxMasterThreads fix Signed-off-by: raver119 <raver119@gmail.com> * initial commit without maxMasterThreads exception Signed-off-by: raver119 <raver119@gmail.com> * initial commit without Nd4jULong in Environment Signed-off-by: raver119 <raver119@gmail.com> * add sleep and more iterations for OOM cases Signed-off-by: raver119 <raver119@gmail.com> * limits propagation from java side Signed-off-by: raver119 <raver119@gmail.com> * - consume ErrorCode every time - one test for memory limits Signed-off-by: raver119 <raver119@gmail.com> * unordered_map Signed-off-by: raver119 <raver119@gmail.com> * unordered_map Signed-off-by: raver119 <raver119@gmail.com> * unordered_map Signed-off-by: raver119 <raver119@gmail.com> * RSub op mapping fixed Signed-off-by: raver119 <raver119@gmail.com> * typo fixed Signed-off-by: raver119 <raver119@gmail.com> * one bad test fixed Signed-off-by: raver119 <raver119@gmail.com>
2020-01-24 08:11:09 +01:00
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* SPDX-License-Identifier: Apache-2.0
******************************************************************************/
//
// @author raver119@gmail.com
//
#ifndef SD_MEMORYCOUNTER_H
#define SD_MEMORYCOUNTER_H
#include <system/pointercast.h>
#include <system/dll.h>
[WIP] Memory limits (#167) * initial commit Signed-off-by: raver119 <raver119@gmail.com> * one more initial commit Signed-off-by: raver119 <raver119@gmail.com> * additional initial commit Signed-off-by: raver119 <raver119@gmail.com> * subsequent initial commit Signed-off-by: raver119 <raver119@gmail.com> * initial commit testing Signed-off-by: raver119 <raver119@gmail.com> * initial commit per device Signed-off-by: raver119 <raver119@gmail.com> * initial commit per group Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda + few missed lines Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda + missed includes Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda + one more missed include Signed-off-by: raver119 <raver119@gmail.com> * initial commit shouldn't count host mem as dev0 in cuda Signed-off-by: raver119 <raver119@gmail.com> * initial commit that tracks HOST group limits for CUDA Signed-off-by: raver119 <raver119@gmail.com> * initial commit with some Environment changes Signed-off-by: raver119 <raver119@gmail.com> * initial commit with more Environment changes Signed-off-by: raver119 <raver119@gmail.com> * initial commit with maxMasterThreads fix Signed-off-by: raver119 <raver119@gmail.com> * initial commit with maxMasterThreads fix Signed-off-by: raver119 <raver119@gmail.com> * initial commit without maxMasterThreads exception Signed-off-by: raver119 <raver119@gmail.com> * initial commit without Nd4jULong in Environment Signed-off-by: raver119 <raver119@gmail.com> * add sleep and more iterations for OOM cases Signed-off-by: raver119 <raver119@gmail.com> * limits propagation from java side Signed-off-by: raver119 <raver119@gmail.com> * - consume ErrorCode every time - one test for memory limits Signed-off-by: raver119 <raver119@gmail.com> * unordered_map Signed-off-by: raver119 <raver119@gmail.com> * unordered_map Signed-off-by: raver119 <raver119@gmail.com> * unordered_map Signed-off-by: raver119 <raver119@gmail.com> * RSub op mapping fixed Signed-off-by: raver119 <raver119@gmail.com> * typo fixed Signed-off-by: raver119 <raver119@gmail.com> * one bad test fixed Signed-off-by: raver119 <raver119@gmail.com>
2020-01-24 08:11:09 +01:00
#include <map>
#include <memory/MemoryType.h>
#include <mutex>
namespace sd {
[WIP] Memory limits (#167) * initial commit Signed-off-by: raver119 <raver119@gmail.com> * one more initial commit Signed-off-by: raver119 <raver119@gmail.com> * additional initial commit Signed-off-by: raver119 <raver119@gmail.com> * subsequent initial commit Signed-off-by: raver119 <raver119@gmail.com> * initial commit testing Signed-off-by: raver119 <raver119@gmail.com> * initial commit per device Signed-off-by: raver119 <raver119@gmail.com> * initial commit per group Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda + few missed lines Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda + missed includes Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda + one more missed include Signed-off-by: raver119 <raver119@gmail.com> * initial commit shouldn't count host mem as dev0 in cuda Signed-off-by: raver119 <raver119@gmail.com> * initial commit that tracks HOST group limits for CUDA Signed-off-by: raver119 <raver119@gmail.com> * initial commit with some Environment changes Signed-off-by: raver119 <raver119@gmail.com> * initial commit with more Environment changes Signed-off-by: raver119 <raver119@gmail.com> * initial commit with maxMasterThreads fix Signed-off-by: raver119 <raver119@gmail.com> * initial commit with maxMasterThreads fix Signed-off-by: raver119 <raver119@gmail.com> * initial commit without maxMasterThreads exception Signed-off-by: raver119 <raver119@gmail.com> * initial commit without Nd4jULong in Environment Signed-off-by: raver119 <raver119@gmail.com> * add sleep and more iterations for OOM cases Signed-off-by: raver119 <raver119@gmail.com> * limits propagation from java side Signed-off-by: raver119 <raver119@gmail.com> * - consume ErrorCode every time - one test for memory limits Signed-off-by: raver119 <raver119@gmail.com> * unordered_map Signed-off-by: raver119 <raver119@gmail.com> * unordered_map Signed-off-by: raver119 <raver119@gmail.com> * unordered_map Signed-off-by: raver119 <raver119@gmail.com> * RSub op mapping fixed Signed-off-by: raver119 <raver119@gmail.com> * typo fixed Signed-off-by: raver119 <raver119@gmail.com> * one bad test fixed Signed-off-by: raver119 <raver119@gmail.com>
2020-01-24 08:11:09 +01:00
namespace memory {
/**
* This class provides simple per-device counter
*/
class ND4J_EXPORT MemoryCounter {
private:
// used for synchronization
std::mutex _locker;
// per-device counters
std::map<int, Nd4jLong> _deviceCounters;
// TODO: change this wrt heterogenous stuff on next iteration
// per-group counters
std::map<sd::memory::MemoryType, Nd4jLong> _groupCounters;
[WIP] Memory limits (#167) * initial commit Signed-off-by: raver119 <raver119@gmail.com> * one more initial commit Signed-off-by: raver119 <raver119@gmail.com> * additional initial commit Signed-off-by: raver119 <raver119@gmail.com> * subsequent initial commit Signed-off-by: raver119 <raver119@gmail.com> * initial commit testing Signed-off-by: raver119 <raver119@gmail.com> * initial commit per device Signed-off-by: raver119 <raver119@gmail.com> * initial commit per group Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda + few missed lines Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda + missed includes Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda + one more missed include Signed-off-by: raver119 <raver119@gmail.com> * initial commit shouldn't count host mem as dev0 in cuda Signed-off-by: raver119 <raver119@gmail.com> * initial commit that tracks HOST group limits for CUDA Signed-off-by: raver119 <raver119@gmail.com> * initial commit with some Environment changes Signed-off-by: raver119 <raver119@gmail.com> * initial commit with more Environment changes Signed-off-by: raver119 <raver119@gmail.com> * initial commit with maxMasterThreads fix Signed-off-by: raver119 <raver119@gmail.com> * initial commit with maxMasterThreads fix Signed-off-by: raver119 <raver119@gmail.com> * initial commit without maxMasterThreads exception Signed-off-by: raver119 <raver119@gmail.com> * initial commit without Nd4jULong in Environment Signed-off-by: raver119 <raver119@gmail.com> * add sleep and more iterations for OOM cases Signed-off-by: raver119 <raver119@gmail.com> * limits propagation from java side Signed-off-by: raver119 <raver119@gmail.com> * - consume ErrorCode every time - one test for memory limits Signed-off-by: raver119 <raver119@gmail.com> * unordered_map Signed-off-by: raver119 <raver119@gmail.com> * unordered_map Signed-off-by: raver119 <raver119@gmail.com> * unordered_map Signed-off-by: raver119 <raver119@gmail.com> * RSub op mapping fixed Signed-off-by: raver119 <raver119@gmail.com> * typo fixed Signed-off-by: raver119 <raver119@gmail.com> * one bad test fixed Signed-off-by: raver119 <raver119@gmail.com>
2020-01-24 08:11:09 +01:00
// per-device limits
std::map<int, Nd4jLong> _deviceLimits;
// per-group limits
std::map<sd::memory::MemoryType, Nd4jLong> _groupLimits;
[WIP] Memory limits (#167) * initial commit Signed-off-by: raver119 <raver119@gmail.com> * one more initial commit Signed-off-by: raver119 <raver119@gmail.com> * additional initial commit Signed-off-by: raver119 <raver119@gmail.com> * subsequent initial commit Signed-off-by: raver119 <raver119@gmail.com> * initial commit testing Signed-off-by: raver119 <raver119@gmail.com> * initial commit per device Signed-off-by: raver119 <raver119@gmail.com> * initial commit per group Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda + few missed lines Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda + missed includes Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda + one more missed include Signed-off-by: raver119 <raver119@gmail.com> * initial commit shouldn't count host mem as dev0 in cuda Signed-off-by: raver119 <raver119@gmail.com> * initial commit that tracks HOST group limits for CUDA Signed-off-by: raver119 <raver119@gmail.com> * initial commit with some Environment changes Signed-off-by: raver119 <raver119@gmail.com> * initial commit with more Environment changes Signed-off-by: raver119 <raver119@gmail.com> * initial commit with maxMasterThreads fix Signed-off-by: raver119 <raver119@gmail.com> * initial commit with maxMasterThreads fix Signed-off-by: raver119 <raver119@gmail.com> * initial commit without maxMasterThreads exception Signed-off-by: raver119 <raver119@gmail.com> * initial commit without Nd4jULong in Environment Signed-off-by: raver119 <raver119@gmail.com> * add sleep and more iterations for OOM cases Signed-off-by: raver119 <raver119@gmail.com> * limits propagation from java side Signed-off-by: raver119 <raver119@gmail.com> * - consume ErrorCode every time - one test for memory limits Signed-off-by: raver119 <raver119@gmail.com> * unordered_map Signed-off-by: raver119 <raver119@gmail.com> * unordered_map Signed-off-by: raver119 <raver119@gmail.com> * unordered_map Signed-off-by: raver119 <raver119@gmail.com> * RSub op mapping fixed Signed-off-by: raver119 <raver119@gmail.com> * typo fixed Signed-off-by: raver119 <raver119@gmail.com> * one bad test fixed Signed-off-by: raver119 <raver119@gmail.com>
2020-01-24 08:11:09 +01:00
MemoryCounter();
~MemoryCounter() = default;
public:
static MemoryCounter & getInstance();
[WIP] Memory limits (#167) * initial commit Signed-off-by: raver119 <raver119@gmail.com> * one more initial commit Signed-off-by: raver119 <raver119@gmail.com> * additional initial commit Signed-off-by: raver119 <raver119@gmail.com> * subsequent initial commit Signed-off-by: raver119 <raver119@gmail.com> * initial commit testing Signed-off-by: raver119 <raver119@gmail.com> * initial commit per device Signed-off-by: raver119 <raver119@gmail.com> * initial commit per group Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda + few missed lines Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda + missed includes Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda + one more missed include Signed-off-by: raver119 <raver119@gmail.com> * initial commit shouldn't count host mem as dev0 in cuda Signed-off-by: raver119 <raver119@gmail.com> * initial commit that tracks HOST group limits for CUDA Signed-off-by: raver119 <raver119@gmail.com> * initial commit with some Environment changes Signed-off-by: raver119 <raver119@gmail.com> * initial commit with more Environment changes Signed-off-by: raver119 <raver119@gmail.com> * initial commit with maxMasterThreads fix Signed-off-by: raver119 <raver119@gmail.com> * initial commit with maxMasterThreads fix Signed-off-by: raver119 <raver119@gmail.com> * initial commit without maxMasterThreads exception Signed-off-by: raver119 <raver119@gmail.com> * initial commit without Nd4jULong in Environment Signed-off-by: raver119 <raver119@gmail.com> * add sleep and more iterations for OOM cases Signed-off-by: raver119 <raver119@gmail.com> * limits propagation from java side Signed-off-by: raver119 <raver119@gmail.com> * - consume ErrorCode every time - one test for memory limits Signed-off-by: raver119 <raver119@gmail.com> * unordered_map Signed-off-by: raver119 <raver119@gmail.com> * unordered_map Signed-off-by: raver119 <raver119@gmail.com> * unordered_map Signed-off-by: raver119 <raver119@gmail.com> * RSub op mapping fixed Signed-off-by: raver119 <raver119@gmail.com> * typo fixed Signed-off-by: raver119 <raver119@gmail.com> * one bad test fixed Signed-off-by: raver119 <raver119@gmail.com>
2020-01-24 08:11:09 +01:00
/**
* This method checks if allocation of numBytes won't break through per-group or per-device limit
* @param numBytes
* @return TRUE if allocated ammount will keep us below limit, FALSE otherwise
*/
bool validate(Nd4jLong numBytes);
/**
* This method checks if allocation of numBytes won't break through per-device limit
* @param deviceId
* @param numBytes
* @return TRUE if allocated ammount will keep us below limit, FALSE otherwise
*/
bool validateDevice(int deviceId, Nd4jLong numBytes);
/**
* This method checks if allocation of numBytes won't break through per-group limit
* @param deviceId
* @param numBytes
* @return TRUE if allocated ammount will keep us below limit, FALSE otherwise
*/
bool validateGroup(sd::memory::MemoryType group, Nd4jLong numBytes);
[WIP] Memory limits (#167) * initial commit Signed-off-by: raver119 <raver119@gmail.com> * one more initial commit Signed-off-by: raver119 <raver119@gmail.com> * additional initial commit Signed-off-by: raver119 <raver119@gmail.com> * subsequent initial commit Signed-off-by: raver119 <raver119@gmail.com> * initial commit testing Signed-off-by: raver119 <raver119@gmail.com> * initial commit per device Signed-off-by: raver119 <raver119@gmail.com> * initial commit per group Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda + few missed lines Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda + missed includes Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda + one more missed include Signed-off-by: raver119 <raver119@gmail.com> * initial commit shouldn't count host mem as dev0 in cuda Signed-off-by: raver119 <raver119@gmail.com> * initial commit that tracks HOST group limits for CUDA Signed-off-by: raver119 <raver119@gmail.com> * initial commit with some Environment changes Signed-off-by: raver119 <raver119@gmail.com> * initial commit with more Environment changes Signed-off-by: raver119 <raver119@gmail.com> * initial commit with maxMasterThreads fix Signed-off-by: raver119 <raver119@gmail.com> * initial commit with maxMasterThreads fix Signed-off-by: raver119 <raver119@gmail.com> * initial commit without maxMasterThreads exception Signed-off-by: raver119 <raver119@gmail.com> * initial commit without Nd4jULong in Environment Signed-off-by: raver119 <raver119@gmail.com> * add sleep and more iterations for OOM cases Signed-off-by: raver119 <raver119@gmail.com> * limits propagation from java side Signed-off-by: raver119 <raver119@gmail.com> * - consume ErrorCode every time - one test for memory limits Signed-off-by: raver119 <raver119@gmail.com> * unordered_map Signed-off-by: raver119 <raver119@gmail.com> * unordered_map Signed-off-by: raver119 <raver119@gmail.com> * unordered_map Signed-off-by: raver119 <raver119@gmail.com> * RSub op mapping fixed Signed-off-by: raver119 <raver119@gmail.com> * typo fixed Signed-off-by: raver119 <raver119@gmail.com> * one bad test fixed Signed-off-by: raver119 <raver119@gmail.com>
2020-01-24 08:11:09 +01:00
/**
* This method adds specified number of bytes to specified counter
* @param deviceId
* @param numBytes
*/
void countIn(int deviceId, Nd4jLong numBytes);
void countIn(sd::memory::MemoryType group, Nd4jLong numBytes);
[WIP] Memory limits (#167) * initial commit Signed-off-by: raver119 <raver119@gmail.com> * one more initial commit Signed-off-by: raver119 <raver119@gmail.com> * additional initial commit Signed-off-by: raver119 <raver119@gmail.com> * subsequent initial commit Signed-off-by: raver119 <raver119@gmail.com> * initial commit testing Signed-off-by: raver119 <raver119@gmail.com> * initial commit per device Signed-off-by: raver119 <raver119@gmail.com> * initial commit per group Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda + few missed lines Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda + missed includes Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda + one more missed include Signed-off-by: raver119 <raver119@gmail.com> * initial commit shouldn't count host mem as dev0 in cuda Signed-off-by: raver119 <raver119@gmail.com> * initial commit that tracks HOST group limits for CUDA Signed-off-by: raver119 <raver119@gmail.com> * initial commit with some Environment changes Signed-off-by: raver119 <raver119@gmail.com> * initial commit with more Environment changes Signed-off-by: raver119 <raver119@gmail.com> * initial commit with maxMasterThreads fix Signed-off-by: raver119 <raver119@gmail.com> * initial commit with maxMasterThreads fix Signed-off-by: raver119 <raver119@gmail.com> * initial commit without maxMasterThreads exception Signed-off-by: raver119 <raver119@gmail.com> * initial commit without Nd4jULong in Environment Signed-off-by: raver119 <raver119@gmail.com> * add sleep and more iterations for OOM cases Signed-off-by: raver119 <raver119@gmail.com> * limits propagation from java side Signed-off-by: raver119 <raver119@gmail.com> * - consume ErrorCode every time - one test for memory limits Signed-off-by: raver119 <raver119@gmail.com> * unordered_map Signed-off-by: raver119 <raver119@gmail.com> * unordered_map Signed-off-by: raver119 <raver119@gmail.com> * unordered_map Signed-off-by: raver119 <raver119@gmail.com> * RSub op mapping fixed Signed-off-by: raver119 <raver119@gmail.com> * typo fixed Signed-off-by: raver119 <raver119@gmail.com> * one bad test fixed Signed-off-by: raver119 <raver119@gmail.com>
2020-01-24 08:11:09 +01:00
/**
* This method subtracts specified number of bytes from specified counter
* @param deviceId
* @param numBytes
*/
void countOut(int deviceId, Nd4jLong numBytes);
void countOut(sd::memory::MemoryType group, Nd4jLong numBytes);
[WIP] Memory limits (#167) * initial commit Signed-off-by: raver119 <raver119@gmail.com> * one more initial commit Signed-off-by: raver119 <raver119@gmail.com> * additional initial commit Signed-off-by: raver119 <raver119@gmail.com> * subsequent initial commit Signed-off-by: raver119 <raver119@gmail.com> * initial commit testing Signed-off-by: raver119 <raver119@gmail.com> * initial commit per device Signed-off-by: raver119 <raver119@gmail.com> * initial commit per group Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda + few missed lines Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda + missed includes Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda + one more missed include Signed-off-by: raver119 <raver119@gmail.com> * initial commit shouldn't count host mem as dev0 in cuda Signed-off-by: raver119 <raver119@gmail.com> * initial commit that tracks HOST group limits for CUDA Signed-off-by: raver119 <raver119@gmail.com> * initial commit with some Environment changes Signed-off-by: raver119 <raver119@gmail.com> * initial commit with more Environment changes Signed-off-by: raver119 <raver119@gmail.com> * initial commit with maxMasterThreads fix Signed-off-by: raver119 <raver119@gmail.com> * initial commit with maxMasterThreads fix Signed-off-by: raver119 <raver119@gmail.com> * initial commit without maxMasterThreads exception Signed-off-by: raver119 <raver119@gmail.com> * initial commit without Nd4jULong in Environment Signed-off-by: raver119 <raver119@gmail.com> * add sleep and more iterations for OOM cases Signed-off-by: raver119 <raver119@gmail.com> * limits propagation from java side Signed-off-by: raver119 <raver119@gmail.com> * - consume ErrorCode every time - one test for memory limits Signed-off-by: raver119 <raver119@gmail.com> * unordered_map Signed-off-by: raver119 <raver119@gmail.com> * unordered_map Signed-off-by: raver119 <raver119@gmail.com> * unordered_map Signed-off-by: raver119 <raver119@gmail.com> * RSub op mapping fixed Signed-off-by: raver119 <raver119@gmail.com> * typo fixed Signed-off-by: raver119 <raver119@gmail.com> * one bad test fixed Signed-off-by: raver119 <raver119@gmail.com>
2020-01-24 08:11:09 +01:00
/**
* This method returns amount of memory allocated on specified device
* @param deviceId
* @return
*/
Nd4jLong allocatedDevice(int deviceId);
/**
* This method returns amount of memory allocated in specified group of devices
* @param group
* @return
*/
Nd4jLong allocatedGroup(sd::memory::MemoryType group);
[WIP] Memory limits (#167) * initial commit Signed-off-by: raver119 <raver119@gmail.com> * one more initial commit Signed-off-by: raver119 <raver119@gmail.com> * additional initial commit Signed-off-by: raver119 <raver119@gmail.com> * subsequent initial commit Signed-off-by: raver119 <raver119@gmail.com> * initial commit testing Signed-off-by: raver119 <raver119@gmail.com> * initial commit per device Signed-off-by: raver119 <raver119@gmail.com> * initial commit per group Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda + few missed lines Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda + missed includes Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda + one more missed include Signed-off-by: raver119 <raver119@gmail.com> * initial commit shouldn't count host mem as dev0 in cuda Signed-off-by: raver119 <raver119@gmail.com> * initial commit that tracks HOST group limits for CUDA Signed-off-by: raver119 <raver119@gmail.com> * initial commit with some Environment changes Signed-off-by: raver119 <raver119@gmail.com> * initial commit with more Environment changes Signed-off-by: raver119 <raver119@gmail.com> * initial commit with maxMasterThreads fix Signed-off-by: raver119 <raver119@gmail.com> * initial commit with maxMasterThreads fix Signed-off-by: raver119 <raver119@gmail.com> * initial commit without maxMasterThreads exception Signed-off-by: raver119 <raver119@gmail.com> * initial commit without Nd4jULong in Environment Signed-off-by: raver119 <raver119@gmail.com> * add sleep and more iterations for OOM cases Signed-off-by: raver119 <raver119@gmail.com> * limits propagation from java side Signed-off-by: raver119 <raver119@gmail.com> * - consume ErrorCode every time - one test for memory limits Signed-off-by: raver119 <raver119@gmail.com> * unordered_map Signed-off-by: raver119 <raver119@gmail.com> * unordered_map Signed-off-by: raver119 <raver119@gmail.com> * unordered_map Signed-off-by: raver119 <raver119@gmail.com> * RSub op mapping fixed Signed-off-by: raver119 <raver119@gmail.com> * typo fixed Signed-off-by: raver119 <raver119@gmail.com> * one bad test fixed Signed-off-by: raver119 <raver119@gmail.com>
2020-01-24 08:11:09 +01:00
/**
* This method allows to set per-device memory limits
* @param deviceId
* @param numBytes
*/
void setDeviceLimit(int deviceId, Nd4jLong numBytes);
/**
* This method returns current device limit in bytes
* @param deviceId
* @return
*/
Nd4jLong deviceLimit(int deviceId);
/**
* This method allows to set per-group memory limits
* @param group
* @param numBytes
*/
void setGroupLimit(sd::memory::MemoryType group, Nd4jLong numBytes);
[WIP] Memory limits (#167) * initial commit Signed-off-by: raver119 <raver119@gmail.com> * one more initial commit Signed-off-by: raver119 <raver119@gmail.com> * additional initial commit Signed-off-by: raver119 <raver119@gmail.com> * subsequent initial commit Signed-off-by: raver119 <raver119@gmail.com> * initial commit testing Signed-off-by: raver119 <raver119@gmail.com> * initial commit per device Signed-off-by: raver119 <raver119@gmail.com> * initial commit per group Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda + few missed lines Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda + missed includes Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda + one more missed include Signed-off-by: raver119 <raver119@gmail.com> * initial commit shouldn't count host mem as dev0 in cuda Signed-off-by: raver119 <raver119@gmail.com> * initial commit that tracks HOST group limits for CUDA Signed-off-by: raver119 <raver119@gmail.com> * initial commit with some Environment changes Signed-off-by: raver119 <raver119@gmail.com> * initial commit with more Environment changes Signed-off-by: raver119 <raver119@gmail.com> * initial commit with maxMasterThreads fix Signed-off-by: raver119 <raver119@gmail.com> * initial commit with maxMasterThreads fix Signed-off-by: raver119 <raver119@gmail.com> * initial commit without maxMasterThreads exception Signed-off-by: raver119 <raver119@gmail.com> * initial commit without Nd4jULong in Environment Signed-off-by: raver119 <raver119@gmail.com> * add sleep and more iterations for OOM cases Signed-off-by: raver119 <raver119@gmail.com> * limits propagation from java side Signed-off-by: raver119 <raver119@gmail.com> * - consume ErrorCode every time - one test for memory limits Signed-off-by: raver119 <raver119@gmail.com> * unordered_map Signed-off-by: raver119 <raver119@gmail.com> * unordered_map Signed-off-by: raver119 <raver119@gmail.com> * unordered_map Signed-off-by: raver119 <raver119@gmail.com> * RSub op mapping fixed Signed-off-by: raver119 <raver119@gmail.com> * typo fixed Signed-off-by: raver119 <raver119@gmail.com> * one bad test fixed Signed-off-by: raver119 <raver119@gmail.com>
2020-01-24 08:11:09 +01:00
/**
* This method returns current group limit in bytes
* @param group
* @return
*/
Nd4jLong groupLimit(sd::memory::MemoryType group);
[WIP] Memory limits (#167) * initial commit Signed-off-by: raver119 <raver119@gmail.com> * one more initial commit Signed-off-by: raver119 <raver119@gmail.com> * additional initial commit Signed-off-by: raver119 <raver119@gmail.com> * subsequent initial commit Signed-off-by: raver119 <raver119@gmail.com> * initial commit testing Signed-off-by: raver119 <raver119@gmail.com> * initial commit per device Signed-off-by: raver119 <raver119@gmail.com> * initial commit per group Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda + few missed lines Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda + missed includes Signed-off-by: raver119 <raver119@gmail.com> * initial commit for cuda + one more missed include Signed-off-by: raver119 <raver119@gmail.com> * initial commit shouldn't count host mem as dev0 in cuda Signed-off-by: raver119 <raver119@gmail.com> * initial commit that tracks HOST group limits for CUDA Signed-off-by: raver119 <raver119@gmail.com> * initial commit with some Environment changes Signed-off-by: raver119 <raver119@gmail.com> * initial commit with more Environment changes Signed-off-by: raver119 <raver119@gmail.com> * initial commit with maxMasterThreads fix Signed-off-by: raver119 <raver119@gmail.com> * initial commit with maxMasterThreads fix Signed-off-by: raver119 <raver119@gmail.com> * initial commit without maxMasterThreads exception Signed-off-by: raver119 <raver119@gmail.com> * initial commit without Nd4jULong in Environment Signed-off-by: raver119 <raver119@gmail.com> * add sleep and more iterations for OOM cases Signed-off-by: raver119 <raver119@gmail.com> * limits propagation from java side Signed-off-by: raver119 <raver119@gmail.com> * - consume ErrorCode every time - one test for memory limits Signed-off-by: raver119 <raver119@gmail.com> * unordered_map Signed-off-by: raver119 <raver119@gmail.com> * unordered_map Signed-off-by: raver119 <raver119@gmail.com> * unordered_map Signed-off-by: raver119 <raver119@gmail.com> * RSub op mapping fixed Signed-off-by: raver119 <raver119@gmail.com> * typo fixed Signed-off-by: raver119 <raver119@gmail.com> * one bad test fixed Signed-off-by: raver119 <raver119@gmail.com>
2020-01-24 08:11:09 +01:00
};
}
}
#endif //SD_MEMORYCOUNTER_H