diff --git a/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/linalg/workspace/WorkspaceMgr.java b/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/linalg/workspace/WorkspaceMgr.java index 90b77f449..f096a1f6f 100644 --- a/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/linalg/workspace/WorkspaceMgr.java +++ b/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/linalg/workspace/WorkspaceMgr.java @@ -67,7 +67,7 @@ public interface WorkspaceMgr> { /** * Set arrays to be scoped out (not in any workspace) for the specified array type. - * This means that create, dup, leverage etc methods will return result arrays that are not attached to any workspace + * This means that create, dup, leverage etc. methods will return result arrays that are not attached to any workspace * * @param arrayType Array type to set scoped out for */ @@ -120,7 +120,7 @@ public interface WorkspaceMgr> { boolean isWorkspaceOpen(T arrayType); /** - * Assert thath the workspace for the specified array type is open. + * Assert that the workspace for the specified array type is open. * For array types that are set to scoped out, this will be treated as a no-op * @param arrayType Array type to check * @param msg May be null. If non-null: include this in the exception @@ -129,7 +129,7 @@ public interface WorkspaceMgr> { void assertOpen(T arrayType, String msg) throws ND4JWorkspaceException; /** - * Assert thath the workspace for the specified array type is not open. + * Assert that the workspace for the specified array type is not open. * For array types that are set to scoped out, this will be treated as a no-op * @param arrayType Array type to check * @param msg May be null. If non-null: include this in the exception @@ -193,7 +193,7 @@ public interface WorkspaceMgr> { /** * Create an uninitialized array in the specified array type's workspace (or detached if none is specified). - * Equivalent to {@link org.nd4j.linalg.factory.Nd4j#createUninitialized(int)} (int...)}, other than the array location + * Equivalent to {@link org.nd4j.linalg.factory.Nd4j#createUninitialized(int...)}, other than the array location * @param arrayType Array type * @param dataType Data type of the created array * @param shape Shape @@ -231,7 +231,7 @@ public interface WorkspaceMgr> { /** * Cast the specified array to the specified datatype.
- * If the array is already the correct type, the bahaviour depends on the 'dupIfCorrectType' argument.
+ * If the array is already the correct type, the behaviour depends on the 'dupIfCorrectType' argument.
* dupIfCorrectType = false && toCast.dataType() == dataType: return input array as-is (unless workspace is wrong)
* dupIfCorrectType = true && toCast.dataType() == dataType: duplicate the array into the specified workspace
* @param arrayType Array type diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/EvaluationToolsTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/EvaluationToolsTests.java index aa9b2686f..7cf2431f9 100644 --- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/EvaluationToolsTests.java +++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/EvaluationToolsTests.java @@ -81,7 +81,7 @@ public class EvaluationToolsTests extends BaseDL4JTest { String str = EvaluationTools.rocChartToHtml(roc); - // System.out.println(str); + System.out.println(str); } } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Cnn3DLossLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Cnn3DLossLayer.java index 79782d956..8ae76bd41 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Cnn3DLossLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Cnn3DLossLayer.java @@ -58,6 +58,8 @@ public class Cnn3DLossLayer extends FeedForwardLayer { int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { setNetConfiguration(conf); LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + runInheritance(); + org.deeplearning4j.nn.layers.convolution.Cnn3DLossLayer ret = new org.deeplearning4j.nn.layers.convolution.Cnn3DLossLayer(lconf, networkDataType); ret.addTrainingListeners(trainingListeners); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CnnLossLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CnnLossLayer.java index b4f93482d..50e917dac 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CnnLossLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CnnLossLayer.java @@ -63,6 +63,8 @@ public class CnnLossLayer extends FeedForwardLayer { int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { setNetConfiguration(conf); LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + runInheritance(); + org.deeplearning4j.nn.layers.convolution.CnnLossLayer ret = new org.deeplearning4j.nn.layers.convolution.CnnLossLayer(lconf, networkDataType); ret.addTrainingListeners(trainingListeners); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesLSTM.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesLSTM.java index 1cdd16dba..9c50ccba4 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesLSTM.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesLSTM.java @@ -77,7 +77,11 @@ public class GravesLSTM extends AbstractLSTM { public Layer instantiate(NeuralNetConfiguration conf, Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { LayerValidation.assertNInNOutSet("GravesLSTM", getLayerName(), layerIndex, getNIn(), getNOut()); + LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + lconf.setNetConfiguration(conf); + runInheritance(); + org.deeplearning4j.nn.layers.recurrent.GravesLSTM ret = new org.deeplearning4j.nn.layers.recurrent.GravesLSTM(lconf, networkDataType); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnLossLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnLossLayer.java index 4742b9e5b..e7db009ed 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnLossLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnLossLayer.java @@ -61,6 +61,8 @@ public class RnnLossLayer extends FeedForwardLayer { int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + lconf.setNetConfiguration(conf); + runInheritance(); org.deeplearning4j.nn.layers.recurrent.RnnLossLayer ret = new org.deeplearning4j.nn.layers.recurrent.RnnLossLayer(lconf, networkDataType); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java index bddd9fc30..55e766133 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java @@ -135,6 +135,7 @@ public class SubsamplingLayer extends NoParamLayer { Collection trainingListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) { LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex); + runInheritance(); org.deeplearning4j.nn.layers.convolution.subsampling.SubsamplingLayer ret = new org.deeplearning4j.nn.layers.convolution.subsampling.SubsamplingLayer(lconf, networkDataType); diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java index 9774b8c07..d14f20d85 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java @@ -24,6 +24,8 @@ import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; + import lombok.*; import net.brutex.ai.dnn.api.IModel; import org.deeplearning4j.nn.api.ITraininableLayerConfiguration; @@ -328,13 +330,9 @@ public abstract class AbstractLayer impl @Override public void clearNoiseWeightParams() {} - public List variables() { - return variables; - } - - public List variables(boolean copy) { + public List getVariables(boolean copy) { if (copy) { - return variables(); + return new ArrayList<>(getVariables()); } return variables; } @@ -585,7 +583,7 @@ public abstract class AbstractLayer impl */ @Override public INDArray getParams() { - // throw new RuntimeException("Not implemented"); + //throw new RuntimeException("Not implemented"); return null; } diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java index 01aede19f..1a055c528 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java @@ -184,6 +184,17 @@ public abstract class BaseLayer setParams(params, 'f'); } + /** + * * The AbstractLayer does not implement Params, ParamTable and GradientView. A RuntimeException + * * will be triggered when calling this. + * + * @return 1d parameter vector + */ + @Override + public INDArray getParams() { + return paramsFlattened; + } + /** */ @Override public void close() {} @@ -358,7 +369,7 @@ public abstract class BaseLayer protected void setParams(INDArray params, char order) { if (params == null) { - log.warn( + log.trace( "setParams(INDArray params, char order): params is null. Skipping setParams in Layer {}[{}] at index {}", getLayerConfiguration().getLayerName(), getClass().getSimpleName(), diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DefaultParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DefaultParamInitializer.java index 954ddc250..a7f444c91 100644 --- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DefaultParamInitializer.java +++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DefaultParamInitializer.java @@ -110,14 +110,14 @@ public class DefaultParamInitializer extends AbstractParamInitializer { INDArray weightView = paramsView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(0, nWeightParams)); params.put(WEIGHT_KEY, createWeightMatrix(layerConf, weightView, initializeParams)); - layerConf.getNetConfiguration().addNetWideVariable(WEIGHT_KEY); + layerConf.addVariable(WEIGHT_KEY); long offset = nWeightParams; if(hasBias(layerConf)){ INDArray biasView = paramsView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(offset, offset + nOut)); params.put(BIAS_KEY, createBias(layerConf, biasView, initializeParams)); - layerConf.getNetConfiguration().addNetWideVariable(BIAS_KEY); + layerConf.addVariable(BIAS_KEY); offset += nOut; } @@ -125,7 +125,7 @@ public class DefaultParamInitializer extends AbstractParamInitializer { INDArray gainView = paramsView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(offset, offset + nOut)); params.put(GAIN_KEY, createGain(conf, gainView, initializeParams)); - conf.getNetConfiguration().addNetWideVariable(GAIN_KEY); + conf.addVariable(GAIN_KEY); } return params; diff --git a/cavis-native/cavis-native-cpu/src/main/java/org/nd4j/linalg/cpu/nativecpu/ops/CpuOpContext.java b/cavis-native/cavis-native-cpu/src/main/java/org/nd4j/linalg/cpu/nativecpu/ops/CpuOpContext.java index d6ddf49de..2c1a5860c 100644 --- a/cavis-native/cavis-native-cpu/src/main/java/org/nd4j/linalg/cpu/nativecpu/ops/CpuOpContext.java +++ b/cavis-native/cavis-native-cpu/src/main/java/org/nd4j/linalg/cpu/nativecpu/ops/CpuOpContext.java @@ -50,7 +50,8 @@ public class CpuOpContext extends BaseOpContext implements OpContext, Deallocata @Override public void close() { - // no-op + nativeOps.ctxPurge(context); + context.deallocate(); } @Override diff --git a/cavis-native/cavis-native-jcublas/src/main/java/org/nd4j/jita/workspace/CudaWorkspace.java b/cavis-native/cavis-native-jcublas/src/main/java/org/nd4j/jita/workspace/CudaWorkspace.java index 39dad7bd4..94c2ca71b 100644 --- a/cavis-native/cavis-native-jcublas/src/main/java/org/nd4j/jita/workspace/CudaWorkspace.java +++ b/cavis-native/cavis-native-jcublas/src/main/java/org/nd4j/jita/workspace/CudaWorkspace.java @@ -20,6 +20,8 @@ package org.nd4j.jita.workspace; +import java.util.List; +import java.util.Queue; import lombok.NonNull; import lombok.extern.slf4j.Slf4j; import lombok.val; @@ -39,10 +41,6 @@ import org.nd4j.linalg.exception.ND4JIllegalStateException; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.nativeblas.NativeOpsHolder; -import java.util.List; -import java.util.Queue; - - /** * CUDA-aware MemoryWorkspace implementation * @@ -51,395 +49,489 @@ import java.util.Queue; @Slf4j public class CudaWorkspace extends Nd4jWorkspace { + public CudaWorkspace(@NonNull WorkspaceConfiguration configuration) { + super(configuration); + } - public CudaWorkspace(@NonNull WorkspaceConfiguration configuration) { - super(configuration); + public CudaWorkspace(@NonNull WorkspaceConfiguration configuration, @NonNull String workspaceId) { + super(configuration, workspaceId); + } + + public CudaWorkspace( + @NonNull WorkspaceConfiguration configuration, + @NonNull String workspaceId, + Integer deviceId) { + super(configuration, workspaceId); + this.deviceId = deviceId; + } + + @Override + protected void init() { + if (workspaceConfiguration.getPolicyLocation() == LocationPolicy.MMAP) { + throw new ND4JIllegalStateException("CUDA do not support MMAP workspaces yet"); } - public CudaWorkspace(@NonNull WorkspaceConfiguration configuration, @NonNull String workspaceId) { - super(configuration, workspaceId); + super.init(); + + if (currentSize.get() > 0) { + log.debug("Allocating {} bytes at DEVICE & HOST space...", currentSize.get()); + isInit.set(true); + + long bytes = currentSize.get(); + + log.debug( + "Allocating [{}] workspace on device_{}, {} bytes...", + id, + Nd4j.getAffinityManager().getDeviceForCurrentThread(), + bytes); + + if (isDebug.get()) { + Nd4j.getWorkspaceManager().printAllocationStatisticsForCurrentThread(); + } + + Pointer ptr = memoryManager.allocate((bytes + SAFETY_OFFSET), MemoryKind.HOST, false); + if (ptr == null) throw new ND4JIllegalStateException("Can't allocate memory for workspace"); + + workspace.setHostPointer(new PagedPointer(ptr)); + + if (workspaceConfiguration.getPolicyMirroring() != MirroringPolicy.HOST_ONLY) { + workspace.setDevicePointer( + new PagedPointer( + memoryManager.allocate((bytes + SAFETY_OFFSET), MemoryKind.DEVICE, false))); + AllocationsTracker.getInstance() + .markAllocated( + AllocationKind.GENERAL, + Nd4j.getAffinityManager().getDeviceForCurrentThread(), + bytes + SAFETY_OFFSET); + + MemoryTracker.getInstance() + .incrementWorkspaceAllocatedAmount( + Nd4j.getAffinityManager().getDeviceForCurrentThread(), bytes + SAFETY_OFFSET); + + // if base pointer isn't aligned to 16 bytes (128 bits) - adjust the offfset then + val addr = workspace.getDevicePointer().address(); + val div = addr % alignmentBase; + if (div != 0) { + deviceOffset.set(alignmentBase - div); + hostOffset.set(alignmentBase - div); + } + } + } + } + + @Override + public PagedPointer alloc(long requiredMemory, DataType type, boolean initialize) { + return this.alloc(requiredMemory, MemoryKind.DEVICE, type, initialize); + } + + @Override + public synchronized void destroyWorkspace(boolean extended) { + val size = currentSize.getAndSet(0); + reset(); + + if (extended) clearExternalAllocations(); + + clearPinnedAllocations(extended); + + if (workspace.getHostPointer() != null) + NativeOpsHolder.getInstance().getDeviceNativeOps().freeHost(workspace.getHostPointer()); + + if (workspace.getDevicePointer() != null) { + NativeOpsHolder.getInstance() + .getDeviceNativeOps() + .freeDevice(workspace.getDevicePointer(), 0); + AllocationsTracker.getInstance() + .markReleased( + AllocationKind.GENERAL, + Nd4j.getAffinityManager().getDeviceForCurrentThread(), + size + SAFETY_OFFSET); + + MemoryTracker.getInstance() + .decrementWorkspaceAmount( + Nd4j.getAffinityManager().getDeviceForCurrentThread(), size + SAFETY_OFFSET); } - public CudaWorkspace(@NonNull WorkspaceConfiguration configuration, @NonNull String workspaceId, Integer deviceId) { - super(configuration, workspaceId); - this.deviceId = deviceId; + workspace.setDevicePointer(null); + workspace.setHostPointer(null); + } + + @Override + public PagedPointer alloc( + long requiredMemory, MemoryKind kind, DataType type, boolean initialize) { + long numElements = requiredMemory / Nd4j.sizeOfDataType(type); + + // alignment + if (requiredMemory % alignmentBase != 0) + requiredMemory += alignmentBase - (requiredMemory % alignmentBase); + + if (!isUsed.get()) { + if (disabledCounter.incrementAndGet() % 10 == 0) + log.warn( + "Workspace was turned off, and wasn't enabled after {} allocations", + disabledCounter.get()); + + if (kind == MemoryKind.DEVICE) { + val pointer = + new PagedPointer( + memoryManager.allocate(requiredMemory, MemoryKind.DEVICE, initialize), numElements); + externalAllocations.add(new PointersPair(null, pointer)); + MemoryTracker.getInstance() + .incrementWorkspaceAllocatedAmount( + Nd4j.getAffinityManager().getDeviceForCurrentThread(), requiredMemory); + return pointer; + } else { + val pointer = + new PagedPointer( + memoryManager.allocate(requiredMemory, MemoryKind.HOST, initialize), numElements); + externalAllocations.add(new PointersPair(pointer, null)); + return pointer; + } } - @Override - protected void init() { - if (workspaceConfiguration.getPolicyLocation() == LocationPolicy.MMAP) { - throw new ND4JIllegalStateException("CUDA do not support MMAP workspaces yet"); + boolean trimmer = + (workspaceConfiguration.getPolicyReset() == ResetPolicy.ENDOFBUFFER_REACHED + && requiredMemory + cycleAllocations.get() > initialBlockSize.get() + && initialBlockSize.get() > 0 + && kind == MemoryKind.DEVICE) + || trimmedMode.get(); + + if (trimmer + && workspaceConfiguration.getPolicySpill() == SpillPolicy.REALLOCATE + && !trimmedMode.get()) { + trimmedMode.set(true); + trimmedStep.set(stepsCount.get()); + } + + if (kind == MemoryKind.DEVICE) { + if (deviceOffset.get() + requiredMemory <= currentSize.get() + && !trimmer + && Nd4j.getWorkspaceManager().getDebugMode() != DebugMode.SPILL_EVERYTHING) { + cycleAllocations.addAndGet(requiredMemory); + long prevOffset = deviceOffset.getAndAdd(requiredMemory); + + if (workspaceConfiguration.getPolicyMirroring() == MirroringPolicy.HOST_ONLY) return null; + + val ptr = workspace.getDevicePointer().withOffset(prevOffset, numElements); + + log.debug( + "Workspace [{}] device_{}: alloc array of {} bytes, capacity of {} elements; prevOffset: {}; newOffset: {}; size: {}; address: {}", + id, + Nd4j.getAffinityManager().getDeviceForCurrentThread(), + requiredMemory, + numElements, + prevOffset, + deviceOffset.get(), + currentSize.get(), + ptr.address()); + + if (initialize) { + val context = AtomicAllocator.getInstance().getDeviceContext(); + + int ret = + NativeOpsHolder.getInstance() + .getDeviceNativeOps() + .memsetAsync(ptr, 0, requiredMemory, 0, context.getSpecialStream()); + if (ret == 0) + throw new ND4JIllegalStateException( + "memset failed device_" + Nd4j.getAffinityManager().getDeviceForCurrentThread()); + + context.syncSpecialStream(); } - super.init(); + return ptr; + } else { - if (currentSize.get() > 0) { - //log.info("Allocating {} bytes at DEVICE & HOST space...", currentSize.get()); - isInit.set(true); - - long bytes = currentSize.get(); - - if (isDebug.get()) - log.info("Allocating [{}] workspace on device_{}, {} bytes...", id, Nd4j.getAffinityManager().getDeviceForCurrentThread(), bytes); - - if (isDebug.get()) { - Nd4j.getWorkspaceManager().printAllocationStatisticsForCurrentThread(); - } - - Pointer ptr = memoryManager.allocate((bytes + SAFETY_OFFSET), MemoryKind.HOST, false); - if (ptr == null) - throw new ND4JIllegalStateException("Can't allocate memory for workspace"); - - workspace.setHostPointer(new PagedPointer(ptr)); - - if (workspaceConfiguration.getPolicyMirroring() != MirroringPolicy.HOST_ONLY) { - workspace.setDevicePointer(new PagedPointer(memoryManager.allocate((bytes + SAFETY_OFFSET), MemoryKind.DEVICE, false))); - AllocationsTracker.getInstance().markAllocated(AllocationKind.GENERAL, Nd4j.getAffinityManager().getDeviceForCurrentThread(), bytes + SAFETY_OFFSET); - - MemoryTracker.getInstance().incrementWorkspaceAllocatedAmount(Nd4j.getAffinityManager().getDeviceForCurrentThread(), bytes + SAFETY_OFFSET); - - // if base pointer isn't aligned to 16 bytes (128 bits) - adjust the offfset then - val addr = workspace.getDevicePointer().address(); - val div = addr % alignmentBase; - if (div != 0) { - deviceOffset.set(alignmentBase - div); - hostOffset.set(alignmentBase - div); - } - } - } - } - - @Override - public PagedPointer alloc(long requiredMemory, DataType type, boolean initialize) { - return this.alloc(requiredMemory, MemoryKind.DEVICE, type, initialize); - } - - - @Override - public synchronized void destroyWorkspace(boolean extended) { - val size = currentSize.getAndSet(0); - reset(); - - if (extended) - clearExternalAllocations(); - - clearPinnedAllocations(extended); - - if (workspace.getHostPointer() != null) - NativeOpsHolder.getInstance().getDeviceNativeOps().freeHost(workspace.getHostPointer()); - - if (workspace.getDevicePointer() != null) { - NativeOpsHolder.getInstance().getDeviceNativeOps().freeDevice(workspace.getDevicePointer(), 0); - AllocationsTracker.getInstance().markReleased(AllocationKind.GENERAL, Nd4j.getAffinityManager().getDeviceForCurrentThread(), size + SAFETY_OFFSET); - - MemoryTracker.getInstance().decrementWorkspaceAmount(Nd4j.getAffinityManager().getDeviceForCurrentThread(), size + SAFETY_OFFSET); + // spill + if (workspaceConfiguration.getPolicyReset() == ResetPolicy.ENDOFBUFFER_REACHED + && currentSize.get() > 0 + && !trimmer + && Nd4j.getWorkspaceManager().getDebugMode() != DebugMode.SPILL_EVERYTHING) { + // log.info("End of space reached. Current offset: {}; requiredMemory: {}", + // deviceOffset.get(), requiredMemory); + deviceOffset.set(0); + resetPlanned.set(true); + return alloc(requiredMemory, kind, type, initialize); } - workspace.setDevicePointer(null); - workspace.setHostPointer(null); + if (!trimmer) spilledAllocationsSize.addAndGet(requiredMemory); + else pinnedAllocationsSize.addAndGet(requiredMemory); - } + log.debug( + "Workspace [{}] device_{}: spilled DEVICE array of {} bytes, capacity of {} elements", + id, + Nd4j.getAffinityManager().getDeviceForCurrentThread(), + requiredMemory, + numElements); + val shape = + new AllocationShape( + requiredMemory / Nd4j.sizeOfDataType(type), Nd4j.sizeOfDataType(type), type); - @Override - public PagedPointer alloc(long requiredMemory, MemoryKind kind, DataType type, boolean initialize) { - long numElements = requiredMemory / Nd4j.sizeOfDataType(type); + cycleAllocations.addAndGet(requiredMemory); - // alignment - if (requiredMemory % alignmentBase != 0) - requiredMemory += alignmentBase - (requiredMemory % alignmentBase); + if (workspaceConfiguration.getPolicyMirroring() == MirroringPolicy.HOST_ONLY) return null; - if (!isUsed.get()) { - if (disabledCounter.incrementAndGet() % 10 == 0) - log.warn("Worskpace was turned off, and wasn't enabled after {} allocations", disabledCounter.get()); + switch (workspaceConfiguration.getPolicySpill()) { + case REALLOCATE: + case EXTERNAL: + if (!trimmer) { + externalCount.incrementAndGet(); + // + // AtomicAllocator.getInstance().getMemoryHandler().getMemoryProvider().malloc(shape, + // null, AllocationStatus.DEVICE).getDevicePointer() + val pointer = + new PagedPointer( + memoryManager.allocate(requiredMemory, MemoryKind.DEVICE, initialize), + numElements); + pointer.isLeaked(); - if (kind == MemoryKind.DEVICE) { - val pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.DEVICE, initialize), numElements); - externalAllocations.add(new PointersPair(null, pointer)); - MemoryTracker.getInstance().incrementWorkspaceAllocatedAmount(Nd4j.getAffinityManager().getDeviceForCurrentThread(), requiredMemory); - return pointer; + val pp = new PointersPair(null, pointer); + pp.setRequiredMemory(requiredMemory); + externalAllocations.add(pp); + + MemoryTracker.getInstance() + .incrementWorkspaceAllocatedAmount( + Nd4j.getAffinityManager().getDeviceForCurrentThread(), requiredMemory); + return pointer; } else { - val pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.HOST, initialize), numElements); - externalAllocations.add(new PointersPair(pointer, null)); - return pointer; + pinnedCount.incrementAndGet(); + + val pointer = + new PagedPointer( + memoryManager.allocate(requiredMemory, MemoryKind.DEVICE, initialize), + numElements); + pointer.isLeaked(); + + pinnedAllocations.add( + new PointersPair(stepsCount.get(), requiredMemory, null, pointer)); + MemoryTracker.getInstance() + .incrementWorkspaceAllocatedAmount( + Nd4j.getAffinityManager().getDeviceForCurrentThread(), requiredMemory); + return pointer; } + case FAIL: + default: + { + throw new ND4JIllegalStateException("Can't allocate memory: Workspace is full"); + } + } + } + } else if (kind == MemoryKind.HOST) { + if (hostOffset.get() + requiredMemory <= currentSize.get() + && !trimmer + && Nd4j.getWorkspaceManager().getDebugMode() != DebugMode.SPILL_EVERYTHING) { + long prevOffset = hostOffset.getAndAdd(requiredMemory); + val ptr = workspace.getHostPointer().withOffset(prevOffset, numElements); + + // && workspaceConfiguration.getPolicyMirroring() == MirroringPolicy.HOST_ONLY + if (initialize) Pointer.memset(ptr, 0, requiredMemory); + return ptr; + } else { + // log.info("Spilled HOST array of {} bytes, capacity of {} elements", requiredMemory, + // numElements); + if (workspaceConfiguration.getPolicyReset() == ResetPolicy.ENDOFBUFFER_REACHED + && currentSize.get() > 0 + && !trimmer + && Nd4j.getWorkspaceManager().getDebugMode() != DebugMode.SPILL_EVERYTHING) { + // log.info("End of space reached. Current offset: {}; requiredMemory: {}", + // deviceOffset.get(), requiredMemory); + hostOffset.set(0); + // resetPlanned.set(true); + return alloc(requiredMemory, kind, type, initialize); } - boolean trimmer = (workspaceConfiguration.getPolicyReset() == ResetPolicy.ENDOFBUFFER_REACHED && requiredMemory + cycleAllocations.get() > initialBlockSize.get() && initialBlockSize.get() > 0 && kind == MemoryKind.DEVICE) || trimmedMode.get(); + val shape = + new AllocationShape( + requiredMemory / Nd4j.sizeOfDataType(type), Nd4j.sizeOfDataType(type), type); - if (trimmer && workspaceConfiguration.getPolicySpill() == SpillPolicy.REALLOCATE && !trimmedMode.get()) { - trimmedMode.set(true); - trimmedStep.set(stepsCount.get()); - } + switch (workspaceConfiguration.getPolicySpill()) { + case REALLOCATE: + case EXTERNAL: + if (!trimmer) { + // memoryManager.allocate(requiredMemory, MemoryKind.HOST, true) + // AtomicAllocator.getInstance().getMemoryHandler().getMemoryProvider().malloc(shape, + // null, AllocationStatus.DEVICE).getDevicePointer() + PagedPointer pointer = + new PagedPointer( + memoryManager.allocate(requiredMemory, MemoryKind.HOST, initialize), + numElements); - if (kind == MemoryKind.DEVICE) { - if (deviceOffset.get() + requiredMemory <= currentSize.get() && !trimmer && Nd4j.getWorkspaceManager().getDebugMode() != DebugMode.SPILL_EVERYTHING) { - cycleAllocations.addAndGet(requiredMemory); - long prevOffset = deviceOffset.getAndAdd(requiredMemory); - - if (workspaceConfiguration.getPolicyMirroring() == MirroringPolicy.HOST_ONLY) - return null; - - val ptr = workspace.getDevicePointer().withOffset(prevOffset, numElements); - - if (isDebug.get()) - log.info("Workspace [{}] device_{}: alloc array of {} bytes, capacity of {} elements; prevOffset: {}; newOffset: {}; size: {}; address: {}", id, Nd4j.getAffinityManager().getDeviceForCurrentThread(), requiredMemory, numElements, prevOffset, deviceOffset.get(), currentSize.get(), ptr.address()); - - if (initialize) { - val context = AtomicAllocator.getInstance().getDeviceContext(); - - int ret = NativeOpsHolder.getInstance().getDeviceNativeOps().memsetAsync(ptr, 0, requiredMemory, 0, context.getSpecialStream()); - if (ret == 0) - throw new ND4JIllegalStateException("memset failed device_" + Nd4j.getAffinityManager().getDeviceForCurrentThread()); - - context.syncSpecialStream(); - } - - return ptr; + externalAllocations.add(new PointersPair(pointer, null)); + return pointer; } else { + // AtomicAllocator.getInstance().getMemoryHandler().getMemoryProvider().malloc(shape, + // null, AllocationStatus.DEVICE).getDevicePointer() + PagedPointer pointer = + new PagedPointer( + memoryManager.allocate(requiredMemory, MemoryKind.HOST, initialize), + numElements); + pointer.isLeaked(); - // spill - if (workspaceConfiguration.getPolicyReset() == ResetPolicy.ENDOFBUFFER_REACHED && currentSize.get() > 0 && !trimmer && Nd4j.getWorkspaceManager().getDebugMode() != DebugMode.SPILL_EVERYTHING) { - //log.info("End of space reached. Current offset: {}; requiredMemory: {}", deviceOffset.get(), requiredMemory); - deviceOffset.set(0); - resetPlanned.set(true); - return alloc(requiredMemory, kind, type, initialize); - } - - if (!trimmer) - spilledAllocationsSize.addAndGet(requiredMemory); - else - pinnedAllocationsSize.addAndGet(requiredMemory); - - if (isDebug.get()) { - log.info("Workspace [{}] device_{}: spilled DEVICE array of {} bytes, capacity of {} elements", id, Nd4j.getAffinityManager().getDeviceForCurrentThread(), requiredMemory, numElements); - } - - val shape = new AllocationShape(requiredMemory / Nd4j.sizeOfDataType(type), Nd4j.sizeOfDataType(type), type); - - cycleAllocations.addAndGet(requiredMemory); - - if (workspaceConfiguration.getPolicyMirroring() == MirroringPolicy.HOST_ONLY) - return null; - - switch (workspaceConfiguration.getPolicySpill()) { - case REALLOCATE: - case EXTERNAL: - if (!trimmer) { - externalCount.incrementAndGet(); - // - //AtomicAllocator.getInstance().getMemoryHandler().getMemoryProvider().malloc(shape, null, AllocationStatus.DEVICE).getDevicePointer() - val pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.DEVICE, initialize), numElements); - pointer.isLeaked(); - - val pp = new PointersPair(null, pointer); - pp.setRequiredMemory(requiredMemory); - externalAllocations.add(pp); - - MemoryTracker.getInstance().incrementWorkspaceAllocatedAmount(Nd4j.getAffinityManager().getDeviceForCurrentThread(), requiredMemory); - return pointer; - } else { - pinnedCount.incrementAndGet(); - - val pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.DEVICE, initialize), numElements); - pointer.isLeaked(); - - pinnedAllocations.add(new PointersPair(stepsCount.get(), requiredMemory, null, pointer)); - MemoryTracker.getInstance().incrementWorkspaceAllocatedAmount(Nd4j.getAffinityManager().getDeviceForCurrentThread(), requiredMemory); - return pointer; - } - case FAIL: - default: { - throw new ND4JIllegalStateException("Can't allocate memory: Workspace is full"); - } - } + pinnedAllocations.add(new PointersPair(stepsCount.get(), 0L, pointer, null)); + return pointer; } - } else if (kind == MemoryKind.HOST) { - if (hostOffset.get() + requiredMemory <= currentSize.get() && !trimmer && Nd4j.getWorkspaceManager().getDebugMode() != DebugMode.SPILL_EVERYTHING) { - - long prevOffset = hostOffset.getAndAdd(requiredMemory); - - val ptr = workspace.getHostPointer().withOffset(prevOffset, numElements); - - // && workspaceConfiguration.getPolicyMirroring() == MirroringPolicy.HOST_ONLY - if (initialize) - Pointer.memset(ptr, 0, requiredMemory); - return ptr; - } else { - // log.info("Spilled HOST array of {} bytes, capacity of {} elements", requiredMemory, numElements); - if (workspaceConfiguration.getPolicyReset() == ResetPolicy.ENDOFBUFFER_REACHED && currentSize.get() > 0 && !trimmer && Nd4j.getWorkspaceManager().getDebugMode() != DebugMode.SPILL_EVERYTHING) { - //log.info("End of space reached. Current offset: {}; requiredMemory: {}", deviceOffset.get(), requiredMemory); - hostOffset.set(0); - //resetPlanned.set(true); - return alloc(requiredMemory, kind, type, initialize); - } - - val shape = new AllocationShape(requiredMemory / Nd4j.sizeOfDataType(type), Nd4j.sizeOfDataType(type), type); - - switch (workspaceConfiguration.getPolicySpill()) { - case REALLOCATE: - case EXTERNAL: - if (!trimmer) { - //memoryManager.allocate(requiredMemory, MemoryKind.HOST, true) - //AtomicAllocator.getInstance().getMemoryHandler().getMemoryProvider().malloc(shape, null, AllocationStatus.DEVICE).getDevicePointer() - PagedPointer pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.HOST, initialize), numElements); - - externalAllocations.add(new PointersPair(pointer, null)); - return pointer; - } else { - //AtomicAllocator.getInstance().getMemoryHandler().getMemoryProvider().malloc(shape, null, AllocationStatus.DEVICE).getDevicePointer() - PagedPointer pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.HOST, initialize), numElements); - pointer.isLeaked(); - - pinnedAllocations.add(new PointersPair(stepsCount.get(), 0L, pointer, null)); - return pointer; - } - case FAIL: - default: { - throw new ND4JIllegalStateException("Can't allocate memory: Workspace is full"); - } - } - } - } else throw new ND4JIllegalStateException("Unknown MemoryKind was passed in: " + kind); - - //throw new ND4JIllegalStateException("Shouldn't ever reach this line"); - } - - @Override - protected void clearPinnedAllocations(boolean extended) { - if (isDebug.get()) - log.info("Workspace [{}] device_{} threadId {} cycle {}: clearing pinned allocations...", id, Nd4j.getAffinityManager().getDeviceForCurrentThread(), Thread.currentThread().getId(), cyclesCount.get()); - - while (!pinnedAllocations.isEmpty()) { - val pair = pinnedAllocations.peek(); - if (pair == null) - throw new RuntimeException(); - - long stepNumber = pair.getAllocationCycle(); - long stepCurrent = stepsCount.get(); - - if (isDebug.get()) - log.info("Allocation step: {}; Current step: {}", stepNumber, stepCurrent); - - if (stepNumber + 2 < stepCurrent || extended) { - pinnedAllocations.remove(); - - if (pair.getDevicePointer() != null) { - NativeOpsHolder.getInstance().getDeviceNativeOps().freeDevice(pair.getDevicePointer(), 0); - MemoryTracker.getInstance().decrementWorkspaceAmount(Nd4j.getAffinityManager().getDeviceForCurrentThread(), pair.getRequiredMemory()); - pinnedCount.decrementAndGet(); - - if (isDebug.get()) - log.info("deleting external device allocation "); - } - - if (pair.getHostPointer() != null) { - NativeOpsHolder.getInstance().getDeviceNativeOps().freeHost(pair.getHostPointer()); - - if (isDebug.get()) - log.info("deleting external host allocation "); - } - - val sizez = pair.getRequiredMemory() * -1; - pinnedAllocationsSize.addAndGet(sizez); - } else { - break; + case FAIL: + default: + { + throw new ND4JIllegalStateException("Can't allocate memory: Workspace is full"); } } - } + } + } else throw new ND4JIllegalStateException("Unknown MemoryKind was passed in: " + kind); - @Override - protected void clearExternalAllocations() { - if (isDebug.get()) - log.info("Workspace [{}] device_{} threadId {} guid [{}]: clearing external allocations...", id, Nd4j.getAffinityManager().getDeviceForCurrentThread(), Thread.currentThread().getId(), guid); + // throw new ND4JIllegalStateException("Shouldn't ever reach this line"); + } - Nd4j.getExecutioner().commit(); + @Override + protected void clearPinnedAllocations(boolean extended) { - try { - for (PointersPair pair : externalAllocations) { - if (pair.getHostPointer() != null) { - NativeOpsHolder.getInstance().getDeviceNativeOps().freeHost(pair.getHostPointer()); + log.debug( + "Workspace [{}] device_{} threadId {} cycle {}: clearing pinned allocations...", + id, + Nd4j.getAffinityManager().getDeviceForCurrentThread(), + Thread.currentThread().getId(), + cyclesCount.get()); - if (isDebug.get()) - log.info("deleting external host allocation... "); - } + while (!pinnedAllocations.isEmpty()) { + val pair = pinnedAllocations.peek(); + if (pair == null) throw new RuntimeException(); - if (pair.getDevicePointer() != null) { - NativeOpsHolder.getInstance().getDeviceNativeOps().freeDevice(pair.getDevicePointer(), 0); + long stepNumber = pair.getAllocationCycle(); + long stepCurrent = stepsCount.get(); - if (isDebug.get()) - log.info("deleting external device allocation... "); + log.debug("Allocation step: {}; Current step: {}", stepNumber, stepCurrent); - val sizez = pair.getRequiredMemory(); - if (sizez != null) { - AllocationsTracker.getInstance().markReleased(AllocationKind.GENERAL, Nd4j.getAffinityManager().getDeviceForCurrentThread(), sizez); - MemoryTracker.getInstance().decrementWorkspaceAmount(Nd4j.getAffinityManager().getDeviceForCurrentThread(), sizez); - } - } - } - } catch (Exception e) { - log.error("RC: Workspace [{}] device_{} threadId {} guid [{}]: clearing external allocations...", id, Nd4j.getAffinityManager().getDeviceForCurrentThread(), Thread.currentThread().getId(), guid); - throw new RuntimeException(e); + if (stepNumber + 2 < stepCurrent || extended) { + pinnedAllocations.remove(); + + if (pair.getDevicePointer() != null) { + NativeOpsHolder.getInstance().getDeviceNativeOps().freeDevice(pair.getDevicePointer(), 0); + MemoryTracker.getInstance() + .decrementWorkspaceAmount( + Nd4j.getAffinityManager().getDeviceForCurrentThread(), pair.getRequiredMemory()); + pinnedCount.decrementAndGet(); + + log.debug("deleting external device allocation "); } - spilledAllocationsSize.set(0); - externalCount.set(0); - externalAllocations.clear(); - } + if (pair.getHostPointer() != null) { + NativeOpsHolder.getInstance().getDeviceNativeOps().freeHost(pair.getHostPointer()); - @Override - protected void resetWorkspace() { - if (currentSize.get() < 1) { + log.debug("deleting external host allocation "); } + val sizez = pair.getRequiredMemory() * -1; + pinnedAllocationsSize.addAndGet(sizez); + } else { + break; + } + } + } -/* - if (Nd4j.getExecutioner() instanceof GridExecutioner) - ((GridExecutioner) Nd4j.getExecutioner()).flushQueueBlocking(); + @Override + protected void clearExternalAllocations() { - CudaContext context = (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext(); + log.debug( + "Workspace [{}] device_{} threadId {} guid [{}]: clearing external allocations...", + id, + Nd4j.getAffinityManager().getDeviceForCurrentThread(), + Thread.currentThread().getId(), + guid); - //log.info("workspace: {}, size: {}", workspace.getDevicePointer().address(), currentSize.get()); + Nd4j.getExecutioner().commit(); - NativeOpsHolder.getInstance().getDeviceNativeOps().memsetAsync(workspace.getDevicePointer(), 0, currentSize.get() + SAFETY_OFFSET, 0, context.getSpecialStream()); + try { + for (PointersPair pair : externalAllocations) { + if (pair.getHostPointer() != null) { + NativeOpsHolder.getInstance().getDeviceNativeOps().freeHost(pair.getHostPointer()); - Pointer.memset(workspace.getHostPointer(), 0, currentSize.get() + SAFETY_OFFSET); + log.debug("deleting external host allocation... "); + } - context.getSpecialStream().synchronize(); - */ + if (pair.getDevicePointer() != null) { + NativeOpsHolder.getInstance().getDeviceNativeOps().freeDevice(pair.getDevicePointer(), 0); + + log.debug("deleting external device allocation... "); + + val sizez = pair.getRequiredMemory(); + if (sizez != null) { + AllocationsTracker.getInstance() + .markReleased( + AllocationKind.GENERAL, + Nd4j.getAffinityManager().getDeviceForCurrentThread(), + sizez); + MemoryTracker.getInstance() + .decrementWorkspaceAmount( + Nd4j.getAffinityManager().getDeviceForCurrentThread(), sizez); + } + } + } + } catch (Exception e) { + log.error( + "RC: Workspace [{}] device_{} threadId {} guid [{}]: clearing external allocations...", + id, + Nd4j.getAffinityManager().getDeviceForCurrentThread(), + Thread.currentThread().getId(), + guid); + throw new RuntimeException(e); } - protected PointersPair workspace() { - return workspace; - } + spilledAllocationsSize.set(0); + externalCount.set(0); + externalAllocations.clear(); + } - protected Queue pinnedPointers() { - return pinnedAllocations; - } + @Override + protected void resetWorkspace() { + if (currentSize.get() < 1) {} - protected List externalPointers() { - return externalAllocations; - } + /* + if (Nd4j.getExecutioner() instanceof GridExecutioner) + ((GridExecutioner) Nd4j.getExecutioner()).flushQueueBlocking(); - @Override - public Deallocator deallocator() { - return new CudaWorkspaceDeallocator(this); - } + CudaContext context = (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext(); - @Override - public String getUniqueId() { - return "Workspace_" + getId() + "_" + Nd4j.getDeallocatorService().nextValue(); - } + //log.info("workspace: {}, size: {}", workspace.getDevicePointer().address(), currentSize.get()); - @Override - public int targetDevice() { - return deviceId; - } + NativeOpsHolder.getInstance().getDeviceNativeOps().memsetAsync(workspace.getDevicePointer(), 0, currentSize.get() + SAFETY_OFFSET, 0, context.getSpecialStream()); - @Override - public long getPrimaryOffset() { - return getDeviceOffset(); - } + Pointer.memset(workspace.getHostPointer(), 0, currentSize.get() + SAFETY_OFFSET); + + context.getSpecialStream().synchronize(); + */ + } + + protected PointersPair workspace() { + return workspace; + } + + protected Queue pinnedPointers() { + return pinnedAllocations; + } + + protected List externalPointers() { + return externalAllocations; + } + + @Override + public Deallocator deallocator() { + return new CudaWorkspaceDeallocator(this); + } + + @Override + public String getUniqueId() { + return "Workspace_" + getId() + "_" + Nd4j.getDeallocatorService().nextValue(); + } + + @Override + public int targetDevice() { + return deviceId; + } + + @Override + public long getPrimaryOffset() { + return getDeviceOffset(); + } } diff --git a/cavis-native/cavis-native-jcublas/src/main/java/org/nd4j/jita/workspace/CudaWorkspaceDeallocator.java b/cavis-native/cavis-native-jcublas/src/main/java/org/nd4j/jita/workspace/CudaWorkspaceDeallocator.java index 806986fc7..41b936bf7 100644 --- a/cavis-native/cavis-native-jcublas/src/main/java/org/nd4j/jita/workspace/CudaWorkspaceDeallocator.java +++ b/cavis-native/cavis-native-jcublas/src/main/java/org/nd4j/jita/workspace/CudaWorkspaceDeallocator.java @@ -48,7 +48,7 @@ public class CudaWorkspaceDeallocator implements Deallocator { @Override public void deallocate() { - log.trace("Deallocating CUDA workspace"); + log.debug("Deallocating CUDA workspace"); // purging workspace planes if (pointersPair != null) { diff --git a/cavis-native/cavis-native-jcublas/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaExecutioner.java b/cavis-native/cavis-native-jcublas/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaExecutioner.java index 612dfdda8..5524bddbc 100644 --- a/cavis-native/cavis-native-jcublas/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaExecutioner.java +++ b/cavis-native/cavis-native-jcublas/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaExecutioner.java @@ -1582,7 +1582,7 @@ public class CudaExecutioner extends DefaultOpExecutioner { } if (nativeOps.lastErrorCode() != 0) - throw new RuntimeException(nativeOps.lastErrorMessage()); + throw new RuntimeException(nativeOps.lastErrorMessage() + " error code: " + nativeOps.lastErrorCode()); profilingConfigurableHookOut(op, oc, st); diff --git a/cavis-native/cavis-native-jcublas/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaOpContext.java b/cavis-native/cavis-native-jcublas/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaOpContext.java index 3ba143e36..52af2eeb5 100644 --- a/cavis-native/cavis-native-jcublas/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaOpContext.java +++ b/cavis-native/cavis-native-jcublas/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaOpContext.java @@ -56,7 +56,8 @@ public class CudaOpContext extends BaseOpContext implements OpContext, Deallocat @Override public void close() { - // no-op + nativeOps.ctxPurge(context); + context.deallocate(); } @Override