Playing with some new code 2 - clean build/test

Signed-off-by: brian <brian@brutex.de>
2023-04-17 09:41:12 +02:00 · 2023-04-17 09:41:12 +02:00 · 82e65bdf59
commit 82e65bdf59
parent 9d4939ccfd
15 changed files with 473 additions and 359 deletions
--- a/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/linalg/workspace/WorkspaceMgr.java
+++ b/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/linalg/workspace/WorkspaceMgr.java
@ -67,7 +67,7 @@ public interface WorkspaceMgr<T extends Enum<T>> {
    /**
     * Set arrays to be scoped out (not in any workspace) for the specified array type.
-     * This means that create, dup, leverage etc methods will return result arrays that are not attached to any workspace
+     * This means that create, dup, leverage etc. methods will return result arrays that are not attached to any workspace
     *
     * @param arrayType Array type to set scoped out for
     */
@ -120,7 +120,7 @@ public interface WorkspaceMgr<T extends Enum<T>> {
    boolean isWorkspaceOpen(T arrayType);
    /**
-     * Assert thath the workspace for the specified array type is open.
+     * Assert that the workspace for the specified array type is open.
     * For array types that are set to scoped out, this will be treated as a no-op
     * @param arrayType Array type to check
     * @param msg       May be null. If non-null: include this in the exception
@ -129,7 +129,7 @@ public interface WorkspaceMgr<T extends Enum<T>> {
    void assertOpen(T arrayType, String msg) throws ND4JWorkspaceException;
    /**
-     * Assert thath the workspace for the specified array type is not open.
+     * Assert that the workspace for the specified array type is not open.
     * For array types that are set to scoped out, this will be treated as a no-op
     * @param arrayType Array type to check
     * @param msg       May be null. If non-null: include this in the exception
@ -193,7 +193,7 @@ public interface WorkspaceMgr<T extends Enum<T>> {
    /**
     * Create an uninitialized array in the specified array type's workspace (or detached if none is specified).
-     * Equivalent to {@link org.nd4j.linalg.factory.Nd4j#createUninitialized(int)} (int...)}, other than the array location
+     * Equivalent to {@link org.nd4j.linalg.factory.Nd4j#createUninitialized(int...)}, other than the array location
     * @param arrayType Array type
     * @param dataType  Data type of the created array
     * @param shape     Shape
@ -231,7 +231,7 @@ public interface WorkspaceMgr<T extends Enum<T>> {
    /**
     * Cast the specified array to the specified datatype.<br>
-     * If the array is already the correct type, the bahaviour depends on the 'dupIfCorrectType' argument.<br>
+     * If the array is already the correct type, the behaviour depends on the 'dupIfCorrectType' argument.<br>
     * dupIfCorrectType = false && toCast.dataType() == dataType: return input array as-is (unless workspace is wrong)<br>
     * dupIfCorrectType = true && toCast.dataType() == dataType: duplicate the array into the specified workspace<br>
     * @param arrayType        Array type
--- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/EvaluationToolsTests.java
+++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/EvaluationToolsTests.java
@ -81,7 +81,7 @@ public class EvaluationToolsTests extends BaseDL4JTest {
            String str = EvaluationTools.rocChartToHtml(roc);
-            //            System.out.println(str);
+            System.out.println(str);
        }
    }
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Cnn3DLossLayer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Cnn3DLossLayer.java
@ -58,6 +58,8 @@ public class Cnn3DLossLayer extends FeedForwardLayer {
                             int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) {
        setNetConfiguration(conf);
        LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
        runInheritance();
        org.deeplearning4j.nn.layers.convolution.Cnn3DLossLayer ret =
                        new org.deeplearning4j.nn.layers.convolution.Cnn3DLossLayer(lconf, networkDataType);
        ret.addTrainingListeners(trainingListeners);
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CnnLossLayer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CnnLossLayer.java
@ -63,6 +63,8 @@ public class CnnLossLayer extends FeedForwardLayer {
                             int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) {
        setNetConfiguration(conf);
        LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
        runInheritance();
        org.deeplearning4j.nn.layers.convolution.CnnLossLayer ret =
                        new org.deeplearning4j.nn.layers.convolution.CnnLossLayer(lconf, networkDataType);
        ret.addTrainingListeners(trainingListeners);
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesLSTM.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesLSTM.java
@ -77,7 +77,11 @@ public class GravesLSTM extends AbstractLSTM {
    public Layer instantiate(NeuralNetConfiguration conf, Collection<TrainingListener> trainingListeners,
                             int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) {
        LayerValidation.assertNInNOutSet("GravesLSTM", getLayerName(), layerIndex, getNIn(), getNOut());
        LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
        lconf.setNetConfiguration(conf);
        runInheritance();
        org.deeplearning4j.nn.layers.recurrent.GravesLSTM ret =
                        new org.deeplearning4j.nn.layers.recurrent.GravesLSTM(lconf, networkDataType);
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnLossLayer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnLossLayer.java
@ -61,6 +61,8 @@ public class RnnLossLayer extends FeedForwardLayer {
                             int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) {
        LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
        lconf.setNetConfiguration(conf);
        runInheritance();
        org.deeplearning4j.nn.layers.recurrent.RnnLossLayer ret =
                        new org.deeplearning4j.nn.layers.recurrent.RnnLossLayer(lconf, networkDataType);
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java
@ -135,6 +135,7 @@ public class SubsamplingLayer extends NoParamLayer {
                                                       Collection<TrainingListener> trainingListeners, int layerIndex, INDArray layerParamsView,
                                                       boolean initializeParams, DataType networkDataType) {
        LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
        runInheritance();
        org.deeplearning4j.nn.layers.convolution.subsampling.SubsamplingLayer ret =
                new org.deeplearning4j.nn.layers.convolution.subsampling.SubsamplingLayer(lconf, networkDataType);
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java
@ -24,6 +24,8 @@ import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
 import java.util.Map;
 import java.util.stream.Collectors;
 import lombok.*;
 import net.brutex.ai.dnn.api.IModel;
 import org.deeplearning4j.nn.api.ITraininableLayerConfiguration;
@ -328,13 +330,9 @@ public abstract class AbstractLayer<LayerConf_T extends LayerConfiguration> impl
  /** No-op in this class: the method body is empty. */
  @Override
  public void clearNoiseWeightParams() {}
-  public List<String> variables() {
+  public List<String> getVariables(boolean copy) {
    return variables;
  }
  public List<String> variables(boolean copy) {
    if (copy) {
-      return variables();
+      return new ArrayList<>(getVariables());
    }
    return variables;
  }
@ -585,7 +583,7 @@ public abstract class AbstractLayer<LayerConf_T extends LayerConfiguration> impl
   */
  @Override
  public INDArray getParams() {
-    // throw new RuntimeException("Not implemented");
+    //throw new RuntimeException("Not implemented");
    return null;
  }
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java
@ -184,6 +184,17 @@ public abstract class BaseLayer<LayerConfT extends BaseLayerConfiguration>
    setParams(params, 'f');
  }
  /**
   * Returns the parameters of this layer as currently held in {@code paramsFlattened}.
   *
   * <p>This override performs no computation and throws nothing itself; it simply hands back the
   * field reference (no copy is made here).
   *
   * @return the {@code paramsFlattened} array; presumably a flattened (1d) parameter vector, and
   *     possibly {@code null} if no parameters have been set yet — TODO confirm against callers
   */
  @Override
  public INDArray getParams() {
    return paramsFlattened;
  }
  /** No-op in this class: the method body is empty, so calling it has no effect here. */
  @Override
  public void close() {}
@ -358,7 +369,7 @@ public abstract class BaseLayer<LayerConfT extends BaseLayerConfiguration>
  protected void setParams(INDArray params, char order) {
    if (params == null) {
-      log.warn(
+      log.trace(
          "setParams(INDArray params, char order): params is null. Skipping setParams in Layer {}[{}] at index {}",
          getLayerConfiguration().getLayerName(),
          getClass().getSimpleName(),
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DefaultParamInitializer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DefaultParamInitializer.java
@ -110,14 +110,14 @@ public class DefaultParamInitializer extends AbstractParamInitializer {
        INDArray weightView = paramsView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(0, nWeightParams));
        params.put(WEIGHT_KEY, createWeightMatrix(layerConf, weightView, initializeParams));
-        layerConf.getNetConfiguration().addNetWideVariable(WEIGHT_KEY);
+        layerConf.addVariable(WEIGHT_KEY);
        long offset = nWeightParams;
        if(hasBias(layerConf)){
            INDArray biasView = paramsView.get(NDArrayIndex.interval(0,0,true),
                    NDArrayIndex.interval(offset, offset + nOut));
            params.put(BIAS_KEY, createBias(layerConf, biasView, initializeParams));
-            layerConf.getNetConfiguration().addNetWideVariable(BIAS_KEY);
+            layerConf.addVariable(BIAS_KEY);
            offset += nOut;
        }
@ -125,7 +125,7 @@ public class DefaultParamInitializer extends AbstractParamInitializer {
            INDArray gainView = paramsView.get(NDArrayIndex.interval(0,0,true),
                    NDArrayIndex.interval(offset, offset + nOut));
            params.put(GAIN_KEY, createGain(conf, gainView, initializeParams));
-            conf.getNetConfiguration().addNetWideVariable(GAIN_KEY);
+            conf.addVariable(GAIN_KEY);
        }
        return params;
--- a/cavis-native/cavis-native-cpu/src/main/java/org/nd4j/linalg/cpu/nativecpu/ops/CpuOpContext.java
+++ b/cavis-native/cavis-native-cpu/src/main/java/org/nd4j/linalg/cpu/nativecpu/ops/CpuOpContext.java
@ -50,7 +50,8 @@ public class CpuOpContext extends BaseOpContext implements OpContext, Deallocata
    @Override
    public void close() {
-        // no-op
+        nativeOps.ctxPurge(context);
        context.deallocate();
    }
    @Override
--- a/cavis-native/cavis-native-jcublas/src/main/java/org/nd4j/jita/workspace/CudaWorkspace.java
+++ b/cavis-native/cavis-native-jcublas/src/main/java/org/nd4j/jita/workspace/CudaWorkspace.java
@ -20,6 +20,8 @@
 package org.nd4j.jita.workspace;
 import java.util.List;
 import java.util.Queue;
 import lombok.NonNull;
 import lombok.extern.slf4j.Slf4j;
 import lombok.val;
@ -39,10 +41,6 @@ import org.nd4j.linalg.exception.ND4JIllegalStateException;
 import org.nd4j.linalg.factory.Nd4j;
 import org.nd4j.nativeblas.NativeOpsHolder;
 import java.util.List;
 import java.util.Queue;
 /**
 * CUDA-aware MemoryWorkspace implementation
 *
@ -51,395 +49,489 @@ import java.util.Queue;
@Slf4j
 public class CudaWorkspace extends Nd4jWorkspace {
  /**
   * Creates a CUDA workspace from the given configuration by delegating to the superclass
   * constructor.
   *
   * @param configuration workspace configuration; annotated {@code @NonNull}
   */
  public CudaWorkspace(@NonNull WorkspaceConfiguration configuration) {
    super(configuration);
  }
-    public CudaWorkspace(@NonNull WorkspaceConfiguration configuration) {
+  public CudaWorkspace(@NonNull WorkspaceConfiguration configuration, @NonNull String workspaceId) {
-        super(configuration);
+    super(configuration, workspaceId);
  }
  /**
   * Creates a CUDA workspace with an explicit id and records the given device id.
   *
   * <p>Configuration and id are passed to the superclass constructor; the device id is then stored
   * in the {@code deviceId} field.
   *
   * @param configuration workspace configuration; annotated {@code @NonNull}
   * @param workspaceId id of the workspace; annotated {@code @NonNull}
   * @param deviceId value assigned to {@code deviceId}; not annotated {@code @NonNull}, so
   *     presumably {@code null} is permitted — TODO confirm
   */
  public CudaWorkspace(
      @NonNull WorkspaceConfiguration configuration,
      @NonNull String workspaceId,
      Integer deviceId) {
    super(configuration, workspaceId);
    this.deviceId = deviceId;
  }
  @Override
  protected void init() {
    if (workspaceConfiguration.getPolicyLocation() == LocationPolicy.MMAP) {
      throw new ND4JIllegalStateException("CUDA do not support MMAP workspaces yet");
    }
-    public CudaWorkspace(@NonNull WorkspaceConfiguration configuration, @NonNull String workspaceId) {
+    super.init();
-        super(configuration, workspaceId);
+
    if (currentSize.get() > 0) {
      log.debug("Allocating {} bytes at DEVICE & HOST space...", currentSize.get());
      isInit.set(true);
      long bytes = currentSize.get();
      log.debug(
          "Allocating [{}] workspace on device_{}, {} bytes...",
          id,
          Nd4j.getAffinityManager().getDeviceForCurrentThread(),
          bytes);
      if (isDebug.get()) {
        Nd4j.getWorkspaceManager().printAllocationStatisticsForCurrentThread();
      }
      Pointer ptr = memoryManager.allocate((bytes + SAFETY_OFFSET), MemoryKind.HOST, false);
      if (ptr == null) throw new ND4JIllegalStateException("Can't allocate memory for workspace");
      workspace.setHostPointer(new PagedPointer(ptr));
      if (workspaceConfiguration.getPolicyMirroring() != MirroringPolicy.HOST_ONLY) {
        workspace.setDevicePointer(
            new PagedPointer(
                memoryManager.allocate((bytes + SAFETY_OFFSET), MemoryKind.DEVICE, false)));
        AllocationsTracker.getInstance()
            .markAllocated(
                AllocationKind.GENERAL,
                Nd4j.getAffinityManager().getDeviceForCurrentThread(),
                bytes + SAFETY_OFFSET);
        MemoryTracker.getInstance()
            .incrementWorkspaceAllocatedAmount(
                Nd4j.getAffinityManager().getDeviceForCurrentThread(), bytes + SAFETY_OFFSET);
        // if the base pointer isn't aligned to 16 bytes (128 bits), adjust the offset accordingly
        val addr = workspace.getDevicePointer().address();
        val div = addr % alignmentBase;
        if (div != 0) {
          deviceOffset.set(alignmentBase - div);
          hostOffset.set(alignmentBase - div);
        }
      }
    }
  }
  /**
   * Convenience overload that allocates with {@link MemoryKind#DEVICE}.
   *
   * <p>Delegates to the four-argument {@code alloc} overload, passing {@code MemoryKind.DEVICE} as
   * the memory kind and forwarding the other arguments unchanged.
   *
   * @param requiredMemory requested amount of memory, forwarded as-is (presumably in bytes — TODO
   *     confirm)
   * @param type data type forwarded to the four-argument overload
   * @param initialize forwarded to the four-argument overload
   * @return whatever the four-argument overload returns for a DEVICE allocation
   */
  @Override
  public PagedPointer alloc(long requiredMemory, DataType type, boolean initialize) {
    return this.alloc(requiredMemory, MemoryKind.DEVICE, type, initialize);
  }
  @Override
  public synchronized void destroyWorkspace(boolean extended) {
    val size = currentSize.getAndSet(0);
    reset();
    if (extended) clearExternalAllocations();
    clearPinnedAllocations(extended);
    if (workspace.getHostPointer() != null)
      NativeOpsHolder.getInstance().getDeviceNativeOps().freeHost(workspace.getHostPointer());
    if (workspace.getDevicePointer() != null) {
      NativeOpsHolder.getInstance()
          .getDeviceNativeOps()
          .freeDevice(workspace.getDevicePointer(), 0);
      AllocationsTracker.getInstance()
          .markReleased(
              AllocationKind.GENERAL,
              Nd4j.getAffinityManager().getDeviceForCurrentThread(),
              size + SAFETY_OFFSET);
      MemoryTracker.getInstance()
          .decrementWorkspaceAmount(
              Nd4j.getAffinityManager().getDeviceForCurrentThread(), size + SAFETY_OFFSET);
    }
-    public CudaWorkspace(@NonNull WorkspaceConfiguration configuration, @NonNull String workspaceId, Integer deviceId) {
+    workspace.setDevicePointer(null);
-        super(configuration, workspaceId);
+    workspace.setHostPointer(null);
-        this.deviceId = deviceId;
+  }
  @Override
  public PagedPointer alloc(
      long requiredMemory, MemoryKind kind, DataType type, boolean initialize) {
    long numElements = requiredMemory / Nd4j.sizeOfDataType(type);
    // alignment
    if (requiredMemory % alignmentBase != 0)
      requiredMemory += alignmentBase - (requiredMemory % alignmentBase);
    if (!isUsed.get()) {
      if (disabledCounter.incrementAndGet() % 10 == 0)
        log.warn(
            "Workspace was turned off, and wasn't enabled after {} allocations",
            disabledCounter.get());
      if (kind == MemoryKind.DEVICE) {
        val pointer =
            new PagedPointer(
                memoryManager.allocate(requiredMemory, MemoryKind.DEVICE, initialize), numElements);
        externalAllocations.add(new PointersPair(null, pointer));
        MemoryTracker.getInstance()
            .incrementWorkspaceAllocatedAmount(
                Nd4j.getAffinityManager().getDeviceForCurrentThread(), requiredMemory);
        return pointer;
      } else {
        val pointer =
            new PagedPointer(
                memoryManager.allocate(requiredMemory, MemoryKind.HOST, initialize), numElements);
        externalAllocations.add(new PointersPair(pointer, null));
        return pointer;
      }
    }
-    @Override
+    boolean trimmer =
-    protected void init() {
+        (workspaceConfiguration.getPolicyReset() == ResetPolicy.ENDOFBUFFER_REACHED
-        if (workspaceConfiguration.getPolicyLocation() == LocationPolicy.MMAP) {
+                && requiredMemory + cycleAllocations.get() > initialBlockSize.get()
-            throw new ND4JIllegalStateException("CUDA do not support MMAP workspaces yet");
+                && initialBlockSize.get() > 0
                && kind == MemoryKind.DEVICE)
            || trimmedMode.get();
    if (trimmer
        && workspaceConfiguration.getPolicySpill() == SpillPolicy.REALLOCATE
        && !trimmedMode.get()) {
      trimmedMode.set(true);
      trimmedStep.set(stepsCount.get());
    }
    if (kind == MemoryKind.DEVICE) {
      if (deviceOffset.get() + requiredMemory <= currentSize.get()
          && !trimmer
          && Nd4j.getWorkspaceManager().getDebugMode() != DebugMode.SPILL_EVERYTHING) {
        cycleAllocations.addAndGet(requiredMemory);
        long prevOffset = deviceOffset.getAndAdd(requiredMemory);
        if (workspaceConfiguration.getPolicyMirroring() == MirroringPolicy.HOST_ONLY) return null;
        val ptr = workspace.getDevicePointer().withOffset(prevOffset, numElements);
        log.debug(
            "Workspace [{}] device_{}: alloc array of {} bytes, capacity of {} elements; prevOffset: {}; newOffset: {}; size: {}; address: {}",
            id,
            Nd4j.getAffinityManager().getDeviceForCurrentThread(),
            requiredMemory,
            numElements,
            prevOffset,
            deviceOffset.get(),
            currentSize.get(),
            ptr.address());
        if (initialize) {
          val context = AtomicAllocator.getInstance().getDeviceContext();
          int ret =
              NativeOpsHolder.getInstance()
                  .getDeviceNativeOps()
                  .memsetAsync(ptr, 0, requiredMemory, 0, context.getSpecialStream());
          if (ret == 0)
            throw new ND4JIllegalStateException(
                "memset failed device_" + Nd4j.getAffinityManager().getDeviceForCurrentThread());
          context.syncSpecialStream();
        }
-        super.init();
+        return ptr;
      } else {
-        if (currentSize.get() > 0) {
+        // spill
-            //log.info("Allocating {} bytes at DEVICE & HOST space...", currentSize.get());
+        if (workspaceConfiguration.getPolicyReset() == ResetPolicy.ENDOFBUFFER_REACHED
-            isInit.set(true);
+            && currentSize.get() > 0
-
+            && !trimmer
-            long bytes = currentSize.get();
+            && Nd4j.getWorkspaceManager().getDebugMode() != DebugMode.SPILL_EVERYTHING) {
-
+          // log.info("End of space reached. Current offset: {}; requiredMemory: {}",
-            if (isDebug.get())
+          // deviceOffset.get(), requiredMemory);
-                log.info("Allocating [{}] workspace on device_{}, {} bytes...", id, Nd4j.getAffinityManager().getDeviceForCurrentThread(), bytes);
+          deviceOffset.set(0);
-
+          resetPlanned.set(true);
-            if (isDebug.get()) {
+          return alloc(requiredMemory, kind, type, initialize);
                Nd4j.getWorkspaceManager().printAllocationStatisticsForCurrentThread();
            }
            Pointer ptr = memoryManager.allocate((bytes + SAFETY_OFFSET), MemoryKind.HOST, false);
            if (ptr == null)
                throw new ND4JIllegalStateException("Can't allocate memory for workspace");
            workspace.setHostPointer(new PagedPointer(ptr));
            if (workspaceConfiguration.getPolicyMirroring() != MirroringPolicy.HOST_ONLY) {
                workspace.setDevicePointer(new PagedPointer(memoryManager.allocate((bytes + SAFETY_OFFSET), MemoryKind.DEVICE, false)));
                AllocationsTracker.getInstance().markAllocated(AllocationKind.GENERAL, Nd4j.getAffinityManager().getDeviceForCurrentThread(), bytes + SAFETY_OFFSET);
                MemoryTracker.getInstance().incrementWorkspaceAllocatedAmount(Nd4j.getAffinityManager().getDeviceForCurrentThread(), bytes + SAFETY_OFFSET);
                // if base pointer isn't aligned to 16 bytes (128 bits) - adjust the offfset then
                val addr = workspace.getDevicePointer().address();
                val div = addr % alignmentBase;
                if (div != 0) {
                    deviceOffset.set(alignmentBase - div);
                    hostOffset.set(alignmentBase - div);
                }
            }
        }
    }
    @Override
    public PagedPointer alloc(long requiredMemory, DataType type, boolean initialize) {
 	    return this.alloc(requiredMemory, MemoryKind.DEVICE, type, initialize);
    }
    @Override
    public synchronized void destroyWorkspace(boolean extended) {
        val size = currentSize.getAndSet(0);
        reset();
        if (extended)
            clearExternalAllocations();
        clearPinnedAllocations(extended);
        if (workspace.getHostPointer() != null)
            NativeOpsHolder.getInstance().getDeviceNativeOps().freeHost(workspace.getHostPointer());
        if (workspace.getDevicePointer() != null) {
            NativeOpsHolder.getInstance().getDeviceNativeOps().freeDevice(workspace.getDevicePointer(), 0);
            AllocationsTracker.getInstance().markReleased(AllocationKind.GENERAL, Nd4j.getAffinityManager().getDeviceForCurrentThread(), size + SAFETY_OFFSET);
            MemoryTracker.getInstance().decrementWorkspaceAmount(Nd4j.getAffinityManager().getDeviceForCurrentThread(), size + SAFETY_OFFSET);
        }
-        workspace.setDevicePointer(null);
+        if (!trimmer) spilledAllocationsSize.addAndGet(requiredMemory);
-        workspace.setHostPointer(null);
+        else pinnedAllocationsSize.addAndGet(requiredMemory);
-    }
+        log.debug(
            "Workspace [{}] device_{}: spilled DEVICE array of {} bytes, capacity of {} elements",
            id,
            Nd4j.getAffinityManager().getDeviceForCurrentThread(),
            requiredMemory,
            numElements);
        val shape =
            new AllocationShape(
                requiredMemory / Nd4j.sizeOfDataType(type), Nd4j.sizeOfDataType(type), type);
-    @Override
+        cycleAllocations.addAndGet(requiredMemory);
    public PagedPointer alloc(long requiredMemory, MemoryKind kind, DataType type, boolean initialize) {
        long numElements = requiredMemory / Nd4j.sizeOfDataType(type);
-        // alignment
+        if (workspaceConfiguration.getPolicyMirroring() == MirroringPolicy.HOST_ONLY) return null;
        if (requiredMemory % alignmentBase != 0)
            requiredMemory += alignmentBase - (requiredMemory % alignmentBase);
-        if (!isUsed.get()) {
+        switch (workspaceConfiguration.getPolicySpill()) {
-            if (disabledCounter.incrementAndGet() % 10 == 0)
+          case REALLOCATE:
-                log.warn("Worskpace was turned off, and wasn't enabled after {} allocations", disabledCounter.get());
+          case EXTERNAL:
            if (!trimmer) {
              externalCount.incrementAndGet();
              //
              // AtomicAllocator.getInstance().getMemoryHandler().getMemoryProvider().malloc(shape,
              // null, AllocationStatus.DEVICE).getDevicePointer()
              val pointer =
                  new PagedPointer(
                      memoryManager.allocate(requiredMemory, MemoryKind.DEVICE, initialize),
                      numElements);
              pointer.isLeaked();
-            if (kind == MemoryKind.DEVICE) {
+              val pp = new PointersPair(null, pointer);
-                val pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.DEVICE, initialize), numElements);
+              pp.setRequiredMemory(requiredMemory);
-                externalAllocations.add(new PointersPair(null, pointer));
+              externalAllocations.add(pp);
-                MemoryTracker.getInstance().incrementWorkspaceAllocatedAmount(Nd4j.getAffinityManager().getDeviceForCurrentThread(), requiredMemory);
+
-                return pointer;
+              MemoryTracker.getInstance()
                  .incrementWorkspaceAllocatedAmount(
                      Nd4j.getAffinityManager().getDeviceForCurrentThread(), requiredMemory);
              return pointer;
            } else {
-                val pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.HOST, initialize), numElements);
+              pinnedCount.incrementAndGet();
-                externalAllocations.add(new PointersPair(pointer, null));
+
-                return pointer;
+              val pointer =
                  new PagedPointer(
                      memoryManager.allocate(requiredMemory, MemoryKind.DEVICE, initialize),
                      numElements);
              pointer.isLeaked();
              pinnedAllocations.add(
                  new PointersPair(stepsCount.get(), requiredMemory, null, pointer));
              MemoryTracker.getInstance()
                  .incrementWorkspaceAllocatedAmount(
                      Nd4j.getAffinityManager().getDeviceForCurrentThread(), requiredMemory);
              return pointer;
            }
          case FAIL:
          default:
            {
              throw new ND4JIllegalStateException("Can't allocate memory: Workspace is full");
            }
        }
      }
    } else if (kind == MemoryKind.HOST) {
      if (hostOffset.get() + requiredMemory <= currentSize.get()
          && !trimmer
          && Nd4j.getWorkspaceManager().getDebugMode() != DebugMode.SPILL_EVERYTHING) {
        long prevOffset = hostOffset.getAndAdd(requiredMemory);
        val ptr = workspace.getHostPointer().withOffset(prevOffset, numElements);
        // && workspaceConfiguration.getPolicyMirroring() == MirroringPolicy.HOST_ONLY
        if (initialize) Pointer.memset(ptr, 0, requiredMemory);
        return ptr;
      } else {
        //     log.info("Spilled HOST array of {} bytes, capacity of {} elements", requiredMemory,
        // numElements);
        if (workspaceConfiguration.getPolicyReset() == ResetPolicy.ENDOFBUFFER_REACHED
            && currentSize.get() > 0
            && !trimmer
            && Nd4j.getWorkspaceManager().getDebugMode() != DebugMode.SPILL_EVERYTHING) {
          // log.info("End of space reached. Current offset: {}; requiredMemory: {}",
          // deviceOffset.get(), requiredMemory);
          hostOffset.set(0);
          // resetPlanned.set(true);
          return alloc(requiredMemory, kind, type, initialize);
        }
-        boolean trimmer = (workspaceConfiguration.getPolicyReset() == ResetPolicy.ENDOFBUFFER_REACHED && requiredMemory + cycleAllocations.get() > initialBlockSize.get() && initialBlockSize.get() > 0 && kind == MemoryKind.DEVICE) || trimmedMode.get();
+        val shape =
            new AllocationShape(
                requiredMemory / Nd4j.sizeOfDataType(type), Nd4j.sizeOfDataType(type), type);
-        if (trimmer && workspaceConfiguration.getPolicySpill() == SpillPolicy.REALLOCATE && !trimmedMode.get()) {
+        switch (workspaceConfiguration.getPolicySpill()) {
-            trimmedMode.set(true);
+          case REALLOCATE:
-            trimmedStep.set(stepsCount.get());
+          case EXTERNAL:
-        }
+            if (!trimmer) {
              // memoryManager.allocate(requiredMemory, MemoryKind.HOST, true)
              // AtomicAllocator.getInstance().getMemoryHandler().getMemoryProvider().malloc(shape,
              // null, AllocationStatus.DEVICE).getDevicePointer()
              PagedPointer pointer =
                  new PagedPointer(
                      memoryManager.allocate(requiredMemory, MemoryKind.HOST, initialize),
                      numElements);
-        if (kind == MemoryKind.DEVICE) {
+              externalAllocations.add(new PointersPair(pointer, null));
-            if (deviceOffset.get() + requiredMemory <= currentSize.get() && !trimmer && Nd4j.getWorkspaceManager().getDebugMode() != DebugMode.SPILL_EVERYTHING) {
+              return pointer;
                cycleAllocations.addAndGet(requiredMemory);
                long prevOffset = deviceOffset.getAndAdd(requiredMemory);
                if (workspaceConfiguration.getPolicyMirroring() == MirroringPolicy.HOST_ONLY)
                    return null;
                val ptr = workspace.getDevicePointer().withOffset(prevOffset, numElements);
                if (isDebug.get())
                    log.info("Workspace [{}] device_{}: alloc array of {} bytes, capacity of {} elements; prevOffset: {}; newOffset: {}; size: {}; address: {}", id, Nd4j.getAffinityManager().getDeviceForCurrentThread(), requiredMemory, numElements, prevOffset, deviceOffset.get(), currentSize.get(), ptr.address());
                if (initialize) {
                    val context = AtomicAllocator.getInstance().getDeviceContext();
                    int ret = NativeOpsHolder.getInstance().getDeviceNativeOps().memsetAsync(ptr, 0, requiredMemory, 0, context.getSpecialStream());
                    if (ret == 0)
                        throw new ND4JIllegalStateException("memset failed device_" + Nd4j.getAffinityManager().getDeviceForCurrentThread());
                    context.syncSpecialStream();
                }
                return ptr;
            } else {
              // AtomicAllocator.getInstance().getMemoryHandler().getMemoryProvider().malloc(shape,
              // null, AllocationStatus.DEVICE).getDevicePointer()
              PagedPointer pointer =
                  new PagedPointer(
                      memoryManager.allocate(requiredMemory, MemoryKind.HOST, initialize),
                      numElements);
              pointer.isLeaked();
-                // spill
+              pinnedAllocations.add(new PointersPair(stepsCount.get(), 0L, pointer, null));
-                if (workspaceConfiguration.getPolicyReset() == ResetPolicy.ENDOFBUFFER_REACHED && currentSize.get() > 0 && !trimmer && Nd4j.getWorkspaceManager().getDebugMode() != DebugMode.SPILL_EVERYTHING) {
+              return pointer;
                    //log.info("End of space reached. Current offset: {}; requiredMemory: {}", deviceOffset.get(), requiredMemory);
                    deviceOffset.set(0);
                    resetPlanned.set(true);
                    return alloc(requiredMemory, kind, type, initialize);
                }
                if (!trimmer)
                    spilledAllocationsSize.addAndGet(requiredMemory);
                else
                    pinnedAllocationsSize.addAndGet(requiredMemory);
                if (isDebug.get()) {
                    log.info("Workspace [{}] device_{}: spilled DEVICE array of {} bytes, capacity of {} elements", id, Nd4j.getAffinityManager().getDeviceForCurrentThread(), requiredMemory, numElements);
                }
                val shape = new AllocationShape(requiredMemory / Nd4j.sizeOfDataType(type), Nd4j.sizeOfDataType(type), type);
                cycleAllocations.addAndGet(requiredMemory);
                if (workspaceConfiguration.getPolicyMirroring() == MirroringPolicy.HOST_ONLY)
                    return null;
                switch (workspaceConfiguration.getPolicySpill()) {
                    case REALLOCATE:
                    case EXTERNAL:
                        if (!trimmer) {
                            externalCount.incrementAndGet();
                            //
                            //AtomicAllocator.getInstance().getMemoryHandler().getMemoryProvider().malloc(shape, null, AllocationStatus.DEVICE).getDevicePointer()
                            val pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.DEVICE, initialize), numElements);
                            pointer.isLeaked();
                            val pp = new PointersPair(null, pointer);
                            pp.setRequiredMemory(requiredMemory);
                            externalAllocations.add(pp);
                            MemoryTracker.getInstance().incrementWorkspaceAllocatedAmount(Nd4j.getAffinityManager().getDeviceForCurrentThread(), requiredMemory);
                            return pointer;
                        } else {
                            pinnedCount.incrementAndGet();
                            val pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.DEVICE, initialize), numElements);
                            pointer.isLeaked();
                            pinnedAllocations.add(new PointersPair(stepsCount.get(), requiredMemory, null, pointer));
                            MemoryTracker.getInstance().incrementWorkspaceAllocatedAmount(Nd4j.getAffinityManager().getDeviceForCurrentThread(), requiredMemory);
                            return pointer;
                        }
                    case FAIL:
                    default: {
                        throw new ND4JIllegalStateException("Can't allocate memory: Workspace is full");
                    }
                }
            }
-        } else if (kind == MemoryKind.HOST) {
+          case FAIL:
-            if (hostOffset.get() + requiredMemory <= currentSize.get() && !trimmer && Nd4j.getWorkspaceManager().getDebugMode() != DebugMode.SPILL_EVERYTHING) {
+          default:
-
+            {
-                long prevOffset = hostOffset.getAndAdd(requiredMemory);
+              throw new ND4JIllegalStateException("Can't allocate memory: Workspace is full");
                val ptr = workspace.getHostPointer().withOffset(prevOffset, numElements);
                // && workspaceConfiguration.getPolicyMirroring() == MirroringPolicy.HOST_ONLY
                if (initialize)
                    Pointer.memset(ptr, 0, requiredMemory);
                return ptr;
            } else {
           //     log.info("Spilled HOST array of {} bytes, capacity of {} elements", requiredMemory, numElements);
                if (workspaceConfiguration.getPolicyReset() == ResetPolicy.ENDOFBUFFER_REACHED && currentSize.get() > 0 && !trimmer && Nd4j.getWorkspaceManager().getDebugMode() != DebugMode.SPILL_EVERYTHING) {
                    //log.info("End of space reached. Current offset: {}; requiredMemory: {}", deviceOffset.get(), requiredMemory);
                    hostOffset.set(0);
                    //resetPlanned.set(true);
                    return alloc(requiredMemory, kind, type, initialize);
                }
                val shape = new AllocationShape(requiredMemory / Nd4j.sizeOfDataType(type), Nd4j.sizeOfDataType(type), type);
                switch (workspaceConfiguration.getPolicySpill()) {
                    case REALLOCATE:
                    case EXTERNAL:
                        if (!trimmer) {
                            //memoryManager.allocate(requiredMemory, MemoryKind.HOST, true)
                            //AtomicAllocator.getInstance().getMemoryHandler().getMemoryProvider().malloc(shape, null, AllocationStatus.DEVICE).getDevicePointer()
                            PagedPointer pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.HOST, initialize), numElements);
                            externalAllocations.add(new PointersPair(pointer, null));
                            return pointer;
                        } else {
                            //AtomicAllocator.getInstance().getMemoryHandler().getMemoryProvider().malloc(shape, null, AllocationStatus.DEVICE).getDevicePointer()
                            PagedPointer pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.HOST, initialize), numElements);
                            pointer.isLeaked();
                            pinnedAllocations.add(new PointersPair(stepsCount.get(), 0L, pointer, null));
                            return pointer;
                        }
                    case FAIL:
                    default: {
                        throw new ND4JIllegalStateException("Can't allocate memory: Workspace is full");
                    }
                }
            }
        } else throw new ND4JIllegalStateException("Unknown MemoryKind was passed in: " + kind);
        //throw new ND4JIllegalStateException("Shouldn't ever reach this line");
    }
    /**
     * Releases entries from the head of the {@code pinnedAllocations} queue.
     *
     * <p>Entries are examined in queue order. An entry is released when its allocation cycle is
     * more than two steps behind the current value of {@code stepsCount}, or unconditionally when
     * {@code extended} is {@code true}. The loop stops at the first entry that does not qualify,
     * so later entries are left in place even if they would qualify on their own.
     *
     * @param extended when {@code true}, every queued entry is released regardless of its
     *     allocation cycle
     * @throws RuntimeException if the queue reports itself non-empty but {@code peek()} returns
     *     {@code null}
     */
    @Override
    protected void clearPinnedAllocations(boolean extended) {
        if (isDebug.get())
            log.info("Workspace [{}] device_{} threadId {} cycle {}: clearing pinned allocations...", id, Nd4j.getAffinityManager().getDeviceForCurrentThread(), Thread.currentThread().getId(), cyclesCount.get());
        while (!pinnedAllocations.isEmpty()) {
            // Look at the head without removing it: removal only happens once it qualifies.
            val pair = pinnedAllocations.peek();
            if (pair == null)
                throw new RuntimeException();
            long stepNumber = pair.getAllocationCycle();
            long stepCurrent = stepsCount.get();
            if (isDebug.get())
                log.info("Allocation step: {}; Current step: {}", stepNumber, stepCurrent);
            // Release when the entry is more than two steps old, or always in extended mode.
            if (stepNumber + 2 < stepCurrent || extended) {
                pinnedAllocations.remove();
                if (pair.getDevicePointer() != null) {
                    NativeOpsHolder.getInstance().getDeviceNativeOps().freeDevice(pair.getDevicePointer(), 0);
                    MemoryTracker.getInstance().decrementWorkspaceAmount(Nd4j.getAffinityManager().getDeviceForCurrentThread(), pair.getRequiredMemory());
                    // Note: pinnedCount is decremented only on the device-pointer path.
                    pinnedCount.decrementAndGet();
                    if (isDebug.get())
                        log.info("deleting external device allocation ");
                }
                if (pair.getHostPointer() != null) {
                    NativeOpsHolder.getInstance().getDeviceNativeOps().freeHost(pair.getHostPointer());
                    if (isDebug.get())
                        log.info("deleting external host allocation ");
                }
                // Subtract this entry's recorded size from the running pinned-size total.
                val sizez = pair.getRequiredMemory() * -1;
                pinnedAllocationsSize.addAndGet(sizez);
            } else {
                // Head entry is still too recent; stop without inspecting the rest of the queue.
                break;
            }
        }
-    }
+      }
    } else throw new ND4JIllegalStateException("Unknown MemoryKind was passed in: " + kind);
-    @Override
+    // throw new ND4JIllegalStateException("Shouldn't ever reach this line");
-    protected void clearExternalAllocations() {
+  }
        if (isDebug.get())
            log.info("Workspace [{}] device_{} threadId {} guid [{}]: clearing external allocations...", id, Nd4j.getAffinityManager().getDeviceForCurrentThread(), Thread.currentThread().getId(), guid);
-        Nd4j.getExecutioner().commit();
+  @Override
  protected void clearPinnedAllocations(boolean extended) {
-        try {
+    log.debug(
-            for (PointersPair pair : externalAllocations) {
+        "Workspace [{}] device_{} threadId {} cycle {}: clearing pinned allocations...",
-                if (pair.getHostPointer() != null) {
+        id,
-                    NativeOpsHolder.getInstance().getDeviceNativeOps().freeHost(pair.getHostPointer());
+        Nd4j.getAffinityManager().getDeviceForCurrentThread(),
        Thread.currentThread().getId(),
        cyclesCount.get());
-                    if (isDebug.get())
+    while (!pinnedAllocations.isEmpty()) {
-                        log.info("deleting external host allocation... ");
+      val pair = pinnedAllocations.peek();
-                }
+      if (pair == null) throw new RuntimeException();
-                if (pair.getDevicePointer() != null) {
+      long stepNumber = pair.getAllocationCycle();
-                    NativeOpsHolder.getInstance().getDeviceNativeOps().freeDevice(pair.getDevicePointer(), 0);
+      long stepCurrent = stepsCount.get();
-                    if (isDebug.get())
+      log.debug("Allocation step: {}; Current step: {}", stepNumber, stepCurrent);
                        log.info("deleting external device allocation... ");
-                    val sizez = pair.getRequiredMemory();
+      if (stepNumber + 2 < stepCurrent || extended) {
-                    if (sizez != null) {
+        pinnedAllocations.remove();
-                        AllocationsTracker.getInstance().markReleased(AllocationKind.GENERAL, Nd4j.getAffinityManager().getDeviceForCurrentThread(), sizez);
+
-                        MemoryTracker.getInstance().decrementWorkspaceAmount(Nd4j.getAffinityManager().getDeviceForCurrentThread(), sizez);
+        if (pair.getDevicePointer() != null) {
-                    }
+          NativeOpsHolder.getInstance().getDeviceNativeOps().freeDevice(pair.getDevicePointer(), 0);
-                }
+          MemoryTracker.getInstance()
-            }
+              .decrementWorkspaceAmount(
-        } catch (Exception e) {
+                  Nd4j.getAffinityManager().getDeviceForCurrentThread(), pair.getRequiredMemory());
-            log.error("RC: Workspace [{}] device_{} threadId {} guid [{}]: clearing external allocations...", id, Nd4j.getAffinityManager().getDeviceForCurrentThread(), Thread.currentThread().getId(), guid);
+          pinnedCount.decrementAndGet();
-            throw new RuntimeException(e);
+
          log.debug("deleting external device allocation ");
        }
-        spilledAllocationsSize.set(0);
+        if (pair.getHostPointer() != null) {
-        externalCount.set(0);
+          NativeOpsHolder.getInstance().getDeviceNativeOps().freeHost(pair.getHostPointer());
        externalAllocations.clear();
    }
-    @Override
+          log.debug("deleting external host allocation ");
    protected void resetWorkspace() {
        if (currentSize.get() < 1) {
        }
        val sizez = pair.getRequiredMemory() * -1;
        pinnedAllocationsSize.addAndGet(sizez);
      } else {
        break;
      }
    }
  }
-/*
+  @Override
-        if (Nd4j.getExecutioner() instanceof GridExecutioner)
+  protected void clearExternalAllocations() {
            ((GridExecutioner) Nd4j.getExecutioner()).flushQueueBlocking();
-        CudaContext context = (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext();
+    log.debug(
        "Workspace [{}] device_{} threadId {} guid [{}]: clearing external allocations...",
        id,
        Nd4j.getAffinityManager().getDeviceForCurrentThread(),
        Thread.currentThread().getId(),
        guid);
-        //log.info("workspace: {}, size: {}", workspace.getDevicePointer().address(), currentSize.get());
+    Nd4j.getExecutioner().commit();
-        NativeOpsHolder.getInstance().getDeviceNativeOps().memsetAsync(workspace.getDevicePointer(), 0, currentSize.get() + SAFETY_OFFSET, 0, context.getSpecialStream());
+    try {
      for (PointersPair pair : externalAllocations) {
        if (pair.getHostPointer() != null) {
          NativeOpsHolder.getInstance().getDeviceNativeOps().freeHost(pair.getHostPointer());
-        Pointer.memset(workspace.getHostPointer(), 0, currentSize.get() + SAFETY_OFFSET);
+          log.debug("deleting external host allocation... ");
        }
-        context.getSpecialStream().synchronize();
+        if (pair.getDevicePointer() != null) {
-        */
+          NativeOpsHolder.getInstance().getDeviceNativeOps().freeDevice(pair.getDevicePointer(), 0);
          log.debug("deleting external device allocation... ");
          val sizez = pair.getRequiredMemory();
          if (sizez != null) {
            AllocationsTracker.getInstance()
                .markReleased(
                    AllocationKind.GENERAL,
                    Nd4j.getAffinityManager().getDeviceForCurrentThread(),
                    sizez);
            MemoryTracker.getInstance()
                .decrementWorkspaceAmount(
                    Nd4j.getAffinityManager().getDeviceForCurrentThread(), sizez);
          }
        }
      }
    } catch (Exception e) {
      log.error(
          "RC: Workspace [{}] device_{} threadId {} guid [{}]: clearing external allocations...",
          id,
          Nd4j.getAffinityManager().getDeviceForCurrentThread(),
          Thread.currentThread().getId(),
          guid);
      throw new RuntimeException(e);
    }
-    protected PointersPair workspace() {
+    spilledAllocationsSize.set(0);
-        return workspace;
+    externalCount.set(0);
-    }
+    externalAllocations.clear();
  }
-    protected Queue<PointersPair> pinnedPointers() {
+  @Override
-        return pinnedAllocations;
+  protected void resetWorkspace() {
-    }
+    if (currentSize.get() < 1) {}
-    protected List<PointersPair> externalPointers() {
+    /*
-        return externalAllocations;
+    if (Nd4j.getExecutioner() instanceof GridExecutioner)
-    }
+        ((GridExecutioner) Nd4j.getExecutioner()).flushQueueBlocking();
-    @Override
+    CudaContext context = (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext();
    public Deallocator deallocator() {
        return new CudaWorkspaceDeallocator(this);
    }
-    @Override
+    //log.info("workspace: {}, size: {}", workspace.getDevicePointer().address(), currentSize.get());
    public String getUniqueId() {
        return "Workspace_" + getId() + "_" + Nd4j.getDeallocatorService().nextValue();
    }
-    @Override
+    NativeOpsHolder.getInstance().getDeviceNativeOps().memsetAsync(workspace.getDevicePointer(), 0, currentSize.get() + SAFETY_OFFSET, 0, context.getSpecialStream());
    public int targetDevice() {
        return deviceId;
    }
-    @Override
+    Pointer.memset(workspace.getHostPointer(), 0, currentSize.get() + SAFETY_OFFSET);
-    public long getPrimaryOffset() {
+
-        return getDeviceOffset();
+    context.getSpecialStream().synchronize();
-    }
+    */
  }
  /**
   * Accessor for the pointer pair held in this instance's {@code workspace} field.
   *
   * @return the current value of the {@code workspace} field
   */
  protected PointersPair workspace() {
    return this.workspace;
  }
  /**
   * Accessor for the queue of pinned allocations.
   *
   * @return the {@code pinnedAllocations} queue itself (no copy is made)
   */
  protected Queue<PointersPair> pinnedPointers() {
    return this.pinnedAllocations;
  }
  /**
   * Accessor for the list of external allocations.
   *
   * @return the {@code externalAllocations} list itself (no copy is made)
   */
  protected List<PointersPair> externalPointers() {
    return this.externalAllocations;
  }
  /**
   * Creates a deallocator for this workspace.
   *
   * @return a new {@link CudaWorkspaceDeallocator} constructed from this instance; a fresh object
   *     is created on every call
   */
  @Override
  public Deallocator deallocator() {
    final Deallocator workspaceDeallocator = new CudaWorkspaceDeallocator(this);
    return workspaceDeallocator;
  }
  /**
   * Builds an identifier of the form {@code Workspace_<id>_<n>}, where {@code <id>} comes from
   * {@code getId()} and {@code <n>} is the next value drawn from {@code
   * Nd4j.getDeallocatorService()}.
   *
   * <p>A new value is drawn from the deallocator service on each invocation.
   *
   * @return the assembled identifier string
   */
  @Override
  public String getUniqueId() {
    // Parts are appended in the same left-to-right order as they are evaluated.
    return new StringBuilder("Workspace_")
        .append(getId())
        .append("_")
        .append(Nd4j.getDeallocatorService().nextValue())
        .toString();
  }
  /**
   * Reports the device associated with this workspace.
   *
   * @return the value of the {@code deviceId} field
   */
  @Override
  public int targetDevice() {
    return this.deviceId;
  }
  /**
   * Reports the primary offset of this workspace, which for this implementation is the device
   * offset.
   *
   * @return the result of {@code getDeviceOffset()}
   */
  @Override
  public long getPrimaryOffset() {
    return this.getDeviceOffset();
  }
 }
--- a/cavis-native/cavis-native-jcublas/src/main/java/org/nd4j/jita/workspace/CudaWorkspaceDeallocator.java
+++ b/cavis-native/cavis-native-jcublas/src/main/java/org/nd4j/jita/workspace/CudaWorkspaceDeallocator.java
@ -48,7 +48,7 @@ public class CudaWorkspaceDeallocator implements Deallocator {
    @Override
    public void deallocate() {
-        log.trace("Deallocating CUDA workspace");
+        log.debug("Deallocating CUDA workspace");
        // purging workspace planes
        if (pointersPair != null) {
--- a/cavis-native/cavis-native-jcublas/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaExecutioner.java
+++ b/cavis-native/cavis-native-jcublas/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaExecutioner.java
@ -1582,7 +1582,7 @@ public class CudaExecutioner extends DefaultOpExecutioner {
        }
        if (nativeOps.lastErrorCode() != 0)
-            throw new RuntimeException(nativeOps.lastErrorMessage());
+            throw new RuntimeException(nativeOps.lastErrorMessage() + " error code: " + nativeOps.lastErrorCode());
        profilingConfigurableHookOut(op, oc, st);
--- a/cavis-native/cavis-native-jcublas/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaOpContext.java
+++ b/cavis-native/cavis-native-jcublas/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaOpContext.java
@ -56,7 +56,8 @@ public class CudaOpContext extends BaseOpContext implements OpContext, Deallocat
    @Override
    public void close() {
-        // no-op
+        nativeOps.ctxPurge(context);
        context.deallocate();
    }
    @Override