From 82e65bdf59fc5bd83df94703c38ec4ef4e2d264b Mon Sep 17 00:00:00 2001
From: brian <brian@brutex.de>
Date: Mon, 17 Apr 2023 09:41:12 +0200
Subject: [PATCH] Playing with some new code 2 - clean build/test

Signed-off-by: brian <brian@brutex.de>
---
 .../nd4j/linalg/workspace/WorkspaceMgr.java   |  10 +-
 .../eval/EvaluationToolsTests.java            |   2 +-
 .../nn/conf/layers/Cnn3DLossLayer.java        |   2 +
 .../nn/conf/layers/CnnLossLayer.java          |   2 +
 .../nn/conf/layers/GravesLSTM.java            |   4 +
 .../nn/conf/layers/RnnLossLayer.java          |   2 +
 .../nn/conf/layers/SubsamplingLayer.java      |   1 +
 .../nn/layers/AbstractLayer.java              |  12 +-
 .../deeplearning4j/nn/layers/BaseLayer.java   |  13 +-
 .../nn/params/DefaultParamInitializer.java    |   6 +-
 .../cpu/nativecpu/ops/CpuOpContext.java       |   3 +-
 .../nd4j/jita/workspace/CudaWorkspace.java    | 768 ++++++++++--------
 .../workspace/CudaWorkspaceDeallocator.java   |   2 +-
 .../ops/executioner/CudaExecutioner.java      |   2 +-
 .../ops/executioner/CudaOpContext.java        |   3 +-
 15 files changed, 473 insertions(+), 359 deletions(-)
diff --git a/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/linalg/workspace/WorkspaceMgr.java b/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/linalg/workspace/WorkspaceMgr.java
index 90b77f449..f096a1f6f 100644
--- a/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/linalg/workspace/WorkspaceMgr.java
+++ b/cavis-dnn/cavis-dnn-api/src/main/java/org/nd4j/linalg/workspace/WorkspaceMgr.java
@@ -67,7 +67,7 @@ public interface WorkspaceMgr<T extends Enum<T>> {
 
     /**
      * Set arrays to be scoped out (not in any workspace) for the specified array type.
-     * This means that create, dup, leverage etc methods will return result arrays that are not attached to any workspace
+     * This means that create, dup, leverage etc. methods will return result arrays that are not attached to any workspace
      *
      * @param arrayType Array type to set scoped out for
      */
@@ -120,7 +120,7 @@ public interface WorkspaceMgr<T extends Enum<T>> {
     boolean isWorkspaceOpen(T arrayType);
 
     /**
-     * Assert thath the workspace for the specified array type is open.
+     * Assert that the workspace for the specified array type is open.
      * For array types that are set to scoped out, this will be treated as a no-op
      * @param arrayType Array type to check
      * @param msg       May be null. If non-null: include this in the exception
@@ -129,7 +129,7 @@ public interface WorkspaceMgr<T extends Enum<T>> {
     void assertOpen(T arrayType, String msg) throws ND4JWorkspaceException;
 
     /**
-     * Assert thath the workspace for the specified array type is not open.
+     * Assert that the workspace for the specified array type is not open.
      * For array types that are set to scoped out, this will be treated as a no-op
      * @param arrayType Array type to check
      * @param msg       May be null. If non-null: include this in the exception
@@ -193,7 +193,7 @@ public interface WorkspaceMgr<T extends Enum<T>> {
 
     /**
      * Create an uninitialized array in the specified array type's workspace (or detached if none is specified).
-     * Equivalent to {@link org.nd4j.linalg.factory.Nd4j#createUninitialized(int)} (int...)}, other than the array location
+     * Equivalent to {@link org.nd4j.linalg.factory.Nd4j#createUninitialized(int...)}, other than the array location
      * @param arrayType Array type
      * @param dataType  Data type of the created array
      * @param shape     Shape
@@ -231,7 +231,7 @@ public interface WorkspaceMgr<T extends Enum<T>> {
 
     /**
      * Cast the specified array to the specified datatype.<br>
-     * If the array is already the correct type, the bahaviour depends on the 'dupIfCorrectType' argument.<br>
+     * If the array is already the correct type, the behaviour depends on the 'dupIfCorrectType' argument.<br>
      * dupIfCorrectType = false && toCast.dataType() == dataType: return input array as-is (unless workspace is wrong)<br>
      * dupIfCorrectType = true && toCast.dataType() == dataType: duplicate the array into the specified workspace<br>
      * @param arrayType        Array type
diff --git a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/EvaluationToolsTests.java b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/EvaluationToolsTests.java
index aa9b2686f..7cf2431f9 100644
--- a/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/EvaluationToolsTests.java
+++ b/cavis-dnn/cavis-dnn-core/src/test/java/org/deeplearning4j/eval/EvaluationToolsTests.java
@@ -81,7 +81,7 @@ public class EvaluationToolsTests extends BaseDL4JTest {
 
 
             String str = EvaluationTools.rocChartToHtml(roc);
-            //            System.out.println(str);
+            System.out.println(str);
         }
     }
 
diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Cnn3DLossLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Cnn3DLossLayer.java
index 79782d956..8ae76bd41 100644
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Cnn3DLossLayer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/Cnn3DLossLayer.java
@@ -58,6 +58,8 @@ public class Cnn3DLossLayer extends FeedForwardLayer {
                              int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) {
         setNetConfiguration(conf);
         LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
+        runInheritance();
+
         org.deeplearning4j.nn.layers.convolution.Cnn3DLossLayer ret =
                         new org.deeplearning4j.nn.layers.convolution.Cnn3DLossLayer(lconf, networkDataType);
         ret.addTrainingListeners(trainingListeners);
diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CnnLossLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CnnLossLayer.java
index b4f93482d..50e917dac 100644
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CnnLossLayer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/CnnLossLayer.java
@@ -63,6 +63,8 @@ public class CnnLossLayer extends FeedForwardLayer {
                              int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) {
         setNetConfiguration(conf);
         LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
+        runInheritance();
+
         org.deeplearning4j.nn.layers.convolution.CnnLossLayer ret =
                         new org.deeplearning4j.nn.layers.convolution.CnnLossLayer(lconf, networkDataType);
         ret.addTrainingListeners(trainingListeners);
diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesLSTM.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesLSTM.java
index 1cdd16dba..9c50ccba4 100644
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesLSTM.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/GravesLSTM.java
@@ -77,7 +77,11 @@ public class GravesLSTM extends AbstractLSTM {
     public Layer instantiate(NeuralNetConfiguration conf, Collection<TrainingListener> trainingListeners,
                              int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) {
         LayerValidation.assertNInNOutSet("GravesLSTM", getLayerName(), layerIndex, getNIn(), getNOut());
+
         LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
+        lconf.setNetConfiguration(conf);
+        runInheritance();
+
         org.deeplearning4j.nn.layers.recurrent.GravesLSTM ret =
                         new org.deeplearning4j.nn.layers.recurrent.GravesLSTM(lconf, networkDataType);
 
diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnLossLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnLossLayer.java
index 4742b9e5b..e7db009ed 100644
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnLossLayer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/RnnLossLayer.java
@@ -61,6 +61,8 @@ public class RnnLossLayer extends FeedForwardLayer {
                              int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) {
 
         LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
+        lconf.setNetConfiguration(conf);
+        runInheritance();
 
         org.deeplearning4j.nn.layers.recurrent.RnnLossLayer ret =
                         new org.deeplearning4j.nn.layers.recurrent.RnnLossLayer(lconf, networkDataType);
diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java
index bddd9fc30..55e766133 100644
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/conf/layers/SubsamplingLayer.java
@@ -135,6 +135,7 @@ public class SubsamplingLayer extends NoParamLayer {
                                                        Collection<TrainingListener> trainingListeners, int layerIndex, INDArray layerParamsView,
                                                        boolean initializeParams, DataType networkDataType) {
         LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
+        runInheritance();
 
         org.deeplearning4j.nn.layers.convolution.subsampling.SubsamplingLayer ret =
                 new org.deeplearning4j.nn.layers.convolution.subsampling.SubsamplingLayer(lconf, networkDataType);
diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java
index 9774b8c07..d14f20d85 100644
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/AbstractLayer.java
@@ -24,6 +24,8 @@ import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
 import java.util.Map;
+import java.util.stream.Collectors;
+
 import lombok.*;
 import net.brutex.ai.dnn.api.IModel;
 import org.deeplearning4j.nn.api.ITraininableLayerConfiguration;
@@ -328,13 +330,9 @@ public abstract class AbstractLayer<LayerConf_T extends LayerConfiguration> impl
   @Override
   public void clearNoiseWeightParams() {}
 
-  public List<String> variables() {
-    return variables;
-  }
-
-  public List<String> variables(boolean copy) {
+  public List<String> getVariables(boolean copy) {
     if (copy) {
-      return variables();
+      return new ArrayList<>(getVariables());
     }
     return variables;
   }
@@ -585,7 +583,7 @@ public abstract class AbstractLayer<LayerConf_T extends LayerConfiguration> impl
    */
   @Override
   public INDArray getParams() {
-    // throw new RuntimeException("Not implemented");
+    //throw new RuntimeException("Not implemented");
     return null;
   }
 
diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java
index 01aede19f..1a055c528 100644
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java
@@ -184,6 +184,17 @@ public abstract class BaseLayer<LayerConfT extends BaseLayerConfiguration>
     setParams(params, 'f');
   }
 
+  /**
+   * * The AbstractLayer does not implement Params, ParamTable and GradientView. A RuntimeException
+   * * will be triggered when calling this.
+   *
+   * @return 1d parameter vector
+   */
+  @Override
+  public INDArray getParams() {
+    return paramsFlattened;
+  }
+
   /** */
   @Override
   public void close() {}
@@ -358,7 +369,7 @@ public abstract class BaseLayer<LayerConfT extends BaseLayerConfiguration>
 
   protected void setParams(INDArray params, char order) {
     if (params == null) {
-      log.warn(
+      log.trace(
           "setParams(INDArray params, char order): params is null. Skipping setParams in Layer {}[{}] at index {}",
           getLayerConfiguration().getLayerName(),
           getClass().getSimpleName(),
diff --git a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DefaultParamInitializer.java b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DefaultParamInitializer.java
index 954ddc250..a7f444c91 100644
--- a/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DefaultParamInitializer.java
+++ b/cavis-dnn/cavis-dnn-nn/src/main/java/org/deeplearning4j/nn/params/DefaultParamInitializer.java
@@ -110,14 +110,14 @@ public class DefaultParamInitializer extends AbstractParamInitializer {
         INDArray weightView = paramsView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(0, nWeightParams));
 
         params.put(WEIGHT_KEY, createWeightMatrix(layerConf, weightView, initializeParams));
-        layerConf.getNetConfiguration().addNetWideVariable(WEIGHT_KEY);
+        layerConf.addVariable(WEIGHT_KEY);
 
         long offset = nWeightParams;
         if(hasBias(layerConf)){
             INDArray biasView = paramsView.get(NDArrayIndex.interval(0,0,true),
                     NDArrayIndex.interval(offset, offset + nOut));
             params.put(BIAS_KEY, createBias(layerConf, biasView, initializeParams));
-            layerConf.getNetConfiguration().addNetWideVariable(BIAS_KEY);
+            layerConf.addVariable(BIAS_KEY);
             offset += nOut;
         }
 
@@ -125,7 +125,7 @@ public class DefaultParamInitializer extends AbstractParamInitializer {
             INDArray gainView = paramsView.get(NDArrayIndex.interval(0,0,true),
                     NDArrayIndex.interval(offset, offset + nOut));
             params.put(GAIN_KEY, createGain(conf, gainView, initializeParams));
-            conf.getNetConfiguration().addNetWideVariable(GAIN_KEY);
+            conf.addVariable(GAIN_KEY);
         }
 
         return params;
diff --git a/cavis-native/cavis-native-cpu/src/main/java/org/nd4j/linalg/cpu/nativecpu/ops/CpuOpContext.java b/cavis-native/cavis-native-cpu/src/main/java/org/nd4j/linalg/cpu/nativecpu/ops/CpuOpContext.java
index d6ddf49de..2c1a5860c 100644
--- a/cavis-native/cavis-native-cpu/src/main/java/org/nd4j/linalg/cpu/nativecpu/ops/CpuOpContext.java
+++ b/cavis-native/cavis-native-cpu/src/main/java/org/nd4j/linalg/cpu/nativecpu/ops/CpuOpContext.java
@@ -50,7 +50,8 @@ public class CpuOpContext extends BaseOpContext implements OpContext, Deallocata
 
     @Override
     public void close() {
-        // no-op
+        nativeOps.ctxPurge(context);
+        context.deallocate();
     }
 
     @Override
diff --git a/cavis-native/cavis-native-jcublas/src/main/java/org/nd4j/jita/workspace/CudaWorkspace.java b/cavis-native/cavis-native-jcublas/src/main/java/org/nd4j/jita/workspace/CudaWorkspace.java
index 39dad7bd4..94c2ca71b 100644
--- a/cavis-native/cavis-native-jcublas/src/main/java/org/nd4j/jita/workspace/CudaWorkspace.java
+++ b/cavis-native/cavis-native-jcublas/src/main/java/org/nd4j/jita/workspace/CudaWorkspace.java
@@ -20,6 +20,8 @@
 
 package org.nd4j.jita.workspace;
 
+import java.util.List;
+import java.util.Queue;
 import lombok.NonNull;
 import lombok.extern.slf4j.Slf4j;
 import lombok.val;
@@ -39,10 +41,6 @@ import org.nd4j.linalg.exception.ND4JIllegalStateException;
 import org.nd4j.linalg.factory.Nd4j;
 import org.nd4j.nativeblas.NativeOpsHolder;
 
-import java.util.List;
-import java.util.Queue;
-
-
 /**
  * CUDA-aware MemoryWorkspace implementation
  *
@@ -51,395 +49,489 @@ import java.util.Queue;
 @Slf4j
 public class CudaWorkspace extends Nd4jWorkspace {
 
+  public CudaWorkspace(@NonNull WorkspaceConfiguration configuration) {
+    super(configuration);
+  }
 
-    public CudaWorkspace(@NonNull WorkspaceConfiguration configuration) {
-        super(configuration);
+  public CudaWorkspace(@NonNull WorkspaceConfiguration configuration, @NonNull String workspaceId) {
+    super(configuration, workspaceId);
+  }
+
+  public CudaWorkspace(
+      @NonNull WorkspaceConfiguration configuration,
+      @NonNull String workspaceId,
+      Integer deviceId) {
+    super(configuration, workspaceId);
+    this.deviceId = deviceId;
+  }
+
+  @Override
+  protected void init() {
+    if (workspaceConfiguration.getPolicyLocation() == LocationPolicy.MMAP) {
+      throw new ND4JIllegalStateException("CUDA do not support MMAP workspaces yet");
     }
 
-    public CudaWorkspace(@NonNull WorkspaceConfiguration configuration, @NonNull String workspaceId) {
-        super(configuration, workspaceId);
+    super.init();
+
+    if (currentSize.get() > 0) {
+      log.debug("Allocating {} bytes at DEVICE & HOST space...", currentSize.get());
+      isInit.set(true);
+
+      long bytes = currentSize.get();
+
+      log.debug(
+          "Allocating [{}] workspace on device_{}, {} bytes...",
+          id,
+          Nd4j.getAffinityManager().getDeviceForCurrentThread(),
+          bytes);
+
+      if (isDebug.get()) {
+        Nd4j.getWorkspaceManager().printAllocationStatisticsForCurrentThread();
+      }
+
+      Pointer ptr = memoryManager.allocate((bytes + SAFETY_OFFSET), MemoryKind.HOST, false);
+      if (ptr == null) throw new ND4JIllegalStateException("Can't allocate memory for workspace");
+
+      workspace.setHostPointer(new PagedPointer(ptr));
+
+      if (workspaceConfiguration.getPolicyMirroring() != MirroringPolicy.HOST_ONLY) {
+        workspace.setDevicePointer(
+            new PagedPointer(
+                memoryManager.allocate((bytes + SAFETY_OFFSET), MemoryKind.DEVICE, false)));
+        AllocationsTracker.getInstance()
+            .markAllocated(
+                AllocationKind.GENERAL,
+                Nd4j.getAffinityManager().getDeviceForCurrentThread(),
+                bytes + SAFETY_OFFSET);
+
+        MemoryTracker.getInstance()
+            .incrementWorkspaceAllocatedAmount(
+                Nd4j.getAffinityManager().getDeviceForCurrentThread(), bytes + SAFETY_OFFSET);
+
+        // if base pointer isn't aligned to 16 bytes (128 bits) - adjust the offfset then
+        val addr = workspace.getDevicePointer().address();
+        val div = addr % alignmentBase;
+        if (div != 0) {
+          deviceOffset.set(alignmentBase - div);
+          hostOffset.set(alignmentBase - div);
+        }
+      }
+    }
+  }
+
+  @Override
+  public PagedPointer alloc(long requiredMemory, DataType type, boolean initialize) {
+    return this.alloc(requiredMemory, MemoryKind.DEVICE, type, initialize);
+  }
+
+  @Override
+  public synchronized void destroyWorkspace(boolean extended) {
+    val size = currentSize.getAndSet(0);
+    reset();
+
+    if (extended) clearExternalAllocations();
+
+    clearPinnedAllocations(extended);
+
+    if (workspace.getHostPointer() != null)
+      NativeOpsHolder.getInstance().getDeviceNativeOps().freeHost(workspace.getHostPointer());
+
+    if (workspace.getDevicePointer() != null) {
+      NativeOpsHolder.getInstance()
+          .getDeviceNativeOps()
+          .freeDevice(workspace.getDevicePointer(), 0);
+      AllocationsTracker.getInstance()
+          .markReleased(
+              AllocationKind.GENERAL,
+              Nd4j.getAffinityManager().getDeviceForCurrentThread(),
+              size + SAFETY_OFFSET);
+
+      MemoryTracker.getInstance()
+          .decrementWorkspaceAmount(
+              Nd4j.getAffinityManager().getDeviceForCurrentThread(), size + SAFETY_OFFSET);
     }
 
-    public CudaWorkspace(@NonNull WorkspaceConfiguration configuration, @NonNull String workspaceId, Integer deviceId) {
-        super(configuration, workspaceId);
-        this.deviceId = deviceId;
+    workspace.setDevicePointer(null);
+    workspace.setHostPointer(null);
+  }
+
+  @Override
+  public PagedPointer alloc(
+      long requiredMemory, MemoryKind kind, DataType type, boolean initialize) {
+    long numElements = requiredMemory / Nd4j.sizeOfDataType(type);
+
+    // alignment
+    if (requiredMemory % alignmentBase != 0)
+      requiredMemory += alignmentBase - (requiredMemory % alignmentBase);
+
+    if (!isUsed.get()) {
+      if (disabledCounter.incrementAndGet() % 10 == 0)
+        log.warn(
+            "Workspace was turned off, and wasn't enabled after {} allocations",
+            disabledCounter.get());
+
+      if (kind == MemoryKind.DEVICE) {
+        val pointer =
+            new PagedPointer(
+                memoryManager.allocate(requiredMemory, MemoryKind.DEVICE, initialize), numElements);
+        externalAllocations.add(new PointersPair(null, pointer));
+        MemoryTracker.getInstance()
+            .incrementWorkspaceAllocatedAmount(
+                Nd4j.getAffinityManager().getDeviceForCurrentThread(), requiredMemory);
+        return pointer;
+      } else {
+        val pointer =
+            new PagedPointer(
+                memoryManager.allocate(requiredMemory, MemoryKind.HOST, initialize), numElements);
+        externalAllocations.add(new PointersPair(pointer, null));
+        return pointer;
+      }
     }
 
-    @Override
-    protected void init() {
-        if (workspaceConfiguration.getPolicyLocation() == LocationPolicy.MMAP) {
-            throw new ND4JIllegalStateException("CUDA do not support MMAP workspaces yet");
+    boolean trimmer =
+        (workspaceConfiguration.getPolicyReset() == ResetPolicy.ENDOFBUFFER_REACHED
+                && requiredMemory + cycleAllocations.get() > initialBlockSize.get()
+                && initialBlockSize.get() > 0
+                && kind == MemoryKind.DEVICE)
+            || trimmedMode.get();
+
+    if (trimmer
+        && workspaceConfiguration.getPolicySpill() == SpillPolicy.REALLOCATE
+        && !trimmedMode.get()) {
+      trimmedMode.set(true);
+      trimmedStep.set(stepsCount.get());
+    }
+
+    if (kind == MemoryKind.DEVICE) {
+      if (deviceOffset.get() + requiredMemory <= currentSize.get()
+          && !trimmer
+          && Nd4j.getWorkspaceManager().getDebugMode() != DebugMode.SPILL_EVERYTHING) {
+        cycleAllocations.addAndGet(requiredMemory);
+        long prevOffset = deviceOffset.getAndAdd(requiredMemory);
+
+        if (workspaceConfiguration.getPolicyMirroring() == MirroringPolicy.HOST_ONLY) return null;
+
+        val ptr = workspace.getDevicePointer().withOffset(prevOffset, numElements);
+
+        log.debug(
+            "Workspace [{}] device_{}: alloc array of {} bytes, capacity of {} elements; prevOffset: {}; newOffset: {}; size: {}; address: {}",
+            id,
+            Nd4j.getAffinityManager().getDeviceForCurrentThread(),
+            requiredMemory,
+            numElements,
+            prevOffset,
+            deviceOffset.get(),
+            currentSize.get(),
+            ptr.address());
+
+        if (initialize) {
+          val context = AtomicAllocator.getInstance().getDeviceContext();
+
+          int ret =
+              NativeOpsHolder.getInstance()
+                  .getDeviceNativeOps()
+                  .memsetAsync(ptr, 0, requiredMemory, 0, context.getSpecialStream());
+          if (ret == 0)
+            throw new ND4JIllegalStateException(
+                "memset failed device_" + Nd4j.getAffinityManager().getDeviceForCurrentThread());
+
+          context.syncSpecialStream();
         }
 
-        super.init();
+        return ptr;
+      } else {
 
-        if (currentSize.get() > 0) {
-            //log.info("Allocating {} bytes at DEVICE & HOST space...", currentSize.get());
-            isInit.set(true);
-
-            long bytes = currentSize.get();
-
-            if (isDebug.get())
-                log.info("Allocating [{}] workspace on device_{}, {} bytes...", id, Nd4j.getAffinityManager().getDeviceForCurrentThread(), bytes);
-
-            if (isDebug.get()) {
-                Nd4j.getWorkspaceManager().printAllocationStatisticsForCurrentThread();
-            }
-
-            Pointer ptr = memoryManager.allocate((bytes + SAFETY_OFFSET), MemoryKind.HOST, false);
-            if (ptr == null)
-                throw new ND4JIllegalStateException("Can't allocate memory for workspace");
-
-            workspace.setHostPointer(new PagedPointer(ptr));
-
-            if (workspaceConfiguration.getPolicyMirroring() != MirroringPolicy.HOST_ONLY) {
-                workspace.setDevicePointer(new PagedPointer(memoryManager.allocate((bytes + SAFETY_OFFSET), MemoryKind.DEVICE, false)));
-                AllocationsTracker.getInstance().markAllocated(AllocationKind.GENERAL, Nd4j.getAffinityManager().getDeviceForCurrentThread(), bytes + SAFETY_OFFSET);
-
-                MemoryTracker.getInstance().incrementWorkspaceAllocatedAmount(Nd4j.getAffinityManager().getDeviceForCurrentThread(), bytes + SAFETY_OFFSET);
-
-                // if base pointer isn't aligned to 16 bytes (128 bits) - adjust the offfset then
-                val addr = workspace.getDevicePointer().address();
-                val div = addr % alignmentBase;
-                if (div != 0) {
-                    deviceOffset.set(alignmentBase - div);
-                    hostOffset.set(alignmentBase - div);
-                }
-            }
-        }
-    }
-
-    @Override
-    public PagedPointer alloc(long requiredMemory, DataType type, boolean initialize) {
-	    return this.alloc(requiredMemory, MemoryKind.DEVICE, type, initialize);
-    }
-
-
-    @Override
-    public synchronized void destroyWorkspace(boolean extended) {
-        val size = currentSize.getAndSet(0);
-        reset();
-
-        if (extended)
-            clearExternalAllocations();
-
-        clearPinnedAllocations(extended);
-
-        if (workspace.getHostPointer() != null)
-            NativeOpsHolder.getInstance().getDeviceNativeOps().freeHost(workspace.getHostPointer());
-
-        if (workspace.getDevicePointer() != null) {
-            NativeOpsHolder.getInstance().getDeviceNativeOps().freeDevice(workspace.getDevicePointer(), 0);
-            AllocationsTracker.getInstance().markReleased(AllocationKind.GENERAL, Nd4j.getAffinityManager().getDeviceForCurrentThread(), size + SAFETY_OFFSET);
-
-            MemoryTracker.getInstance().decrementWorkspaceAmount(Nd4j.getAffinityManager().getDeviceForCurrentThread(), size + SAFETY_OFFSET);
+        // spill
+        if (workspaceConfiguration.getPolicyReset() == ResetPolicy.ENDOFBUFFER_REACHED
+            && currentSize.get() > 0
+            && !trimmer
+            && Nd4j.getWorkspaceManager().getDebugMode() != DebugMode.SPILL_EVERYTHING) {
+          // log.info("End of space reached. Current offset: {}; requiredMemory: {}",
+          // deviceOffset.get(), requiredMemory);
+          deviceOffset.set(0);
+          resetPlanned.set(true);
+          return alloc(requiredMemory, kind, type, initialize);
         }
 
-        workspace.setDevicePointer(null);
-        workspace.setHostPointer(null);
+        if (!trimmer) spilledAllocationsSize.addAndGet(requiredMemory);
+        else pinnedAllocationsSize.addAndGet(requiredMemory);
 
-    }
+        log.debug(
+            "Workspace [{}] device_{}: spilled DEVICE array of {} bytes, capacity of {} elements",
+            id,
+            Nd4j.getAffinityManager().getDeviceForCurrentThread(),
+            requiredMemory,
+            numElements);
 
+        val shape =
+            new AllocationShape(
+                requiredMemory / Nd4j.sizeOfDataType(type), Nd4j.sizeOfDataType(type), type);
 
-    @Override
-    public PagedPointer alloc(long requiredMemory, MemoryKind kind, DataType type, boolean initialize) {
-        long numElements = requiredMemory / Nd4j.sizeOfDataType(type);
+        cycleAllocations.addAndGet(requiredMemory);
 
-        // alignment
-        if (requiredMemory % alignmentBase != 0)
-            requiredMemory += alignmentBase - (requiredMemory % alignmentBase);
+        if (workspaceConfiguration.getPolicyMirroring() == MirroringPolicy.HOST_ONLY) return null;
 
-        if (!isUsed.get()) {
-            if (disabledCounter.incrementAndGet() % 10 == 0)
-                log.warn("Worskpace was turned off, and wasn't enabled after {} allocations", disabledCounter.get());
+        switch (workspaceConfiguration.getPolicySpill()) {
+          case REALLOCATE:
+          case EXTERNAL:
+            if (!trimmer) {
+              externalCount.incrementAndGet();
+              //
+              // AtomicAllocator.getInstance().getMemoryHandler().getMemoryProvider().malloc(shape,
+              // null, AllocationStatus.DEVICE).getDevicePointer()
+              val pointer =
+                  new PagedPointer(
+                      memoryManager.allocate(requiredMemory, MemoryKind.DEVICE, initialize),
+                      numElements);
+              pointer.isLeaked();
 
-            if (kind == MemoryKind.DEVICE) {
-                val pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.DEVICE, initialize), numElements);
-                externalAllocations.add(new PointersPair(null, pointer));
-                MemoryTracker.getInstance().incrementWorkspaceAllocatedAmount(Nd4j.getAffinityManager().getDeviceForCurrentThread(), requiredMemory);
-                return pointer;
+              val pp = new PointersPair(null, pointer);
+              pp.setRequiredMemory(requiredMemory);
+              externalAllocations.add(pp);
+
+              MemoryTracker.getInstance()
+                  .incrementWorkspaceAllocatedAmount(
+                      Nd4j.getAffinityManager().getDeviceForCurrentThread(), requiredMemory);
+              return pointer;
             } else {
-                val pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.HOST, initialize), numElements);
-                externalAllocations.add(new PointersPair(pointer, null));
-                return pointer;
+              pinnedCount.incrementAndGet();
+
+              val pointer =
+                  new PagedPointer(
+                      memoryManager.allocate(requiredMemory, MemoryKind.DEVICE, initialize),
+                      numElements);
+              pointer.isLeaked();
+
+              pinnedAllocations.add(
+                  new PointersPair(stepsCount.get(), requiredMemory, null, pointer));
+              MemoryTracker.getInstance()
+                  .incrementWorkspaceAllocatedAmount(
+                      Nd4j.getAffinityManager().getDeviceForCurrentThread(), requiredMemory);
+              return pointer;
             }
+          case FAIL:
+          default:
+            {
+              throw new ND4JIllegalStateException("Can't allocate memory: Workspace is full");
+            }
+        }
+      }
+    } else if (kind == MemoryKind.HOST) {
+      if (hostOffset.get() + requiredMemory <= currentSize.get()
+          && !trimmer
+          && Nd4j.getWorkspaceManager().getDebugMode() != DebugMode.SPILL_EVERYTHING) {
 
+        long prevOffset = hostOffset.getAndAdd(requiredMemory);
 
+        val ptr = workspace.getHostPointer().withOffset(prevOffset, numElements);
+
+        // && workspaceConfiguration.getPolicyMirroring() == MirroringPolicy.HOST_ONLY
+        if (initialize) Pointer.memset(ptr, 0, requiredMemory);
+        return ptr;
+      } else {
+        //     log.info("Spilled HOST array of {} bytes, capacity of {} elements", requiredMemory,
+        // numElements);
+        if (workspaceConfiguration.getPolicyReset() == ResetPolicy.ENDOFBUFFER_REACHED
+            && currentSize.get() > 0
+            && !trimmer
+            && Nd4j.getWorkspaceManager().getDebugMode() != DebugMode.SPILL_EVERYTHING) {
+          // log.info("End of space reached. Current offset: {}; requiredMemory: {}",
+          // deviceOffset.get(), requiredMemory);
+          hostOffset.set(0);
+          // resetPlanned.set(true);
+          return alloc(requiredMemory, kind, type, initialize);
         }
 
-        boolean trimmer = (workspaceConfiguration.getPolicyReset() == ResetPolicy.ENDOFBUFFER_REACHED && requiredMemory + cycleAllocations.get() > initialBlockSize.get() && initialBlockSize.get() > 0 && kind == MemoryKind.DEVICE) || trimmedMode.get();
+        val shape =
+            new AllocationShape(
+                requiredMemory / Nd4j.sizeOfDataType(type), Nd4j.sizeOfDataType(type), type);
 
-        if (trimmer && workspaceConfiguration.getPolicySpill() == SpillPolicy.REALLOCATE && !trimmedMode.get()) {
-            trimmedMode.set(true);
-            trimmedStep.set(stepsCount.get());
-        }
+        switch (workspaceConfiguration.getPolicySpill()) {
+          case REALLOCATE:
+          case EXTERNAL:
+            if (!trimmer) {
+              // memoryManager.allocate(requiredMemory, MemoryKind.HOST, true)
+              // AtomicAllocator.getInstance().getMemoryHandler().getMemoryProvider().malloc(shape,
+              // null, AllocationStatus.DEVICE).getDevicePointer()
+              PagedPointer pointer =
+                  new PagedPointer(
+                      memoryManager.allocate(requiredMemory, MemoryKind.HOST, initialize),
+                      numElements);
 
-        if (kind == MemoryKind.DEVICE) {
-            if (deviceOffset.get() + requiredMemory <= currentSize.get() && !trimmer && Nd4j.getWorkspaceManager().getDebugMode() != DebugMode.SPILL_EVERYTHING) {
-                cycleAllocations.addAndGet(requiredMemory);
-                long prevOffset = deviceOffset.getAndAdd(requiredMemory);
-
-                if (workspaceConfiguration.getPolicyMirroring() == MirroringPolicy.HOST_ONLY)
-                    return null;
-
-                val ptr = workspace.getDevicePointer().withOffset(prevOffset, numElements);
-
-                if (isDebug.get())
-                    log.info("Workspace [{}] device_{}: alloc array of {} bytes, capacity of {} elements; prevOffset: {}; newOffset: {}; size: {}; address: {}", id, Nd4j.getAffinityManager().getDeviceForCurrentThread(), requiredMemory, numElements, prevOffset, deviceOffset.get(), currentSize.get(), ptr.address());
-
-                if (initialize) {
-                    val context = AtomicAllocator.getInstance().getDeviceContext();
-
-                    int ret = NativeOpsHolder.getInstance().getDeviceNativeOps().memsetAsync(ptr, 0, requiredMemory, 0, context.getSpecialStream());
-                    if (ret == 0)
-                        throw new ND4JIllegalStateException("memset failed device_" + Nd4j.getAffinityManager().getDeviceForCurrentThread());
-
-                    context.syncSpecialStream();
-                }
-
-                return ptr;
+              externalAllocations.add(new PointersPair(pointer, null));
+              return pointer;
             } else {
+              // AtomicAllocator.getInstance().getMemoryHandler().getMemoryProvider().malloc(shape,
+              // null, AllocationStatus.DEVICE).getDevicePointer()
+              PagedPointer pointer =
+                  new PagedPointer(
+                      memoryManager.allocate(requiredMemory, MemoryKind.HOST, initialize),
+                      numElements);
+              pointer.isLeaked();
 
-                // spill
-                if (workspaceConfiguration.getPolicyReset() == ResetPolicy.ENDOFBUFFER_REACHED && currentSize.get() > 0 && !trimmer && Nd4j.getWorkspaceManager().getDebugMode() != DebugMode.SPILL_EVERYTHING) {
-                    //log.info("End of space reached. Current offset: {}; requiredMemory: {}", deviceOffset.get(), requiredMemory);
-                    deviceOffset.set(0);
-                    resetPlanned.set(true);
-                    return alloc(requiredMemory, kind, type, initialize);
-                }
-
-                if (!trimmer)
-                    spilledAllocationsSize.addAndGet(requiredMemory);
-                else
-                    pinnedAllocationsSize.addAndGet(requiredMemory);
-
-                if (isDebug.get()) {
-                    log.info("Workspace [{}] device_{}: spilled DEVICE array of {} bytes, capacity of {} elements", id, Nd4j.getAffinityManager().getDeviceForCurrentThread(), requiredMemory, numElements);
-                }
-
-                val shape = new AllocationShape(requiredMemory / Nd4j.sizeOfDataType(type), Nd4j.sizeOfDataType(type), type);
-
-                cycleAllocations.addAndGet(requiredMemory);
-
-                if (workspaceConfiguration.getPolicyMirroring() == MirroringPolicy.HOST_ONLY)
-                    return null;
-
-                switch (workspaceConfiguration.getPolicySpill()) {
-                    case REALLOCATE:
-                    case EXTERNAL:
-                        if (!trimmer) {
-                            externalCount.incrementAndGet();
-                            //
-                            //AtomicAllocator.getInstance().getMemoryHandler().getMemoryProvider().malloc(shape, null, AllocationStatus.DEVICE).getDevicePointer()
-                            val pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.DEVICE, initialize), numElements);
-                            pointer.isLeaked();
-
-                            val pp = new PointersPair(null, pointer);
-                            pp.setRequiredMemory(requiredMemory);
-                            externalAllocations.add(pp);
-
-                            MemoryTracker.getInstance().incrementWorkspaceAllocatedAmount(Nd4j.getAffinityManager().getDeviceForCurrentThread(), requiredMemory);
-                            return pointer;
-                        } else {
-                            pinnedCount.incrementAndGet();
-
-                            val pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.DEVICE, initialize), numElements);
-                            pointer.isLeaked();
-
-                            pinnedAllocations.add(new PointersPair(stepsCount.get(), requiredMemory, null, pointer));
-                            MemoryTracker.getInstance().incrementWorkspaceAllocatedAmount(Nd4j.getAffinityManager().getDeviceForCurrentThread(), requiredMemory);
-                            return pointer;
-                        }
-                    case FAIL:
-                    default: {
-                        throw new ND4JIllegalStateException("Can't allocate memory: Workspace is full");
-                    }
-                }
+              pinnedAllocations.add(new PointersPair(stepsCount.get(), 0L, pointer, null));
+              return pointer;
             }
-        } else if (kind == MemoryKind.HOST) {
-            if (hostOffset.get() + requiredMemory <= currentSize.get() && !trimmer && Nd4j.getWorkspaceManager().getDebugMode() != DebugMode.SPILL_EVERYTHING) {
-
-                long prevOffset = hostOffset.getAndAdd(requiredMemory);
-
-                val ptr = workspace.getHostPointer().withOffset(prevOffset, numElements);
-
-                // && workspaceConfiguration.getPolicyMirroring() == MirroringPolicy.HOST_ONLY
-                if (initialize)
-                    Pointer.memset(ptr, 0, requiredMemory);
-                return ptr;
-            } else {
-           //     log.info("Spilled HOST array of {} bytes, capacity of {} elements", requiredMemory, numElements);
-                if (workspaceConfiguration.getPolicyReset() == ResetPolicy.ENDOFBUFFER_REACHED && currentSize.get() > 0 && !trimmer && Nd4j.getWorkspaceManager().getDebugMode() != DebugMode.SPILL_EVERYTHING) {
-                    //log.info("End of space reached. Current offset: {}; requiredMemory: {}", deviceOffset.get(), requiredMemory);
-                    hostOffset.set(0);
-                    //resetPlanned.set(true);
-                    return alloc(requiredMemory, kind, type, initialize);
-                }
-
-                val shape = new AllocationShape(requiredMemory / Nd4j.sizeOfDataType(type), Nd4j.sizeOfDataType(type), type);
-
-                switch (workspaceConfiguration.getPolicySpill()) {
-                    case REALLOCATE:
-                    case EXTERNAL:
-                        if (!trimmer) {
-                            //memoryManager.allocate(requiredMemory, MemoryKind.HOST, true)
-                            //AtomicAllocator.getInstance().getMemoryHandler().getMemoryProvider().malloc(shape, null, AllocationStatus.DEVICE).getDevicePointer()
-                            PagedPointer pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.HOST, initialize), numElements);
-
-                            externalAllocations.add(new PointersPair(pointer, null));
-                            return pointer;
-                        } else {
-                            //AtomicAllocator.getInstance().getMemoryHandler().getMemoryProvider().malloc(shape, null, AllocationStatus.DEVICE).getDevicePointer()
-                            PagedPointer pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.HOST, initialize), numElements);
-                            pointer.isLeaked();
-
-                            pinnedAllocations.add(new PointersPair(stepsCount.get(), 0L, pointer, null));
-                            return pointer;
-                        }
-                    case FAIL:
-                    default: {
-                        throw new ND4JIllegalStateException("Can't allocate memory: Workspace is full");
-                    }
-                }
-            }
-        } else throw new ND4JIllegalStateException("Unknown MemoryKind was passed in: " + kind);
-
-        //throw new ND4JIllegalStateException("Shouldn't ever reach this line");
-    }
-
-    @Override
-    protected void clearPinnedAllocations(boolean extended) {
-        if (isDebug.get())
-            log.info("Workspace [{}] device_{} threadId {} cycle {}: clearing pinned allocations...", id, Nd4j.getAffinityManager().getDeviceForCurrentThread(), Thread.currentThread().getId(), cyclesCount.get());
-
-        while (!pinnedAllocations.isEmpty()) {
-            val pair = pinnedAllocations.peek();
-            if (pair == null)
-                throw new RuntimeException();
-
-            long stepNumber = pair.getAllocationCycle();
-            long stepCurrent = stepsCount.get();
-
-            if (isDebug.get())
-                log.info("Allocation step: {}; Current step: {}", stepNumber, stepCurrent);
-
-            if (stepNumber + 2 < stepCurrent || extended) {
-                pinnedAllocations.remove();
-
-                if (pair.getDevicePointer() != null) {
-                    NativeOpsHolder.getInstance().getDeviceNativeOps().freeDevice(pair.getDevicePointer(), 0);
-                    MemoryTracker.getInstance().decrementWorkspaceAmount(Nd4j.getAffinityManager().getDeviceForCurrentThread(), pair.getRequiredMemory());
-                    pinnedCount.decrementAndGet();
-
-                    if (isDebug.get())
-                        log.info("deleting external device allocation ");
-                }
-
-                if (pair.getHostPointer() != null) {
-                    NativeOpsHolder.getInstance().getDeviceNativeOps().freeHost(pair.getHostPointer());
-
-                    if (isDebug.get())
-                        log.info("deleting external host allocation ");
-                }
-
-                val sizez = pair.getRequiredMemory() * -1;
-                pinnedAllocationsSize.addAndGet(sizez);
-            } else {
-                break;
+          case FAIL:
+          default:
+            {
+              throw new ND4JIllegalStateException("Can't allocate memory: Workspace is full");
             }
         }
-    }
+      }
+    } else throw new ND4JIllegalStateException("Unknown MemoryKind was passed in: " + kind);
 
-    @Override
-    protected void clearExternalAllocations() {
-        if (isDebug.get())
-            log.info("Workspace [{}] device_{} threadId {} guid [{}]: clearing external allocations...", id, Nd4j.getAffinityManager().getDeviceForCurrentThread(), Thread.currentThread().getId(), guid);
+    // throw new ND4JIllegalStateException("Shouldn't ever reach this line");
+  }
 
-        Nd4j.getExecutioner().commit();
+  @Override
+  protected void clearPinnedAllocations(boolean extended) {
 
-        try {
-            for (PointersPair pair : externalAllocations) {
-                if (pair.getHostPointer() != null) {
-                    NativeOpsHolder.getInstance().getDeviceNativeOps().freeHost(pair.getHostPointer());
+    log.debug(
+        "Workspace [{}] device_{} threadId {} cycle {}: clearing pinned allocations...",
+        id,
+        Nd4j.getAffinityManager().getDeviceForCurrentThread(),
+        Thread.currentThread().getId(),
+        cyclesCount.get());
 
-                    if (isDebug.get())
-                        log.info("deleting external host allocation... ");
-                }
+    while (!pinnedAllocations.isEmpty()) {
+      val pair = pinnedAllocations.peek();
+      if (pair == null) throw new RuntimeException();
 
-                if (pair.getDevicePointer() != null) {
-                    NativeOpsHolder.getInstance().getDeviceNativeOps().freeDevice(pair.getDevicePointer(), 0);
+      long stepNumber = pair.getAllocationCycle();
+      long stepCurrent = stepsCount.get();
 
-                    if (isDebug.get())
-                        log.info("deleting external device allocation... ");
+      log.debug("Allocation step: {}; Current step: {}", stepNumber, stepCurrent);
 
-                    val sizez = pair.getRequiredMemory();
-                    if (sizez != null) {
-                        AllocationsTracker.getInstance().markReleased(AllocationKind.GENERAL, Nd4j.getAffinityManager().getDeviceForCurrentThread(), sizez);
-                        MemoryTracker.getInstance().decrementWorkspaceAmount(Nd4j.getAffinityManager().getDeviceForCurrentThread(), sizez);
-                    }
-                }
-            }
-        } catch (Exception e) {
-            log.error("RC: Workspace [{}] device_{} threadId {} guid [{}]: clearing external allocations...", id, Nd4j.getAffinityManager().getDeviceForCurrentThread(), Thread.currentThread().getId(), guid);
-            throw new RuntimeException(e);
+      if (stepNumber + 2 < stepCurrent || extended) {
+        pinnedAllocations.remove();
+
+        if (pair.getDevicePointer() != null) {
+          NativeOpsHolder.getInstance().getDeviceNativeOps().freeDevice(pair.getDevicePointer(), 0);
+          MemoryTracker.getInstance()
+              .decrementWorkspaceAmount(
+                  Nd4j.getAffinityManager().getDeviceForCurrentThread(), pair.getRequiredMemory());
+          pinnedCount.decrementAndGet();
+
+          log.debug("deleting external device allocation ");
         }
 
-        spilledAllocationsSize.set(0);
-        externalCount.set(0);
-        externalAllocations.clear();
-    }
+        if (pair.getHostPointer() != null) {
+          NativeOpsHolder.getInstance().getDeviceNativeOps().freeHost(pair.getHostPointer());
 
-    @Override
-    protected void resetWorkspace() {
-        if (currentSize.get() < 1) {
+          log.debug("deleting external host allocation ");
         }
 
+        val sizez = pair.getRequiredMemory() * -1;
+        pinnedAllocationsSize.addAndGet(sizez);
+      } else {
+        break;
+      }
+    }
+  }
 
-/*
-        if (Nd4j.getExecutioner() instanceof GridExecutioner)
-            ((GridExecutioner) Nd4j.getExecutioner()).flushQueueBlocking();
+  @Override
+  protected void clearExternalAllocations() {
 
-        CudaContext context = (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext();
+    log.debug(
+        "Workspace [{}] device_{} threadId {} guid [{}]: clearing external allocations...",
+        id,
+        Nd4j.getAffinityManager().getDeviceForCurrentThread(),
+        Thread.currentThread().getId(),
+        guid);
 
-        //log.info("workspace: {}, size: {}", workspace.getDevicePointer().address(), currentSize.get());
+    Nd4j.getExecutioner().commit();
 
-        NativeOpsHolder.getInstance().getDeviceNativeOps().memsetAsync(workspace.getDevicePointer(), 0, currentSize.get() + SAFETY_OFFSET, 0, context.getSpecialStream());
+    try {
+      for (PointersPair pair : externalAllocations) {
+        if (pair.getHostPointer() != null) {
+          NativeOpsHolder.getInstance().getDeviceNativeOps().freeHost(pair.getHostPointer());
 
-        Pointer.memset(workspace.getHostPointer(), 0, currentSize.get() + SAFETY_OFFSET);
+          log.debug("deleting external host allocation... ");
+        }
 
-        context.getSpecialStream().synchronize();
-        */
+        if (pair.getDevicePointer() != null) {
+          NativeOpsHolder.getInstance().getDeviceNativeOps().freeDevice(pair.getDevicePointer(), 0);
+
+          log.debug("deleting external device allocation... ");
+
+          val sizez = pair.getRequiredMemory();
+          if (sizez != null) {
+            AllocationsTracker.getInstance()
+                .markReleased(
+                    AllocationKind.GENERAL,
+                    Nd4j.getAffinityManager().getDeviceForCurrentThread(),
+                    sizez);
+            MemoryTracker.getInstance()
+                .decrementWorkspaceAmount(
+                    Nd4j.getAffinityManager().getDeviceForCurrentThread(), sizez);
+          }
+        }
+      }
+    } catch (Exception e) {
+      log.error(
+          "RC: Workspace [{}] device_{} threadId {} guid [{}]: clearing external allocations...",
+          id,
+          Nd4j.getAffinityManager().getDeviceForCurrentThread(),
+          Thread.currentThread().getId(),
+          guid);
+      throw new RuntimeException(e);
     }
 
-    protected PointersPair workspace() {
-        return workspace;
-    }
+    spilledAllocationsSize.set(0);
+    externalCount.set(0);
+    externalAllocations.clear();
+  }
 
-    protected Queue<PointersPair> pinnedPointers() {
-        return pinnedAllocations;
-    }
+  @Override
+  protected void resetWorkspace() {
+    if (currentSize.get() < 1) {}
 
-    protected List<PointersPair> externalPointers() {
-        return externalAllocations;
-    }
+    /*
+    if (Nd4j.getExecutioner() instanceof GridExecutioner)
+        ((GridExecutioner) Nd4j.getExecutioner()).flushQueueBlocking();
 
-    @Override
-    public Deallocator deallocator() {
-        return new CudaWorkspaceDeallocator(this);
-    }
+    CudaContext context = (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext();
 
-    @Override
-    public String getUniqueId() {
-        return "Workspace_" + getId() + "_" + Nd4j.getDeallocatorService().nextValue();
-    }
+    //log.info("workspace: {}, size: {}", workspace.getDevicePointer().address(), currentSize.get());
 
-    @Override
-    public int targetDevice() {
-        return deviceId;
-    }
+    NativeOpsHolder.getInstance().getDeviceNativeOps().memsetAsync(workspace.getDevicePointer(), 0, currentSize.get() + SAFETY_OFFSET, 0, context.getSpecialStream());
 
-    @Override
-    public long getPrimaryOffset() {
-        return getDeviceOffset();
-    }
+    Pointer.memset(workspace.getHostPointer(), 0, currentSize.get() + SAFETY_OFFSET);
+
+    context.getSpecialStream().synchronize();
+    */
+  }
+
+  protected PointersPair workspace() {
+    return workspace;
+  }
+
+  protected Queue<PointersPair> pinnedPointers() {
+    return pinnedAllocations;
+  }
+
+  protected List<PointersPair> externalPointers() {
+    return externalAllocations;
+  }
+
+  @Override
+  public Deallocator deallocator() {
+    return new CudaWorkspaceDeallocator(this);
+  }
+
+  @Override
+  public String getUniqueId() {
+    return "Workspace_" + getId() + "_" + Nd4j.getDeallocatorService().nextValue();
+  }
+
+  @Override
+  public int targetDevice() {
+    return deviceId;
+  }
+
+  @Override
+  public long getPrimaryOffset() {
+    return getDeviceOffset();
+  }
 }
diff --git a/cavis-native/cavis-native-jcublas/src/main/java/org/nd4j/jita/workspace/CudaWorkspaceDeallocator.java b/cavis-native/cavis-native-jcublas/src/main/java/org/nd4j/jita/workspace/CudaWorkspaceDeallocator.java
index 806986fc7..41b936bf7 100644
--- a/cavis-native/cavis-native-jcublas/src/main/java/org/nd4j/jita/workspace/CudaWorkspaceDeallocator.java
+++ b/cavis-native/cavis-native-jcublas/src/main/java/org/nd4j/jita/workspace/CudaWorkspaceDeallocator.java
@@ -48,7 +48,7 @@ public class CudaWorkspaceDeallocator implements Deallocator {
 
     @Override
     public void deallocate() {
-        log.trace("Deallocating CUDA workspace");
+        log.debug("Deallocating CUDA workspace");
 
         // purging workspace planes
         if (pointersPair != null) {
diff --git a/cavis-native/cavis-native-jcublas/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaExecutioner.java b/cavis-native/cavis-native-jcublas/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaExecutioner.java
index 612dfdda8..5524bddbc 100644
--- a/cavis-native/cavis-native-jcublas/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaExecutioner.java
+++ b/cavis-native/cavis-native-jcublas/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaExecutioner.java
@@ -1582,7 +1582,7 @@ public class CudaExecutioner extends DefaultOpExecutioner {
         }
 
         if (nativeOps.lastErrorCode() != 0)
-            throw new RuntimeException(nativeOps.lastErrorMessage());
+            throw new RuntimeException(nativeOps.lastErrorMessage() + " error code: " + nativeOps.lastErrorCode());
 
         profilingConfigurableHookOut(op, oc, st);
 
diff --git a/cavis-native/cavis-native-jcublas/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaOpContext.java b/cavis-native/cavis-native-jcublas/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaOpContext.java
index 3ba143e36..52af2eeb5 100644
--- a/cavis-native/cavis-native-jcublas/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaOpContext.java
+++ b/cavis-native/cavis-native-jcublas/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaOpContext.java
@@ -56,7 +56,8 @@ public class CudaOpContext extends BaseOpContext implements OpContext, Deallocat
 
     @Override
     public void close() {
-        // no-op
+        nativeOps.ctxPurge(context);
+        context.deallocate();
     }
 
     @Override