Playing with some new code 2 - clean build/test

Signed-off-by: brian <brian@brutex.de>

branch: master
parent: 9d4939ccfd
commit: 82e65bdf59
@@ -67,7 +67,7 @@ public interface WorkspaceMgr<T extends Enum<T>> {
 
 /**
  * Set arrays to be scoped out (not in any workspace) for the specified array type.
- * This means that create, dup, leverage etc methods will return result arrays that are not attached to any workspace
+ * This means that create, dup, leverage etc. methods will return result arrays that are not attached to any workspace
  *
  * @param arrayType Array type to set scoped out for
  */
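Reviewer note, not part of the diff: a minimal sketch of what "scoped out" means for callers, assuming the stock DL4J LayerWorkspaceMgr/ArrayType implementations of this interface; the setScopedOutFor name and the array type chosen here are assumptions for illustration.

import org.deeplearning4j.nn.workspace.ArrayType;
import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;

public class ScopedOutSketch {
    public static void main(String[] args) {
        // noWorkspaces() already detaches everything; setScopedOutFor() is shown to make the intent explicit.
        LayerWorkspaceMgr mgr = LayerWorkspaceMgr.noWorkspaces();
        mgr.setScopedOutFor(ArrayType.ACTIVATIONS);

        // create/dup/leverage for a scoped-out type return arrays that belong to no workspace.
        INDArray act = mgr.create(ArrayType.ACTIVATIONS, DataType.FLOAT, 4, 8);
        System.out.println(act.isAttached());   // expected: false
    }
}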
@@ -120,7 +120,7 @@ public interface WorkspaceMgr<T extends Enum<T>> {
 boolean isWorkspaceOpen(T arrayType);
 
 /**
- * Assert thath the workspace for the specified array type is open.
+ * Assert that the workspace for the specified array type is open.
  * For array types that are set to scoped out, this will be treated as a no-op
  * @param arrayType Array type to check
  * @param msg May be null. If non-null: include this in the exception
@@ -129,7 +129,7 @@ public interface WorkspaceMgr<T extends Enum<T>> {
 void assertOpen(T arrayType, String msg) throws ND4JWorkspaceException;
 
 /**
- * Assert thath the workspace for the specified array type is not open.
+ * Assert that the workspace for the specified array type is not open.
  * For array types that are set to scoped out, this will be treated as a no-op
  * @param arrayType Array type to check
  * @param msg May be null. If non-null: include this in the exception
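Reviewer note, not part of the diff: a hedged sketch of the assertion pair documented above, again assuming the LayerWorkspaceMgr/ArrayType implementation. With every type scoped out, both calls are no-ops, which is exactly the behaviour the Javadoc describes.

import org.deeplearning4j.nn.workspace.ArrayType;
import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr;

public class WorkspaceAssertSketch {
    public static void main(String[] args) {
        LayerWorkspaceMgr mgr = LayerWorkspaceMgr.noWorkspaces();

        // Every type is scoped out here, so both assertions are no-ops, as the Javadoc states.
        mgr.assertOpen(ArrayType.ACTIVATIONS, "expected the activations workspace to be open");
        mgr.assertNotOpen(ArrayType.ACTIVATIONS, "expected the activations workspace to be closed");

        // For a configured-but-unopened workspace, assertOpen would instead throw an
        // ND4JWorkspaceException carrying the msg argument passed above.
        System.out.println("open? " + mgr.isWorkspaceOpen(ArrayType.ACTIVATIONS));   // false
    }
}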
@@ -193,7 +193,7 @@ public interface WorkspaceMgr<T extends Enum<T>> {
 
 /**
  * Create an uninitialized array in the specified array type's workspace (or detached if none is specified).
- * Equivalent to {@link org.nd4j.linalg.factory.Nd4j#createUninitialized(int)} (int...)}, other than the array location
+ * Equivalent to {@link org.nd4j.linalg.factory.Nd4j#createUninitialized(int...)}, other than the array location
  * @param arrayType Array type
  * @param dataType Data type of the created array
  * @param shape Shape
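Reviewer note, not part of the diff: a small sketch of the createUninitialized contract described above, assuming the createUninitialized(arrayType, dataType, shape) signature of the stock WorkspaceMgr implementations; uninitialized contents are garbage by design, so the sketch only inspects shape and attachment.

import org.deeplearning4j.nn.workspace.ArrayType;
import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;

import java.util.Arrays;

public class CreateUninitializedSketch {
    public static void main(String[] args) {
        LayerWorkspaceMgr mgr = LayerWorkspaceMgr.noWorkspaces();

        // Like Nd4j.createUninitialized(int...), but placed according to the array type's workspace
        // (detached here, since no workspace is configured). Contents are NOT zeroed.
        INDArray buf = mgr.createUninitialized(ArrayType.FF_WORKING_MEM, DataType.FLOAT, 32, 128);

        System.out.println(Arrays.toString(buf.shape()));   // [32, 128]
        System.out.println(buf.isAttached());                // false: detached, no workspace configured
    }
}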
@@ -231,7 +231,7 @@ public interface WorkspaceMgr<T extends Enum<T>> {
 
 /**
  * Cast the specified array to the specified datatype.<br>
- * If the array is already the correct type, the bahaviour depends on the 'dupIfCorrectType' argument.<br>
+ * If the array is already the correct type, the behaviour depends on the 'dupIfCorrectType' argument.<br>
  * dupIfCorrectType = false && toCast.dataType() == dataType: return input array as-is (unless workspace is wrong)<br>
  * dupIfCorrectType = true && toCast.dataType() == dataType: duplicate the array into the specified workspace<br>
  * @param arrayType Array type
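Reviewer note, not part of the diff: the dupIfCorrectType contract spelled out above, as a hedged sketch; the castTo(arrayType, dataType, toCast, dupIfCorrectType) signature is assumed from the Javadoc and from the stock WorkspaceMgr interface.

import org.deeplearning4j.nn.workspace.ArrayType;
import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class CastToSketch {
    public static void main(String[] args) {
        LayerWorkspaceMgr mgr = LayerWorkspaceMgr.noWorkspaces();
        INDArray in = Nd4j.ones(DataType.FLOAT, 2, 3);

        // Already FLOAT + dupIfCorrectType == false: the input may be handed back as-is.
        INDArray same = mgr.castTo(ArrayType.ACTIVATIONS, DataType.FLOAT, in, false);

        // Already FLOAT + dupIfCorrectType == true: a duplicate is created for the given array type.
        INDArray copy = mgr.castTo(ArrayType.ACTIVATIONS, DataType.FLOAT, in, true);

        System.out.println(same == in);   // typically true: no copy was needed
        System.out.println(copy == in);   // false: always a fresh array
    }
}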
@@ -81,7 +81,7 @@ public class EvaluationToolsTests extends BaseDL4JTest {
 
 
 String str = EvaluationTools.rocChartToHtml(roc);
-// System.out.println(str);
+System.out.println(str);
 }
 }
@@ -58,6 +58,8 @@ public class Cnn3DLossLayer extends FeedForwardLayer {
 int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) {
+setNetConfiguration(conf);
 LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
+runInheritance();
 
 org.deeplearning4j.nn.layers.convolution.Cnn3DLossLayer ret =
 new org.deeplearning4j.nn.layers.convolution.Cnn3DLossLayer(lconf, networkDataType);
 ret.addTrainingListeners(trainingListeners);
@@ -63,6 +63,8 @@ public class CnnLossLayer extends FeedForwardLayer {
 int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) {
+setNetConfiguration(conf);
 LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
+runInheritance();
 
 org.deeplearning4j.nn.layers.convolution.CnnLossLayer ret =
 new org.deeplearning4j.nn.layers.convolution.CnnLossLayer(lconf, networkDataType);
 ret.addTrainingListeners(trainingListeners);
@@ -77,7 +77,11 @@ public class GravesLSTM extends AbstractLSTM {
 public Layer instantiate(NeuralNetConfiguration conf, Collection<TrainingListener> trainingListeners,
 int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) {
 LayerValidation.assertNInNOutSet("GravesLSTM", getLayerName(), layerIndex, getNIn(), getNOut());
 
 LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
+lconf.setNetConfiguration(conf);
+runInheritance();
+
 org.deeplearning4j.nn.layers.recurrent.GravesLSTM ret =
 new org.deeplearning4j.nn.layers.recurrent.GravesLSTM(lconf, networkDataType);
+
@@ -61,6 +61,8 @@ public class RnnLossLayer extends FeedForwardLayer {
 int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) {
 
 LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
+lconf.setNetConfiguration(conf);
+runInheritance();
 
 org.deeplearning4j.nn.layers.recurrent.RnnLossLayer ret =
 new org.deeplearning4j.nn.layers.recurrent.RnnLossLayer(lconf, networkDataType);
@@ -135,6 +135,7 @@ public class SubsamplingLayer extends NoParamLayer {
 Collection<TrainingListener> trainingListeners, int layerIndex, INDArray layerParamsView,
 boolean initializeParams, DataType networkDataType) {
 LayerConfiguration lconf = conf.getFlattenedLayerConfigurations().get(layerIndex);
+runInheritance();
 
 org.deeplearning4j.nn.layers.convolution.subsampling.SubsamplingLayer ret =
 new org.deeplearning4j.nn.layers.convolution.subsampling.SubsamplingLayer(lconf, networkDataType);
@@ -24,6 +24,8 @@ import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
 import java.util.Map;
 import java.util.stream.Collectors;
 
 import lombok.*;
 import net.brutex.ai.dnn.api.IModel;
 import org.deeplearning4j.nn.api.ITraininableLayerConfiguration;
@@ -328,13 +330,9 @@ public abstract class AbstractLayer<LayerConf_T extends LayerConfiguration> impl
 @Override
 public void clearNoiseWeightParams() {}
 
-public List<String> variables() {
-return variables;
-}
-
-public List<String> variables(boolean copy) {
+public List<String> getVariables(boolean copy) {
 if (copy) {
-return variables();
+return new ArrayList<>(getVariables());
 }
 return variables;
 }
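Aside, not part of the diff: the renamed getVariables(boolean copy) above now returns a defensive copy when copy is true. A minimal, self-contained stand-in (hypothetical VariableHolder class, not the real AbstractLayer) showing why callers might want that.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

// Hypothetical stand-in that mirrors the copy-on-request pattern used in AbstractLayer above.
class VariableHolder {
    private final List<String> variables = new ArrayList<>(Arrays.asList("W", "b"));

    public List<String> getVariables() {
        return variables;                              // live list: mutations would hit the holder
    }

    public List<String> getVariables(boolean copy) {
        return copy ? new ArrayList<>(getVariables()) : variables;
    }

    public static void main(String[] args) {
        VariableHolder h = new VariableHolder();
        h.getVariables(true).add("extra");             // safe: only the copy is modified
        System.out.println(h.getVariables());          // prints [W, b]
    }
}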
@@ -585,7 +583,7 @@ public abstract class AbstractLayer<LayerConf_T extends LayerConfiguration> impl
 */
 @Override
 public INDArray getParams() {
-// throw new RuntimeException("Not implemented");
+//throw new RuntimeException("Not implemented");
 return null;
 }
 
@@ -184,6 +184,17 @@ public abstract class BaseLayer<LayerConfT extends BaseLayerConfiguration>
 setParams(params, 'f');
 }
 
+/**
+ * * The AbstractLayer does not implement Params, ParamTable and GradientView. A RuntimeException
+ * * will be triggered when calling this.
+ *
+ * @return 1d parameter vector
+ */
+@Override
+public INDArray getParams() {
+return paramsFlattened;
+}
+
 /** */
 @Override
 public void close() {}
@@ -358,7 +369,7 @@ public abstract class BaseLayer<LayerConfT extends BaseLayerConfiguration>
 
 protected void setParams(INDArray params, char order) {
 if (params == null) {
-log.warn(
+log.trace(
 "setParams(INDArray params, char order): params is null. Skipping setParams in Layer {}[{}] at index {}",
 getLayerConfiguration().getLayerName(),
 getClass().getSimpleName(),
@@ -110,14 +110,14 @@ public class DefaultParamInitializer extends AbstractParamInitializer {
 INDArray weightView = paramsView.get(NDArrayIndex.interval(0,0,true), NDArrayIndex.interval(0, nWeightParams));
 
 params.put(WEIGHT_KEY, createWeightMatrix(layerConf, weightView, initializeParams));
-layerConf.getNetConfiguration().addNetWideVariable(WEIGHT_KEY);
+layerConf.addVariable(WEIGHT_KEY);
 
 long offset = nWeightParams;
 if(hasBias(layerConf)){
 INDArray biasView = paramsView.get(NDArrayIndex.interval(0,0,true),
 NDArrayIndex.interval(offset, offset + nOut));
 params.put(BIAS_KEY, createBias(layerConf, biasView, initializeParams));
-layerConf.getNetConfiguration().addNetWideVariable(BIAS_KEY);
+layerConf.addVariable(BIAS_KEY);
 offset += nOut;
 }
 
@@ -125,7 +125,7 @@ public class DefaultParamInitializer extends AbstractParamInitializer {
 INDArray gainView = paramsView.get(NDArrayIndex.interval(0,0,true),
 NDArrayIndex.interval(offset, offset + nOut));
 params.put(GAIN_KEY, createGain(conf, gainView, initializeParams));
-conf.getNetConfiguration().addNetWideVariable(GAIN_KEY);
+conf.addVariable(GAIN_KEY);
 }
 
 return params;
@@ -50,7 +50,8 @@ public class CpuOpContext extends BaseOpContext implements OpContext, Deallocata
 
 @Override
 public void close() {
-// no-op
+nativeOps.ctxPurge(context);
+context.deallocate();
 }
 
 @Override
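Reviewer note, not part of the diff: close() on the CPU op context is no longer a no-op, so contexts now genuinely release their native state when closed. A hedged usage sketch, assuming the public OpContext/OpExecutioner API (buildContext(), setIArguments(), AutoCloseable); the argument value is arbitrary.

import org.nd4j.linalg.api.ops.OpContext;
import org.nd4j.linalg.factory.Nd4j;

public class OpContextCloseSketch {
    public static void main(String[] args) throws Exception {
        // OpContext is AutoCloseable; with the change above, close() purges and
        // deallocates the native context instead of silently doing nothing.
        try (OpContext ctx = Nd4j.getExecutioner().buildContext()) {
            ctx.setIArguments(1L);   // arbitrary illustrative argument
        } // ctx.close() -> nativeOps.ctxPurge(context); context.deallocate();
    }
}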
@@ -20,6 +20,8 @@
 
 package org.nd4j.jita.workspace;
 
+import java.util.List;
+import java.util.Queue;
 import lombok.NonNull;
 import lombok.extern.slf4j.Slf4j;
 import lombok.val;
@@ -39,10 +41,6 @@ import org.nd4j.linalg.exception.ND4JIllegalStateException;
 import org.nd4j.linalg.factory.Nd4j;
 import org.nd4j.nativeblas.NativeOpsHolder;
 
-import java.util.List;
-import java.util.Queue;
-
-
 /**
  * CUDA-aware MemoryWorkspace implementation
  *
@@ -51,395 +49,489 @@ import java.util.Queue;
|
|||
@Slf4j
|
||||
public class CudaWorkspace extends Nd4jWorkspace {
|
||||
|
||||
public CudaWorkspace(@NonNull WorkspaceConfiguration configuration) {
|
||||
super(configuration);
|
||||
}
|
||||
|
||||
public CudaWorkspace(@NonNull WorkspaceConfiguration configuration) {
|
||||
super(configuration);
|
||||
public CudaWorkspace(@NonNull WorkspaceConfiguration configuration, @NonNull String workspaceId) {
|
||||
super(configuration, workspaceId);
|
||||
}
|
||||
|
||||
public CudaWorkspace(
|
||||
@NonNull WorkspaceConfiguration configuration,
|
||||
@NonNull String workspaceId,
|
||||
Integer deviceId) {
|
||||
super(configuration, workspaceId);
|
||||
this.deviceId = deviceId;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void init() {
|
||||
if (workspaceConfiguration.getPolicyLocation() == LocationPolicy.MMAP) {
|
||||
throw new ND4JIllegalStateException("CUDA do not support MMAP workspaces yet");
|
||||
}
|
||||
|
||||
public CudaWorkspace(@NonNull WorkspaceConfiguration configuration, @NonNull String workspaceId) {
|
||||
super(configuration, workspaceId);
|
||||
super.init();
|
||||
|
||||
if (currentSize.get() > 0) {
|
||||
log.debug("Allocating {} bytes at DEVICE & HOST space...", currentSize.get());
|
||||
isInit.set(true);
|
||||
|
||||
long bytes = currentSize.get();
|
||||
|
||||
log.debug(
|
||||
"Allocating [{}] workspace on device_{}, {} bytes...",
|
||||
id,
|
||||
Nd4j.getAffinityManager().getDeviceForCurrentThread(),
|
||||
bytes);
|
||||
|
||||
if (isDebug.get()) {
|
||||
Nd4j.getWorkspaceManager().printAllocationStatisticsForCurrentThread();
|
||||
}
|
||||
|
||||
Pointer ptr = memoryManager.allocate((bytes + SAFETY_OFFSET), MemoryKind.HOST, false);
|
||||
if (ptr == null) throw new ND4JIllegalStateException("Can't allocate memory for workspace");
|
||||
|
||||
workspace.setHostPointer(new PagedPointer(ptr));
|
||||
|
||||
if (workspaceConfiguration.getPolicyMirroring() != MirroringPolicy.HOST_ONLY) {
|
||||
workspace.setDevicePointer(
|
||||
new PagedPointer(
|
||||
memoryManager.allocate((bytes + SAFETY_OFFSET), MemoryKind.DEVICE, false)));
|
||||
AllocationsTracker.getInstance()
|
||||
.markAllocated(
|
||||
AllocationKind.GENERAL,
|
||||
Nd4j.getAffinityManager().getDeviceForCurrentThread(),
|
||||
bytes + SAFETY_OFFSET);
|
||||
|
||||
MemoryTracker.getInstance()
|
||||
.incrementWorkspaceAllocatedAmount(
|
||||
Nd4j.getAffinityManager().getDeviceForCurrentThread(), bytes + SAFETY_OFFSET);
|
||||
|
||||
// if base pointer isn't aligned to 16 bytes (128 bits) - adjust the offfset then
|
||||
val addr = workspace.getDevicePointer().address();
|
||||
val div = addr % alignmentBase;
|
||||
if (div != 0) {
|
||||
deviceOffset.set(alignmentBase - div);
|
||||
hostOffset.set(alignmentBase - div);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public PagedPointer alloc(long requiredMemory, DataType type, boolean initialize) {
|
||||
return this.alloc(requiredMemory, MemoryKind.DEVICE, type, initialize);
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized void destroyWorkspace(boolean extended) {
|
||||
val size = currentSize.getAndSet(0);
|
||||
reset();
|
||||
|
||||
if (extended) clearExternalAllocations();
|
||||
|
||||
clearPinnedAllocations(extended);
|
||||
|
||||
if (workspace.getHostPointer() != null)
|
||||
NativeOpsHolder.getInstance().getDeviceNativeOps().freeHost(workspace.getHostPointer());
|
||||
|
||||
if (workspace.getDevicePointer() != null) {
|
||||
NativeOpsHolder.getInstance()
|
||||
.getDeviceNativeOps()
|
||||
.freeDevice(workspace.getDevicePointer(), 0);
|
||||
AllocationsTracker.getInstance()
|
||||
.markReleased(
|
||||
AllocationKind.GENERAL,
|
||||
Nd4j.getAffinityManager().getDeviceForCurrentThread(),
|
||||
size + SAFETY_OFFSET);
|
||||
|
||||
MemoryTracker.getInstance()
|
||||
.decrementWorkspaceAmount(
|
||||
Nd4j.getAffinityManager().getDeviceForCurrentThread(), size + SAFETY_OFFSET);
|
||||
}
|
||||
|
||||
public CudaWorkspace(@NonNull WorkspaceConfiguration configuration, @NonNull String workspaceId, Integer deviceId) {
|
||||
super(configuration, workspaceId);
|
||||
this.deviceId = deviceId;
|
||||
workspace.setDevicePointer(null);
|
||||
workspace.setHostPointer(null);
|
||||
}
|
||||
|
||||
@Override
|
||||
public PagedPointer alloc(
|
||||
long requiredMemory, MemoryKind kind, DataType type, boolean initialize) {
|
||||
long numElements = requiredMemory / Nd4j.sizeOfDataType(type);
|
||||
|
||||
// alignment
|
||||
if (requiredMemory % alignmentBase != 0)
|
||||
requiredMemory += alignmentBase - (requiredMemory % alignmentBase);
|
||||
|
||||
if (!isUsed.get()) {
|
||||
if (disabledCounter.incrementAndGet() % 10 == 0)
|
||||
log.warn(
|
||||
"Workspace was turned off, and wasn't enabled after {} allocations",
|
||||
disabledCounter.get());
|
||||
|
||||
if (kind == MemoryKind.DEVICE) {
|
||||
val pointer =
|
||||
new PagedPointer(
|
||||
memoryManager.allocate(requiredMemory, MemoryKind.DEVICE, initialize), numElements);
|
||||
externalAllocations.add(new PointersPair(null, pointer));
|
||||
MemoryTracker.getInstance()
|
||||
.incrementWorkspaceAllocatedAmount(
|
||||
Nd4j.getAffinityManager().getDeviceForCurrentThread(), requiredMemory);
|
||||
return pointer;
|
||||
} else {
|
||||
val pointer =
|
||||
new PagedPointer(
|
||||
memoryManager.allocate(requiredMemory, MemoryKind.HOST, initialize), numElements);
|
||||
externalAllocations.add(new PointersPair(pointer, null));
|
||||
return pointer;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void init() {
|
||||
if (workspaceConfiguration.getPolicyLocation() == LocationPolicy.MMAP) {
|
||||
throw new ND4JIllegalStateException("CUDA do not support MMAP workspaces yet");
|
||||
boolean trimmer =
|
||||
(workspaceConfiguration.getPolicyReset() == ResetPolicy.ENDOFBUFFER_REACHED
|
||||
&& requiredMemory + cycleAllocations.get() > initialBlockSize.get()
|
||||
&& initialBlockSize.get() > 0
|
||||
&& kind == MemoryKind.DEVICE)
|
||||
|| trimmedMode.get();
|
||||
|
||||
if (trimmer
|
||||
&& workspaceConfiguration.getPolicySpill() == SpillPolicy.REALLOCATE
|
||||
&& !trimmedMode.get()) {
|
||||
trimmedMode.set(true);
|
||||
trimmedStep.set(stepsCount.get());
|
||||
}
|
||||
|
||||
if (kind == MemoryKind.DEVICE) {
|
||||
if (deviceOffset.get() + requiredMemory <= currentSize.get()
|
||||
&& !trimmer
|
||||
&& Nd4j.getWorkspaceManager().getDebugMode() != DebugMode.SPILL_EVERYTHING) {
|
||||
cycleAllocations.addAndGet(requiredMemory);
|
||||
long prevOffset = deviceOffset.getAndAdd(requiredMemory);
|
||||
|
||||
if (workspaceConfiguration.getPolicyMirroring() == MirroringPolicy.HOST_ONLY) return null;
|
||||
|
||||
val ptr = workspace.getDevicePointer().withOffset(prevOffset, numElements);
|
||||
|
||||
log.debug(
|
||||
"Workspace [{}] device_{}: alloc array of {} bytes, capacity of {} elements; prevOffset: {}; newOffset: {}; size: {}; address: {}",
|
||||
id,
|
||||
Nd4j.getAffinityManager().getDeviceForCurrentThread(),
|
||||
requiredMemory,
|
||||
numElements,
|
||||
prevOffset,
|
||||
deviceOffset.get(),
|
||||
currentSize.get(),
|
||||
ptr.address());
|
||||
|
||||
if (initialize) {
|
||||
val context = AtomicAllocator.getInstance().getDeviceContext();
|
||||
|
||||
int ret =
|
||||
NativeOpsHolder.getInstance()
|
||||
.getDeviceNativeOps()
|
||||
.memsetAsync(ptr, 0, requiredMemory, 0, context.getSpecialStream());
|
||||
if (ret == 0)
|
||||
throw new ND4JIllegalStateException(
|
||||
"memset failed device_" + Nd4j.getAffinityManager().getDeviceForCurrentThread());
|
||||
|
||||
context.syncSpecialStream();
|
||||
}
|
||||
|
||||
super.init();
|
||||
return ptr;
|
||||
} else {
|
||||
|
||||
if (currentSize.get() > 0) {
|
||||
//log.info("Allocating {} bytes at DEVICE & HOST space...", currentSize.get());
|
||||
isInit.set(true);
|
||||
|
||||
long bytes = currentSize.get();
|
||||
|
||||
if (isDebug.get())
|
||||
log.info("Allocating [{}] workspace on device_{}, {} bytes...", id, Nd4j.getAffinityManager().getDeviceForCurrentThread(), bytes);
|
||||
|
||||
if (isDebug.get()) {
|
||||
Nd4j.getWorkspaceManager().printAllocationStatisticsForCurrentThread();
|
||||
}
|
||||
|
||||
Pointer ptr = memoryManager.allocate((bytes + SAFETY_OFFSET), MemoryKind.HOST, false);
|
||||
if (ptr == null)
|
||||
throw new ND4JIllegalStateException("Can't allocate memory for workspace");
|
||||
|
||||
workspace.setHostPointer(new PagedPointer(ptr));
|
||||
|
||||
if (workspaceConfiguration.getPolicyMirroring() != MirroringPolicy.HOST_ONLY) {
|
||||
workspace.setDevicePointer(new PagedPointer(memoryManager.allocate((bytes + SAFETY_OFFSET), MemoryKind.DEVICE, false)));
|
||||
AllocationsTracker.getInstance().markAllocated(AllocationKind.GENERAL, Nd4j.getAffinityManager().getDeviceForCurrentThread(), bytes + SAFETY_OFFSET);
|
||||
|
||||
MemoryTracker.getInstance().incrementWorkspaceAllocatedAmount(Nd4j.getAffinityManager().getDeviceForCurrentThread(), bytes + SAFETY_OFFSET);
|
||||
|
||||
// if base pointer isn't aligned to 16 bytes (128 bits) - adjust the offfset then
|
||||
val addr = workspace.getDevicePointer().address();
|
||||
val div = addr % alignmentBase;
|
||||
if (div != 0) {
|
||||
deviceOffset.set(alignmentBase - div);
|
||||
hostOffset.set(alignmentBase - div);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public PagedPointer alloc(long requiredMemory, DataType type, boolean initialize) {
|
||||
return this.alloc(requiredMemory, MemoryKind.DEVICE, type, initialize);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public synchronized void destroyWorkspace(boolean extended) {
|
||||
val size = currentSize.getAndSet(0);
|
||||
reset();
|
||||
|
||||
if (extended)
|
||||
clearExternalAllocations();
|
||||
|
||||
clearPinnedAllocations(extended);
|
||||
|
||||
if (workspace.getHostPointer() != null)
|
||||
NativeOpsHolder.getInstance().getDeviceNativeOps().freeHost(workspace.getHostPointer());
|
||||
|
||||
if (workspace.getDevicePointer() != null) {
|
||||
NativeOpsHolder.getInstance().getDeviceNativeOps().freeDevice(workspace.getDevicePointer(), 0);
|
||||
AllocationsTracker.getInstance().markReleased(AllocationKind.GENERAL, Nd4j.getAffinityManager().getDeviceForCurrentThread(), size + SAFETY_OFFSET);
|
||||
|
||||
MemoryTracker.getInstance().decrementWorkspaceAmount(Nd4j.getAffinityManager().getDeviceForCurrentThread(), size + SAFETY_OFFSET);
|
||||
// spill
|
||||
if (workspaceConfiguration.getPolicyReset() == ResetPolicy.ENDOFBUFFER_REACHED
|
||||
&& currentSize.get() > 0
|
||||
&& !trimmer
|
||||
&& Nd4j.getWorkspaceManager().getDebugMode() != DebugMode.SPILL_EVERYTHING) {
|
||||
// log.info("End of space reached. Current offset: {}; requiredMemory: {}",
|
||||
// deviceOffset.get(), requiredMemory);
|
||||
deviceOffset.set(0);
|
||||
resetPlanned.set(true);
|
||||
return alloc(requiredMemory, kind, type, initialize);
|
||||
}
|
||||
|
||||
workspace.setDevicePointer(null);
|
||||
workspace.setHostPointer(null);
|
||||
if (!trimmer) spilledAllocationsSize.addAndGet(requiredMemory);
|
||||
else pinnedAllocationsSize.addAndGet(requiredMemory);
|
||||
|
||||
}
|
||||
log.debug(
|
||||
"Workspace [{}] device_{}: spilled DEVICE array of {} bytes, capacity of {} elements",
|
||||
id,
|
||||
Nd4j.getAffinityManager().getDeviceForCurrentThread(),
|
||||
requiredMemory,
|
||||
numElements);
|
||||
|
||||
val shape =
|
||||
new AllocationShape(
|
||||
requiredMemory / Nd4j.sizeOfDataType(type), Nd4j.sizeOfDataType(type), type);
|
||||
|
||||
@Override
|
||||
public PagedPointer alloc(long requiredMemory, MemoryKind kind, DataType type, boolean initialize) {
|
||||
long numElements = requiredMemory / Nd4j.sizeOfDataType(type);
|
||||
cycleAllocations.addAndGet(requiredMemory);
|
||||
|
||||
// alignment
|
||||
if (requiredMemory % alignmentBase != 0)
|
||||
requiredMemory += alignmentBase - (requiredMemory % alignmentBase);
|
||||
if (workspaceConfiguration.getPolicyMirroring() == MirroringPolicy.HOST_ONLY) return null;
|
||||
|
||||
if (!isUsed.get()) {
|
||||
if (disabledCounter.incrementAndGet() % 10 == 0)
|
||||
log.warn("Worskpace was turned off, and wasn't enabled after {} allocations", disabledCounter.get());
|
||||
switch (workspaceConfiguration.getPolicySpill()) {
|
||||
case REALLOCATE:
|
||||
case EXTERNAL:
|
||||
if (!trimmer) {
|
||||
externalCount.incrementAndGet();
|
||||
//
|
||||
// AtomicAllocator.getInstance().getMemoryHandler().getMemoryProvider().malloc(shape,
|
||||
// null, AllocationStatus.DEVICE).getDevicePointer()
|
||||
val pointer =
|
||||
new PagedPointer(
|
||||
memoryManager.allocate(requiredMemory, MemoryKind.DEVICE, initialize),
|
||||
numElements);
|
||||
pointer.isLeaked();
|
||||
|
||||
if (kind == MemoryKind.DEVICE) {
|
||||
val pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.DEVICE, initialize), numElements);
|
||||
externalAllocations.add(new PointersPair(null, pointer));
|
||||
MemoryTracker.getInstance().incrementWorkspaceAllocatedAmount(Nd4j.getAffinityManager().getDeviceForCurrentThread(), requiredMemory);
|
||||
return pointer;
|
||||
val pp = new PointersPair(null, pointer);
|
||||
pp.setRequiredMemory(requiredMemory);
|
||||
externalAllocations.add(pp);
|
||||
|
||||
MemoryTracker.getInstance()
|
||||
.incrementWorkspaceAllocatedAmount(
|
||||
Nd4j.getAffinityManager().getDeviceForCurrentThread(), requiredMemory);
|
||||
return pointer;
|
||||
} else {
|
||||
val pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.HOST, initialize), numElements);
|
||||
externalAllocations.add(new PointersPair(pointer, null));
|
||||
return pointer;
|
||||
pinnedCount.incrementAndGet();
|
||||
|
||||
val pointer =
|
||||
new PagedPointer(
|
||||
memoryManager.allocate(requiredMemory, MemoryKind.DEVICE, initialize),
|
||||
numElements);
|
||||
pointer.isLeaked();
|
||||
|
||||
pinnedAllocations.add(
|
||||
new PointersPair(stepsCount.get(), requiredMemory, null, pointer));
|
||||
MemoryTracker.getInstance()
|
||||
.incrementWorkspaceAllocatedAmount(
|
||||
Nd4j.getAffinityManager().getDeviceForCurrentThread(), requiredMemory);
|
||||
return pointer;
|
||||
}
|
||||
case FAIL:
|
||||
default:
|
||||
{
|
||||
throw new ND4JIllegalStateException("Can't allocate memory: Workspace is full");
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (kind == MemoryKind.HOST) {
|
||||
if (hostOffset.get() + requiredMemory <= currentSize.get()
|
||||
&& !trimmer
|
||||
&& Nd4j.getWorkspaceManager().getDebugMode() != DebugMode.SPILL_EVERYTHING) {
|
||||
|
||||
long prevOffset = hostOffset.getAndAdd(requiredMemory);
|
||||
|
||||
val ptr = workspace.getHostPointer().withOffset(prevOffset, numElements);
|
||||
|
||||
// && workspaceConfiguration.getPolicyMirroring() == MirroringPolicy.HOST_ONLY
|
||||
if (initialize) Pointer.memset(ptr, 0, requiredMemory);
|
||||
return ptr;
|
||||
} else {
|
||||
// log.info("Spilled HOST array of {} bytes, capacity of {} elements", requiredMemory,
|
||||
// numElements);
|
||||
if (workspaceConfiguration.getPolicyReset() == ResetPolicy.ENDOFBUFFER_REACHED
|
||||
&& currentSize.get() > 0
|
||||
&& !trimmer
|
||||
&& Nd4j.getWorkspaceManager().getDebugMode() != DebugMode.SPILL_EVERYTHING) {
|
||||
// log.info("End of space reached. Current offset: {}; requiredMemory: {}",
|
||||
// deviceOffset.get(), requiredMemory);
|
||||
hostOffset.set(0);
|
||||
// resetPlanned.set(true);
|
||||
return alloc(requiredMemory, kind, type, initialize);
|
||||
}
|
||||
|
||||
boolean trimmer = (workspaceConfiguration.getPolicyReset() == ResetPolicy.ENDOFBUFFER_REACHED && requiredMemory + cycleAllocations.get() > initialBlockSize.get() && initialBlockSize.get() > 0 && kind == MemoryKind.DEVICE) || trimmedMode.get();
|
||||
val shape =
|
||||
new AllocationShape(
|
||||
requiredMemory / Nd4j.sizeOfDataType(type), Nd4j.sizeOfDataType(type), type);
|
||||
|
||||
if (trimmer && workspaceConfiguration.getPolicySpill() == SpillPolicy.REALLOCATE && !trimmedMode.get()) {
|
||||
trimmedMode.set(true);
|
||||
trimmedStep.set(stepsCount.get());
|
||||
}
|
||||
switch (workspaceConfiguration.getPolicySpill()) {
|
||||
case REALLOCATE:
|
||||
case EXTERNAL:
|
||||
if (!trimmer) {
|
||||
// memoryManager.allocate(requiredMemory, MemoryKind.HOST, true)
|
||||
// AtomicAllocator.getInstance().getMemoryHandler().getMemoryProvider().malloc(shape,
|
||||
// null, AllocationStatus.DEVICE).getDevicePointer()
|
||||
PagedPointer pointer =
|
||||
new PagedPointer(
|
||||
memoryManager.allocate(requiredMemory, MemoryKind.HOST, initialize),
|
||||
numElements);
|
||||
|
||||
if (kind == MemoryKind.DEVICE) {
|
||||
if (deviceOffset.get() + requiredMemory <= currentSize.get() && !trimmer && Nd4j.getWorkspaceManager().getDebugMode() != DebugMode.SPILL_EVERYTHING) {
|
||||
cycleAllocations.addAndGet(requiredMemory);
|
||||
long prevOffset = deviceOffset.getAndAdd(requiredMemory);
|
||||
|
||||
if (workspaceConfiguration.getPolicyMirroring() == MirroringPolicy.HOST_ONLY)
|
||||
return null;
|
||||
|
||||
val ptr = workspace.getDevicePointer().withOffset(prevOffset, numElements);
|
||||
|
||||
if (isDebug.get())
|
||||
log.info("Workspace [{}] device_{}: alloc array of {} bytes, capacity of {} elements; prevOffset: {}; newOffset: {}; size: {}; address: {}", id, Nd4j.getAffinityManager().getDeviceForCurrentThread(), requiredMemory, numElements, prevOffset, deviceOffset.get(), currentSize.get(), ptr.address());
|
||||
|
||||
if (initialize) {
|
||||
val context = AtomicAllocator.getInstance().getDeviceContext();
|
||||
|
||||
int ret = NativeOpsHolder.getInstance().getDeviceNativeOps().memsetAsync(ptr, 0, requiredMemory, 0, context.getSpecialStream());
|
||||
if (ret == 0)
|
||||
throw new ND4JIllegalStateException("memset failed device_" + Nd4j.getAffinityManager().getDeviceForCurrentThread());
|
||||
|
||||
context.syncSpecialStream();
|
||||
}
|
||||
|
||||
return ptr;
|
||||
externalAllocations.add(new PointersPair(pointer, null));
|
||||
return pointer;
|
||||
} else {
|
||||
// AtomicAllocator.getInstance().getMemoryHandler().getMemoryProvider().malloc(shape,
|
||||
// null, AllocationStatus.DEVICE).getDevicePointer()
|
||||
PagedPointer pointer =
|
||||
new PagedPointer(
|
||||
memoryManager.allocate(requiredMemory, MemoryKind.HOST, initialize),
|
||||
numElements);
|
||||
pointer.isLeaked();
|
||||
|
||||
// spill
|
||||
if (workspaceConfiguration.getPolicyReset() == ResetPolicy.ENDOFBUFFER_REACHED && currentSize.get() > 0 && !trimmer && Nd4j.getWorkspaceManager().getDebugMode() != DebugMode.SPILL_EVERYTHING) {
|
||||
//log.info("End of space reached. Current offset: {}; requiredMemory: {}", deviceOffset.get(), requiredMemory);
|
||||
deviceOffset.set(0);
|
||||
resetPlanned.set(true);
|
||||
return alloc(requiredMemory, kind, type, initialize);
|
||||
}
|
||||
|
||||
if (!trimmer)
|
||||
spilledAllocationsSize.addAndGet(requiredMemory);
|
||||
else
|
||||
pinnedAllocationsSize.addAndGet(requiredMemory);
|
||||
|
||||
if (isDebug.get()) {
|
||||
log.info("Workspace [{}] device_{}: spilled DEVICE array of {} bytes, capacity of {} elements", id, Nd4j.getAffinityManager().getDeviceForCurrentThread(), requiredMemory, numElements);
|
||||
}
|
||||
|
||||
val shape = new AllocationShape(requiredMemory / Nd4j.sizeOfDataType(type), Nd4j.sizeOfDataType(type), type);
|
||||
|
||||
cycleAllocations.addAndGet(requiredMemory);
|
||||
|
||||
if (workspaceConfiguration.getPolicyMirroring() == MirroringPolicy.HOST_ONLY)
|
||||
return null;
|
||||
|
||||
switch (workspaceConfiguration.getPolicySpill()) {
|
||||
case REALLOCATE:
|
||||
case EXTERNAL:
|
||||
if (!trimmer) {
|
||||
externalCount.incrementAndGet();
|
||||
//
|
||||
//AtomicAllocator.getInstance().getMemoryHandler().getMemoryProvider().malloc(shape, null, AllocationStatus.DEVICE).getDevicePointer()
|
||||
val pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.DEVICE, initialize), numElements);
|
||||
pointer.isLeaked();
|
||||
|
||||
val pp = new PointersPair(null, pointer);
|
||||
pp.setRequiredMemory(requiredMemory);
|
||||
externalAllocations.add(pp);
|
||||
|
||||
MemoryTracker.getInstance().incrementWorkspaceAllocatedAmount(Nd4j.getAffinityManager().getDeviceForCurrentThread(), requiredMemory);
|
||||
return pointer;
|
||||
} else {
|
||||
pinnedCount.incrementAndGet();
|
||||
|
||||
val pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.DEVICE, initialize), numElements);
|
||||
pointer.isLeaked();
|
||||
|
||||
pinnedAllocations.add(new PointersPair(stepsCount.get(), requiredMemory, null, pointer));
|
||||
MemoryTracker.getInstance().incrementWorkspaceAllocatedAmount(Nd4j.getAffinityManager().getDeviceForCurrentThread(), requiredMemory);
|
||||
return pointer;
|
||||
}
|
||||
case FAIL:
|
||||
default: {
|
||||
throw new ND4JIllegalStateException("Can't allocate memory: Workspace is full");
|
||||
}
|
||||
}
|
||||
pinnedAllocations.add(new PointersPair(stepsCount.get(), 0L, pointer, null));
|
||||
return pointer;
|
||||
}
|
||||
} else if (kind == MemoryKind.HOST) {
|
||||
if (hostOffset.get() + requiredMemory <= currentSize.get() && !trimmer && Nd4j.getWorkspaceManager().getDebugMode() != DebugMode.SPILL_EVERYTHING) {
|
||||
|
||||
long prevOffset = hostOffset.getAndAdd(requiredMemory);
|
||||
|
||||
val ptr = workspace.getHostPointer().withOffset(prevOffset, numElements);
|
||||
|
||||
// && workspaceConfiguration.getPolicyMirroring() == MirroringPolicy.HOST_ONLY
|
||||
if (initialize)
|
||||
Pointer.memset(ptr, 0, requiredMemory);
|
||||
return ptr;
|
||||
} else {
|
||||
// log.info("Spilled HOST array of {} bytes, capacity of {} elements", requiredMemory, numElements);
|
||||
if (workspaceConfiguration.getPolicyReset() == ResetPolicy.ENDOFBUFFER_REACHED && currentSize.get() > 0 && !trimmer && Nd4j.getWorkspaceManager().getDebugMode() != DebugMode.SPILL_EVERYTHING) {
|
||||
//log.info("End of space reached. Current offset: {}; requiredMemory: {}", deviceOffset.get(), requiredMemory);
|
||||
hostOffset.set(0);
|
||||
//resetPlanned.set(true);
|
||||
return alloc(requiredMemory, kind, type, initialize);
|
||||
}
|
||||
|
||||
val shape = new AllocationShape(requiredMemory / Nd4j.sizeOfDataType(type), Nd4j.sizeOfDataType(type), type);
|
||||
|
||||
switch (workspaceConfiguration.getPolicySpill()) {
|
||||
case REALLOCATE:
|
||||
case EXTERNAL:
|
||||
if (!trimmer) {
|
||||
//memoryManager.allocate(requiredMemory, MemoryKind.HOST, true)
|
||||
//AtomicAllocator.getInstance().getMemoryHandler().getMemoryProvider().malloc(shape, null, AllocationStatus.DEVICE).getDevicePointer()
|
||||
PagedPointer pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.HOST, initialize), numElements);
|
||||
|
||||
externalAllocations.add(new PointersPair(pointer, null));
|
||||
return pointer;
|
||||
} else {
|
||||
//AtomicAllocator.getInstance().getMemoryHandler().getMemoryProvider().malloc(shape, null, AllocationStatus.DEVICE).getDevicePointer()
|
||||
PagedPointer pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.HOST, initialize), numElements);
|
||||
pointer.isLeaked();
|
||||
|
||||
pinnedAllocations.add(new PointersPair(stepsCount.get(), 0L, pointer, null));
|
||||
return pointer;
|
||||
}
|
||||
case FAIL:
|
||||
default: {
|
||||
throw new ND4JIllegalStateException("Can't allocate memory: Workspace is full");
|
||||
}
|
||||
}
|
||||
}
|
||||
} else throw new ND4JIllegalStateException("Unknown MemoryKind was passed in: " + kind);
|
||||
|
||||
//throw new ND4JIllegalStateException("Shouldn't ever reach this line");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void clearPinnedAllocations(boolean extended) {
|
||||
if (isDebug.get())
|
||||
log.info("Workspace [{}] device_{} threadId {} cycle {}: clearing pinned allocations...", id, Nd4j.getAffinityManager().getDeviceForCurrentThread(), Thread.currentThread().getId(), cyclesCount.get());
|
||||
|
||||
while (!pinnedAllocations.isEmpty()) {
|
||||
val pair = pinnedAllocations.peek();
|
||||
if (pair == null)
|
||||
throw new RuntimeException();
|
||||
|
||||
long stepNumber = pair.getAllocationCycle();
|
||||
long stepCurrent = stepsCount.get();
|
||||
|
||||
if (isDebug.get())
|
||||
log.info("Allocation step: {}; Current step: {}", stepNumber, stepCurrent);
|
||||
|
||||
if (stepNumber + 2 < stepCurrent || extended) {
|
||||
pinnedAllocations.remove();
|
||||
|
||||
if (pair.getDevicePointer() != null) {
|
||||
NativeOpsHolder.getInstance().getDeviceNativeOps().freeDevice(pair.getDevicePointer(), 0);
|
||||
MemoryTracker.getInstance().decrementWorkspaceAmount(Nd4j.getAffinityManager().getDeviceForCurrentThread(), pair.getRequiredMemory());
|
||||
pinnedCount.decrementAndGet();
|
||||
|
||||
if (isDebug.get())
|
||||
log.info("deleting external device allocation ");
|
||||
}
|
||||
|
||||
if (pair.getHostPointer() != null) {
|
||||
NativeOpsHolder.getInstance().getDeviceNativeOps().freeHost(pair.getHostPointer());
|
||||
|
||||
if (isDebug.get())
|
||||
log.info("deleting external host allocation ");
|
||||
}
|
||||
|
||||
val sizez = pair.getRequiredMemory() * -1;
|
||||
pinnedAllocationsSize.addAndGet(sizez);
|
||||
} else {
|
||||
break;
|
||||
case FAIL:
|
||||
default:
|
||||
{
|
||||
throw new ND4JIllegalStateException("Can't allocate memory: Workspace is full");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else throw new ND4JIllegalStateException("Unknown MemoryKind was passed in: " + kind);
|
||||
|
||||
@Override
|
||||
protected void clearExternalAllocations() {
|
||||
if (isDebug.get())
|
||||
log.info("Workspace [{}] device_{} threadId {} guid [{}]: clearing external allocations...", id, Nd4j.getAffinityManager().getDeviceForCurrentThread(), Thread.currentThread().getId(), guid);
|
||||
// throw new ND4JIllegalStateException("Shouldn't ever reach this line");
|
||||
}
|
||||
|
||||
Nd4j.getExecutioner().commit();
|
||||
@Override
|
||||
protected void clearPinnedAllocations(boolean extended) {
|
||||
|
||||
try {
|
||||
for (PointersPair pair : externalAllocations) {
|
||||
if (pair.getHostPointer() != null) {
|
||||
NativeOpsHolder.getInstance().getDeviceNativeOps().freeHost(pair.getHostPointer());
|
||||
log.debug(
|
||||
"Workspace [{}] device_{} threadId {} cycle {}: clearing pinned allocations...",
|
||||
id,
|
||||
Nd4j.getAffinityManager().getDeviceForCurrentThread(),
|
||||
Thread.currentThread().getId(),
|
||||
cyclesCount.get());
|
||||
|
||||
if (isDebug.get())
|
||||
log.info("deleting external host allocation... ");
|
||||
}
|
||||
while (!pinnedAllocations.isEmpty()) {
|
||||
val pair = pinnedAllocations.peek();
|
||||
if (pair == null) throw new RuntimeException();
|
||||
|
||||
if (pair.getDevicePointer() != null) {
|
||||
NativeOpsHolder.getInstance().getDeviceNativeOps().freeDevice(pair.getDevicePointer(), 0);
|
||||
long stepNumber = pair.getAllocationCycle();
|
||||
long stepCurrent = stepsCount.get();
|
||||
|
||||
if (isDebug.get())
|
||||
log.info("deleting external device allocation... ");
|
||||
log.debug("Allocation step: {}; Current step: {}", stepNumber, stepCurrent);
|
||||
|
||||
val sizez = pair.getRequiredMemory();
|
||||
if (sizez != null) {
|
||||
AllocationsTracker.getInstance().markReleased(AllocationKind.GENERAL, Nd4j.getAffinityManager().getDeviceForCurrentThread(), sizez);
|
||||
MemoryTracker.getInstance().decrementWorkspaceAmount(Nd4j.getAffinityManager().getDeviceForCurrentThread(), sizez);
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error("RC: Workspace [{}] device_{} threadId {} guid [{}]: clearing external allocations...", id, Nd4j.getAffinityManager().getDeviceForCurrentThread(), Thread.currentThread().getId(), guid);
|
||||
throw new RuntimeException(e);
|
||||
if (stepNumber + 2 < stepCurrent || extended) {
|
||||
pinnedAllocations.remove();
|
||||
|
||||
if (pair.getDevicePointer() != null) {
|
||||
NativeOpsHolder.getInstance().getDeviceNativeOps().freeDevice(pair.getDevicePointer(), 0);
|
||||
MemoryTracker.getInstance()
|
||||
.decrementWorkspaceAmount(
|
||||
Nd4j.getAffinityManager().getDeviceForCurrentThread(), pair.getRequiredMemory());
|
||||
pinnedCount.decrementAndGet();
|
||||
|
||||
log.debug("deleting external device allocation ");
|
||||
}
|
||||
|
||||
spilledAllocationsSize.set(0);
|
||||
externalCount.set(0);
|
||||
externalAllocations.clear();
|
||||
}
|
||||
if (pair.getHostPointer() != null) {
|
||||
NativeOpsHolder.getInstance().getDeviceNativeOps().freeHost(pair.getHostPointer());
|
||||
|
||||
@Override
|
||||
protected void resetWorkspace() {
|
||||
if (currentSize.get() < 1) {
|
||||
log.debug("deleting external host allocation ");
|
||||
}
|
||||
|
||||
val sizez = pair.getRequiredMemory() * -1;
|
||||
pinnedAllocationsSize.addAndGet(sizez);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
if (Nd4j.getExecutioner() instanceof GridExecutioner)
|
||||
((GridExecutioner) Nd4j.getExecutioner()).flushQueueBlocking();
|
||||
@Override
|
||||
protected void clearExternalAllocations() {
|
||||
|
||||
CudaContext context = (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext();
|
||||
log.debug(
|
||||
"Workspace [{}] device_{} threadId {} guid [{}]: clearing external allocations...",
|
||||
id,
|
||||
Nd4j.getAffinityManager().getDeviceForCurrentThread(),
|
||||
Thread.currentThread().getId(),
|
||||
guid);
|
||||
|
||||
//log.info("workspace: {}, size: {}", workspace.getDevicePointer().address(), currentSize.get());
|
||||
Nd4j.getExecutioner().commit();
|
||||
|
||||
NativeOpsHolder.getInstance().getDeviceNativeOps().memsetAsync(workspace.getDevicePointer(), 0, currentSize.get() + SAFETY_OFFSET, 0, context.getSpecialStream());
|
||||
try {
|
||||
for (PointersPair pair : externalAllocations) {
|
||||
if (pair.getHostPointer() != null) {
|
||||
NativeOpsHolder.getInstance().getDeviceNativeOps().freeHost(pair.getHostPointer());
|
||||
|
||||
Pointer.memset(workspace.getHostPointer(), 0, currentSize.get() + SAFETY_OFFSET);
|
||||
log.debug("deleting external host allocation... ");
|
||||
}
|
||||
|
||||
context.getSpecialStream().synchronize();
|
||||
*/
|
||||
if (pair.getDevicePointer() != null) {
|
||||
NativeOpsHolder.getInstance().getDeviceNativeOps().freeDevice(pair.getDevicePointer(), 0);
|
||||
|
||||
log.debug("deleting external device allocation... ");
|
||||
|
||||
val sizez = pair.getRequiredMemory();
|
||||
if (sizez != null) {
|
||||
AllocationsTracker.getInstance()
|
||||
.markReleased(
|
||||
AllocationKind.GENERAL,
|
||||
Nd4j.getAffinityManager().getDeviceForCurrentThread(),
|
||||
sizez);
|
||||
MemoryTracker.getInstance()
|
||||
.decrementWorkspaceAmount(
|
||||
Nd4j.getAffinityManager().getDeviceForCurrentThread(), sizez);
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error(
|
||||
"RC: Workspace [{}] device_{} threadId {} guid [{}]: clearing external allocations...",
|
||||
id,
|
||||
Nd4j.getAffinityManager().getDeviceForCurrentThread(),
|
||||
Thread.currentThread().getId(),
|
||||
guid);
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
protected PointersPair workspace() {
|
||||
return workspace;
|
||||
}
|
||||
spilledAllocationsSize.set(0);
|
||||
externalCount.set(0);
|
||||
externalAllocations.clear();
|
||||
}
|
||||
|
||||
protected Queue<PointersPair> pinnedPointers() {
|
||||
return pinnedAllocations;
|
||||
}
|
||||
@Override
|
||||
protected void resetWorkspace() {
|
||||
if (currentSize.get() < 1) {}
|
||||
|
||||
protected List<PointersPair> externalPointers() {
|
||||
return externalAllocations;
|
||||
}
|
||||
/*
|
||||
if (Nd4j.getExecutioner() instanceof GridExecutioner)
|
||||
((GridExecutioner) Nd4j.getExecutioner()).flushQueueBlocking();
|
||||
|
||||
@Override
|
||||
public Deallocator deallocator() {
|
||||
return new CudaWorkspaceDeallocator(this);
|
||||
}
|
||||
CudaContext context = (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext();
|
||||
|
||||
@Override
|
||||
public String getUniqueId() {
|
||||
return "Workspace_" + getId() + "_" + Nd4j.getDeallocatorService().nextValue();
|
||||
}
|
||||
//log.info("workspace: {}, size: {}", workspace.getDevicePointer().address(), currentSize.get());
|
||||
|
||||
@Override
|
||||
public int targetDevice() {
|
||||
return deviceId;
|
||||
}
|
||||
NativeOpsHolder.getInstance().getDeviceNativeOps().memsetAsync(workspace.getDevicePointer(), 0, currentSize.get() + SAFETY_OFFSET, 0, context.getSpecialStream());
|
||||
|
||||
@Override
|
||||
public long getPrimaryOffset() {
|
||||
return getDeviceOffset();
|
||||
}
|
||||
Pointer.memset(workspace.getHostPointer(), 0, currentSize.get() + SAFETY_OFFSET);
|
||||
|
||||
context.getSpecialStream().synchronize();
|
||||
*/
|
||||
}
|
||||
|
||||
protected PointersPair workspace() {
|
||||
return workspace;
|
||||
}
|
||||
|
||||
protected Queue<PointersPair> pinnedPointers() {
|
||||
return pinnedAllocations;
|
||||
}
|
||||
|
||||
protected List<PointersPair> externalPointers() {
|
||||
return externalAllocations;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Deallocator deallocator() {
|
||||
return new CudaWorkspaceDeallocator(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getUniqueId() {
|
||||
return "Workspace_" + getId() + "_" + Nd4j.getDeallocatorService().nextValue();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int targetDevice() {
|
||||
return deviceId;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getPrimaryOffset() {
|
||||
return getDeviceOffset();
|
||||
}
|
||||
}
|
||||
|
|
|
@@ -48,7 +48,7 @@ public class CudaWorkspaceDeallocator implements Deallocator {
 
 @Override
 public void deallocate() {
-log.trace("Deallocating CUDA workspace");
+log.debug("Deallocating CUDA workspace");
 
 // purging workspace planes
 if (pointersPair != null) {
@@ -1582,7 +1582,7 @@ public class CudaExecutioner extends DefaultOpExecutioner {
 }
 
 if (nativeOps.lastErrorCode() != 0)
-throw new RuntimeException(nativeOps.lastErrorMessage());
+throw new RuntimeException(nativeOps.lastErrorMessage() + " error code: " + nativeOps.lastErrorCode());
 
 profilingConfigurableHookOut(op, oc, st);
 
@@ -56,7 +56,8 @@ public class CudaOpContext extends BaseOpContext implements OpContext, Deallocat
 
 @Override
 public void close() {
-// no-op
+nativeOps.ctxPurge(context);
+context.deallocate();
 }
 
 @Override