From f8364997c0d736a35e884d609e254e05c78b1b2c Mon Sep 17 00:00:00 2001 From: raver119 Date: Sat, 24 Aug 2019 09:20:57 +0300 Subject: [PATCH 01/56] [WIP] maxpool2d_bp fix (#160) * one test for maxpool2d_bp Signed-off-by: raver119 * - maxpool2d_bp cuda fix for NaNs - streamSync after each custom op execution Signed-off-by: raver119 --- libnd4j/blas/cuda/NativeOps.cu | 74 +++++++++---------- .../declarable/helpers/cuda/convolutions.cu | 2 + .../nd4j/linalg/custom/CustomOpsTests.java | 16 ++++ 3 files changed, 54 insertions(+), 38 deletions(-) diff --git a/libnd4j/blas/cuda/NativeOps.cu b/libnd4j/blas/cuda/NativeOps.cu index 5c6dadbaf..e75aa422c 100755 --- a/libnd4j/blas/cuda/NativeOps.cu +++ b/libnd4j/blas/cuda/NativeOps.cu @@ -866,9 +866,10 @@ void initializeFunctions(Nd4jPointer *functions) { Nd4jPointer mallocHost(Nd4jLong memorySize, int flags) { Nd4jPointer pointer; // cudaHostAllocMapped |cudaHostAllocPortable - cudaError_t res = cudaHostAlloc(reinterpret_cast(&pointer), memorySize, cudaHostAllocDefault); + auto res = cudaHostAlloc(reinterpret_cast(&pointer), memorySize, cudaHostAllocDefault); if (res != 0) - pointer = 0L; + throw nd4j::cuda_exception::build("cudaHostAlloc(...) failed", res); + return pointer; } @@ -884,7 +885,7 @@ Nd4jPointer mallocDevice(Nd4jLong memorySize, int deviceId, int flags) { Nd4jPointer pointer; auto res = cudaMalloc(reinterpret_cast(&pointer), memorySize); if (res != 0) - pointer = 0L; + throw nd4j::cuda_exception::build("cudaMalloc(...) failed", res); return pointer; } @@ -894,9 +895,9 @@ Nd4jPointer mallocDevice(Nd4jLong memorySize, int deviceId, int flags) { * @param pointer pointer that'll be freed */ int freeHost(Nd4jPointer pointer) { - cudaError_t res = cudaFreeHost(reinterpret_cast(pointer)); + auto res = cudaFreeHost(reinterpret_cast(pointer)); if (res != 0) - pointer = 0L; + throw nd4j::cuda_exception::build("cudaFreeHost(...) failed", res); return 1L; } @@ -907,9 +908,10 @@ int freeHost(Nd4jPointer pointer) { * @param ptrToDeviceId pointer to deviceId. */ int freeDevice(Nd4jPointer pointer, int deviceId) { - cudaError_t res = cudaFree(reinterpret_cast(pointer)); + auto res = cudaFree(reinterpret_cast(pointer)); if (res != 0) - pointer = 0L; + throw nd4j::cuda_exception::build("cudaFree(...) failed", res); + return 1L; } @@ -934,7 +936,7 @@ Nd4jPointer createStream() { auto stream = new cudaStream_t(); auto dZ = cudaStreamCreate(stream); if (dZ != 0) - throw std::runtime_error("cudaStreamCreate(...) failed"); + throw nd4j::cuda_exception::build("cudaStreamCreate(...) failed", dZ); return stream; } @@ -944,23 +946,21 @@ Nd4jPointer createEvent() { CHECK_ALLOC(nativeEvent, "Failed to allocate new CUDA event buffer", sizeof(cudaEvent_t)); - cudaError_t dZ = cudaEventCreateWithFlags(reinterpret_cast(&nativeEvent), cudaEventDisableTiming); - checkCudaErrors(dZ); + auto dZ = cudaEventCreateWithFlags(reinterpret_cast(&nativeEvent), cudaEventDisableTiming); if (dZ != 0) - throw std::runtime_error("cudaEventCreateWithFlags(...) failed"); + throw nd4j::cuda_exception::build("cudaEventCreateWithFlags(...) 
failed", dZ); return nativeEvent; } int registerEvent(Nd4jPointer event, Nd4jPointer stream) { - cudaEvent_t *pEvent = reinterpret_cast(&event); - cudaStream_t *pStream = reinterpret_cast(stream); + auto pEvent = reinterpret_cast(&event); + auto pStream = reinterpret_cast(stream); - cudaError_t dZ = cudaEventRecord(*pEvent, *pStream); - checkCudaErrors(dZ); + auto dZ = cudaEventRecord(*pEvent, *pStream); if (dZ != 0) - throw std::runtime_error("cudaEventRecord(...) failed"); + throw nd4j::cuda_exception::build("cudaEventRecord(...) failed", dZ); return 1; } @@ -1065,53 +1065,48 @@ int memcpyAsync(Nd4jPointer dst, Nd4jPointer src, Nd4jLong size, int flags, Nd4j } int memsetSync(Nd4jPointer dst, int value, Nd4jLong size, int flags, Nd4jPointer reserved) { - cudaError_t dZ = cudaMemset(reinterpret_cast(dst), value, static_cast(size)); - checkCudaErrors(dZ); + auto dZ = cudaMemset(reinterpret_cast(dst), value, static_cast(size)); if (dZ != 0) - throw std::runtime_error("cudaMemset(...) failed"); + throw nd4j::cuda_exception::build("cudaMemset(...) failed", dZ); return 1; } int memsetAsync(Nd4jPointer dst, int value, Nd4jLong size, int flags, Nd4jPointer reserved) { - cudaStream_t *pStream = reinterpret_cast(reserved); + auto pStream = reinterpret_cast(reserved); - cudaError_t dZ = cudaMemsetAsync(reinterpret_cast(dst), value, static_cast(size), *pStream); - checkCudaErrors(dZ); + auto dZ = cudaMemsetAsync(reinterpret_cast(dst), value, static_cast(size), *pStream); if (dZ != 0) - throw std::runtime_error("cudaMemsetAsync(...) failed"); + throw nd4j::cuda_exception::build("cudaMemsetAsync(...) failed", dZ); return 1; } int destroyEvent(Nd4jPointer event) { - cudaEvent_t *pEvent = reinterpret_cast(&event); - cudaError_t dZ = cudaEventDestroy(*pEvent); - checkCudaErrors(dZ); + auto pEvent = reinterpret_cast(&event); + auto dZ = cudaEventDestroy(*pEvent); if (dZ != 0) - throw std::runtime_error("cudaEvenDestroy(...) failed"); + throw nd4j::cuda_exception::build("cudaEvenDestroy(...) failed", dZ); return 1; } int streamSynchronize(Nd4jPointer stream) { - cudaStream_t *pStream = reinterpret_cast(stream); + auto pStream = reinterpret_cast(stream); - cudaError_t dZ = cudaStreamSynchronize(*pStream); - checkCudaErrors(dZ); + auto dZ = cudaStreamSynchronize(*pStream); if (dZ != 0) - throw std::runtime_error("cudaStreamSynchronize(...) failed"); + throw nd4j::cuda_exception::build("cudaStreamSynchronize(...) failed", dZ); return 1L; } int eventSynchronize(Nd4jPointer event) { - cudaEvent_t *pEvent = reinterpret_cast(&event); + auto pEvent = reinterpret_cast(&event); - cudaError_t dZ = cudaEventSynchronize(*pEvent); - checkCudaErrors(dZ); + auto dZ = cudaEventSynchronize(*pEvent); if (dZ != 0) - throw std::runtime_error("cudaEventSynchronize(...) failed"); + throw nd4j::cuda_exception::build("cudaEventSynchronize(...) 
failed", dZ); return 1L; } @@ -2697,13 +2692,16 @@ int execCustomOp2(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer opConte auto result = op->execute(context); - // FIXME: remove once CUDA backend is 100% ready + auto res = cudaStreamSynchronize(*context->launchContext()->getCudaStream()); + if (res != 0) + throw nd4j::cuda_exception::build("customOp execution failed", res); + for (auto v:context->fastpath_in()) { - v->makeBothActual(); + v->syncToDevice(); } for (auto v:context->fastpath_out()) { - v->makeBothActual(); + v->syncToDevice(); } return result; diff --git a/libnd4j/include/ops/declarable/helpers/cuda/convolutions.cu b/libnd4j/include/ops/declarable/helpers/cuda/convolutions.cu index e224329f0..98ab86dec 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/convolutions.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/convolutions.cu @@ -907,6 +907,8 @@ __global__ static void pooling2dBPCuda(const void* vx, const Nd4jLong* xShapeInf /*** max ***/ case 0: { + coord2 = hstart; + coord3 = hend; T max = -DataTypeUtils::max(); for (coords[2] = hstart; coords[2] < hend; coords[2] += dH) { diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/custom/CustomOpsTests.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/custom/CustomOpsTests.java index c2f5dedc5..6c4595a0c 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/custom/CustomOpsTests.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/custom/CustomOpsTests.java @@ -732,4 +732,20 @@ public class CustomOpsTests extends BaseNd4jTest { fail("Failed datatypes: " + failed.toString()); } } + + @Test + public void testMaxPool2Dbp_1() { + val x = Nd4j.create(DataType.HALF, 2,3,16,16).assign(Double.NaN); + val y = Nd4j.create(DataType.HALF, 2,3,8,8).assign(Double.NaN); + val z = Nd4j.create(DataType.HALF, 2,3,16,16); + + val op = DynamicCustomOp.builder("maxpool2d_bp") + .addInputs(x, y) + .addOutputs(z) + .addIntegerArguments(2, 2, 2, 2, 8,8, 1,1,1, 0,0) + .build(); + + Nd4j.exec(op); + Nd4j.getExecutioner().commit(); + } } From b85238a6df8ce72021b33f40e9beb99d9d3661a0 Mon Sep 17 00:00:00 2001 From: Alex Black Date: Sat, 24 Aug 2019 17:33:11 +1000 Subject: [PATCH 02/56] MLN/CG: Don't swallow exceptions if a second exception occurs during workspace closing (#161) Signed-off-by: AlexDBlack --- .../nn/graph/ComputationGraph.java | 93 ++++++++++++------- .../nn/multilayer/MultiLayerNetwork.java | 91 ++++++++++++------ 2 files changed, 121 insertions(+), 63 deletions(-) diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java index 99f8aeff0..00c0cf7d6 100755 --- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java +++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java @@ -2278,6 +2278,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { LayerWorkspaceMgr allNone = noWS ? 
LayerWorkspaceMgr.noWorkspaces(helperWorkspaces) : null; List[] closeAtEndIteraton = (List[])new List[topologicalOrder.length]; MemoryWorkspace initialWorkspace = Nd4j.getMemoryManager().getCurrentWorkspace(); + Throwable t = null; try { for (int i = 0; i <= stopIndex; i++) { GraphVertex current = vertices[topologicalOrder[i]]; @@ -2302,14 +2303,14 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { .with(ArrayType.RNN_FF_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM, WS_RNN_LOOP_WORKING_MEM_CONFIG) .build(); - if(detachedInputs){ + if (detachedInputs) { //Sometimes (like: external errors use cases) we don't want the activations/inputs to be // in a workspace workspaceMgr.setScopedOutFor(ArrayType.INPUT); workspaceMgr.setScopedOutFor(ArrayType.ACTIVATIONS); } else { //Don't leverage out of async MultiDataSetIterator workspaces - if(features[0].isAttached()){ + if (features[0].isAttached()) { workspaceMgr.setNoLeverageOverride(features[0].data().getParentWorkspace().getId()); } } @@ -2326,7 +2327,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { if (ArrayUtils.contains(layerIndexes, vIdx)) { isRequiredOutput = true; - if(outputWorkspace != null && !(outputWorkspace instanceof DummyWorkspace)){ + if (outputWorkspace != null && !(outputWorkspace instanceof DummyWorkspace)) { //Place activations in user-specified workspace origWSAct = workspaceMgr.getWorkspaceName(ArrayType.ACTIVATIONS); origWSActConf = workspaceMgr.getConfiguration(ArrayType.ACTIVATIONS); @@ -2345,7 +2346,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { //Open the relevant workspace for the activations. //Note that this will be closed only once the current vertex's activations have been consumed MemoryWorkspace wsActivations = null; - if(outputWorkspace == null || outputWorkspace instanceof DummyWorkspace || !isRequiredOutput ){ //Open WS if (a) no external/output WS (if present, it's already open), or (b) not being placed in external/output WS + if (outputWorkspace == null || outputWorkspace instanceof DummyWorkspace || !isRequiredOutput) { //Open WS if (a) no external/output WS (if present, it's already open), or (b) not being placed in external/output WS wsActivations = workspaceMgr.notifyScopeEntered(ArrayType.ACTIVATIONS); openActivationsWorkspaces.put(wsActivations, workspaceMgr); } @@ -2353,11 +2354,11 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { //Note that because we're opening activation workspaces not in any defined order (i.e., workspace // use isn't simply nested), we'll manually override the previous workspace setting. 
Otherwise, when we // close these workspaces, the "current" workspace may be set to the incorrect one - if(wsActivations != null ) + if (wsActivations != null) wsActivations.setPreviousWorkspace(initialWorkspace); int closeableAt = vertexOutputsFullyConsumedByStep[vIdx]; - if(outputWorkspace == null || outputWorkspace instanceof DummyWorkspace || (wsActivations != null && !outputWorkspace.getId().equals(wsActivations.getId()))) { + if (outputWorkspace == null || outputWorkspace instanceof DummyWorkspace || (wsActivations != null && !outputWorkspace.getId().equals(wsActivations.getId()))) { if (closeAtEndIteraton[closeableAt] == null) { closeAtEndIteraton[closeableAt] = new ArrayList<>(); } @@ -2373,18 +2374,18 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { out = features[vIdx]; } else { - if(fwdPassType == FwdPassType.STANDARD){ + if (fwdPassType == FwdPassType.STANDARD) { //Standard feed-forward case out = current.doForward(train, workspaceMgr); - } else if(fwdPassType == FwdPassType.RNN_TIMESTEP){ + } else if (fwdPassType == FwdPassType.RNN_TIMESTEP) { if (current.hasLayer()) { //Layer INDArray input = current.getInputs()[0]; Layer l = current.getLayer(); if (l instanceof RecurrentLayer) { out = ((RecurrentLayer) l).rnnTimeStep(reshapeTimeStepInput(input), workspaceMgr); - } else if(l instanceof org.deeplearning4j.nn.layers.wrapper.BaseWrapperLayer && ((org.deeplearning4j.nn.layers.wrapper.BaseWrapperLayer)l).getUnderlying() instanceof RecurrentLayer){ - RecurrentLayer rl = ((RecurrentLayer) ((org.deeplearning4j.nn.layers.wrapper.BaseWrapperLayer)l).getUnderlying()); + } else if (l instanceof org.deeplearning4j.nn.layers.wrapper.BaseWrapperLayer && ((org.deeplearning4j.nn.layers.wrapper.BaseWrapperLayer) l).getUnderlying() instanceof RecurrentLayer) { + RecurrentLayer rl = ((RecurrentLayer) ((org.deeplearning4j.nn.layers.wrapper.BaseWrapperLayer) l).getUnderlying()); out = rl.rnnTimeStep(reshapeTimeStepInput(input), workspaceMgr); } else if (l instanceof MultiLayerNetwork) { out = ((MultiLayerNetwork) l).rnnTimeStep(reshapeTimeStepInput(input)); @@ -2402,7 +2403,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { validateArrayWorkspaces(workspaceMgr, out, ArrayType.ACTIVATIONS, vName, false, "Feed forward (inference)"); } - if(inputsTo != null) { //Output vertices may not input to any other vertices + if (inputsTo != null) { //Output vertices may not input to any other vertices for (VertexIndices v : inputsTo) { //Note that we don't have to do anything special here: the activations are always detached in // this method @@ -2412,13 +2413,13 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { } } - if(clearLayerInputs) { + if (clearLayerInputs) { current.clear(); } - if(isRequiredOutput){ + if (isRequiredOutput) { outputs[ArrayUtils.indexOf(layerIndexes, vIdx)] = out; - if(origWSAct != null){ + if (origWSAct != null) { //Reset the configuration, as we may reuse this workspace manager... 
workspaceMgr.setWorkspace(ArrayType.ACTIVATIONS, origWSAct, origWSActConf); } @@ -2428,14 +2429,16 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { //Close any activations workspaces that we no longer require //Note that activations workspaces can be closed only once the corresponding output activations have // been fully consumed - if(closeAtEndIteraton[i] != null){ - for(MemoryWorkspace wsAct : closeAtEndIteraton[i]){ + if (closeAtEndIteraton[i] != null) { + for (MemoryWorkspace wsAct : closeAtEndIteraton[i]) { wsAct.close(); LayerWorkspaceMgr canNowReuse = openActivationsWorkspaces.remove(wsAct); freeWorkspaceManagers.add(canNowReuse); } } } + } catch (Throwable t2){ + t = t2; } finally { //Close all open workspaces... usually this list will be empty, but not if an exception is thrown //Though if stopIndex < numLayers, some might still be open @@ -2444,7 +2447,15 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { //Edge case here: seems that scoping out can increase the tagScope of the current WS //and if we hit an exception during forward pass, we aren't guaranteed to call close a sufficient // number of times to actually close it, in all cases - ws.close(); + try{ + ws.close(); + } catch (Throwable t2){ + if(t != null){ + log.error("Encountered second exception while trying to close workspace after initial exception"); + log.error("Original exception:", t); + throw t2; + } + } } } Nd4j.getMemoryManager().setCurrentWorkspace(initialWorkspace); @@ -2581,28 +2592,29 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { boolean traceLog = log.isTraceEnabled(); - try{ - for(int i=topologicalOrder.length-1; i>= 0; i--){ + Throwable t = null; + try { + for (int i = topologicalOrder.length - 1; i >= 0; i--) { boolean hitFrozen = false; GraphVertex current = vertices[topologicalOrder[i]]; int vIdx = current.getVertexIndex(); String vertexName = current.getVertexName(); - if(traceLog){ + if (traceLog) { log.trace("About backprop: {} (\"{}\") - {}", i, vertexName, current.getClass().getSimpleName()); } //FIXME: make the frozen vertex feature extraction more flexible - if (current.hasLayer() && current.getLayer() instanceof FrozenLayer || current instanceof FrozenVertex){ + if (current.hasLayer() && current.getLayer() instanceof FrozenLayer || current instanceof FrozenVertex) { hitFrozen = true; } - if (current.isInputVertex() || hitFrozen){ + if (current.isInputVertex() || hitFrozen) { //Close any activation gradient workspaces that we no longer require //Note that activation gradient workspaces can be closed only once the corresponding activations // gradients have been fully consumed - if(closeAtEndIteraton[i] != null){ - for(MemoryWorkspace wsAct : closeAtEndIteraton[i]){ + if (closeAtEndIteraton[i] != null) { + for (MemoryWorkspace wsAct : closeAtEndIteraton[i]) { wsAct.close(); LayerWorkspaceMgr canNowReuse = openActivationsWorkspaces.remove(wsAct); freeWorkspaceManagers.add(canNowReuse); @@ -2680,7 +2692,7 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { wsActivationGrads.setPreviousWorkspace(initialWorkspace); int closeableAt = vertexActGradsFullyConsumedByStep[vIdx]; - if(closeableAt >= 0) { + if (closeableAt >= 0) { if (closeAtEndIteraton[closeableAt] == null) { closeAtEndIteraton[closeableAt] = new ArrayList<>(); } @@ -2689,14 +2701,14 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { Pair pair; INDArray[] epsilons; - 
try(MemoryWorkspace wsWorkingMem = workspaceMgr.notifyScopeEntered(ArrayType.BP_WORKING_MEM)){ + try (MemoryWorkspace wsWorkingMem = workspaceMgr.notifyScopeEntered(ArrayType.BP_WORKING_MEM)) { pair = current.doBackward(truncatedBPTT, workspaceMgr); epsilons = pair.getSecond(); //Validate workspace location for the activation gradients: //validateArrayWorkspaces(LayerWorkspaceMgr mgr, INDArray array, ArrayType arrayType, String vertexName, boolean isInputVertex, String op){ for (INDArray epsilon : epsilons) { - if(epsilon != null) { + if (epsilon != null) { //May be null for EmbeddingLayer, etc validateArrayWorkspaces(workspaceMgr, epsilon, ArrayType.ACTIVATION_GRAD, vertexName, false, "Backprop"); } @@ -2732,15 +2744,15 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { tempList.addFirst(new Triple<>(newName, entry.getValue(), g.flatteningOrderForVariable(origName))); } - for (Triple t : tempList) - gradients.addFirst(t); + for (Triple triple : tempList) + gradients.addFirst(triple); } //Close any activation gradient workspaces that we no longer require //Note that activation gradient workspaces can be closed only once the corresponding activations // gradients have been fully consumed - if(closeAtEndIteraton[i] != null){ - for(MemoryWorkspace wsAct : closeAtEndIteraton[i]){ + if (closeAtEndIteraton[i] != null) { + for (MemoryWorkspace wsAct : closeAtEndIteraton[i]) { wsAct.close(); LayerWorkspaceMgr canNowReuse = openActivationsWorkspaces.remove(wsAct); freeWorkspaceManagers.add(canNowReuse); @@ -2748,23 +2760,32 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { closeAtEndIteraton[i] = null; } - if(traceLog){ + if (traceLog) { log.trace("Completed backprop: {} (\"{}\") - {}", i, vertexName, current.getClass().getSimpleName()); } } - + } catch (Throwable t2){ + t = t2; } finally { //Close all open workspaces... usually this list will be empty, but not if an exception is thrown for(MemoryWorkspace ws : openActivationsWorkspaces.keySet()){ - ws.close(); + try{ + ws.close(); + } catch (Throwable t2){ + if(t != null){ + log.error("Encountered second exception while trying to close workspace after initial exception"); + log.error("Original exception:", t); + throw t2; + } + } } Nd4j.getMemoryManager().setCurrentWorkspace(initialWorkspace); } //Now, add the gradients in the order we need them in for flattening (same as params order) Gradient gradient = new DefaultGradient(flattenedGradients); - for (Triple t : gradients) { - gradient.setGradientFor(t.getFirst(), t.getSecond(), t.getThird()); + for (Triple tr : gradients) { + gradient.setGradientFor(tr.getFirst(), tr.getSecond(), tr.getThird()); } this.gradient = gradient; diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java index 731ca398b..dd495a620 100755 --- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java +++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.java @@ -1242,17 +1242,18 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, Neura boolean traceLog = log.isTraceEnabled(); + Throwable t = null; try { for (int i = 0; i <= layerIndex; i++) { LayerWorkspaceMgr mgr = (i % 2 == 0 ? 
mgrEven : mgrOdd); - if(traceLog){ + if (traceLog) { log.trace("About to forward pass: {} - {}", i, layers[i].getClass().getSimpleName()); } //Edge case: for first layer with dropout, inputs can't be in previous workspace (as it hasn't been opened yet) //Hence: put inputs in working memory - if(i == 0 && wsm != WorkspaceMode.NONE){ + if (i == 0 && wsm != WorkspaceMode.NONE) { mgr.setWorkspace(ArrayType.INPUT, WS_LAYER_WORKING_MEM, WS_LAYER_WORKING_MEM_CONFIG); } @@ -1268,7 +1269,7 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, Neura temp.setPreviousWorkspace(initialWorkspace); - if(i == 0 && input.isAttached()){ + if (i == 0 && input.isAttached()) { //Don't leverage out of async DataSetIterator workspaces mgr.setNoLeverageOverride(input.data().getParentWorkspace().getId()); } @@ -1279,8 +1280,8 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, Neura validateArrayWorkspaces(mgr, input, ArrayType.ACTIVATIONS, i, true, "Output of layer (inference)"); } - if ( i == layerIndex ) { - if(outputWorkspace != null && !(outputWorkspace instanceof DummyWorkspace)){ + if (i == layerIndex) { + if (outputWorkspace != null && !(outputWorkspace instanceof DummyWorkspace)) { //Place activations in user-specified workspace mgr.setWorkspace(ArrayType.ACTIVATIONS, outputWorkspace.getId(), outputWorkspace.getWorkspaceConfiguration()); } else { @@ -1289,15 +1290,15 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, Neura } } - if(fwdPassType == FwdPassType.STANDARD){ + if (fwdPassType == FwdPassType.STANDARD) { //Standard feed-forward case input = layers[i].activate(input, train, mgr); - } else if(fwdPassType == FwdPassType.RNN_TIMESTEP){ + } else if (fwdPassType == FwdPassType.RNN_TIMESTEP) { //rnnTimeStep case if (layers[i] instanceof RecurrentLayer) { input = ((RecurrentLayer) layers[i]).rnnTimeStep(reshapeTimeStepInput(input), mgr); - } else if(layers[i] instanceof BaseWrapperLayer && ((BaseWrapperLayer)layers[i]).getUnderlying() instanceof RecurrentLayer){ - RecurrentLayer rl = ((RecurrentLayer) ((BaseWrapperLayer)layers[i]).getUnderlying()); + } else if (layers[i] instanceof BaseWrapperLayer && ((BaseWrapperLayer) layers[i]).getUnderlying() instanceof RecurrentLayer) { + RecurrentLayer rl = ((RecurrentLayer) ((BaseWrapperLayer) layers[i]).getUnderlying()); input = rl.rnnTimeStep(reshapeTimeStepInput(input), mgr); } else if (layers[i] instanceof MultiLayerNetwork) { input = ((MultiLayerNetwork) layers[i]).rnnTimeStep(reshapeTimeStepInput(input)); @@ -1311,34 +1312,51 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, Neura //Validation: Exception if invalid (bad layer implementation) validateArrayWorkspaces(mgr, input, ArrayType.ACTIVATIONS, i, false, "Output of layer (inference)"); - if(wsActCloseNext != null){ + if (wsActCloseNext != null) { wsActCloseNext.close(); } wsActCloseNext = temp; temp = null; } - if(traceLog){ + if (traceLog) { log.trace("Completed forward pass: {} - {}", i, layers[i].getClass().getSimpleName()); } //Edge case: for first layer with dropout, inputs can't be in previous workspace (as it hasn't been opened yet) //Hence: put inputs in working memory -> set back to default for next use of workspace mgr - if(i == 0 && wsm != WorkspaceMode.NONE){ + if (i == 0 && wsm != WorkspaceMode.NONE) { mgr.setWorkspace(ArrayType.INPUT, WS_LAYER_ACT_2, WS_LAYER_ACT_X_CONFIG); //Inputs should always be in the previous WS } } - + } catch (Throwable t2){ + t = t2; } finally { 
if(wsActCloseNext != null){ - wsActCloseNext.close(); + try { + wsActCloseNext.close(); + } catch (Throwable t2){ + if(t != null){ + log.error("Encountered second exception while trying to close workspace after initial exception"); + log.error("Original exception:", t); + throw t2; + } + } } if(temp != null){ //Should only be non-null on exception while(temp.isScopeActive()){ //For safety, should never occur in theory: a single close() call may not be sufficient, if // workspace scope was borrowed and not properly closed when exception occurred - temp.close(); + try{ + temp.close(); + } catch (Throwable t2){ + if(t != null){ + log.error("Encountered second exception while trying to close workspace after initial exception"); + log.error("Original exception:", t); + throw t2; + } + } } } @@ -1871,13 +1889,14 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, Neura boolean traceLog = log.isTraceEnabled(); + Throwable t = null; try { for (int i = layers.length - 1; i >= 0; i--) { if (layers[i] instanceof FrozenLayer) { break; } - if(traceLog){ + if (traceLog) { log.trace("About to backprop: {} - {}", i, layers[i].getClass().getSimpleName()); } @@ -1897,7 +1916,7 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, Neura //Open activation gradients WS *then* BP working memory, so BP working memory is opened last for use in layers wsActGradTemp = workspaceMgr.notifyScopeEntered(ArrayType.ACTIVATION_GRAD); - try(MemoryWorkspace wsBPWorking = workspaceMgr.notifyScopeEntered(ArrayType.BP_WORKING_MEM)){ + try (MemoryWorkspace wsBPWorking = workspaceMgr.notifyScopeEntered(ArrayType.BP_WORKING_MEM)) { //Note that because we're opening activation workspaces not in a simple nested order, we'll manually // override the previous workspace setting. Otherwise, when we close these workspaces, the "current" @@ -1907,7 +1926,7 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, Neura INDArray eps = (i == layers.length - 1 ? 
epsilon : currPair.getRight()); //eps is null for OutputLayer - if(!tbptt){ + if (!tbptt) { //Standard case currPair = layers[i].backpropGradient(eps, workspaceMgr); } else { @@ -1920,7 +1939,7 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, Neura } } - if(currPair.getSecond() != null) { + if (currPair.getSecond() != null) { //Edge case: may be null for Embedding layer, for example validateArrayWorkspaces(workspaceMgr, currPair.getSecond(), ArrayType.ACTIVATION_GRAD, i, false, "Backprop"); @@ -1936,38 +1955,56 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, Neura currPair = new Pair<>(currPair.getFirst(), this.layerWiseConfigurations.getInputPreProcess(i) .backprop(currPair.getSecond(), getInputMiniBatchSize(), workspaceMgr)); - if (i > 0 && currPair.getSecond() != null){ + if (i > 0 && currPair.getSecond() != null) { validateArrayWorkspaces(workspaceMgr, currPair.getSecond(), ArrayType.ACTIVATION_GRAD, i, true, "Backprop"); } } - if(i == 0 ){ - if(returnInputActGrad && currPair.getSecond() != null){ + if (i == 0) { + if (returnInputActGrad && currPair.getSecond() != null) { currPair.setSecond(currPair.getSecond().detach()); } else { currPair.setSecond(null); } } - if(wsActGradCloseNext != null){ + if (wsActGradCloseNext != null) { wsActGradCloseNext.close(); } wsActGradCloseNext = wsActGradTemp; wsActGradTemp = null; } - if(traceLog){ + if (traceLog) { log.trace("Completed backprop: {} - {}", i, layers[i].getClass().getSimpleName()); } } + } catch (Throwable thr ){ + t = thr; } finally { if(wsActGradCloseNext != null){ - wsActGradCloseNext.close(); + try { + wsActGradCloseNext.close(); + } catch (Throwable t2){ + if(t != null){ + log.error("Encountered second exception while trying to close workspace after initial exception"); + log.error("Original exception:", t); + throw t2; + } + } } - if(wsActGradTemp != null){ + if(wsActGradTemp != null) { //Should only be non-null on exception - wsActGradTemp.close(); + try { + wsActGradTemp.close(); + } catch (Throwable t2) { + if (t != null) { + log.error("Encountered second exception while trying to close workspace after initial exception"); + log.error("Original exception:", t); + throw t2; + } + } } Nd4j.getMemoryManager().setCurrentWorkspace(initialWorkspace); } From a9b08cc163847eda270b940056035d08311215f0 Mon Sep 17 00:00:00 2001 From: Alex Black Date: Sat, 24 Aug 2019 19:22:36 +1000 Subject: [PATCH 03/56] Upgrade protobuf version (#162) * First steps for protobuf version upgrade Signed-off-by: AlexDBlack * Phase 2 Signed-off-by: AlexDBlack * Update imports to shaded protobuf Signed-off-by: AlexDBlack * Version fix Signed-off-by: AlexDBlack * Switch to single execution for protobuf codegen to work around plugin bug Signed-off-by: AlexDBlack * Automatically delete old PB generated files after name change Signed-off-by: Alex Black --- .../nd4j-api-parent/nd4j-api/pom.xml | 92 ++++--- .../functions/DifferentialFunction.java | 10 +- .../nd4j/autodiff/samediff/SDVariable.java | 2 +- .../TensorflowDescriptorParser.java | 2 +- .../imports/graphmapper/BaseGraphMapper.java | 4 +- .../nd4j/imports/graphmapper/GraphMapper.java | 2 +- .../graphmapper/onnx/OnnxGraphMapper.java | 120 ++++----- .../imports/graphmapper/tf/TFGraphMapper.java | 2 +- .../tf/tensors/TFTensorMappers.java | 2 +- .../linalg/api/ops/BaseBroadcastBoolOp.java | 4 +- .../nd4j/linalg/api/ops/BaseBroadcastOp.java | 4 +- .../java/org/nd4j/linalg/api/ops/BaseOp.java | 4 +- .../org/nd4j/linalg/api/ops/BaseReduceOp.java | 4 +- 
.../nd4j/linalg/api/ops/DynamicCustomOp.java | 4 +- .../java/org/nd4j/linalg/api/ops/NoOp.java | 4 +- .../linalg/api/ops/impl/controlflow/If.java | 4 +- .../api/ops/impl/controlflow/While.java | 4 +- .../impl/layers/ExternalErrorsFunction.java | 4 +- .../linalg/api/ops/impl/layers/Linear.java | 4 +- .../impl/layers/convolution/AvgPooling2D.java | 4 +- .../impl/layers/convolution/AvgPooling3D.java | 4 +- .../impl/layers/convolution/BatchNorm.java | 4 +- .../ops/impl/layers/convolution/Conv1D.java | 2 +- .../ops/impl/layers/convolution/Conv2D.java | 4 +- .../ops/impl/layers/convolution/DeConv2D.java | 4 +- .../layers/convolution/DepthwiseConv2D.java | 4 +- .../LocalResponseNormalization.java | 4 +- .../impl/layers/convolution/MaxPooling2D.java | 4 +- .../impl/layers/convolution/MaxPooling3D.java | 4 +- .../impl/layers/convolution/Pooling2D.java | 4 +- .../ops/impl/layers/recurrent/GRUCell.java | 2 +- .../ops/impl/layers/recurrent/LSTMCell.java | 4 +- .../api/ops/impl/layers/recurrent/SRU.java | 4 +- .../ops/impl/layers/recurrent/SRUCell.java | 4 +- .../nd4j/linalg/api/ops/impl/reduce/Mmul.java | 4 +- .../api/ops/impl/reduce/TensorMmul.java | 4 +- .../linalg/api/ops/impl/shape/Concat.java | 4 +- .../nd4j/linalg/api/ops/impl/shape/Diag.java | 4 +- .../linalg/api/ops/impl/shape/DiagPart.java | 4 +- .../linalg/api/ops/impl/shape/Gather.java | 4 +- .../linalg/api/ops/impl/shape/GatherNd.java | 2 +- .../linalg/api/ops/impl/shape/MergeAvg.java | 4 +- .../linalg/api/ops/impl/shape/MergeMax.java | 4 +- .../linalg/api/ops/impl/shape/MergeSum.java | 4 +- .../api/ops/impl/shape/ParallelStack.java | 4 +- .../nd4j/linalg/api/ops/impl/shape/Rank.java | 4 +- .../linalg/api/ops/impl/shape/Repeat.java | 4 +- .../linalg/api/ops/impl/shape/Reshape.java | 4 +- .../api/ops/impl/shape/SequenceMask.java | 2 +- .../nd4j/linalg/api/ops/impl/shape/Shape.java | 4 +- .../linalg/api/ops/impl/shape/ShapeN.java | 2 +- .../nd4j/linalg/api/ops/impl/shape/Size.java | 2 +- .../nd4j/linalg/api/ops/impl/shape/Stack.java | 4 +- .../linalg/api/ops/impl/shape/Transpose.java | 4 +- .../linalg/api/ops/impl/shape/Unstack.java | 4 +- .../api/ops/impl/shape/bp/ConcatBp.java | 4 +- .../shape/tensorops/TensorArrayConcat.java | 4 +- .../shape/tensorops/TensorArrayGather.java | 4 +- .../impl/shape/tensorops/TensorArrayRead.java | 4 +- .../shape/tensorops/TensorArrayScatter.java | 4 +- .../impl/shape/tensorops/TensorArraySize.java | 4 +- .../shape/tensorops/TensorArraySplit.java | 4 +- .../ops/impl/transforms/clip/ClipByNorm.java | 4 +- .../ops/impl/transforms/clip/ClipByValue.java | 4 +- .../ops/impl/transforms/custom/Assign.java | 4 +- .../ops/impl/transforms/custom/CumProd.java | 4 +- .../ops/impl/transforms/custom/CumSum.java | 4 +- .../api/ops/impl/transforms/custom/Fill.java | 4 +- .../impl/transforms/strict/RectifiedTanh.java | 4 +- .../api/ops/random/impl/DropOutInverted.java | 4 +- .../linalg/api/ops/random/impl/Range.java | 2 +- .../onnx/{onnx-ml.proto3 => onnx-ml.proto} | 0 ...-operators.proto3 => onnx-operators.proto} | 2 +- .../protobuf/onnx/{onnx.proto3 => onnx.proto} | 0 .../conversion/GraphRunnerTest.java | 2 +- .../conversion/GpuGraphRunnerTest.java | 2 +- nd4j/nd4j-shade/pom.xml | 1 + nd4j/nd4j-shade/protobuf/pom.xml | 228 ++++++++++++++++++ .../conversion/TensorflowConversion.java | 2 +- .../conversion/graphrunner/GraphRunner.java | 8 +- 80 files changed, 487 insertions(+), 228 deletions(-) rename nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/protobuf/onnx/{onnx-ml.proto3 => onnx-ml.proto} (100%) rename 
nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/protobuf/onnx/{onnx-operators.proto3 => onnx-operators.proto} (99%) rename nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/protobuf/onnx/{onnx.proto3 => onnx.proto} (100%) create mode 100644 nd4j/nd4j-shade/protobuf/pom.xml diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/pom.xml b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/pom.xml index 18680e699..21924f80a 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/pom.xml +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/pom.xml @@ -31,10 +31,35 @@ + + + + + org.apache.maven.plugins + maven-antrun-plugin + 1.8 + + + generate-sources + + run + + + + + + + + + + + + com.github.os72 protoc-jar-maven-plugin - 3.5.1.1 + 3.8.0 tensorflow @@ -43,30 +68,14 @@ run - java-shaded - 3.5.1 + 3.8.0 + .proto src/main/protobuf/tf + src/main/protobuf/onnx src/main/protobuf/tf/tensorflow - - main - false - src/main/java/ - - - - onnx - generate-sources - - run - - - java-shaded - .proto3 - 3.5.1 - src/main/protobuf/onnx main @@ -76,6 +85,32 @@ + + + com.google.code.maven-replacer-plugin + replacer + 1.5.3 + + + ${project.build.sourceDirectory}/org/tensorflow/** + ${project.build.sourceDirectory}/tensorflow/** + ${project.build.sourceDirectory}/onnx/** + + com.google.protobuf. + org.nd4j.shade.protobuf. + + + + replace-imports + generate-sources + + replace + + + + + + org.apache.maven.plugins maven-compiler-plugin @@ -148,20 +183,15 @@ ${flatbuffers.version} - + - com.github.os72 - protobuf-java-shaded-351 - 0.9 - - - com.github.os72 - protobuf-java-util-shaded-351 - 0.9 + org.nd4j + protobuf + ${project.version} + org.objenesis objenesis diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/functions/DifferentialFunction.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/functions/DifferentialFunction.java index 2d49ce56f..71bbd26ee 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/functions/DifferentialFunction.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/functions/DifferentialFunction.java @@ -21,7 +21,7 @@ import lombok.Getter; import lombok.Setter; import lombok.extern.slf4j.Slf4j; import lombok.val; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.autodiff.samediff.serde.FlatBuffersMapper; @@ -101,10 +101,10 @@ public abstract class DifferentialFunction { /** * Initialize the function from the given - * {@link onnx.OnnxProto3.NodeProto} + * {@link onnx.Onnx.NodeProto} * @param node */ - public DifferentialFunction(SameDiff sameDiff,onnx.OnnxProto3.NodeProto node,Map attributesForNode, OnnxProto3.GraphProto graph) { + public DifferentialFunction(SameDiff sameDiff,onnx.Onnx.NodeProto node,Map attributesForNode, Onnx.GraphProto graph) { this.sameDiff = sameDiff; setInstanceId(); initFromOnnx(node, sameDiff, attributesForNode, graph); @@ -731,13 +731,13 @@ public abstract class DifferentialFunction { /** * Iniitialize the function from the given - * {@link onnx.OnnxProto3.NodeProto} + * {@link onnx.Onnx.NodeProto} * @param node * @param initWith * @param attributesForNode * @param graph */ - public abstract void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph); + public abstract void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph); 
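The import and class-name changes above follow one mechanical rule: the generated ONNX classes move from OnnxProto3 to Onnx, and the protobuf runtime classes move from com.github.os72.protobuf351 to org.nd4j.shade.protobuf. As a rough sketch (not part of the patch; the class name and file path are illustrative), downstream code that parses a serialized ONNX model looks like this after the rename, using only the calls that appear in the OnnxGraphMapper hunks below:

    import java.io.BufferedInputStream;
    import java.io.FileInputStream;
    import onnx.Onnx;

    public class OnnxParseSketch {
        public static void main(String[] args) throws Exception {
            // Parse a serialized ONNX model with the renamed generated classes (formerly OnnxProto3.*)
            Onnx.ModelProto model = Onnx.ModelProto.parseFrom(
                    new BufferedInputStream(new FileInputStream("model.onnx"))); // illustrative path
            // Walk the graph nodes the same way OnnxGraphMapper.dumpBinaryProtoAsText does after the rename
            for (Onnx.NodeProto node : model.getGraph().getNodeList()) {
                System.out.println(node.getName() + " -> " + node.getOpType());
            }
        }
    }
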
diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/samediff/SDVariable.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/samediff/SDVariable.java index a7fb35520..430b4d83a 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/samediff/SDVariable.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/samediff/SDVariable.java @@ -19,7 +19,7 @@ package org.nd4j.autodiff.samediff; import java.util.Objects; import lombok.*; import lombok.extern.slf4j.Slf4j; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.functions.DifferentialFunction; import org.nd4j.autodiff.samediff.internal.Variable; import org.nd4j.base.Preconditions; diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/descriptors/tensorflow/TensorflowDescriptorParser.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/descriptors/tensorflow/TensorflowDescriptorParser.java index fad55d101..3d0464782 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/descriptors/tensorflow/TensorflowDescriptorParser.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/descriptors/tensorflow/TensorflowDescriptorParser.java @@ -16,7 +16,7 @@ package org.nd4j.imports.descriptors.tensorflow; -import com.github.os72.protobuf351.TextFormat; +import org.nd4j.shade.protobuf.TextFormat; import org.nd4j.linalg.exception.ND4JIllegalStateException; import org.nd4j.linalg.io.ClassPathResource; import org.tensorflow.framework.OpDef; diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/graphmapper/BaseGraphMapper.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/graphmapper/BaseGraphMapper.java index 92c888e0c..fe252aeeb 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/graphmapper/BaseGraphMapper.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/graphmapper/BaseGraphMapper.java @@ -16,8 +16,8 @@ package org.nd4j.imports.graphmapper; -import com.github.os72.protobuf351.Message; -import com.github.os72.protobuf351.TextFormat; +import org.nd4j.shade.protobuf.Message; +import org.nd4j.shade.protobuf.TextFormat; import lombok.extern.slf4j.Slf4j; import lombok.val; import org.apache.commons.io.IOUtils; diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/graphmapper/GraphMapper.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/graphmapper/GraphMapper.java index 8aad0f4d9..2d89a2b07 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/graphmapper/GraphMapper.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/graphmapper/GraphMapper.java @@ -16,7 +16,7 @@ package org.nd4j.imports.graphmapper; -import com.github.os72.protobuf351.Message; +import org.nd4j.shade.protobuf.Message; import org.nd4j.autodiff.functions.DifferentialFunction; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.imports.descriptors.properties.PropertyMapping; diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/graphmapper/onnx/OnnxGraphMapper.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/graphmapper/onnx/OnnxGraphMapper.java index 0bbfece6f..719ac792d 100644 --- 
a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/graphmapper/onnx/OnnxGraphMapper.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/graphmapper/onnx/OnnxGraphMapper.java @@ -16,13 +16,13 @@ package org.nd4j.imports.graphmapper.onnx; -import com.github.os72.protobuf351.ByteString; -import com.github.os72.protobuf351.Message; +import org.nd4j.shade.protobuf.ByteString; +import org.nd4j.shade.protobuf.Message; import com.google.common.primitives.Floats; import com.google.common.primitives.Ints; import com.google.common.primitives.Longs; import lombok.val; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.functions.DifferentialFunction; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; @@ -52,7 +52,7 @@ import java.util.*; * * @author Adam Gibson */ -public class OnnxGraphMapper extends BaseGraphMapper { +public class OnnxGraphMapper extends BaseGraphMapper { private static OnnxGraphMapper INSTANCE = new OnnxGraphMapper(); @@ -64,9 +64,9 @@ public class OnnxGraphMapper extends BaseGraphMapper attributesForNode, OnnxProto3.NodeProto node, OnnxProto3.GraphProto graph) { + public void initFunctionFromProperties(String mappedTfName, DifferentialFunction on, Map attributesForNode, Onnx.NodeProto node, Onnx.GraphProto graph) { val properties = on.mappingsForFunction(); val tfProperties = properties.get(mappedTfName); val fields = DifferentialFunctionClassHolder.getInstance().getFieldsForFunction(on); @@ -170,18 +170,18 @@ public class OnnxGraphMapper extends BaseGraphMapper> propertyMappingsForFunction) { + public void mapProperty(String name, DifferentialFunction on, Onnx.NodeProto node, Onnx.GraphProto graph, SameDiff sameDiff, Map> propertyMappingsForFunction) { val mapping = propertyMappingsForFunction.get(name).get(getTargetMappingForOp(on, node)); val fields = DifferentialFunctionClassHolder.getInstance().getFieldsForFunction(on); /** @@ -263,7 +263,7 @@ public class OnnxGraphMapper extends BaseGraphMapper getControlDependencies(OnnxProto3.NodeProto node) { + public List getControlDependencies(Onnx.NodeProto node) { throw new UnsupportedOperationException("Not yet implemented"); } @Override public void dumpBinaryProtoAsText(File inputFile, File outputFile) { try { - OnnxProto3.ModelProto graphDef = OnnxProto3.ModelProto.parseFrom(new BufferedInputStream(new FileInputStream(inputFile))); + Onnx.ModelProto graphDef = Onnx.ModelProto.parseFrom(new BufferedInputStream(new FileInputStream(inputFile))); BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(outputFile,true)); - for(OnnxProto3.NodeProto node : graphDef.getGraph().getNodeList()) { + for(Onnx.NodeProto node : graphDef.getGraph().getNodeList()) { bufferedWriter.write(node.toString()); } @@ -316,12 +316,12 @@ public class OnnxGraphMapper extends BaseGraphMapper variablesForGraph(OnnxProto3.GraphProto graphProto) { + public Map variablesForGraph(Onnx.GraphProto graphProto) { /** * Need to figure out why * gpu_0/conv1_1 isn't present in VGG */ - Map ret = new HashMap<>(); + Map ret = new HashMap<>(); for(int i = 0; i < graphProto.getInputCount(); i++) { ret.put(graphProto.getInput(i).getName(),graphProto.getInput(i).getType().getTensorType()); } @@ -356,19 +356,19 @@ public class OnnxGraphMapper extends BaseGraphMapper to) { - OnnxProto3.TensorShapeProto.Dimension dim = OnnxProto3.TensorShapeProto.Dimension. 
+ protected void addDummyTensor(String name, Map to) { + Onnx.TensorShapeProto.Dimension dim = Onnx.TensorShapeProto.Dimension. newBuilder() .setDimValue(-1) .build(); - OnnxProto3.TypeProto.Tensor typeProto = OnnxProto3.TypeProto.Tensor.newBuilder() + Onnx.TypeProto.Tensor typeProto = Onnx.TypeProto.Tensor.newBuilder() .setShape( - OnnxProto3.TensorShapeProto.newBuilder() + Onnx.TensorShapeProto.newBuilder() .addDim(dim) .addDim(dim).build()) .build(); @@ -377,23 +377,23 @@ public class OnnxGraphMapper extends BaseGraphMapper importState, - OpImportOverride opImportOverride, - OpImportFilter opFilter) { + public void mapNodeType(Onnx.NodeProto tfNode, ImportState importState, + OpImportOverride opImportOverride, + OpImportFilter opFilter) { val differentialFunction = DifferentialFunctionClassHolder.getInstance().getOpWithOnnxName(tfNode.getOpType()); if(differentialFunction == null) { throw new NoOpNameFoundException("No op name found " + tfNode.getOpType()); @@ -425,13 +425,13 @@ public class OnnxGraphMapper extends BaseGraphMapper= 2) @@ -548,11 +548,11 @@ public class OnnxGraphMapper extends BaseGraphMapper= 2) @@ -577,74 +577,74 @@ public class OnnxGraphMapper extends BaseGraphMapper getAttrMap(OnnxProto3.NodeProto nodeProto) { - Map proto = new HashMap<>(); + public Map getAttrMap(Onnx.NodeProto nodeProto) { + Map proto = new HashMap<>(); for(int i = 0; i < nodeProto.getAttributeCount(); i++) { - OnnxProto3.AttributeProto attributeProto = nodeProto.getAttribute(i); + Onnx.AttributeProto attributeProto = nodeProto.getAttribute(i); proto.put(attributeProto.getName(),attributeProto); } return proto; } @Override - public String getName(OnnxProto3.NodeProto nodeProto) { + public String getName(Onnx.NodeProto nodeProto) { return nodeProto.getName(); } @Override - public boolean alreadySeen(OnnxProto3.NodeProto nodeProto) { + public boolean alreadySeen(Onnx.NodeProto nodeProto) { return false; } @Override - public boolean isVariableNode(OnnxProto3.NodeProto nodeProto) { + public boolean isVariableNode(Onnx.NodeProto nodeProto) { return nodeProto.getOpType().contains("Var"); } @Override - public boolean shouldSkip(OnnxProto3.NodeProto opType) { + public boolean shouldSkip(Onnx.NodeProto opType) { return false; } @Override - public boolean hasShape(OnnxProto3.NodeProto nodeProto) { + public boolean hasShape(Onnx.NodeProto nodeProto) { return false; } @Override - public long[] getShape(OnnxProto3.NodeProto nodeProto) { + public long[] getShape(Onnx.NodeProto nodeProto) { return null; } @Override - public INDArray getArrayFrom(OnnxProto3.NodeProto nodeProto, OnnxProto3.GraphProto graph) { + public INDArray getArrayFrom(Onnx.NodeProto nodeProto, Onnx.GraphProto graph) { return null; } @Override - public String getOpType(OnnxProto3.NodeProto nodeProto) { + public String getOpType(Onnx.NodeProto nodeProto) { return nodeProto.getOpType(); } @Override - public List getNodeList(OnnxProto3.GraphProto graphProto) { + public List getNodeList(Onnx.GraphProto graphProto) { return graphProto.getNodeList(); } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/graphmapper/tf/TFGraphMapper.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/graphmapper/tf/TFGraphMapper.java index 5579569c3..f57fef4c7 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/graphmapper/tf/TFGraphMapper.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/graphmapper/tf/TFGraphMapper.java @@ -16,7 
+16,7 @@ package org.nd4j.imports.graphmapper.tf; -import com.github.os72.protobuf351.Message; +import org.nd4j.shade.protobuf.Message; import com.google.common.primitives.Floats; import com.google.common.primitives.Ints; import lombok.extern.slf4j.Slf4j; diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/graphmapper/tf/tensors/TFTensorMappers.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/graphmapper/tf/tensors/TFTensorMappers.java index 722168541..e9a99c6c0 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/graphmapper/tf/tensors/TFTensorMappers.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/graphmapper/tf/tensors/TFTensorMappers.java @@ -1,6 +1,6 @@ package org.nd4j.imports.graphmapper.tf.tensors; -import com.github.os72.protobuf351.Descriptors; +import org.nd4j.shade.protobuf.Descriptors; import org.bytedeco.javacpp.indexer.Bfloat16ArrayIndexer; import org.bytedeco.javacpp.indexer.HalfIndexer; import org.nd4j.linalg.api.buffer.DataType; diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/BaseBroadcastBoolOp.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/BaseBroadcastBoolOp.java index a844b04c7..a41dc8790 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/BaseBroadcastBoolOp.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/BaseBroadcastBoolOp.java @@ -19,7 +19,7 @@ package org.nd4j.linalg.api.ops; import lombok.NoArgsConstructor; import lombok.extern.slf4j.Slf4j; import lombok.val; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.base.Preconditions; @@ -205,7 +205,7 @@ public abstract class BaseBroadcastBoolOp extends BaseOp implements BroadcastOp @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/BaseBroadcastOp.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/BaseBroadcastOp.java index d65ff377e..7f0d7e40c 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/BaseBroadcastOp.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/BaseBroadcastOp.java @@ -19,7 +19,7 @@ package org.nd4j.linalg.api.ops; import lombok.NoArgsConstructor; import lombok.extern.slf4j.Slf4j; import lombok.val; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.base.Preconditions; @@ -200,7 +200,7 @@ public abstract class BaseBroadcastOp extends BaseOp implements BroadcastOp { @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/BaseOp.java 
b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/BaseOp.java index 925a5924f..8c9cdf4e0 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/BaseOp.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/BaseOp.java @@ -20,7 +20,7 @@ import lombok.Data; import lombok.Getter; import lombok.Setter; import lombok.val; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.functions.DifferentialFunction; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; @@ -134,7 +134,7 @@ public abstract class BaseOp extends DifferentialFunction implements Op { } @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { } @Override diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/BaseReduceOp.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/BaseReduceOp.java index ebf9b9c18..7fc0679db 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/BaseReduceOp.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/BaseReduceOp.java @@ -21,7 +21,7 @@ import lombok.Getter; import lombok.Setter; import lombok.extern.slf4j.Slf4j; import lombok.val; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.imports.graphmapper.onnx.OnnxGraphMapper; @@ -218,7 +218,7 @@ public abstract class BaseReduceOp extends BaseOp implements ReduceOp { @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { if (!attributesForNode.containsKey("axes")) { this.dimensions = new int[] { Integer.MAX_VALUE }; } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/DynamicCustomOp.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/DynamicCustomOp.java index 9b5b190c1..f52450eee 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/DynamicCustomOp.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/DynamicCustomOp.java @@ -21,7 +21,7 @@ import com.google.common.primitives.Doubles; import com.google.common.primitives.Longs; import lombok.*; import lombok.extern.slf4j.Slf4j; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.functions.DifferentialFunction; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; @@ -603,7 +603,7 @@ public class DynamicCustomOp extends DifferentialFunction implements CustomOp { } @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/NoOp.java 
b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/NoOp.java index 19d8fe987..6b174bd07 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/NoOp.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/NoOp.java @@ -16,7 +16,7 @@ package org.nd4j.linalg.api.ops; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.linalg.api.buffer.DataType; @@ -61,7 +61,7 @@ public class NoOp extends DynamicCustomOp { } @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/controlflow/If.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/controlflow/If.java index 6e0db97a5..03dc26313 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/controlflow/If.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/controlflow/If.java @@ -18,7 +18,7 @@ package org.nd4j.linalg.api.ops.impl.controlflow; import lombok.*; import lombok.extern.slf4j.Slf4j; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.functions.DifferentialFunction; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; @@ -367,7 +367,7 @@ public class If extends DifferentialFunction implements CustomOp { @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/controlflow/While.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/controlflow/While.java index eba0e1145..e26b0ea5f 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/controlflow/While.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/controlflow/While.java @@ -18,7 +18,7 @@ package org.nd4j.linalg.api.ops.impl.controlflow; import lombok.*; import lombok.extern.slf4j.Slf4j; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.functions.DifferentialFunction; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; @@ -468,7 +468,7 @@ public class While extends DifferentialFunction implements CustomOp { } @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/ExternalErrorsFunction.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/ExternalErrorsFunction.java index 378fbb06b..fd2134aad 100644 --- 
a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/ExternalErrorsFunction.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/ExternalErrorsFunction.java @@ -16,7 +16,7 @@ package org.nd4j.linalg.api.ops.impl.layers; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.functions.DifferentialFunction; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; @@ -122,7 +122,7 @@ public class ExternalErrorsFunction extends DynamicCustomOp { } @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/Linear.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/Linear.java index da2c26f54..27f357b4b 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/Linear.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/Linear.java @@ -19,7 +19,7 @@ package org.nd4j.linalg.api.ops.impl.layers; import lombok.Builder; import lombok.NoArgsConstructor; import lombok.val; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.functions.DifferentialFunction; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; @@ -96,7 +96,7 @@ public class Linear extends BaseModule { } @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/AvgPooling2D.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/AvgPooling2D.java index 3198a6a56..ac13c6224 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/AvgPooling2D.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/AvgPooling2D.java @@ -21,7 +21,7 @@ import lombok.Getter; import lombok.NoArgsConstructor; import lombok.extern.slf4j.Slf4j; import lombok.val; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.base.Preconditions; @@ -260,7 +260,7 @@ public class AvgPooling2D extends DynamicCustomOp { } @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { val paddingVal = !attributesForNode.containsKey("auto_pad") ? "VALID" : attributesForNode.get("auto_pad").getS().toStringUtf8(); val kernelShape = attributesForNode.get("kernel_shape").getIntsList(); val padding = !attributesForNode.containsKey("pads") ? 
Arrays.asList(1L) : attributesForNode.get("pads").getIntsList(); diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/AvgPooling3D.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/AvgPooling3D.java index 2c57c68de..6f58884f0 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/AvgPooling3D.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/AvgPooling3D.java @@ -18,7 +18,7 @@ package org.nd4j.linalg.api.ops.impl.layers.convolution; import lombok.Getter; import lombok.extern.slf4j.Slf4j; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.base.Preconditions; @@ -78,7 +78,7 @@ public class AvgPooling3D extends Pooling3D { } @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { throw new UnsupportedOperationException("Not yet implemented"); } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/BatchNorm.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/BatchNorm.java index 67fc9f3a5..bad975cb5 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/BatchNorm.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/BatchNorm.java @@ -21,7 +21,7 @@ import lombok.Getter; import lombok.NoArgsConstructor; import lombok.extern.slf4j.Slf4j; import lombok.val; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.autodiff.samediff.internal.SameDiffOp; @@ -139,7 +139,7 @@ public class BatchNorm extends DynamicCustomOp { } @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { OnnxGraphMapper.getInstance().initFunctionFromProperties(node.getOpType(), this, attributesForNode, node, graph); addArgs(); } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/Conv1D.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/Conv1D.java index 3d61de716..5ae2ac144 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/Conv1D.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/Conv1D.java @@ -21,7 +21,7 @@ import lombok.Getter; import lombok.NoArgsConstructor; import lombok.extern.slf4j.Slf4j; import lombok.val; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.base.Preconditions; diff --git 
a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/Conv2D.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/Conv2D.java index 4335f4561..04db5874c 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/Conv2D.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/Conv2D.java @@ -21,7 +21,7 @@ import lombok.Getter; import lombok.NoArgsConstructor; import lombok.extern.slf4j.Slf4j; import lombok.val; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.base.Preconditions; @@ -127,7 +127,7 @@ public class Conv2D extends DynamicCustomOp { } @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { OnnxGraphMapper.getInstance().initFunctionFromProperties(node.getOpType(), this, attributesForNode, node, graph); addArgs(); } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/DeConv2D.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/DeConv2D.java index 6cba853d0..65c0fccc3 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/DeConv2D.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/DeConv2D.java @@ -21,7 +21,7 @@ import lombok.Getter; import lombok.NoArgsConstructor; import lombok.extern.slf4j.Slf4j; import lombok.val; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.base.Preconditions; @@ -247,7 +247,7 @@ public class DeConv2D extends DynamicCustomOp { } @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { val autoPad = !attributesForNode.containsKey("auto_pad") ? "VALID" : attributesForNode.get("auto_pad").getS().toStringUtf8(); val dilations = attributesForNode.get("dilations"); val dilationY = dilations == null ? 
1 : dilations.getIntsList().get(0).intValue(); diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/DepthwiseConv2D.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/DepthwiseConv2D.java index 0ea84e081..92a39f188 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/DepthwiseConv2D.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/DepthwiseConv2D.java @@ -20,7 +20,7 @@ import lombok.Builder; import lombok.Getter; import lombok.extern.slf4j.Slf4j; import lombok.val; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.base.Preconditions; @@ -151,7 +151,7 @@ public class DepthwiseConv2D extends DynamicCustomOp { } @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { OnnxGraphMapper.getInstance().initFunctionFromProperties(node.getOpType(), this, attributesForNode, node, graph); addArgs(); } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/LocalResponseNormalization.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/LocalResponseNormalization.java index de4e763bc..421598d13 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/LocalResponseNormalization.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/LocalResponseNormalization.java @@ -21,7 +21,7 @@ import lombok.Getter; import lombok.NoArgsConstructor; import lombok.extern.slf4j.Slf4j; import lombok.val; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.base.Preconditions; @@ -115,7 +115,7 @@ public class LocalResponseNormalization extends DynamicCustomOp { } @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { val aAlpha = attributesForNode.get("alpha"); val aBeta = attributesForNode.get("beta"); val aBias = attributesForNode.get("bias"); diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/MaxPooling2D.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/MaxPooling2D.java index f996fc29f..b321334a5 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/MaxPooling2D.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/MaxPooling2D.java @@ -21,7 +21,7 @@ import lombok.Getter; import lombok.NonNull; import lombok.extern.slf4j.Slf4j; import lombok.val; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import 
org.nd4j.base.Preconditions; @@ -221,7 +221,7 @@ public class MaxPooling2D extends DynamicCustomOp { } @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { val paddingVal = !attributesForNode.containsKey("auto_pad") ? "VALID" : attributesForNode.get("auto_pad").getS().toStringUtf8(); val isSameNode = paddingVal.equals("SAME"); val kernelShape = attributesForNode.get("kernel_shape").getIntsList(); diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/MaxPooling3D.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/MaxPooling3D.java index a243dec9b..99d73d2af 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/MaxPooling3D.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/MaxPooling3D.java @@ -18,7 +18,7 @@ package org.nd4j.linalg.api.ops.impl.layers.convolution; import lombok.Getter; import lombok.extern.slf4j.Slf4j; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.base.Preconditions; @@ -78,7 +78,7 @@ public class MaxPooling3D extends Pooling3D { } @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { throw new UnsupportedOperationException("Not yet implemented"); } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/Pooling2D.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/Pooling2D.java index f7f21e78d..c45d106e7 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/Pooling2D.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/convolution/Pooling2D.java @@ -20,7 +20,7 @@ import lombok.Builder; import lombok.Getter; import lombok.extern.slf4j.Slf4j; import lombok.val; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.base.Preconditions; @@ -183,7 +183,7 @@ public class Pooling2D extends DynamicCustomOp { } @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { val isSameNode = attributesForNode.get("auto_pad").getS().equals("SAME"); val kernelShape = attributesForNode.get("kernel_shape").getIntsList(); val padding = attributesForNode.get("pads").getIntsList(); diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/recurrent/GRUCell.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/recurrent/GRUCell.java index 678a4afef..6c7daca69 100644 --- 
a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/recurrent/GRUCell.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/recurrent/GRUCell.java @@ -16,7 +16,7 @@ package org.nd4j.linalg.api.ops.impl.layers.recurrent; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.base.Preconditions; diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/recurrent/LSTMCell.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/recurrent/LSTMCell.java index 1fdd6b191..e9d2ffd3b 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/recurrent/LSTMCell.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/recurrent/LSTMCell.java @@ -16,7 +16,7 @@ package org.nd4j.linalg.api.ops.impl.layers.recurrent; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.linalg.api.ops.DynamicCustomOp; import org.nd4j.linalg.api.ops.impl.layers.recurrent.config.LSTMCellConfiguration; @@ -73,7 +73,7 @@ public class LSTMCell extends DynamicCustomOp { } @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/recurrent/SRU.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/recurrent/SRU.java index aaac14131..b916d4961 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/recurrent/SRU.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/recurrent/SRU.java @@ -16,7 +16,7 @@ package org.nd4j.linalg.api.ops.impl.layers.recurrent; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.imports.NoOpNameFoundException; import org.nd4j.linalg.api.ops.DynamicCustomOp; @@ -65,7 +65,7 @@ public class SRU extends DynamicCustomOp { } @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { super.initFromOnnx(node, initWith, attributesForNode, graph); } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/recurrent/SRUCell.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/recurrent/SRUCell.java index 625e09e91..4880b90fe 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/recurrent/SRUCell.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/layers/recurrent/SRUCell.java @@ -16,7 +16,7 @@ package org.nd4j.linalg.api.ops.impl.layers.recurrent; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.imports.NoOpNameFoundException; import 
org.nd4j.linalg.api.ops.DynamicCustomOp; @@ -66,7 +66,7 @@ public class SRUCell extends DynamicCustomOp { } @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { super.initFromOnnx(node, initWith, attributesForNode, graph); } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/reduce/Mmul.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/reduce/Mmul.java index 00cad1f88..7d711ca58 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/reduce/Mmul.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/reduce/Mmul.java @@ -18,7 +18,7 @@ package org.nd4j.linalg.api.ops.impl.reduce; import lombok.EqualsAndHashCode; import lombok.val; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.base.Preconditions; @@ -204,7 +204,7 @@ public class Mmul extends DynamicCustomOp { } @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { val isTransposeA = !attributesForNode.containsKey("transA") ? false : attributesForNode.get("transA").getI() > 0; val isTransposeB = !attributesForNode.containsKey("transB") ? false : attributesForNode.get("transB").getI() > 0; MMulTranspose mMulTranspose = MMulTranspose.builder() diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/reduce/TensorMmul.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/reduce/TensorMmul.java index 62e373832..3de44537a 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/reduce/TensorMmul.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/reduce/TensorMmul.java @@ -20,7 +20,7 @@ import com.google.common.primitives.Ints; import com.google.common.primitives.Longs; import lombok.NoArgsConstructor; import lombok.val; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.linalg.api.blas.params.MMulTranspose; @@ -283,7 +283,7 @@ public class TensorMmul extends DynamicCustomOp { } @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { val isTransposeA = !attributesForNode.containsKey("transA") ? false : attributesForNode.get("transA").getI() > 0; val isTransposeB = !attributesForNode.containsKey("transB") ? 
false : attributesForNode.get("transB").getI() > 0; MMulTranspose mMulTranspose = MMulTranspose.builder() diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Concat.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Concat.java index e0b0450d3..5c6beb945 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Concat.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Concat.java @@ -18,7 +18,7 @@ package org.nd4j.linalg.api.ops.impl.shape; import lombok.extern.slf4j.Slf4j; import lombok.val; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.base.Preconditions; @@ -163,7 +163,7 @@ public class Concat extends DynamicCustomOp { @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { super.initFromOnnx(node, initWith, attributesForNode, graph); } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Diag.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Diag.java index 90aed14bf..b6d08784b 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Diag.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Diag.java @@ -16,7 +16,7 @@ package org.nd4j.linalg.api.ops.impl.shape; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.base.Preconditions; @@ -77,7 +77,7 @@ public class Diag extends DynamicCustomOp { } @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { super.initFromOnnx(node, initWith, attributesForNode, graph); } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/DiagPart.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/DiagPart.java index d2807e36d..6b1688602 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/DiagPart.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/DiagPart.java @@ -16,7 +16,7 @@ package org.nd4j.linalg.api.ops.impl.shape; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.base.Preconditions; @@ -79,7 +79,7 @@ public class DiagPart extends DynamicCustomOp { } @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { super.initFromOnnx(node, initWith, attributesForNode, graph); } diff --git 
a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Gather.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Gather.java index 1782f75df..31718d337 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Gather.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Gather.java @@ -18,7 +18,7 @@ package org.nd4j.linalg.api.ops.impl.shape; import lombok.NoArgsConstructor; import lombok.val; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.imports.descriptors.properties.PropertyMapping; @@ -78,7 +78,7 @@ public class Gather extends DynamicCustomOp { } @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { OnnxGraphMapper.getInstance().initFunctionFromProperties(node.getOpType(), this, attributesForNode, node, graph); } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/GatherNd.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/GatherNd.java index 1be7c56bf..cfe4fe8be 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/GatherNd.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/GatherNd.java @@ -18,7 +18,7 @@ package org.nd4j.linalg.api.ops.impl.shape; import lombok.NoArgsConstructor; import lombok.val; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.base.Preconditions; diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/MergeAvg.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/MergeAvg.java index d8319cab2..ec86c6553 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/MergeAvg.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/MergeAvg.java @@ -17,7 +17,7 @@ package org.nd4j.linalg.api.ops.impl.shape; import lombok.extern.slf4j.Slf4j; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.base.Preconditions; @@ -65,7 +65,7 @@ public class MergeAvg extends DynamicCustomOp { } @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { super.initFromOnnx(node, initWith, attributesForNode, graph); } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/MergeMax.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/MergeMax.java index c9118990c..046f06c3c 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/MergeMax.java +++ 
b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/MergeMax.java @@ -18,7 +18,7 @@ package org.nd4j.linalg.api.ops.impl.shape; import lombok.extern.slf4j.Slf4j; import lombok.val; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.base.Preconditions; @@ -64,7 +64,7 @@ public class MergeMax extends DynamicCustomOp { } @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { super.initFromOnnx(node, initWith, attributesForNode, graph); } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/MergeSum.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/MergeSum.java index b7c370615..6b87ca5c8 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/MergeSum.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/MergeSum.java @@ -18,7 +18,7 @@ package org.nd4j.linalg.api.ops.impl.shape; import lombok.extern.slf4j.Slf4j; import lombok.val; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.base.Preconditions; @@ -66,7 +66,7 @@ public class MergeSum extends DynamicCustomOp { } @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { super.initFromOnnx(node, initWith, attributesForNode, graph); } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/ParallelStack.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/ParallelStack.java index 8d7dcf6a6..1856e6804 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/ParallelStack.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/ParallelStack.java @@ -17,7 +17,7 @@ package org.nd4j.linalg.api.ops.impl.shape; import lombok.val; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.base.Preconditions; @@ -68,7 +68,7 @@ public class ParallelStack extends DynamicCustomOp { } @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { throw new UnsupportedOperationException("No analog found for onnx for " + opName()); } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Rank.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Rank.java index 96f28dbf1..aacfa19e1 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Rank.java +++ 
b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Rank.java @@ -18,7 +18,7 @@ package org.nd4j.linalg.api.ops.impl.shape; import lombok.extern.slf4j.Slf4j; import lombok.val; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.base.Preconditions; @@ -66,7 +66,7 @@ public class Rank extends DynamicCustomOp { } @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Repeat.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Repeat.java index 14d67d912..02f8f9445 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Repeat.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Repeat.java @@ -18,7 +18,7 @@ package org.nd4j.linalg.api.ops.impl.shape; import lombok.NoArgsConstructor; import lombok.val; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.base.Preconditions; @@ -106,7 +106,7 @@ public class Repeat extends DynamicCustomOp { } @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { super.initFromOnnx(node, initWith, attributesForNode, graph); } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Reshape.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Reshape.java index 42e401859..b30bacc22 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Reshape.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Reshape.java @@ -18,7 +18,7 @@ package org.nd4j.linalg.api.ops.impl.shape; import lombok.extern.slf4j.Slf4j; import lombok.val; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.base.Preconditions; @@ -126,7 +126,7 @@ public class Reshape extends DynamicCustomOp { } @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { val shape = new OnnxGraphMapper().getShape(node); this.shape = shape; } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/SequenceMask.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/SequenceMask.java index 5faa82609..a2f6bd208 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/SequenceMask.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/SequenceMask.java @@ -18,7 +18,7 @@ package 
org.nd4j.linalg.api.ops.impl.shape; import lombok.NoArgsConstructor; import lombok.val; -import onnx.OnnxMlProto3; +import onnx.OnnxMl; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.base.Preconditions; diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Shape.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Shape.java index a1133ee82..6cd2eec06 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Shape.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Shape.java @@ -17,7 +17,7 @@ package org.nd4j.linalg.api.ops.impl.shape; import lombok.val; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.autodiff.samediff.serde.FlatBuffersMapper; @@ -87,7 +87,7 @@ public class Shape extends DynamicCustomOp { } @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { throw new NoOpNameFoundException("No onnx name found for shape " + opName()); } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/ShapeN.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/ShapeN.java index 241cc950f..55d9dd806 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/ShapeN.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/ShapeN.java @@ -16,7 +16,7 @@ package org.nd4j.linalg.api.ops.impl.shape; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.base.Preconditions; diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Size.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Size.java index 1ba9156bc..71b52a92a 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Size.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Size.java @@ -16,7 +16,7 @@ package org.nd4j.linalg.api.ops.impl.shape; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.base.Preconditions; diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Stack.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Stack.java index 44cb0539c..6cd09f9bd 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Stack.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Stack.java @@ -17,7 +17,7 @@ package org.nd4j.linalg.api.ops.impl.shape; import lombok.val; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import 
org.nd4j.base.Preconditions; @@ -93,7 +93,7 @@ public class Stack extends DynamicCustomOp { } @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { throw new UnsupportedOperationException("No analog found for onnx for " + opName()); } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Transpose.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Transpose.java index 965d071c3..2de0a29c5 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Transpose.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Transpose.java @@ -18,7 +18,7 @@ package org.nd4j.linalg.api.ops.impl.shape; import com.google.common.primitives.Ints; import lombok.val; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.autodiff.samediff.VariableType; @@ -156,7 +156,7 @@ public class Transpose extends DynamicCustomOp { } @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { if (!attributesForNode.containsKey("perm")) { } else diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Unstack.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Unstack.java index 3d7e07a72..9dd6b6338 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Unstack.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/Unstack.java @@ -17,7 +17,7 @@ package org.nd4j.linalg.api.ops.impl.shape; import lombok.val; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.base.Preconditions; @@ -127,7 +127,7 @@ public class Unstack extends DynamicCustomOp { @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { throw new UnsupportedOperationException("No analog found for onnx for " + opName()); } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/bp/ConcatBp.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/bp/ConcatBp.java index ead0f2747..70bc1b087 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/bp/ConcatBp.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/bp/ConcatBp.java @@ -18,7 +18,7 @@ package org.nd4j.linalg.api.ops.impl.shape.bp; import lombok.extern.slf4j.Slf4j; import lombok.val; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.base.Preconditions; @@ -71,7 +71,7 
@@ public class ConcatBp extends DynamicCustomOp { @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { //No op } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/tensorops/TensorArrayConcat.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/tensorops/TensorArrayConcat.java index 07bdab586..7759f96dd 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/tensorops/TensorArrayConcat.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/tensorops/TensorArrayConcat.java @@ -16,7 +16,7 @@ package org.nd4j.linalg.api.ops.impl.shape.tensorops; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.imports.NoOpNameFoundException; @@ -59,7 +59,7 @@ public class TensorArrayConcat extends BaseTensorOp { } @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { throw new UnsupportedOperationException(); } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/tensorops/TensorArrayGather.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/tensorops/TensorArrayGather.java index 3ab0d91c9..9e7669725 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/tensorops/TensorArrayGather.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/tensorops/TensorArrayGather.java @@ -16,7 +16,7 @@ package org.nd4j.linalg.api.ops.impl.shape.tensorops; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.imports.NoOpNameFoundException; @@ -59,7 +59,7 @@ public class TensorArrayGather extends BaseTensorOp { } @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { throw new UnsupportedOperationException(); } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/tensorops/TensorArrayRead.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/tensorops/TensorArrayRead.java index 619216813..6d8cff91c 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/tensorops/TensorArrayRead.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/tensorops/TensorArrayRead.java @@ -16,7 +16,7 @@ package org.nd4j.linalg.api.ops.impl.shape.tensorops; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.functions.DifferentialFunction; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; @@ -54,7 +54,7 @@ public class 
TensorArrayRead extends BaseTensorOp { } @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { } @Override diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/tensorops/TensorArrayScatter.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/tensorops/TensorArrayScatter.java index add288d89..9e1d93e2f 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/tensorops/TensorArrayScatter.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/tensorops/TensorArrayScatter.java @@ -16,7 +16,7 @@ package org.nd4j.linalg.api.ops.impl.shape.tensorops; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.linalg.api.buffer.DataType; @@ -52,7 +52,7 @@ public class TensorArrayScatter extends BaseTensorOp { @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { } @Override diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/tensorops/TensorArraySize.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/tensorops/TensorArraySize.java index 9734515d3..276dadcab 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/tensorops/TensorArraySize.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/tensorops/TensorArraySize.java @@ -16,7 +16,7 @@ package org.nd4j.linalg.api.ops.impl.shape.tensorops; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.imports.descriptors.properties.PropertyMapping; import org.nd4j.linalg.api.buffer.DataType; @@ -58,7 +58,7 @@ public class TensorArraySize extends BaseTensorOp { } @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { } @Override diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/tensorops/TensorArraySplit.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/tensorops/TensorArraySplit.java index fb52c78a7..589805641 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/tensorops/TensorArraySplit.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/shape/tensorops/TensorArraySplit.java @@ -16,7 +16,7 @@ package org.nd4j.linalg.api.ops.impl.shape.tensorops; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.linalg.api.buffer.DataType; @@ -52,7 +52,7 @@ public class TensorArraySplit extends BaseTensorOp { @Override - public void 
initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { } @Override diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/clip/ClipByNorm.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/clip/ClipByNorm.java index 1cda0257d..59b7ec2f5 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/clip/ClipByNorm.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/clip/ClipByNorm.java @@ -16,7 +16,7 @@ package org.nd4j.linalg.api.ops.impl.transforms.clip; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.base.Preconditions; @@ -64,7 +64,7 @@ public class ClipByNorm extends DynamicCustomOp { } @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { throw new UnsupportedOperationException("Not yet implemented"); } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/clip/ClipByValue.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/clip/ClipByValue.java index d25b0df62..11d3e9004 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/clip/ClipByValue.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/clip/ClipByValue.java @@ -16,7 +16,7 @@ package org.nd4j.linalg.api.ops.impl.transforms.clip; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.base.Preconditions; @@ -77,7 +77,7 @@ public class ClipByValue extends DynamicCustomOp { } @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { throw new UnsupportedOperationException("Not yet implemented"); } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/custom/Assign.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/custom/Assign.java index 7ca0b342a..35c209870 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/custom/Assign.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/custom/Assign.java @@ -16,7 +16,7 @@ package org.nd4j.linalg.api.ops.impl.transforms.custom; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.base.Preconditions; @@ -62,7 +62,7 @@ public class Assign extends DynamicCustomOp { } @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { 
+ public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { super.initFromOnnx(node, initWith, attributesForNode, graph); } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/custom/CumProd.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/custom/CumProd.java index d1d0176ef..9c04aeb12 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/custom/CumProd.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/custom/CumProd.java @@ -17,7 +17,7 @@ package org.nd4j.linalg.api.ops.impl.transforms.custom; import lombok.val; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.base.Preconditions; @@ -132,7 +132,7 @@ public class CumProd extends DynamicCustomOp { } @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { super.initFromOnnx(node, initWith, attributesForNode, graph); } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/custom/CumSum.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/custom/CumSum.java index 2b62b73cf..b8c7d5c51 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/custom/CumSum.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/custom/CumSum.java @@ -17,7 +17,7 @@ package org.nd4j.linalg.api.ops.impl.transforms.custom; import lombok.val; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; @@ -133,7 +133,7 @@ public class CumSum extends DynamicCustomOp { } @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { super.initFromOnnx(node, initWith, attributesForNode, graph); } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/custom/Fill.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/custom/Fill.java index db95ee728..af4097870 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/custom/Fill.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/custom/Fill.java @@ -17,7 +17,7 @@ package org.nd4j.linalg.api.ops.impl.transforms.custom; import lombok.val; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.base.Preconditions; @@ -80,7 +80,7 @@ public class Fill extends DynamicCustomOp { } @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, 
Map attributesForNode, Onnx.GraphProto graph) { super.initFromOnnx(node, initWith, attributesForNode, graph); } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/strict/RectifiedTanh.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/strict/RectifiedTanh.java index 4bd56ea4d..da439cec7 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/strict/RectifiedTanh.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/strict/RectifiedTanh.java @@ -16,7 +16,7 @@ package org.nd4j.linalg.api.ops.impl.transforms.strict; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.functions.DifferentialFunction; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; @@ -81,7 +81,7 @@ public class RectifiedTanh extends BaseTransformStrictOp { } @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { super.initFromOnnx(node, initWith, attributesForNode, graph); } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/random/impl/DropOutInverted.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/random/impl/DropOutInverted.java index bb4b86f12..6b174ae63 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/random/impl/DropOutInverted.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/random/impl/DropOutInverted.java @@ -17,7 +17,7 @@ package org.nd4j.linalg.api.ops.random.impl; import lombok.NonNull; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.linalg.api.ndarray.INDArray; @@ -75,7 +75,7 @@ public class DropOutInverted extends BaseRandomOp { } @Override - public void initFromOnnx(OnnxProto3.NodeProto node, SameDiff initWith, Map attributesForNode, OnnxProto3.GraphProto graph) { + public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) { super.initFromOnnx(node, initWith, attributesForNode, graph); } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/random/impl/Range.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/random/impl/Range.java index 27e9d9f3c..c3670b52f 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/random/impl/Range.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/random/impl/Range.java @@ -17,7 +17,7 @@ package org.nd4j.linalg.api.ops.random.impl; import lombok.val; -import onnx.OnnxProto3; +import onnx.Onnx; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.base.Preconditions; diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/protobuf/onnx/onnx-ml.proto3 b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/protobuf/onnx/onnx-ml.proto similarity index 100% rename from nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/protobuf/onnx/onnx-ml.proto3 rename to 
nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/protobuf/onnx/onnx-ml.proto diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/protobuf/onnx/onnx-operators.proto3 b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/protobuf/onnx/onnx-operators.proto similarity index 99% rename from nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/protobuf/onnx/onnx-operators.proto3 rename to nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/protobuf/onnx/onnx-operators.proto index a8db3ca23..48890a516 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/protobuf/onnx/onnx-operators.proto3 +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/protobuf/onnx/onnx-operators.proto @@ -9,7 +9,7 @@ syntax = "proto3"; package onnx; -import "onnx.proto3"; +import "onnx.proto"; // // This file contains the proto definitions for OperatorSetProto and diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/protobuf/onnx/onnx.proto3 b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/protobuf/onnx/onnx.proto similarity index 100% rename from nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/protobuf/onnx/onnx.proto3 rename to nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/protobuf/onnx/onnx.proto diff --git a/nd4j/nd4j-backends/nd4j-tests-tensorflow/src/test/cpujava/org/nd4j/tensorflow/conversion/GraphRunnerTest.java b/nd4j/nd4j-backends/nd4j-tests-tensorflow/src/test/cpujava/org/nd4j/tensorflow/conversion/GraphRunnerTest.java index af1af3a75..ee188605d 100644 --- a/nd4j/nd4j-backends/nd4j-tests-tensorflow/src/test/cpujava/org/nd4j/tensorflow/conversion/GraphRunnerTest.java +++ b/nd4j/nd4j-backends/nd4j-tests-tensorflow/src/test/cpujava/org/nd4j/tensorflow/conversion/GraphRunnerTest.java @@ -16,7 +16,7 @@ package org.nd4j.tensorflow.conversion; -import com.github.os72.protobuf351.util.JsonFormat; +import org.nd4j.shade.protobuf.util.JsonFormat; import org.apache.commons.io.IOUtils; import org.junit.Ignore; import org.junit.Rule; diff --git a/nd4j/nd4j-backends/nd4j-tests-tensorflow/src/test/gpujava/org/nd4j/tensorflow/conversion/GpuGraphRunnerTest.java b/nd4j/nd4j-backends/nd4j-tests-tensorflow/src/test/gpujava/org/nd4j/tensorflow/conversion/GpuGraphRunnerTest.java index accde5b1b..1ecc0e39a 100644 --- a/nd4j/nd4j-backends/nd4j-tests-tensorflow/src/test/gpujava/org/nd4j/tensorflow/conversion/GpuGraphRunnerTest.java +++ b/nd4j/nd4j-backends/nd4j-tests-tensorflow/src/test/gpujava/org/nd4j/tensorflow/conversion/GpuGraphRunnerTest.java @@ -16,7 +16,7 @@ package org.nd4j.tensorflow.conversion; -import com.github.os72.protobuf351.util.JsonFormat; +import org.nd4j.shade.protobuf.util.JsonFormat; import org.apache.commons.io.IOUtils; import org.junit.Ignore; import org.junit.Test; diff --git a/nd4j/nd4j-shade/pom.xml b/nd4j/nd4j-shade/pom.xml index 4a2c4ca1b..36b58087b 100644 --- a/nd4j/nd4j-shade/pom.xml +++ b/nd4j/nd4j-shade/pom.xml @@ -29,6 +29,7 @@ pom jackson + protobuf diff --git a/nd4j/nd4j-shade/protobuf/pom.xml b/nd4j/nd4j-shade/protobuf/pom.xml new file mode 100644 index 000000000..1cbd7d5a8 --- /dev/null +++ b/nd4j/nd4j-shade/protobuf/pom.xml @@ -0,0 +1,228 @@ + + + + nd4j-shade + org.nd4j + 1.0.0-SNAPSHOT + + 4.0.0 + + protobuf + + + true + + + + + com.google.protobuf + protobuf-java + 3.8.0 + + + com.google.protobuf + protobuf-java-util + 3.8.0 + + + + + + + custom-lifecycle + + + !skip.custom.lifecycle + + + + + + org.apache.portals.jetspeed-2 + jetspeed-mvn-maven-plugin + 2.3.1 + + + compile-and-pack + compile + + mvn + + + + + + 
org.apache.maven.shared + maven-invoker + 2.2 + + + + + + + create-shaded-jars + @rootdir@/nd4j/nd4j-shade/protobuf/ + clean,compile,package + + true + + + + + create-shaded-jars + + + + + + + + + + + + + + com.lewisd + lint-maven-plugin + 0.0.11 + + + pom-lint + none + + + + + + + + org.apache.maven.plugins + maven-shade-plugin + ${maven-shade-plugin.version} + + + package + + shade + + + + + reference.conf + + + + + + + + + + + + false + true + true + + + + com.google.protobuf:* + com.google.protobuf.*:* + + + + + + + com.google.protobuf + org.nd4j.shade.protobuf + + + + + + + org.apache.maven.plugins + maven-jar-plugin + + true + + + + empty-javadoc-jar + package + + jar + + + javadoc + ${basedir}/javadoc + + + + empty-sources-jar + package + + jar + + + sources + ${basedir}/src + + + + + + + org.apache.maven.plugins + maven-dependency-plugin + 3.0.0 + + + unpack + package + + unpack + + + + + org.nd4j + protobuf + ${project.version} + jar + false + ${project.build.directory}/classes/ + **/*.class,**/*.xml + + + + + + + + + + \ No newline at end of file diff --git a/nd4j/nd4j-tensorflow/src/main/java/org/nd4j/tensorflow/conversion/TensorflowConversion.java b/nd4j/nd4j-tensorflow/src/main/java/org/nd4j/tensorflow/conversion/TensorflowConversion.java index b47cd30d1..6eff18ecc 100644 --- a/nd4j/nd4j-tensorflow/src/main/java/org/nd4j/tensorflow/conversion/TensorflowConversion.java +++ b/nd4j/nd4j-tensorflow/src/main/java/org/nd4j/tensorflow/conversion/TensorflowConversion.java @@ -16,7 +16,7 @@ package org.nd4j.tensorflow.conversion; -import com.github.os72.protobuf351.InvalidProtocolBufferException; +import org.nd4j.shade.protobuf.InvalidProtocolBufferException; import org.bytedeco.javacpp.*; import org.bytedeco.javacpp.indexer.*; import org.nd4j.linalg.api.buffer.DataBuffer; diff --git a/nd4j/nd4j-tensorflow/src/main/java/org/nd4j/tensorflow/conversion/graphrunner/GraphRunner.java b/nd4j/nd4j-tensorflow/src/main/java/org/nd4j/tensorflow/conversion/graphrunner/GraphRunner.java index 633535197..79d45f781 100644 --- a/nd4j/nd4j-tensorflow/src/main/java/org/nd4j/tensorflow/conversion/graphrunner/GraphRunner.java +++ b/nd4j/nd4j-tensorflow/src/main/java/org/nd4j/tensorflow/conversion/graphrunner/GraphRunner.java @@ -16,9 +16,9 @@ package org.nd4j.tensorflow.conversion.graphrunner; -import com.github.os72.protobuf351.ByteString; -import com.github.os72.protobuf351.InvalidProtocolBufferException; -import com.github.os72.protobuf351.util.JsonFormat; +import org.nd4j.shade.protobuf.ByteString; +import org.nd4j.shade.protobuf.InvalidProtocolBufferException; +import org.nd4j.shade.protobuf.util.JsonFormat; import lombok.Getter; import lombok.Setter; import lombok.extern.slf4j.Slf4j; @@ -638,7 +638,7 @@ public class GraphRunner implements Closeable { /** * Convert a json string written out - * by {@link com.github.os72.protobuf351.util.JsonFormat} + * by {@link org.nd4j.shade.protobuf.util.JsonFormat} * to a {@link org.bytedeco.tensorflow.ConfigProto} * @param json the json to read * @return the config proto to use From b091e972ef48affb5efd5337963f68f4788db592 Mon Sep 17 00:00:00 2001 From: raver119 Date: Sat, 24 Aug 2019 14:16:34 +0300 Subject: [PATCH 04/56] - string NDArray flat serde impl + tests (#163) - string NDArray equalsTo impl Signed-off-by: raver119 --- libnd4j/blas/NDArray.hpp | 76 +++++++++---- libnd4j/blas/cpu/GraphExecutioner.cpp | 11 +- libnd4j/include/graph/FlatUtils.h | 2 + libnd4j/include/graph/impl/FlatUtils.cpp | 11 ++ .../tests_cpu/layers_tests/FlatUtilsTests.cpp | 100 
++++++++++++++++++ .../tests_cpu/layers_tests/StringTests.cpp | 3 +- 6 files changed, 172 insertions(+), 31 deletions(-) create mode 100644 libnd4j/tests_cpu/layers_tests/FlatUtilsTests.cpp diff --git a/libnd4j/blas/NDArray.hpp b/libnd4j/blas/NDArray.hpp index 1404afc96..fdbcae49f 100644 --- a/libnd4j/blas/NDArray.hpp +++ b/libnd4j/blas/NDArray.hpp @@ -476,19 +476,36 @@ std::vector NDArray::getShapeInfoAsVector() { //////////////////////////////////////////////////////////////////////// std::vector NDArray::asByteVector() { - std::vector result((unsigned long long) this->lengthOf() * sizeOfT()); - if (this->isView()) { - auto tmp = this->dup(this->ordering()); - memcpy(result.data(), tmp->getBuffer(), (unsigned long long) lengthOf() * sizeOfT()); + if (isS()) { + // string data type requires special treatment + syncToHost(); + auto numWords = this->lengthOf(); + auto offsetsBuffer = this->bufferAsT(); + auto headerLength = ShapeUtils::stringBufferHeaderRequirements(numWords); + auto dataLength = offsetsBuffer[numWords]; + std::vector result(headerLength + dataLength); - delete tmp; + memcpy(result.data(), getBuffer(), headerLength + dataLength); + + return result; + } else { + // all other types are linear + std::vector result((unsigned long long) this->lengthOf() * sizeOfT()); + + if (this->isView()) { + auto tmp = this->dup(this->ordering()); + syncToHost(); + memcpy(result.data(), tmp->getBuffer(), (unsigned long long) lengthOf() * sizeOfT()); + + delete tmp; + } else { + syncToHost(); + memcpy(result.data(), getBuffer(), (unsigned long long) lengthOf() * sizeOfT()); + } + return result; } - else { - memcpy(result.data(), getBuffer(), (unsigned long long) lengthOf() * sizeOfT()); - } - return result; } ////////////////////////////////////////////////////////////////////////// @@ -1584,9 +1601,7 @@ std::string* NDArray::bufferAsT() const { ////////////////////////////////////////////////////////////////////////// template T* NDArray::bufferAsT() const { - if (isS()) - throw std::runtime_error("You can't use this method on String array"); - + // FIXME: do we REALLY want sync here? 
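// ---- Editor's note: illustrative sketch, not part of this patch ----
// The string branch of asByteVector() above assumes a packed layout: a header of word offsets
// followed by the raw character data, with offsets[numWords] holding the total data length.
// The helper below mimics that layout with plain std::int64_t offsets; the real header size comes
// from ShapeUtils::stringBufferHeaderRequirements, so "(numWords + 1) offsets" is an assumption here.
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <string>
#include <vector>

static std::vector<std::int8_t> packUtf8Words(const std::vector<std::string> &words) {
    const std::size_t numWords = words.size();
    const std::size_t headerLength = (numWords + 1) * sizeof(std::int64_t); // offsets header (assumed)
    std::size_t dataLength = 0;
    for (const auto &w : words) dataLength += w.size();

    std::vector<std::int8_t> buffer(headerLength + dataLength);
    auto offsets = reinterpret_cast<std::int64_t *>(buffer.data());

    std::size_t pos = 0;
    for (std::size_t i = 0; i < numWords; ++i) {
        offsets[i] = static_cast<std::int64_t>(pos);
        std::memcpy(buffer.data() + headerLength + pos, words[i].data(), words[i].size());
        pos += words[i].size();
    }
    offsets[numWords] = static_cast<std::int64_t>(pos); // read back above as offsetsBuffer[numWords]
    return buffer;
}
// ---- end editor's note ----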
syncToHost(); return reinterpret_cast(getBuffer()); @@ -3202,20 +3217,39 @@ bool NDArray::equalsTo(const NDArray *other, double eps) const { } else if (!shape::equalsSoft(getShapeInfo(), other->getShapeInfo())) return false; - NDArray tmp(nd4j::DataType::FLOAT32, getContext()); // scalar = 0 + if (isS()) { + // string is special case, we'll compare them one by one, considering both arrays are guaranteed to have the same length + for (int e = 0; e < this->lengthOf(); e++) { + auto s1 = this->e(e); + auto s2 = other->e(e); - ExtraArguments extras({eps}); + if (s1 != s2) + return false; + } - NDArray::prepareSpecialUse({&tmp}, {this, other}); - NativeOpExecutioner::execReduce3Scalar(getContext(), reduce3::EqualsWithEps, getBuffer(), getShapeInfo(), getSpecialBuffer(), getSpecialShapeInfo(), extras.argumentsAsT(DataType::FLOAT32), other->getBuffer(), other->getShapeInfo(), other->getSpecialBuffer(), other->getSpecialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), tmp.specialBuffer(), tmp.specialShapeInfo()); - NDArray::registerSpecialUse({&tmp}, {this, other}); + return true; + } else { + // regular numeric types + NDArray tmp(nd4j::DataType::FLOAT32, getContext()); // scalar = 0 - synchronize("NDArray::equalsTo"); + ExtraArguments extras({eps}); - if (tmp.e(0) > 0) - return false; + NDArray::prepareSpecialUse({&tmp}, {this, other}); + NativeOpExecutioner::execReduce3Scalar(getContext(), reduce3::EqualsWithEps, getBuffer(), getShapeInfo(), + getSpecialBuffer(), getSpecialShapeInfo(), + extras.argumentsAsT(DataType::FLOAT32), other->getBuffer(), + other->getShapeInfo(), other->getSpecialBuffer(), + other->getSpecialShapeInfo(), tmp.buffer(), tmp.shapeInfo(), + tmp.specialBuffer(), tmp.specialShapeInfo()); + NDArray::registerSpecialUse({&tmp}, {this, other}); - return true; + synchronize("NDArray::equalsTo"); + + if (tmp.e(0) > 0) + return false; + + return true; + } } ////////////////////////////////////////////////////////////////////////// diff --git a/libnd4j/blas/cpu/GraphExecutioner.cpp b/libnd4j/blas/cpu/GraphExecutioner.cpp index b5e7d9bf2..6f97bc024 100644 --- a/libnd4j/blas/cpu/GraphExecutioner.cpp +++ b/libnd4j/blas/cpu/GraphExecutioner.cpp @@ -54,6 +54,7 @@ #include #include #include +#include namespace nd4j{ namespace graph { @@ -575,15 +576,9 @@ Nd4jStatus GraphExecutioner::execute(Graph *graph, VariableSpace* variableSpace) continue; - NDArray* array = var->getNDArray(); - auto byteVector = array->asByteVector(); + auto array = var->getNDArray(); - auto fBuffer = builder.CreateVector(byteVector); - auto fShape = builder.CreateVector(array->getShapeInfoAsFlatVector()); - - auto bo = static_cast(BitwiseUtils::asByteOrder()); - - auto fArray = CreateFlatArray(builder, fShape, fBuffer, static_cast(array->dataType()), bo); + auto fArray = FlatUtils::toFlatArray(builder, *array); auto fName = builder.CreateString(*(var->getName())); auto id = CreateIntPair(builder, var->id(), var->index()); diff --git a/libnd4j/include/graph/FlatUtils.h b/libnd4j/include/graph/FlatUtils.h index abfff5915..939db1fb7 100644 --- a/libnd4j/include/graph/FlatUtils.h +++ b/libnd4j/include/graph/FlatUtils.h @@ -36,6 +36,8 @@ namespace nd4j { static std::pair fromLongPair(LongPair* pair); static NDArray* fromFlatArray(const nd4j::graph::FlatArray* flatArray); + + static flatbuffers::Offset toFlatArray(flatbuffers::FlatBufferBuilder &builder, NDArray &array); }; } } diff --git a/libnd4j/include/graph/impl/FlatUtils.cpp b/libnd4j/include/graph/impl/FlatUtils.cpp index ad0c5112d..bc8ff7e33 100644 --- 
a/libnd4j/include/graph/impl/FlatUtils.cpp +++ b/libnd4j/include/graph/impl/FlatUtils.cpp @@ -102,5 +102,16 @@ namespace nd4j { delete[] newShape; return array; } + + flatbuffers::Offset FlatUtils::toFlatArray(flatbuffers::FlatBufferBuilder &builder, NDArray &array) { + auto byteVector = array.asByteVector(); + + auto fBuffer = builder.CreateVector(byteVector); + auto fShape = builder.CreateVector(array.getShapeInfoAsFlatVector()); + + auto bo = static_cast(BitwiseUtils::asByteOrder()); + + return CreateFlatArray(builder, fShape, fBuffer, static_cast(array.dataType()), bo); + } } } \ No newline at end of file diff --git a/libnd4j/tests_cpu/layers_tests/FlatUtilsTests.cpp b/libnd4j/tests_cpu/layers_tests/FlatUtilsTests.cpp new file mode 100644 index 000000000..bf428b833 --- /dev/null +++ b/libnd4j/tests_cpu/layers_tests/FlatUtilsTests.cpp @@ -0,0 +1,100 @@ +/******************************************************************************* + * Copyright (c) 2015-2019 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +#include +#include +#include "testlayers.h" +#include +#include + +using namespace nd4j; + +class FlatUtilsTests : public testing::Test { +public: + +}; + +TEST_F(FlatUtilsTests, flat_float_serde_1) { + auto array = NDArrayFactory::create('c', {4}, {1.f, 2.f, 3.f, 4.f}); + + flatbuffers::FlatBufferBuilder builder(1024); + auto flatArray = FlatUtils::toFlatArray(builder, array); + builder.Finish(flatArray); + + + auto pfArray = GetFlatArray(builder.GetBufferPointer()); + + auto restored = FlatUtils::fromFlatArray(pfArray); + + ASSERT_EQ(array, *restored); + + delete restored; +} + +TEST_F(FlatUtilsTests, flat_int_serde_1) { + auto array = NDArrayFactory::create('c', {4}, {1, 2, 3, 4}); + + flatbuffers::FlatBufferBuilder builder(1024); + auto flatArray = FlatUtils::toFlatArray(builder, array); + builder.Finish(flatArray); + + + auto pfArray = GetFlatArray(builder.GetBufferPointer()); + + auto restored = FlatUtils::fromFlatArray(pfArray); + + ASSERT_EQ(array, *restored); + + delete restored; +} + +TEST_F(FlatUtilsTests, flat_bool_serde_1) { + auto array = NDArrayFactory::create('c', {4}, {true, false, true, false}); + + flatbuffers::FlatBufferBuilder builder(1024); + auto flatArray = FlatUtils::toFlatArray(builder, array); + builder.Finish(flatArray); + + + auto pfArray = GetFlatArray(builder.GetBufferPointer()); + + auto restored = FlatUtils::fromFlatArray(pfArray); + + ASSERT_EQ(array, *restored); + + delete restored; +} + +TEST_F(FlatUtilsTests, flat_string_serde_1) { + auto array = NDArrayFactory::string('c', {3}, {"alpha", "beta", "gamma"}); + + flatbuffers::FlatBufferBuilder builder(1024); + auto flatArray = FlatUtils::toFlatArray(builder, array); + builder.Finish(flatArray); + + + auto pfArray = GetFlatArray(builder.GetBufferPointer()); + + auto restored = FlatUtils::fromFlatArray(pfArray); + + ASSERT_EQ(array, *restored); + + 
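// ---- Editor's note: usage sketch only, mirroring the tests above (same includes/usings as
// FlatUtilsTests.cpp); it adds no new API. fromFlatArray returns a raw owning NDArray*, which the
// tests release with delete — wrapping it in std::unique_ptr keeps that ownership explicit.
// The comparison relies on the operator== already exercised by ASSERT_EQ(array, *restored).
#include <memory>

static bool flatRoundTripEquals(NDArray &array) {
    flatbuffers::FlatBufferBuilder builder(1024);
    builder.Finish(FlatUtils::toFlatArray(builder, array));

    // builder.GetBufferPointer() / builder.GetSize() is the byte span one would persist or transmit
    auto restoredFlat = GetFlatArray(builder.GetBufferPointer());

    std::unique_ptr<NDArray> restored(FlatUtils::fromFlatArray(restoredFlat));
    return array == *restored;
}
// ---- end editor's note ----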
delete restored; +} \ No newline at end of file diff --git a/libnd4j/tests_cpu/layers_tests/StringTests.cpp b/libnd4j/tests_cpu/layers_tests/StringTests.cpp index a023dcdd3..2ae236210 100644 --- a/libnd4j/tests_cpu/layers_tests/StringTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/StringTests.cpp @@ -24,7 +24,6 @@ #include "testlayers.h" #include -using namespace nd4j; using namespace nd4j; class StringTests : public testing::Test { @@ -91,4 +90,4 @@ TEST_F(StringTests, Basic_dup_1) { ASSERT_EQ(f, z1); delete dup; -} +} \ No newline at end of file From 841eeb56c5d8d78d3df6804024393a8022e5f09d Mon Sep 17 00:00:00 2001 From: raver119 Date: Sat, 24 Aug 2019 16:18:38 +0300 Subject: [PATCH 05/56] get rid of context variable Signed-off-by: raver119 --- .../ops/declarable/helpers/cuda/lup.cu | 34 ++++++------------- 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/libnd4j/include/ops/declarable/helpers/cuda/lup.cu b/libnd4j/include/ops/declarable/helpers/cuda/lup.cu index bf9c73e7c..97d47079b 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/lup.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/lup.cu @@ -31,8 +31,6 @@ namespace nd4j { namespace ops { namespace helpers { - nd4j::LaunchContext* defaultContext = nd4j::LaunchContext::defaultContext(); - // template // static __device__ void swapRows_(T* matrix, Nd4jLong* shape, int theFirst, int theSecond, Nd4jLong N) { // if (theFirst != theSecond) { @@ -204,7 +202,7 @@ namespace helpers { if (inputMatrix->isIdentityMatrix()) return; - auto stream = defaultContext->getCudaStream(); + auto stream = LaunchContext::defaultContext()->getCudaStream(); // invert main diagonal upvertKernel << < 1, n, 512, *stream >> > @@ -227,7 +225,7 @@ namespace helpers { static void invertUpperMatrix_(NDArray* inputMatrix, NDArray* invertedMatrix) { int n = inputMatrix->rows(); invertedMatrix->setIdentity(); - auto stream = defaultContext->getCudaStream(); + auto stream = LaunchContext::defaultContext()->getCudaStream(); if (inputMatrix->isIdentityMatrix()) { // the inverse for I is I return; } @@ -392,7 +390,6 @@ namespace helpers { auto n = input->rows(); cusolverDnHandle_t cusolverH = nullptr; cusolverStatus_t status = cusolverDnCreate(&cusolverH); - defaultContext = context; if (CUSOLVER_STATUS_SUCCESS != status) { throw cuda_exception::build("Cannot create cuSolver handle", status); } @@ -543,9 +540,8 @@ namespace helpers { // DataType dtype = input->dataType(); // if (dtype != DataType::DOUBLE) // dtype = DataType::FLOAT32; - defaultContext = context; auto matrix = NDArrayFactory::create(input->ordering(), {n, n}, DataTypeUtils::fromT(), - defaultContext); //, block.getWorkspace()); + LaunchContext::defaultContext()); //, block.getWorkspace()); auto det = NDArrayFactory::create(1); auto stream = context->getCudaStream(); NDArray::prepareSpecialUse({output}, {input}); @@ -578,7 +574,6 @@ namespace helpers { } int determinant(nd4j::LaunchContext *context, NDArray *input, NDArray *output) { - defaultContext = context; NDArray::prepareSpecialUse({output}, {input}); BUILD_SINGLE_SELECTOR(input->dataType(), return determinant_, (context, input, output), FLOAT_NATIVE); NDArray::registerSpecialUse({output}, {input}); @@ -586,7 +581,6 @@ namespace helpers { template int logAbsDeterminant_(LaunchContext *context, NDArray *input, NDArray *output) { - defaultContext = context; Nd4jLong n = input->sizeAt(-1); Nd4jLong n2 = n * n; std::vector dims(); @@ -598,7 +592,7 @@ namespace helpers { dtype = DataType::FLOAT32; auto matrix = 
NDArrayFactory::create(input->ordering(), {n, n}, dtype, - defaultContext); //, block.getWorkspace()); + LaunchContext::defaultContext()); //, block.getWorkspace()); auto det = NDArrayFactory::create(1); auto stream = context->getCudaStream(); NDArray::prepareSpecialUse({output}, {input}); @@ -633,7 +627,6 @@ namespace helpers { } int logAbsDeterminant(nd4j::LaunchContext *context, NDArray *input, NDArray *output) { - defaultContext = context; NDArray::prepareSpecialUse({output}, {input}); BUILD_SINGLE_SELECTOR(input->dataType(), return logAbsDeterminant_, (context, input, output), FLOAT_NATIVE); NDArray::registerSpecialUse({output}, {input}); @@ -696,17 +689,16 @@ namespace helpers { template static int inverse_(nd4j::LaunchContext *context, NDArray *input, NDArray *output) { - defaultContext = context; auto n = input->sizeAt(-1); auto n2 = n * n; auto dtype = DataTypeUtils::fromT(); //input->dataType(); // if (dtype != DataType::DOUBLE) // dtype = DataType::FLOAT32; - NDArray matrix = NDArrayFactory::create('c', {n, n}, dtype, defaultContext); - NDArray upper = NDArrayFactory::create('c', {n, n}, dtype, defaultContext); - NDArray lower = NDArrayFactory::create('c', {n, n}, dtype, defaultContext); - NDArray compound = NDArrayFactory::create('c', {n, n}, dtype, defaultContext); - NDArray permutation = NDArrayFactory::create('c', {n, n}, dtype, defaultContext); + NDArray matrix = NDArrayFactory::create('c', {n, n}, dtype, LaunchContext::defaultContext()); + NDArray upper = NDArrayFactory::create('c', {n, n}, dtype, LaunchContext::defaultContext()); + NDArray lower = NDArrayFactory::create('c', {n, n}, dtype, LaunchContext::defaultContext()); + NDArray compound = NDArrayFactory::create('c', {n, n}, dtype, LaunchContext::defaultContext()); + NDArray permutation = NDArrayFactory::create('c', {n, n}, dtype, LaunchContext::defaultContext()); auto packX = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), {input->rankOf() - 2, input->rankOf() - 1}); @@ -745,7 +737,6 @@ namespace helpers { } int inverse(nd4j::LaunchContext *context, NDArray *input, NDArray *output) { - defaultContext = context; NDArray::prepareSpecialUse({output}, {input}); BUILD_SINGLE_SELECTOR(input->dataType(), return inverse_, (context, input, output), FLOAT_NATIVE); NDArray::registerSpecialUse({output}, {input}); @@ -788,7 +779,6 @@ namespace helpers { int cholesky__(LaunchContext *context, NDArray *input, NDArray *output, bool inplace) { if (!inplace) output->assign(input); - defaultContext = context; std::unique_ptr tempOutput(output->dup()); cusolverDnHandle_t handle = nullptr; auto n = input->sizeAt(-1); @@ -868,7 +858,6 @@ namespace helpers { // template int cholesky_(LaunchContext *context, NDArray *input, NDArray *output, bool inplace) { - defaultContext = context; NDArray::prepareSpecialUse({output}, {input}); if (input->dataType() == DataType::DOUBLE) cholesky__(context, input, output, inplace); @@ -877,7 +866,7 @@ namespace helpers { else { std::unique_ptr tempOutput( NDArrayFactory::create_('c', input->getShapeAsVector(), DataType::FLOAT32, - defaultContext)); + LaunchContext::defaultContext())); tempOutput->assign(input); cholesky__(context, tempOutput.get(), tempOutput.get(), true); output->assign(tempOutput.get()); @@ -888,7 +877,6 @@ namespace helpers { int cholesky(nd4j::LaunchContext *context, NDArray *input, NDArray *output, bool inplace) { // BUILD_SINGLE_SELECTOR(input->dataType(), return cholesky_, (context, input, output, inplace), FLOAT_TYPES); - defaultContext = context; 
return cholesky_(context, input, output, inplace); } // BUILD_SINGLE_TEMPLATE(template int cholesky_, (LaunchContext* context, NDArray* input, NDArray* output, bool inplace), FLOAT_TYPES); @@ -927,7 +915,6 @@ namespace helpers { template int logdetFunctor_(nd4j::LaunchContext *context, NDArray *input, NDArray *output) { - defaultContext = context; NDArray::prepareSpecialUse({output}, {input}); auto n2 = input->sizeAt(-1) * input->sizeAt(-2); auto stream = context->getCudaStream(); @@ -957,7 +944,6 @@ namespace helpers { } int logdetFunctor(nd4j::LaunchContext *context, NDArray *input, NDArray *output) { - defaultContext = context; BUILD_SINGLE_SELECTOR(output->dataType(), logdetFunctor_, (context, input, output), FLOAT_NATIVE); } From ece6a17b1197d8ee57487b57253c92906b1bf2f5 Mon Sep 17 00:00:00 2001 From: raver119 Date: Sat, 24 Aug 2019 16:57:48 +0300 Subject: [PATCH 06/56] lup context fix (#164) Signed-off-by: raver119 --- .../ops/declarable/helpers/cpu/lup.cpp | 61 ++++++-------- .../ops/declarable/helpers/cuda/lup.cu | 81 +++++++------------ 2 files changed, 57 insertions(+), 85 deletions(-) diff --git a/libnd4j/include/ops/declarable/helpers/cpu/lup.cpp b/libnd4j/include/ops/declarable/helpers/cpu/lup.cpp index ee9a78cee..1e3c798e2 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/lup.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/lup.cpp @@ -26,7 +26,6 @@ namespace nd4j { namespace ops { namespace helpers { - nd4j::LaunchContext* defaultContext = nd4j::LaunchContext::defaultContext(); template static void swapRows_(NDArray* matrix, int theFirst, int theSecond) { @@ -108,14 +107,14 @@ namespace helpers { template - static NDArray lup_(NDArray* input, NDArray* compound, NDArray* permutation) { + static NDArray lup_(LaunchContext *context, NDArray* input, NDArray* compound, NDArray* permutation) { const int rowNum = input->rows(); const int columnNum = input->columns(); NDArray determinant = NDArrayFactory::create(1.f); NDArray compoundMatrix = *input; // copy - NDArray permutationMatrix(input, false, defaultContext); // has same shape as input and contiguous strides + NDArray permutationMatrix(input, false, context); // has same shape as input and contiguous strides permutationMatrix.setIdentity(); T pivotValue; // = T(0.0); @@ -161,46 +160,43 @@ namespace helpers { return determinant; } - BUILD_SINGLE_TEMPLATE(template NDArray lup_, (NDArray* input, NDArray* output, NDArray* permutation), FLOAT_TYPES); + BUILD_SINGLE_TEMPLATE(template NDArray lup_, (LaunchContext *context, NDArray* input, NDArray* output, NDArray* permutation), FLOAT_TYPES); template - static int determinant_(NDArray* input, NDArray* output) { + static int determinant_(LaunchContext *context, NDArray* input, NDArray* output) { Nd4jLong n = input->sizeAt(-1); Nd4jLong n2 = n * n; - auto matrix = NDArrayFactory::create(input->ordering(), {n, n}, input->dataType(), defaultContext); //, block.getWorkspace()); + auto matrix = NDArrayFactory::create(input->ordering(), {n, n}, input->dataType(), context); //, block.getWorkspace()); for (int e = 0; e < output->lengthOf(); e++) { for (int k = e * n2, row = 0; k < (e + 1) * n2; ++k, ++row) matrix.p(row, input->e(k)); - output->p(e, lup_(&matrix, (NDArray*)nullptr, (NDArray*)nullptr)); + output->p(e, lup_(context, &matrix, (NDArray*)nullptr, (NDArray*)nullptr)); } return Status::OK(); } - BUILD_SINGLE_TEMPLATE(template int determinant_, (NDArray* input, NDArray* output), FLOAT_TYPES); - int determinant(nd4j::LaunchContext * context, NDArray* input, NDArray* output) { 
- defaultContext = context; - BUILD_SINGLE_SELECTOR(input->dataType(), return determinant_, (input, output), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(input->dataType(), return determinant_, (context, input, output), FLOAT_TYPES); } template - int logAbsDeterminant_(NDArray* input, NDArray* output) { + int logAbsDeterminant_(LaunchContext *context, NDArray* input, NDArray* output) { Nd4jLong n = input->sizeAt(-1); Nd4jLong n2 = n * n; - NDArray matrix = NDArrayFactory::create(input->ordering(), {n, n}, input->dataType(), defaultContext); //, block.getWorkspace()); + NDArray matrix = NDArrayFactory::create(input->ordering(), {n, n}, input->dataType(), context); //, block.getWorkspace()); for (int e = 0; e < output->lengthOf(); e++) { for (int k = e * n2, row = 0; k < (e + 1) * n2; ++k, ++row) { matrix.p(row, input->e(k)); } - NDArray det = lup_(&matrix, (NDArray*)nullptr, (NDArray*)nullptr); + NDArray det = lup_(context, &matrix, (NDArray*)nullptr, (NDArray*)nullptr); if (det.e(0) != 0.f) output->p(e, nd4j::math::nd4j_log(nd4j::math::nd4j_abs(det.t(0)))); } @@ -208,25 +204,23 @@ template return ND4J_STATUS_OK; } - BUILD_SINGLE_TEMPLATE(template int logAbsDeterminant_, (NDArray* input, NDArray* output), FLOAT_TYPES); - int logAbsDeterminant(nd4j::LaunchContext * context, NDArray* input, NDArray* output) { - BUILD_SINGLE_SELECTOR(input->dataType(), return logAbsDeterminant_, (input, output), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(input->dataType(), return logAbsDeterminant_, (context, input, output), FLOAT_TYPES); } template - static int inverse_(NDArray* input, NDArray* output) { + static int inverse_(LaunchContext *context, NDArray* input, NDArray* output) { auto n = input->sizeAt(-1); auto n2 = n * n; auto totalCount = output->lengthOf() / n2; output->assign(0.f); // fill up output tensor with zeros - auto matrix = NDArrayFactory::create('c', {n, n}, DataTypeUtils::fromT(), defaultContext); //, block.getWorkspace()); - auto compound = NDArrayFactory::create('c', {n, n}, DataTypeUtils::fromT(), defaultContext); //, block.getWorkspace()); - auto permutation = NDArrayFactory::create('c', {n, n}, DataTypeUtils::fromT(), defaultContext); - auto lowerMatrix = NDArrayFactory::create('c', {n, n}, DataTypeUtils::fromT(), defaultContext); - auto upperMatrix = NDArrayFactory::create('c', {n, n}, DataTypeUtils::fromT(), defaultContext); + auto matrix = NDArrayFactory::create('c', {n, n}, DataTypeUtils::fromT(), context); //, block.getWorkspace()); + auto compound = NDArrayFactory::create('c', {n, n}, DataTypeUtils::fromT(), context); //, block.getWorkspace()); + auto permutation = NDArrayFactory::create('c', {n, n}, DataTypeUtils::fromT(), context); + auto lowerMatrix = NDArrayFactory::create('c', {n, n}, DataTypeUtils::fromT(), context); + auto upperMatrix = NDArrayFactory::create('c', {n, n}, DataTypeUtils::fromT(), context); for (int e = 0; e < totalCount; e++) { if (e) @@ -235,7 +229,7 @@ template for (int k = e * n2, row = 0; k < (e + 1) * n2; k++) { matrix.p(row++, input->e(k)); } - T det = lup_(&matrix, &compound, &permutation).template e(0); + T det = lup_(context, &matrix, &compound, &permutation).template e(0); // FIXME: and how this is going to work on float16? 
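// ---- Editor's note: textbook sketch, independent of the nd4j types used above ----
// lup_ returns the determinant of the matrix it decomposes. The identity behind that is
// det(A) = (-1)^(row swaps) * product(diag(U)) for LU with partial pivoting; a plain row-major
// double version for an n x n matrix looks like this:
#include <cmath>
#include <cstddef>
#include <utility>
#include <vector>

static double luDeterminant(std::vector<double> a, std::size_t n) {
    double det = 1.0;
    for (std::size_t k = 0; k < n; ++k) {
        // partial pivoting: move the largest remaining |a(i,k)| into the pivot row
        std::size_t pivot = k;
        for (std::size_t i = k + 1; i < n; ++i)
            if (std::fabs(a[i * n + k]) > std::fabs(a[pivot * n + k])) pivot = i;
        if (a[pivot * n + k] == 0.0) return 0.0;           // singular matrix
        if (pivot != k) {
            for (std::size_t j = 0; j < n; ++j) std::swap(a[k * n + j], a[pivot * n + j]);
            det = -det;                                     // each row swap flips the sign
        }
        det *= a[k * n + k];
        for (std::size_t i = k + 1; i < n; ++i) {
            const double factor = a[i * n + k] / a[k * n + k];
            for (std::size_t j = k; j < n; ++j) a[i * n + j] -= factor * a[k * n + j];
        }
    }
    return det;
}
// ---- end editor's note ----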
if (nd4j::math::nd4j_abs(det) < T(0.000001)) { @@ -268,8 +262,7 @@ template } int inverse(nd4j::LaunchContext * context, NDArray* input, NDArray* output) { - defaultContext = context; - BUILD_SINGLE_SELECTOR(input->dataType(), return inverse_, (input, output), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(input->dataType(), return inverse_, (context, input, output), FLOAT_TYPES); } template @@ -296,14 +289,13 @@ template return true; } - BUILD_SINGLE_TEMPLATE(template bool checkCholeskyInput_, (nd4j::LaunchContext * context, NDArray const* input), FLOAT_TYPES); bool checkCholeskyInput(nd4j::LaunchContext * context, NDArray const* input) { BUILD_SINGLE_SELECTOR(input->dataType(), return checkCholeskyInput_, (context, input), FLOAT_TYPES); } template - int cholesky_(NDArray* input, NDArray* output, bool inplace) { + int cholesky_(LaunchContext *context, NDArray* input, NDArray* output, bool inplace) { auto n = input->sizeAt(-1); auto n2 = n * n; @@ -311,8 +303,8 @@ template if (!inplace) output->assign(0.f); // fill up output tensor with zeros only inplace=false - std::unique_ptr matrix(NDArrayFactory::create_('c', {n, n}, input->dataType(), defaultContext)); //, block.getWorkspace()); - std::unique_ptr lowerMatrix(NDArrayFactory::create_('c',{n, n}, input->dataType(), defaultContext)); + std::unique_ptr matrix(NDArrayFactory::create_('c', {n, n}, input->dataType(), context)); //, block.getWorkspace()); + std::unique_ptr lowerMatrix(NDArrayFactory::create_('c',{n, n}, input->dataType(), context)); for (int e = 0; e < totalCount; e++) { @@ -346,14 +338,13 @@ template } int cholesky(nd4j::LaunchContext * context, NDArray* input, NDArray* output, bool inplace) { - defaultContext = context; - BUILD_SINGLE_SELECTOR(input->dataType(), return cholesky_, (input, output, inplace), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(input->dataType(), return cholesky_, (context, input, output, inplace), FLOAT_TYPES); } template - int logdetFunctor_(NDArray* input, NDArray* output) { + int logdetFunctor_(LaunchContext *context, NDArray* input, NDArray* output) { std::unique_ptr tempOutput(input->dup()); - int res = cholesky_(input, tempOutput.get(), false); + int res = cholesky_(context, input, tempOutput.get(), false); if (res != ND4J_STATUS_OK) return res; auto n = input->sizeAt(-1); @@ -372,7 +363,7 @@ template } int logdetFunctor(nd4j::LaunchContext * context, NDArray* input, NDArray* output) { - BUILD_SINGLE_SELECTOR(input->dataType(), return logdetFunctor_, (input, output), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(input->dataType(), return logdetFunctor_, (context, input, output), FLOAT_TYPES); } } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/lup.cu b/libnd4j/include/ops/declarable/helpers/cuda/lup.cu index 97d47079b..f11b56745 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/lup.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/lup.cu @@ -196,36 +196,33 @@ namespace helpers { } template - static void invertLowerMatrix_(NDArray *inputMatrix, NDArray *invertedMatrix) { + static void invertLowerMatrix_(LaunchContext *context, NDArray *inputMatrix, NDArray *invertedMatrix) { int n = inputMatrix->rows(); invertedMatrix->setIdentity(); if (inputMatrix->isIdentityMatrix()) return; - auto stream = LaunchContext::defaultContext()->getCudaStream(); + auto stream = context->getCudaStream(); // invert main diagonal - upvertKernel << < 1, n, 512, *stream >> > - (invertedMatrix->specialBuffer(), invertedMatrix->specialShapeInfo(), inputMatrix->specialBuffer(), inputMatrix->specialShapeInfo(), n); + 
upvertKernel<<<1, n, 512, *stream>>>(invertedMatrix->specialBuffer(), invertedMatrix->specialShapeInfo(), inputMatrix->specialBuffer(), inputMatrix->specialShapeInfo(), n); // invert the second diagonal - invertKernelLow << < 1, n, 512, *stream >> > - (invertedMatrix->specialBuffer(), invertedMatrix->specialShapeInfo(), inputMatrix->specialBuffer(), inputMatrix->specialShapeInfo(), n); + invertKernelLow<<<1, n, 512, *stream>>>(invertedMatrix->specialBuffer(), invertedMatrix->specialShapeInfo(), inputMatrix->specialBuffer(), inputMatrix->specialShapeInfo(), n); // invertKernelLow<<<1, n, 128, *stream>>>(invertedMatrix->specialBuffer(), invertedMatrix->specialShapeInfo(), inputMatrix->specialBuffer(), inputMatrix->specialShapeInfo(), n); - invertLowKernel<<< n, n, 512, *stream >> > - (invertedMatrix->specialBuffer(), invertedMatrix->specialShapeInfo(), inputMatrix->specialBuffer(), inputMatrix->specialShapeInfo(), n); + invertLowKernel<<>>(invertedMatrix->specialBuffer(), invertedMatrix->specialShapeInfo(), inputMatrix->specialBuffer(), inputMatrix->specialShapeInfo(), n); } - void invertLowerMatrix(NDArray *inputMatrix, NDArray *invertedMatrix) { + void invertLowerMatrix(LaunchContext *context, NDArray *inputMatrix, NDArray *invertedMatrix) { NDArray::prepareSpecialUse({invertedMatrix}, {inputMatrix}); - BUILD_SINGLE_SELECTOR(inputMatrix->dataType(), invertLowerMatrix_, (inputMatrix, invertedMatrix), FLOAT_NATIVE); + BUILD_SINGLE_SELECTOR(inputMatrix->dataType(), invertLowerMatrix_, (context, inputMatrix, invertedMatrix), FLOAT_NATIVE); NDArray::registerSpecialUse({invertedMatrix}, {inputMatrix}); } template - static void invertUpperMatrix_(NDArray* inputMatrix, NDArray* invertedMatrix) { + static void invertUpperMatrix_(LaunchContext *context, NDArray* inputMatrix, NDArray* invertedMatrix) { int n = inputMatrix->rows(); invertedMatrix->setIdentity(); - auto stream = LaunchContext::defaultContext()->getCudaStream(); + auto stream = context->getCudaStream(); if (inputMatrix->isIdentityMatrix()) { // the inverse for I is I return; } @@ -235,13 +232,12 @@ namespace helpers { inputMatrix->specialBuffer(), inputMatrix->specialShapeInfo(), n); invertedMatrix->tickWriteDevice(); invertedMatrix->printIndexedBuffer("Step1 UP inversion"); - invertUpKernel<<>>(invertedMatrix->specialBuffer(), invertedMatrix->specialShapeInfo(), - inputMatrix->specialBuffer(), inputMatrix->specialShapeInfo(), n); + invertUpKernel<<>>(invertedMatrix->specialBuffer(), invertedMatrix->specialShapeInfo(),inputMatrix->specialBuffer(), inputMatrix->specialShapeInfo(), n); } - void invertUpperMatrix(NDArray *inputMatrix, NDArray *invertedMatrix) { + void invertUpperMatrix(LaunchContext *context, NDArray *inputMatrix, NDArray *invertedMatrix) { NDArray::prepareSpecialUse({invertedMatrix}, {inputMatrix}); - BUILD_SINGLE_SELECTOR(invertedMatrix->dataType(), invertUpperMatrix_, (inputMatrix, invertedMatrix), FLOAT_NATIVE); + BUILD_SINGLE_SELECTOR(invertedMatrix->dataType(), invertUpperMatrix_, (context, inputMatrix, invertedMatrix), FLOAT_NATIVE); NDArray::prepareSpecialUse({invertedMatrix}, {inputMatrix}); } @@ -525,23 +521,19 @@ namespace helpers { input->tickWriteDevice(); } - BUILD_SINGLE_TEMPLATE(template void lup_, - (LaunchContext * context, NDArray * input, NDArray * output, NDArray * permutation), - FLOAT_NATIVE); + BUILD_SINGLE_TEMPLATE(template void lup_,(LaunchContext * context, NDArray * input, NDArray * output, NDArray * permutation), FLOAT_NATIVE); template static int determinant_(nd4j::LaunchContext *context, 
NDArray *input, NDArray *output) { Nd4jLong n = input->sizeAt(-1); Nd4jLong n2 = n * n; std::vector dims(); - auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), - {input->rankOf() - 2, input->rankOf() - 1}); + auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), {input->rankOf() - 2, input->rankOf() - 1}); //auto packZ = ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {output->rankOf() - 1}); // DataType dtype = input->dataType(); // if (dtype != DataType::DOUBLE) // dtype = DataType::FLOAT32; - auto matrix = NDArrayFactory::create(input->ordering(), {n, n}, DataTypeUtils::fromT(), - LaunchContext::defaultContext()); //, block.getWorkspace()); + auto matrix = NDArrayFactory::create(input->ordering(), {n, n}, DataTypeUtils::fromT(), context); //, block.getWorkspace()); auto det = NDArrayFactory::create(1); auto stream = context->getCudaStream(); NDArray::prepareSpecialUse({output}, {input}); @@ -550,8 +542,7 @@ namespace helpers { for (int e = 0; e < output->lengthOf(); e++) { Nd4jLong pos = e * n2; // if (matrix.dataType() == input->dataType()) - fillMatrix << < launchDims.x, launchDims.y, launchDims.z, *stream >> > - (matrix.specialBuffer(), matrix.specialShapeInfo(), input->specialBuffer(), input->specialShapeInfo(), pos, n); + fillMatrix<<>>(matrix.specialBuffer(), matrix.specialShapeInfo(), input->specialBuffer(), input->specialShapeInfo(), pos, n); // else // fillMatrix<<>>(matrix.specialBuffer(), matrix.specialShapeInfo(), input->specialBuffer(), input->specialShapeInfo(), pos, n); @@ -584,15 +575,13 @@ namespace helpers { Nd4jLong n = input->sizeAt(-1); Nd4jLong n2 = n * n; std::vector dims(); - auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), - {input->rankOf() - 2, input->rankOf() - 1}); + auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), {input->rankOf() - 2, input->rankOf() - 1}); //auto packZ = ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), {output->rankOf() - 1}); DataType dtype = input->dataType(); if (dtype != DataType::DOUBLE) dtype = DataType::FLOAT32; - auto matrix = NDArrayFactory::create(input->ordering(), {n, n}, dtype, - LaunchContext::defaultContext()); //, block.getWorkspace()); + auto matrix = NDArrayFactory::create(input->ordering(), {n, n}, dtype, context); //, block.getWorkspace()); auto det = NDArrayFactory::create(1); auto stream = context->getCudaStream(); NDArray::prepareSpecialUse({output}, {input}); @@ -601,8 +590,7 @@ namespace helpers { for (int e = 0; e < output->lengthOf(); e++) { Nd4jLong pos = e * n2; // if (matrix.dataType() == input->dataType()) - fillMatrix << < launchDims.x, launchDims.y, launchDims.z, *stream >> > - (matrix.specialBuffer(), matrix.specialShapeInfo(), input->specialBuffer(), input->specialShapeInfo(), pos, n); + fillMatrix<<>>(matrix.specialBuffer(), matrix.specialShapeInfo(), input->specialBuffer(), input->specialShapeInfo(), pos, n); // else // fillMatrix<<>>(matrix.specialBuffer(), matrix.specialShapeInfo(), input->specialBuffer(), input->specialShapeInfo(), pos, n); @@ -614,8 +602,7 @@ namespace helpers { auto inputBuf = reinterpret_cast(matrix.specialBuffer()); auto outputBuf = reinterpret_cast(output->specialBuffer()) + offset; // if (matrix.dataType() == input->dataType()) - determinantLogKernel << < launchDims.x, launchDims.y, launchDims.z, *stream >> > - (inputBuf, outputBuf, n); + determinantLogKernel<<>>(inputBuf, outputBuf, n); 
// else // determinantLogKernel<<>> (inputBuf, outputBuf, n); } @@ -694,11 +681,11 @@ namespace helpers { auto dtype = DataTypeUtils::fromT(); //input->dataType(); // if (dtype != DataType::DOUBLE) // dtype = DataType::FLOAT32; - NDArray matrix = NDArrayFactory::create('c', {n, n}, dtype, LaunchContext::defaultContext()); - NDArray upper = NDArrayFactory::create('c', {n, n}, dtype, LaunchContext::defaultContext()); - NDArray lower = NDArrayFactory::create('c', {n, n}, dtype, LaunchContext::defaultContext()); - NDArray compound = NDArrayFactory::create('c', {n, n}, dtype, LaunchContext::defaultContext()); - NDArray permutation = NDArrayFactory::create('c', {n, n}, dtype, LaunchContext::defaultContext()); + NDArray matrix = NDArrayFactory::create('c', {n, n}, dtype, context); + NDArray upper = NDArrayFactory::create('c', {n, n}, dtype, context); + NDArray lower = NDArrayFactory::create('c', {n, n}, dtype, context); + NDArray compound = NDArrayFactory::create('c', {n, n}, dtype, context); + NDArray permutation = NDArrayFactory::create('c', {n, n}, dtype, context); auto packX = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), {input->rankOf() - 2, input->rankOf() - 1}); @@ -708,20 +695,17 @@ namespace helpers { auto stream = context->getCudaStream(); for (auto i = 0LL; i < packX.numberOfTads(); i++) { - fillMatrix << < 1, n2, 1024, *stream >> > - (matrix.specialBuffer(), matrix.specialShapeInfo(), input->specialBuffer(), input->specialShapeInfo(), - i * n2, n); + fillMatrix<<<1, n2, 1024, *stream>>>(matrix.specialBuffer(), matrix.specialShapeInfo(), input->specialBuffer(), input->specialShapeInfo(), i * n2, n); matrix.tickWriteDevice(); compound.assign(matrix); lup_(context, &compound, nullptr, nullptr); - fillLowerUpperKernel << < n, n, 1024, *stream >> > - (lower.specialBuffer(), lower.specialShapeInfo(), upper.specialBuffer(), upper.specialShapeInfo(), compound.specialBuffer(), compound.specialShapeInfo(), n); + fillLowerUpperKernel<<>>(lower.specialBuffer(), lower.specialShapeInfo(), upper.specialBuffer(), upper.specialShapeInfo(), compound.specialBuffer(), compound.specialShapeInfo(), n); matrix.assign(0); - invertUpperMatrix(&upper, &matrix); // U^{-1} + invertUpperMatrix(context, &upper, &matrix); // U^{-1} matrix.tickWriteDevice(); // matrix.printIndexedBuffer("Upper Inverted"); compound.assign(0); - invertLowerMatrix(&lower, &compound); // L{-1} + invertLowerMatrix(context, &lower, &compound); // L{-1} compound.tickWriteDevice(); // compound.printIndexedBuffer("Lower Inverted"); // matrix.tickWriteDevice(); @@ -729,9 +713,7 @@ namespace helpers { nd4j::MmulHelper::mmul(&matrix, &compound, &upper, 1.0, 0.0); upper.tickWriteDevice(); // upper.printIndexedBuffer("Full inverted"); - returnMatrix << < 1, n2, 1024, *stream >> > - (output->specialBuffer(), output->specialShapeInfo(), upper.specialBuffer(), upper.specialShapeInfo(), - i * n2, n); + returnMatrix <<<1, n2, 1024, *stream>>>(output->specialBuffer(), output->specialShapeInfo(), upper.specialBuffer(), upper.specialShapeInfo(), i * n2, n); } return Status::OK(); } @@ -865,8 +847,7 @@ namespace helpers { cholesky__(context, input, output, inplace); else { std::unique_ptr tempOutput( - NDArrayFactory::create_('c', input->getShapeAsVector(), DataType::FLOAT32, - LaunchContext::defaultContext())); + NDArrayFactory::create_('c', input->getShapeAsVector(), DataType::FLOAT32, context)); tempOutput->assign(input); cholesky__(context, tempOutput.get(), tempOutput.get(), true); output->assign(tempOutput.get()); 
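// ---- Editor's note: generic sketch, not libnd4j code ----
// The common thread of PATCH 05/56 and 06/56 is removing the file-static defaultContext and passing
// the caller's LaunchContext into every helper, so kernel launches and temporaries use that
// context's CUDA stream instead of shared mutable state. A stripped-down version of the pattern;
// fillDiagonal is a hypothetical stand-in kernel, not an nd4j one.
#include <cuda_runtime.h>

template <typename T>
__global__ void fillDiagonal(T *buf, int n, T value) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) buf[i * n + i] = value;                     // write the main diagonal of an n x n matrix
}

template <typename T>
void setDiagonalOnStream(T *deviceBuf, int n, cudaStream_t stream) {
    // the stream comes from the caller (e.g. context->getCudaStream() above); nothing global is
    // touched, so two callers using different contexts can no longer race on a shared pointer
    dim3 block(256);
    dim3 grid((n + block.x - 1) / block.x);
    fillDiagonal<T><<<grid, block, 0, stream>>>(deviceBuf, n, T(1));
}
// ---- end editor's note ----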
From d607bec6f928c666368b1c6e832537c0910f32d1 Mon Sep 17 00:00:00 2001 From: Alex Black Date: Mon, 26 Aug 2019 11:45:33 +1000 Subject: [PATCH 07/56] Small test fixes (#165) Signed-off-by: AlexDBlack --- .../org/nd4j/evaluation/RegressionEvalTest.java | 4 ++-- .../java/org/nd4j/linalg/crash/SpecialTests.java | 16 ++++++---------- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/evaluation/RegressionEvalTest.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/evaluation/RegressionEvalTest.java index d182377fe..1bd6fd22c 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/evaluation/RegressionEvalTest.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/evaluation/RegressionEvalTest.java @@ -291,7 +291,7 @@ public class RegressionEvalTest extends BaseNd4jTest { for (Metric m : Metric.values()) { double d1 = e4d.scoreForMetric(m); double d2 = e2d.scoreForMetric(m); - assertEquals(m.toString(), d2, d1, 1e-6); + assertEquals(m.toString(), d2, d1, 1e-5); } } @@ -385,7 +385,7 @@ public class RegressionEvalTest extends BaseNd4jTest { for(Metric m : Metric.values()){ double d1 = e4d_m1.scoreForMetric(m); double d2 = e2d_m1.scoreForMetric(m); - assertEquals(m.toString(), d2, d1, 1e-6); + assertEquals(m.toString(), d2, d1, 1e-5); } //Check per-output masking: diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/crash/SpecialTests.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/crash/SpecialTests.java index 75a91263d..cfff870a6 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/crash/SpecialTests.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/crash/SpecialTests.java @@ -551,15 +551,13 @@ public class SpecialTests extends BaseNd4jTest { int[] inputShape = new int[]{1, 2, 2, 1}; int M = 2; - int[] blockShape = new int[]{M, 1}; - int[] paddingShape = new int[]{M, 2}; INDArray input = Nd4j.randn(inputShape).castTo(DataType.DOUBLE); - INDArray blocks = Nd4j.create(new float[]{2, 2}, blockShape).castTo(DataType.INT); - INDArray padding = Nd4j.create(new float[]{0, 0, 0, 0}, paddingShape).castTo(DataType.INT); + INDArray blocks = Nd4j.createFromArray(2, 2); + INDArray padding = Nd4j.createFromArray(0, 0, 0, 0).reshape(2,2); INDArray expOut = Nd4j.create(DataType.DOUBLE, miniBatch, 1, 1, 1); - val op = DynamicCustomOp.builder("space_to_batch") + val op = DynamicCustomOp.builder("space_to_batch_nd") .addInputs(input, blocks, padding) .addOutputs(expOut).build(); Nd4j.getExecutioner().execAndReturn(op); @@ -573,15 +571,13 @@ public class SpecialTests extends BaseNd4jTest { int[] inputShape = new int[]{miniBatch, 1, 1, 1}; int M = 2; - int[] blockShape = new int[]{M, 1}; - int[] cropShape = new int[]{M, 2}; INDArray input = Nd4j.randn(inputShape).castTo(DataType.DOUBLE); - INDArray blocks = Nd4j.create(new float[]{2, 2}, blockShape).castTo(DataType.INT); - INDArray crops = Nd4j.create(new float[]{0, 0, 0, 0}, cropShape).castTo(DataType.INT); + INDArray blocks = Nd4j.createFromArray(2, 2); + INDArray crops = Nd4j.createFromArray(0, 0, 0, 0).reshape(2,2); INDArray expOut = Nd4j.create(DataType.DOUBLE, 1, 2, 2, 1); - DynamicCustomOp op = DynamicCustomOp.builder("batch_to_space") + DynamicCustomOp op = DynamicCustomOp.builder("batch_to_space_nd") .addInputs(input, blocks, crops) .addOutputs(expOut).build(); Nd4j.getExecutioner().execAndReturn(op); From daf5420f4c2b47ec0e615a04b34d11880c4a86f2 Mon Sep 17 00:00:00 2001 From: raver119 
Date: Mon, 26 Aug 2019 08:13:22 +0300 Subject: [PATCH 08/56] cmake fix for windows debug build Signed-off-by: raver119 --- libnd4j/blas/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libnd4j/blas/CMakeLists.txt b/libnd4j/blas/CMakeLists.txt index 8e940bedb..add8960a3 100755 --- a/libnd4j/blas/CMakeLists.txt +++ b/libnd4j/blas/CMakeLists.txt @@ -341,7 +341,7 @@ elseif(CPU_BLAS) endif() endif() - if (CMAKE_BUILD_TYPE STREQUAL "Debug" AND NOT(APPLE)) + if (CMAKE_BUILD_TYPE STREQUAL "Debug" AND NOT(APPLE) AND NOT(WIN32)) SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -rdynamic -Wl,-export-dynamic") SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -export-dynamic") endif() From 9a513a9aa66a9ad8efb3a498640963309512213d Mon Sep 17 00:00:00 2001 From: Robert Altena Date: Mon, 26 Aug 2019 16:01:20 +0900 Subject: [PATCH 09/56] INDArray javadocs (#166) * small fix of compiler warnings in nd4j. Signed-off-by: Robert Altena * indarray javadoc start. Signed-off-by: Robert Altena --- .../nd4j/linalg/api/ndarray/BaseNDArray.java | 27 ------------------- .../linalg/api/ndarray/BaseSparseNDArray.java | 3 --- .../org/nd4j/linalg/api/ndarray/INDArray.java | 26 +++++++++--------- .../java/org/nd4j/linalg/factory/Nd4j.java | 4 +-- 4 files changed, 13 insertions(+), 47 deletions(-) diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ndarray/BaseNDArray.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ndarray/BaseNDArray.java index 121ca2b43..c73a56c61 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ndarray/BaseNDArray.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ndarray/BaseNDArray.java @@ -142,21 +142,11 @@ public abstract class BaseNDArray implements INDArray, Iterable { } - /** - * Returns true if this array is compressed, and false otherwise - * @return - */ @Override public boolean isCompressed() { return compressed; } - /** - * This method marks INDArray instance as compressed - * PLEASE NOTE: Do not use this method unless you 100% have to - * - * @param reallyCompressed - */ @Override public void markAsCompressed(boolean reallyCompressed) { this.compressed = reallyCompressed; @@ -949,17 +939,6 @@ public abstract class BaseNDArray implements INDArray, Iterable { @Override public int elementWiseStride() { - /* - if(Shape.elementWiseStride(shapeInfo()) < 0 && !attemptedToFindElementWiseStride) { - INDArray reshapeAttempt = Shape.newShapeNoCopy(this,new int[]{1,length()}, ordering() == 'f'); - if(reshapeAttempt != null && reshapeAttempt.elementWiseStride() > 0) { - Shape.setElementWiseStride(shapeInfo(), reshapeAttempt.stride(-1)); - this.shapeInformation = Nd4j.getShapeInfoProvider().createShapeInformation(shape(), stride(), offset(),reshapeAttempt.stride(-1), ordering()); - } - attemptedToFindElementWiseStride = true; - - } - */ return Shape.elementWiseStride(shapeInfoDataBuffer()); } @@ -5349,12 +5328,6 @@ public abstract class BaseNDArray implements INDArray, Iterable { return jvmShapeInfo.shape; } - /** - * Returns the shape information debugging - * information - * - * @return the shape information debugging information - */ @Override public String shapeInfoToString() { return Shape.shapeToString(this); diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ndarray/BaseSparseNDArray.java 
b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ndarray/BaseSparseNDArray.java index 3570ed7ad..5257be12e 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ndarray/BaseSparseNDArray.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ndarray/BaseSparseNDArray.java @@ -328,13 +328,11 @@ public abstract class BaseSparseNDArray implements ISparseNDArray { return sparseInformation; } - @Override public LongBuffer shapeInfo() { return null; } - @Override public boolean isCompressed() { return false; @@ -364,7 +362,6 @@ public abstract class BaseSparseNDArray implements ISparseNDArray { return Shape.sparseOffsets(sparseInformation); } - @Override public int stride(int dimension) { int rank = Shape.rank(shapeInformation); diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ndarray/INDArray.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ndarray/INDArray.java index 727b5db6d..88cda5b4f 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ndarray/INDArray.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ndarray/INDArray.java @@ -41,45 +41,44 @@ import org.nd4j.linalg.string.NDArrayStrings; */ public interface INDArray extends Serializable, AutoCloseable { /** - * Returns the shape information debugging - * information - * @return the shape information debugging information + * Returns the shape information debugging information + * @return the shape information. */ String shapeInfoToString(); /** * Shape info - * @return + * @return Shape info */ DataBuffer shapeInfoDataBuffer(); /** * Sparse info - * @return + * @return Sparse info. */ DataBuffer sparseInfoDataBuffer(); /** * Shape info - * @return + * @return Shape info */ LongBuffer shapeInfo(); /** - * Returns true if this array is a view or not - * @return + * Check if this array is a view or not. + * @return true if array is a view. */ boolean isView(); /** - * Returns true if this array is sparse - * @return + * Check if this array is sparse + * @return true if this array is sparse. */ boolean isSparse(); /** - * Returns true if this array is compressed, and false otherwise - * @return + * Check if this array is compressed. + * @return true if this array is compressed. */ boolean isCompressed(); @@ -87,11 +86,10 @@ public interface INDArray extends Serializable, AutoCloseable { * This method marks INDArray instance as compressed * PLEASE NOTE: Do not use this method unless you 100% have to * - * @param reallyCompressed + * @param reallyCompressed new value for compressed. */ void markAsCompressed(boolean reallyCompressed); - /** * Returns the rank of the ndarray (the number of dimensions). 
* diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/Nd4j.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/Nd4j.java index eb3250e3c..d2a4f94a4 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/Nd4j.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/Nd4j.java @@ -103,7 +103,6 @@ import java.text.NumberFormat; import java.text.ParseException; import java.util.*; import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; import java.util.logging.Logger; @@ -153,7 +152,6 @@ public class Nd4j { public static RandomFactory randomFactory; private static MemoryWorkspaceManager workspaceManager; private static DeallocatorService deallocatorService; - private static final AtomicInteger numThreads = new AtomicInteger(-1); private static AtomicReference defaultFloatingPointDataType; private static DataBufferFactory DATA_BUFFER_FACTORY_INSTANCE; @@ -4755,7 +4753,7 @@ public class Nd4j { * @param toStrip the ndarray to newShapeNoCopy * @return the reshaped ndarray */ - @SuppressWarnings("WeakerAccess") // Needs tests if part of public API. + @SuppressWarnings({"unused"}) // Needs tests if part of public API. public static INDArray stripOnes(INDArray toStrip) { if (toStrip.isVector()) return toStrip; From b417ca21bfb00f24e100d5308ae499021463498f Mon Sep 17 00:00:00 2001 From: Alex Black Date: Mon, 26 Aug 2019 23:10:28 +1000 Subject: [PATCH 10/56] Fix for concat op shape function (empty shapes) (#167) Signed-off-by: AlexDBlack --- .../declarable/generic/transforms/concat.cpp | 4 +-- .../opvalidation/ShapeOpValidation.java | 33 +++++++++++++++++++ 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/libnd4j/include/ops/declarable/generic/transforms/concat.cpp b/libnd4j/include/ops/declarable/generic/transforms/concat.cpp index 5249758bf..3c165f64f 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/concat.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/concat.cpp @@ -167,9 +167,7 @@ DECLARE_SHAPE_FN(concat) { } for(int i = 1; i < numOfArrs; ++i) - if (!shape::isEmpty(arrShapes[i])) { - outShapeInfo[axis + 1] += arrShapes[i][axis + 1]; - } + outShapeInfo[axis + 1] += arrShapes[i][axis + 1]; ShapeUtils::updateStridesAndType(outShapeInfo, arrShapes[0], shape::order(arrShapes[0])); diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/ShapeOpValidation.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/ShapeOpValidation.java index e53cfa5ff..2965f367f 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/ShapeOpValidation.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/ShapeOpValidation.java @@ -2109,6 +2109,38 @@ public class ShapeOpValidation extends BaseOpValidation { Nd4j.exec(op); } + @Test + public void testConcatEmpty2(){ + INDArray empty10a = Nd4j.create(DataType.INT, 1, 0); + INDArray empty10b = Nd4j.create(DataType.INT, 1, 0); + + DynamicCustomOp op = DynamicCustomOp.builder("concat") + .addInputs(empty10a, empty10b) + .addIntegerArguments(0) //axis = 0 + .build(); + + List l = op.calculateOutputShape(); + assertEquals(1, l.size()); + assertTrue(l.get(0).isEmpty()); + assertArrayEquals(new long[]{2, 0}, l.get(0).getShape()); + assertEquals(DataType.INT, 
l.get(0).dataType()); + + op.addOutputArgument(Nd4j.create(DataType.INT, 2, 0)); + Nd4j.exec(op); + + + op = DynamicCustomOp.builder("concat") + .addInputs(empty10a, empty10b) + .addIntegerArguments(1) //axis = 1 + .build(); + l = op.calculateOutputShape(); + assertEquals(1, l.size()); + assertTrue(l.get(0).isEmpty()); + assertArrayEquals(new long[]{1, 0}, l.get(0).getShape()); + op.addOutputArgument(Nd4j.create(DataType.INT, 1, 0)); + Nd4j.exec(op); + } + @Test public void testEmptyGather(){ /* @@ -2434,4 +2466,5 @@ public class ShapeOpValidation extends BaseOpValidation { .addInputs(Nd4j.createFromArray(1, 0)) .build(); } + } From bb5fc36e5e4d446d1f60fa949c2780e9cba7f75d Mon Sep 17 00:00:00 2001 From: raver119 Date: Mon, 26 Aug 2019 19:37:05 +0300 Subject: [PATCH 11/56] [WIP] ops fixes (#168) * - correct layer_norm Signed-off-by: Yurii * - further fix of layer norm Signed-off-by: Yurii * - correct scatter_upd op Signed-off-by: Yurii * - correct cuda kernel for histogram_fixed_width op Signed-off-by: Yurii * - delete comments Signed-off-by: Yurii * enabled one ignored test Signed-off-by: raver119 --- .../generic/transforms/layer_norm.cpp | 28 +- .../helpers/cpu/histogramFixedWidth.cpp | 9 +- .../ops/declarable/helpers/cpu/scatter.cpp | 2 + .../helpers/cuda/histogramFixedWidth.cu | 245 +++++++++++------- .../ops/declarable/helpers/cuda/scatter.cu | 11 +- .../layers_tests/DeclarableOpsTests10.cpp | 26 +- .../layers_tests/DeclarableOpsTests15.cpp | 4 +- .../layers_tests/DeclarableOpsTests16.cpp | 23 +- .../opvalidation/LayerOpValidation.java | 1 - 9 files changed, 240 insertions(+), 109 deletions(-) diff --git a/libnd4j/include/ops/declarable/generic/transforms/layer_norm.cpp b/libnd4j/include/ops/declarable/generic/transforms/layer_norm.cpp index 4e612565e..684d98d6d 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/layer_norm.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/layer_norm.cpp @@ -32,9 +32,12 @@ namespace ops { auto input = INPUT_VARIABLE(0); auto gain = INPUT_VARIABLE(1); auto output = OUTPUT_VARIABLE(0); - + std::vector axis = *block.getIArguments(); + const bool isNCHW = block.getBArguments()->size() > 0 ? B_ARG(0) : true; // INT_ARG(9): 0-NCHW, 1-NHWC + const int dimC = isNCHW ? 1 : input->rankOf() - 1; + NDArray* bias = nullptr; if (block.width() > 2) bias = INPUT_VARIABLE(2); @@ -48,9 +51,12 @@ namespace ops { std::vector bargs = {}; standardizeOp.execute(inputs, outputs, targs, longAxis, bargs); - output->applyTrueBroadcast(nd4j::BroadcastOpsTuple::Multiply(), gain, output); - if(bias != nullptr) - output->applyTrueBroadcast(nd4j::BroadcastOpsTuple::Add(), bias, output); + // output->applyTrueBroadcast(nd4j::BroadcastOpsTuple::Multiply(), gain, output); + output->applyBroadcast(nd4j::broadcast::Multiply, {dimC}, gain); + if(bias != nullptr) { + // output->applyTrueBroadcast(nd4j::BroadcastOpsTuple::Add(), bias, output); + output->applyBroadcast(nd4j::broadcast::Add, {dimC}, bias); + } return Status::OK(); } @@ -71,12 +77,17 @@ namespace ops { auto dLdg = OUTPUT_VARIABLE(1); auto dLdb = block.width() == 4 ? OUTPUT_VARIABLE(2) : nullptr; + const bool isNCHW = block.getBArguments()->size() > 0 ? B_ARG(0) : true; // INT_ARG(9): 0-NCHW, 1-NHWC + const int dimC = isNCHW ? 
1 : input->rankOf() - 1; + std::vector axis = *block.getIArguments(); std::vector longAxis = ArrayUtils::toLongVector(axis); - if(bias != nullptr) - eps->reduceAlongDimension(nd4j::reduce::Sum, dLdb, {0}, true); + if(bias != nullptr) { + // eps->reduceAlongDimension(nd4j::reduce::Sum, dLdb, {0}, true); + eps->reduceAlongDimension(nd4j::reduce::Sum, dLdb, ShapeUtils::evalDimsToExclude(input->rankOf(), {dimC}), true); + } NDArray standardized(input->shapeInfo(), false, block.launchContext()); @@ -88,10 +99,11 @@ namespace ops { standardizeOp.execute(inputs, outputs, targs, longAxis, bargs); standardized.applyPairwiseTransform(nd4j::pairwise::Multiply, eps, &standardized, nullptr); - standardized.reduceAlongDimension(nd4j::reduce::Sum, dLdg, {0}, true); + standardized.reduceAlongDimension(nd4j::reduce::Sum, dLdg, ShapeUtils::evalDimsToExclude(input->rankOf(), {dimC}), true); nd4j::ops::standardize_bp standardizeBp; - eps->applyTrueBroadcast(nd4j::BroadcastOpsTuple::Multiply(), gain, dLdx); + // eps->applyTrueBroadcast(nd4j::BroadcastOpsTuple::Multiply(), gain, dLdx); + eps->applyBroadcast(nd4j::broadcast::Multiply, {dimC}, gain, dLdx); auto dLdx_tmp = dLdx->dup(); std::vector standardizeBpArgs = {input, dLdx_tmp}; diff --git a/libnd4j/include/ops/declarable/helpers/cpu/histogramFixedWidth.cpp b/libnd4j/include/ops/declarable/helpers/cpu/histogramFixedWidth.cpp index a3f3e2f9e..349d0381a 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/histogramFixedWidth.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/histogramFixedWidth.cpp @@ -28,13 +28,10 @@ namespace helpers { template void histogramFixedWidth_(const NDArray& input, const NDArray& range, NDArray& output) { - const int nbins = output.lengthOf(); + const int nbins = output.lengthOf(); - // firstly initialize output with zeros - if(output.ews() == 1) - memset(output.buffer(), 0, nbins * output.sizeOfT()); - else - output = 0; + // firstly initialize output with zeros + output.nullify(); const T leftEdge = range.e(0); const T rightEdge = range.e(1); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/scatter.cpp b/libnd4j/include/ops/declarable/helpers/cpu/scatter.cpp index c1d01930c..0b16ac989 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/scatter.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/scatter.cpp @@ -54,6 +54,8 @@ PRAGMA_OMP_PARALLEL_FOR_ARGS(OMP_IF(!lock) schedule(guided)) std::vector dimsToExcludeUpd(sizeOfDims); std::iota(dimsToExcludeUpd.begin(), dimsToExcludeUpd.end(), 0); + shape::printIntArray(dimsToExcludeUpd.data(),dimsToExcludeUpd.size()); + // PRAGMA_OMP_PARALLEL_FOR_ARGS(OMP_IF(indLen > Environment::getInstance()->elementwiseThreshold()) schedule(guided)) // causes known openMP asan bug ! 
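            // Roughly: dimsToExcludeUpd = {0, ..., sizeOfDims-1} marks the leading "index" axes of `updates`;
            // the remaining axes describe the per-index update sub-array that each iteration of the loop
            // below scatters into `output`. Parallel execution is requested only when no locking is needed
            // (OMP_IF(!lock)), i.e. when concurrent updates cannot collide on the same output element.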
PRAGMA_OMP_PARALLEL_FOR_ARGS(OMP_IF(!lock) schedule(guided)) for(Nd4jLong i = 0; i < indLen; ++i) { diff --git a/libnd4j/include/ops/declarable/helpers/cuda/histogramFixedWidth.cu b/libnd4j/include/ops/declarable/helpers/cuda/histogramFixedWidth.cu index 2c46210cf..ebde4909c 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/histogramFixedWidth.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/histogramFixedWidth.cu @@ -20,110 +20,181 @@ #include #include +#include -namespace nd4j { -namespace ops { +namespace nd4j { +namespace ops { namespace helpers { - template - __global__ static void copyBuffers(Nd4jLong* destination, void const* source, Nd4jLong* sourceShape, Nd4jLong bufferLength) { - const auto tid = blockIdx.x * gridDim.x + threadIdx.x; - const auto step = gridDim.x * blockDim.x; - for (int t = tid; t < bufferLength; t += step) { - destination[t] = reinterpret_cast(source)[shape::getIndexOffset(t, sourceShape, bufferLength)]; - } +/////////////////////////////////////////////////////////////////// +template +__global__ static void histogramFixedWidthCuda( const void* vx, const Nd4jLong* xShapeInfo, + void* vz, const Nd4jLong* zShapeInfo, + const T leftEdge, const T rightEdge) { + + const T* x = reinterpret_cast(vx); + Nd4jLong* z = reinterpret_cast(vz); + + __shared__ Nd4jLong xLen, zLen, totalThreads, nbins; + __shared__ T binWidth, secondEdge, lastButOneEdge; + + if (threadIdx.x == 0) { + + xLen = shape::length(xShapeInfo); + nbins = shape::length(zShapeInfo); // nbins = zLen + totalThreads = gridDim.x * blockDim.x; + + binWidth = (rightEdge - leftEdge ) / nbins; + secondEdge = leftEdge + binWidth; + lastButOneEdge = rightEdge - binWidth; } - template - __global__ static void returnBuffers(void* destination, Nd4jLong const* source, Nd4jLong* destinationShape, Nd4jLong bufferLength) { - const auto tid = blockIdx.x * gridDim.x + threadIdx.x; - const auto step = gridDim.x * blockDim.x; - for (int t = tid; t < bufferLength; t += step) { - reinterpret_cast(destination)[shape::getIndexOffset(t, destinationShape, bufferLength)] = source[t]; - } + __syncthreads(); + + const auto tid = blockIdx.x * blockDim.x + threadIdx.x; + + for (Nd4jLong i = tid; i < xLen; i += totalThreads) { + + const T value = x[shape::getIndexOffset(i, xShapeInfo, xLen)]; + + Nd4jLong zIndex; + + if(value < secondEdge) + zIndex = 0; + else if(value >= lastButOneEdge) + zIndex = nbins - 1; + else + zIndex = static_cast((value - leftEdge) / binWidth); + + nd4j::math::atomics::nd4j_atomicAdd(&z[shape::getIndexOffset(zIndex, zShapeInfo, nbins)], 1LL); } +} - template - static __global__ void histogramFixedWidthKernel(void* outputBuffer, Nd4jLong outputLength, void const* inputBuffer, Nd4jLong* inputShape, Nd4jLong inputLength, double const leftEdge, double binWidth, double secondEdge, double lastButOneEdge) { +/////////////////////////////////////////////////////////////////// +template +__host__ static void histogramFixedWidthCudaLauncher(const cudaStream_t *stream, const NDArray& input, const NDArray& range, NDArray& output) { - __shared__ T const* x; - __shared__ Nd4jLong* z; // output buffer + const T leftEdge = range.e(0); + const T rightEdge = range.e(1); - if (threadIdx.x == 0) { - z = reinterpret_cast(outputBuffer); - x = reinterpret_cast(inputBuffer); - } - __syncthreads(); - auto tid = blockIdx.x * gridDim.x + threadIdx.x; - auto step = blockDim.x * gridDim.x; + histogramFixedWidthCuda<<<512, MAX_NUM_THREADS / 2, 512, *stream>>>(input.getSpecialBuffer(), input.getSpecialShapeInfo(), 
output.specialBuffer(), output.specialShapeInfo(), leftEdge, rightEdge); +} - for(auto i = tid; i < inputLength; i += step) { +//////////////////////////////////////////////////////////////////////// +void histogramFixedWidth(nd4j::LaunchContext* context, const NDArray& input, const NDArray& range, NDArray& output) { - const T value = x[shape::getIndexOffset(i, inputShape, inputLength)]; - Nd4jLong currInd = static_cast((value - leftEdge) / binWidth); + // firstly initialize output with zeros + output.nullify(); - if(value < secondEdge) - currInd = 0; - else if(value >= lastButOneEdge) - currInd = outputLength - 1; - nd4j::math::atomics::nd4j_atomicAdd(&z[currInd], 1LL); - } - } + PointersManager manager(context, "histogramFixedWidth"); + + NDArray::prepareSpecialUse({&output}, {&input}); + BUILD_SINGLE_SELECTOR(input.dataType(), histogramFixedWidthCudaLauncher, (context->getCudaStream(), input, range, output), LIBND4J_TYPES); + NDArray::registerSpecialUse({&output}, {&input}); + + manager.synchronize(); +} - template - void histogramFixedWidth_(nd4j::LaunchContext * context, const NDArray& input, const NDArray& range, NDArray& output) { - const int nbins = output.lengthOf(); - auto stream = context->getCudaStream(); - // firstly initialize output with zeros - //if(output.ews() == 1) - // memset(output.buffer(), 0, nbins * output.sizeOfT()); - //else - output.assign(0); - if (!input.isActualOnDeviceSide()) - input.syncToDevice(); +// template +// __global__ static void copyBuffers(Nd4jLong* destination, void const* source, Nd4jLong* sourceShape, Nd4jLong bufferLength) { +// const auto tid = blockIdx.x * gridDim.x + threadIdx.x; +// const auto step = gridDim.x * blockDim.x; +// for (int t = tid; t < bufferLength; t += step) { +// destination[t] = reinterpret_cast(source)[shape::getIndexOffset(t, sourceShape, bufferLength)]; +// } +// } - const double leftEdge = range.e(0); - const double rightEdge = range.e(1); +// template +// __global__ static void returnBuffers(void* destination, Nd4jLong const* source, Nd4jLong* destinationShape, Nd4jLong bufferLength) { +// const auto tid = blockIdx.x * gridDim.x + threadIdx.x; +// const auto step = gridDim.x * blockDim.x; +// for (int t = tid; t < bufferLength; t += step) { +// reinterpret_cast(destination)[shape::getIndexOffset(t, destinationShape, bufferLength)] = source[t]; +// } +// } - const double binWidth = (rightEdge - leftEdge ) / nbins; - const double secondEdge = leftEdge + binWidth; - double lastButOneEdge = rightEdge - binWidth; - Nd4jLong* outputBuffer; - cudaError_t err = cudaMalloc(&outputBuffer, output.lengthOf() * sizeof(Nd4jLong)); - if (err != 0) - throw cuda_exception::build("helpers::histogramFixedWidth: Cannot allocate memory for output", err); - copyBuffers<<<256, 512, 8192, *stream>>>(outputBuffer, output.getSpecialBuffer(), output.getSpecialShapeInfo(), output.lengthOf()); - histogramFixedWidthKernel<<<256, 512, 8192, *stream>>>(outputBuffer, output.lengthOf(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), input.lengthOf(), leftEdge, binWidth, secondEdge, lastButOneEdge); - returnBuffers<<<256, 512, 8192, *stream>>>(output.specialBuffer(), outputBuffer, output.specialShapeInfo(), output.lengthOf()); - //cudaSyncStream(*stream); - err = cudaFree(outputBuffer); - if (err != 0) - throw cuda_exception::build("helpers::histogramFixedWidth: Cannot deallocate memory for output buffer", err); - output.tickWriteDevice(); -//#pragma omp parallel for schedule(guided) -// for(Nd4jLong i = 0; i < input.lengthOf(); ++i) { -// -// 
const T value = input.e(i); -// -// if(value < secondEdge) -//#pragma omp critical -// output.p(0, output.e(0) + 1); -// else if(value >= lastButOneEdge) -//#pragma omp critical -// output.p(nbins-1, output.e(nbins-1) + 1); -// else { -// Nd4jLong currInd = static_cast((value - leftEdge) / binWidth); -//#pragma omp critical -// output.p(currInd, output.e(currInd) + 1); -// } -// } - } +// template +// static __global__ void histogramFixedWidthKernel(void* outputBuffer, Nd4jLong outputLength, void const* inputBuffer, Nd4jLong* inputShape, Nd4jLong inputLength, double const leftEdge, double binWidth, double secondEdge, double lastButOneEdge) { - void histogramFixedWidth(nd4j::LaunchContext * context, const NDArray& input, const NDArray& range, NDArray& output) { - BUILD_SINGLE_SELECTOR(input.dataType(), histogramFixedWidth_, (context, input, range, output), LIBND4J_TYPES); - } - BUILD_SINGLE_TEMPLATE(template void histogramFixedWidth_, (nd4j::LaunchContext * context, const NDArray& input, const NDArray& range, NDArray& output), LIBND4J_TYPES); +// __shared__ T const* x; +// __shared__ Nd4jLong* z; // output buffer + +// if (threadIdx.x == 0) { +// z = reinterpret_cast(outputBuffer); +// x = reinterpret_cast(inputBuffer); +// } +// __syncthreads(); +// auto tid = blockIdx.x * gridDim.x + threadIdx.x; +// auto step = blockDim.x * gridDim.x; + +// for(auto i = tid; i < inputLength; i += step) { + +// const T value = x[shape::getIndexOffset(i, inputShape, inputLength)]; +// Nd4jLong currInd = static_cast((value - leftEdge) / binWidth); + +// if(value < secondEdge) +// currInd = 0; +// else if(value >= lastButOneEdge) +// currInd = outputLength - 1; +// nd4j::math::atomics::nd4j_atomicAdd(&z[currInd], 1LL); +// } +// } + + +// template +// void histogramFixedWidth_(nd4j::LaunchContext * context, const NDArray& input, const NDArray& range, NDArray& output) { +// const int nbins = output.lengthOf(); +// auto stream = context->getCudaStream(); +// // firstly initialize output with zeros +// //if(output.ews() == 1) +// // memset(output.buffer(), 0, nbins * output.sizeOfT()); +// //else +// output.assign(0); +// if (!input.isActualOnDeviceSide()) +// input.syncToDevice(); + +// const double leftEdge = range.e(0); +// const double rightEdge = range.e(1); + +// const double binWidth = (rightEdge - leftEdge ) / nbins; +// const double secondEdge = leftEdge + binWidth; +// double lastButOneEdge = rightEdge - binWidth; +// Nd4jLong* outputBuffer; +// cudaError_t err = cudaMalloc(&outputBuffer, output.lengthOf() * sizeof(Nd4jLong)); +// if (err != 0) +// throw cuda_exception::build("helpers::histogramFixedWidth: Cannot allocate memory for output", err); +// copyBuffers<<<256, 512, 8192, *stream>>>(outputBuffer, output.getSpecialBuffer(), output.getSpecialShapeInfo(), output.lengthOf()); +// histogramFixedWidthKernel<<<256, 512, 8192, *stream>>>(outputBuffer, output.lengthOf(), input.getSpecialBuffer(), input.getSpecialShapeInfo(), input.lengthOf(), leftEdge, binWidth, secondEdge, lastButOneEdge); +// returnBuffers<<<256, 512, 8192, *stream>>>(output.specialBuffer(), outputBuffer, output.specialShapeInfo(), output.lengthOf()); +// //cudaSyncStream(*stream); +// err = cudaFree(outputBuffer); +// if (err != 0) +// throw cuda_exception::build("helpers::histogramFixedWidth: Cannot deallocate memory for output buffer", err); +// output.tickWriteDevice(); +// //#pragma omp parallel for schedule(guided) +// // for(Nd4jLong i = 0; i < input.lengthOf(); ++i) { +// // +// // const T value = input.e(i); +// // +// // 
if(value < secondEdge) +// //#pragma omp critical +// // output.p(0, output.e(0) + 1); +// // else if(value >= lastButOneEdge) +// //#pragma omp critical +// // output.p(nbins-1, output.e(nbins-1) + 1); +// // else { +// // Nd4jLong currInd = static_cast((value - leftEdge) / binWidth); +// //#pragma omp critical +// // output.p(currInd, output.e(currInd) + 1); +// // } +// // } +// } + +// void histogramFixedWidth(nd4j::LaunchContext * context, const NDArray& input, const NDArray& range, NDArray& output) { +// BUILD_SINGLE_SELECTOR(input.dataType(), histogramFixedWidth_, (context, input, range, output), LIBND4J_TYPES); +// } +// BUILD_SINGLE_TEMPLATE(template void histogramFixedWidth_, (nd4j::LaunchContext * context, const NDArray& input, const NDArray& range, NDArray& output), LIBND4J_TYPES); } } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/scatter.cu b/libnd4j/include/ops/declarable/helpers/cuda/scatter.cu index ec0d304df..54d350f47 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/scatter.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/scatter.cu @@ -398,10 +398,15 @@ void scatter(nd4j::LaunchContext *context, pairwise::Ops op, const NDArray& ind const int xRank = indices.rankOf(); std::vector zTadDims = ShapeUtils::evalDimsToExclude(output.rankOf(), {0}); - std::vector yTadDims(xRank); - std::iota(yTadDims.begin(), yTadDims.end(), xRank == 1 ? 0 : xRank); - auto packY = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(updates.getShapeInfo(), yTadDims); + int sizeOfUpdDims = xRank; + if(output.rankOf() == updates.rankOf() && indices.isVector()) + sizeOfUpdDims = 1; + + std::vector yTadDims(sizeOfUpdDims); + std::iota(yTadDims.begin(), yTadDims.end(), 0); + + auto packY = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(updates.getShapeInfo(), ShapeUtils::evalDimsToExclude(updates.rankOf(), yTadDims)); auto packZ = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(output.getShapeInfo(), zTadDims); const Nd4jLong zTadLen = shape::length(packZ.primaryShapeInfo()); diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp index 1d4bf7338..82ed21709 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests10.cpp @@ -910,7 +910,31 @@ TEST_F(DeclarableOpsTests10, histogram_fixed_width_test5) { auto *out = results->at(0); ASSERT_TRUE(exp.isSameShape(out)); - out->printBuffer("5HIST"); + // out->printBuffer("5HIST"); + ASSERT_TRUE(exp.equalsTo(out)); + + delete results; +} + +/////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests10, histogram_fixed_width_test6) { + + auto input = NDArrayFactory::create('c', {7},{0.0, 0.1, 0.1, 0.3, 0.5, 0.5, 0.9}); + auto range = NDArrayFactory::create('c', {2}, {0, 1}); + auto bins = NDArrayFactory::create(5); + + auto exp = NDArrayFactory::create('c', {5}, {3, 1, 2, 0, 1}); + + nd4j::ops::histogram_fixed_width op; + auto results = op.execute({&input, &range, &bins}, {}, {}, {}); + + ASSERT_EQ(ND4J_STATUS_OK, results->status()); + + auto out = results->at(0); + // out->printShapeInfo(); + // out->printIndexedBuffer(); + + ASSERT_TRUE(exp.isSameShape(out)); ASSERT_TRUE(exp.equalsTo(out)); delete results; diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests15.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests15.cpp index df1421d71..21a0381e9 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests15.cpp +++ 
b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests15.cpp @@ -249,7 +249,7 @@ TEST_F(DeclarableOpsTests15, Test_layer_norm_1) { auto b = NDArrayFactory::create('c', {1, 5}, {1., 2., 3., 4., 5.}); nd4j::ops::layer_norm op; - auto result = op.execute({&x, &g, &b}, {}, {0}, {}); + auto result = op.execute({&x, &g, &b}, {}, {0}, {false}); ASSERT_EQ(Status::OK(), result->status()); delete result; } @@ -261,7 +261,7 @@ TEST_F(DeclarableOpsTests15, Test_layer_norm_bp_1) { auto eps = NDArrayFactory::create('c', {1, 5}, {0., 0., 0., 0., 0.}); nd4j::ops::layer_norm_bp op; - auto result = op.execute({&x, &g, &b, &eps}, {}, {0}, {}); + auto result = op.execute({&x, &g, &b, &eps}, {}, {0}, {false}); ASSERT_EQ(Status::OK(), result->status()); delete result; } diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests16.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests16.cpp index a23d5421e..992b21c0f 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests16.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests16.cpp @@ -39,7 +39,7 @@ public: } }; -TEST_F(DeclarableOpsTests16, test_scatter_update_119) { +TEST_F(DeclarableOpsTests16, scatter_upd_1) { auto x = NDArrayFactory::create('c', {3}, {1, 1, 1}); auto y = NDArrayFactory::create(0); auto w = NDArrayFactory::create(3.0f); @@ -56,6 +56,27 @@ TEST_F(DeclarableOpsTests16, test_scatter_update_119) { delete result; } +TEST_F(DeclarableOpsTests16, scatter_upd_2) { + + NDArray x('c', {10, 3}, nd4j::DataType::FLOAT32); + NDArray indices('c', {2}, {2,5}, nd4j::DataType::INT32); + NDArray updates('c', {2, 3}, {100,101,102, 200,201,202}, nd4j::DataType::FLOAT32); + NDArray e('c', {10, 3}, {1,2,3, 4,5,6, 100,101,102, 10,11,12, 13,14,15, 200,201,202, 19,20,21, 22,23,24, 25,26,27, 28,29,30}, nd4j::DataType::FLOAT32); + + x.linspace(1); + + nd4j::ops::scatter_upd op; + auto result = op.execute({&x, &indices, &updates}, {}, {}); + ASSERT_EQ(Status::OK(), result->status()); + + auto z = result->at(0); + + ASSERT_EQ(e, *z); + + delete result; +} + + TEST_F(DeclarableOpsTests16, test_size_dtype_1) { auto x = NDArrayFactory::create('c', {3}, {1, 1, 1}); auto z = NDArrayFactory::create(0.0f); diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/LayerOpValidation.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/LayerOpValidation.java index 9437ad7b2..84bd96ad6 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/LayerOpValidation.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/LayerOpValidation.java @@ -1297,7 +1297,6 @@ public class LayerOpValidation extends BaseOpValidation { } @Test - @Ignore("AB 2019/06/24 - Failing: Ignored to get to all passing baseline to prevent regressions via CI - see issue #7912") public void testLayerNormMixedOrders(){ Nd4j.getRandom().setSeed(12345); INDArray input = Nd4j.rand(DataType.DOUBLE, 3, 8).dup('f'); From 25e5c23eae642c21c0bf43fc0df16113ea6dea29 Mon Sep 17 00:00:00 2001 From: raver119 Date: Mon, 26 Aug 2019 19:57:51 +0300 Subject: [PATCH 12/56] [WIP] Error handling (#169) * CUDA reverse rewrite + couple of tests Signed-off-by: raver119 * don't throw exception on invalid pointer Signed-off-by: raver119 * data types validation for fastpath exec mode + 2 tests Signed-off-by: raver119 * data types validation for fastpath exec mode + 2 tests Signed-off-by: raver119 * ismax allowed dtypes tweak Signed-off-by: raver119 * lastErrorCode + lastErrorMessage for native exceptions 
handling Signed-off-by: raver119 * exportable ErrorReference Signed-off-by: raver119 * check error codes in java Signed-off-by: raver119 * - consume lastErrorCode - fast_in dtype validation fix Signed-off-by: raver119 * - sg/cb allowed output type change - minor logging fix for data type validation Signed-off-by: raver119 --- libnd4j/blas/NativeOps.h | 44 +- libnd4j/blas/cpu/NativeOps.cpp | 1798 ++++++------ libnd4j/blas/cuda/NativeOps.cu | 2417 +++++++++-------- libnd4j/include/execution/ContextBuffers.h | 4 + .../ErrorReference.h} | 40 +- libnd4j/include/execution/LaunchContext.h | 4 + .../include/execution/cpu/ContextBuffers.cpp | 4 + .../include/execution/cpu/LaunchContext.cpp | 8 + .../include/execution/cuda/ContextBuffers.cu | 4 + .../include/execution/cuda/LaunchContext.cu | 4 + .../impl/ErrorReference.cpp} | 51 +- .../ops/declarable/generic/convo/ismax.cpp | 2 +- .../ops/declarable/generic/nlp/cbow.cpp | 3 +- .../ops/declarable/generic/nlp/skipgram.cpp | 2 +- .../ops/declarable/generic/nn/softmax.cpp | 2 +- .../ops/declarable/helpers/cuda/reverse.cu | 114 +- .../ops/declarable/impl/DeclarableOp.cpp | 199 +- .../layers_tests/JavaInteropTests.cpp | 26 + .../tests_cpu/layers_tests/NativeOpsTests.cpp | 75 - .../java/org/nd4j/nativeblas/NativeOps.java | 40 +- .../allocator/pointers/cuda/cudaEvent_t.java | 9 +- .../allocator/pointers/cuda/cudaStream_t.java | 5 +- .../linalg/jcublas/JCublasNDArrayFactory.java | 261 +- .../ops/executioner/CudaExecutioner.java | 184 +- .../java/org/nd4j/nativeblas/Nd4jCuda.java | 139 +- .../org/nd4j/nativeblas/Nd4jCudaPresets.java | 1 + .../cpu/nativecpu/CpuNDArrayFactory.java | 187 +- .../nativecpu/ops/NativeOpExecutioner.java | 209 +- .../java/org/nd4j/nativeblas/Nd4jCpu.java | 139 +- .../org/nd4j/nativeblas/Nd4jCpuPresets.java | 1 + .../test/java/org/nd4j/linalg/Nd4jTestsC.java | 98 +- 31 files changed, 3002 insertions(+), 3072 deletions(-) rename libnd4j/include/{helpers/ProviderRNG.h => execution/ErrorReference.h} (57%) rename libnd4j/include/{helpers/impl/ProviderRNG.cpp => execution/impl/ErrorReference.cpp} (52%) diff --git a/libnd4j/blas/NativeOps.h b/libnd4j/blas/NativeOps.h index 9ce90176f..9bca7bb10 100755 --- a/libnd4j/blas/NativeOps.h +++ b/libnd4j/blas/NativeOps.h @@ -79,6 +79,18 @@ bool verbose = false; extern "C" { +/** + * This function returns last error code stored, + * @return non-zero if something bad happened + */ +ND4J_EXPORT int lastErrorCode(); + +/** + * This function returns last error message, if last error code > 0 + * @return + */ +ND4J_EXPORT const char* lastErrorMessage(); + /** * * @param p @@ -557,38 +569,6 @@ ND4J_EXPORT void execScalarBoolTad(Nd4jPointer *extraPointers, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ); - -/** -* Append an input array -* to the end of a flat array -* in a particular order -* @param offset the offset of the array to start at -* @param order the order -* @param result the result array -* @param resultShapeInfo the shape info for te array -* @param input the input for the array -* @param inputShapeInfo the shape information for that array -*/ -ND4J_EXPORT void flatten( - Nd4jPointer *extraPointers, - int offset, - char order, - void *result, Nd4jLong *resultShapeInfo, - void *dresult, Nd4jLong *dresultShapeInfo, - void *input, Nd4jLong *inputShapeInfo, - void *dinput, Nd4jLong *dinputShapeInfo); - -ND4J_EXPORT void concat( - Nd4jPointer *extraPointers, - int dimension, - int numArrays, - Nd4jPointer *data, Nd4jPointer *inputShapeInfo, - Nd4jPointer 
*ddata, Nd4jPointer *dinputShapeInfo, - void *result, Nd4jLong *resultShapeInfo, - void *dresult, Nd4jLong *dresultShapeInfo, - Nd4jPointer *tadPointers, Nd4jPointer *offsetPointers); - - ND4J_EXPORT void specialConcat ( Nd4jPointer *extraPointers, int dimension, diff --git a/libnd4j/blas/cpu/NativeOps.cpp b/libnd4j/blas/cpu/NativeOps.cpp index f5d4996e4..86bc04fc4 100644 --- a/libnd4j/blas/cpu/NativeOps.cpp +++ b/libnd4j/blas/cpu/NativeOps.cpp @@ -102,8 +102,12 @@ void execIndexReduceScalar(Nd4jPointer *extraPointers, void *extraParams, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo) { - - NativeOpExecutioner::execIndexReduceScalar(nullptr, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, hZShapeInfo, dZ, dZShapeInfo); + try { + NativeOpExecutioner::execIndexReduceScalar(nullptr, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, hZShapeInfo, dZ, dZShapeInfo); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } /** @@ -125,31 +129,36 @@ void execIndexReduce(Nd4jPointer *extraPointers,int opNum, void *dZ, Nd4jLong *dZShapeInfo, void *hDimension, Nd4jLong *hDimensionShape, void *dDimension, Nd4jLong *dDimensionShape) { + try { + auto dimension = reinterpret_cast(hDimension); + int dimensionLength = static_cast(shape::length(hDimensionShape)); - auto dimension = reinterpret_cast(hDimension); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, + dimensionLength); - auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength); + auto hTADShapeInfo = tadPack.primaryShapeInfo(); + auto hTADOffsets = tadPack.primaryOffsets(); - auto hTADShapeInfo = tadPack.primaryShapeInfo(); - auto hTADOffsets = tadPack.primaryOffsets(); + auto hz = reinterpret_cast(hZ); - auto hz = reinterpret_cast(hZ); - - NativeOpExecutioner::execIndexReduce(nullptr, opNum, - hX, - hXShapeInfo, - dX, - dXShapeInfo, - extraParams, - hz, - hZShapeInfo, - dZ, - dZShapeInfo, - dimension, - dimensionLength, - hTADShapeInfo, - hTADOffsets); + NativeOpExecutioner::execIndexReduce(nullptr, opNum, + hX, + hXShapeInfo, + dX, + dXShapeInfo, + extraParams, + hz, + hZShapeInfo, + dZ, + dZShapeInfo, + dimension, + dimensionLength, + hTADShapeInfo, + hTADOffsets); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } @@ -175,31 +184,38 @@ void execBroadcast(Nd4jPointer *extraPointers, void *dZ, Nd4jLong *dZShapeInfo, void *hDimension, Nd4jLong *hDimensionShape, void *dDimension, Nd4jLong *dDimensionShape) { - auto dimension = reinterpret_cast(hDimension); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + try { + auto dimension = reinterpret_cast(hDimension); + int dimensionLength = static_cast(shape::length(hDimensionShape)); - auto tadPackX = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength); - auto tadPackZ = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hZShapeInfo, dimension, dimensionLength); + auto tadPackX = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, + dimensionLength); + auto tadPackZ = 
nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hZShapeInfo, dimension, + dimensionLength); - auto hTADShapeInfo = tadPackX.primaryShapeInfo(); - auto hTADOffsets = tadPackX.primaryOffsets(); - auto hTADShapeInfoZ = tadPackZ.primaryShapeInfo(); - auto hTADOffsetsZ = tadPackZ.primaryOffsets(); + auto hTADShapeInfo = tadPackX.primaryShapeInfo(); + auto hTADOffsets = tadPackX.primaryOffsets(); + auto hTADShapeInfoZ = tadPackZ.primaryShapeInfo(); + auto hTADOffsetsZ = tadPackZ.primaryOffsets(); - NativeOpExecutioner::execBroadcast(nullptr, - opNum, - hX, - hXShapeInfo, - dX, - dXShapeInfo, - hY, - hYShapeInfo, - dY, - dYShapeInfo, - hZ, hZShapeInfo, - dZ, dZShapeInfo, - dimension, - dimensionLength, hTADShapeInfo, hTADOffsets, hTADShapeInfoZ, hTADOffsetsZ); + NativeOpExecutioner::execBroadcast(nullptr, + opNum, + hX, + hXShapeInfo, + dX, + dXShapeInfo, + hY, + hYShapeInfo, + dY, + dYShapeInfo, + hZ, hZShapeInfo, + dZ, dZShapeInfo, + dimension, + dimensionLength, hTADShapeInfo, hTADOffsets, hTADShapeInfoZ, hTADOffsetsZ); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void execBroadcastBool(Nd4jPointer *extraPointers, @@ -212,31 +228,39 @@ void execBroadcastBool(Nd4jPointer *extraPointers, void *dZ, Nd4jLong *dZShapeInfo, void *hDimension, Nd4jLong *hDimensionShape, void *dDimension, Nd4jLong *dDimensionShape) { - auto dimension = reinterpret_cast(hDimension); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + try { + auto dimension = reinterpret_cast(hDimension); + int dimensionLength = static_cast(shape::length(hDimensionShape)); - auto tadPackX = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength); - auto tadPackZ = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hZShapeInfo, dimension, dimensionLength); + auto tadPackX = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, + dimensionLength); + auto tadPackZ = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hZShapeInfo, dimension, + dimensionLength); - auto hTADShapeInfo = tadPackX.primaryShapeInfo(); - auto hTADOffsets = tadPackX.primaryOffsets(); - auto hTADShapeInfoZ = tadPackZ.primaryShapeInfo(); - auto hTADOffsetsZ = tadPackZ.primaryOffsets(); + auto hTADShapeInfo = tadPackX.primaryShapeInfo(); + auto hTADOffsets = tadPackX.primaryOffsets(); + auto hTADShapeInfoZ = tadPackZ.primaryShapeInfo(); + auto hTADOffsetsZ = tadPackZ.primaryOffsets(); - NativeOpExecutioner::execBroadcastBool(nullptr, - opNum, - hX, - hXShapeInfo, - dX, - dXShapeInfo, - hY, - hYShapeInfo, - dY, - dYShapeInfo, - hZ, hZShapeInfo, - dZ, dZShapeInfo, - dimension, - dimensionLength, hTADShapeInfo, hTADOffsets, hTADShapeInfoZ, hTADOffsetsZ); + NativeOpExecutioner::execBroadcastBool(nullptr, + opNum, + hX, + hXShapeInfo, + dX, + dXShapeInfo, + hY, + hYShapeInfo, + dY, + dYShapeInfo, + hZ, hZShapeInfo, + dZ, dZShapeInfo, + dimension, + dimensionLength, hTADShapeInfo, hTADOffsets, hTADShapeInfoZ, + hTADOffsetsZ); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } /** @@ -261,21 +285,26 @@ void execPairwiseTransform( void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, void *extraParams) { - NativeOpExecutioner::execPairwiseTransform(nullptr, - 
opNum, - hX, - hXShapeInfo, - dX, - dXShapeInfo, - hY, - hYShapeInfo, - dY, - dYShapeInfo, - hZ, - hZShapeInfo, - dZ, - dZShapeInfo, - extraParams); + try { + NativeOpExecutioner::execPairwiseTransform(nullptr, + opNum, + hX, + hXShapeInfo, + dX, + dXShapeInfo, + hY, + hYShapeInfo, + dY, + dYShapeInfo, + hZ, + hZShapeInfo, + dZ, + dZShapeInfo, + extraParams); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void execPairwiseTransformBool( @@ -288,21 +317,27 @@ void execPairwiseTransformBool( void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, void *extraParams) { - NativeOpExecutioner::execPairwiseBoolTransform(nullptr, - opNum, - hX, - hXShapeInfo, - dX, - dXShapeInfo, - hY, - hYShapeInfo, - dY, - dYShapeInfo, - hZ, - hZShapeInfo, - dZ, - dZShapeInfo, - extraParams); + + try { + NativeOpExecutioner::execPairwiseBoolTransform(nullptr, + opNum, + hX, + hXShapeInfo, + dX, + dXShapeInfo, + hY, + hYShapeInfo, + dY, + dYShapeInfo, + hZ, + hZShapeInfo, + dZ, + dZShapeInfo, + extraParams); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } /** @@ -323,18 +358,22 @@ void execReduceFloat( void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo) { - NativeOpExecutioner::execReduceFloatScalar(nullptr, - opNum, - hX, - hXShapeInfo, - dX, - dXShapeInfo, - extraParams, - hZ, - hZShapeInfo, - dZ, - dZShapeInfo); - + try { + NativeOpExecutioner::execReduceFloatScalar(nullptr, + opNum, + hX, + hXShapeInfo, + dX, + dXShapeInfo, + extraParams, + hZ, + hZShapeInfo, + dZ, + dZShapeInfo); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void execReduceSame( @@ -346,18 +385,22 @@ void execReduceSame( void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo) { - NativeOpExecutioner::execReduceSameScalar(nullptr, - opNum, - hX, - hXShapeInfo, - dX, - dXShapeInfo, - extraParams, - hZ, - hZShapeInfo, - dZ, - dZShapeInfo); - + try { + NativeOpExecutioner::execReduceSameScalar(nullptr, + opNum, + hX, + hXShapeInfo, + dX, + dXShapeInfo, + extraParams, + hZ, + hZShapeInfo, + dZ, + dZShapeInfo); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void execReduceBool( @@ -368,19 +411,22 @@ void execReduceBool( void *extraParams, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo) { - - NativeOpExecutioner::execReduceBoolScalar(nullptr, - opNum, - hX, - hXShapeInfo, - dX, - dXShapeInfo, - extraParams, - hZ, - hZShapeInfo, - dZ, - dZShapeInfo); - + try { + NativeOpExecutioner::execReduceBoolScalar(nullptr, + opNum, + hX, + hXShapeInfo, + dX, + dXShapeInfo, + extraParams, + hZ, + hZShapeInfo, + dZ, + dZShapeInfo); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void execReduceLong( @@ -391,19 +437,22 @@ void execReduceLong( void *extraParams, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo) { - - 
NativeOpExecutioner::execReduceLongScalar(nullptr, - opNum, - hX, - hXShapeInfo, - dX, - dXShapeInfo, - extraParams, - hZ, - hZShapeInfo, - dZ, - dZShapeInfo); - + try { + NativeOpExecutioner::execReduceLongScalar(nullptr, + opNum, + hX, + hXShapeInfo, + dX, + dXShapeInfo, + extraParams, + hZ, + hZShapeInfo, + dZ, + dZShapeInfo); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } /** @@ -424,28 +473,34 @@ void execReduceFloat2(Nd4jPointer *extraPointers, void *dZ, Nd4jLong *dZShapeInfo, void *hDimension, Nd4jLong *hDimensionShape, void *dDimension, Nd4jLong *dDimensionShape) { - auto dimension = reinterpret_cast(hDimension); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + try { + auto dimension = reinterpret_cast(hDimension); + int dimensionLength = static_cast(shape::length(hDimensionShape)); - auto tadPackX = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength); + auto tadPackX = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, + dimensionLength); - auto hTADShapeInfo = tadPackX.primaryShapeInfo(); - auto hTADOffsets = tadPackX.primaryOffsets(); + auto hTADShapeInfo = tadPackX.primaryShapeInfo(); + auto hTADOffsets = tadPackX.primaryOffsets(); - NativeOpExecutioner::execReduceFloat(nullptr, opNum, - hX, - hXShapeInfo, - dX, - dXShapeInfo, - extraParams, - hZ, - hZShapeInfo, - dZ, - dZShapeInfo, - dimension, - dimensionLength, - hTADShapeInfo, - hTADOffsets); + NativeOpExecutioner::execReduceFloat(nullptr, opNum, + hX, + hXShapeInfo, + dX, + dXShapeInfo, + extraParams, + hZ, + hZShapeInfo, + dZ, + dZShapeInfo, + dimension, + dimensionLength, + hTADShapeInfo, + hTADOffsets); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void execReduceBool2(Nd4jPointer *extraPointers, @@ -457,28 +512,34 @@ void execReduceBool2(Nd4jPointer *extraPointers, void *dZ, Nd4jLong *dZShapeInfo, void *hDimension, Nd4jLong *hDimensionShape, void *dDimension, Nd4jLong *dDimensionShape) { - auto dimension = reinterpret_cast(hDimension); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + try { + auto dimension = reinterpret_cast(hDimension); + int dimensionLength = static_cast(shape::length(hDimensionShape)); - auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength); + auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, + dimensionLength); - auto hTADShapeInfo = tadPack.primaryShapeInfo(); - auto hTADOffsets = tadPack.primaryOffsets(); + auto hTADShapeInfo = tadPack.primaryShapeInfo(); + auto hTADOffsets = tadPack.primaryOffsets(); - NativeOpExecutioner::execReduceBool(nullptr, opNum, - hX, - hXShapeInfo, - dX, - dXShapeInfo, - extraParams, - hZ, - hZShapeInfo, - dZ, - dZShapeInfo, - dimension, - dimensionLength, - hTADShapeInfo, - hTADOffsets); + NativeOpExecutioner::execReduceBool(nullptr, opNum, + hX, + hXShapeInfo, + dX, + dXShapeInfo, + extraParams, + hZ, + hZShapeInfo, + dZ, + dZShapeInfo, + dimension, + dimensionLength, + hTADShapeInfo, + hTADOffsets); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + 
nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void execReduceSame2(Nd4jPointer *extraPointers, @@ -490,28 +551,34 @@ void execReduceSame2(Nd4jPointer *extraPointers, void *dZ, Nd4jLong *dZShapeInfo, void *hDimension, Nd4jLong *hDimensionShape, void *dDimension, Nd4jLong *dDimensionShape) { - auto dimension = reinterpret_cast(hDimension); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + try { + auto dimension = reinterpret_cast(hDimension); + int dimensionLength = static_cast(shape::length(hDimensionShape)); - auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength); + auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, + dimensionLength); - auto hTADShapeInfo = tadPack.primaryShapeInfo(); - auto hTADOffsets = tadPack.primaryOffsets(); + auto hTADShapeInfo = tadPack.primaryShapeInfo(); + auto hTADOffsets = tadPack.primaryOffsets(); - NativeOpExecutioner::execReduceSame(nullptr, opNum, - hX, - hXShapeInfo, - dX, - dXShapeInfo, - extraParams, - hZ, - hZShapeInfo, - dZ, - dZShapeInfo, - dimension, - dimensionLength, - hTADShapeInfo, - hTADOffsets); + NativeOpExecutioner::execReduceSame(nullptr, opNum, + hX, + hXShapeInfo, + dX, + dXShapeInfo, + extraParams, + hZ, + hZShapeInfo, + dZ, + dZShapeInfo, + dimension, + dimensionLength, + hTADShapeInfo, + hTADOffsets); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void execReduceLong2(Nd4jPointer *extraPointers, @@ -523,28 +590,34 @@ void execReduceLong2(Nd4jPointer *extraPointers, void *dZ, Nd4jLong *dZShapeInfo, void *hDimension, Nd4jLong *hDimensionShape, void *dDimension, Nd4jLong *dDimensionShape) { - auto dimension = reinterpret_cast(hDimension); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + try { + auto dimension = reinterpret_cast(hDimension); + int dimensionLength = static_cast(shape::length(hDimensionShape)); - auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength); + auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, + dimensionLength); - auto hTADShapeInfo = tadPack.primaryShapeInfo(); - auto hTADOffsets = tadPack.primaryOffsets(); + auto hTADShapeInfo = tadPack.primaryShapeInfo(); + auto hTADOffsets = tadPack.primaryOffsets(); - NativeOpExecutioner::execReduceLong(nullptr, opNum, - hX, - hXShapeInfo, - dX, - dXShapeInfo, - extraParams, - hZ, - hZShapeInfo, - dZ, - dZShapeInfo, - dimension, - dimensionLength, - hTADShapeInfo, - hTADOffsets); + NativeOpExecutioner::execReduceLong(nullptr, opNum, + hX, + hXShapeInfo, + dX, + dXShapeInfo, + extraParams, + hZ, + hZShapeInfo, + dZ, + dZShapeInfo, + dimension, + dimensionLength, + hTADShapeInfo, + hTADOffsets); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } /** @@ -567,8 +640,13 @@ void execReduce3(Nd4jPointer *extraPointers, void *dY, Nd4jLong *dYShapeInfo, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo) { - - NativeOpExecutioner::execReduce3(nullptr, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hY, hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo); + try { + 
NativeOpExecutioner::execReduce3(nullptr, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hY, hYShapeInfo, + dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } /** @@ -588,8 +666,13 @@ void execReduce3Scalar(Nd4jPointer *extraPointers,int opNum, void *dY, Nd4jLong *dYShapeInfo, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo) { - - NativeOpExecutioner::execReduce3Scalar(nullptr, opNum,hX,hXShapeInfo,dX, dXShapeInfo,extraParams,hY,hYShapeInfo,dY,dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo); + try { + NativeOpExecutioner::execReduce3Scalar(nullptr, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hY, + hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } /** * @@ -617,19 +700,31 @@ void execReduce3Tad(Nd4jPointer *extraPointers, void *dDimension, Nd4jLong *dDimensionShape, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *yTadOnlyShapeInfo, Nd4jLong *yTadOffsets) { - auto dimension = reinterpret_cast(hDimension); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + try { + auto dimension = reinterpret_cast(hDimension); + int dimensionLength = static_cast(shape::length(hDimensionShape)); - if (extraPointers == nullptr || extraPointers[2] == 0) { - NativeOpExecutioner::execReduce3(LaunchContext::defaultContext(), opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hY, hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, dimension, dimensionLength, tadOnlyShapeInfo, tadOffsets, yTadOnlyShapeInfo, yTadOffsets); - } else { - // going tad-way - auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength); + if (extraPointers == nullptr || extraPointers[2] == 0) { + NativeOpExecutioner::execReduce3(LaunchContext::defaultContext(), opNum, hX, hXShapeInfo, dX, dXShapeInfo, + extraParams, hY, hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, + dZShapeInfo, dimension, dimensionLength, tadOnlyShapeInfo, tadOffsets, + yTadOnlyShapeInfo, yTadOffsets); + } else { + // going tad-way + auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, + dimensionLength); - auto hTADShapeInfo = tadPack.primaryShapeInfo(); - auto hTADOffsets = tadPack.primaryOffsets(); + auto hTADShapeInfo = tadPack.primaryShapeInfo(); + auto hTADOffsets = tadPack.primaryOffsets(); - NativeOpExecutioner::execReduce3TAD(LaunchContext::defaultContext(), opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hY, hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, dimension, dimensionLength, hTADShapeInfo, hTADOffsets, nullptr, nullptr); + NativeOpExecutioner::execReduce3TAD(LaunchContext::defaultContext(), opNum, hX, hXShapeInfo, dX, + dXShapeInfo, extraParams, hY, hYShapeInfo, dY, dYShapeInfo, hZ, + hZShapeInfo, dZ, dZShapeInfo, dimension, dimensionLength, hTADShapeInfo, + hTADOffsets, nullptr, nullptr); + } + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); } } @@ -654,36 +749,9 @@ void execScalar( void *hScalar, Nd4jLong 
*hScalarShapeInfo, void *dScalar, Nd4jLong *dScalarShapeInfo, void *extraParams) { - NativeOpExecutioner::execScalar(nullptr, - opNum, - hX, - hXShapeInfo, - dX, - dXShapeInfo, - hZ, - hZShapeInfo, - dZ, - dZShapeInfo, - hScalar, - hScalarShapeInfo, - dScalar, - dScalarShapeInfo, - extraParams); -} - -void execScalarBool( - Nd4jPointer *extraPointers, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hScalar, Nd4jLong *hScalarShapeInfo, - void *dScalar, Nd4jLong *dScalarShapeInfo, - void *extraParams) { - - NativeOpExecutioner::execScalarBool(nullptr, - opNum, + try { + NativeOpExecutioner::execScalar(nullptr, + opNum, hX, hXShapeInfo, dX, @@ -696,7 +764,43 @@ void execScalarBool( hScalarShapeInfo, dScalar, dScalarShapeInfo, - extraParams); + extraParams); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } +} + +void execScalarBool( + Nd4jPointer *extraPointers, + int opNum, + void *hX, Nd4jLong *hXShapeInfo, + void *dX, Nd4jLong *dXShapeInfo, + void *hZ, Nd4jLong *hZShapeInfo, + void *dZ, Nd4jLong *dZShapeInfo, + void *hScalar, Nd4jLong *hScalarShapeInfo, + void *dScalar, Nd4jLong *dScalarShapeInfo, + void *extraParams) { + try { + NativeOpExecutioner::execScalarBool(nullptr, + opNum, + hX, + hXShapeInfo, + dX, + dXShapeInfo, + hZ, + hZShapeInfo, + dZ, + dZShapeInfo, + hScalar, + hScalarShapeInfo, + dScalar, + dScalarShapeInfo, + extraParams); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } /** @@ -714,18 +818,23 @@ void execSummaryStatsScalar(Nd4jPointer *extraPointers, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, bool biasCorrected) { - NativeOpExecutioner::execSummaryStatsScalar(nullptr, - opNum, - hX, - hXShapeInfo, - dX, - dXShapeInfo, - extraParams, - hZ, - hZShapeInfo, - dZ, - dZShapeInfo, - biasCorrected); + try { + NativeOpExecutioner::execSummaryStatsScalar(nullptr, + opNum, + hX, + hXShapeInfo, + dX, + dXShapeInfo, + extraParams, + hZ, + hZShapeInfo, + dZ, + dZShapeInfo, + biasCorrected); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } /** * @@ -744,18 +853,23 @@ void execSummaryStats(Nd4jPointer *extraPointers, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, bool biasCorrected) { - NativeOpExecutioner::execSummaryStats(nullptr, - opNum, - hX, - hXShapeInfo, - dX, - dXShapeInfo, - extraParams, - hZ, - hZShapeInfo, - dZ, - dZShapeInfo, - biasCorrected); + try { + NativeOpExecutioner::execSummaryStats(nullptr, + opNum, + hX, + hXShapeInfo, + dX, + dXShapeInfo, + extraParams, + hZ, + hZShapeInfo, + dZ, + dZShapeInfo, + biasCorrected); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } /** * @@ -779,27 +893,31 @@ void execSummaryStatsTad(Nd4jPointer *extraPointers, void *dDimension, Nd4jLong *dDimensionShape, bool biasCorrected, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { - auto dimension = reinterpret_cast(hDimension); - int dimensionLength 
= static_cast(shape::length(hDimensionShape)); + try { + auto dimension = reinterpret_cast(hDimension); + int dimensionLength = static_cast(shape::length(hDimensionShape)); - NativeOpExecutioner::execSummaryStats(nullptr, - opNum, - hX, - hXShapeInfo, - dX, - dXShapeInfo, - extraParams, - hZ, - hZShapeInfo, - dZ, - dZShapeInfo, - dimension, - dimensionLength, - tadShapeInfo, - tadOffsets, - biasCorrected); - + NativeOpExecutioner::execSummaryStats(nullptr, + opNum, + hX, + hXShapeInfo, + dX, + dXShapeInfo, + extraParams, + hZ, + hZShapeInfo, + dZ, + dZShapeInfo, + dimension, + dimensionLength, + tadShapeInfo, + tadOffsets, + biasCorrected); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } /** @@ -820,20 +938,24 @@ void execTransformFloat( void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, void *extraParams) { - - NativeOpExecutioner::execTransformFloat(nullptr, - opNum, - hX, - hXShapeInfo, - dZ, - dXShapeInfo, - hZ, - hZShapeInfo, - dZ, - dZShapeInfo, - extraParams, - nullptr, - nullptr); + try { + NativeOpExecutioner::execTransformFloat(nullptr, + opNum, + hX, + hXShapeInfo, + dZ, + dXShapeInfo, + hZ, + hZShapeInfo, + dZ, + dZShapeInfo, + extraParams, + nullptr, + nullptr); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void execTransformSame( @@ -844,20 +966,24 @@ void execTransformSame( void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, void *extraParams) { - - NativeOpExecutioner::execTransformSame(nullptr, - opNum, - hX, - hXShapeInfo, - dX, - dXShapeInfo, - hZ, - hZShapeInfo, - dZ, - dZShapeInfo, - extraParams, - nullptr, - nullptr); + try { + NativeOpExecutioner::execTransformSame(nullptr, + opNum, + hX, + hXShapeInfo, + dX, + dXShapeInfo, + hZ, + hZShapeInfo, + dZ, + dZShapeInfo, + extraParams, + nullptr, + nullptr); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void execTransformBool( @@ -868,20 +994,24 @@ void execTransformBool( void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, void *extraParams) { - - NativeOpExecutioner::execTransformBool(nullptr, - opNum, - hX, - hXShapeInfo, - dX, - dXShapeInfo, - hZ, - hZShapeInfo, - dZ, - dZShapeInfo, - extraParams, - nullptr, - nullptr); + try { + NativeOpExecutioner::execTransformBool(nullptr, + opNum, + hX, + hXShapeInfo, + dX, + dXShapeInfo, + hZ, + hZShapeInfo, + dZ, + dZShapeInfo, + extraParams, + nullptr, + nullptr); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void execTransformAny( @@ -892,20 +1022,24 @@ void execTransformAny( void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, void *extraParams) { - - NativeOpExecutioner::execTransformAny(nullptr, - opNum, - hX, - hXShapeInfo, - dX, - dXShapeInfo, - hZ, - hZShapeInfo, - dZ, - dZShapeInfo, - extraParams, - nullptr, - nullptr); + try { + NativeOpExecutioner::execTransformAny(nullptr, + opNum, + hX, + hXShapeInfo, + dX, + dXShapeInfo, + hZ, + hZShapeInfo, + dZ, + dZShapeInfo, + extraParams, + nullptr, + nullptr); + } catch 
(std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void execTransformStrict( @@ -916,20 +1050,24 @@ void execTransformStrict( void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, void *extraParams) { - - NativeOpExecutioner::execTransformStrict(nullptr, - opNum, - hX, - hXShapeInfo, - dX, - dXShapeInfo, - hZ, - hZShapeInfo, - dZ, - dZShapeInfo, - extraParams, - nullptr, - nullptr); + try { + NativeOpExecutioner::execTransformStrict(nullptr, + opNum, + hX, + hXShapeInfo, + dX, + dXShapeInfo, + hZ, + hZShapeInfo, + dZ, + dZShapeInfo, + extraParams, + nullptr, + nullptr); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void execReduce3All(Nd4jPointer *extraPointers, @@ -948,158 +1086,18 @@ void execReduce3All(Nd4jPointer *extraPointers, Nd4jLong *yTadShapeInfo, Nd4jLong *yOffsets) { - auto dimension = reinterpret_cast(hDimension); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + try { + auto dimension = reinterpret_cast(hDimension); + int dimensionLength = static_cast(shape::length(hDimensionShape)); - NativeOpExecutioner::execReduce3All(nullptr, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParamsVals, hY, hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, dimension, dimensionLength, xTadShapeInfo, xOffsets, yTadShapeInfo, yOffsets); -} - - -template -void flattenGeneric(Nd4jPointer *extraPointers, - int offset, - char order, - void *vresult, - Nd4jLong *hZShapeInfo, - void *vinput, - Nd4jLong *inputShapeInfo) { - - auto hZ = reinterpret_cast(vresult); - auto input = reinterpret_cast(vinput); - - int numOnes = 0; - auto shape = shape::shapeOf(inputShapeInfo); - int wholeRank = shape::rank(inputShapeInfo); - for(int i = 0; i < wholeRank; i++) { - if(shape[i] == 1) - numOnes++; + NativeOpExecutioner::execReduce3All(nullptr, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParamsVals, hY, + hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, dimension, + dimensionLength, xTadShapeInfo, xOffsets, yTadShapeInfo, yOffsets); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); } - - - - //start at the given offset - hZ += offset; - char inputOrder = shape::order(inputShapeInfo); - auto len = shape::length(inputShapeInfo); - auto resultEleStride = shape::elementWiseStride(hZShapeInfo); - auto inputEleStride = shape::elementWiseStride(inputShapeInfo); - Nd4jLong numTads, stride; - int dimension, dimensionLength; - int rank = shape::rank(inputShapeInfo); - auto xStride = shape::stride(inputShapeInfo); - auto xShape = shape::shapeOf(inputShapeInfo); - - dimensionLength = 1; - if(order == 'f') { - dimension = 0; - } - else { - dimension = rank - 1; - } - stride = xStride[dimension]; - // numTads is product of length of all dimensions excluding - // the one we do the tad on - numTads = 1; - for (int i = 0; i < rank; i++) { - if (i != dimension) - numTads *= xShape[i]; - } - - if (inputOrder == order) { - if (resultEleStride == 1 && inputEleStride == 1) { - memcpy(hZ, input, len* sizeof(T)); - } - else if (resultEleStride >= 1 && inputEleStride >= 1) { - if (len < ELEMENT_THRESHOLD) { - - PRAGMA_OMP_SIMD - for (Nd4jLong i = 
0; i < len; i++) { - hZ[i * resultEleStride] = input[i * inputEleStride]; - } - } - else { - - PRAGMA_OMP_PARALLEL_FOR_SIMD - for (Nd4jLong i = 0; i < len; i++) { - hZ[i * resultEleStride] = input[i * inputEleStride]; - } - } - } - else { - int idx = 0; - for(Nd4jLong i = 0; i < len; i++) - hZ[idx++] = input[shape::getIndexOffset(i, inputShapeInfo, len)]; - } - } - else { - int rank = shape::rank(inputShapeInfo); - auto xShape = shape::shapeOf(inputShapeInfo); - auto tadShape = xShape[dimension]; - - auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(inputShapeInfo, dimension); - - PRAGMA_OMP_PARALLEL_FOR - for(int i = 0; i < numTads; i++) { - - Nd4jLong resultOffset; - - if (order == 'f') { - // 1. get c ordering coordinates - auto cIndexCoordinates = new Nd4jLong[rank - 1]; - Nd4jLong divisor = 1; - for (int dim = rank - 1; dim > 0; dim--) { - cIndexCoordinates[dim - 1] = (i / divisor) % xShape[dim]; - divisor *= xShape[dim]; - } - - - // 2. convert to f ordering index - int fIndex = 0; - Nd4jLong multiplier = 1; - for (int dim = 1; dim <= rank - 1; dim++) { - fIndex += cIndexCoordinates[dim - 1] * multiplier; - multiplier *= xShape[dim]; - } - - resultOffset = fIndex * tadShape; - delete[] cIndexCoordinates; - - } - else { - resultOffset = i * tadShape; - } - - auto tadOffset = tadPack.primaryOffsets()[i]; - for( int j = 0; j < tadShape; j++) { - - // TAD are returned in C ordering always - hZ[resultOffset + j] = input[tadOffset + j * stride]; - - } - } - } -} - - -/** - * Concatneate multi array of the same shape together - * along a particular dimension - */ -void concat( - Nd4jPointer *extraPointers, - int dimension, - int numArrays, - Nd4jPointer *data, Nd4jPointer *inputShapeInfo, - Nd4jPointer *ddata, Nd4jPointer *dinputShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - Nd4jPointer *tadPointers, - Nd4jPointer *offsetPointers) { - - auto zType = nd4j::ArrayOptions::dataType(hZShapeInfo); - - BUILD_SINGLE_SELECTOR(zType, nd4j::SpecialMethods, ::concatCpuGeneric(dimension, numArrays, data, inputShapeInfo, hZ, hZShapeInfo), LIBND4J_TYPES); } /** @@ -1116,39 +1114,14 @@ void specialConcat( Nd4jLong *hZShapeInfo, Nd4jPointer *tadPointers, Nd4jPointer *offsetPointers) { + try { + auto zType = nd4j::ArrayOptions::dataType(hZShapeInfo); - auto zType = nd4j::ArrayOptions::dataType(hZShapeInfo); - - BUILD_SINGLE_SELECTOR(zType, nd4j::SpecialMethods, ::concatCpuGeneric(dimension, numArrays, data, inputShapeInfo, hZ, hZShapeInfo), LIBND4J_TYPES); -} - -/** -* Append an input array -* to the end of a flat array -* in a particular order -* @param offset the offset of the array to start at -* @param order the order -* @param hZ the hZ array -* @param hZShapeInfo the shape info for te array -* @param input the input for the array -* @param inputShapeInfo the shape information for that array -*/ -void flatten( - Nd4jPointer *extraPointers, - int offset, - char order, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *input, Nd4jLong *inputShapeInfo, - void *dinput, Nd4jLong *dinputShapeInfo) { - - auto xType = nd4j::ArrayOptions::dataType(inputShapeInfo); - auto zType = nd4j::ArrayOptions::dataType(hZShapeInfo); - - if (xType != zType) - throw std::runtime_error("NativeOps::flatten requires all operands to have same data type"); - - BUILD_SINGLE_SELECTOR(xType, flattenGeneric, (extraPointers, offset, order, hZ, hZShapeInfo, input, inputShapeInfo), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR(zType, 
nd4j::SpecialMethods,::concatCpuGeneric(dimension, numArrays, data, inputShapeInfo, hZ, hZShapeInfo), LIBND4J_TYPES); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } /** @@ -1324,7 +1297,13 @@ void setGridLimit(int gridSize) { nd4j::TadPack* tadOnlyShapeInfo(Nd4jLong *hXShapeInfo, int *dimension, int dimensionLength) { auto pack = new TadPack(); - *pack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength); + try { + *pack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } + return pack; } @@ -1421,9 +1400,14 @@ void pullRows(Nd4jPointer *extraPointers, Nd4jLong *tadOffsets, Nd4jLong *zTadShapeInfo, Nd4jLong *zTadOffsets) { - auto xType = nd4j::ArrayOptions::dataType(hXShapeInfo); + try { + auto xType = nd4j::ArrayOptions::dataType(hXShapeInfo); - BUILD_SINGLE_SELECTOR(xType, pullRowsGeneric, (hX, hXShapeInfo, hZ, hZShapeInfo, n, indexes, tadShapeInfo, tadOffsets, zTadShapeInfo, zTadOffsets), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR(xType, pullRowsGeneric, (hX, hXShapeInfo, hZ, hZShapeInfo, n, indexes, tadShapeInfo, tadOffsets, zTadShapeInfo, zTadOffsets), LIBND4J_TYPES); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } template @@ -1474,9 +1458,14 @@ void tear(Nd4jPointer *extraPointers, Nd4jLong *hZShapeInfo, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { - auto xType = nd4j::ArrayOptions::dataType(hXShapeInfo); + try { + auto xType = nd4j::ArrayOptions::dataType(hXShapeInfo); - BUILD_SINGLE_SELECTOR(xType, tearGeneric, (hX, hXShapeInfo, targets, hZShapeInfo, tadShapeInfo, tadOffsets), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR(xType, tearGeneric, (hX, hXShapeInfo, targets, hZShapeInfo, tadShapeInfo, tadOffsets), LIBND4J_TYPES); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } @@ -1488,9 +1477,14 @@ void average(Nd4jPointer *extras, int n, Nd4jLong length, bool propagate) { - auto xType = nd4j::ArrayOptions::dataType(hXShapeInfo); + try { + auto xType = nd4j::ArrayOptions::dataType(hXShapeInfo); - BUILD_SINGLE_SELECTOR(xType, nd4j::SpecialMethods, ::averageGeneric(hX, z, hZShapeInfo, n, length, propagate), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR(xType, nd4j::SpecialMethods, ::averageGeneric(hX, z, hZShapeInfo, n, length, propagate), LIBND4J_TYPES); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void accumulate(Nd4jPointer *extras, @@ -1500,10 +1494,14 @@ void accumulate(Nd4jPointer *extras, void *dz, Nd4jLong *dZShapeInfo, int n, Nd4jLong length) { + try { + auto xType = nd4j::ArrayOptions::dataType(hXShapeInfo); - auto xType = nd4j::ArrayOptions::dataType(hXShapeInfo); - - BUILD_SINGLE_SELECTOR(xType, nd4j::SpecialMethods, ::accumulateGeneric(hX, hz, hZShapeInfo, n, length), LIBND4J_TYPES); + 
BUILD_SINGLE_SELECTOR(xType, nd4j::SpecialMethods, ::accumulateGeneric(hX, hz, hZShapeInfo, n, length), LIBND4J_TYPES); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void enableP2P(bool enable) { @@ -1613,14 +1611,20 @@ void shuffle(Nd4jPointer *extras, int *shuffleMap, Nd4jPointer *tadShapeInfo, Nd4jPointer *tadOffsets) { - auto xShape = reinterpret_cast(hXShapeInfo); - auto zShape = reinterpret_cast(hZShapeInfo); - auto tadOnlyShapeInfo = reinterpret_cast(tadShapeInfo); - auto tadOffset = reinterpret_cast(tadOffsets); + try { + auto xShape = reinterpret_cast(hXShapeInfo); + auto zShape = reinterpret_cast(hZShapeInfo); + auto tadOnlyShapeInfo = reinterpret_cast(tadShapeInfo); + auto tadOffset = reinterpret_cast(tadOffsets); - auto xType = nd4j::ArrayOptions::dataType(xShape[0]); + auto xType = nd4j::ArrayOptions::dataType(xShape[0]); - BUILD_SINGLE_SELECTOR(xType, shuffleGeneric, (hX, xShape, hz, zShape, N, shuffleMap, tadOnlyShapeInfo, tadOffset), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR(xType, shuffleGeneric, + (hX, xShape, hz, zShape, N, shuffleMap, tadOnlyShapeInfo, tadOffset), LIBND4J_TYPES); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } @@ -1633,27 +1637,6 @@ void setOmpMinThreads(int threads) { // TODO: to be implemented } -/* -void execMetaPredicateShape(Nd4jPointer *extras, - const int opTypeA, - const int opNumA, - const int opTypeB, - const int opNumB, - Nd4jLong N, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraA, - void *extraB, - double scalarA, - double scalarB) { - // no-op; -} -*/ - int getDevice() { return 0; } @@ -1671,31 +1654,35 @@ void execScalarTad(Nd4jPointer *extraPointers, void *dDimension, Nd4jLong *dDimensionShape, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { + try { + auto dimension = reinterpret_cast(hDimension); + int dimensionLength = static_cast(shape::length(hDimensionShape)); - auto dimension = reinterpret_cast(hDimension); - int dimensionLength = static_cast(shape::length(hDimensionShape)); - - NativeOpExecutioner::execScalar(nullptr, - opNum, - hX, - hXShapeInfo, - dX, - dXShapeInfo, - extraParams, - hZ, - hZShapeInfo, - dZ, - dZShapeInfo, - hScalars, - hScalarShapeInfo, - dScalars, - dScalarShapeInfo, - dimension, - shape::length(hDimensionShape), - tadShapeInfo, - tadOffsets, - tadShapeInfoZ, - tadOffsetsZ); + NativeOpExecutioner::execScalar(nullptr, + opNum, + hX, + hXShapeInfo, + dX, + dXShapeInfo, + extraParams, + hZ, + hZShapeInfo, + dZ, + dZShapeInfo, + hScalars, + hScalarShapeInfo, + dScalars, + dScalarShapeInfo, + dimension, + shape::length(hDimensionShape), + tadShapeInfo, + tadOffsets, + tadShapeInfoZ, + tadOffsetsZ); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void execScalarBoolTad(Nd4jPointer *extraPointers, @@ -1711,44 +1698,53 @@ void execScalarBoolTad(Nd4jPointer *extraPointers, void *dDimension, Nd4jLong *dDimensionShape, Nd4jLong *tadShapeInfo, Nd4jLong 
*tadOffsets, Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { + try { + auto dimension = reinterpret_cast(hDimension); + int dimensionLength = static_cast(shape::length(hDimensionShape)); - auto dimension = reinterpret_cast(hDimension); - int dimensionLength = static_cast(shape::length(hDimensionShape)); - - NativeOpExecutioner::execScalarBool(nullptr, - opNum, - hX, - hXShapeInfo, - dX, - dXShapeInfo, - extraParams, - hZ, - hZShapeInfo, - dZ, - dZShapeInfo, - hScalars, - hScalarShapeInfo, - dScalars, - dScalarShapeInfo, - dimension, - dimensionLength, - tadShapeInfo, - tadOffsets, - tadShapeInfoZ, - tadOffsetsZ); + NativeOpExecutioner::execScalarBool(nullptr, + opNum, + hX, + hXShapeInfo, + dX, + dXShapeInfo, + extraParams, + hZ, + hZShapeInfo, + dZ, + dZShapeInfo, + hScalars, + hScalarShapeInfo, + dScalars, + dScalarShapeInfo, + dimension, + dimensionLength, + tadShapeInfo, + tadOffsets, + tadShapeInfoZ, + tadOffsetsZ); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } const char * getDeviceName(int deviceId) { - if (!nameSet) { - name = reinterpret_cast(malloc(256 * sizeof(char))); + try { + if (!nameSet) { + name = reinterpret_cast(malloc(256 * sizeof(char))); - CHECK_ALLOC(name, "Failed to allocate new string buffer", 256); + CHECK_ALLOC(name, "Failed to allocate new string buffer", 256); - std::memset(name, 0, 256 * sizeof(char)); - nameSet = true; + std::memset(name, 0, 256 * sizeof(char)); + nameSet = true; - // TODO: provide proper CPU model name here - sprintf(name, "x86-compatible CPU"); + // TODO: provide proper CPU model name here + sprintf(name, "x86-compatible CPU"); + } + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); } @@ -1768,8 +1764,12 @@ void execAggregate(Nd4jPointer *extraPointers,int opNum, void *realArguments, int numRealArguments, nd4j::DataType dtype) { - - BUILD_SINGLE_SELECTOR(dtype, NativeOpExecutioner::execAggregate, (nullptr, opNum, arguments, numArguments, shapeArguments, numShapeArguments, indexArguments, numIndexArguments, intArrays, numIntArrays, realArguments, numRealArguments), FLOAT_TYPES); + try { + BUILD_SINGLE_SELECTOR(dtype, NativeOpExecutioner::execAggregate, (nullptr, opNum, arguments, numArguments, shapeArguments, numShapeArguments, indexArguments, numIndexArguments, intArrays, numIntArrays, realArguments, numRealArguments), FLOAT_TYPES); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } @@ -1841,7 +1841,12 @@ void batchExecutor(Nd4jPointer *extraPointers, int maxReals, void *ptrToArguments, nd4j::DataType dtype) { - BUILD_SINGLE_SELECTOR(dtype, _batchExecutor, (extraPointers, numAggregates, opNum, maxArgs, maxShapes, maxIntArrays, maxIntArraySize, maxIdx, maxReals, ptrToArguments, dtype), FLOAT_TYPES); + try { + BUILD_SINGLE_SELECTOR(dtype, _batchExecutor, (extraPointers, numAggregates, opNum, maxArgs, maxShapes, maxIntArrays, maxIntArraySize, maxIdx, maxReals, ptrToArguments, dtype), FLOAT_TYPES); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void 
execAggregateBatch(Nd4jPointer *extraPointers, @@ -1855,7 +1860,12 @@ void execAggregateBatch(Nd4jPointer *extraPointers, int maxReals, void *ptrToArguments, nd4j::DataType dtype) { - BUILD_SINGLE_SELECTOR(dtype, _batchExecutor, (extraPointers, numAggregates, opNum, maxArgs, maxShapes, maxIntArrays, maxIntArraySize, maxIdx, maxReals, ptrToArguments, dtype), FLOAT_TYPES); + try { + BUILD_SINGLE_SELECTOR(dtype, _batchExecutor, (extraPointers, numAggregates, opNum, maxArgs, maxShapes, maxIntArrays, maxIntArraySize, maxIdx, maxReals, ptrToArguments, dtype), FLOAT_TYPES); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } @@ -1865,7 +1875,12 @@ void execRandom(Nd4jPointer *extraPointers, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, void *extraArguments) { - NativeOpExecutioner::execRandom(nullptr, opNum, state, hZ, hZShapeInfo, dZ, dZShapeInfo, extraArguments); + try { + NativeOpExecutioner::execRandom(nullptr, opNum, state, hZ, hZShapeInfo, dZ, dZShapeInfo, extraArguments); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void execRandom3(Nd4jPointer *extraPointers, @@ -1878,8 +1893,12 @@ void execRandom3(Nd4jPointer *extraPointers, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, void *extraArguments) { - - NativeOpExecutioner::execRandom(nullptr, opNum, state, hX, hXShapeInfo, dX, dXShapeInfo, hY, hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, extraArguments); + try { + NativeOpExecutioner::execRandom(nullptr, opNum, state, hX, hXShapeInfo, dX, dXShapeInfo, hY, hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, extraArguments); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void execRandom2(Nd4jPointer *extraPointers, @@ -1890,19 +1909,25 @@ void execRandom2(Nd4jPointer *extraPointers, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, void *extraArguments) { - - NativeOpExecutioner::execRandom(nullptr, opNum, state, hX, hXShapeInfo, dX, dXShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, extraArguments); + try { + NativeOpExecutioner::execRandom(nullptr, opNum, state, hX, hXShapeInfo, dX, dXShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, extraArguments); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } Nd4jPointer initRandom(Nd4jPointer *extraPointers, long seed, long bufferSize, Nd4jPointer ptrToBuffer) { - graph::RandomGenerator* generator = new graph::RandomGenerator(seed, seed); -// auto ptrBuf = reinterpret_cast(ptrToBuffer); -// auto buffer = new nd4j::random::RandomBuffer(seed, bufferSize, reinterpret_cast(ptrBuf)); -// -// nd4j::random::Xoroshiro128 generator(buffer); -// generator.refreshBuffer(); -// - return (Nd4jPointer) generator; + try { + auto generator = new graph::RandomGenerator(seed, seed); + + return (Nd4jPointer) generator; + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + 
nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + + return nullptr; + } } void refreshBuffer(Nd4jPointer *extraPointers, long seed, Nd4jPointer ptrRandom) { @@ -1953,7 +1978,12 @@ void sort(Nd4jPointer *extraPointers, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, bool descending) { - NativeOpExecutioner::execSort(hX, hXShapeInfo, descending); + try { + NativeOpExecutioner::execSort(hX, hXShapeInfo, descending); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void sortTad(Nd4jPointer *extraPointers, @@ -1964,7 +1994,12 @@ void sortTad(Nd4jPointer *extraPointers, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, bool descending) { - NativeOpExecutioner::execSort(hX, hXShapeInfo, dimension, dimensionLength, tadShapeInfo, tadOffsets, descending); + try { + NativeOpExecutioner::execSort(hX, hXShapeInfo, dimension, dimensionLength, tadShapeInfo, tadOffsets, descending); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void sortCooIndices(Nd4jPointer *extraPointers, @@ -1972,7 +2007,12 @@ void sortCooIndices(Nd4jPointer *extraPointers, void *values, Nd4jLong length, int rank) { - NativeOpExecutioner::execSortCooIndices(indices, values, length, rank); + try { + NativeOpExecutioner::execSortCooIndices(indices, values, length, rank); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } Nd4jLong encodeBitmap(Nd4jPointer *extraPointers, void *hX, Nd4jLong *hXShapeInfo, Nd4jLong N, int *dz, float threshold) { @@ -1983,7 +2023,7 @@ Nd4jLong encodeBitmap(Nd4jPointer *extraPointers, void *hX, Nd4jLong *hXShapeInf Nd4jLong* mmapFile(Nd4jPointer *extraPointers, const char *fileName, Nd4jLong length) { auto hZ = new Nd4jLong[2];errno = 0; - +try { #if defined(_WIN32) || defined(_WIN64) _mmap(hZ, static_cast(length), fileName); #else @@ -1992,7 +2032,7 @@ Nd4jLong* mmapFile(Nd4jPointer *extraPointers, const char *fileName, Nd4jLong le nd4j_printf("Errno: %i\n", errno); throw std::runtime_error("Failed to open file for MMAP"); } - void * ptr = mmap(NULL, length, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + void *ptr = mmap(NULL, length, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); // check for failed allocation if (ptr == MAP_FAILED) @@ -2004,7 +2044,11 @@ Nd4jLong* mmapFile(Nd4jPointer *extraPointers, const char *fileName, Nd4jLong le #endif return hZ; - +} catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return nullptr; +} } void munmapFile(Nd4jPointer *extraPointers, Nd4jLong *ptrMap, Nd4jLong length) { @@ -2019,7 +2063,13 @@ void munmapFile(Nd4jPointer *extraPointers, Nd4jLong *ptrMap, Nd4jLong length) { } nd4j::graph::ResultWrapper* executeFlatGraph(Nd4jPointer *extraPointers, Nd4jPointer flatBufferPointer) { - return nd4j::graph::GraphExecutioner::executeFlatBuffer(flatBufferPointer); + try { + return nd4j::graph::GraphExecutioner::executeFlatBuffer(flatBufferPointer); + } catch (std::exception &e) { + 
nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return nullptr; + } } Nd4jLong getResultWrapperSize(nd4j::graph::ResultWrapper* ptr) { @@ -2061,8 +2111,14 @@ FORCEINLINE int estimateThresholdGeneric(Nd4jPointer *extraPointers, Nd4jPointer int estimateThreshold(Nd4jPointer *extraPointers, Nd4jPointer hX, Nd4jLong *hXShapeInfo, int N, float threshold) { - auto xType = ArrayOptions::dataType(hXShapeInfo); - BUILD_SINGLE_SELECTOR(xType, return estimateThresholdGeneric, (extraPointers, hX, N, threshold), FLOAT_TYPES); + try { + auto xType = ArrayOptions::dataType(hXShapeInfo); + BUILD_SINGLE_SELECTOR(xType, return estimateThresholdGeneric, (extraPointers, hX, N, threshold), FLOAT_TYPES); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return 0; + } } Nd4jLong getShapeListSize(nd4j::ShapeList* list) { @@ -2122,9 +2178,15 @@ nd4j::ShapeList* _calculateOutputShapes(Nd4jPointer* extraPointers, nd4j::ops::D } nd4j::ShapeList* calculateOutputShapes2(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputBuffers, Nd4jPointer* inputShapes, int numInputShapes, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs, bool *bArgs, int numBArgs) { - auto op = nd4j::ops::OpRegistrator::getInstance()->getOperation(hash); + try { + auto op = nd4j::ops::OpRegistrator::getInstance()->getOperation(hash); - return _calculateOutputShapes(extraPointers, op, inputBuffers, inputShapes, numInputShapes, tArgs, numTArgs, iArgs, numIArgs, bArgs, numBArgs); + return _calculateOutputShapes(extraPointers, op, inputBuffers, inputShapes, numInputShapes, tArgs, numTArgs, iArgs, numIArgs, bArgs, numBArgs); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return nullptr; + } } nd4j::ShapeList* _calculateOutputShapes(Nd4jPointer* extraPointers, nd4j::ops::DeclarableOp *op, Nd4jPointer* inputShapes, int numInputShapes, double *tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs) { @@ -2147,16 +2209,28 @@ nd4j::ShapeList* _calculateOutputShapes(Nd4jPointer* extraPointers, nd4j::ops::D } nd4j::ShapeList* calculateOutputShapes(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputShapes, int numInputShapes, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs) { - auto op = nd4j::ops::OpRegistrator::getInstance()->getOperation(hash); + try { + auto op = nd4j::ops::OpRegistrator::getInstance()->getOperation(hash); - return _calculateOutputShapes(extraPointers, op, inputShapes, numInputShapes, tArgs, numTArgs, iArgs, numIArgs); + return _calculateOutputShapes(extraPointers, op, inputShapes, numInputShapes, tArgs, numTArgs, iArgs, numIArgs); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return nullptr; + } } int execCustomOp2(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer opContext) { - auto op = nd4j::ops::OpRegistrator::getInstance()->getOperation(hash); - auto context = reinterpret_cast(opContext); + try { + auto op = nd4j::ops::OpRegistrator::getInstance()->getOperation(hash); + auto context = reinterpret_cast(opContext); - return op->execute(context); + 
return op->execute(context); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return 20; + } } Nd4jStatus realExec(nd4j::ops::DeclarableOp* op, Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputBuffers, Nd4jPointer* inputShapes, int numInputs, Nd4jPointer* outputBuffers, Nd4jPointer* outputShapes, int numOutputs, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs, bool* bArgs, int numBArgs, bool isInplace) { @@ -2234,34 +2308,6 @@ Nd4jStatus realExec(nd4j::ops::DeclarableOp* op, Nd4jPointer* extraPointers, Nd4 outputs[e]->streamline(shape::order(reinterpret_cast(outputShapes[e]))); } -/* - if (!isInplace) { - if (hZ->size() != numOutputs) { - return ND4J_STATUS_BAD_OUTPUT; - } - - for (int e = 0; e < numOutputs; e++) { - auto buffer = (T *) outputBuffers[e]; - auto shape = (int *) outputShapes[e]; - nd4j::NDArray tmp(buffer, shape); - - if (tmp.lengthOf() != hZ->at(e)->lengthOf()) { - nd4j_printf("Provided output array for [%s] has length of %i, but actual hZ has length of %i\n", op->getOpName()->c_str(), tmp.lengthOf(), hZ->at(e)->lengthOf()); - return ND4J_STATUS_BAD_OUTPUT; - } - - tmp.assign(hZ->at(e)); - } - } else { - // if op is inplace, our ResultSet holds pointers - hZ->purge(); - } - - - delete hZ; - -*/ - for (auto v: inputs) delete v; @@ -2273,16 +2319,28 @@ Nd4jStatus realExec(nd4j::ops::DeclarableOp* op, Nd4jPointer* extraPointers, Nd4 int execCustomOp(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputBuffers, Nd4jPointer* inputShapes, int numInputs, Nd4jPointer* outputBuffers, Nd4jPointer* outputShapes, int numOutputs, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs, bool* bArgs, int numBArgs, bool isInplace) { - auto op = nd4j::ops::OpRegistrator::getInstance()->getOperation(hash); - return realExec(op, extraPointers, hash, inputBuffers, inputShapes, numInputs, outputBuffers, outputShapes, numOutputs, tArgs, numTArgs, iArgs, numIArgs, bArgs, numBArgs, isInplace); + try { + auto op = nd4j::ops::OpRegistrator::getInstance()->getOperation(hash); + return realExec(op, extraPointers, hash, inputBuffers, inputShapes, numInputs, outputBuffers, outputShapes, numOutputs, tArgs, numTArgs, iArgs, numIArgs, bArgs, numBArgs, isInplace); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return 1; + } } int registerGraph(Nd4jPointer *extraPointers, Nd4jLong graphId, Nd4jPointer flatBufferPointer) { - auto graph = nd4j::graph::GraphExecutioner::importFromFlatPointer(flatBufferPointer); + try { + auto graph = nd4j::graph::GraphExecutioner::importFromFlatPointer(flatBufferPointer); - nd4j::graph::GraphHolder::getInstance()->registerGraph(graphId, graph); + nd4j::graph::GraphHolder::getInstance()->registerGraph(graphId, graph); - return ND4J_STATUS_OK; + return ND4J_STATUS_OK; + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return 1; + } } static VariablesSet* executeStoredGraphT(Nd4jPointer *extraPointers, Nd4jLong graphId, Nd4jPointer *inputBuffers, Nd4jPointer *inputShapes, int* inputIndices, int numInputs) { @@ -2478,7 +2536,13 @@ Nd4jStatus execCustomOpWithScope_(Nd4jPointer *extraPointers, nd4j::graph::Graph } 
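The pattern in this hunk, and in the hunks around it, is consistent: catch std::exception at the exported C boundary, record error code 1 plus the message on the default LaunchContext's error reference, and return a sentinel instead of letting the exception unwind into the caller. Below is a minimal caller-side sketch of how that state could be polled through the lastErrorCode() / lastErrorMessage() accessors added near the end of this file; opHash and opContext are assumed to have been prepared by the caller and are placeholders, not values taken from this patch:

    // hypothetical usage sketch: run a custom op, then poll the error reference
    // (opHash is an assumed Nd4jLong op hash, opContext an assumed Nd4jPointer to an op context)
    int status = execCustomOp2(nullptr, opHash, opContext);
    if (status != ND4J_STATUS_OK || lastErrorCode() != 0)
        nd4j_printf("custom op failed: %s\n", lastErrorMessage());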
Nd4jStatus execCustomOpWithScope(Nd4jPointer *extraPointers, Nd4jPointer state, Nd4jLong opHash, Nd4jLong *scopes, int numScopes, Nd4jPointer *inputBuffers, Nd4jPointer *inputShapes, int numInputs, Nd4jPointer *outputBuffers, Nd4jPointer *outputShapes, int numOutputs) { - return execCustomOpWithScope_(extraPointers, reinterpret_cast(state), opHash, scopes, numScopes, inputBuffers, inputShapes, numInputs, outputBuffers, outputShapes, numOutputs); + try { + return execCustomOpWithScope_(extraPointers, reinterpret_cast(state), opHash, scopes, numScopes, inputBuffers, inputShapes, numInputs, outputBuffers, outputShapes, numOutputs); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return 1; + } } void deleteResultWrapper(Nd4jPointer ptr) { @@ -2704,73 +2768,98 @@ void scatterUpdate(Nd4jPointer *extraPointers, int opCode, int numOfSubArrs, void* dY, Nd4jLong* dYShapeInfo, Nd4jLong* dYOffsets, int* hIindexes, int* dIindexes) { + try { - int numThreads = omp_get_max_threads(); + int numThreads = omp_get_max_threads(); - PRAGMA_OMP_PARALLEL_THREADS(numThreads) - { - for (int i = 0; i < numOfSubArrs; ++i) { + PRAGMA_OMP_PARALLEL_THREADS(numThreads) + { + for (int i = 0; i < numOfSubArrs; ++i) { - int threadIndex = omp_get_thread_num(); - const auto xIndex = hIindexes[i]; - const bool isOwner = xIndex < numThreads ? threadIndex == xIndex : threadIndex == xIndex % numThreads; + int threadIndex = omp_get_thread_num(); + const auto xIndex = hIindexes[i]; + const bool isOwner = xIndex < numThreads ? threadIndex == xIndex : threadIndex == xIndex % numThreads; - if (!isOwner) - continue; - - NDArray inSubArr(reinterpret_cast(hX) + (hXOffsets[hIindexes[i]] * DataTypeUtils::sizeOf(hXShapeInfo)), hXShapeInfo); - NDArray updSubArr(reinterpret_cast(hY) + (hYOffsets[i] * DataTypeUtils::sizeOf(hXShapeInfo)), hYShapeInfo); - - if (inSubArr.lengthOf() != updSubArr.lengthOf()) { - continue; - } - - switch (opCode) { - case 0: - inSubArr.applyPairwiseTransform(pairwise::Add, &updSubArr, &inSubArr, nullptr); - break; - case 1: - inSubArr.applyPairwiseTransform(pairwise::Subtract, &updSubArr, &inSubArr, nullptr); - break; - case 2: - inSubArr.applyPairwiseTransform(pairwise::Multiply, &updSubArr, &inSubArr, nullptr); - break; - case 3: - inSubArr.applyPairwiseTransform(pairwise::Divide, &updSubArr, &inSubArr, nullptr); - break; - case 4: - inSubArr.applyPairwiseTransform(pairwise::ReverseSubtract, &updSubArr, &inSubArr, nullptr); - break; - case 5: - inSubArr.applyPairwiseTransform(pairwise::ReverseDivide, &updSubArr, &inSubArr, nullptr); - break; - case 6: - inSubArr.applyPairwiseTransform(pairwise::CopyPws, &updSubArr, &inSubArr, nullptr); - break; - default: + if (!isOwner) continue; + + NDArray inSubArr( + reinterpret_cast(hX) + (hXOffsets[hIindexes[i]] * DataTypeUtils::sizeOf(hXShapeInfo)), + hXShapeInfo); + NDArray updSubArr(reinterpret_cast(hY) + (hYOffsets[i] * DataTypeUtils::sizeOf(hXShapeInfo)), + hYShapeInfo); + + if (inSubArr.lengthOf() != updSubArr.lengthOf()) { + continue; + } + + switch (opCode) { + case 0: + inSubArr.applyPairwiseTransform(pairwise::Add, &updSubArr, &inSubArr, nullptr); + break; + case 1: + inSubArr.applyPairwiseTransform(pairwise::Subtract, &updSubArr, &inSubArr, nullptr); + break; + case 2: + inSubArr.applyPairwiseTransform(pairwise::Multiply, &updSubArr, &inSubArr, nullptr); + break; + case 3: + 
inSubArr.applyPairwiseTransform(pairwise::Divide, &updSubArr, &inSubArr, nullptr); + break; + case 4: + inSubArr.applyPairwiseTransform(pairwise::ReverseSubtract, &updSubArr, &inSubArr, nullptr); + break; + case 5: + inSubArr.applyPairwiseTransform(pairwise::ReverseDivide, &updSubArr, &inSubArr, nullptr); + break; + case 6: + inSubArr.applyPairwiseTransform(pairwise::CopyPws, &updSubArr, &inSubArr, nullptr); + break; + default: + continue; + } } } + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); } } void inspectArray(Nd4jPointer *extraPointers, Nd4jPointer buffer, Nd4jLong *shapeInfo, Nd4jPointer specialBuffer, Nd4jLong *specialShapeInfo, Nd4jPointer debugInfo) { - auto p = reinterpret_cast(debugInfo); - NDArray array(buffer, shapeInfo); - nd4j::DebugHelper::retrieveDebugStatistics(p, &array); + try { + auto p = reinterpret_cast(debugInfo); + NDArray array(buffer, shapeInfo); + nd4j::DebugHelper::retrieveDebugStatistics(p, &array); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void tryPointer(Nd4jPointer extra, Nd4jPointer p, int len) { - auto buf = reinterpret_cast(p); - int cnt = 0; - for (int i = 0; i < len; i++) - cnt += buf[cnt]; + try { + auto buf = reinterpret_cast(p); + int cnt = 0; + for (int i = 0; i < len; i++) + cnt += buf[cnt]; + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } nd4j::ConstantDataBuffer* shapeBuffer(int rank, Nd4jLong *shape, Nd4jLong *strides, nd4j::DataType dtype, char order, Nd4jLong ews, bool empty) { - auto buffer = new ConstantDataBuffer(); - *buffer = nd4j::ConstantShapeHelper::getInstance()->bufferForShapeInfo(ShapeDescriptor(dtype, order, shape, strides, rank, ews, empty)); - return buffer; + try { + auto buffer = new ConstantDataBuffer(); + *buffer = nd4j::ConstantShapeHelper::getInstance()->bufferForShapeInfo( + ShapeDescriptor(dtype, order, shape, strides, rank, ews, empty)); + return buffer; + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return nullptr; + } } void deleteShapeBuffer(nd4j::ConstantDataBuffer* ptr) { @@ -2790,7 +2879,13 @@ nd4j::ConstantDataBuffer* constantBufferDouble(nd4j::DataType dtype, double *dat } nd4j::ConstantDataBuffer* constantBuffer(nd4j::DataType dtype, nd4j::ConstantDescriptor *descriptor) { - return nd4j::ConstantHelper::getInstance()->constantBuffer(*descriptor, dtype); + try { + return nd4j::ConstantHelper::getInstance()->constantBuffer(*descriptor, dtype); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return nullptr; + } } Nd4jPointer getConstantDataBufferPrimary(nd4j::ConstantDataBuffer* dbf) { @@ -2808,7 +2903,13 @@ Nd4jLong getConstantDataBufferSizeOf(nd4j::ConstantDataBuffer* dbf) { nd4j::graph::Context* createGraphContext(int nodeId) { - return new nd4j::graph::Context(nodeId); + try { + return new nd4j::graph::Context(nodeId); + } catch (std::exception &e) { + 
nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return nullptr; + } } nd4j::graph::RandomGenerator* getGraphContextRandomGenerator(nd4j::graph::Context* ptr) { return &ptr->randomGenerator(); @@ -2872,32 +2973,38 @@ int dataTypeFromNpyHeader(void *header) { } Nd4jPointer shapeBufferForNumpy(Nd4jPointer npyArray) { - cnpy::NpyArray arr = cnpy::loadNpyFromPointer(reinterpret_cast(npyArray)); - unsigned int shapeSize = arr.shape.size(); - std::vector shape(shapeSize); - bool _empty = false; - for(unsigned int i = 0; i < shapeSize; i++) { - shape[i] = arr.shape[i]; + try { + cnpy::NpyArray arr = cnpy::loadNpyFromPointer(reinterpret_cast(npyArray)); + unsigned int shapeSize = arr.shape.size(); + std::vector shape(shapeSize); + bool _empty = false; + for (unsigned int i = 0; i < shapeSize; i++) { + shape[i] = arr.shape[i]; - if (arr.shape[i] == 0) - _empty = true; + if (arr.shape[i] == 0) + _empty = true; + } + + auto dtype = cnpy::dataTypeFromHeader(reinterpret_cast(npyArray)); + + Nd4jLong *shapeBuffer; + if (shape.size() == 1 && shape[0] == 0) { + // scalar case + shapeBuffer = nd4j::ShapeBuilders::createScalarShapeInfo(dtype); + } else if (_empty) { + if (shapeSize > 0) + shapeBuffer = nd4j::ShapeBuilders::emptyShapeInfo(dtype, arr.fortranOrder ? 'f' : 'c', shape); + else + shapeBuffer = nd4j::ShapeBuilders::emptyShapeInfo(dtype); + } else { + shapeBuffer = nd4j::ShapeBuilders::createShapeInfo(dtype, arr.fortranOrder ? 'f' : 'c', shape); + } + return reinterpret_cast(nd4j::ConstantShapeHelper::getInstance()->createFromExisting(shapeBuffer, true)); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return nullptr; } - - auto dtype = cnpy::dataTypeFromHeader(reinterpret_cast(npyArray)); - - Nd4jLong *shapeBuffer; - if (shape.size() == 1 && shape[0] == 0) { - // scalar case - shapeBuffer = nd4j::ShapeBuilders::createScalarShapeInfo(dtype); - } else if (_empty) { - if (shapeSize > 0) - shapeBuffer = nd4j::ShapeBuilders::emptyShapeInfo(dtype, arr.fortranOrder ? 'f' : 'c', shape); - else - shapeBuffer = nd4j::ShapeBuilders::emptyShapeInfo(dtype); - } else { - shapeBuffer = nd4j::ShapeBuilders::createShapeInfo(dtype, arr.fortranOrder ? 
'f' : 'c', shape); - } - return reinterpret_cast(nd4j::ConstantShapeHelper::getInstance()->createFromExisting(shapeBuffer, true)); } void sortByKey(Nd4jPointer *extraPointers, @@ -2906,10 +3013,15 @@ void sortByKey(Nd4jPointer *extraPointers, void *y, Nd4jLong *yShapeInfo, void *dy, Nd4jLong *dyShapeInfo, bool descending) { - auto xType = ArrayOptions::dataType(xShapeInfo); - auto yType = ArrayOptions::dataType(yShapeInfo); + try { + auto xType = ArrayOptions::dataType(xShapeInfo); + auto yType = ArrayOptions::dataType(yShapeInfo); - BUILD_DOUBLE_SELECTOR(xType, yType, nd4j::DoubleMethods, ::sortByKey(x, xShapeInfo, y, yShapeInfo, descending), LIBND4J_TYPES, LIBND4J_TYPES); + BUILD_DOUBLE_SELECTOR(xType, yType, nd4j::DoubleMethods, ::sortByKey(x, xShapeInfo, y, yShapeInfo, descending), LIBND4J_TYPES, LIBND4J_TYPES); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void sortByValue(Nd4jPointer *extraPointers, @@ -2918,11 +3030,15 @@ void sortByValue(Nd4jPointer *extraPointers, void *y, Nd4jLong *yShapeInfo, void *dy, Nd4jLong *dyShapeInfo, bool descending) { + try { + auto xType = ArrayOptions::dataType(xShapeInfo); + auto yType = ArrayOptions::dataType(yShapeInfo); - auto xType = ArrayOptions::dataType(xShapeInfo); - auto yType = ArrayOptions::dataType(yShapeInfo); - - BUILD_DOUBLE_SELECTOR(xType, yType, nd4j::DoubleMethods, ::sortByValue(x, xShapeInfo, y, yShapeInfo, descending), LIBND4J_TYPES, LIBND4J_TYPES); + BUILD_DOUBLE_SELECTOR(xType, yType, nd4j::DoubleMethods, ::sortByValue(x, xShapeInfo, y, yShapeInfo, descending), LIBND4J_TYPES, LIBND4J_TYPES); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void sortTadByKey(Nd4jPointer *extraPointers, @@ -2933,10 +3049,15 @@ void sortTadByKey(Nd4jPointer *extraPointers, int *dimension, int dimensionLength, bool descending) { - auto xType = ArrayOptions::dataType(xShapeInfo); - auto yType = ArrayOptions::dataType(yShapeInfo); + try { + auto xType = ArrayOptions::dataType(xShapeInfo); + auto yType = ArrayOptions::dataType(yShapeInfo); - BUILD_DOUBLE_SELECTOR(xType, yType, nd4j::DoubleMethods, ::sortTadByKey(x, xShapeInfo, y, yShapeInfo, dimension, dimensionLength, descending), LIBND4J_TYPES, LIBND4J_TYPES); + BUILD_DOUBLE_SELECTOR(xType, yType, nd4j::DoubleMethods, ::sortTadByKey(x, xShapeInfo, y, yShapeInfo, dimension, dimensionLength, descending), LIBND4J_TYPES, LIBND4J_TYPES); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void sortTadByValue(Nd4jPointer *extraPointers, @@ -2947,24 +3068,35 @@ void sortTadByValue(Nd4jPointer *extraPointers, int *dimension, int dimensionLength, bool descending) { - auto xType = ArrayOptions::dataType(xShapeInfo); - auto yType = ArrayOptions::dataType(yShapeInfo); + try { + auto xType = ArrayOptions::dataType(xShapeInfo); + auto yType = ArrayOptions::dataType(yShapeInfo); - BUILD_DOUBLE_SELECTOR(xType, yType, nd4j::DoubleMethods, ::sortTadByValue(x, xShapeInfo, y, yShapeInfo, dimension, dimensionLength, descending), LIBND4J_TYPES, LIBND4J_TYPES); + BUILD_DOUBLE_SELECTOR(xType, yType, nd4j::DoubleMethods, ::sortTadByValue(x, xShapeInfo, y, yShapeInfo, dimension, 
dimensionLength, descending), LIBND4J_TYPES, LIBND4J_TYPES); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } const char* runLightBenchmarkSuit(bool printOut) { - nd4j::LightBenchmarkSuit suit; - auto result = suit.runSuit(); + try { + nd4j::LightBenchmarkSuit suit; + auto result = suit.runSuit(); - if (printOut) - nd4j_printf("%s\n", result.data()); + if (printOut) + nd4j_printf("%s\n", result.data()); - auto chars = new char[result.length()+1]; - std::memcpy(chars, result.data(), result.length()); - chars[result.length()] = (char) 0x0; + auto chars = new char[result.length() + 1]; + std::memcpy(chars, result.data(), result.length()); + chars[result.length()] = (char) 0x0; - return chars; + return chars; + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return nullptr; + } } Nd4jLong getCachedMemory(int deviceId) { @@ -2972,17 +3104,23 @@ Nd4jLong getCachedMemory(int deviceId) { } const char* runFullBenchmarkSuit(bool printOut) { - nd4j::FullBenchmarkSuit suit; - auto result = suit.runSuit(); + try { + nd4j::FullBenchmarkSuit suit; + auto result = suit.runSuit(); - if (printOut) - nd4j_printf("%s\n", result.data()); + if (printOut) + nd4j_printf("%s\n", result.data()); - auto chars = new char[result.length()+1]; - std::memcpy(chars, result.data(), result.length()); - chars[result.length()] = (char) 0x0; + auto chars = new char[result.length() + 1]; + std::memcpy(chars, result.data(), result.length()); + chars[result.length()] = (char) 0x0; - return chars; + return chars; + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return nullptr; + } } nd4j::LaunchContext* defaultLaunchContext() { @@ -3017,8 +3155,14 @@ Nd4jPointer lcSolverHandle(OpaqueLaunchContext* lc) { return nullptr; } +int lastErrorCode() { + return nd4j::LaunchContext::defaultContext()->errorReference()->errorCode(); +} + +const char* lastErrorMessage() { + return nd4j::LaunchContext::defaultContext()->errorReference()->errorMessage(); +} -BUILD_SINGLE_TEMPLATE(template void flattenGeneric,(Nd4jPointer*, int, char, void*, Nd4jLong*, void*, Nd4jLong*), LIBND4J_TYPES); BUILD_SINGLE_TEMPLATE(template void pullRowsGeneric, (void *, Nd4jLong*, void*, Nd4jLong*, const int, Nd4jLong*, Nd4jLong*, Nd4jLong*, Nd4jLong*, Nd4jLong*), LIBND4J_TYPES); BUILD_SINGLE_TEMPLATE(template void tearGeneric, (void *, Nd4jLong*, Nd4jPointer*, Nd4jLong*, Nd4jLong*, Nd4jLong*), LIBND4J_TYPES); BUILD_SINGLE_TEMPLATE(template void shuffleGeneric, (void**, Nd4jLong**, void**, Nd4jLong**, int, int*, Nd4jLong**, Nd4jLong**), LIBND4J_TYPES); diff --git a/libnd4j/blas/cuda/NativeOps.cu b/libnd4j/blas/cuda/NativeOps.cu index e75aa422c..626b0ea26 100755 --- a/libnd4j/blas/cuda/NativeOps.cu +++ b/libnd4j/blas/cuda/NativeOps.cu @@ -68,21 +68,6 @@ int minThreads = 32; __constant__ char deviceConstantMemory[49152]; -typedef struct { - long streamId; - long callId; -} __syncInfo; - -typedef __syncInfo SyncInfo; - - -// this method isn't used, left here for legacy and caution purposes -// TLDR: don't use this way, it sucks -void CUDART_CB syncCallback(cudaStream_t stream, cudaError_t status, void *data){ - SyncInfo *sync = 
reinterpret_cast(data); - - //printf("Finished stream: [%i], kernel call: [%i]\n", sync->streamId, sync->callId); -} // this method just does type conversion in fancy way int getDeviceId(Nd4jPointer ptrToDeviceId) { @@ -250,9 +235,14 @@ void execPairwiseTransform( Nd4jPointer *extraPointers, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, void *extraParams) { - - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execPairwiseTransform(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, hY, hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, extraParams); + try { + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execPairwiseTransform(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, hY, hYShapeInfo, dY, + dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, extraParams); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -265,9 +255,14 @@ void execPairwiseTransformBool(Nd4jPointer *extraPointers, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, void *extraParams) { - - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execPairwiseBoolTransform(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, hY, hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, extraParams); + try { + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execPairwiseBoolTransform(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, hY, hYShapeInfo, + dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, extraParams); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -279,9 +274,14 @@ void execSummaryStatsScalar(Nd4jPointer *extraPointers, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, bool biasCorrected) { - - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execSummaryStatsScalar(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, hZShapeInfo, dZ, dZShapeInfo, biasCorrected); + try { + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execSummaryStatsScalar(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, + hZShapeInfo, dZ, dZShapeInfo, biasCorrected); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -295,24 +295,30 @@ void execBroadcastBool(Nd4jPointer *extraPointers, void *dZ, Nd4jLong *dZShapeInfo, void *hDimension, Nd4jLong *hDimensionShape, void *dDimension, Nd4jLong *dDimensionShape) { + try { + //Nd4jLong *tadOnlyShapeInfo = reinterpret_cast(extraPointers[0]); + //Nd4jLong *tadOffsets = reinterpret_cast(extraPointers[1]); + //Nd4jLong *tadOnlyShapeInfoZ = reinterpret_cast(extraPointers[2]); + //Nd4jLong 
*tadOffsetsZ = reinterpret_cast(extraPointers[3]); - //Nd4jLong *tadOnlyShapeInfo = reinterpret_cast(extraPointers[0]); - //Nd4jLong *tadOffsets = reinterpret_cast(extraPointers[1]); - //Nd4jLong *tadOnlyShapeInfoZ = reinterpret_cast(extraPointers[2]); - //Nd4jLong *tadOffsetsZ = reinterpret_cast(extraPointers[3]); + auto dimension = reinterpret_cast(dDimension); + int dimensionLength = static_cast(shape::length(hDimensionShape)); - auto dimension = reinterpret_cast(dDimension); - int dimensionLength = static_cast(shape::length(hDimensionShape)); - - auto hTADShapeInfo = reinterpret_cast(extraPointers[9]); - auto tadOnlyShapeInfo = reinterpret_cast(extraPointers[10]); - auto tadOffsets = reinterpret_cast(extraPointers[11]); - auto tadOnlyShapeInfoZ = reinterpret_cast(extraPointers[12]); - auto tadOffsetsZ = reinterpret_cast(extraPointers[13]); - - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execBroadcastBool(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, hY, hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, dimension, dimensionLength, tadOnlyShapeInfo, tadOffsets, tadOnlyShapeInfoZ, tadOffsetsZ); + auto hTADShapeInfo = reinterpret_cast(extraPointers[9]); + auto tadOnlyShapeInfo = reinterpret_cast(extraPointers[10]); + auto tadOffsets = reinterpret_cast(extraPointers[11]); + auto tadOnlyShapeInfoZ = reinterpret_cast(extraPointers[12]); + auto tadOffsetsZ = reinterpret_cast(extraPointers[13]); + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execBroadcastBool(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, hY, hYShapeInfo, dY, + dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, dimension, + dimensionLength, tadOnlyShapeInfo, tadOffsets, tadOnlyShapeInfoZ, + tadOffsetsZ); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } /** @@ -338,38 +344,33 @@ void execBroadcast( void *dZ, Nd4jLong *dZShapeInfo, void *hDimension, Nd4jLong *hDimensionShape, void *dDimension, Nd4jLong *dDimensionShape) { -/* - cudaEvent_t start; - cudaEventCreateWithFlags(&start, cudaEventDisableTiming); - timespec tsX; - timespec tsY; - clock_gettime(CLOCK_REALTIME, &tsX); -*/ - auto dimension = reinterpret_cast(dDimension); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + try { + auto dimension = reinterpret_cast(dDimension); + int dimensionLength = static_cast(shape::length(hDimensionShape)); - cudaStream_t *stream = reinterpret_cast(extraPointers[1]); + cudaStream_t *stream = reinterpret_cast(extraPointers[1]); - auto hTADShapeInfo = reinterpret_cast(extraPointers[9]); - auto tadOnlyShapeInfo = reinterpret_cast(extraPointers[10]); - auto tadOffsets = reinterpret_cast(extraPointers[11]); - auto tadOnlyShapeInfoZ = reinterpret_cast(extraPointers[12]); - auto tadOffsetsZ = reinterpret_cast(extraPointers[13]); + auto hTADShapeInfo = reinterpret_cast(extraPointers[9]); + auto tadOnlyShapeInfo = reinterpret_cast(extraPointers[10]); + auto tadOffsets = reinterpret_cast(extraPointers[11]); + auto tadOnlyShapeInfoZ = reinterpret_cast(extraPointers[12]); + auto tadOffsetsZ = reinterpret_cast(extraPointers[13]); - auto xType = nd4j::ArrayOptions::dataType(hXShapeInfo); - auto yType = nd4j::ArrayOptions::dataType(hYShapeInfo); - auto zType = nd4j::ArrayOptions::dataType(hZShapeInfo); + auto xType = 
nd4j::ArrayOptions::dataType(hXShapeInfo); + auto yType = nd4j::ArrayOptions::dataType(hYShapeInfo); + auto zType = nd4j::ArrayOptions::dataType(hZShapeInfo); - if (nd4j::Environment::getInstance()->isDebugAndVerbose()) - printf("F3 opNum:[%i]\n", opNum); + if (nd4j::Environment::getInstance()->isDebugAndVerbose()) + printf("F3 opNum:[%i]\n", opNum); - //Nd4jLong *tadOnlyShapeInfo = reinterpret_cast(extraPointers[0]); - //Nd4jLong *tadOffsets = reinterpret_cast(extraPointers[1]); - //Nd4jLong *tadOnlyShapeInfoZ = reinterpret_cast(extraPointers[2]); - //Nd4jLong *tadOffsetsZ = reinterpret_cast(extraPointers[3]); - - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execBroadcast(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, hY, hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, dimension, dimensionLength, tadOnlyShapeInfo, tadOffsets, tadOnlyShapeInfoZ, tadOffsetsZ); + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execBroadcast(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, hY, hYShapeInfo, dY, + dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, dimension, dimensionLength, + tadOnlyShapeInfo, tadOffsets, tadOnlyShapeInfoZ, tadOffsetsZ); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } @@ -390,9 +391,14 @@ void execReduceFloat(Nd4jPointer *extraPointers, void *extraParams, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo) { - - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execReduceFloatScalar(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, hZShapeInfo, dZ, dZShapeInfo); + try { + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execReduceFloatScalar(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, + hZShapeInfo, dZ, dZShapeInfo); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -403,9 +409,14 @@ void execReduceSame(Nd4jPointer *extraPointers, void *extraParams, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo) { - - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execReduceSameScalar(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, hZShapeInfo, dZ, dZShapeInfo); + try { + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execReduceSameScalar(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, + hZShapeInfo, dZ, dZShapeInfo); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -418,13 +429,22 @@ void execReduceSame2(Nd4jPointer *extraPointers, void *dZ, Nd4jLong *dZShapeInfo, void *hDimension, Nd4jLong *hDimensionShape, void *dDimension, Nd4jLong *dDimensionShape) { - auto dimension = reinterpret_cast(dDimension); - int 
dimensionLength = static_cast(shape::length(hDimensionShape)); + try { + auto dimension = reinterpret_cast(dDimension); + int dimensionLength = static_cast(shape::length(hDimensionShape)); - auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, reinterpret_cast(hDimension), shape::length(hDimensionShape)); + auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, + reinterpret_cast(hDimension), + shape::length(hDimensionShape)); - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execReduceSame(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, hZShapeInfo, dZ, dZShapeInfo, dimension, dimensionLength, tadPack.specialShapeInfo(), tadPack.specialOffsets()); + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execReduceSame(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, hZShapeInfo, + dZ, dZShapeInfo, dimension, dimensionLength, tadPack.specialShapeInfo(), + tadPack.specialOffsets()); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -437,13 +457,22 @@ void execReduceLong2(Nd4jPointer *extraPointers, void *dZ, Nd4jLong *dZShapeInfo, void *hDimension, Nd4jLong *hDimensionShape, void *dDimension, Nd4jLong *dDimensionShape) { - auto dimension = reinterpret_cast(dDimension); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + try { + auto dimension = reinterpret_cast(dDimension); + int dimensionLength = static_cast(shape::length(hDimensionShape)); - auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, reinterpret_cast(hDimension), shape::length(hDimensionShape)); + auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, + reinterpret_cast(hDimension), + shape::length(hDimensionShape)); - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execReduceLong(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, hZShapeInfo, dZ, dZShapeInfo, dimension, dimensionLength, tadPack.specialShapeInfo(), tadPack.specialOffsets()); + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execReduceLong(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, hZShapeInfo, + dZ, dZShapeInfo, dimension, dimensionLength, tadPack.specialShapeInfo(), + tadPack.specialOffsets()); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -454,30 +483,37 @@ void execReduceLong(Nd4jPointer *extraPointers, void *extraParams, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo) { + try { + auto stream = reinterpret_cast(extraPointers[1]); + auto hTADShapeInfo = reinterpret_cast(extraPointers[9]); + auto dTADShapeInfo = reinterpret_cast(extraPointers[10]); - auto stream = reinterpret_cast(extraPointers[1]); - auto hTADShapeInfo = reinterpret_cast(extraPointers[9]); - auto dTADShapeInfo = reinterpret_cast(extraPointers[10]); + if 
(nd4j::Environment::getInstance()->isDebugAndVerbose()) + printf("LF7 opNum:[%i]\n", opNum); - if (nd4j::Environment::getInstance()->isDebugAndVerbose()) - printf("LF7 opNum:[%i]\n", opNum); + auto reductionPointer = reinterpret_cast(extraPointers[4]); - auto reductionPointer = reinterpret_cast(extraPointers[4]); + auto xType = nd4j::ArrayOptions::dataType(hXShapeInfo); + auto zType = nd4j::ArrayOptions::dataType(hZShapeInfo); - auto xType = nd4j::ArrayOptions::dataType(hXShapeInfo); - auto zType = nd4j::ArrayOptions::dataType(hZShapeInfo); + if (zType != nd4j::DataType::INT64) + throw datatype_exception::build("execReduceLong wrong Z data type", nd4j::DataType::INT64, zType); - if (zType != nd4j::DataType::INT64) - throw datatype_exception::build("execReduceLong wrong Z data type", nd4j::DataType::INT64, zType); + auto xLength = shape::length(hXShapeInfo); + auto blockWidth = 256; + auto numBlocks = CudaLaunchHelper::getReductionBlocks(xLength, blockWidth); + dim3 launchDims(numBlocks, blockWidth, 32768); - auto xLength = shape::length(hXShapeInfo); - auto blockWidth = 256; - auto numBlocks = CudaLaunchHelper::getReductionBlocks(xLength, blockWidth); - dim3 launchDims(numBlocks, blockWidth, 32768); + BUILD_DOUBLE_SELECTOR(xType, zType, functions::reduce::ReduceLongFunction, + ::execReduceScalar(launchDims, stream, opNum, dX, dXShapeInfo, hXShapeInfo, extraParams, + dZ, dZShapeInfo, hXShapeInfo, nullptr, 0, reductionPointer, + dTADShapeInfo), LIBND4J_TYPES, LONG_TYPES); - BUILD_DOUBLE_SELECTOR(xType, zType, functions::reduce::ReduceLongFunction, ::execReduceScalar(launchDims, stream, opNum, dX, dXShapeInfo, hXShapeInfo, extraParams, dZ, dZShapeInfo, hXShapeInfo, nullptr, 0, reductionPointer, dTADShapeInfo), LIBND4J_TYPES, LONG_TYPES); - - nd4j::DebugHelper::checkErrorCode(stream, "execReduceLong(...) failed"); + nd4j::DebugHelper::checkErrorCode(stream, "execReduceLong(...) 
failed"); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -490,13 +526,22 @@ void execReduceBool2(Nd4jPointer *extraPointers, void *dZ, Nd4jLong *dZShapeInfo, void *hDimension, Nd4jLong *hDimensionShape, void *dDimension, Nd4jLong *dDimensionShape) { - auto dimension = reinterpret_cast(dDimension); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + try { + auto dimension = reinterpret_cast(dDimension); + int dimensionLength = static_cast(shape::length(hDimensionShape)); - auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, reinterpret_cast(hDimension), shape::length(hDimensionShape)); + auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, + reinterpret_cast(hDimension), + shape::length(hDimensionShape)); - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execReduceBool(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, hZShapeInfo, dZ, dZShapeInfo, dimension, dimensionLength, tadPack.specialShapeInfo(), tadPack.specialOffsets()); + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execReduceBool(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, hZShapeInfo, + dZ, dZShapeInfo, dimension, dimensionLength, tadPack.specialShapeInfo(), + tadPack.specialOffsets()); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -507,30 +552,37 @@ void execReduceBool(Nd4jPointer *extraPointers, void *extraParams, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo) { + try { + auto stream = reinterpret_cast(extraPointers[1]); + auto hTADShapeInfo = reinterpret_cast(extraPointers[9]); + auto dTADShapeInfo = reinterpret_cast(extraPointers[10]); - auto stream = reinterpret_cast(extraPointers[1]); - auto hTADShapeInfo = reinterpret_cast(extraPointers[9]); - auto dTADShapeInfo = reinterpret_cast(extraPointers[10]); + if (nd4j::Environment::getInstance()->isDebugAndVerbose()) + printf("BF7 opNum:[%i]\n", opNum); - if (nd4j::Environment::getInstance()->isDebugAndVerbose()) - printf("BF7 opNum:[%i]\n", opNum); + auto reductionPointer = reinterpret_cast(extraPointers[4]); - auto reductionPointer = reinterpret_cast(extraPointers[4]); + auto xType = nd4j::ArrayOptions::dataType(hXShapeInfo); + auto zType = nd4j::ArrayOptions::dataType(hZShapeInfo); - auto xType = nd4j::ArrayOptions::dataType(hXShapeInfo); - auto zType = nd4j::ArrayOptions::dataType(hZShapeInfo); + if (zType != nd4j::DataType::BOOL) + throw std::runtime_error("execReduceBool requires Z operand to have BOOL type"); - if (zType != nd4j::DataType::BOOL) - throw std::runtime_error("execReduceBool requires Z operand to have BOOL type"); + auto xLength = shape::length(hXShapeInfo); + auto blockWidth = 256; + auto numBlocks = CudaLaunchHelper::getReductionBlocks(xLength, blockWidth); + dim3 launchDims(numBlocks, blockWidth, 32768); - auto xLength = shape::length(hXShapeInfo); - auto blockWidth = 256; - auto numBlocks = CudaLaunchHelper::getReductionBlocks(xLength, 
blockWidth); - dim3 launchDims(numBlocks, blockWidth, 32768); + BUILD_DOUBLE_SELECTOR(xType, zType, functions::reduce::ReduceBoolFunction, + ::execReduceScalar(launchDims, stream, opNum, dX, dXShapeInfo, hXShapeInfo, extraParams, + dZ, dZShapeInfo, hZShapeInfo, nullptr, 0, reductionPointer, + dTADShapeInfo), LIBND4J_TYPES, BOOL_TYPES); - BUILD_DOUBLE_SELECTOR(xType, zType, functions::reduce::ReduceBoolFunction, ::execReduceScalar(launchDims, stream, opNum, dX, dXShapeInfo, hXShapeInfo, extraParams, dZ, dZShapeInfo, hZShapeInfo, nullptr, 0, reductionPointer, dTADShapeInfo), LIBND4J_TYPES, BOOL_TYPES); - - nd4j::DebugHelper::checkErrorCode(stream, "execReduceBool(...) failed"); + nd4j::DebugHelper::checkErrorCode(stream, "execReduceBool(...) failed"); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } /** @@ -554,13 +606,22 @@ void execIndexReduce(Nd4jPointer *extraPointers, void *dZ, Nd4jLong *dZShapeInfo, void *hDimension, Nd4jLong *hDimensionShape, void *dDimension, Nd4jLong *dDimensionShape) { - auto dimension = reinterpret_cast(dDimension); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + try { + auto dimension = reinterpret_cast(dDimension); + int dimensionLength = static_cast(shape::length(hDimensionShape)); - auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, reinterpret_cast(hDimension), shape::length(hDimensionShape)); + auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, + reinterpret_cast(hDimension), + shape::length(hDimensionShape)); - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execIndexReduce(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, hZShapeInfo, dZ, dZShapeInfo, dimension, dimensionLength, tadPack.specialShapeInfo(), tadPack.specialOffsets()); + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execIndexReduce(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, hZShapeInfo, + dZ, dZShapeInfo, dimension, dimensionLength, tadPack.specialShapeInfo(), + tadPack.specialOffsets()); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } /** @@ -582,13 +643,22 @@ void execReduceFloat2(Nd4jPointer *extraPointers, void *dZ, Nd4jLong *dZShapeInfo, void *hDimension, Nd4jLong *hDimensionShape, void *dDimension, Nd4jLong *dDimensionShape) { - auto dimension = reinterpret_cast(dDimension); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + try { + auto dimension = reinterpret_cast(dDimension); + int dimensionLength = static_cast(shape::length(hDimensionShape)); - auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, reinterpret_cast(hDimension), shape::length(hDimensionShape)); + auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, + reinterpret_cast(hDimension), + shape::length(hDimensionShape)); - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execReduceFloat(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, hZShapeInfo, dZ, dZShapeInfo, dimension, dimensionLength, tadPack.specialShapeInfo(), 
tadPack.specialOffsets()); + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execReduceFloat(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, hZShapeInfo, + dZ, dZShapeInfo, dimension, dimensionLength, tadPack.specialShapeInfo(), + tadPack.specialOffsets()); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } /** @@ -607,9 +677,14 @@ void execIndexReduceScalar( void *extraParams, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo){ - - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execIndexReduceScalar(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, hZShapeInfo, dZ, dZShapeInfo); + try { + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execIndexReduceScalar(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, + hZShapeInfo, dZ, dZShapeInfo); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -619,12 +694,17 @@ void execTransformSame(Nd4jPointer *extraPointers,int opNum, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, void *extraParams) { + try { + auto tadShapeInfo = reinterpret_cast(extraPointers != nullptr ? extraPointers[0] : nullptr); + auto tadOffsets = reinterpret_cast(extraPointers != nullptr ? extraPointers[1] : nullptr); - auto tadShapeInfo = reinterpret_cast(extraPointers != nullptr ? extraPointers[0] : nullptr); - auto tadOffsets = reinterpret_cast(extraPointers != nullptr ? extraPointers[1] : nullptr); - - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execTransformSame(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, extraParams, tadShapeInfo, tadOffsets); + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execTransformSame(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, hZ, hZShapeInfo, dZ, + dZShapeInfo, extraParams, tadShapeInfo, tadOffsets); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -634,12 +714,17 @@ void execTransformBool(Nd4jPointer *extraPointers,int opNum, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, void *extraParams) { + try { + auto tadShapeInfo = reinterpret_cast(extraPointers != nullptr ? extraPointers[0] : nullptr); + auto tadOffsets = reinterpret_cast(extraPointers != nullptr ? extraPointers[1] : nullptr); - auto tadShapeInfo = reinterpret_cast(extraPointers != nullptr ? extraPointers[0] : nullptr); - auto tadOffsets = reinterpret_cast(extraPointers != nullptr ? 
extraPointers[1] : nullptr); - - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execTransformBool(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, extraParams, tadShapeInfo, tadOffsets); + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execTransformBool(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, hZ, hZShapeInfo, dZ, + dZShapeInfo, extraParams, tadShapeInfo, tadOffsets); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -649,12 +734,18 @@ void execTransformAny(Nd4jPointer *extraPointers,int opNum, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, void *extraParams) { + try { + auto stream = reinterpret_cast(extraPointers[1]); + auto streamSpecial = reinterpret_cast(extraPointers[4]); + LaunchContext lc(stream, streamSpecial, extraPointers[5], extraPointers[3], + reinterpret_cast(extraPointers[6])); - auto stream = reinterpret_cast(extraPointers[1]); - auto streamSpecial = reinterpret_cast(extraPointers[4]); - LaunchContext lc(stream, streamSpecial, extraPointers[5], extraPointers[3], reinterpret_cast(extraPointers[6])); - - NativeOpExecutioner::execTransformAny(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, extraParams, nullptr, nullptr); + NativeOpExecutioner::execTransformAny(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, hZ, hZShapeInfo, dZ, + dZShapeInfo, extraParams, nullptr, nullptr); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -664,12 +755,17 @@ void execTransformStrict(Nd4jPointer *extraPointers,int opNum, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, void *extraParams) { + try { + auto tadShapeInfo = reinterpret_cast(extraPointers != nullptr ? extraPointers[10] : nullptr); + auto tadOffsets = reinterpret_cast(extraPointers != nullptr ? extraPointers[11] : nullptr); - auto tadShapeInfo = reinterpret_cast(extraPointers != nullptr ? extraPointers[10] : nullptr); - auto tadOffsets = reinterpret_cast(extraPointers != nullptr ? 
extraPointers[11] : nullptr); - - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execTransformStrict(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, extraParams, tadShapeInfo, tadOffsets); + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execTransformStrict(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, hZ, hZShapeInfo, dZ, + dZShapeInfo, extraParams, tadShapeInfo, tadOffsets); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -679,55 +775,19 @@ void execTransformFloat(Nd4jPointer *extraPointers,int opNum, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, void *extraParams) { + try { + auto tadShapeInfo = reinterpret_cast(extraPointers != nullptr ? extraPointers[10] : nullptr); + auto tadOffsets = reinterpret_cast(extraPointers != nullptr ? extraPointers[11] : nullptr); - auto tadShapeInfo = reinterpret_cast(extraPointers != nullptr ? extraPointers[10] : nullptr); - auto tadOffsets = reinterpret_cast(extraPointers != nullptr ? extraPointers[11] : nullptr); - - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execTransformFloat(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, extraParams, tadShapeInfo, tadOffsets); + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execTransformFloat(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, hZ, hZShapeInfo, dZ, + dZShapeInfo, extraParams, tadShapeInfo, tadOffsets); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } - -/** - * Append an input array - * to the end of a flat array - * in a particular order - * @param offset the offset of the array to start at - * @param order the order - * @param dZ the dZ array - * @param dZShapeInfo the shape info for te array - * @param input the input for the array - * @param inputShapeInfo the shape information for that array - */ -void flatten(Nd4jPointer *extraPointers, - int offset, - char order, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hInput, Nd4jLong *hInputShapeInfo, - void *dInput, Nd4jLong *dInputShapeInfo) { - - cudaStream_t *stream = reinterpret_cast(extraPointers[1]); - auto hYShapeInfo = reinterpret_cast(extraPointers[7]); - - if (nd4j::Environment::getInstance()->isDebugAndVerbose()) - printf("F22 opNum:[7]\n"); - - // int *allocPointer = reinterpret_cast(extraPointers[3]); - - dim3 launchDims(256, 256, 2048); - - if (nd4j::Environment::getInstance()->isVerbose() && launchDims.x == 1) - printf("AF222 opNum:[7]\n"); - - auto type = nd4j::ArrayOptions::dataType(hInputShapeInfo); - BUILD_SINGLE_SELECTOR(type, flattenKernelGeneric, (launchDims, stream, extraPointers, offset, order, dZ, dZShapeInfo, dInput, dInputShapeInfo), LIBND4J_TYPES); - - DEBUG_KERNEL(stream, -1); -} - - - void checkP2P() { int curDevice = 0; @@ -821,23 +881,28 @@ bool isP2PAvailable() { void initializeDevicesAndFunctions() { - int devCnt = 0; - cudaGetDeviceCount(&devCnt); - deviceProperties = new 
cudaDeviceProp[devCnt]; - for (int i = 0; i < devCnt; i++) { - cudaSetDevice(i); - cudaGetDeviceProperties(&deviceProperties[i], i); + try { + int devCnt = 0; + cudaGetDeviceCount(&devCnt); + deviceProperties = new cudaDeviceProp[devCnt]; + for (int i = 0; i < devCnt; i++) { + cudaSetDevice(i); + cudaGetDeviceProperties(&deviceProperties[i], i); - cudaDeviceSetLimit(cudaLimitStackSize, 4096); - } + cudaDeviceSetLimit(cudaLimitStackSize, 4096); + } - cudaSetDevice(0); + cudaSetDevice(0); - checkP2P(); + checkP2P(); - // enabling p2p gpu access if it's supported - if (supportedP2P && devCnt > 1) - enableP2P(allowedP2P); + // enabling p2p gpu access if it's supported + if (supportedP2P && devCnt > 1) + enableP2P(allowedP2P); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void initializeFunctions(Nd4jPointer *functions) { @@ -867,8 +932,10 @@ Nd4jPointer mallocHost(Nd4jLong memorySize, int flags) { Nd4jPointer pointer; // cudaHostAllocMapped |cudaHostAllocPortable auto res = cudaHostAlloc(reinterpret_cast(&pointer), memorySize, cudaHostAllocDefault); - if (res != 0) - throw nd4j::cuda_exception::build("cudaHostAlloc(...) failed", res); + if (res != 0) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(res); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage("cudaHostAlloc failed"); + } return pointer; } @@ -884,8 +951,11 @@ Nd4jPointer mallocHost(Nd4jLong memorySize, int flags) { Nd4jPointer mallocDevice(Nd4jLong memorySize, int deviceId, int flags) { Nd4jPointer pointer; auto res = cudaMalloc(reinterpret_cast(&pointer), memorySize); - if (res != 0) - throw nd4j::cuda_exception::build("cudaMalloc(...) failed", res); + if (res != 0) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(res); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage("cudaMalloc failed"); + } + return pointer; } @@ -896,8 +966,11 @@ Nd4jPointer mallocDevice(Nd4jLong memorySize, int deviceId, int flags) { */ int freeHost(Nd4jPointer pointer) { auto res = cudaFreeHost(reinterpret_cast(pointer)); - if (res != 0) - throw nd4j::cuda_exception::build("cudaFreeHost(...) failed", res); + if (res != 0) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(res); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage("cudaFreeHost failed"); + } + return 1L; } @@ -909,10 +982,14 @@ int freeHost(Nd4jPointer pointer) { */ int freeDevice(Nd4jPointer pointer, int deviceId) { auto res = cudaFree(reinterpret_cast(pointer)); - if (res != 0) - throw nd4j::cuda_exception::build("cudaFree(...) failed", res); - return 1L; + // we're intentionally skipping + if (res != 0 && res != 1) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(res); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage("cudaFree failed"); + } + + return res == 0 ? 1L : 0L; } @@ -921,22 +998,13 @@ Nd4jPointer createContext() { } Nd4jPointer createStream() { - /* - Nd4jPointer nativeStream = (Nd4jPointer) malloc(sizeof(cudaStream_t)); - CHECK_ALLOC(nativeStream, "Failed to allocate memory for new CUDA stream", sizeof(cudaStream_t)); - - cudaError_t dZ = cudaStreamCreate(reinterpret_cast(&nativeStream)); - checkCudaErrors(dZ); - if (dZ != 0) - throw std::runtime_error("cudaStreamCreate(...) 
failed"); - - return nativeStream; - */ auto stream = new cudaStream_t(); auto dZ = cudaStreamCreate(stream); - if (dZ != 0) - throw nd4j::cuda_exception::build("cudaStreamCreate(...) failed", dZ); + if (dZ != 0) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(dZ); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage("cudaStreamCreate failed"); + } return stream; } @@ -947,9 +1015,10 @@ Nd4jPointer createEvent() { CHECK_ALLOC(nativeEvent, "Failed to allocate new CUDA event buffer", sizeof(cudaEvent_t)); auto dZ = cudaEventCreateWithFlags(reinterpret_cast(&nativeEvent), cudaEventDisableTiming); - if (dZ != 0) - throw nd4j::cuda_exception::build("cudaEventCreateWithFlags(...) failed", dZ); - + if (dZ != 0) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(dZ); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage("cudaEventCreateWithFlags failed"); + } return nativeEvent; } @@ -959,8 +1028,10 @@ int registerEvent(Nd4jPointer event, Nd4jPointer stream) { auto pStream = reinterpret_cast(stream); auto dZ = cudaEventRecord(*pEvent, *pStream); - if (dZ != 0) - throw nd4j::cuda_exception::build("cudaEventRecord(...) failed", dZ); + if (dZ != 0) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(dZ); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage("cudaEventRecord failed"); + } return 1; } @@ -1048,8 +1119,11 @@ int memcpyAsync(Nd4jPointer dst, Nd4jPointer src, Nd4jLong size, int flags, Nd4j kind = cudaMemcpyDeviceToDevice; } break; - default: - throw nd4j::cuda_exception::build("UNDEFINED MEMCPY!\n", 119); + default: { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage("UNDEFNED MEMCPY"); + return 0; + } } auto dZ = cudaMemcpyAsync(reinterpret_cast(dst), const_cast(reinterpret_cast(src)), static_cast(size), kind, *pStream); @@ -1058,7 +1132,8 @@ int memcpyAsync(Nd4jPointer dst, Nd4jPointer src, Nd4jLong size, int flags, Nd4j printf("Failed on [%lu] -> [%lu], size: [%i], direction: [%i], dZ: [%i]\n", src, dst, size, flags, static_cast(dZ)); fflush(stdout); fflush(stderr); - throw nd4j::cuda_exception::build("cudaMemcpyAsync(...) failed", dZ); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(dZ); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage("cudaMemcpyAsync failed"); } return 1; @@ -1066,8 +1141,10 @@ int memcpyAsync(Nd4jPointer dst, Nd4jPointer src, Nd4jLong size, int flags, Nd4j int memsetSync(Nd4jPointer dst, int value, Nd4jLong size, int flags, Nd4jPointer reserved) { auto dZ = cudaMemset(reinterpret_cast(dst), value, static_cast(size)); - if (dZ != 0) - throw nd4j::cuda_exception::build("cudaMemset(...) failed", dZ); + if (dZ != 0) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(dZ); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage("cudaMemset failed"); + } return 1; } @@ -1076,8 +1153,10 @@ int memsetAsync(Nd4jPointer dst, int value, Nd4jLong size, int flags, Nd4jPointe auto pStream = reinterpret_cast(reserved); auto dZ = cudaMemsetAsync(reinterpret_cast(dst), value, static_cast(size), *pStream); - if (dZ != 0) - throw nd4j::cuda_exception::build("cudaMemsetAsync(...) 
failed", dZ); + if (dZ != 0) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(dZ); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage("cudaMemsetAsync failed"); + } return 1; } @@ -1085,8 +1164,10 @@ int memsetAsync(Nd4jPointer dst, int value, Nd4jLong size, int flags, Nd4jPointe int destroyEvent(Nd4jPointer event) { auto pEvent = reinterpret_cast(&event); auto dZ = cudaEventDestroy(*pEvent); - if (dZ != 0) - throw nd4j::cuda_exception::build("cudaEvenDestroy(...) failed", dZ); + if (dZ != 0) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(dZ); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage("cudaEventDestroy failed"); + } return 1; } @@ -1095,8 +1176,10 @@ int streamSynchronize(Nd4jPointer stream) { auto pStream = reinterpret_cast(stream); auto dZ = cudaStreamSynchronize(*pStream); - if (dZ != 0) - throw nd4j::cuda_exception::build("cudaStreamSynchronize(...) failed", dZ); + if (dZ != 0) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(dZ); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage("cudaStreamSynchronize failed"); + } return 1L; } @@ -1105,8 +1188,10 @@ int eventSynchronize(Nd4jPointer event) { auto pEvent = reinterpret_cast(&event); auto dZ = cudaEventSynchronize(*pEvent); - if (dZ != 0) - throw nd4j::cuda_exception::build("cudaEventSynchronize(...) failed", dZ); + if (dZ != 0) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(dZ); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage("cudaEventSynchronize failed"); + } return 1L; } @@ -1162,268 +1247,6 @@ const char * getDeviceName(int device) { return deviceProperties[device].name; } -/////////////////////////////////////////////////////////////////// -template -__global__ static void concatCuda(const int numOfArrs, void* pVx, void* pxShapeInfo, void* pVz, void* pzShapeInfo) { - - __shared__ int arrIdx, blocksPerArr; - __shared__ T *x, *z; - __shared__ Nd4jLong *zShapeInfo, *xShapeInfo, arrLen, arrLenZ, arrLenPerBlock, start, end; - - if (threadIdx.x == 0) { - blocksPerArr = (gridDim.x - gridDim.x % numOfArrs) / numOfArrs; // floor - arrIdx = blockIdx.x / blocksPerArr; - if (arrIdx >= numOfArrs) - arrIdx = numOfArrs - 1; - x = reinterpret_cast(reinterpret_cast(pVx)[arrIdx]); - z = reinterpret_cast(reinterpret_cast(pVz)[arrIdx]); - xShapeInfo = reinterpret_cast(pxShapeInfo)[arrIdx]; - zShapeInfo = reinterpret_cast(pzShapeInfo)[arrIdx]; - - arrLen = shape::length(xShapeInfo); - arrLenZ = shape::length(zShapeInfo); - arrLenPerBlock = (arrLen + blocksPerArr - arrLen % blocksPerArr) / blocksPerArr; // ceil - - start = arrLenPerBlock * (blockIdx.x % blocksPerArr); - end = (start + arrLenPerBlock) > arrLen ? 
arrLen : (start + arrLenPerBlock); - } - __syncthreads(); - - for (Nd4jLong i = threadIdx.x + start; i < end; i += blockDim.x) { - auto zOffset = shape::getIndexOffset(i, zShapeInfo, arrLenZ); - auto xOffset = shape::getIndexOffset(i, xShapeInfo, arrLen); - //printf("z[%i][%lld] = x[%i][%lld]\n", arrIdx, zOffset, arrIdx, xOffset); - z[zOffset] = x[xOffset]; - } -} -template -__host__ static void concatCudaLauncher(const int numOfArrs, cudaStream_t *stream, void* pVx, void* pxShapeInfo, void* pVz, void* pzShapeInfo) { - //int blocks = numOfArrs * 16; // >> 1 << 2); - //nd4j_printf("gridDim.x is %i\n", blocks); - //if (blocks > 8192) - // blocks = 8192; // restrict grid dims to 8K max - concatCuda<<>>(numOfArrs, pVx, pxShapeInfo, pVz, pzShapeInfo); - nd4j::DebugHelper::checkErrorCode(stream, "concat(...) failed"); -} -BUILD_SINGLE_TEMPLATE(template void concatCudaLauncher, (const int numOfArrs, cudaStream_t *stream, void* pVx, void* pxShapeInfo, void* pVz, void* pzShapeInfo), LIBND4J_TYPES); - -static void -specialBufferAndShapeWithOffset(void* vZ, Nd4jLong* hZShapeInfo, Nd4jLong* dZShapeInfo, std::vector const& idx, void*& outBuffer, Nd4jLong*& outShape) { - auto zType = nd4j::ArrayOptions::dataType(hZShapeInfo); - const int rank = shape::rank(hZShapeInfo); - Nd4jLong* newShape = new Nd4jLong[shape::shapeInfoLength(rank)]; - //ALLOCATE(newShape, nullptr, , Nd4jLong) - auto shapeSize = shape::shapeInfoByteLength(rank); - memcpy(newShape, hZShapeInfo, shapeSize); - - auto shapeOf = shape::shapeOf(newShape); - auto stridesOf = shape::stride(newShape); - - Nd4jLong offset(0), subArrLen(1); - int n(2), first, last, stride; - - for (int d = rank - 1; d >= 0; --d) { - - if (idx[n * d] != idx[n * d + 1]) { - auto axeDim = shape::sizeAt(hZShapeInfo, d); - first = idx[n * d] >= 0 ? idx[n * d] : idx[n * d] + axeDim + 1; - last = idx[n * d + 1] >= 0 ? 
idx[n * d + 1] : idx[n * d + 1] + axeDim + 1; - stride = 1; - - shapeOf[d] = (last - first + stride - 1) / stride; // ceil (last - first) / stride; - offset += first * stridesOf[d]; - - if(shapeOf[d] != 1) - stridesOf[d] *= stride; - } - - subArrLen *= shapeOf[d]; - } - - // check if there is possibility to set ews = 1 - //shape::setEws(newShape, subArrLen); - - //makeBothBuffersActual(); - outBuffer = (void*)((int8_t*)vZ + offset * DataTypeUtils::sizeOfElement(zType)); - cudaError_t err = cudaMalloc(&outShape, shapeSize); - if (err != 0) { - printf("Cannot allocate memory with error %d\n", err); - throw std::runtime_error("Cannot allocate memory for shape"); - } - cudaMemcpy(outShape, newShape, shapeSize, cudaMemcpyHostToDevice); - delete [] newShape; -} - -/** - * Concatneate multi array of the same shape together - * along a particular dimension - */ -void concat( - Nd4jPointer *extraPointers, - int dimension, - int numArrays, - Nd4jPointer *data, Nd4jPointer *inputShapeInfo, - Nd4jPointer *ddata, Nd4jPointer *dinputShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - Nd4jPointer *tadPointers, Nd4jPointer *offsetPointers) { - - auto stream = reinterpret_cast(extraPointers[1]); - - auto hXShapeInfo = hZShapeInfo; - auto hShapePointers = reinterpret_cast(inputShapeInfo); - auto dShapePointers = reinterpret_cast(dinputShapeInfo); - // numArrays will be used as number of TADs, so each block process 1 input - auto zType = nd4j::ArrayOptions::dataType(hZShapeInfo); - auto axis = dimension; - - const int rank = shape::rank(hZShapeInfo); //reinterpret_cast(inputShapeInfo[0])); - const int rank2 = 2 * rank; - std::vector> indices(numArrays, std::vector(rank2,0)); - - // take into account indices for first array - auto axisSize = shape::sizeAt(reinterpret_cast(inputShapeInfo[0]), axis); - indices[0][2 * axis + 1] = axisSize; - - for(int i = 1; i < numArrays; ++i) { - indices[i][2 * axis] = indices[i-1][2 * axis + 1]; // index start from - indices[i][2 * axis + 1] = indices[i-1][2 * axis + 1] + shape::sizeAt(reinterpret_cast(inputShapeInfo[i]), axis); // index end with (excluding) - } - - std::vector outSubArrsBuffs(numArrays); - std::vector outSubArrsShapes(numArrays); - for(int i = 0; i < numArrays; ++i) { - specialBufferAndShapeWithOffset(dZ, hZShapeInfo, dZShapeInfo, indices[i], outSubArrsBuffs[i], outSubArrsShapes[i]); - } - - LaunchContext context(stream); - PointersManager manager(&context, "concat"); - void* dOutBuffers = manager.replicatePointer(outSubArrsBuffs.data(), outSubArrsBuffs.size() * sizeof(void*)); - void* dInBuffers = manager.replicatePointer(ddata, numArrays * sizeof(void*)); - void* dInShapeInfo = manager.replicatePointer(dShapePointers, numArrays * sizeof(Nd4jLong*)); - void* dOutShapeInfo = manager.replicatePointer(outSubArrsShapes.data(), outSubArrsShapes.size() * sizeof(Nd4jLong*)); - - BUILD_SINGLE_SELECTOR(zType, concatCudaLauncher, (numArrays, stream, dInBuffers, dInShapeInfo, dOutBuffers, dOutShapeInfo), LIBND4J_TYPES); - manager.synchronize(); - - cudaError_t err; - for(int i = 0; i < numArrays; ++i) { - err = cudaFree(outSubArrsShapes[i]); - if (err != 0) { - printf("Error %d occured when shape %i was deallocating.\n", err, i); - throw std::runtime_error("Cannot deallocate memory for shapes."); - } - } -} - -/** - * Concatneate multi array of the same shape together - * along a particular dimension - */ -// void concat( -// Nd4jPointer *extraPointers, -// int dimension, -// int numArrays, -// Nd4jPointer *data, Nd4jPointer 
*inputShapeInfo, -// Nd4jPointer *ddata, Nd4jPointer *dinputShapeInfo, -// void *hZ, Nd4jLong *hZShapeInfo, -// void *dZ, Nd4jLong *dZShapeInfo, -// Nd4jPointer *tadPointers, Nd4jPointer *offsetPointers) { -// -// cudaStream_t *stream = reinterpret_cast(extraPointers[1]); -// auto hXShapeInfo = hZShapeInfo; -// auto hShapePointers = reinterpret_cast(inputShapeInfo); -// // numArrays will be used as number of TADs, so each block process 1 input -// -// int smem = 8192; -// bool isVstack = false; -// bool isScalar = true; -// bool isHstack = false; -// -// for (int i = 0; i < numArrays; i++) { -// if (!shape::isScalar(hShapePointers[i])) { -// isScalar = false; -// break; -// } -// } -// -// if (!isScalar && dimension == 0 && shape::rank(hZShapeInfo) == 2 && shape::order(hZShapeInfo) == 'c' ) { -// isVstack = true; -// for (int i = 0; i < numArrays; i++) { -// if (!shape::isVector(hShapePointers[i]) || shape::elementWiseStride(hShapePointers[i]) <= 0 || -// shape::order(hShapePointers[i]) != 'c') { -// isVstack = false; -// break; -// } -// } -// } -// -// // let's try to fit N-dimensional vstack -// if (!isVstack && !isScalar && dimension == 0 && shape::order(hXShapeInfo) == 'c') { -// auto length0 = shape::length(hShapePointers[0]); -// isVstack = true; -// for (int i = 0; i < numArrays; i++) { -// if (shape::elementWiseStride(hShapePointers[i]) <= 0 || shape::order(hShapePointers[i]) != 'c' || length0 != shape::length(hShapePointers[i])) { -// isVstack = false; -// break; -// } -// } -// } -// -// if (!isScalar && !isVstack && dimension == 1 && shape::isVector(hZShapeInfo)) { -// isHstack = true; -// for (int i = 0; i < numArrays; i++) { -// if (!shape::isVector(hShapePointers[i]) || shape::elementWiseStride(hShapePointers[i]) <= 0) { -// isHstack = false; -// break; -// } -// } -// } -// -// if (isScalar) { -// if (nd4j::Environment::getInstance()->isDebugAndVerbose()) -// printf("Going scalar concat\n"); -// -// dim3 launchDims(128, 128, 16384); -// auto zType = nd4j::ArrayOptions::dataType(hZShapeInfo); -// BUILD_SINGLE_SELECTOR(zType, concatKernelScalarGeneric, (launchDims, stream, numArrays, reinterpret_cast(ddata[0]), dZ), LIBND4J_TYPES); -// -// } else if (isVstack) { -// if (nd4j::Environment::getInstance()->isDebugAndVerbose()) -// printf("Going VStack concat\n"); -// -// dim3 launchDims(128, 512, 16384); -// auto zType = nd4j::ArrayOptions::dataType(hZShapeInfo); -// BUILD_SINGLE_SELECTOR(zType, concatKernelVStackGeneric, (launchDims, stream, numArrays, reinterpret_cast(ddata[0]), reinterpret_cast(dinputShapeInfo[0]), dZ, dZShapeInfo), LIBND4J_TYPES); -// -// } else if (isHstack) { -// if (nd4j::Environment::getInstance()->isDebugAndVerbose()) -// printf("Going HStack concat\n"); -// -// dim3 launchDims(128, 128, 16384); -// auto zType = nd4j::ArrayOptions::dataType(hZShapeInfo); -// BUILD_SINGLE_SELECTOR(zType, concatKernelHStackGeneric, (launchDims, stream, numArrays, reinterpret_cast(ddata[0]), reinterpret_cast(dinputShapeInfo[0]), dZ, dZShapeInfo), LIBND4J_TYPES); -// } else { -// if (nd4j::Environment::getInstance()->isDebugAndVerbose()) -// printf("Going generic concat\n"); -// -// auto devZTadShape = reinterpret_cast(extraPointers[10]); -// auto devZOffsets = reinterpret_cast(extraPointers[11]); -// -// dim3 launchDims(128, 128, 8192); -// auto zType = nd4j::ArrayOptions::dataType(hZShapeInfo); -// BUILD_SINGLE_SELECTOR(zType, concatKernelGeneric, (launchDims, stream, numArrays, reinterpret_cast(ddata[0]), reinterpret_cast(dinputShapeInfo[0]), dZ, dZShapeInfo, 
reinterpret_cast(tadPointers[0]), reinterpret_cast(offsetPointers[0]), devZTadShape, devZOffsets), LIBND4J_TYPES); -// } -// if (nd4j::Environment::getInstance()->isDebugAndVerbose()) -// printf("sharedMemory requested for concatFloat: [%i], registers: [%i]\n", smem, funcAttributes[31].numRegs); -// -// cudaError_t res = cudaStreamSynchronize(*stream); -// checkCudaErrors(res); -// nd4j::DebugHelper::checkErrorCode(stream, "Legacy ConcatFloat(...) failed"); -//} - - - void specialConcat( Nd4jPointer *extraPointers, int dimension, @@ -1432,8 +1255,14 @@ void specialConcat( Nd4jPointer *inputShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, Nd4jPointer *tadPointers, Nd4jPointer *offsetPointers) { - - BUILD_SINGLE_SELECTOR(ArrayOptions::dataType(dZShapeInfo), nd4j::SpecialMethods ,::concatCpuGeneric(dimension, numArrays, data, inputShapeInfo, dZ, dZShapeInfo), LIBND4J_TYPES); + try { + BUILD_SINGLE_SELECTOR(ArrayOptions::dataType(dZShapeInfo), nd4j::SpecialMethods, + ::concatCpuGeneric(dimension, numArrays, data, inputShapeInfo, dZ, dZShapeInfo), + LIBND4J_TYPES); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } @@ -1441,9 +1270,15 @@ void specialConcat( * This method saves */ nd4j::TadPack* tadOnlyShapeInfo(Nd4jLong *dXShapeInfo, int *dimension, int dimensionLength) { - auto pack = new TadPack(); - *pack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(dXShapeInfo, dimension, dimensionLength); - return pack; + try { + auto pack = new TadPack(); + *pack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(dXShapeInfo, dimension, dimensionLength); + return pack; + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return nullptr; + } } Nd4jLong* getPrimaryShapeInfo(nd4j::TadPack* pack) { @@ -1489,11 +1324,11 @@ int memcpyConstantAsync(Nd4jLong dst, Nd4jPointer src, Nd4jLong size, int flags, } break; } - //cudaError_t dZ = cudaMemcpyAsync((void *) dst, (const void *) src, (size_t) size, kind, *pStream); - cudaError_t dZ = cudaMemcpyToSymbolAsync(deviceConstantMemory, const_cast(src), size, dst, kind, *pStream); - checkCudaErrors(dZ); - if (dZ != 0) - throw std::runtime_error("cudaMemcpyToSymbolAsync(...) failed"); + auto dZ = cudaMemcpyToSymbolAsync(deviceConstantMemory, const_cast(src), size, dst, kind, *pStream); + if (dZ != 0) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(dZ); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage("cudaMemcpyToSymbolAsync failed"); + } return 1; } @@ -1502,8 +1337,10 @@ Nd4jPointer getConstantSpace() { Nd4jPointer dConstAddr; cudaError_t dZ = cudaGetSymbolAddress(reinterpret_cast(&dConstAddr), deviceConstantMemory); - if (dZ != 0) - throw std::runtime_error("cudaGetSymbolAddress(...) 
failed"); + if (dZ != 0) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(dZ); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage("cudaGetSymbolAddress failed"); + } return dConstAddr; } @@ -1519,13 +1356,19 @@ void pullRows(Nd4jPointer *extraPointers, Nd4jLong *tadOffsets, Nd4jLong *zTadShapeInfo, Nd4jLong *zTadOffsets) { + try { + cudaStream_t *stream = reinterpret_cast(extraPointers[1]); + dim3 launchDims(64, 256, 1024); + auto xType = nd4j::ArrayOptions::dataType(xShapeInfo); + BUILD_SINGLE_SELECTOR(xType, pullRowsKernelGeneric, + (launchDims, stream, dX, dZ, n, indexes, tadShapeInfo, tadOffsets, zTadShapeInfo, zTadOffsets), + LIBND4J_TYPES); - cudaStream_t *stream = reinterpret_cast(extraPointers[1]); - dim3 launchDims(64, 256, 1024); - auto xType = nd4j::ArrayOptions::dataType(xShapeInfo); - BUILD_SINGLE_SELECTOR(xType, pullRowsKernelGeneric, (launchDims, stream, dX, dZ, n, indexes, tadShapeInfo, tadOffsets, zTadShapeInfo, zTadOffsets), LIBND4J_TYPES); - - DEBUG_KERNEL(stream, -1); + DEBUG_KERNEL(stream, -1); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } @@ -1537,25 +1380,31 @@ void average(Nd4jPointer *extras, int n, Nd4jLong length, bool propagate) { + try { + cudaStream_t *stream = reinterpret_cast(extras[1]); + int mode = getDeviceId(extras[3]); - cudaStream_t * stream = reinterpret_cast(extras[1]); - int mode = getDeviceId(extras[3]); + auto dX = reinterpret_cast(dx); - auto dX = reinterpret_cast(dx); + if (nd4j::Environment::getInstance()->isDebugAndVerbose()) + printf("averageFloat called\n"); - if (nd4j::Environment::getInstance()->isDebugAndVerbose()) - printf("averageFloat called\n"); - - auto xType = nd4j::ArrayOptions::dataType(xShapeInfo); - // launching on gpu - if (mode == 0) { - dim3 launchDims(256, 256, 4096); - BUILD_SINGLE_SELECTOR(xType, averagingKernelGeneric, (launchDims, stream, dX, dz, n, length, propagate), LIBND4J_TYPES); - nd4j::DebugHelper::checkErrorCode(stream, "AverageFloat(...) failed"); - } else { - // launching on host memory - BUILD_SINGLE_SELECTOR(xType, nd4j::SpecialMethods, ::averageGeneric(x, z, zShapeInfo, n, length, propagate), LIBND4J_TYPES); - } + auto xType = nd4j::ArrayOptions::dataType(xShapeInfo); + // launching on gpu + if (mode == 0) { + dim3 launchDims(256, 256, 4096); + BUILD_SINGLE_SELECTOR(xType, averagingKernelGeneric, (launchDims, stream, dX, dz, n, length, propagate), + LIBND4J_TYPES); + nd4j::DebugHelper::checkErrorCode(stream, "AverageFloat(...) 
failed"); + } else { + // launching on host memory + BUILD_SINGLE_SELECTOR(xType, nd4j::SpecialMethods, ::averageGeneric(x, z, zShapeInfo, n, length, propagate), + LIBND4J_TYPES); + } + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void accumulate(Nd4jPointer *extras, @@ -1565,25 +1414,31 @@ void accumulate(Nd4jPointer *extras, void *dz, Nd4jLong *dzShapeInfo, int n, Nd4jLong length) { + try { + auto stream = reinterpret_cast(extras[1]); + int mode = getDeviceId(extras[3]); - auto stream = reinterpret_cast(extras[1]); - int mode = getDeviceId(extras[3]); + auto dX = reinterpret_cast(dx); - auto dX = reinterpret_cast(dx); + if (nd4j::Environment::getInstance()->isDebugAndVerbose()) + printf("accumulateFloat called\n"); + auto xType = nd4j::ArrayOptions::dataType(xShapeInfo); - if (nd4j::Environment::getInstance()->isDebugAndVerbose()) - printf("accumulateFloat called\n"); - auto xType = nd4j::ArrayOptions::dataType(xShapeInfo); - - // launching on gpu - if (mode == 0) { - dim3 launchDims(n, 256, 16384); - BUILD_SINGLE_SELECTOR(xType, accumulateKernelGeneric, (launchDims, stream, dX, dz, n,length), LIBND4J_TYPES); - nd4j::DebugHelper::checkErrorCode(stream, "AccumulateFloat(...) failed"); - } else { - // launching on host memory - BUILD_SINGLE_SELECTOR(xType, nd4j::SpecialMethods, ::accumulateGeneric(x, z, zShapeInfo, n, length), LIBND4J_TYPES); - } + // launching on gpu + if (mode == 0) { + dim3 launchDims(n, 256, 16384); + BUILD_SINGLE_SELECTOR(xType, accumulateKernelGeneric, (launchDims, stream, dX, dz, n, length), + LIBND4J_TYPES); + nd4j::DebugHelper::checkErrorCode(stream, "AccumulateFloat(...) failed"); + } else { + // launching on host memory + BUILD_SINGLE_SELECTOR(xType, nd4j::SpecialMethods, ::accumulateGeneric(x, z, zShapeInfo, n, length), + LIBND4J_TYPES); + } + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } @@ -1596,50 +1451,29 @@ void shuffle(Nd4jPointer *extras, int *shuffleMap, Nd4jPointer *tadShapeInfo, Nd4jPointer *tadOffsets) { + try { + cudaStream_t *stream = reinterpret_cast(extras[1]); - cudaStream_t *stream = reinterpret_cast(extras[1]); + auto dX = reinterpret_cast(dx); + auto dZ = reinterpret_cast(dz); + auto xShape = reinterpret_cast(xShapeInfo); + auto dxShape = reinterpret_cast(dXShapeInfo); + auto tadOnlyShapeInfo = reinterpret_cast(tadShapeInfo); + auto tadOffset = reinterpret_cast(tadOffsets); - auto dX = reinterpret_cast(dx); - auto dZ = reinterpret_cast(dz); - auto xShape = reinterpret_cast(xShapeInfo); - auto dxShape = reinterpret_cast(dXShapeInfo); - auto tadOnlyShapeInfo = reinterpret_cast(tadShapeInfo); - auto tadOffset = reinterpret_cast(tadOffsets); + auto xType = nd4j::ArrayOptions::dataType(xShape[0]); + dim3 launchDims(256, 512, 8192); + BUILD_SINGLE_SELECTOR(xType, shuffleKernelGeneric, + (launchDims, stream, dX, dxShape, dZ, N, shuffleMap, tadOnlyShapeInfo, tadOffset), + LIBND4J_TYPES); - auto xType = nd4j::ArrayOptions::dataType(xShape[0]); - dim3 launchDims(256, 512, 8192); - BUILD_SINGLE_SELECTOR(xType, shuffleKernelGeneric, (launchDims, stream, dX, dxShape, dZ, N, shuffleMap, tadOnlyShapeInfo, tadOffset), LIBND4J_TYPES); - - nd4j::DebugHelper::checkErrorCode(stream, "shuffle(...) failed"); + nd4j::DebugHelper::checkErrorCode(stream, "shuffle(...) 
failed"); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } -/* -void execMetaPredicateShape(Nd4jPointer *extras, - const int opTypeA, - const int opNumA, - const int opTypeB, - const int opNumB, - Nd4jLong N, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraA, - void *extraB, - double scalarA, - double scalarB) { - - cudaStream_t *stream = reinterpret_cast(extras[1]); - auto xType = nd4j::ArrayOptions::dataType(hXShapeInfo); - BUILD_SINGLE_SELECTOR(xType, functions::grid::GRIDShaped, ::execMetaPredicateShaped(stream, extras, opTypeA, opNumA, opTypeB, opNumB, N, dX, dXShapeInfo, dY, dYShapeInfo, dZ, dZShapeInfo, extraA, extraB, scalarA, scalarB), LIBND4J_TYPES); - // functions::grid::GRIDShaped::execMetaPredicateShaped(stream, extras, opTypeA, opNumA, opTypeB, opNumB, N, dX, dXShapeInfo, dy, dYShapeInfo, dz, zShapeInfo, extraA, extraB, scalarA, scalarB); - - DEBUG_KERNEL(stream, opNumA); -} -*/ - bool isExperimentalEnabled() { return nd4j::Environment::getInstance()->isExperimentalBuild(); } @@ -1670,9 +1504,14 @@ void execSummaryStats(Nd4jPointer *extraPointers, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, bool biasCorrected) { - - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execSummaryStats(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, hZShapeInfo, dZ, dZShapeInfo, biasCorrected); + try { + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execSummaryStats(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, + hZShapeInfo, dZ, dZShapeInfo, biasCorrected); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -1686,11 +1525,18 @@ void execSummaryStatsTad(Nd4jPointer *extraPointers, void *hDimension, Nd4jLong *hDimensionShape, void *dDimension, Nd4jLong *dDimensionShape, bool biasCorrected, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { - auto dimension = reinterpret_cast(dDimension); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + try { + auto dimension = reinterpret_cast(dDimension); + int dimensionLength = static_cast(shape::length(hDimensionShape)); - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execSummaryStats(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, hZShapeInfo, dZ, dZShapeInfo, dimension, dimensionLength, tadShapeInfo, tadOffsets, biasCorrected); + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execSummaryStats(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, + hZShapeInfo, dZ, dZShapeInfo, dimension, dimensionLength, tadShapeInfo, + tadOffsets, biasCorrected); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } 
//////////////////////////////////////////////////////////////////////// @@ -1703,9 +1549,14 @@ void execReduce3(Nd4jPointer *extraPointers, void *dY, Nd4jLong *dYShapeInfo, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo) { - - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execReduce3(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hY, hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo); + try { + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execReduce3(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hY, hYShapeInfo, dY, + dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -1721,35 +1572,35 @@ void execReduce3Tad(Nd4jPointer *extraPointers, void *hDimension, Nd4jLong *hDimensionShape, void *dDimension, Nd4jLong *dDimensionShape, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *yTadOnlyShapeInfo, Nd4jLong *yTadOffsets) { - auto dimension = reinterpret_cast(dDimension); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + try { + auto dimension = reinterpret_cast(dDimension); + int dimensionLength = static_cast(shape::length(hDimensionShape)); - // if (extraPointers == nullptr || extraPointers[2] == 0) - // NativeOpExecutioner::execReduce3(nullptr, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hY, hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, dimension, dimensionLength, tadOnlyShapeInfo, tadOffsets, yTadOnlyShapeInfo, yTadOffsets); - // else { - // // going tad-ways - // auto tadShapeInfo = reinterpret_cast (extraPointers[0]); - // auto tadOffsets = reinterpret_cast(extraPointers[1]); + auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, + reinterpret_cast(hDimension), + shape::length(hDimensionShape)); + auto tadLength = shape::length(tadPack.primaryShapeInfo()); + auto yLength = shape::length(hYShapeInfo); + auto xLength = shape::length(hXShapeInfo); - // NativeOpExecutioner::execReduce3TAD(nullptr, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hY, hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, dimension, dimensionLength, tadShapeInfo, tadOffsets); - // } + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - // nd4j_printf("Starting...\n",""); + if (tadLength == yLength || tadLength == xLength) { + // nd4j_printf("== way\n",""); + NativeOpExecutioner::execReduce3(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hY, hYShapeInfo, + dY, + dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, dimension, dimensionLength, + tadOnlyShapeInfo, tadOffsets, yTadOnlyShapeInfo, yTadOffsets); + } else + NativeOpExecutioner::execReduce3TAD(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hY, + hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, + dimension, dimensionLength, tadOnlyShapeInfo, yTadOffsets, + yTadOnlyShapeInfo, yTadOffsets); - auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, reinterpret_cast(hDimension), shape::length(hDimensionShape)); - auto tadLength = shape::length(tadPack.primaryShapeInfo()); - auto yLength = 
shape::length(hYShapeInfo); - auto xLength = shape::length(hXShapeInfo); - - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - - if (tadLength == yLength || tadLength == xLength) { - // nd4j_printf("== way\n",""); - NativeOpExecutioner::execReduce3(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hY, hYShapeInfo, dY, - dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, dimension, dimensionLength, - tadOnlyShapeInfo, tadOffsets, yTadOnlyShapeInfo, yTadOffsets); - } else - NativeOpExecutioner::execReduce3TAD(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hY, hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, dimension, dimensionLength, tadOnlyShapeInfo, yTadOffsets, yTadOnlyShapeInfo, yTadOffsets); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -1761,9 +1612,14 @@ void execReduce3Scalar(Nd4jPointer *extraPointers,int opNum, void *dY, Nd4jLong *dYShapeInfo, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo) { - - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execReduce3Scalar(&lc, opNum,hX,hXShapeInfo,dX, dXShapeInfo,extraParams,hY,hYShapeInfo,dY,dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo); + try { + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execReduce3Scalar(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hY, + hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -1776,9 +1632,15 @@ void execScalarBool(Nd4jPointer *extraPointers, void *hScalar, Nd4jLong *hScalarShapeInfo, void *dScalar, Nd4jLong *dScalarShapeInfo, void *extraParams) { - - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execScalarBool(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, hScalar, hScalarShapeInfo, dScalar, dScalarShapeInfo, extraParams); + try { + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execScalarBool(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, hZ, hZShapeInfo, dZ, + dZShapeInfo, hScalar, hScalarShapeInfo, dScalar, dScalarShapeInfo, + extraParams); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -1794,11 +1656,19 @@ void execScalarBoolTad(Nd4jPointer *extraPointers, void *hDimension, Nd4jLong *hDimensionShape, void *dDimension, Nd4jLong *dDimensionShape, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { - auto dimension = reinterpret_cast(dDimension); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + try { + auto dimension = reinterpret_cast(dDimension); + int dimensionLength = 
static_cast(shape::length(hDimensionShape)); - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execScalarBool(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, hZShapeInfo, dZ, dZShapeInfo, hScalars, hScalarShapeInfo, dScalars, dScalarShapeInfo, dimension, dimensionLength, tadShapeInfo, tadOffsets, tadShapeInfoZ, tadOffsetsZ); + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execScalarBool(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, hZShapeInfo, + dZ, dZShapeInfo, hScalars, hScalarShapeInfo, dScalars, dScalarShapeInfo, + dimension, dimensionLength, tadShapeInfo, tadOffsets, tadShapeInfoZ, + tadOffsetsZ); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -1811,9 +1681,14 @@ void execScalar(Nd4jPointer *extraPointers, void *hScalar, Nd4jLong *hScalarShapeInfo, void *dScalar, Nd4jLong *dScalarShapeInfo, void *extraParams) { - - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execScalar(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, hScalar, hScalarShapeInfo, dScalar, dScalarShapeInfo, extraParams); + try { + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execScalar(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, + hScalar, hScalarShapeInfo, dScalar, dScalarShapeInfo, extraParams); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -1830,27 +1705,36 @@ void execScalarTad(Nd4jPointer *extraPointers, void *dDimension, Nd4jLong *dDimensionShape, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { - auto dimension = reinterpret_cast(dDimension); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + try { + auto dimension = reinterpret_cast(dDimension); + int dimensionLength = static_cast(shape::length(hDimensionShape)); - cudaStream_t *stream = reinterpret_cast(extraPointers[1]); + cudaStream_t *stream = reinterpret_cast(extraPointers[1]); - auto xType = nd4j::ArrayOptions::dataType(hXShapeInfo); - auto yType = nd4j::ArrayOptions::dataType(hScalarShapeInfo); - auto zType = nd4j::ArrayOptions::dataType(hZShapeInfo); + auto xType = nd4j::ArrayOptions::dataType(hXShapeInfo); + auto yType = nd4j::ArrayOptions::dataType(hScalarShapeInfo); + auto zType = nd4j::ArrayOptions::dataType(hZShapeInfo); - if (yType != xType && yType != nd4j::DataType::BOOL && !isExperimentalEnabled()) - throw nd4j::datatype_exception::build("execScalar both operands must have same data type", xType, yType); + if (yType != xType && yType != nd4j::DataType::BOOL && !isExperimentalEnabled()) + throw nd4j::datatype_exception::build("execScalar both operands must have same data type", xType, yType); - dim3 launchDims(256, 256, 16384); + dim3 launchDims(256, 256, 16384); #ifdef __ND4J_EXPERIMENTAL__ - BUILD_PAIRWISE_SELECTOR(xType, yType, zType, 
functions::scalar::ScalarTransform, ::executeCudaAlongDimension(launchDims, stream, opNum, dX, dXShapeInfo, dZ, dZShapeInfo, dScalars, extraParams, dimension, dimensionLength, tadShapeInfo, tadOffsets, tadShapeInfoZ, tadOffsetsZ), LIBND4J_TYPES, LIBND4J_TYPES); + BUILD_PAIRWISE_SELECTOR(xType, yType, zType, functions::scalar::ScalarTransform, ::executeCudaAlongDimension(launchDims, stream, opNum, dX, dXShapeInfo, dZ, dZShapeInfo, dScalars, extraParams, dimension, dimensionLength, tadShapeInfo, tadOffsets, tadShapeInfoZ, tadOffsetsZ), LIBND4J_TYPES, LIBND4J_TYPES); #else - BUILD_SINGLE_SELECTOR_THRICE(xType, functions::scalar::ScalarTransform, ::executeCudaAlongDimension(launchDims, stream, opNum, dX, dXShapeInfo, dZ, dZShapeInfo, dScalars, extraParams, dimension, dimensionLength, tadShapeInfo, tadOffsets, tadShapeInfoZ, tadOffsetsZ), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR_THRICE(xType, functions::scalar::ScalarTransform, + ::executeCudaAlongDimension(launchDims, stream, opNum, dX, dXShapeInfo, dZ, + dZShapeInfo, dScalars, extraParams, dimension, + dimensionLength, tadShapeInfo, tadOffsets, + tadShapeInfoZ, tadOffsetsZ), LIBND4J_TYPES); #endif - DEBUG_KERNEL(stream, opNum); + DEBUG_KERNEL(stream, opNum); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void execAggregate(Nd4jPointer *extraPointers, @@ -1866,16 +1750,23 @@ void execAggregate(Nd4jPointer *extraPointers, void *realArguments, int numRealArguments, nd4j::DataType dtype) { + try { + cudaStream_t *stream = reinterpret_cast(extraPointers[1]); + int numBlocks = getDeviceId(extraPointers[2]); + int numThreads = getDeviceId(extraPointers[3]); + int shmem = getDeviceId(extraPointers[4]); - cudaStream_t *stream = reinterpret_cast(extraPointers[1]); - int numBlocks = getDeviceId(extraPointers[2]); - int numThreads = getDeviceId(extraPointers[3]); - int shmem = getDeviceId(extraPointers[4]); + dim3 launchDims = dim3(numBlocks, numThreads, shmem); - dim3 launchDims = dim3(numBlocks, numThreads, shmem); - - BUILD_SINGLE_SELECTOR(dtype, functions::aggregate::AggregatedFunction, ::aggregateKernelGeneric(launchDims, stream, opNum, arguments, numArguments, shapes, numShapes, indexArguments, numIndexArguments, intArrays, numIntArrays, realArguments, numRealArguments), FLOAT_TYPES); - nd4j::DebugHelper::checkErrorCode(stream, "execAggregateFloat(...) failed"); + BUILD_SINGLE_SELECTOR(dtype, functions::aggregate::AggregatedFunction, + ::aggregateKernelGeneric(launchDims, stream, opNum, arguments, numArguments, shapes, + numShapes, indexArguments, numIndexArguments, intArrays, + numIntArrays, realArguments, numRealArguments), FLOAT_TYPES); + nd4j::DebugHelper::checkErrorCode(stream, "execAggregateFloat(...) 
failed"); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void batchExecutor(Nd4jPointer *extraPointers, @@ -1897,17 +1788,25 @@ void execAggregateBatch(Nd4jPointer *extraPointers, int maxIntArrays, int maxIntArraySize, int maxIdx, int maxReals, void *ptrToArguments, nd4j::DataType dtype) { - // not implemented yet - cudaStream_t *stream = reinterpret_cast(extraPointers[1]); - int numBlocks = getDeviceId(extraPointers[2]); - int numThreads = getDeviceId(extraPointers[3]); - int shmem = getDeviceId(extraPointers[4]); + try { + // not implemented yet + cudaStream_t *stream = reinterpret_cast(extraPointers[1]); + int numBlocks = getDeviceId(extraPointers[2]); + int numThreads = getDeviceId(extraPointers[3]); + int shmem = getDeviceId(extraPointers[4]); - dim3 launchDims = dim3(numAggregates, numThreads, shmem); + dim3 launchDims = dim3(numAggregates, numThreads, shmem); - BUILD_SINGLE_SELECTOR(dtype, functions::aggregate::AggregatedFunction, ::aggregateBatchKernelGeneric(launchDims, stream, opNum, numAggregates, maxArgs, maxShapes, maxIntArrays, maxIntArraySize, maxIdx, maxReals, ptrToArguments), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(dtype, functions::aggregate::AggregatedFunction, + ::aggregateBatchKernelGeneric(launchDims, stream, opNum, numAggregates, maxArgs, + maxShapes, maxIntArrays, maxIntArraySize, maxIdx, maxReals, + ptrToArguments), FLOAT_TYPES); - DEBUG_KERNEL(stream, opNum); + DEBUG_KERNEL(stream, opNum); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -1917,9 +1816,13 @@ void execRandom(Nd4jPointer *extraPointers, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, void *extraArguments) { - - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execRandom(&lc, opNum, stateHost, hZ, hZShapeInfo, dZ, dZShapeInfo, extraArguments); + try { + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execRandom(&lc, opNum, stateHost, hZ, hZShapeInfo, dZ, dZShapeInfo, extraArguments); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -1929,9 +1832,14 @@ void execRandom2(Nd4jPointer *extraPointers, int opNum, Nd4jPointer stateHost, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, void *extraArguments) { - - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execRandom(&lc, opNum, stateHost, hX, hXShapeInfo, dX, dXShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, extraArguments); + try { + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execRandom(&lc, opNum, stateHost, hX, hXShapeInfo, dX, dXShapeInfo, hZ, hZShapeInfo, dZ, + dZShapeInfo, extraArguments); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + 
nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -1943,9 +1851,14 @@ void execRandom3(Nd4jPointer *extraPointers, int opNum, Nd4jPointer stateHost, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, void *extraArguments) { - - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execRandom(&lc, opNum, stateHost, hX, hXShapeInfo, dX, dXShapeInfo, hY, hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, extraArguments); + try { + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execRandom(&lc, opNum, stateHost, hX, hXShapeInfo, dX, dXShapeInfo, hY, hYShapeInfo, dY, + dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, extraArguments); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } @@ -2053,13 +1966,19 @@ void tear(Nd4jPointer *extras, Nd4jLong *zShapeInfo, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + try { + cudaStream_t *stream = reinterpret_cast(extras[1]); + dim3 launchDims(512, 512, 512); + auto xType = nd4j::ArrayOptions::dataType(xShapeInfo); + BUILD_SINGLE_SELECTOR(xType, tearKernelGeneric, + (launchDims, stream, dX, dXShapeInfo, targets, zShapeInfo, tadShapeInfo, tadOffsets), + LIBND4J_TYPES); - cudaStream_t *stream = reinterpret_cast(extras[1]); - dim3 launchDims(512, 512, 512); - auto xType = nd4j::ArrayOptions::dataType(xShapeInfo); - BUILD_SINGLE_SELECTOR(xType, tearKernelGeneric, (launchDims, stream, dX, dXShapeInfo, targets, zShapeInfo, tadShapeInfo, tadOffsets), LIBND4J_TYPES); - - nd4j::DebugHelper::checkErrorCode(stream, "tearFloat(...) failed"); + nd4j::DebugHelper::checkErrorCode(stream, "tearFloat(...) failed"); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } @@ -2146,56 +2065,72 @@ void prescanArrayRecursive(Nd4jPointer *extras, int *dZ, int *dX, int numElement void encodeThresholdP1(Nd4jPointer *extras, void *dx, Nd4jLong *hXShapeInfo, Nd4jLong N, int *dz, float threshold) { + try { + cudaStream_t *stream = reinterpret_cast(extras[1]); - cudaStream_t *stream = reinterpret_cast(extras[1]); + int blockSize = 1024; + int numBlocks = N / blockSize + (N % blockSize ? 1 : 0); - int blockSize = 1024; - int numBlocks = N / blockSize + (N % blockSize ? 1 : 0); + dim3 launchDims(numBlocks, blockSize, 1024); + auto xType = nd4j::ArrayOptions::dataType(hXShapeInfo); + BUILD_SINGLE_SELECTOR(xType, encoderKernelP1Generic, (launchDims, stream, dx, N, dz, threshold), LIBND4J_TYPES); - dim3 launchDims(numBlocks, blockSize, 1024); - auto xType = nd4j::ArrayOptions::dataType(hXShapeInfo); - BUILD_SINGLE_SELECTOR(xType, encoderKernelP1Generic, (launchDims, stream, dx, N, dz, threshold), LIBND4J_TYPES); - - nd4j::DebugHelper::checkErrorCode(stream, "encodeThresholdP1Float(...) failed"); + nd4j::DebugHelper::checkErrorCode(stream, "encodeThresholdP1Float(...) 
failed"); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void encodeThresholdP2Int(Nd4jPointer *extraPointers, int *dx, Nd4jLong N, int *dz) { - - cudaStream_t *stream = reinterpret_cast(extraPointers[1]); - //encoderKernelP2Float<<>>(dx, N, dz); - prescanArrayRecursive(extraPointers, dz, dx + 1, (int) N, 0); - nd4j::DebugHelper::checkErrorCode(stream, "encodeThresholdP2Int(...) failed"); + try { + cudaStream_t *stream = reinterpret_cast(extraPointers[1]); + //encoderKernelP2Float<<>>(dx, N, dz); + prescanArrayRecursive(extraPointers, dz, dx + 1, (int) N, 0); + nd4j::DebugHelper::checkErrorCode(stream, "encodeThresholdP2Int(...) failed"); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void encodeThresholdP3(Nd4jPointer *extraPointers, void *dx, Nd4jLong *hXShapeInfo, int *offsets, Nd4jLong N, int *dz){ + try { + cudaStream_t *stream = reinterpret_cast(extraPointers[1]); - cudaStream_t *stream = reinterpret_cast(extraPointers[1]); + int blockSize = 1024; + int numBlocks = N / blockSize + (N % blockSize ? 1 : 0); - int blockSize = 1024; - int numBlocks = N / blockSize + (N % blockSize ? 1 : 0); + dim3 launchDims(numBlocks, blockSize, 4096); + auto xType = nd4j::ArrayOptions::dataType(hXShapeInfo); + BUILD_SINGLE_SELECTOR(xType, encoderKernelP3Generic, (launchDims, stream, dx, offsets, N, dz), LIBND4J_TYPES); - dim3 launchDims(numBlocks, blockSize, 4096); - auto xType = nd4j::ArrayOptions::dataType(hXShapeInfo); - BUILD_SINGLE_SELECTOR(xType, encoderKernelP3Generic, (launchDims, stream, dx, offsets, N, dz), LIBND4J_TYPES); - - nd4j::DebugHelper::checkErrorCode(stream, "encodeThresholdP3Float(...) failed"); + nd4j::DebugHelper::checkErrorCode(stream, "encodeThresholdP3Float(...) failed"); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void decodeThreshold(Nd4jPointer *extraPointers, void *dx, Nd4jLong N, void *dz, Nd4jLong *zShapeInfo){ + try { + cudaStream_t *stream = reinterpret_cast(extraPointers[1]); - cudaStream_t *stream = reinterpret_cast(extraPointers[1]); + // we probably want to have smaller blocks here, memory writes are misaligned anyway + int blockSize = 128; + int numBlocks = N / blockSize + (N % blockSize ? 1 : 0); - // we probably want to have smaller blocks here, memory writes are misaligned anyway - int blockSize = 128; - int numBlocks = N / blockSize + (N % blockSize ? 1 : 0); + dim3 launchDims(numBlocks, blockSize, 1024); + auto zType = nd4j::ArrayOptions::dataType(zShapeInfo); + BUILD_SINGLE_SELECTOR(zType, decoderKernelGeneric, (launchDims, stream, dx, N, dz), LIBND4J_TYPES); - dim3 launchDims(numBlocks, blockSize, 1024); - auto zType = nd4j::ArrayOptions::dataType(zShapeInfo); - BUILD_SINGLE_SELECTOR(zType, decoderKernelGeneric, (launchDims, stream, dx, N, dz), LIBND4J_TYPES); - - nd4j::DebugHelper::checkErrorCode(stream, "decodeThresholdFloat(...) failed"); + nd4j::DebugHelper::checkErrorCode(stream, "decodeThresholdFloat(...) 
failed"); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -2212,11 +2147,18 @@ void execReduce3All(Nd4jPointer *extraPointers, void *dDimension, Nd4jLong *dDimensionShape, Nd4jLong *xTadShapeInfo, Nd4jLong *xOffsets, Nd4jLong *yTadShapeInfo, Nd4jLong *yOffsets) { - auto dimension = reinterpret_cast(dDimension); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + try { + auto dimension = reinterpret_cast(dDimension); + int dimensionLength = static_cast(shape::length(hDimensionShape)); - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execReduce3All(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParamsVals, hY, hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, dimension, dimensionLength, xTadShapeInfo, xOffsets, yTadShapeInfo, yOffsets); + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execReduce3All(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParamsVals, hY, + hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, dimension, + dimensionLength, xTadShapeInfo, xOffsets, yTadShapeInfo, yOffsets); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } @@ -2224,57 +2166,65 @@ void sort(Nd4jPointer *extraPointers, void *x, Nd4jLong *xShapeInfo, void *dX, Nd4jLong *dXShapeInfo, bool descending) { + try { + cudaStream_t *stream = reinterpret_cast(extraPointers[1]); - cudaStream_t *stream = reinterpret_cast(extraPointers[1]); - - auto xLength = shape::length(xShapeInfo); - auto xEWS = shape::elementWiseStride(xShapeInfo); - auto xType = nd4j::ArrayOptions::dataType(xShapeInfo); + auto xLength = shape::length(xShapeInfo); + auto xEWS = shape::elementWiseStride(xShapeInfo); + auto xType = nd4j::ArrayOptions::dataType(xShapeInfo); - // check if xLength is a power of 2, and use bitonic sort, if that's the case - if ((xLength != 0) && ((xLength & (xLength - 1)) == 0) && (xLength <= 1024 * 1024 * 10)) { - int numThreads = nd4j::math::nd4j_min(512, xLength); - int numBlocks = xLength / numThreads; - if (xLength % numThreads > 0 || numBlocks == 0) - numBlocks++; + // check if xLength is a power of 2, and use bitonic sort, if that's the case + if ((xLength != 0) && ((xLength & (xLength - 1)) == 0) && (xLength <= 1024 * 1024 * 10)) { + int numThreads = nd4j::math::nd4j_min(512, xLength); + int numBlocks = xLength / numThreads; + if (xLength % numThreads > 0 || numBlocks == 0) + numBlocks++; - dim3 launchDims(numBlocks, numThreads, 32768); + dim3 launchDims(numBlocks, numThreads, 32768); - for (int k = 2; k <= xLength; k = 2*k) { - for (int j = k >> 1; j > 0; j = j >> 1) { - BUILD_SINGLE_SELECTOR(xType, bitonicSortStepGeneric, (launchDims, stream, dX, dXShapeInfo, j, k, xLength, descending), LIBND4J_TYPES); - } + for (int k = 2; k <= xLength; k = 2 * k) { + for (int j = k >> 1; j > 0; j = j >> 1) { + BUILD_SINGLE_SELECTOR(xType, bitonicSortStepGeneric, + (launchDims, stream, dX, dXShapeInfo, j, k, xLength, descending), + LIBND4J_TYPES); + } + } + } else { + int numThreads = nd4j::math::nd4j_min(512, xLength); + int numBlocks = xLength / numThreads; + if (xLength % 
numThreads > 0 || numBlocks == 0) + numBlocks++; + + numBlocks = nd4j::math::nd4j_min(512, numBlocks); + dim3 launchDims(numBlocks, numThreads, 32768); + + int max = 2, dg = 0; + while (max < xLength) { + max <<= 1; + dg++; + } + max <<= 1; + + for (int window = 2; window < max; window <<= 1) { + int n = window; + int rev = 0; + do { + int half = n >> 1; + BUILD_SINGLE_SELECTOR(xType, bitonicArbitraryStepGeneric, + (launchDims, stream, dX, dXShapeInfo, n, xLength, rev, descending), + LIBND4J_TYPES); + n >>= 1; + rev = 1; + } while (n > 1); + } } - } else { - int numThreads = nd4j::math::nd4j_min(512, xLength); - int numBlocks = xLength / numThreads; - if (xLength % numThreads > 0 || numBlocks == 0) - numBlocks++; - numBlocks = nd4j::math::nd4j_min(512, numBlocks); - dim3 launchDims(numBlocks, numThreads, 32768); - - int max = 2, dg = 0; - while (max < xLength) { - max <<= 1; - dg++; - } - max <<= 1; - - for (int window = 2; window < max; window<<=1) { - int n = window; - int rev = 0; - do{ - int half = n >> 1; - BUILD_SINGLE_SELECTOR(xType, bitonicArbitraryStepGeneric, (launchDims, stream, dX, dXShapeInfo, n, xLength, rev, descending), LIBND4J_TYPES); - n>>=1; - rev = 1; - } while(n > 1); - } + nd4j::DebugHelper::checkErrorCode(stream, "sort(...) failed"); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); } - - nd4j::DebugHelper::checkErrorCode(stream, "sort(...) failed"); } @@ -2284,55 +2234,64 @@ void sortByKey(Nd4jPointer *extraPointers, void *y, Nd4jLong *yShapeInfo, void *dy, Nd4jLong *dyShapeInfo, bool descending) { + try { + auto stream = reinterpret_cast(extraPointers[1]); - auto stream = reinterpret_cast(extraPointers[1]); - - auto xLength = shape::length(xShapeInfo); - auto xEWS = shape::elementWiseStride(xShapeInfo); - auto xType = nd4j::ArrayOptions::dataType(xShapeInfo); - auto yType = nd4j::ArrayOptions::dataType(yShapeInfo); + auto xLength = shape::length(xShapeInfo); + auto xEWS = shape::elementWiseStride(xShapeInfo); + auto xType = nd4j::ArrayOptions::dataType(xShapeInfo); + auto yType = nd4j::ArrayOptions::dataType(yShapeInfo); - // check if xLength is a power of 2, and use bitonic sort, if that's the case - if ((xLength != 0) && ((xLength & (xLength - 1)) == 0) && (xLength <= 1024 * 1024 * 10)) { - int numThreads = nd4j::math::nd4j_min(512, xLength); - int numBlocks = xLength / numThreads; - if (xLength % numThreads > 0 || numBlocks == 0) - numBlocks++; + // check if xLength is a power of 2, and use bitonic sort, if that's the case + if ((xLength != 0) && ((xLength & (xLength - 1)) == 0) && (xLength <= 1024 * 1024 * 10)) { + int numThreads = nd4j::math::nd4j_min(512, xLength); + int numBlocks = xLength / numThreads; + if (xLength % numThreads > 0 || numBlocks == 0) + numBlocks++; - dim3 launchDims(numBlocks, numThreads, 32768); + dim3 launchDims(numBlocks, numThreads, 32768); - for (int k = 2; k <= xLength; k = 2*k) { - for (int j = k >> 1; j > 0; j = j >> 1) { - BUILD_DOUBLE_SELECTOR(xType, yType, bitonicSortStepGenericKey, (launchDims, stream, dX, dXShapeInfo, dy, dyShapeInfo, j, k, xLength, descending), LIBND4J_TYPES, LIBND4J_TYPES); + for (int k = 2; k <= xLength; k = 2 * k) { + for (int j = k >> 1; j > 0; j = j >> 1) { + BUILD_DOUBLE_SELECTOR(xType, yType, bitonicSortStepGenericKey, + (launchDims, stream, dX, dXShapeInfo, dy, dyShapeInfo, j, k, xLength, descending), + LIBND4J_TYPES, LIBND4J_TYPES); + } + } + } else { + int 
numThreads = nd4j::math::nd4j_min(512, xLength); + int numBlocks = xLength / numThreads; + if (xLength % numThreads > 0 || numBlocks == 0) + numBlocks++; + + numBlocks = nd4j::math::nd4j_min(512, numBlocks); + dim3 launchDims(numBlocks, numThreads, 32768); + + int max = 2, dg = 0; + while (max < xLength) { + max <<= 1; + dg++; + } + max <<= 1; + + for (int window = 2; window < max; window <<= 1) { + int n = window; + int rev = 0; + do { + int half = n >> 1; + BUILD_DOUBLE_SELECTOR(xType, yType, bitonicArbitraryStepGenericKey, + (launchDims, stream, dX, dXShapeInfo, dy, dyShapeInfo, n, xLength, rev, descending), + LIBND4J_TYPES, LIBND4J_TYPES); + n >>= 1; + rev = 1; + } while (n > 1); } } - } else { - int numThreads = nd4j::math::nd4j_min(512, xLength); - int numBlocks = xLength / numThreads; - if (xLength % numThreads > 0 || numBlocks == 0) - numBlocks++; - numBlocks = nd4j::math::nd4j_min(512, numBlocks); - dim3 launchDims(numBlocks, numThreads, 32768); - - int max = 2, dg = 0; - while (max < xLength) { - max <<= 1; - dg++; - } - max <<= 1; - - for (int window = 2; window < max; window<<=1) { - int n = window; - int rev = 0; - do{ - int half = n >> 1; - BUILD_DOUBLE_SELECTOR(xType, yType, bitonicArbitraryStepGenericKey, (launchDims, stream, dX, dXShapeInfo, dy, dyShapeInfo, n, xLength, rev, descending), LIBND4J_TYPES, LIBND4J_TYPES); - n>>=1; - rev = 1; - } while(n > 1); - } + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); } } @@ -2342,54 +2301,63 @@ void sortByValue(Nd4jPointer *extraPointers, void *y, Nd4jLong *yShapeInfo, void *dy, Nd4jLong *dyShapeInfo, bool descending) { - auto stream = reinterpret_cast(extraPointers[1]); + try { + auto stream = reinterpret_cast(extraPointers[1]); - auto xLength = shape::length(xShapeInfo); - auto xEWS = shape::elementWiseStride(xShapeInfo); - auto xType = nd4j::ArrayOptions::dataType(yShapeInfo); - auto yType = nd4j::ArrayOptions::dataType(xShapeInfo); + auto xLength = shape::length(xShapeInfo); + auto xEWS = shape::elementWiseStride(xShapeInfo); + auto xType = nd4j::ArrayOptions::dataType(yShapeInfo); + auto yType = nd4j::ArrayOptions::dataType(xShapeInfo); - // check if xLength is a power of 2, and use bitonic sort, if that's the case - if ((xLength != 0) && ((xLength & (xLength - 1)) == 0) && (xLength <= 1024 * 1024 * 10)) { - int numThreads = nd4j::math::nd4j_min(512, xLength); - int numBlocks = xLength / numThreads; - if (xLength % numThreads > 0 || numBlocks == 0) - numBlocks++; + // check if xLength is a power of 2, and use bitonic sort, if that's the case + if ((xLength != 0) && ((xLength & (xLength - 1)) == 0) && (xLength <= 1024 * 1024 * 10)) { + int numThreads = nd4j::math::nd4j_min(512, xLength); + int numBlocks = xLength / numThreads; + if (xLength % numThreads > 0 || numBlocks == 0) + numBlocks++; - dim3 launchDims(numBlocks, numThreads, 32768); + dim3 launchDims(numBlocks, numThreads, 32768); - for (int k = 2; k <= xLength; k = 2*k) { - for (int j = k >> 1; j > 0; j = j >> 1) { - BUILD_DOUBLE_SELECTOR(xType, yType, bitonicSortStepGenericKey, (launchDims, stream, dy, dyShapeInfo, dX, dXShapeInfo, j, k, xLength, descending), LIBND4J_TYPES, LIBND4J_TYPES); + for (int k = 2; k <= xLength; k = 2 * k) { + for (int j = k >> 1; j > 0; j = j >> 1) { + BUILD_DOUBLE_SELECTOR(xType, yType, bitonicSortStepGenericKey, + (launchDims, stream, dy, dyShapeInfo, dX, dXShapeInfo, j, k, xLength, descending), + 
LIBND4J_TYPES, LIBND4J_TYPES); + } + } + } else { + int numThreads = nd4j::math::nd4j_min(512, xLength); + int numBlocks = xLength / numThreads; + if (xLength % numThreads > 0 || numBlocks == 0) + numBlocks++; + + numBlocks = nd4j::math::nd4j_min(512, numBlocks); + dim3 launchDims(numBlocks, numThreads, 32768); + + int max = 2, dg = 0; + while (max < xLength) { + max <<= 1; + dg++; + } + max <<= 1; + + for (int window = 2; window < max; window <<= 1) { + int n = window; + int rev = 0; + do { + int half = n >> 1; + BUILD_DOUBLE_SELECTOR(xType, yType, bitonicArbitraryStepGenericKey, + (launchDims, stream, dy, dyShapeInfo, dX, dXShapeInfo, n, xLength, rev, descending), + LIBND4J_TYPES, LIBND4J_TYPES); + n >>= 1; + rev = 1; + } while (n > 1); } } - } else { - int numThreads = nd4j::math::nd4j_min(512, xLength); - int numBlocks = xLength / numThreads; - if (xLength % numThreads > 0 || numBlocks == 0) - numBlocks++; - - numBlocks = nd4j::math::nd4j_min(512, numBlocks); - dim3 launchDims(numBlocks, numThreads, 32768); - - int max = 2, dg = 0; - while (max < xLength) { - max <<= 1; - dg++; - } - max <<= 1; - - for (int window = 2; window < max; window<<=1) { - int n = window; - int rev = 0; - do{ - int half = n >> 1; - BUILD_DOUBLE_SELECTOR(xType, yType, bitonicArbitraryStepGenericKey, (launchDims, stream, dy, dyShapeInfo, dX, dXShapeInfo, n, xLength, rev, descending), LIBND4J_TYPES, LIBND4J_TYPES); - n>>=1; - rev = 1; - } while(n > 1); - } + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); } } @@ -2403,15 +2371,23 @@ void sortTadByKey(Nd4jPointer *extraPointers, int *dimension, int dimensionLength, bool descending) { - auto stream = reinterpret_cast(extraPointers[1]); - auto context = extraPointers[0] == 0 ? LaunchContext::defaultContext(): reinterpret_cast(extraPointers[0]); - auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dimension, dimensionLength); - dim3 launchDims((int) tadPack.numberOfTads(), 256, 2048); - auto xType = nd4j::ArrayOptions::dataType(xShapeInfo); - auto yType = nd4j::ArrayOptions::dataType(yShapeInfo); - BUILD_DOUBLE_SELECTOR(xType, yType, oesTadGenericKey, (launchDims, stream, dX, dXShapeInfo, dy, dyShapeInfo, nullptr, dimensionLength, tadPack.platformShapeInfo(), tadPack.platformOffsets(), descending), LIBND4J_TYPES, LIBND4J_TYPES); + try { + auto stream = reinterpret_cast(extraPointers[1]); + auto context = extraPointers[0] == 0 ? LaunchContext::defaultContext() + : reinterpret_cast(extraPointers[0]); + auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dimension, dimensionLength); + dim3 launchDims((int) tadPack.numberOfTads(), 256, 2048); + auto xType = nd4j::ArrayOptions::dataType(xShapeInfo); + auto yType = nd4j::ArrayOptions::dataType(yShapeInfo); + BUILD_DOUBLE_SELECTOR(xType, yType, oesTadGenericKey, + (launchDims, stream, dX, dXShapeInfo, dy, dyShapeInfo, nullptr, dimensionLength, tadPack.platformShapeInfo(), tadPack.platformOffsets(), descending), + LIBND4J_TYPES, LIBND4J_TYPES); - nd4j::DebugHelper::checkErrorCode(stream, "sortTadKey(...) failed"); + nd4j::DebugHelper::checkErrorCode(stream, "sortTadKey(...) 
failed"); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void sortTadByValue(Nd4jPointer *extraPointers, @@ -2422,16 +2398,24 @@ void sortTadByValue(Nd4jPointer *extraPointers, int *dimension, int dimensionLength, bool descending) { - auto stream = reinterpret_cast(extraPointers[1]); - auto context = extraPointers[0] == 0 ? LaunchContext::defaultContext(): reinterpret_cast(extraPointers[0]); - auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dimension, dimensionLength); - dim3 launchDims((int) tadPack.numberOfTads(), 256, 2048); - auto xType = nd4j::ArrayOptions::dataType(yShapeInfo); - auto yType = nd4j::ArrayOptions::dataType(xShapeInfo); + try { + auto stream = reinterpret_cast(extraPointers[1]); + auto context = extraPointers[0] == 0 ? LaunchContext::defaultContext() + : reinterpret_cast(extraPointers[0]); + auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dimension, dimensionLength); + dim3 launchDims((int) tadPack.numberOfTads(), 256, 2048); + auto xType = nd4j::ArrayOptions::dataType(yShapeInfo); + auto yType = nd4j::ArrayOptions::dataType(xShapeInfo); - BUILD_DOUBLE_SELECTOR(xType, yType, oesTadGenericKey, (launchDims, stream, dy, dyShapeInfo, dX, dXShapeInfo, nullptr, dimensionLength, tadPack.platformShapeInfo(), tadPack.platformOffsets(), descending), LIBND4J_TYPES, LIBND4J_TYPES); + BUILD_DOUBLE_SELECTOR(xType, yType, oesTadGenericKey, + (launchDims, stream, dy, dyShapeInfo, dX, dXShapeInfo, nullptr, dimensionLength, tadPack.platformShapeInfo(), tadPack.platformOffsets(), descending), + LIBND4J_TYPES, LIBND4J_TYPES); - nd4j::DebugHelper::checkErrorCode(stream, "sortTadValue(...) failed"); + nd4j::DebugHelper::checkErrorCode(stream, "sortTadValue(...) failed"); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } @@ -2443,15 +2427,23 @@ void sortTad(Nd4jPointer *extraPointers, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, bool descending) { - // to be implemented - auto stream = reinterpret_cast(extraPointers[1]); - auto context = extraPointers[0] == 0 ? LaunchContext::defaultContext(): reinterpret_cast(extraPointers[0]); - auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dimension, dimensionLength); - dim3 launchDims((int) tadPack.numberOfTads(), 512, 33768); - auto xType = nd4j::ArrayOptions::dataType(xShapeInfo); - BUILD_SINGLE_SELECTOR(xType, oesTadGeneric, (launchDims, stream, dX, dXShapeInfo, nullptr, dimensionLength, tadShapeInfo, tadOffsets, descending), LIBND4J_TYPES); + try { + // to be implemented + auto stream = reinterpret_cast(extraPointers[1]); + auto context = extraPointers[0] == 0 ? LaunchContext::defaultContext() + : reinterpret_cast(extraPointers[0]); + auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dimension, dimensionLength); + dim3 launchDims((int) tadPack.numberOfTads(), 512, 33768); + auto xType = nd4j::ArrayOptions::dataType(xShapeInfo); + BUILD_SINGLE_SELECTOR(xType, oesTadGeneric, + (launchDims, stream, dX, dXShapeInfo, nullptr, dimensionLength, tadShapeInfo, tadOffsets, descending), + LIBND4J_TYPES); - nd4j::DebugHelper::checkErrorCode(stream, "sortTad(...) 
failed"); + nd4j::DebugHelper::checkErrorCode(stream, "sortTad(...) failed"); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void sortCooIndices(Nd4jPointer *extraPointers, Nd4jLong *indices, void *values, Nd4jLong length, int rank) { @@ -2464,21 +2456,29 @@ Nd4jLong encodeBitmap(Nd4jPointer *extraPointers, Nd4jLong N, int *dz, float threshold) { + try { - cudaStream_t *stream = reinterpret_cast(extraPointers[1]); - int *resultPointer = reinterpret_cast(extraPointers[2]); - int *reductionPointer = reinterpret_cast(extraPointers[3]); + cudaStream_t *stream = reinterpret_cast(extraPointers[1]); + int *resultPointer = reinterpret_cast(extraPointers[2]); + int *reductionPointer = reinterpret_cast(extraPointers[3]); - dim3 launchDims(512, 512, 32768); - auto xType = nd4j::ArrayOptions::dataType(hXShapeInfo); - BUILD_SINGLE_SELECTOR(xType, cudaEncodeBitmapGeneric, (launchDims, stream, dx, N, dz, resultPointer, reductionPointer, threshold), LIBND4J_TYPES); + dim3 launchDims(512, 512, 32768); + auto xType = nd4j::ArrayOptions::dataType(hXShapeInfo); + BUILD_SINGLE_SELECTOR(xType, cudaEncodeBitmapGeneric, + (launchDims, stream, dx, N, dz, resultPointer, reductionPointer, threshold), + LIBND4J_TYPES); - nd4j::DebugHelper::checkErrorCode(stream, "encodeBitmapFloat(...) failed"); + nd4j::DebugHelper::checkErrorCode(stream, "encodeBitmapFloat(...) failed"); - Nd4jLong dZ = (Nd4jLong) resultPointer[0]; - resultPointer[0] = 0; + Nd4jLong dZ = (Nd4jLong) resultPointer[0]; + resultPointer[0] = 0; - return dZ; + return dZ; + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return 0; + } } @@ -2486,13 +2486,17 @@ void decodeBitmap(Nd4jPointer *extraPointers, void *dx, Nd4jLong N, void *dz, Nd4jLong *zShapeInfo) { + try { + cudaStream_t *stream = reinterpret_cast(extraPointers[1]); + dim3 launchDims(512, 512, 16384); + auto xType = nd4j::ArrayOptions::dataType(zShapeInfo); + BUILD_SINGLE_SELECTOR(xType, cudaDecodeBitmapGeneric, (launchDims, stream, dx, N, dz), LIBND4J_TYPES); - cudaStream_t *stream = reinterpret_cast(extraPointers[1]); - dim3 launchDims(512, 512, 16384); - auto xType = nd4j::ArrayOptions::dataType(zShapeInfo); - BUILD_SINGLE_SELECTOR(xType, cudaDecodeBitmapGeneric, (launchDims, stream, dx, N, dz), LIBND4J_TYPES); - - nd4j::DebugHelper::checkErrorCode(stream, "decodeBitmapFloat(...) failed"); + nd4j::DebugHelper::checkErrorCode(stream, "decodeBitmapFloat(...) 
failed"); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } Nd4jLong* mmapFile(Nd4jPointer *extraPointers, const char *fileName, Nd4jLong length) { @@ -2505,7 +2509,13 @@ void munmapFile(Nd4jPointer *extraPointers, Nd4jLong* ptrMap, Nd4jLong length) { nd4j::graph::ResultWrapper* executeFlatGraph(Nd4jPointer *extraPointers, Nd4jPointer flatBufferPointer) { - return nd4j::graph::GraphExecutioner::executeFlatBuffer(flatBufferPointer); + try { + return nd4j::graph::GraphExecutioner::executeFlatBuffer(flatBufferPointer); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return nullptr; + } } Nd4jLong getResultWrapperSize(nd4j::graph::ResultWrapper* ptr) { @@ -2560,9 +2570,16 @@ nd4j::ShapeList* _calculateOutputShapes(Nd4jPointer* extraPointers, nd4j::ops::D } nd4j::ShapeList* calculateOutputShapes2(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputBuffers, Nd4jPointer* inputShapes, int numInputShapes, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs, bool *bArgs, int numBArgs) { - auto op = nd4j::ops::OpRegistrator::getInstance()->getOperation(hash); + try { + auto op = nd4j::ops::OpRegistrator::getInstance()->getOperation(hash); - return _calculateOutputShapes(extraPointers, op, inputBuffers, inputShapes, numInputShapes, tArgs, numTArgs, iArgs, numIArgs, bArgs, numBArgs); + return _calculateOutputShapes(extraPointers, op, inputBuffers, inputShapes, numInputShapes, tArgs, numTArgs, + iArgs, numIArgs, bArgs, numBArgs); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return nullptr; + } } nd4j::ShapeList* _calculateOutputShapes(Nd4jPointer* extraPointers, nd4j::ops::DeclarableOp* op, Nd4jPointer* inputShapes, int numInputShapes, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs) { @@ -2584,9 +2601,15 @@ nd4j::ShapeList* _calculateOutputShapes(Nd4jPointer* extraPointers, nd4j::ops::D } nd4j::ShapeList* calculateOutputShapes(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputShapes, int numInputShapes, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs) { - auto op = nd4j::ops::OpRegistrator::getInstance()->getOperation(hash); + try { + auto op = nd4j::ops::OpRegistrator::getInstance()->getOperation(hash); - return _calculateOutputShapes(extraPointers, op, inputShapes, numInputShapes, tArgs, numTArgs, iArgs, numIArgs); + return _calculateOutputShapes(extraPointers, op, inputShapes, numInputShapes, tArgs, numTArgs, iArgs, numIArgs); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return nullptr; + } } Nd4jLong getShapeListSize(nd4j::ShapeList* list) { @@ -2681,39 +2704,57 @@ static FORCEINLINE Nd4jStatus realExec(nd4j::ops::DeclarableOp* op, Nd4jPointer* int execCustomOp(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputBuffers, Nd4jPointer* inputShapes, int numInputs, Nd4jPointer* outputBuffers, Nd4jPointer* outputShapes, int numOutputs, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs, bool* bArgs, int numBArgs, bool isInplace) { - auto op = 
nd4j::ops::OpRegistrator::getInstance()->getOperation(hash); + try { + auto op = nd4j::ops::OpRegistrator::getInstance()->getOperation(hash); - return realExec(op, extraPointers, hash, inputBuffers, inputShapes, numInputs, outputBuffers, outputShapes, numOutputs, tArgs, numTArgs, iArgs, numIArgs, bArgs, numBArgs, isInplace); + return realExec(op, extraPointers, hash, inputBuffers, inputShapes, numInputs, outputBuffers, outputShapes, + numOutputs, tArgs, numTArgs, iArgs, numIArgs, bArgs, numBArgs, isInplace); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return 1; + } } int execCustomOp2(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer opContext) { - auto op = nd4j::ops::OpRegistrator::getInstance()->getOperation(hash); - auto context = reinterpret_cast(opContext); + try { + auto op = nd4j::ops::OpRegistrator::getInstance()->getOperation(hash); + auto context = reinterpret_cast(opContext); - auto result = op->execute(context); + auto result = op->execute(context); - auto res = cudaStreamSynchronize(*context->launchContext()->getCudaStream()); - if (res != 0) - throw nd4j::cuda_exception::build("customOp execution failed", res); + auto res = cudaStreamSynchronize(*context->launchContext()->getCudaStream()); + if (res != 0) + throw nd4j::cuda_exception::build("customOp execution failed", res); - for (auto v:context->fastpath_in()) { - v->syncToDevice(); + for (auto v:context->fastpath_in()) { + v->syncToDevice(); + } + + for (auto v:context->fastpath_out()) { + v->syncToDevice(); + } + + return result; + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return 1; } - - for (auto v:context->fastpath_out()) { - v->syncToDevice(); - } - - return result; } int registerGraph(Nd4jPointer *extraPointers, Nd4jLong graphId, Nd4jPointer flatBufferPointer) { + try { + auto graph = nd4j::graph::GraphExecutioner::importFromFlatPointer(flatBufferPointer); - auto graph = nd4j::graph::GraphExecutioner::importFromFlatPointer(flatBufferPointer); + nd4j::graph::GraphHolder::getInstance()->registerGraph(graphId, graph); - nd4j::graph::GraphHolder::getInstance()->registerGraph(graphId, graph); - - return ND4J_STATUS_OK; + return ND4J_STATUS_OK; + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return 1; + } } @@ -2764,7 +2805,13 @@ static VariablesSet* executeStoredGraphT(Nd4jPointer *extraPointers, Nd4jLong gr } VariablesSet* executeStoredGraph(Nd4jPointer *extraPointers, Nd4jLong graphId, Nd4jPointer *inputBuffers, Nd4jPointer *inputShapes, int* inputIndices, int numInputs) { - return executeStoredGraphT(extraPointers, graphId, inputBuffers, inputShapes, inputIndices, numInputs); + try { + return executeStoredGraphT(extraPointers, graphId, inputBuffers, inputShapes, inputIndices, numInputs); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return nullptr; + } } Nd4jLong getVariablesSetSize(nd4j::graph::VariablesSet* set) { @@ -2800,10 +2847,15 @@ void* getVariableBuffer(nd4j::graph::Variable* variable) { } int 
unregisterGraph(Nd4jPointer *extraPointers, Nd4jLong graphId) { + try { + nd4j::graph::GraphHolder::getInstance()->dropGraphAny(graphId); - nd4j::graph::GraphHolder::getInstance()->dropGraphAny(graphId); - - return ND4J_STATUS_OK; + return ND4J_STATUS_OK; + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return 1; + } } void deletePointerArray(Nd4jPointer pointer) { @@ -2918,8 +2970,15 @@ Nd4jStatus execCustomOpWithScope(Nd4jPointer *extraPointers, nd4j::graph::GraphS Nd4jStatus execCustomOpWithScope(Nd4jPointer *extraPointers, Nd4jPointer state, Nd4jLong opHash, Nd4jLong *scopes, int numScopes, Nd4jPointer *inputBuffers, Nd4jPointer *inputShapes, int numInputs, Nd4jPointer *outputBuffers, Nd4jPointer *outputShapes, int numOutputs) { - - return execCustomOpWithScope(extraPointers, reinterpret_cast(state), opHash, scopes, numScopes, inputBuffers, inputShapes, numInputs, outputBuffers, outputShapes, numOutputs); + try { + return execCustomOpWithScope(extraPointers, reinterpret_cast(state), opHash, scopes, + numScopes, inputBuffers, inputShapes, numInputs, outputBuffers, outputShapes, + numOutputs); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return 1; + } } void deleteResultWrapper(Nd4jPointer ptr) { @@ -2937,181 +2996,186 @@ int estimateThreshold(Nd4jPointer *extraPointers, Nd4jPointer dX, Nd4jLong *dXSh * void convertTypes(Nd4jPointer *extras, int srcType, Nd4jPointer dX, long N, int dstType, Nd4jPointer dZ); */ void convertTypes(Nd4jPointer *extras, int srcType, Nd4jPointer dX, Nd4jLong N, int dstType, Nd4jPointer dZ) { - auto dx = reinterpret_cast(dX); - auto dz = reinterpret_cast(dZ); + try { + auto dx = reinterpret_cast(dX); + auto dz = reinterpret_cast(dZ); - if (srcType == ND4J_FLOAT8) { - if (dstType == ND4J_FLOAT8) { - // convertKernel(extras, dx, N, dz); - } else if (dstType == ND4J_INT8) { - //nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_UINT8) { - //nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_FLOAT16) { - //nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_INT16) { - //nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_UINT16) { - //nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_FLOAT24) { + if (srcType == ND4J_FLOAT8) { + if (dstType == ND4J_FLOAT8) { + // convertKernel(extras, dx, N, dz); + } else if (dstType == ND4J_INT8) { + //nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_UINT8) { + //nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_FLOAT16) { + //nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_INT16) { + //nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_UINT16) { + //nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_FLOAT24) { - } else if (dstType == ND4J_FLOAT32) { - //nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_DOUBLE) { - //nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else { - nd4j_printf("Unsupported types conversion: [%i] -> [%i]\n", srcType, dstType); - } - } else if 
(srcType == ND4J_INT8) { - if (dstType == ND4J_FLOAT8) { - //nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_INT8) { - //convertKernel(extras, dx, N, dz); - } else if (dstType == ND4J_UINT8) { - nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_FLOAT16) { - nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_INT16) { - nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_UINT16) { - nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_FLOAT24) { - // TODO: eventually we might want to add it - } else if (dstType == ND4J_FLOAT32) { - nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_DOUBLE) { - nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else { - nd4j_printf("Unsupported types conversion: [%i] -> [%i]\n", srcType, dstType); - } - } else if (srcType == ND4J_UINT8) { - if (dstType == ND4J_FLOAT8) { - //nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_INT8) { - nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_UINT8) { - nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_FLOAT16) { - nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_INT16) { - nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_UINT16) { - nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_FLOAT24) { - // TODO: still might want to add - } else if (dstType == ND4J_FLOAT32) { - nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_DOUBLE) { - nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else { - nd4j_printf("Unsupported types conversion: [%i] -> [%i]\n", srcType, dstType); - } - } else if (srcType == ND4J_FLOAT16) { - if (dstType == ND4J_FLOAT8) { - //nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_INT8) { - nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_UINT8) { - nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_FLOAT16) { - nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_INT16) { - nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_UINT16) { - nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_FLOAT24) { - // TODO: .... 
^^^ - } else if (dstType == ND4J_FLOAT32) { - nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_DOUBLE) { - nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_THRESHOLD) { - //nd4j::convertToThreshold(nullptr, dx, N, dz); - } else { - nd4j_printf("Unsupported types conversion: [%i] -> [%i]\n", srcType, dstType); - } - } else if (srcType == ND4J_INT16) { - if (dstType == ND4J_FLOAT8) { - //nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_INT8) { - nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_UINT8) { - nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_FLOAT16) { - nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_INT16) { - nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_UINT16) { - nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_FLOAT24) { - // TODO... - } else if (dstType == ND4J_FLOAT32) { - nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_DOUBLE) { - nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else { - printf("Unsupported types conversion: [%i] -> [%i]\n", srcType, dstType); - } - } else if (srcType == ND4J_FLOAT24) { + } else if (dstType == ND4J_FLOAT32) { + //nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_DOUBLE) { + //nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else { + nd4j_printf("Unsupported types conversion: [%i] -> [%i]\n", srcType, dstType); + } + } else if (srcType == ND4J_INT8) { + if (dstType == ND4J_FLOAT8) { + //nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_INT8) { + //convertKernel(extras, dx, N, dz); + } else if (dstType == ND4J_UINT8) { + nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_FLOAT16) { + nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_INT16) { + nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_UINT16) { + nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_FLOAT24) { + // TODO: eventually we might want to add it + } else if (dstType == ND4J_FLOAT32) { + nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_DOUBLE) { + nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else { + nd4j_printf("Unsupported types conversion: [%i] -> [%i]\n", srcType, dstType); + } + } else if (srcType == ND4J_UINT8) { + if (dstType == ND4J_FLOAT8) { + //nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_INT8) { + nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_UINT8) { + nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_FLOAT16) { + nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_INT16) { + nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_UINT16) { + nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_FLOAT24) { + // TODO: still might want to add + } else if (dstType == ND4J_FLOAT32) { + nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_DOUBLE) { + nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else { + nd4j_printf("Unsupported types conversion: 
[%i] -> [%i]\n", srcType, dstType); + } + } else if (srcType == ND4J_FLOAT16) { + if (dstType == ND4J_FLOAT8) { + //nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_INT8) { + nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_UINT8) { + nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_FLOAT16) { + nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_INT16) { + nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_UINT16) { + nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_FLOAT24) { + // TODO: .... ^^^ + } else if (dstType == ND4J_FLOAT32) { + nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_DOUBLE) { + nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_THRESHOLD) { + //nd4j::convertToThreshold(nullptr, dx, N, dz); + } else { + nd4j_printf("Unsupported types conversion: [%i] -> [%i]\n", srcType, dstType); + } + } else if (srcType == ND4J_INT16) { + if (dstType == ND4J_FLOAT8) { + //nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_INT8) { + nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_UINT8) { + nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_FLOAT16) { + nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_INT16) { + nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_UINT16) { + nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_FLOAT24) { + // TODO... + } else if (dstType == ND4J_FLOAT32) { + nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_DOUBLE) { + nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else { + printf("Unsupported types conversion: [%i] -> [%i]\n", srcType, dstType); + } + } else if (srcType == ND4J_FLOAT24) { - } else if (srcType == ND4J_FLOAT32) { - if (dstType == ND4J_FLOAT8) { - //nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_INT8) { - nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_UINT8) { - nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_FLOAT16) { - nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_INT16) { - nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_UINT16) { - nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_FLOAT24) { + } else if (srcType == ND4J_FLOAT32) { + if (dstType == ND4J_FLOAT8) { + //nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_INT8) { + nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_UINT8) { + nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_FLOAT16) { + nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_INT16) { + nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_UINT16) { + nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_FLOAT24) { - } else if (dstType == ND4J_DOUBLE) { - nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_THRESHOLD) { - //nd4j::convertToThreshold(nullptr, dx, N, 
dz); - } else { - nd4j_printf("Unsupported types conversion: [%i] -> [%i]\n", srcType, dstType); - } - } else if (srcType == ND4J_DOUBLE) { - if (dstType == ND4J_FLOAT8) { - //nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_INT8) { - nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_UINT8) { - nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_FLOAT16) { - nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_INT16) { - nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_UINT16) { - nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_FLOAT24) { + } else if (dstType == ND4J_DOUBLE) { + nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_THRESHOLD) { + //nd4j::convertToThreshold(nullptr, dx, N, dz); + } else { + nd4j_printf("Unsupported types conversion: [%i] -> [%i]\n", srcType, dstType); + } + } else if (srcType == ND4J_DOUBLE) { + if (dstType == ND4J_FLOAT8) { + //nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_INT8) { + nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_UINT8) { + nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_FLOAT16) { + nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_INT16) { + nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_UINT16) { + nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_FLOAT24) { - } else if (dstType == ND4J_FLOAT32) { - nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); - } else if (dstType == ND4J_DOUBLE) { - // - } else if (dstType == ND4J_THRESHOLD) { - //nd4j::convertToThreshold(nullptr, dx, N, dz); + } else if (dstType == ND4J_FLOAT32) { + nd4j::TypeCast::convertGenericCuda(extras, dx, N, dz); + } else if (dstType == ND4J_DOUBLE) { + // + } else if (dstType == ND4J_THRESHOLD) { + //nd4j::convertToThreshold(nullptr, dx, N, dz); + } else { + nd4j_printf("Unsupported types conversion: [%i] -> [%i]\n", srcType, dstType); + } + } else if (srcType == ND4J_THRESHOLD) { + if (dstType == ND4J_FLOAT16) { + //nd4j::convertFromThreshold(nullptr, dx, N, dz); + } else if (dstType == ND4J_FLOAT32) { + //nd4j::convertFromThreshold(nullptr, dx, N, dz); + } else if (dstType == ND4J_DOUBLE) { + //nd4j::convertFromThreshold(nullptr, dx, N, dz); + } else { + nd4j_printf("Unsupported types conversion: [%i] -> [%i]\n", srcType, dstType); + } } else { nd4j_printf("Unsupported types conversion: [%i] -> [%i]\n", srcType, dstType); } - } else if (srcType == ND4J_THRESHOLD) { - if (dstType == ND4J_FLOAT16) { - //nd4j::convertFromThreshold(nullptr, dx, N, dz); - } else if (dstType == ND4J_FLOAT32) { - //nd4j::convertFromThreshold(nullptr, dx, N, dz); - } else if (dstType == ND4J_DOUBLE) { - //nd4j::convertFromThreshold(nullptr, dx, N, dz); - } else { - nd4j_printf("Unsupported types conversion: [%i] -> [%i]\n", srcType, dstType); - } - } else { - nd4j_printf("Unsupported types conversion: [%i] -> [%i]\n", srcType, dstType); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); } } @@ -3209,20 +3273,31 @@ void scatterUpdate(Nd4jPointer *extraPointers, int opCode, int numOfSubArrs, void* hY, Nd4jLong* 
hYShapeInfo, Nd4jLong* hYOffsets, void* dY, Nd4jLong* dYShapeInfo, Nd4jLong* dYOffsets, int* hIindexes, int* dIndexes) { + try { + auto stream = reinterpret_cast(extraPointers[1]); - auto stream = reinterpret_cast(extraPointers[1]); + nd4j::DataType type = ArrayOptions::dataType(hXShapeInfo); - nd4j::DataType type = ArrayOptions::dataType(hXShapeInfo); - - BUILD_SINGLE_SELECTOR(type, scatterUpdateCudaLauncher, (stream, opCode, numOfSubArrs, dX, dXShapeInfo, dXOffsets, dY, dYShapeInfo, dYOffsets, dIndexes), LIBND4J_TYPES); - nd4j::DebugHelper::checkErrorCode(stream, "scatterUpdate(...) failed"); + BUILD_SINGLE_SELECTOR(type, scatterUpdateCudaLauncher, + (stream, opCode, numOfSubArrs, dX, dXShapeInfo, dXOffsets, dY, dYShapeInfo, dYOffsets, dIndexes), + LIBND4J_TYPES); + nd4j::DebugHelper::checkErrorCode(stream, "scatterUpdate(...) failed"); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void inspectArray(Nd4jPointer *extraPointers, Nd4jPointer buffer, Nd4jLong *shapeInfo, Nd4jPointer specialBuffer, Nd4jLong *specialShapeInfo, Nd4jPointer debugInfo) { - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - auto p = reinterpret_cast(debugInfo); - NDArray array(buffer, specialBuffer, shapeInfo, &lc); - nd4j::DebugHelper::retrieveDebugStatistics(p, &array); + try { + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + auto p = reinterpret_cast(debugInfo); + NDArray array(buffer, specialBuffer, shapeInfo, &lc); + nd4j::DebugHelper::retrieveDebugStatistics(p, &array); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void __global__ tryPointerKernel(void* p, int len) { @@ -3239,26 +3314,37 @@ void __global__ tryPointerKernel(void* p, int len) { } void tryPointer(Nd4jPointer extra, Nd4jPointer p, int len) { + try { + cudaStream_t stream; + cudaStreamCreate(&stream); - cudaStream_t stream; - cudaStreamCreate(&stream); + tryPointerKernel << < 256, 512, len + 64, stream >> > (p, len); + auto e = cudaStreamSynchronize(stream); - tryPointerKernel<<<256, 512, len+64, stream>>>(p, len); - auto e = cudaStreamSynchronize(stream); + if (e != 0) + throw nd4j::cuda_exception::build("tryPointer failed", e); - if (e != 0) - throw nd4j::cuda_exception::build("tryPointer failed", e); - - cudaStreamDestroy(stream); + cudaStreamDestroy(stream); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } int dataTypeFromNpyHeader(void *header) { return (int) cnpy::dataTypeFromHeader(reinterpret_cast(header)); } nd4j::ConstantDataBuffer* shapeBuffer(int rank, Nd4jLong *shape, Nd4jLong *strides, nd4j::DataType dtype, char order, Nd4jLong ews, bool empty) { - auto buffer = new ConstantDataBuffer(); - *buffer = nd4j::ConstantShapeHelper::getInstance()->bufferForShapeInfo(ShapeDescriptor(dtype, order, shape, strides, rank, ews, empty)); - return buffer; + try { + auto buffer = new ConstantDataBuffer(); + *buffer = nd4j::ConstantShapeHelper::getInstance()->bufferForShapeInfo( + ShapeDescriptor(dtype, order, shape, strides, rank, ews, empty)); + return buffer; + } catch (std::exception &e) { + 
nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return nullptr; + } } void deleteShapeBuffer(nd4j::ConstantDataBuffer* ptr) { @@ -3359,60 +3445,79 @@ void deleteRandomGenerator(nd4j::graph::RandomGenerator* ptr) { Nd4jPointer shapeBufferForNumpy(Nd4jPointer npyArray) { - cnpy::NpyArray arr = cnpy::loadNpyFromPointer(reinterpret_cast(npyArray)); - unsigned int shapeSize = arr.shape.size(); - std::vector shape(shapeSize); - bool _empty = false; - for(unsigned int i = 0; i < shapeSize; i++) { - shape[i] = arr.shape[i]; + try { + cnpy::NpyArray arr = cnpy::loadNpyFromPointer(reinterpret_cast(npyArray)); + unsigned int shapeSize = arr.shape.size(); + std::vector shape(shapeSize); + bool _empty = false; + for (unsigned int i = 0; i < shapeSize; i++) { + shape[i] = arr.shape[i]; - if (arr.shape[i] == 0) - _empty = true; + if (arr.shape[i] == 0) + _empty = true; + } + + auto dtype = cnpy::dataTypeFromHeader(reinterpret_cast(npyArray)); + + Nd4jLong *shapeBuffer; + if (shape.size() == 1 && shape[0] == 0) { + // scalar case + shapeBuffer = nd4j::ShapeBuilders::createScalarShapeInfo(dtype); + } else if (_empty) { + if (shapeSize > 0) + shapeBuffer = nd4j::ShapeBuilders::emptyShapeInfo(dtype, arr.fortranOrder ? 'f' : 'c', shape); + else + shapeBuffer = nd4j::ShapeBuilders::emptyShapeInfo(dtype); + } else { + shapeBuffer = nd4j::ShapeBuilders::createShapeInfo(dtype, arr.fortranOrder ? 'f' : 'c', shape); + } + return reinterpret_cast(nd4j::ConstantShapeHelper::getInstance()->createFromExisting(shapeBuffer, + true)); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return nullptr; } - - auto dtype = cnpy::dataTypeFromHeader(reinterpret_cast(npyArray)); - - Nd4jLong *shapeBuffer; - if (shape.size() == 1 && shape[0] == 0) { - // scalar case - shapeBuffer = nd4j::ShapeBuilders::createScalarShapeInfo(dtype); - } else if (_empty) { - if (shapeSize > 0) - shapeBuffer = nd4j::ShapeBuilders::emptyShapeInfo(dtype, arr.fortranOrder ? 'f' : 'c', shape); - else - shapeBuffer = nd4j::ShapeBuilders::emptyShapeInfo(dtype); - } else { - shapeBuffer = nd4j::ShapeBuilders::createShapeInfo(dtype, arr.fortranOrder ? 
'f' : 'c', shape); - } - return reinterpret_cast(nd4j::ConstantShapeHelper::getInstance()->createFromExisting(shapeBuffer, true)); } const char* runLightBenchmarkSuit(bool printOut) { - nd4j::LightBenchmarkSuit suit; - auto result = suit.runSuit(); + try { + nd4j::LightBenchmarkSuit suit; + auto result = suit.runSuit(); - if (printOut) - nd4j_printf("%s\n", result.data()); + if (printOut) + nd4j_printf("%s\n", result.data()); - auto chars = new char[result.length()+1]; - std::memcpy(chars, result.data(), result.length()); - chars[result.length()] = (char) 0x0; + auto chars = new char[result.length() + 1]; + std::memcpy(chars, result.data(), result.length()); + chars[result.length()] = (char) 0x0; - return chars; + return chars; + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return nullptr; + } } const char* runFullBenchmarkSuit(bool printOut) { - nd4j::FullBenchmarkSuit suit; - auto result = suit.runSuit(); + try { + nd4j::FullBenchmarkSuit suit; + auto result = suit.runSuit(); - if (printOut) - nd4j_printf("%s\n", result.data()); + if (printOut) + nd4j_printf("%s\n", result.data()); - auto chars = new char[result.length()+1]; - std::memcpy(chars, result.data(), result.length()); - chars[result.length()] = (char) 0x0; + auto chars = new char[result.length() + 1]; + std::memcpy(chars, result.data(), result.length()); + chars[result.length()] = (char) 0x0; - return chars; + return chars; + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return nullptr; + } } Nd4jLong getCachedMemory(int deviceId) { @@ -3449,4 +3554,12 @@ Nd4jPointer lcBlasHandle(OpaqueLaunchContext* lc) { Nd4jPointer lcSolverHandle(OpaqueLaunchContext* lc) { return lc->getCusolverHandle(); +} + +int lastErrorCode() { + return nd4j::LaunchContext::defaultContext()->errorReference()->errorCode(); +} + +const char* lastErrorMessage() { + return nd4j::LaunchContext::defaultContext()->errorReference()->errorMessage(); } \ No newline at end of file diff --git a/libnd4j/include/execution/ContextBuffers.h b/libnd4j/include/execution/ContextBuffers.h index 130354070..67c428d27 100644 --- a/libnd4j/include/execution/ContextBuffers.h +++ b/libnd4j/include/execution/ContextBuffers.h @@ -23,6 +23,7 @@ #include #include +#include namespace nd4j { class ND4J_EXPORT ContextBuffers { @@ -32,6 +33,7 @@ namespace nd4j { void* _allocationPointer = nullptr; void* _execStream = nullptr; void* _specialStream = nullptr; + sd::ErrorReference _errorReference; bool _allocated = false; bool _initialized = false; @@ -60,6 +62,8 @@ namespace nd4j { void setScalarBuffer(void* pointer); void setAllocationBuffer(void* pointer); + sd::ErrorReference* errorReference(); + void triggerOwnership(bool isOwner); int deviceId(); diff --git a/libnd4j/include/helpers/ProviderRNG.h b/libnd4j/include/execution/ErrorReference.h similarity index 57% rename from libnd4j/include/helpers/ProviderRNG.h rename to libnd4j/include/execution/ErrorReference.h index e82f6ac98..2b68d5855 100644 --- a/libnd4j/include/helpers/ProviderRNG.h +++ b/libnd4j/include/execution/ErrorReference.h @@ -15,32 +15,32 @@ ******************************************************************************/ // -// Created by Yurii Shyrma on 27.01.2018 +// @author raver119@gmail.com // -#ifndef LIBND4J_PROVIDERRNG_H 
-#define LIBND4J_PROVIDERRNG_H
+#ifndef DEV_TESTS_ERRORREFERENCE_H
+#define DEV_TESTS_ERRORREFERENCE_H
-#include
-#include
-
-namespace nd4j {
-
-class ProviderRNG {
-
-    protected:
-        random::RandomBuffer* _rng;
-        static std::mutex _mutex;
-        ProviderRNG();
+#include
+#include
+namespace sd {
+    class ND4J_EXPORT ErrorReference {
+    private:
+        int _errorCode = 0;
+        std::string _errorMessage;
     public:
-        ProviderRNG(const ProviderRNG&) = delete;
-        void operator=(const ProviderRNG&) = delete;
-        random::RandomBuffer* getRNG() const;
-        static ProviderRNG& getInstance();
-};
+        ErrorReference() = default;
+        ~ErrorReference() = default;
+        int errorCode();
+        const char* errorMessage();
+        void setErrorCode(int errorCode);
+        void setErrorMessage(std::string message);
+        void setErrorMessage(const char* message);
+    };
 }
-#endif //LIBND4J_PROVIDERRNG_H
+
+#endif //DEV_TESTS_ERRORREFERENCE_H
diff --git a/libnd4j/include/execution/LaunchContext.h b/libnd4j/include/execution/LaunchContext.h
index 23165fa0e..5fae2162c 100644
--- a/libnd4j/include/execution/LaunchContext.h
+++ b/libnd4j/include/execution/LaunchContext.h
@@ -37,6 +37,7 @@
 #include
 #include
 #include
+#include
@@ -97,9 +98,12 @@ class ND4J_EXPORT LaunchContext {
     int getDeviceID() const {return _deviceID;}
     void setDeviceID(int deviceID) { _deviceID = deviceID; }
+    sd::ErrorReference* errorReference();
     static bool isInitialized();
     static void releaseBuffers();
+
+    static LaunchContext* defaultContext();
diff --git a/libnd4j/include/execution/cpu/ContextBuffers.cpp b/libnd4j/include/execution/cpu/ContextBuffers.cpp
index 3bf0a01eb..0038990c2 100644
--- a/libnd4j/include/execution/cpu/ContextBuffers.cpp
+++ b/libnd4j/include/execution/cpu/ContextBuffers.cpp
@@ -99,4 +99,8 @@ namespace nd4j {
     ContextBuffers& ContextBuffers::operator=(ContextBuffers&& other) {
         return *this;
     }
+
+    sd::ErrorReference* ContextBuffers::errorReference() {
+        return &_errorReference;
+    }
 }
\ No newline at end of file
diff --git a/libnd4j/include/execution/cpu/LaunchContext.cpp b/libnd4j/include/execution/cpu/LaunchContext.cpp
index 3ee460350..60e29c7ca 100644
--- a/libnd4j/include/execution/cpu/LaunchContext.cpp
+++ b/libnd4j/include/execution/cpu/LaunchContext.cpp
@@ -23,7 +23,11 @@
 #include
 #include
+#ifdef IOS_BUILD
 nd4j::ContextBuffers contextBuffers = nd4j::ContextBuffers();
+#else
+thread_local nd4j::ContextBuffers contextBuffers = nd4j::ContextBuffers();
+#endif
 namespace nd4j {
@@ -65,4 +69,8 @@ namespace nd4j {
     void LaunchContext::releaseBuffers() {
         //
     }
+
+    sd::ErrorReference* LaunchContext::errorReference() {
+        return contextBuffers.errorReference();
+    }
 }
\ No newline at end of file
diff --git a/libnd4j/include/execution/cuda/ContextBuffers.cu b/libnd4j/include/execution/cuda/ContextBuffers.cu
index 84db0c284..895bb6623 100644
--- a/libnd4j/include/execution/cuda/ContextBuffers.cu
+++ b/libnd4j/include/execution/cuda/ContextBuffers.cu
@@ -220,5 +220,9 @@ namespace nd4j {
     bool ContextBuffers::isInitialized() {
         return _initialized;
     }
+
+    sd::ErrorReference* ContextBuffers::errorReference() {
+        return &_errorReference;
+    }
 }
diff --git a/libnd4j/include/execution/cuda/LaunchContext.cu b/libnd4j/include/execution/cuda/LaunchContext.cu
index 1292f756c..9d9f2c506 100644
--- a/libnd4j/include/execution/cuda/LaunchContext.cu
+++ b/libnd4j/include/execution/cuda/LaunchContext.cu
@@ -168,4 +168,8 @@ LaunchContext::LaunchContext() {
     bool LaunchContext::isInitialized() {
         return contextBuffers.isInitialized();
     }
+
+    sd::ErrorReference* LaunchContext::errorReference() {
+        return 
contextBuffers.errorReference(); + } } \ No newline at end of file diff --git a/libnd4j/include/helpers/impl/ProviderRNG.cpp b/libnd4j/include/execution/impl/ErrorReference.cpp similarity index 52% rename from libnd4j/include/helpers/impl/ProviderRNG.cpp rename to libnd4j/include/execution/impl/ErrorReference.cpp index 216aa3a32..7b3409aa1 100644 --- a/libnd4j/include/helpers/impl/ProviderRNG.cpp +++ b/libnd4j/include/execution/impl/ErrorReference.cpp @@ -15,37 +15,32 @@ ******************************************************************************/ // -// Created by Yurii Shyrma on 27.01.2018 +// @author raver119@gmail.com // -#include -#include +#include -namespace nd4j { - -ProviderRNG::ProviderRNG() { +namespace sd { + int ErrorReference::errorCode() { + return _errorCode; + } - Nd4jLong *buffer = new Nd4jLong[100000]; - std::lock_guard lock(_mutex); - #ifndef __CUDABLAS__ - // at this moment we don't have streams etc, so let's just skip this for now - _rng = (nd4j::random::RandomBuffer *) initRandom(nullptr, 123, 100000, (Nd4jPointer) buffer); - #endif - // if(_rng != nullptr) -} - -ProviderRNG& ProviderRNG::getInstance() { - - static ProviderRNG instance; - return instance; -} - -random::RandomBuffer* ProviderRNG::getRNG() const { - - return _rng; -} - -std::mutex ProviderRNG::_mutex; - + const char* ErrorReference::errorMessage() { + // since we're fetching error message - error code will be assumed consumed & nullified + _errorCode = 0; + return _errorMessage.c_str(); + } + + void ErrorReference::setErrorCode(int errorCode) { + _errorCode = errorCode; + } + + void ErrorReference::setErrorMessage(std::string message) { + _errorMessage = message; + } + + void ErrorReference::setErrorMessage(const char* message) { + _errorMessage = std::string(message); + } } diff --git a/libnd4j/include/ops/declarable/generic/convo/ismax.cpp b/libnd4j/include/ops/declarable/generic/convo/ismax.cpp index ad5a485e1..13de73e81 100644 --- a/libnd4j/include/ops/declarable/generic/convo/ismax.cpp +++ b/libnd4j/include/ops/declarable/generic/convo/ismax.cpp @@ -45,7 +45,7 @@ DECLARE_SYN(IsMax, ismax); DECLARE_TYPES(ismax) { getOpDescriptor() ->setAllowedInputTypes(0, DataType::ANY) - ->setAllowedOutputTypes(0, DataType::BOOL); + ->setAllowedOutputTypes(0, DataType::ANY); } diff --git a/libnd4j/include/ops/declarable/generic/nlp/cbow.cpp b/libnd4j/include/ops/declarable/generic/nlp/cbow.cpp index 2ae69e296..21906f4eb 100644 --- a/libnd4j/include/ops/declarable/generic/nlp/cbow.cpp +++ b/libnd4j/include/ops/declarable/generic/nlp/cbow.cpp @@ -84,7 +84,8 @@ namespace nd4j { ->setAllowedInputTypes(11, nd4j::DataType::INT64) ->setAllowedInputTypes(12, nd4j::DataType::INT32) ->setAllowedInputTypes(13, nd4j::DataType::INT32) - ->setAllowedInputTypes(14, {ALL_FLOATS}); + ->setAllowedInputTypes(14, {ALL_FLOATS}) + ->setAllowedOutputTypes(nd4j::DataType::ANY); } } } diff --git a/libnd4j/include/ops/declarable/generic/nlp/skipgram.cpp b/libnd4j/include/ops/declarable/generic/nlp/skipgram.cpp index 78c6e3818..a97e1a79e 100644 --- a/libnd4j/include/ops/declarable/generic/nlp/skipgram.cpp +++ b/libnd4j/include/ops/declarable/generic/nlp/skipgram.cpp @@ -79,7 +79,7 @@ namespace nd4j { ->setAllowedInputTypes(9, {ALL_FLOATS}) ->setAllowedInputTypes(10, nd4j::DataType::INT64) ->setAllowedInputTypes(11, {ALL_FLOATS}) - ->setAllowedOutputTypes(nd4j::DataType::INT8); + ->setAllowedOutputTypes(nd4j::DataType::ANY); } /* diff --git a/libnd4j/include/ops/declarable/generic/nn/softmax.cpp 
b/libnd4j/include/ops/declarable/generic/nn/softmax.cpp index 08dba09f2..d96f97c10 100644 --- a/libnd4j/include/ops/declarable/generic/nn/softmax.cpp +++ b/libnd4j/include/ops/declarable/generic/nn/softmax.cpp @@ -70,7 +70,7 @@ CONFIGURABLE_OP_IMPL(softmax_bp, 2, 1, true, 0, 0) { DECLARE_TYPES(softmax_bp) { getOpDescriptor() - ->setAllowedInputTypes(DataType::ANY) + ->setAllowedInputTypes({ALL_FLOATS}) ->setAllowedOutputTypes({ALL_FLOATS}); } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/reverse.cu b/libnd4j/include/ops/declarable/helpers/cuda/reverse.cu index fab9577d6..cf891feab 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/reverse.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/reverse.cu @@ -30,51 +30,9 @@ namespace nd4j { namespace ops { namespace helpers { - template - inline void __device__ indexSwap(T* arr, Nd4jLong idx1, Nd4jLong idx2) { - T tmp = arr[idx1]; - arr[idx1] = arr[idx2]; - arr[idx2] = tmp; - } -// template -// void reverseArray(nd4j::LaunchContext * context, void* inArr, Nd4jLong *inShapeBuffer, void *result, Nd4jLong *zShapeBuffer, int numOfElemsToReverse = 0); - - ///////////////////////////////////////////////////////////////////////////////////// - template - static __global__ void reverseArrayInplaceKernel(void *input, Nd4jLong *inputShape, Nd4jLong numOfElemsToReverse) { - const auto tid = blockIdx.x * gridDim.x + threadIdx.x; - const auto step = gridDim.x * blockDim.x; - __shared__ Nd4jLong length; - __shared__ int linearStatus; - __shared__ T* inputArr; - if (threadIdx.x == 0) { - length = shape::length(inputShape); - linearStatus = shape::elementWiseStride(inputShape); - inputArr = reinterpret_cast(input); - } - __syncthreads(); - - for (Nd4jLong e = tid; e < numOfElemsToReverse / 2; e += step) { - if (linearStatus == 1) { - auto idx = numOfElemsToReverse - e - 1; - indexSwap(inputArr, e, idx); - } - else if (linearStatus > 1) { - auto idx1 = (numOfElemsToReverse - e - 1) * linearStatus; - Nd4jLong idx2 = e * linearStatus; - indexSwap(inputArr, idx1, idx2); - } - else { - auto inOffset = shape::getIndexOffset(e, inputShape, length); - auto outOffset = shape::getIndexOffset(numOfElemsToReverse - e - 1, inputShape, length); - indexSwap(inputArr, inOffset, outOffset); - } - } - } - template static __global__ void reverseArrayKernel(void* input, Nd4jLong *inputShape, void* output, Nd4jLong *outputShape, Nd4jLong numOfElemsToReverse) { - const auto tid = blockIdx.x * gridDim.x + threadIdx.x; + const auto tid = blockIdx.x * blockDim.x + threadIdx.x; const auto step = gridDim.x * blockDim.x; __shared__ Nd4jLong length; __shared__ int linearStatus; @@ -93,51 +51,47 @@ namespace helpers { } __syncthreads(); - for (Nd4jLong e = tid; e < length; e += step) { - if (e < numOfElemsToReverse ) { - if (linearStatus == 1) { - auto idx = numOfElemsToReverse - e - 1; - outputArr[idx] = inputArr[e]; - } else if (linearStatus > 1) { - auto idx1 = (numOfElemsToReverse - e - 1) * linearStatus; - Nd4jLong idx2 = e * linearStatus; - outputArr[idx1] = inputArr[idx2]; - } else { - auto inOffset = shape::getIndexOffset(e, inputShape, length); - auto outOffset = shape::getIndexOffset(numOfElemsToReverse - e - 1, outputShape, length); - outputArr[outOffset] = inputArr[inOffset]; - } - } - else { - if (linearStatus == 1) { - outputArr[e] = inputArr[e]; - } else if (linearStatus > 1) { - auto idx1 = e * linearStatus; - Nd4jLong idx2 = e * linearStatus; - outputArr[idx1] = inputArr[idx2]; - } else { - auto inOffset = shape::getIndexOffset(e, inputShape, length); - 
auto outOffset = shape::getIndexOffset(e, outputShape, length); - outputArr[outOffset] = inputArr[inOffset]; - } - } + auto odd = length % 2 != 0; + auto limit = length / 2; + + for (Nd4jLong e = tid; e < limit; e += step) { + // we're calculating offsets within input array + auto fOffset = shape::getIndexOffset(e, inputShape, length); + auto lOffset = shape::getIndexOffset(numOfElemsToReverse - e - 1, inputShape, length); + + // now we're storing input values + auto v1 = inputArr[fOffset]; + auto v2 = inputArr[lOffset]; + + // now we're calculating offsets within output array + auto zfOffset = shape::getIndexOffset(e, outputShape, length); + auto zlOffset = shape::getIndexOffset(numOfElemsToReverse - e - 1, outputShape, length); + + // and saving values to output arrays + outputArr[zfOffset] = v2; + outputArr[zlOffset] = v1; + + //printf("TID: %i; E: %lld; z[%lld], z[%lld] = x[%lld], x[%lld];\n", tid, e, zfOffset, zlOffset, lOffset, fOffset); } - //printf("\n"); + // in case of odd array we'll have to move middle value + if (odd && tid == 0) { + auto xOffset = shape::getIndexOffset(limit, inputShape, length); + auto zOffset = shape::getIndexOffset(limit, outputShape, length); + + outputArr[zOffset] = inputArr[xOffset]; + //printf("TID: %i; E: %lld; z[%lld] = x[%lld];\n", tid, limit, zOffset, xOffset); + } } template - static void reverseArray(nd4j::LaunchContext * context, NDArray* input, NDArray* output, int numOfElemsToReverse) { + static void reverseArray(nd4j::LaunchContext * context, NDArray* input, NDArray* output, Nd4jLong numOfElemsToReverse) { auto stream = context->getCudaStream(); Nd4jLong numOfReverse = numOfElemsToReverse; if (numOfElemsToReverse == 0) numOfReverse = input->lengthOf(); - if (input == output) { - reverseArrayInplaceKernel<<<256, 512, 8192, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), numOfReverse); - } - else { - reverseArrayKernel<<<256, 512, 8192, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), numOfReverse); - } + + reverseArrayKernel<<<256, 512, 8192, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), numOfReverse); } @@ -221,7 +175,7 @@ namespace helpers { delete listIn; } -BUILD_SINGLE_TEMPLATE(template void reverseArray, (nd4j::LaunchContext * context, NDArray *inArr, NDArray *outArr, int numOfElemsToReverse), LIBND4J_TYPES); +BUILD_SINGLE_TEMPLATE(template void reverseArray, (nd4j::LaunchContext * context, NDArray *inArr, NDArray *outArr, Nd4jLong numOfElemsToReverse), LIBND4J_TYPES); } } diff --git a/libnd4j/include/ops/declarable/impl/DeclarableOp.cpp b/libnd4j/include/ops/declarable/impl/DeclarableOp.cpp index 5d29ed826..4fe28df8c 100644 --- a/libnd4j/include/ops/declarable/impl/DeclarableOp.cpp +++ b/libnd4j/include/ops/declarable/impl/DeclarableOp.cpp @@ -19,7 +19,6 @@ // #include -#include #include #include #include @@ -190,32 +189,6 @@ namespace nd4j { auto outSha = this->calculateOutputShape(&inSha, ctx); results = outSha->size(); - // we must "validate" our output shapes - /* - for (int e = 0; e < results; e++) { - auto ptr = outSha->at(e); - - // checking for the same pointer used twice - for (int i = 0; i < results; i++){ - if (i == e) - continue; - - auto com = outSha->at(i); - - if (ptr == com) - throw std::runtime_error("ShapeFunction returned same shape instance twice [" + *_descriptor->getOpName() + "]"); - } - - // checking for input pointer returned back - for (int i = 0; i < inSha.size(); 
i++){ - auto com = inSha.at(i); - - if (ptr == com) - throw std::runtime_error("ShapeFunction returned input shape instance as output [" + *_descriptor->getOpName() + "]"); - } - } - */ - // optionally saving shapeTime if (Environment::getInstance()->isProfiling() && node != nullptr) { shapeEnd = std::chrono::system_clock::now(); @@ -355,75 +328,139 @@ namespace nd4j { // rolling over inputs first int cnt = 0, inT = 0; std::vector inputTypes(block.width()); - for (auto &p: *(block.inputs())) { - auto var = block.variable(p); - - // we're not checking validity, if ANY types were explicitly allowed - //if (block.dataType(cnt) == nd4j::DataType::ANY) - // continue; - - // only validating non-null variables - if (var != nullptr && var->hasNDArray()) { - auto array = var->getNDArray(); - + if (block.isFastPath()) { + for (auto array: block.fastpath_in()) { inputTypes[inT++] = array->dataType(); if (!_descriptor->checkInputMatch(cnt, array->dataType())) { auto ctype = DataTypeUtils::asString(array->dataType()); - nd4j_printf("Op [%s] failed check for input [%i], DataType: [%s]\n", _descriptor->getOpName()->data(), cnt, ctype.c_str()); + nd4j_printf("Op [%s] failed check for input [%i], DataType: [%s]\n", + _descriptor->getOpName()->data(), cnt, ctype.c_str()); return ND4J_STATUS_BAD_ARGUMENTS; } + cnt++; } + } else { + for (auto &p: *(block.inputs())) { + auto var = block.variable(p); - cnt++; - } - - // checking optionally available outputs - auto varSpace = block.getVariableSpace(); - for (int index = 0; index < DataTypeUtils::max(); index++) { - if (varSpace != nullptr && varSpace->hasVariable(block.nodeId(), index)) { - auto var = block.variable(block.nodeId(), index); + // we're not checking validity, if ANY types were explicitly allowed + //if (block.dataType(cnt) == nd4j::DataType::ANY) + // continue; // only validating non-null variables if (var != nullptr && var->hasNDArray()) { auto array = var->getNDArray(); - auto cType = array->dataType(); - if (_descriptor->isSameMode()) { - - if (index >= block.width()) { - auto iv = block.variable(0); - - if (iv->getNDArray()->dataType() != cType) { - auto t = DataTypeUtils::asString(cType); - nd4j_printf("Op [%s] failed check for output [%i], DataType: [%s]\n", _descriptor->getOpName()->data(), index, t.c_str()); - return ND4J_STATUS_BAD_ARGUMENTS; - } - } else { - // for same mode, output type must be the same as input type - auto iv = block.variable(index); - - if (iv->getNDArray()->dataType() != cType) { - auto t = DataTypeUtils::asString(cType); - nd4j_printf("Op [%s] failed check for output [%i], DataType: [%s]\n", _descriptor->getOpName()->data(), index, t.c_str()); - return ND4J_STATUS_BAD_ARGUMENTS; - } - } - } else if (_descriptor->isInherit(index)) { - // in inherit mode, output type must be the same as one of input types - if (std::find(inputTypes.begin(), inputTypes.end(), cType) == inputTypes.end()) { - auto t = DataTypeUtils::asString(cType); - nd4j_printf("Op [%s] failed check for output [%i], DataType: [%s].\n", _descriptor->getOpName()->data(), index, t.c_str()); - return ND4J_STATUS_BAD_ARGUMENTS; - } - - } else if (!_descriptor->checkOutputMatch(index, cType)) { - auto t = DataTypeUtils::asString(cType); - nd4j_printf("Op [%s] failed check for output [%i], DataType: [%i];\n", _descriptor->getOpName()->data(), index, t.c_str()); + inputTypes[inT++] = array->dataType(); + if (!_descriptor->checkInputMatch(cnt, array->dataType())) { + auto ctype = DataTypeUtils::asString(array->dataType()); + nd4j_printf("Op [%s] failed check for 
input [%i], DataType: [%s]\n", + _descriptor->getOpName()->data(), cnt, ctype.c_str()); return ND4J_STATUS_BAD_ARGUMENTS; } } - } else - break; + + cnt++; + } + } + + if (block.isFastPath()) { + int index = 0; + for (auto array: block.fastpath_out()) { + auto cType = array->dataType(); + + if (_descriptor->isSameMode()) { + + if (index >= block.width()) { + auto ia = block.fastpath_in()[0]; + + if (ia->dataType() != cType) { + auto t = DataTypeUtils::asString(cType); + nd4j_printf("Op [%s] failed check for output [%i], DataType: [%s]\n", + _descriptor->getOpName()->data(), index, t.c_str()); + return ND4J_STATUS_BAD_ARGUMENTS; + } + } else { + // for same mode, output type must be the same as input type + auto ia = block.fastpath_in()[index]; + + if (ia->dataType() != cType) { + auto t = DataTypeUtils::asString(cType); + nd4j_printf("Op [%s] failed check for output [%i], DataType: [%s]\n", + _descriptor->getOpName()->data(), index, t.c_str()); + return ND4J_STATUS_BAD_ARGUMENTS; + } + } + } else if (_descriptor->isInherit(index)) { + // in inherit mode, output type must be the same as one of input types + if (std::find(inputTypes.begin(), inputTypes.end(), cType) == inputTypes.end()) { + auto t = DataTypeUtils::asString(cType); + nd4j_printf("Op [%s] failed check for output [%i], DataType: [%s].\n", + _descriptor->getOpName()->data(), index, t.c_str()); + return ND4J_STATUS_BAD_ARGUMENTS; + } + + } else if (!_descriptor->checkOutputMatch(index, cType)) { + auto t = DataTypeUtils::asString(cType); + nd4j_printf("Op [%s] failed check for output [%i], DataType: [%s];\n", + _descriptor->getOpName()->data(), index, t.c_str()); + return ND4J_STATUS_BAD_ARGUMENTS; + } + index++; + } + } else { + // checking optionally available outputs + auto varSpace = block.getVariableSpace(); + for (int index = 0; index < DataTypeUtils::max(); index++) { + if (varSpace != nullptr && varSpace->hasVariable(block.nodeId(), index)) { + auto var = block.variable(block.nodeId(), index); + + // only validating non-null variables + if (var != nullptr && var->hasNDArray()) { + auto array = var->getNDArray(); + auto cType = array->dataType(); + + if (_descriptor->isSameMode()) { + + if (index >= block.width()) { + auto iv = block.variable(0); + + if (iv->getNDArray()->dataType() != cType) { + auto t = DataTypeUtils::asString(cType); + nd4j_printf("Op [%s] failed check for output [%i], DataType: [%s]\n", + _descriptor->getOpName()->data(), index, t.c_str()); + return ND4J_STATUS_BAD_ARGUMENTS; + } + } else { + // for same mode, output type must be the same as input type + auto iv = block.variable(index); + + if (iv->getNDArray()->dataType() != cType) { + auto t = DataTypeUtils::asString(cType); + nd4j_printf("Op [%s] failed check for output [%i], DataType: [%s]\n", + _descriptor->getOpName()->data(), index, t.c_str()); + return ND4J_STATUS_BAD_ARGUMENTS; + } + } + } else if (_descriptor->isInherit(index)) { + // in inherit mode, output type must be the same as one of input types + if (std::find(inputTypes.begin(), inputTypes.end(), cType) == inputTypes.end()) { + auto t = DataTypeUtils::asString(cType); + nd4j_printf("Op [%s] failed check for output [%i], DataType: [%s].\n", + _descriptor->getOpName()->data(), index, t.c_str()); + return ND4J_STATUS_BAD_ARGUMENTS; + } + + } else if (!_descriptor->checkOutputMatch(index, cType)) { + auto t = DataTypeUtils::asString(cType); + nd4j_printf("Op [%s] failed check for output [%i], DataType: [%s];\n", + _descriptor->getOpName()->data(), index, t.c_str()); + return 
ND4J_STATUS_BAD_ARGUMENTS; + } + } + } else + break; + } } diff --git a/libnd4j/tests_cpu/layers_tests/JavaInteropTests.cpp b/libnd4j/tests_cpu/layers_tests/JavaInteropTests.cpp index ef3710371..d0d67000b 100644 --- a/libnd4j/tests_cpu/layers_tests/JavaInteropTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/JavaInteropTests.cpp @@ -400,6 +400,32 @@ TEST_F(JavaInteropTests, Test_Synonyms_3) { ASSERT_EQ(nameRef, name); } +TEST_F(JavaInteropTests, Test_FastPath_Validation_1) { + auto x = NDArrayFactory::create('c', {4}, {1, 2, 3, 4}); + auto z = NDArrayFactory::create('c', {4}, {1, 2, 3, 4}); + + Context ctx(1); + ctx.setInputArray(0, x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo()); + ctx.setOutputArray(0, z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo()); + + nd4j::ops::softmax op; + auto status = op.execute(&ctx); + ASSERT_NE(Status::OK(), status); +} + +TEST_F(JavaInteropTests, Test_FastPath_Validation_2) { + auto x = NDArrayFactory::create('c', {4}, {1.f, 2.f, 3.f, 4.f}); + auto z = NDArrayFactory::create('c', {4}, {1, 2, 3, 4}); + + Context ctx(1); + ctx.setInputArray(0, x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo()); + ctx.setOutputArray(0, z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo()); + + nd4j::ops::softmax op; + auto status = op.execute(&ctx); + ASSERT_NE(Status::OK(), status); +} + /* TEST_F(JavaInteropTests, test_avgpooling_edge_1) { int inOutH = 35; diff --git a/libnd4j/tests_cpu/layers_tests/NativeOpsTests.cpp b/libnd4j/tests_cpu/layers_tests/NativeOpsTests.cpp index fe190d9bb..9aac42ddf 100644 --- a/libnd4j/tests_cpu/layers_tests/NativeOpsTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/NativeOpsTests.cpp @@ -992,81 +992,6 @@ TEST_F(NativeOpsTests, ScalarTadTest_2) { ASSERT_TRUE(exp.e(5) == z.e(5) && exp.e(15)); } -TEST_F(NativeOpsTests, FlattenTest_1) { - auto x = NDArrayFactory::create('c', {5, 5}); - auto y = NDArrayFactory::create('c', {5, 5}); - auto exp = NDArrayFactory::create('c', {2, 5,5}); - auto z = NDArrayFactory::create('c', {2, 5,5}); - - Nd4jPointer extra[6]; -#ifdef __CUDABLAS__ - extra[1] = x.getContext()->getCudaStream(); - extra[0] = extra[2] = extra[3] = extra[4] = extra[5] = nullptr; - x.syncToHost(); - y.syncToHost(); - printf("Unsupported for CUDA platform yet.\n"); - return; -#endif - x.linspace(1.0,2); - y.linspace(2,2); - - //y.assign(2.); - x.syncToDevice(); - z.syncToDevice(); - auto dimension = NDArrayFactory::create({0, 1}); - auto dimensions = reinterpret_cast(dimension.buffer()); - auto tadPackX = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), dimensions, dimension.lengthOf()); - auto tadPackZ = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(z.shapeInfo(), dimensions, dimension.lengthOf()); - exp(1, {0}).linspace(1,2); - ::flatten(extra, - 25, 'c', z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), - x.buffer(), x.shapeInfo(), - x.specialBuffer(), x.specialShapeInfo()); - -// exp.printIndexedBuffer("Exp"); -// z.printIndexedBuffer("Flatten"); - ASSERT_TRUE(exp.equalsTo(z)); -} - -TEST_F(NativeOpsTests, ConcatTest_1) { - auto x = NDArrayFactory::create('c', {5, 5}); - auto y = NDArrayFactory::create('c', {5, 5}); - auto exp = NDArrayFactory::create('c', {10,5}); - auto z = NDArrayFactory::create('c', {10,5}); - - Nd4jPointer extra[6]; -#ifdef __CUDABLAS__ - extra[1] = x.getContext()->getCudaStream(); - extra[0] = extra[2] = extra[3] = extra[4] = extra[5] = nullptr; - x.syncToHost(); - y.syncToHost(); - printf("Unsupported 
for CUDA platform yet.\n"); - return; -#endif - x.linspace(1.0); - y.linspace(26); - - //y.assign(2.); - x.syncToDevice(); - z.syncToDevice(); - int d = 0; - auto dimension = NDArrayFactory::create('c', {1}, {d}); - auto dimensions = reinterpret_cast(dimension.buffer()); - //auto tadPackX = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), dimensions, dimension.lengthOf()); - auto tadPackZ = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(z.shapeInfo(), dimensions, dimension.lengthOf()); - exp.linspace(1); - Nd4jPointer datas[] = {x.buffer(), y.buffer()}; - Nd4jPointer shapes[] = {x.shapeInfo(), y.shapeInfo()}; - - ::concat(extra, - 0, 2, datas, shapes, nullptr, nullptr, z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), - nullptr, nullptr); - -// exp.printIndexedBuffer("Exp"); -// z.printIndexedBuffer("Concat"); - ASSERT_TRUE(exp.equalsTo(z)); -} - TEST_F(NativeOpsTests, ConcatTest_2) { auto x = NDArrayFactory::create('c', {5, 5}); auto y = NDArrayFactory::create('c', {5, 5}); diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/nativeblas/NativeOps.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/nativeblas/NativeOps.java index 174be9a7d..576cea78a 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/nativeblas/NativeOps.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/nativeblas/NativeOps.java @@ -557,43 +557,6 @@ public interface NativeOps { @Cast("Nd4jLong *") LongPointer tadShapeInfo, @Cast("Nd4jLong *") LongPointer tadOffsets, @Cast("Nd4jLong *") LongPointer tadShapeInfoZ, @Cast("Nd4jLong *") LongPointer tadOffsetsZ); - /** - * @param extraPointers - * @param offset - * @param order - * @param results - * @param resultShapeInfo - * @param input - * @param inputShapeInfo - */ - void flatten(PointerPointer extraPointers, - int offset, - char order, - Pointer results, @Cast("Nd4jLong *") LongPointer resultShapeInfo, - Pointer dresults, @Cast("Nd4jLong *") LongPointer dresultShapeInfo, - Pointer input, @Cast("Nd4jLong *") LongPointer inputShapeInfo, - Pointer dinput, @Cast("Nd4jLong *") LongPointer dinputShapeInfo); - - /** - * @param extraPointers - * @param dimension - * @param numArrays - * @param data - * @param inputShapeInfo - * @param results - * @param resultShapeInfo - * @param tadPointers - * @param tadOffsets - */ - void concat(PointerPointer extraPointers, - int dimension, - int numArrays, - PointerPointer data, PointerPointer inputShapeInfo, - PointerPointer ddata, PointerPointer dinputShapeInfo, - Pointer results, @Cast("Nd4jLong *") LongPointer resultShapeInfo, - Pointer dresults, @Cast("Nd4jLong *") LongPointer dresultShapeInfo, - PointerPointer tadPointers, - PointerPointer tadOffsets); void specialConcat(PointerPointer extraPointers, int dimension, @@ -1185,4 +1148,7 @@ public interface NativeOps { Pointer lcCopyStream(OpaqueLaunchContext lc); Pointer lcBlasHandle(OpaqueLaunchContext lc); Pointer lcSolverHandle(OpaqueLaunchContext lc); + + int lastErrorCode(); + String lastErrorMessage(); } diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/jita/allocator/pointers/cuda/cudaEvent_t.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/jita/allocator/pointers/cuda/cudaEvent_t.java index 1650e08ac..52b7d7332 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/jita/allocator/pointers/cuda/cudaEvent_t.java 
+++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/jita/allocator/pointers/cuda/cudaEvent_t.java @@ -22,6 +22,7 @@ import org.bytedeco.javacpp.Pointer; import org.nd4j.jita.allocator.pointers.CudaPointer; import org.nd4j.linalg.exception.ND4JException; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.nativeblas.NativeOps; import org.nd4j.nativeblas.NativeOpsHolder; /** @@ -67,14 +68,18 @@ public class cudaEvent_t extends CudaPointer { int res = NativeOpsHolder.getInstance().getDeviceNativeOps().eventSynchronize(this); if (res == 0) throw new ND4JException("CUDA exception happened. Terminating. Last op: [" + Nd4j.getExecutioner().getLastOp() +"]"); + + if (NativeOpsHolder.getInstance().getDeviceNativeOps().lastErrorCode() != 0) + throw new RuntimeException(NativeOpsHolder.getInstance().getDeviceNativeOps().lastErrorMessage()); } } public void register(cudaStream_t stream) { if (!isDestroyed()) { int res = NativeOpsHolder.getInstance().getDeviceNativeOps().registerEvent(this, stream); - if (res == 0) - throw new ND4JException("CUDA exception happened. Terminating. Last op: [" + Nd4j.getExecutioner().getLastOp() +"]"); + + if (NativeOpsHolder.getInstance().getDeviceNativeOps().lastErrorCode() != 0) + throw new RuntimeException(NativeOpsHolder.getInstance().getDeviceNativeOps().lastErrorMessage()); } } } diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/jita/allocator/pointers/cuda/cudaStream_t.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/jita/allocator/pointers/cuda/cudaStream_t.java index b18ceb2fa..8d78ee950 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/jita/allocator/pointers/cuda/cudaStream_t.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/jita/allocator/pointers/cuda/cudaStream_t.java @@ -36,8 +36,9 @@ public class cudaStream_t extends CudaPointer { public int synchronize() { NativeOps nativeOps = NativeOpsHolder.getInstance().getDeviceNativeOps(); int res = nativeOps.streamSynchronize(this); - if (res == 0) - throw new ND4JException("CUDA exception happened. Terminating. 
Last op: [" + Nd4j.getExecutioner().getLastOp() +"]"); + + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); return res; } diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/JCublasNDArrayFactory.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/JCublasNDArrayFactory.java index 44c361d87..9e9dc34b2 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/JCublasNDArrayFactory.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/JCublasNDArrayFactory.java @@ -24,6 +24,7 @@ import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.buffer.DataTypeEx; import org.nd4j.linalg.api.buffer.Utf8Buffer; import org.nd4j.linalg.api.memory.enums.MemoryKind; +import org.nd4j.linalg.api.ops.custom.Flatten; import org.nd4j.linalg.api.ops.impl.shape.Concat; import org.nd4j.linalg.api.ops.performance.PerformanceTracker; import org.nd4j.linalg.api.shape.options.ArrayOptionsHelper; @@ -104,6 +105,9 @@ public class JCublasNDArrayFactory extends BaseNativeNDArrayFactory { functions.put(11, Loader.addressof("cusolverDnSgesvd")); functions.put(12, Loader.addressof("cusolverDnDgesvd")); nativeOps.initializeFunctions(functions); + + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); } @Override @@ -335,75 +339,7 @@ public class JCublasNDArrayFactory extends BaseNativeNDArrayFactory { if (Nd4j.getExecutioner() instanceof GridExecutioner) ((GridExecutioner) Nd4j.getExecutioner()).flushQueue(); - int length = 0; - DataType t = null; - for (INDArray m : matrices) { - length += m.length(); - if (t == null) - t = m.dataType(); - - Preconditions.checkArgument(t == m.dataType(), "Arrays must have same data type"); - } - - INDArray ret = Nd4j.create(t, new long[] {length}, order); - int linearIndex = 0; - - AtomicAllocator allocator = AtomicAllocator.getInstance(); - - - for (INDArray m : matrices) { - if (m.isEmpty()) - continue; - - CudaContext context = allocator.getFlowController().prepareAction(ret, m); - - if (m.ordering() == order && ret.elementWiseStride() == m.elementWiseStride() - && ret.elementWiseStride() == 1) { - // do memcpy in proper direction and forget about that - // FIXME: get rid of this - ((BaseCudaDataBuffer) m.data()).lazyAllocateHostPointer(); - allocator.memcpyAsync(ret.data(), new CudaPointer(allocator.getHostPointer(m).address()), - AllocationUtils.getRequiredMemory(AllocationUtils.buildAllocationShape(m)), - linearIndex * (m.data().dataType() == DataType.DOUBLE ? 8 - : m.data().dataType() == DataType.FLOAT ? 
4 : 2)); - linearIndex += m.length(); - } else { - Pointer hostYShapeInfo = AddressRetriever.retrieveHostPointer(m.shapeInfoDataBuffer()); - - PointerPointer extras = new PointerPointer( - AddressRetriever.retrieveHostPointer(ret.shapeInfoDataBuffer()), context.getOldStream(), - allocator.getDeviceIdPointer(), null, - context.getBufferReduction(), context.getBufferScalar(), null, - hostYShapeInfo, AddressRetriever.retrieveHostPointer(ret.shapeInfoDataBuffer())); - - - nativeOps.flatten(extras, linearIndex, order, - null, - (LongPointer) allocator.getHostPointer(ret.shapeInfoDataBuffer()), - allocator.getPointer(ret, context), - (LongPointer) allocator.getPointer(ret.shapeInfoDataBuffer(), context), - null, - (LongPointer) allocator.getHostPointer(m.shapeInfoDataBuffer()), - allocator.getPointer(m, context), - (LongPointer) allocator.getPointer(m.shapeInfoDataBuffer(), context)); - - - - - //Works for all cases... - - /* NdIndexIterator iter = new NdIndexIterator(order, m.shape()); - while (iter.hasNext()) { - ret.putScalar(linearIndex++, m.getDouble(iter.next())); - }*/ - - linearIndex += m.length(); - } - - if (ret != null) - allocator.registerAction(context, ret, m); - } - return ret; + return Nd4j.exec(new Flatten(order, matrices.toArray(new INDArray[0])))[0]; } @Override @@ -412,131 +348,6 @@ public class JCublasNDArrayFactory extends BaseNativeNDArrayFactory { ((GridExecutioner) Nd4j.getExecutioner()).flushQueue(); return Nd4j.exec(new Concat(dimension, toConcat))[0]; - - // legacy implementation -/* - boolean allScalars = true; - - var outputShape = ArrayUtil.copy(toConcat[0].shape()); - - if (toConcat.length == 1) - return toConcat[0]; - - int sumAlongDim = 0; - for (int i = 0; i < toConcat.length; i++) { - if (toConcat[i].isCompressed()) - Nd4j.getCompressor().decompressi(toConcat[i]); - - allScalars &= toConcat[i].rank() == 0; - - sumAlongDim += toConcat[i].size(dimension); - } - - if (allScalars) { - outputShape = new long[]{sumAlongDim}; - } else { - outputShape[dimension] = sumAlongDim; - } - - INDArray ret = Nd4j.createUninitialized(toConcat[0].dataType(), outputShape, Nd4j.order()); - - AtomicAllocator allocator = AtomicAllocator.getInstance(); - - CudaContext context = allocator.getFlowController().prepareAction(ret, toConcat); - - val shapeInfoPointers = new long[toConcat.length]; - val dataPointers = new long[toConcat.length]; - val tadPointers = new long[toConcat.length]; - val offsetsPointers = new long[toConcat.length]; - val hostShapeInfoPointers = new long[toConcat.length]; - - TADManager tadManager = Nd4j.getExecutioner().getTADManager(); - for (int i = 0; i < toConcat.length; i++) { - shapeInfoPointers[i] = AddressRetriever.retrieveDeviceAddress(toConcat[i].shapeInfoDataBuffer(), context); - dataPointers[i] = AtomicAllocator.getInstance().getPointer(toConcat[i], context).address(); - hostShapeInfoPointers[i] = AtomicAllocator.getInstance().getHostPointer(toConcat[i].shapeInfoDataBuffer()).address(); - - sumAlongDim += toConcat[i].size(dimension); - for (int j = 0; j < toConcat[i].rank(); j++) - if (j != dimension && toConcat[i].size(j) != outputShape[j]) { - throw new IllegalArgumentException( - "Illegal concatenation at array " + i + " and shape element " + j); - } - - if (!allScalars) { - val tadBuffers = tadManager.getTADOnlyShapeInfo(toConcat[i], new int[]{dimension}); - - long devTadShapeInfo = AtomicAllocator.getInstance().getPointer(tadBuffers.getFirst(), context).address(); - - val offsets = tadBuffers.getSecond(); - long devTadOffsets = 
AtomicAllocator.getInstance().getPointer(offsets, context).address(); - - tadPointers[i] = devTadShapeInfo; - offsetsPointers[i] = devTadOffsets; - } - } - - // getting tadOnlyShape for result - val zBuffers = tadManager.getTADOnlyShapeInfo(ret, new int[] {dimension}); - val hostPointers = new LongPointer(hostShapeInfoPointers); - val hosthost = new PointerPointerWrapper(hostPointers); - - //System.out.println("shapePointers: " + Arrays.toString(shapeInfoPointers)); - - val dZ = AtomicAllocator.getInstance().getPointer(ret, context); - val dZShapeInfo = AddressRetriever.retrieveDevicePointer(ret.shapeInfoDataBuffer(), context); - - - - //val tempData = new CudaDoubleDataBuffer(toConcat.length); - //val tempShapes = new CudaDoubleDataBuffer(toConcat.length); - //val tempTAD = new CudaDoubleDataBuffer(toConcat.length); - //val tempOffsets = new CudaDoubleDataBuffer(toConcat.length); - - //AtomicAllocator.getInstance().memcpyBlocking(tempData, new LongPointer(dataPointers), dataPointers.length * 8,0); - //AtomicAllocator.getInstance().memcpyBlocking(tempShapes, new LongPointer(shapeInfoPointers), shapeInfoPointers.length * 8, 0); - //AtomicAllocator.getInstance().memcpyBlocking(tempTAD, new LongPointer(tadPointers), tadPointers.length * 8, 0); - //AtomicAllocator.getInstance().memcpyBlocking(tempOffsets, new LongPointer(offsetsPointers), offsetsPointers.length * 8, 0); - - val dataPointer = new PointerPointerWrapper(new LongPointer(dataPointers)); //AtomicAllocator.getInstance().getPointer(tempData, context); - val shapesPointer = new PointerPointerWrapper(new LongPointer(shapeInfoPointers));//AtomicAllocator.getInstance().getPointer(tempShapes, context); - //val tadPointer = AtomicAllocator.getInstance().getPointer(tempTAD, context); - //val offsetPointer = AtomicAllocator.getInstance().getPointer(tempOffsets, context); - - - // System.out.println("ShapesPointer after conversion: " + shapesPointer); - - val extras = new PointerPointer(AddressRetriever.retrieveHostPointer(ret.shapeInfoDataBuffer()), - context.getOldStream(), allocator.getDeviceIdPointer(), null, - context.getBufferReduction(), context.getBufferScalar(), null, - AddressRetriever.retrieveHostPointer(toConcat[0].shapeInfoDataBuffer()), - AddressRetriever.retrieveHostPointer(ret.shapeInfoDataBuffer()), - new LongPointer(hostShapeInfoPointers), - AtomicAllocator.getInstance().getPointer(zBuffers.getFirst(), context), // getting zTADShape - AtomicAllocator.getInstance().getPointer(zBuffers.getSecond(), context) // getting zOffset - ); - - - nativeOps.concat(extras, - dimension, - toConcat.length, - null, - hosthost, - dataPointer, - shapesPointer, - null, - (LongPointer) ret.shapeInfoDataBuffer().addressPointer(), - dZ, - (LongPointer) dZShapeInfo, - null, - null); - - - allocator.registerAction(context, ret, toConcat); - - return ret; - //return super.concat(dimension, toConcat); - */ } @@ -590,6 +401,8 @@ public class JCublasNDArrayFactory extends BaseNativeNDArrayFactory { (LongPointer) ret.shapeInfoDataBuffer().addressPointer(), null, null); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); AllocationPoint point = allocator.getAllocationPoint(ret); @@ -598,6 +411,9 @@ public class JCublasNDArrayFactory extends BaseNativeNDArrayFactory { nativeOps.memcpyAsync(point.getDevicePointer(), point.getHostPointer(), ret.lengthLong() * Nd4j.sizeOfDataType(ret.data().dataType()), CudaConstants.cudaMemcpyHostToDevice, context.getSpecialStream()); context.getSpecialStream().synchronize(); + if 
(nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + PerformanceTracker.getInstance().helperRegisterTransaction(point.getDeviceId(), perfD, point.getNumberOfBytes(), MemcpyDirection.HOST_TO_DEVICE); point.tickHostRead(); @@ -729,6 +545,8 @@ public class JCublasNDArrayFactory extends BaseNativeNDArrayFactory { (LongPointer) zTadShapeInfo, new LongPointerWrapper(zTadOffsets)); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); allocator.registerAction(context, ret, source); @@ -743,7 +561,6 @@ public class JCublasNDArrayFactory extends BaseNativeNDArrayFactory { return target.assign(arrays[0]); // we do averaging on GPU only if ALL devices have p2p links - //if (CudaEnvironment.getInstance().getConfiguration().isCrossDeviceAccessAllowed() && nativeOps.isP2PAvailable()) { if (true) { Nd4j.getExecutioner().push(); @@ -781,6 +598,9 @@ public class JCublasNDArrayFactory extends BaseNativeNDArrayFactory { nativeOps.accumulate(extras, null, (LongPointer) arrays[0].shapeInfoDataBuffer().addressPointer(), x, null, null, (LongPointer) allocator.getHostPointer(target.shapeInfoDataBuffer()) , z, (LongPointer) allocator.getPointer(target.shapeInfoDataBuffer()), arrays.length, len); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + allocator.getFlowController().registerAction(context, target, arrays); return target; @@ -824,6 +644,8 @@ public class JCublasNDArrayFactory extends BaseNativeNDArrayFactory { arrays.length, len); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); AtomicAllocator.getInstance().getAllocationPoint(target).tickHostWrite(); @@ -895,6 +717,9 @@ public class JCublasNDArrayFactory extends BaseNativeNDArrayFactory { arrays.length, len, true); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + allocator.getFlowController().registerAction(context, target, arrays); return target; @@ -940,6 +765,9 @@ public class JCublasNDArrayFactory extends BaseNativeNDArrayFactory { arrays.length, len, true); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + if (target != null) AtomicAllocator.getInstance().getAllocationPoint(target).tickHostWrite(); @@ -1115,6 +943,9 @@ public class JCublasNDArrayFactory extends BaseNativeNDArrayFactory { (IntPointer) shuffleMap, new PointerPointer(allocator.getPointer(tempTAD, context)), new PointerPointer(allocator.getPointer(tempOffsets, context))); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + for (int f = 0; f < arrays.size(); f++) { allocator.getFlowController().registerAction(context, arrays.get(f)); } @@ -1260,6 +1091,9 @@ public class JCublasNDArrayFactory extends BaseNativeNDArrayFactory { val p = new PointerPointer<>(new Pointer[]{null, stream}); nativeOps.convertTypes(p, typeSrc.ordinal(), source, length, typeDst.ordinal(), target); + + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); } @Override @@ -1277,7 +1111,13 @@ public class JCublasNDArrayFactory extends BaseNativeNDArrayFactory { srcPtr = nativeOps.mallocDevice(ssize, 0, 0); dstPtr = nativeOps.mallocDevice(size, 0, 0); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + nativeOps.memcpyAsync(srcPtr, source, ssize, CudaConstants.cudaMemcpyHostToDevice, stream); + + if 
(nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); } else { // decompressing throw new UnsupportedOperationException(); @@ -1288,9 +1128,15 @@ public class JCublasNDArrayFactory extends BaseNativeNDArrayFactory { stream.synchronize(); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + if (buffer instanceof CompressedDataBuffer) { nativeOps.freeDevice(srcPtr, 0); nativeOps.freeDevice(dstPtr, 0); + + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); } } @@ -1309,13 +1155,15 @@ public class JCublasNDArrayFactory extends BaseNativeNDArrayFactory { val size = ((CompressedDataBuffer) source).getCompressionDescriptor().getCompressedLength(); srcPtr = ws.alloc(size, MemoryKind.DEVICE, DataType.HALF, false); nativeOps.memcpyAsync(srcPtr, source.addressPointer(), size, CudaConstants.cudaMemcpyHostToHost, stream); + + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); } // if true - we're compressing into host memory if (target instanceof CompressedDataBuffer) { val size = ((CompressedDataBuffer) target).getCompressionDescriptor().getCompressedLength(); dstPtr = ws.alloc(size, MemoryKind.DEVICE, DataType.HALF, false); - //nativeOps.memcpyAsync(dstPtr, target.addressPointer(), size, CudaConstants.cudaMemcpyHostToHost, stream); } } else { // if true - we're decompressing from host memory @@ -1325,6 +1173,9 @@ public class JCublasNDArrayFactory extends BaseNativeNDArrayFactory { srcPtr = nativeOps.mallocDevice(size, 0, 0); nativeOps.memcpyAsync(srcPtr, source.addressPointer(), size, CudaConstants.cudaMemcpyHostToHost, stream); stream.synchronize(); + + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); } else srcPtr = AtomicAllocator.getInstance().getPointer(source); @@ -1333,8 +1184,9 @@ public class JCublasNDArrayFactory extends BaseNativeNDArrayFactory { log.info("Replacing target ptr"); val size = ((CompressedDataBuffer) target).getCompressionDescriptor().getCompressedLength(); dstPtr = nativeOps.mallocDevice(size, 0, 0); - //nativeOps.memcpyAsync(dstPtr, source.addressPointer(), size, CudaConstants.cudaMemcpyHostToHost, stream); - //stream.synchronize(); + + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); } else dstPtr = AtomicAllocator.getInstance().getPointer(target); } @@ -1342,6 +1194,9 @@ public class JCublasNDArrayFactory extends BaseNativeNDArrayFactory { convertDataEx(typeSrc, srcPtr, typeDst, dstPtr, target.length()); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + Nd4j.getExecutioner().commit(); @@ -1364,6 +1219,9 @@ public class JCublasNDArrayFactory extends BaseNativeNDArrayFactory { } + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + Nd4j.getExecutioner().commit(); } @@ -1462,6 +1320,9 @@ public class JCublasNDArrayFactory extends BaseNativeNDArrayFactory { new LongPointerWrapper(AtomicAllocator.getInstance().getPointer(tadBuffers.getSecond(), context)) ); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + AtomicAllocator.getInstance().getFlowController().registerActionAllWrite(context, result); AtomicAllocator.getInstance().getFlowController().registerAction(context,null, result); @@ -1517,6 +1378,8 @@ public class JCublasNDArrayFactory extends 
BaseNativeNDArrayFactory { descending ); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); AtomicAllocator.getInstance().getFlowController().registerAction(context, x); @@ -1565,6 +1428,8 @@ public class JCublasNDArrayFactory extends BaseNativeNDArrayFactory { descending ); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); AtomicAllocator.getInstance().getFlowController().registerAction(context, x); diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaExecutioner.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaExecutioner.java index 789f0f1a3..38a1ba382 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaExecutioner.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaExecutioner.java @@ -207,6 +207,10 @@ public class CudaExecutioner extends DefaultOpExecutioner { throw new UnsupportedOperationException("Unknown op type: " + op.getOpType()); } + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + + AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y()); profilingConfigurableHookOut(op, st); @@ -461,6 +465,9 @@ public class CudaExecutioner extends DefaultOpExecutioner { } } + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + profilingConfigurableHookOut(op, st); return op.z(); @@ -619,7 +626,8 @@ public class CudaExecutioner extends DefaultOpExecutioner { AtomicAllocator.getInstance().getPointer(op.dimensions(), context), null); - + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y()); @@ -777,6 +785,9 @@ public class CudaExecutioner extends DefaultOpExecutioner { throw new UnsupportedOperationException("Unknown opType: " + op.getOpType()); } + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y()); profilingConfigurableHookOut(op, st); @@ -868,6 +879,9 @@ public class CudaExecutioner extends DefaultOpExecutioner { AtomicAllocator.getInstance().registerAction(context, null, op.x(), op.y()); } + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + profilingConfigurableHookOut(op, st); return null; @@ -1105,6 +1119,8 @@ public class CudaExecutioner extends DefaultOpExecutioner { AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y()); } + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); profilingConfigurableHookOut(op, st); @@ -1194,6 +1210,9 @@ public class CudaExecutioner extends DefaultOpExecutioner { throw new UnsupportedOperationException(); } + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + AtomicAllocator.getInstance().getFlowController().registerAction(context, op.z(), op.x(), op.y()); profilingConfigurableHookOut(op, st); @@ -1268,6 +1287,9 @@ public class CudaExecutioner extends DefaultOpExecutioner { throw new UnsupportedOperationException("Unknown op type: " + op.getOpType()); } + if (nativeOps.lastErrorCode() != 0) 
+ throw new RuntimeException(nativeOps.lastErrorMessage()); + AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.scalar()); profilingConfigurableHookOut(op, st); @@ -1423,6 +1445,8 @@ public class CudaExecutioner extends DefaultOpExecutioner { } } + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y()); @@ -1582,6 +1606,9 @@ public class CudaExecutioner extends DefaultOpExecutioner { batch.getSample().maxIndexArguments(), batch.getSample().maxRealArguments(), AtomicAllocator.getInstance().getPointer(surfaceBuffer, context), FlatBuffersMapper.getDataTypeAsByte(dataType)); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + surfacePoint.tickHostWrite(); } @@ -1676,6 +1703,9 @@ public class CudaExecutioner extends DefaultOpExecutioner { numIndexArguments, iPtr, numIntArrays, AtomicAllocator.getInstance().getPointer(realsBuffer.data(), context), numRealArguments, FlatBuffersMapper.getDataTypeAsByte(dataType)); + + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); } /** @@ -1739,6 +1769,9 @@ public class CudaExecutioner extends DefaultOpExecutioner { AtomicAllocator.getInstance().getPointer(op.extraArgsDataBuff(op.z().dataType()), context)); } + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + AtomicAllocator.getInstance().getFlowController().registerAction(context, op.z(), op.x(), op.y()); profilingConfigurableHookOut(op, st); @@ -1969,6 +2002,9 @@ public class CudaExecutioner extends DefaultOpExecutioner { nativeOps.decodeThreshold(extras, AtomicAllocator.getInstance().getPointer(buffer), compressedLength, AtomicAllocator.getInstance().getPointer(result), (LongPointer) AtomicAllocator.getInstance().getHostPointer(target.shapeInfoDataBuffer())); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + AtomicAllocator.getInstance().getAllocationPoint(result).tickDeviceWrite(); return target; @@ -2013,7 +2049,8 @@ public class CudaExecutioner extends DefaultOpExecutioner { (IntPointer) AtomicAllocator.getInstance().getPointer(buffer, context), (float) threshold); - + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); AtomicAllocator.getInstance().getFlowController().registerAction(context, indArray); @@ -2039,6 +2076,8 @@ public class CudaExecutioner extends DefaultOpExecutioner { nativeOps.decodeBitmap(extras, AtomicAllocator.getInstance().getPointer(encoded.data(), context), target.lengthLong(), AtomicAllocator.getInstance().getPointer(target, context), (LongPointer) AtomicAllocator.getInstance().getHostPointer(target.shapeInfoDataBuffer())); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); AtomicAllocator.getInstance().getFlowController().registerAction(context, target); @@ -2151,6 +2190,9 @@ public class CudaExecutioner extends DefaultOpExecutioner { OpaqueShapeList ptrptr = nativeOps.calculateOutputShapes2(null, hash, inputBuffers, inputShapes, op.inputArguments().length, tArgs, op.tArgs().length, iArgs, op.iArgs().length, bArgs, op.numBArguments()); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + if (ptrptr == null) throw new RuntimeException(); @@ -2221,109 +2263,6 @@ public class CudaExecutioner extends 
DefaultOpExecutioner { } catch (Exception e) { throw new RuntimeException("Op [" + name + "] execution failed", e); } - - /* - long st = profilingConfigurableHookIn(op); - - CudaContext context =(CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext(); - //AtomicAllocator.getInstance().getFlowController().prepareActionAllWrite(op.outputArguments()); - - if (extraz.get() == null) - extraz.set(new PointerPointer(32)); - - - PointerPointer extras = extraz.get().put( - new CudaPointer(1), - context.getOldStream(), - context.getBufferScalar(), - context.getBufferReduction()); - - val outputArgs = op.outputArguments(); - val inputArgs = op.inputArguments(); - - if (outputArgs.length == 0 && !op.isInplaceCall()) - throw new ND4JIllegalStateException("You can't execute non-inplace CustomOp without outputs being specified"); - - val lc = op.opName().toLowerCase(); - val hash = op.opHash(); - - - val inputShapes = new PointerPointer<>(inputArgs.length * 2); - val inputBuffers = new PointerPointer<>(inputArgs.length * 2); - - int cnt= 0; - for (val in: inputArgs) { - val hp = AtomicAllocator.getInstance().getHostPointer(in.shapeInfoDataBuffer()); - inputBuffers.put(cnt, AtomicAllocator.getInstance().getHostPointer(in)); - inputShapes.put(cnt, hp); - - - val dp = AtomicAllocator.getInstance().getPointer(in.shapeInfoDataBuffer(), context); - - inputBuffers.put(cnt + inputArgs.length, AtomicAllocator.getInstance().getPointer(in, context)); - inputShapes.put(cnt+ inputArgs.length, dp); - - if (op.isInplaceCall()) { - val ap = AtomicAllocator.getInstance().getAllocationPoint(in); - if (ap != null) - ap.tickHostWrite(); - } - - cnt++; - } - - - val outputShapes = new PointerPointer<>(outputArgs.length * 2); - val outputBuffers = new PointerPointer<>(outputArgs.length * 2); - - cnt= 0; - for (val out: outputArgs) { - outputBuffers.put(cnt, AtomicAllocator.getInstance().getHostPointer(out)); - outputShapes.put(cnt, AtomicAllocator.getInstance().getHostPointer(out.shapeInfoDataBuffer())); - - outputBuffers.put(cnt + outputArgs.length, AtomicAllocator.getInstance().getPointer(out, context)); - outputShapes.put(cnt + outputArgs.length, AtomicAllocator.getInstance().getPointer(out.shapeInfoDataBuffer(), context)); - - val ap = AtomicAllocator.getInstance().getAllocationPoint(out); - - if (ap != null) - ap.tickHostWrite(); - - cnt++; - } - - val iArgs = op.iArgs().length > 0 ? new LongPointer(op.iArgs().length) : null; - - cnt = 0; - for (val i: op.iArgs()) - iArgs.put(cnt++, i); - - - val tArgs = op.tArgs().length > 0 ? new DoublePointer(op.tArgs().length) : null; - - val bArgs = op.bArgs().length > 0 ? 
new BooleanPointer(op.numBArguments()) : null; - - cnt = 0; - for (val t: op.tArgs()) - tArgs.put(cnt++, t); - - cnt = 0; - for (val b: op.bArgs()) - bArgs.put(cnt++, b); - - try { - val status = OpStatus.byNumber(nativeOps.execCustomOp(extras, hash, inputBuffers, inputShapes, inputArgs.length, outputBuffers, outputShapes, outputArgs.length, tArgs, op.tArgs().length, iArgs, op.iArgs().length, bArgs, op.numBArguments(), op.isInplaceCall())); - if (status != OpStatus.ND4J_STATUS_OK) - throw new ND4JIllegalStateException("Op execution failed: " + status); - } catch (Exception e) { - throw new RuntimeException("Op [" + op.opName() + "] execution failed"); - } - - //AtomicAllocator.getInstance().getFlowController().prepareActionAllWrite(op.outputArguments()); - - profilingConfigurableHookOut(op, st); - return op.outputArguments(); - */ } @Override @@ -2341,6 +2280,9 @@ public class CudaExecutioner extends DefaultOpExecutioner { @Override public void registerGraph(long id, Pointer graph) { nativeOps.registerGraph(null, id, graph); + + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); } @Override @@ -2368,6 +2310,9 @@ public class CudaExecutioner extends DefaultOpExecutioner { OpaqueVariablesSet result = nativeOps.executeStoredGraph(null, id, ptrBuffers, ptrShapes, ptrIndices, map.size()); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + OpStatus status = OpStatus.byNumber(nativeOps.getVariablesSetStatus(result)); if (status != OpStatus.ND4J_STATUS_OK) @@ -2398,6 +2343,9 @@ public class CudaExecutioner extends DefaultOpExecutioner { newMap.put(nodeName, array); } + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + nativeOps.deleteVariablesSet(result); return newMap; @@ -2406,6 +2354,9 @@ public class CudaExecutioner extends DefaultOpExecutioner { @Override public void forgetGraph(long id) { nativeOps.unregisterGraph(null, id); + + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); } /** @@ -2474,6 +2425,9 @@ public class CudaExecutioner extends DefaultOpExecutioner { null, (LongPointer) AtomicAllocator.getInstance().getHostPointer(tadY.getFirst()), null, AtomicAllocator.getInstance().getPointer(updates, context), (LongPointer) AtomicAllocator.getInstance().getPointer(tadY.getFirst()), (LongPointer) AtomicAllocator.getInstance().getPointer(tadY.getSecond()), null, (IntPointer) AtomicAllocator.getInstance().getPointer(indices, context)); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + AtomicAllocator.getInstance().getFlowController().registerAction(context, array, indices, updates); } @@ -2490,9 +2444,14 @@ public class CudaExecutioner extends DefaultOpExecutioner { ((CudaOpContext) context).setCudaStream(ctx.getOldStream(), ctx.getBufferReduction(), ctx.getBufferAllocation()); val status = nativeOps.execCustomOp2(null, op.opHash(), context.contextPointer()); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + if (status != 0) throw new RuntimeException("Op [" + op.opName() + "] execution failed"); + + for (val arr:op.outputArguments()) AtomicAllocator.getInstance().registerAction(ctx, arr); @@ -2527,6 +2486,9 @@ public class CudaExecutioner extends DefaultOpExecutioner { nativeOps.inspectArray(extras, AtomicAllocator.getInstance().getHostPointer(array), (LongPointer) 
AtomicAllocator.getInstance().getHostPointer(array.shapeInfoDataBuffer()), AtomicAllocator.getInstance().getPointer(array, ctx), (LongPointer) AtomicAllocator.getInstance().getPointer(array.shapeInfoDataBuffer()), debugInfo); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + return INDArrayStatistics.builder() .minValue(debugInfo._minValue()) .maxValue(debugInfo._maxValue()) @@ -2545,6 +2507,9 @@ public class CudaExecutioner extends DefaultOpExecutioner { public DataBuffer createShapeInfo(long[] shape, long[] stride, long elementWiseStride, char order, DataType dtype, boolean empty) { OpaqueConstantDataBuffer dbf = nativeOps.shapeBuffer(shape.length, new LongPointer(shape), new LongPointer(stride), dtype.toInt(), order, elementWiseStride, empty); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + val result = new CudaLongDataBuffer(nativeOps.getConstantDataBufferPrimary(dbf), nativeOps.getConstantDataBufferSpecial(dbf), Shape.shapeInfoLength(shape.length)); nativeOps.deleteShapeBuffer(dbf); @@ -2556,6 +2521,9 @@ public class CudaExecutioner extends DefaultOpExecutioner { public TadPack tadShapeInfoAndOffsets(INDArray array, int[] dimension) { OpaqueTadPack pack = nativeOps.tadOnlyShapeInfo((LongPointer) array.shapeInfoDataBuffer().addressPointer(), new IntPointer(dimension), dimension.length); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + val tadShape = new CudaLongDataBuffer(nativeOps.getPrimaryShapeInfo(pack), nativeOps.getSpecialShapeInfo(pack), nativeOps.getShapeInfoLength(pack)); val tadOffsets = new CudaLongDataBuffer(nativeOps.getPrimaryOffsets(pack), nativeOps.getSpecialOffsets(pack), nativeOps.getNumberOfTads(pack)); @@ -2568,6 +2536,9 @@ public class CudaExecutioner extends DefaultOpExecutioner { public DataBuffer createConstantBuffer(long[] values, DataType desiredType) { OpaqueConstantDataBuffer dbf = nativeOps.constantBufferLong(desiredType.toInt(), new LongPointer(values), values.length); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + val buffer = Nd4j.createBuffer(nativeOps.getConstantDataBufferPrimary(dbf), nativeOps.getConstantDataBufferSpecial(dbf), values.length, desiredType); buffer.setConstant(true); @@ -2578,6 +2549,9 @@ public class CudaExecutioner extends DefaultOpExecutioner { public DataBuffer createConstantBuffer(double[] values, DataType desiredType) { OpaqueConstantDataBuffer dbf = nativeOps.constantBufferDouble(desiredType.toInt(), new DoublePointer(values), values.length); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + val buffer = Nd4j.createBuffer(nativeOps.getConstantDataBufferPrimary(dbf), nativeOps.getConstantDataBufferSpecial(dbf), values.length, desiredType); buffer.setConstant(true); diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java index 47cfa2584..603413fd6 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java @@ -449,6 +449,60 @@ public class Nd4jCuda extends org.nd4j.nativeblas.Nd4jCudaHelper { // #endif //DEV_TESTS_TADPACK_H +// Parsed from execution/ErrorReference.h + 
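The lastErrorCode()/lastErrorMessage() guard repeated throughout the hunks above relies on the native error state declared in the regenerated JavaCPP bindings reproduced below: an ErrorReference holding an error code and message, surfaced through two new top-level entry points. A minimal sketch of how Java-side code consumes that state after a native call; the checkNativeError helper is hypothetical, the NativeOps/NativeOpsHolder handle types are the usual nd4j bindings accessors not shown in this diff, and only lastErrorCode() and lastErrorMessage() come from the bindings in this patch:

    import org.nd4j.nativeblas.NativeOps;
    import org.nd4j.nativeblas.NativeOpsHolder;

    public class NativeErrorCheckSketch {
        // Hypothetical helper mirroring the guard this patch inlines after each native call.
        // A non-zero lastErrorCode() means the preceding native call stored an error in its
        // ErrorReference; lastErrorMessage() returns the stored message.
        static void checkNativeError(NativeOps nativeOps) {
            if (nativeOps.lastErrorCode() != 0)
                throw new RuntimeException(nativeOps.lastErrorMessage());
        }

        public static void main(String[] args) {
            NativeOps nativeOps = NativeOpsHolder.getInstance().getDeviceNativeOps();
            // ... invoke some native operation here ...
            checkNativeError(nativeOps);
        }
    }

Centralizing the check in a helper is only a sketch; the patch itself inlines the two-line guard at each call site, so a failure is reported immediately after the call that produced it.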
+/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +// #ifndef DEV_TESTS_ERRORREFERENCE_H +// #define DEV_TESTS_ERRORREFERENCE_H + +// #include +// #include + @Namespace("sd") @NoOffset public static class ErrorReference extends Pointer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public ErrorReference(Pointer p) { super(p); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public ErrorReference(long size) { super((Pointer)null); allocateArray(size); } + private native void allocateArray(long size); + @Override public ErrorReference position(long position) { + return (ErrorReference)super.position(position); + } + + public ErrorReference() { super((Pointer)null); allocate(); } + private native void allocate(); + + public native int errorCode(); + public native @Cast("char*") String errorMessage(); + + public native void setErrorCode(int errorCode); + public native void setErrorMessage(@StdString BytePointer message); + public native void setErrorMessage(@StdString String message); + } + + + +// #endif //DEV_TESTS_ERRORREFERENCE_H + + // Parsed from memory/MemoryType.h // @@ -688,6 +742,18 @@ bool verbose = false; // #include // #include +/** + * This function returns last error code stored, + * @return non-zero if something bad happened + */ +public native int lastErrorCode(); + +/** + * This function returns last error message, if last error code > 0 + * @return + */ +public native @Cast("char*") String lastErrorMessage(); + /** * * @param p @@ -1710,72 +1776,6 @@ public native void execScalarBoolTad(@Cast("Nd4jPointer*") PointerPointer extraP @Cast("Nd4jLong*") long[] tadShapeInfo, @Cast("Nd4jLong*") long[] tadOffsets, @Cast("Nd4jLong*") long[] tadShapeInfoZ, @Cast("Nd4jLong*") long[] tadOffsetsZ); - -/** -* Append an input array -* to the end of a flat array -* in a particular order -* @param offset the offset of the array to start at -* @param order the order -* @param result the result array -* @param resultShapeInfo the shape info for te array -* @param input the input for the array -* @param inputShapeInfo the shape information for that array -*/ -public native void flatten( - @Cast("Nd4jPointer*") PointerPointer extraPointers, - int offset, - char order, - Pointer result, @Cast("Nd4jLong*") LongPointer resultShapeInfo, - Pointer dresult, @Cast("Nd4jLong*") LongPointer dresultShapeInfo, - Pointer input, @Cast("Nd4jLong*") LongPointer inputShapeInfo, - Pointer dinput, @Cast("Nd4jLong*") LongPointer dinputShapeInfo); -public native void flatten( - @Cast("Nd4jPointer*") PointerPointer extraPointers, - int offset, - char order, - Pointer result, @Cast("Nd4jLong*") LongBuffer resultShapeInfo, - Pointer dresult, @Cast("Nd4jLong*") LongBuffer 
dresultShapeInfo, - Pointer input, @Cast("Nd4jLong*") LongBuffer inputShapeInfo, - Pointer dinput, @Cast("Nd4jLong*") LongBuffer dinputShapeInfo); -public native void flatten( - @Cast("Nd4jPointer*") PointerPointer extraPointers, - int offset, - char order, - Pointer result, @Cast("Nd4jLong*") long[] resultShapeInfo, - Pointer dresult, @Cast("Nd4jLong*") long[] dresultShapeInfo, - Pointer input, @Cast("Nd4jLong*") long[] inputShapeInfo, - Pointer dinput, @Cast("Nd4jLong*") long[] dinputShapeInfo); - -public native void concat( - @Cast("Nd4jPointer*") PointerPointer extraPointers, - int dimension, - int numArrays, - @Cast("Nd4jPointer*") PointerPointer data, @Cast("Nd4jPointer*") PointerPointer inputShapeInfo, - @Cast("Nd4jPointer*") PointerPointer ddata, @Cast("Nd4jPointer*") PointerPointer dinputShapeInfo, - Pointer result, @Cast("Nd4jLong*") LongPointer resultShapeInfo, - Pointer dresult, @Cast("Nd4jLong*") LongPointer dresultShapeInfo, - @Cast("Nd4jPointer*") PointerPointer tadPointers, @Cast("Nd4jPointer*") PointerPointer offsetPointers); -public native void concat( - @Cast("Nd4jPointer*") PointerPointer extraPointers, - int dimension, - int numArrays, - @Cast("Nd4jPointer*") PointerPointer data, @Cast("Nd4jPointer*") PointerPointer inputShapeInfo, - @Cast("Nd4jPointer*") PointerPointer ddata, @Cast("Nd4jPointer*") PointerPointer dinputShapeInfo, - Pointer result, @Cast("Nd4jLong*") LongBuffer resultShapeInfo, - Pointer dresult, @Cast("Nd4jLong*") LongBuffer dresultShapeInfo, - @Cast("Nd4jPointer*") PointerPointer tadPointers, @Cast("Nd4jPointer*") PointerPointer offsetPointers); -public native void concat( - @Cast("Nd4jPointer*") PointerPointer extraPointers, - int dimension, - int numArrays, - @Cast("Nd4jPointer*") PointerPointer data, @Cast("Nd4jPointer*") PointerPointer inputShapeInfo, - @Cast("Nd4jPointer*") PointerPointer ddata, @Cast("Nd4jPointer*") PointerPointer dinputShapeInfo, - Pointer result, @Cast("Nd4jLong*") long[] resultShapeInfo, - Pointer dresult, @Cast("Nd4jLong*") long[] dresultShapeInfo, - @Cast("Nd4jPointer*") PointerPointer tadPointers, @Cast("Nd4jPointer*") PointerPointer offsetPointers); - - public native void specialConcat( @Cast("Nd4jPointer*") PointerPointer extraPointers, int dimension, @@ -9950,6 +9950,7 @@ public static final int PREALLOC_SIZE = 33554432; // #include // #include +// #include @Namespace("nd4j") @NoOffset public static class ContextBuffers extends Pointer { static { Loader.load(); } /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ @@ -9985,6 +9986,8 @@ public static final int PREALLOC_SIZE = 33554432; public native void setScalarBuffer(Pointer pointer); public native void setAllocationBuffer(Pointer pointer); + public native ErrorReference errorReference(); + public native void triggerOwnership(@Cast("bool") boolean isOwner); public native int deviceId(); @@ -10038,6 +10041,7 @@ public static final int PREALLOC_SIZE = 33554432; // #include // #include // #include +// #include @Namespace("nd4j") @NoOffset public static class LaunchContext extends Pointer { static { Loader.load(); } @@ -10067,9 +10071,12 @@ public static final int PREALLOC_SIZE = 33554432; public native int getDeviceID(); public native void setDeviceID(int deviceID); + public native ErrorReference errorReference(); public static native @Cast("bool") boolean isInitialized(); public static native void releaseBuffers(); + + public static native LaunchContext defaultContext(); diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCudaPresets.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCudaPresets.java index 4466cf4b5..8f95fe5cb 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCudaPresets.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCudaPresets.java @@ -32,6 +32,7 @@ import org.bytedeco.javacpp.tools.InfoMapper; "array/ConstantDescriptor.h", "array/ConstantDataBuffer.h", "array/TadPack.h", + "execution/ErrorReference.h", "memory/MemoryType.h", "Environment.h", "types/utf8string.h", diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/linalg/cpu/nativecpu/CpuNDArrayFactory.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/linalg/cpu/nativecpu/CpuNDArrayFactory.java index 2b47103c3..cacf32b38 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/linalg/cpu/nativecpu/CpuNDArrayFactory.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/linalg/cpu/nativecpu/CpuNDArrayFactory.java @@ -106,6 +106,9 @@ public class CpuNDArrayFactory extends BaseNativeNDArrayFactory { functions.put(8, Loader.addressof("LAPACKE_sgesdd")); functions.put(9, Loader.addressof("LAPACKE_dgesdd")); nativeOps.initializeFunctions(functions); + + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); } @Override @@ -489,32 +492,7 @@ public class CpuNDArrayFactory extends BaseNativeNDArrayFactory { @Override public INDArray toFlattened(char order, Collection matrices) { Preconditions.checkArgument(matrices.size() > 0, "toFlattened expects > 0 operands"); -/* - int length = 0; - val list = new ArrayList(matrices); - val t = list.get(0).dataType(); - for (INDArray m : matrices) { - length += m.length(); - Preconditions.checkArgument(m.dataType() == t, "All operands must have same data type"); - } - INDArray ret = Nd4j.create(t, new long[] {length}, order); - int linearIndex = 0; - PointerPointer dummy = new PointerPointer(new Pointer[] {null}); - for (INDArray m : matrices) { - Nd4j.getCompressor().autoDecompress(m); - - nativeOps.flatten(dummy, linearIndex, order, - ret.data().addressPointer(), (LongPointer) ret.shapeInfoDataBuffer().addressPointer(), - null, null, - m.data().addressPointer(), - (LongPointer) m.shapeInfoDataBuffer().addressPointer(), - null, null); - - linearIndex += m.length(); - } - return ret; - */ return 
Nd4j.exec(new Flatten(order, matrices.toArray(new INDArray[matrices.size()])))[0]; } @@ -555,6 +533,9 @@ public class CpuNDArrayFactory extends BaseNativeNDArrayFactory { new LongPointerWrapper(tadBuffers.getSecond().pointer()) ); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + return result; } @@ -574,65 +555,6 @@ public class CpuNDArrayFactory extends BaseNativeNDArrayFactory { return toConcat[0]; return Nd4j.exec(new Concat(dimension, toConcat))[0]; - - // legacy implementation -/* - // if reusable var wasn't created for this thread, or is smaller then needed - set it to new value - if (extrazA.get() == null || extrazB.get() == null || extrazSize.get() == null || extrazSize.get() < toConcat.length) { - extrazA.set(new PointerPointer(toConcat.length)); - extrazB.set(new PointerPointer(toConcat.length)); - extrazSize.set(toConcat.length); - } - - PointerPointer shapeInfoPointers = extrazA.get(); - PointerPointer dataPointers = extrazB.get(); - int sumAlongDim = 0; - - long[] outputShape = ArrayUtil.copy(toConcat[0].shape()); - - boolean allScalars = true; - - for (int i = 0; i < toConcat.length; i++) { - Preconditions.checkState(toConcat[i].rank() == outputShape.length, "Encountered different array ranks for concat: input[0].shape()=%ndShape, input[%s].shape()=%ndShape", - toConcat[0], i, toConcat[i]); - - if (toConcat[i].isCompressed()) - Nd4j.getCompressor().decompressi(toConcat[i]); - - Preconditions.checkArgument(toConcat[i].dataType() == toConcat[0].dataType(), "All operands must have same data type: input 0 has type %s, input %s has type %s", - toConcat[0].dataType(), i, toConcat[i].dataType()); - - allScalars &= toConcat[i].rank() == 0; - - shapeInfoPointers.put(i, toConcat[i].shapeInfoDataBuffer().addressPointer()); - dataPointers.put(i, toConcat[i].data().addressPointer()); - sumAlongDim += toConcat[i].size(dimension); - for (int j = 0; j < toConcat[i].rank(); j++) { - - if (j != dimension && toConcat[i].size(j) != outputShape[j]) { - throw new IllegalArgumentException( - "Illegal concatenation at array " + i + " and shape element " + j); - } - } - } - - if (allScalars) { - outputShape = new long[]{sumAlongDim}; - } else { - outputShape[dimension] = sumAlongDim; - } - - INDArray ret = Nd4j.createUninitialized(toConcat[0].dataType(), outputShape, Nd4j.order()); - - nativeOps.concat(null, dimension, toConcat.length, - dataPointers, shapeInfoPointers, - null, null, - ret.data().addressPointer(), (LongPointer) ret.shapeInfoDataBuffer().addressPointer(), - null, null, - null, null); - - return ret; - */ } @@ -757,6 +679,8 @@ public class CpuNDArrayFactory extends BaseNativeNDArrayFactory { (LongPointer) zTadShapeInfo, new LongPointerWrapper(zTadOffsets)); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); return ret; } @@ -794,6 +718,9 @@ public class CpuNDArrayFactory extends BaseNativeNDArrayFactory { arrays.length, len); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + return target; } @@ -846,6 +773,9 @@ public class CpuNDArrayFactory extends BaseNativeNDArrayFactory { len, true); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + return target; } @@ -983,6 +913,8 @@ public class CpuNDArrayFactory extends BaseNativeNDArrayFactory { arrays.size(), ptrMap, tadPointers, offsetPointers); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); 
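As shown a little earlier in this file, toFlattened(...) and concat(...) drop their legacy per-array native loops and delegate to the generic custom-op path via Nd4j.exec(...). A minimal usage sketch of that path, assuming the Flatten and Concat op classes referenced in the diff (import locations are assumed and are not part of this patch):

    import java.util.Arrays;

    import org.nd4j.linalg.api.buffer.DataType;
    import org.nd4j.linalg.api.ndarray.INDArray;
    import org.nd4j.linalg.api.ops.impl.shape.Concat;
    import org.nd4j.linalg.api.ops.impl.shape.Flatten;
    import org.nd4j.linalg.factory.Nd4j;

    public class FlattenConcatSketch {
        public static void main(String[] args) {
            INDArray a = Nd4j.linspace(1, 6, 6, DataType.FLOAT).reshape(2, 3);
            INDArray b = Nd4j.linspace(7, 12, 6, DataType.FLOAT).reshape(2, 3);

            // Same op-based path toFlattened('c', ...) now uses: rank-1 result of length 12
            INDArray flat = Nd4j.exec(new Flatten('c', new INDArray[]{a, b}))[0];

            // Same op-based path concat(0, ...) now uses: result shape [4, 3]
            INDArray joined = Nd4j.exec(new Concat(0, new INDArray[]{a, b}))[0];

            System.out.println(Arrays.toString(flat.shape()));    // [12]
            System.out.println(Arrays.toString(joined.shape()));  // [4, 3]
        }
    }

Because both operations now run through the custom-op executioner, they also pass through the same lastErrorCode() guards added throughout this patch, so native-side failures in flatten/concat surface as Java exceptions rather than silently producing bad results.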
dataPointers.address(); shapePointers.address(); @@ -990,84 +922,6 @@ public class CpuNDArrayFactory extends BaseNativeNDArrayFactory { offsetPointers.address(); } - - /** - * This method converts Half-precision databuffer to current dType buffer. - * - * @param buffer - * @return - */ - /* - @Override - public DataBuffer restoreFromHalfs(DataBuffer buffer) { - if (buffer.dataType() != DataType.COMPRESSED) - throw new IllegalStateException("DataBuffer contains wrong data: " + buffer.dataType()); - - CompressedDataBuffer comp = (CompressedDataBuffer) buffer; - CompressionDescriptor descriptor = comp.getCompressionDescriptor(); - - DataBuffer targetBuffer = Nd4j.createBuffer(descriptor.getCompressedLength() / 2); - - if (Nd4j.dataType() == DataType.DOUBLE) { - nativeOps.convertHalfsToDoubles( - null, - comp.addressPointer(), - (int) descriptor.getCompressedLength() / 2, - targetBuffer.addressPointer() - ); - } else if (Nd4j.dataType() == DataType.FLOAT) { - nativeOps.convertHalfsToFloats( - null, - comp.addressPointer(), - (int) descriptor.getCompressedLength() / 2, - targetBuffer.addressPointer() - ); - } else { - throw new UnsupportedOperationException("Target dtype isn't supported: " + Nd4j.dataType()); - } - - return targetBuffer; - } - */ - - /** - * This method converts Single/Double precision databuffer to Half-precision databuffer - * - * @param buffer - * @return - */ - /*@Override - public DataBuffer convertToHalfs(DataBuffer buffer) { - // we allocate pointer - ShortPointer pointer = new ShortPointer(buffer.length()); - - if (buffer.dataType() == DataType.DOUBLE) { - nativeOps.convertDoublesToHalfs( - null, - buffer.addressPointer(), - (int) buffer.length(), - pointer - ); - } else if (buffer.dataType() == DataType.FLOAT) { - nativeOps.convertFloatsToHalfs( - null, - buffer.addressPointer(), - (int) buffer.length(), - pointer - ); - } else { - throw new UnsupportedOperationException("Source dtype isn't supported: " + buffer.dataType()); - } - - CompressionDescriptor descriptor = new CompressionDescriptor(buffer, new Float16()); - descriptor.setCompressedLength(buffer.length() * 2); - - - CompressedDataBuffer result = new CompressedDataBuffer(pointer, descriptor); - return result; - } - */ - /** * This method converts Single/Double precision databuffer to Half-precision databuffer * @@ -1081,6 +935,9 @@ public class CpuNDArrayFactory extends BaseNativeNDArrayFactory { throw new UnsupportedOperationException("Impossible to compress View. Consider using dup() before. 
"); DataBuffer buffer = convertDataEx(typeSrc, source.data(), typeDst); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + source.setData(buffer); if (buffer instanceof CompressedDataBuffer) @@ -1125,6 +982,9 @@ public class CpuNDArrayFactory extends BaseNativeNDArrayFactory { convertDataEx(typeSrc, source, typeDst, buffer); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + return buffer; } @@ -1132,6 +992,9 @@ public class CpuNDArrayFactory extends BaseNativeNDArrayFactory { public void convertDataEx(DataTypeEx typeSrc, Pointer source, DataTypeEx typeDst, Pointer target, long length) { nativeOps.convertTypes(null, typeSrc.ordinal(), source, length, typeDst.ordinal(), target); + + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); } @Override diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/linalg/cpu/nativecpu/ops/NativeOpExecutioner.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/linalg/cpu/nativecpu/ops/NativeOpExecutioner.java index 11373c440..e79c21feb 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/linalg/cpu/nativecpu/ops/NativeOpExecutioner.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/linalg/cpu/nativecpu/ops/NativeOpExecutioner.java @@ -234,6 +234,9 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { null); } + if (loop.lastErrorCode() != 0) + throw new RuntimeException(loop.lastErrorMessage()); + profilingConfigurableHookOut(op, st); return op.z(); } @@ -563,6 +566,9 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { } } + if (loop.lastErrorCode() != 0) + throw new RuntimeException(loop.lastErrorMessage()); + return ret; } @@ -644,6 +650,8 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { throw new UnsupportedOperationException(); } + if (loop.lastErrorCode() != 0) + throw new RuntimeException(loop.lastErrorMessage()); } public INDArray exec(ScalarOp op) { @@ -690,6 +698,9 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { throw new ND4JIllegalStateException("Unknown op type: [" + op.getOpType() +"]"); } + if (loop.lastErrorCode() != 0) + throw new RuntimeException(loop.lastErrorMessage()); + profilingConfigurableHookOut(op, st); return op.z(); @@ -886,6 +897,9 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { } + if (loop.lastErrorCode() != 0) + throw new RuntimeException(loop.lastErrorMessage()); + profilingConfigurableHookOut(op, st); } @@ -962,6 +976,8 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { throw new UnsupportedOperationException("Unknown operation type: [" + op.getOpType() + "]"); } + if (loop.lastErrorCode() != 0) + throw new RuntimeException(loop.lastErrorMessage()); return op.z(); } @@ -1091,6 +1107,9 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { batch.getSample().maxIntArrays(), batch.getSample().maxIntArraySize(), batch.getSample().maxIndexArguments(), batch.getSample().maxRealArguments(), pointer, FlatBuffersMapper.getDataTypeAsByte(dataType)); + if (loop.lastErrorCode() != 0) + throw new RuntimeException(loop.lastErrorMessage()); + } /** @@ -1197,6 +1216,8 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { numIndexArguments, intArrays, numIntArrays, block.getRealArgumentsPointer(), numRealArguments, 
FlatBuffersMapper.getDataTypeAsByte(dataType)); + if (loop.lastErrorCode() != 0) + throw new RuntimeException(loop.lastErrorMessage()); } /** @@ -1284,6 +1305,9 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { op.extraArgsDataBuff(op.z().dataType()).addressPointer()); } + if (loop.lastErrorCode() != 0) + throw new RuntimeException(loop.lastErrorMessage()); + profilingConfigurableHookOut(op, st); return op.z(); @@ -1370,6 +1394,9 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { (float) threshold); //long t2 = System.currentTimeMillis(); + if (loop.lastErrorCode() != 0) + throw new RuntimeException(loop.lastErrorMessage()); + if (cntAbs < 2) return null; @@ -1429,6 +1456,9 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { loop.convertTypes(null, DataTypeEx.THRESHOLD.ordinal(), buffer.addressPointer(), target.length(), typeDst.ordinal(), target.data().addressPointer()); + if (loop.lastErrorCode() != 0) + throw new RuntimeException(loop.lastErrorMessage()); + return target; } @@ -1460,6 +1490,9 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { (IntPointer) buffer.addressPointer(), (float) threshold); + if (loop.lastErrorCode() != 0) + throw new RuntimeException(loop.lastErrorMessage()); + return affected; } @@ -1473,6 +1506,9 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { (LongPointer) target.shapeInfoDataBuffer().addressPointer() ); + if (loop.lastErrorCode() != 0) + throw new RuntimeException(loop.lastErrorMessage()); + return target; } @@ -1673,136 +1709,6 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { } catch (Exception e) { throw new RuntimeException("Op [" + name + "] execution failed", e); } -/* - val name = op.opName().toLowerCase(); - val hash = op.opHash(); - - if (name.equals("noop")) { - return op.outputArguments(); - } - - val inputShapes = getInputShapes(op.numInputArguments()); - val inputBuffers = getInputBuffers(op.numInputArguments()); - - int cnt= 0; - val inputArgs = op.inputArguments(); - for (val in: inputArgs) { - if(in == null) - throw new NullPointerException("Input argument is null for op " + op.getClass().getName()); - - if (!in.isEmpty()) - inputBuffers.put(cnt, in.data().addressPointer()); - - inputShapes.put(cnt++, in.shapeInfoDataBuffer().addressPointer()); - } - - val outputArgs = op.outputArguments(); - for(int i = 0; i < outputArgs.length; i++) { - if(outputArgs[i] == null) - throw new ND4JIllegalStateException("Op output arguments must not be null! Op " + op.getClass().getName()); - } - - - val outputShapes = getOutputShapes(op.numOutputArguments()); - val outputBuffers = getOutputBuffers(op.numOutputArguments()); - - cnt= 0; - for (val out: outputArgs) { - if(out.isEmpty()){ - outputBuffers.put(cnt, null); - } else { - outputBuffers.put(cnt, out.data().addressPointer()); - } - outputShapes.put(cnt++, out.shapeInfoDataBuffer().addressPointer()); - } - - val iArgs = op.numIArguments() > 0 ? getLongPointerFrom(iArgsPointer,op.numIArguments()) : null; - val tArgs = op.numTArguments() > 0 ? getDoublePointerFrom(tArgsPointer,op.numTArguments()) : null; - val bArgs = op.numBArguments() > 0 ? 
getBooleanPointerFrom(bArgsPointer,op.numBArguments()) : null; - - cnt = 0; - val iArgs1 = op.iArgs(); - for (val i: iArgs1) - iArgs.put(cnt++, i); - - cnt = 0; - val bArgs1 = op.bArgs(); - for (val b: bArgs1) - bArgs.put(cnt++, b); - - cnt = 0; - val tArgs1 = op.tArgs(); - for (val t: tArgs1) - tArgs.put(cnt++, t); - - val t = op.numInputArguments(); - - OpStatus status = OpStatus.ND4J_STATUS_OK; - try { - val code = loop.execCustomOp( - null, - hash, - inputBuffers, - inputShapes, - op.numInputArguments(), - outputBuffers, - outputShapes, - op.numOutputArguments(), - tArgs, op.numTArguments(), - iArgs, op.numIArguments(), - bArgs, op.numBArguments(), - op.isInplaceCall()); - - status = OpStatus.byNumber(code); - - if (status != OpStatus.ND4J_STATUS_OK) - throw new ND4JIllegalStateException("Failed to execute op [" + name + "] with error code [" + status +"]"); - }catch(Exception e) { - val sb = new StringBuilder(); - sb.append("Inputs: [("); - for( int i=0; i 0) - sb.append("), ("); - sb.append(Shape.shapeToStringShort(inputArgs[i])); - } - sb.append(")]. Outputs: [("); - for( int i=0; i 0) - sb.append("), ("); - sb.append(Shape.shapeToStringShort(outputArgs[i])); - } - sb.append(")]. tArgs: "); - if(op.numTArguments() > 0){ - sb.append(Arrays.toString(op.tArgs())); - } else { - sb.append("-"); - } - sb.append(". iArgs: "); - if(op.numIArguments() > 0){ - sb.append(Arrays.toString(op.iArgs())); - } else { - sb.append("-"); - } - if(op instanceof DifferentialFunction){ - String n = ((DifferentialFunction) op).getOwnName(); - if(n != null && !n.equals(op.opName())){ - sb.append(". Op own name: \"").append(n).append("\""); - } - } - log.error("Failed to execute op " + op.opName() + ". Attempted to execute with " + - String.valueOf(op.numInputArguments()) + " inputs, " + - String.valueOf(op.numOutputArguments()) + " outputs, "+ - String.valueOf(op.numTArguments()) + " targs and " + - String.valueOf(op.numIArguments()) + " iargs. 
" + - sb.toString() + - " - Please see above message (printed out from c++) for a possible cause of error."); - throw e; - } - - profilingConfigurableHookOut(op, st); - - return op.outputArguments(); - */ } protected LongShapeDescriptor getShapeFromPointer(LongPointer ptr) { @@ -1870,6 +1776,9 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { ptrptr = loop.calculateOutputShapes2(null, hash, inputBuffers, inputShapes, op.numInputArguments(), tArgs, op.numTArguments(), iArgs, op.numIArguments(), bArgs, op.numBArguments()); + + if (loop.lastErrorCode() != 0) + throw new RuntimeException(loop.lastErrorMessage()); } catch (Throwable t){ StringBuilder sb = new StringBuilder(); sb.append("Inputs: [("); @@ -1893,6 +1802,9 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { throw t; } + if (loop.lastErrorCode() != 0) + throw new RuntimeException(loop.lastErrorMessage()); + if (ptrptr == null) throw new RuntimeException(); @@ -1929,6 +1841,9 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { @Override public void registerGraph(long id, Pointer graph) { loop.registerGraph(null, id, graph); + + if (loop.lastErrorCode() != 0) + throw new RuntimeException(loop.lastErrorMessage()); } @Override @@ -1952,7 +1867,10 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { val newMap = new LinkedHashMap(); - OpaqueVariablesSet result = loop.executeStoredGraph(null, id, ptrBuffers, ptrShapes, ptrIndices, map.size()); + OpaqueVariablesSet result = loop.executeStoredGraph(null, id, ptrBuffers, ptrShapes, ptrIndices, map.size()); + + if (loop.lastErrorCode() != 0) + throw new RuntimeException(loop.lastErrorMessage()); OpStatus status = OpStatus.byNumber(loop.getVariablesSetStatus(result)); @@ -1996,6 +1914,8 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { @Override public void forgetGraph(long id) { loop.unregisterGraph(null, id); + if (loop.lastErrorCode() != 0) + throw new RuntimeException(loop.lastErrorMessage()); } /** @@ -2055,6 +1975,9 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { array.data().addressPointer(), (LongPointer) tadX.getFirst().addressPointer(), (LongPointer) tadX.getSecond().addressPointer(), null, null, null, updates.data().addressPointer(), (LongPointer) tadY.getFirst().addressPointer(), (LongPointer) tadY.getSecond().addressPointer(), null, null, null, (IntPointer) indices.data().addressPointer(), null); + + if (loop.lastErrorCode() != 0) + throw new RuntimeException(loop.lastErrorMessage()); } @Override @@ -2078,6 +2001,10 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { val status = loop.execCustomOp2(null, op.opHash(), context.contextPointer()); + + if (loop.lastErrorCode() != 0) + throw new RuntimeException(loop.lastErrorMessage()); + if (status != 0) throw new RuntimeException("Op [" + op.opName() + "] execution failed"); @@ -2155,6 +2082,9 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { loop.inspectArray(null, array.data().addressPointer(), (LongPointer) array.shapeInfoDataBuffer().addressPointer(), null, null, debugInfo); + if (loop.lastErrorCode() != 0) + throw new RuntimeException(loop.lastErrorMessage()); + return INDArrayStatistics.builder() .minValue(debugInfo._minValue()) .maxValue(debugInfo._maxValue()) @@ -2171,6 +2101,8 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { @Override public DataBuffer createShapeInfo(long[] shape, long[] stride, long elementWiseStride, char order, DataType dtype, boolean empty) { 
OpaqueConstantDataBuffer dbf = loop.shapeBuffer(shape.length, new LongPointer(shape), new LongPointer(stride), dtype.toInt(), order, elementWiseStride, empty); + if (loop.lastErrorCode() != 0) + throw new RuntimeException(loop.lastErrorMessage()); val result = new LongBuffer(loop.getConstantDataBufferPrimary(dbf), Shape.shapeInfoLength(shape.length)); @@ -2183,6 +2115,9 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { public TadPack tadShapeInfoAndOffsets(INDArray array, int[] dimension) { OpaqueTadPack pack = loop.tadOnlyShapeInfo((LongPointer) array.shapeInfoDataBuffer().addressPointer(), new IntPointer(dimension), dimension.length); + if (loop.lastErrorCode() != 0) + throw new RuntimeException(loop.lastErrorMessage()); + val tadShape = new LongBuffer(loop.getPrimaryShapeInfo(pack), loop.getShapeInfoLength(pack)); val tadOffsets = new LongBuffer(loop.getPrimaryOffsets(pack), loop.getNumberOfTads(pack)); @@ -2205,11 +2140,19 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { @Override public String runLightBenchmarkSuit(boolean printOut) { - return loop.runLightBenchmarkSuit(printOut); + val s = loop.runLightBenchmarkSuit(printOut); + if (loop.lastErrorCode() != 0) + throw new RuntimeException(loop.lastErrorMessage()); + + return s; } @Override public String runFullBenchmarkSuit(boolean printOut) { - return loop.runFullBenchmarkSuit(printOut); + val s = loop.runFullBenchmarkSuit(printOut); + if (loop.lastErrorCode() != 0) + throw new RuntimeException(loop.lastErrorMessage()); + + return s; } } diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java index 8e71816f8..38c0cb8c4 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java @@ -467,6 +467,60 @@ public class Nd4jCpu extends org.nd4j.nativeblas.Nd4jCpuHelper { // #endif //DEV_TESTS_TADPACK_H +// Parsed from execution/ErrorReference.h + +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +// #ifndef DEV_TESTS_ERRORREFERENCE_H +// #define DEV_TESTS_ERRORREFERENCE_H + +// #include +// #include + @Namespace("sd") @NoOffset public static class ErrorReference extends Pointer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public ErrorReference(Pointer p) { super(p); } + /** Native array allocator. Access with {@link Pointer#position(long)}. 
*/ + public ErrorReference(long size) { super((Pointer)null); allocateArray(size); } + private native void allocateArray(long size); + @Override public ErrorReference position(long position) { + return (ErrorReference)super.position(position); + } + + public ErrorReference() { super((Pointer)null); allocate(); } + private native void allocate(); + + public native int errorCode(); + public native @Cast("char*") String errorMessage(); + + public native void setErrorCode(int errorCode); + public native void setErrorMessage(@StdString BytePointer message); + public native void setErrorMessage(@StdString String message); + } + + + +// #endif //DEV_TESTS_ERRORREFERENCE_H + + // Parsed from Environment.h /******************************************************************************* @@ -688,6 +742,18 @@ bool verbose = false; // #include // #include +/** + * This function returns last error code stored, + * @return non-zero if something bad happened + */ +public native int lastErrorCode(); + +/** + * This function returns last error message, if last error code > 0 + * @return + */ +public native @Cast("char*") String lastErrorMessage(); + /** * * @param p @@ -1710,72 +1776,6 @@ public native void execScalarBoolTad(@Cast("Nd4jPointer*") PointerPointer extraP @Cast("Nd4jLong*") long[] tadShapeInfo, @Cast("Nd4jLong*") long[] tadOffsets, @Cast("Nd4jLong*") long[] tadShapeInfoZ, @Cast("Nd4jLong*") long[] tadOffsetsZ); - -/** -* Append an input array -* to the end of a flat array -* in a particular order -* @param offset the offset of the array to start at -* @param order the order -* @param result the result array -* @param resultShapeInfo the shape info for te array -* @param input the input for the array -* @param inputShapeInfo the shape information for that array -*/ -public native void flatten( - @Cast("Nd4jPointer*") PointerPointer extraPointers, - int offset, - char order, - Pointer result, @Cast("Nd4jLong*") LongPointer resultShapeInfo, - Pointer dresult, @Cast("Nd4jLong*") LongPointer dresultShapeInfo, - Pointer input, @Cast("Nd4jLong*") LongPointer inputShapeInfo, - Pointer dinput, @Cast("Nd4jLong*") LongPointer dinputShapeInfo); -public native void flatten( - @Cast("Nd4jPointer*") PointerPointer extraPointers, - int offset, - char order, - Pointer result, @Cast("Nd4jLong*") LongBuffer resultShapeInfo, - Pointer dresult, @Cast("Nd4jLong*") LongBuffer dresultShapeInfo, - Pointer input, @Cast("Nd4jLong*") LongBuffer inputShapeInfo, - Pointer dinput, @Cast("Nd4jLong*") LongBuffer dinputShapeInfo); -public native void flatten( - @Cast("Nd4jPointer*") PointerPointer extraPointers, - int offset, - char order, - Pointer result, @Cast("Nd4jLong*") long[] resultShapeInfo, - Pointer dresult, @Cast("Nd4jLong*") long[] dresultShapeInfo, - Pointer input, @Cast("Nd4jLong*") long[] inputShapeInfo, - Pointer dinput, @Cast("Nd4jLong*") long[] dinputShapeInfo); - -public native void concat( - @Cast("Nd4jPointer*") PointerPointer extraPointers, - int dimension, - int numArrays, - @Cast("Nd4jPointer*") PointerPointer data, @Cast("Nd4jPointer*") PointerPointer inputShapeInfo, - @Cast("Nd4jPointer*") PointerPointer ddata, @Cast("Nd4jPointer*") PointerPointer dinputShapeInfo, - Pointer result, @Cast("Nd4jLong*") LongPointer resultShapeInfo, - Pointer dresult, @Cast("Nd4jLong*") LongPointer dresultShapeInfo, - @Cast("Nd4jPointer*") PointerPointer tadPointers, @Cast("Nd4jPointer*") PointerPointer offsetPointers); -public native void concat( - @Cast("Nd4jPointer*") PointerPointer extraPointers, - int dimension, - int 
numArrays, - @Cast("Nd4jPointer*") PointerPointer data, @Cast("Nd4jPointer*") PointerPointer inputShapeInfo, - @Cast("Nd4jPointer*") PointerPointer ddata, @Cast("Nd4jPointer*") PointerPointer dinputShapeInfo, - Pointer result, @Cast("Nd4jLong*") LongBuffer resultShapeInfo, - Pointer dresult, @Cast("Nd4jLong*") LongBuffer dresultShapeInfo, - @Cast("Nd4jPointer*") PointerPointer tadPointers, @Cast("Nd4jPointer*") PointerPointer offsetPointers); -public native void concat( - @Cast("Nd4jPointer*") PointerPointer extraPointers, - int dimension, - int numArrays, - @Cast("Nd4jPointer*") PointerPointer data, @Cast("Nd4jPointer*") PointerPointer inputShapeInfo, - @Cast("Nd4jPointer*") PointerPointer ddata, @Cast("Nd4jPointer*") PointerPointer dinputShapeInfo, - Pointer result, @Cast("Nd4jLong*") long[] resultShapeInfo, - Pointer dresult, @Cast("Nd4jLong*") long[] dresultShapeInfo, - @Cast("Nd4jPointer*") PointerPointer tadPointers, @Cast("Nd4jPointer*") PointerPointer offsetPointers); - - public native void specialConcat( @Cast("Nd4jPointer*") PointerPointer extraPointers, int dimension, @@ -22877,6 +22877,7 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD(); // #include // #include +// #include @Namespace("nd4j") @NoOffset public static class ContextBuffers extends Pointer { static { Loader.load(); } /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ @@ -22912,6 +22913,8 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD(); public native void setScalarBuffer(Pointer pointer); public native void setAllocationBuffer(Pointer pointer); + public native ErrorReference errorReference(); + public native void triggerOwnership(@Cast("bool") boolean isOwner); public native int deviceId(); @@ -22961,6 +22964,7 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD(); // #include // #include // #include +// #include @Namespace("nd4j") @NoOffset public static class LaunchContext extends Pointer { static { Loader.load(); } @@ -22985,9 +22989,12 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD(); public native int getDeviceID(); public native void setDeviceID(int deviceID); + public native ErrorReference errorReference(); public static native @Cast("bool") boolean isInitialized(); public static native void releaseBuffers(); + + public static native LaunchContext defaultContext(); diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpuPresets.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpuPresets.java index 58a2a7d02..554016686 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpuPresets.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpuPresets.java @@ -38,6 +38,7 @@ import java.util.Scanner; "array/ConstantDataBuffer.h", "array/ConstantDescriptor.h", "array/TadPack.h", + "execution/ErrorReference.h", "Environment.h", "types/utf8string.h", "NativeOps.h", diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/Nd4jTestsC.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/Nd4jTestsC.java index b302c8c0f..915d6f650 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/Nd4jTestsC.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/Nd4jTestsC.java @@ -5216,6 +5216,8 @@ public class Nd4jTestsC extends BaseNd4jTest { INDArray array = Nd4j.create(new double[] {10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}); INDArray 
exp = Nd4j.create(new double[] {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}); + log.info("Array shapeInfo: {}", array.shapeInfoJava()); + INDArray rev = Nd4j.reverse(array); assertEquals(exp, rev); @@ -5226,7 +5228,7 @@ public class Nd4jTestsC extends BaseNd4jTest { INDArray array = Nd4j.create(new double[] {9, 8, 7, 6, 5, 4, 3, 2, 1, 0}); INDArray exp = Nd4j.create(new double[] {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}); - INDArray rev = Nd4j.getExecutioner().exec(new Reverse(array, Nd4j.createUninitialized(array.length())))[0]; + INDArray rev = Nd4j.getExecutioner().exec(new Reverse(array, array.ulike()))[0]; assertEquals(exp, rev); } @@ -5236,7 +5238,7 @@ public class Nd4jTestsC extends BaseNd4jTest { INDArray array = Nd4j.create(new double[] {10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}); INDArray exp = Nd4j.create(new double[] {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}); - INDArray rev = Nd4j.getExecutioner().exec(new Reverse(array, Nd4j.createUninitialized(array.length())))[0]; + INDArray rev = Nd4j.getExecutioner().exec(new Reverse(array,array.ulike()))[0]; assertEquals(exp, rev); } @@ -5335,11 +5337,103 @@ public class Nd4jTestsC extends BaseNd4jTest { assertNotNull(lsd); //Fails here on CUDA, OK on native/cpu } + @Test + public void testReverseSmall_1() { + val array = Nd4j.linspace(1, 10, 10, DataType.INT); + val exp = array.dup(array.ordering()); + + Transforms.reverse(array, false); + Transforms.reverse(array, false); + + val jexp = exp.data().asInt(); + val jarr = array.data().asInt(); + assertArrayEquals(jexp, jarr); + assertEquals(exp, array); + } + + @Test + public void testReverseSmall_2() { + val array = Nd4j.linspace(1, 10, 10, DataType.INT); + val exp = array.dup(array.ordering()); + + val reversed = Transforms.reverse(array, true); + val rereversed = Transforms.reverse(reversed, true); + + val jexp = exp.data().asInt(); + val jarr = rereversed.data().asInt(); + assertArrayEquals(jexp, jarr); + assertEquals(exp, rereversed); + } + + @Test + public void testReverseSmall_3() { + val array = Nd4j.linspace(1, 11, 11, DataType.INT); + val exp = array.dup(array.ordering()); + + Transforms.reverse(array, false); + + log.info("Reversed shapeInfo: {}", array.shapeInfoJava()); + log.info("Reversed: {}", array); + + Transforms.reverse(array, false); + + val jexp = exp.data().asInt(); + val jarr = array.data().asInt(); + assertArrayEquals(jexp, jarr); + assertEquals(exp, array); + } + + @Test + public void testReverseSmall_4() { + val array = Nd4j.linspace(1, 11, 11, DataType.INT); + val exp = array.dup(array.ordering()); + + val reversed = Transforms.reverse(array, true); + + log.info("Reversed: {}", reversed); + + val rereversed = Transforms.reverse(reversed, true); + + val jexp = exp.data().asInt(); + val jarr = rereversed.data().asInt(); + assertArrayEquals(jexp, jarr); + assertEquals(exp, rereversed); + } + + @Test + public void testReverse_1() { + val array = Nd4j.linspace(1, 2017152, 2017152, DataType.INT); + val exp = array.dup(array.ordering()); + + Transforms.reverse(array, false); + Transforms.reverse(array, false); + + val jexp = exp.data().asInt(); + val jarr = array.data().asInt(); + assertArrayEquals(jexp, jarr); + assertEquals(exp, array); + } + + @Test + public void testReverse_2() { + val array = Nd4j.linspace(1, 2017152, 2017152, DataType.INT); + val exp = array.dup(array.ordering()); + + val reversed = Transforms.reverse(array, true); + val rereversed = Transforms.reverse(reversed, true); + + val jexp = exp.data().asInt(); + val jarr = rereversed.data().asInt(); + assertArrayEquals(jexp, jarr); + 
assertEquals(exp, rereversed); + } + @Test public void testNativeSort3_1() { INDArray array = Nd4j.linspace(1, 2017152, 2017152, DataType.DOUBLE).reshape(1, -1); INDArray exp = array.dup(); Transforms.reverse(array, false); + log.info("Reverse: {}", array); long time1 = System.currentTimeMillis(); From e92f7218f3bfe5833faa00670a5144903e552d95 Mon Sep 17 00:00:00 2001 From: Alex Black Date: Tue, 27 Aug 2019 12:15:56 +1000 Subject: [PATCH 13/56] Add new tests (#171) Signed-off-by: AlexDBlack --- .../opvalidation/MiscOpValidation.java | 58 +++++++++++++++++++ .../opvalidation/RandomOpValidation.java | 9 +++ .../opvalidation/ShapeOpValidation.java | 20 +++++++ .../opvalidation/TransformOpValidation.java | 39 +++++++++++++ 4 files changed, 126 insertions(+) diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/MiscOpValidation.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/MiscOpValidation.java index 2a4b032b5..99a2f57ac 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/MiscOpValidation.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/MiscOpValidation.java @@ -438,6 +438,27 @@ public class MiscOpValidation extends BaseOpValidation { assertEquals(failed.toString(), 0, failed.size()); } + @Test + public void testScatterUpdate(){ + INDArray x = Nd4j.linspace(DataType.FLOAT, 1, 30, 1).reshape(10, 3); + INDArray updates = Nd4j.create(new float[][]{ + {100, 101, 102}, + {200, 201, 202}}); + INDArray indices = Nd4j.createFromArray(2, 5); + + INDArray exp = x.dup(); + exp.putRow(2, updates.getRow(0)); + exp.putRow(5, updates.getRow(1)); + + INDArray out = exp.ulike(); + Nd4j.exec(DynamicCustomOp.builder("scatter_upd") + .addInputs(x, indices, updates) + .addOutputs(out) + .build()); + + assertEquals(exp, out); + } + @Test public void testGatherGradient() { Nd4j.getRandom().setSeed(12345); @@ -1688,4 +1709,41 @@ public class MiscOpValidation extends BaseOpValidation { Nd4j.getExecutioner().exec(op); } + + @Test + public void testHistogramFixedWidth(){ + //Bins: [-inf, 0.2), [0.2, 0.4), [0.4, 0.6), [0.6, 0.8), [0.8, inf] + INDArray in = Nd4j.createFromArray(0.0, 0.1, 0.1, 0.3, 0.5, 0.5, 0.9); + INDArray range = Nd4j.createFromArray(0.0, 1.0); + INDArray n = Nd4j.scalar(5); + + INDArray out = Nd4j.create(DataType.INT, 5); + + Nd4j.exec(DynamicCustomOp.builder("histogram_fixed_width") + .addInputs(in, range, n) + .addOutputs(out) + .build()); + + INDArray exp = Nd4j.createFromArray(3, 1, 2, 0, 1); + assertEquals(exp, out); + } + + @Test + public void testListDiff(){ + INDArray x = Nd4j.createFromArray(0, 1, 2, 3); + INDArray y = Nd4j.createFromArray(3, 1); + + INDArray out = Nd4j.create(DataType.INT, 2); + INDArray outIdx = Nd4j.create(DataType.INT, 2); + + Nd4j.exec(DynamicCustomOp.builder("listdiff") + .addInputs(x, y) + .addOutputs(out, outIdx) + .build()); + + INDArray exp = Nd4j.createFromArray(0, 2); + + assertEquals(exp, out); //Values in x not in y + assertEquals(exp, outIdx); //Indices of the values in x not in y + } } diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/RandomOpValidation.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/RandomOpValidation.java index 646cae454..8d64f6404 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/RandomOpValidation.java +++ 
b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/RandomOpValidation.java @@ -27,6 +27,7 @@ import org.nd4j.autodiff.validation.TestCase; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.iter.NdIndexIterator; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.api.ops.impl.reduce.bool.All; import org.nd4j.linalg.api.ops.random.custom.RandomBernoulli; import org.nd4j.linalg.api.ops.random.custom.RandomExponential; import org.nd4j.linalg.api.ops.random.impl.BinomialDistribution; @@ -371,6 +372,14 @@ public class RandomOpValidation extends BaseOpValidation { assertNull(OpValidation.validate(tc)); } + } + @Test + public void testAllEmptyReduce(){ + INDArray x = Nd4j.createFromArray(true, true, true); + All all = new All(x); + all.setEmptyReduce(true); //For TF compatibility - empty array for axis (which means no-op - and NOT all array reduction) + INDArray out = Nd4j.exec(all); + assertEquals(x, out); } } diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/ShapeOpValidation.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/ShapeOpValidation.java index 2965f367f..ffb585183 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/ShapeOpValidation.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/ShapeOpValidation.java @@ -1342,6 +1342,26 @@ public class ShapeOpValidation extends BaseOpValidation { assertEquals(failed.toString(), 0, failed.size()); } + @Test + public void testSegmentMean(){ + INDArray x = Nd4j.linspace(DataType.FLOAT, 1, 18, 1).reshape(6, 3); + INDArray segmentIds = Nd4j.createFromArray(0, 0, 1, 1, 2, 2); + + INDArray out = Nd4j.create(DataType.FLOAT, 3, 3); + + Nd4j.exec(DynamicCustomOp.builder("segment_mean") + .addInputs(x, segmentIds) + .addOutputs(out) + .build()); + + INDArray exp = out.like(); + exp.putRow(0, x.getRow(0).add(x.getRow(1)).muli(0.5)); + exp.putRow(1, x.getRow(2).add(x.getRow(3)).muli(0.5)); + exp.putRow(2, x.getRow(4).add(x.getRow(5)).muli(0.5)); + + assertEquals(exp, out); + } + @Test public void testSequenceMask() { OpValidationSuite.ignoreFailing(); //2018-01-09: output datatype issue? 
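As a quick cross-check of the segment_mean expectation above: with segment ids {0, 0, 1, 1, 2, 2}, each output row is simply the mean of two consecutive input rows. The sketch below restates that arithmetic with plain row operations (variable names mirror the test and are illustrative only):

    INDArray x = Nd4j.linspace(DataType.FLOAT, 1, 18, 1).reshape(6, 3);
    INDArray expected = Nd4j.create(DataType.FLOAT, 3, 3);
    for (int s = 0; s < 3; s++) {
        // rows 2*s and 2*s + 1 share segment id s, so the segment mean is their average
        expected.putRow(s, x.getRow(2 * s).add(x.getRow(2 * s + 1)).muli(0.5));
    }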
diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/TransformOpValidation.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/TransformOpValidation.java index 0d177027d..9183a0884 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/TransformOpValidation.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/TransformOpValidation.java @@ -415,6 +415,24 @@ public class TransformOpValidation extends BaseOpValidation { assertNull(err, err); } + @Test + public void testDynamicPartition2(){ + INDArray data = Nd4j.createFromArray(2, 1, 2, 0); + INDArray partitions = Nd4j.createFromArray(0, 2, 1, 0); + INDArray[] out = Nd4j.exec(DynamicCustomOp.builder("dynamic_partition") + .addOutputs(Nd4j.createUninitialized(DataType.INT, 2), Nd4j.createUninitialized(DataType.INT, 1), Nd4j.createUninitialized(DataType.INT, 1)) + .addIntegerArguments(3) //3 partitions + .addInputs(data, partitions).build()); + + INDArray exp0 = Nd4j.createFromArray(2, 0); + INDArray exp1 = Nd4j.createFromArray(2); + INDArray exp2 = Nd4j.createFromArray(1); + + assertEquals(exp0, out[0]); //Usually just gives [0,0] + assertEquals(exp1, out[1]); + assertEquals(exp2, out[2]); + } + @Test public void testDynamicStitch() { SameDiff sd = SameDiff.create(); @@ -1612,6 +1630,27 @@ public class TransformOpValidation extends BaseOpValidation { } } + @Test + public void testTopK1(){ + INDArray x = Nd4j.createFromArray(0.0, 0.0, 0.0, 10.0, 0.0); + INDArray k = Nd4j.scalar(1); + INDArray outValue = Nd4j.create(DataType.DOUBLE, 1); + INDArray outIdx = Nd4j.create(DataType.INT, 1); + + Nd4j.exec(DynamicCustomOp.builder("top_k") + .addInputs(x, k) + .addOutputs(outValue, outIdx) + .addBooleanArguments(false) //not sorted + .addIntegerArguments(1) + .build()); + + INDArray expValue = Nd4j.createFromArray(10.0); + INDArray expIdx = Nd4j.createFromArray(3); + + assertEquals(expValue, outValue); + assertEquals(expIdx, outIdx); + } + @Test public void testInTopK() { for( int k=4; k>= 1; k--){ From df84bc7255b53c359ab1e2284eab73de4d390a79 Mon Sep 17 00:00:00 2001 From: raver119 Date: Tue, 27 Aug 2019 10:37:10 +0300 Subject: [PATCH 14/56] [WIP] More tweaks (#173) * CUDA empty reduction Signed-off-by: raver119 * - listdiff synchronization fix for CUDA - listdiff test Signed-off-by: raver119 * - IndexReduce ops now allow INDEXING_TYPES output - topK op accepts only INDEXING_TYPES as output Signed-off-by: raver119 --- libnd4j/blas/NDArray.hpp | 4 +- libnd4j/blas/cpu/NativeOpExecutioner.cpp | 6 +- libnd4j/blas/cuda/NativeOpExecutioner.cu | 14 +-- libnd4j/include/helpers/Loops.h | 6 +- .../helpers/cpu/loops/IndexReductionLoops.cpp | 35 +++--- libnd4j/include/loops/cpu/indexreduce.cpp | 32 +++--- libnd4j/include/loops/cuda/indexreduce.cu | 105 +++++++++--------- libnd4j/include/loops/indexreduce.h | 16 +-- .../declarable/generic/parity_ops/top_k.cpp | 2 +- .../ops/declarable/helpers/impl/listdiff.cpp | 25 +---- libnd4j/include/ops/ops.h | 12 +- .../ops/executioner/CudaExecutioner.java | 47 +++++++- .../nd4j/linalg/custom/CustomOpsTests.java | 40 +++++++ .../org/nd4j/linalg/shape/EmptyTests.java | 10 ++ 14 files changed, 217 insertions(+), 137 deletions(-) diff --git a/libnd4j/blas/NDArray.hpp b/libnd4j/blas/NDArray.hpp index fdbcae49f..72b029c0b 100644 --- a/libnd4j/blas/NDArray.hpp +++ b/libnd4j/blas/NDArray.hpp @@ -3590,8 +3590,8 @@ void NDArray::applyIndexReduce(nd4j::indexreduce::Ops op, NDArray* target, const if (isS()) 
throw std::runtime_error("NDArray::applyIndexReduce: you can't use this method on String array!"); - if (target->dataType() != nd4j::DataType::INT64) - throw std::runtime_error("NDArray::applyIndexReduce operations return INT64"); + if (target->dataType() != nd4j::DataType::INT64 && target->dataType() != nd4j::DataType::INT32) + throw std::runtime_error("NDArray::applyIndexReduce operations return INT32/INT64"); void* params = extraParams != nullptr ? const_cast(extraParams)->argumentsAsT(this->dataType()) : nullptr; diff --git a/libnd4j/blas/cpu/NativeOpExecutioner.cpp b/libnd4j/blas/cpu/NativeOpExecutioner.cpp index e320b4f57..b2ce7846a 100644 --- a/libnd4j/blas/cpu/NativeOpExecutioner.cpp +++ b/libnd4j/blas/cpu/NativeOpExecutioner.cpp @@ -79,9 +79,10 @@ void NativeOpExecutioner::execIndexReduceScalar(nd4j::LaunchContext *lc, int op #endif auto xType = nd4j::ArrayOptions::dataType(hXShapeInfo); + auto zType = nd4j::ArrayOptions::dataType(hZShapeInfo); auto hz = reinterpret_cast(hZ); - BUILD_SINGLE_SELECTOR(xType, hz[0] = functions::indexreduce::IndexReduce, ::execScalar(opNum,hX,hXShapeInfo,extraParams), LIBND4J_TYPES); + BUILD_DOUBLE_SELECTOR(xType, zType, hz[0] = functions::indexreduce::IndexReduce, ::execScalar(opNum,hX,hXShapeInfo,extraParams), LIBND4J_TYPES, INDEXING_TYPES); } //////////////////////////////////////////////////////////////////////// @@ -111,9 +112,10 @@ void NativeOpExecutioner::execIndexReduce(nd4j::LaunchContext *lc, #endif auto xType = nd4j::ArrayOptions::dataType(hXShapeInfo); + auto zType = nd4j::ArrayOptions::dataType(hZShapeInfo); Nd4jLong* hz = reinterpret_cast(hZ); - BUILD_SINGLE_SELECTOR(xType, functions::indexreduce::IndexReduce, ::exec(opNum, hX, hXShapeInfo, extraParams, hz, hZShapeInfo, dimension, dimensionLength, tadShapeInfo, tadOffsets), LIBND4J_TYPES); + BUILD_DOUBLE_SELECTOR(xType, zType, functions::indexreduce::IndexReduce, ::exec(opNum, hX, hXShapeInfo, extraParams, hz, hZShapeInfo, dimension, dimensionLength, tadShapeInfo, tadOffsets), LIBND4J_TYPES, INDEXING_TYPES); // BUILD_SINGLE_SELECTOR(xType, functions::indexreduce::IndexReduce, ::exec(opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, hZShapeInfo, dZ, dZShapeInfo, dimension, dimensionLength, tadShapeInfo, tadOffsets), LIBND4J_TYPES); } diff --git a/libnd4j/blas/cuda/NativeOpExecutioner.cu b/libnd4j/blas/cuda/NativeOpExecutioner.cu index b3573c7ab..8c4f1d3fa 100644 --- a/libnd4j/blas/cuda/NativeOpExecutioner.cu +++ b/libnd4j/blas/cuda/NativeOpExecutioner.cu @@ -475,12 +475,12 @@ void NativeOpExecutioner::execIndexReduce(nd4j::LaunchContext *lc, auto numBlocks = shape::length(hZShapeInfo); dim3 launchDims(numBlocks, 256, 32768); - if (zType != nd4j::DataType::INT64) - throw datatype_exception::build("NativeOpExecutioner::execIndexReduce requires Z operand to have INT64 type", zType); + if (zType != nd4j::DataType::INT64 && zType != nd4j::DataType::INT32) + throw datatype_exception::build("NativeOpExecutioner::execIndexReduce requires Z operand to have INT32/INT64 type", zType); auto dz = reinterpret_cast(dZ); - BUILD_SINGLE_SELECTOR(xType, functions::indexreduce::IndexReduce, ::executeIndexReduce(launchDims, stream, opNum, dX, dXShapeInfo, shape::rank(hXShapeInfo), extraParams, dz, dZShapeInfo, shape::rank(hZShapeInfo), dimension, dimensionLength, 1, allocationPointer, reductionPointer, tadShapeInfo, tadOffsets), LIBND4J_TYPES); + BUILD_DOUBLE_SELECTOR(xType, zType, functions::indexreduce::IndexReduce, ::executeIndexReduce(launchDims, stream, opNum, dX, dXShapeInfo, 
shape::rank(hXShapeInfo), extraParams, dz, dZShapeInfo, shape::rank(hZShapeInfo), dimension, dimensionLength, 1, allocationPointer, reductionPointer, tadShapeInfo, tadOffsets), LIBND4J_TYPES, INDEXING_TYPES); // TODO: remove after the release auto res = cudaStreamSynchronize(*stream); @@ -567,12 +567,12 @@ void NativeOpExecutioner::execIndexReduceScalar(nd4j::LaunchContext *lc, // FIXME: we want Z to be one of integer types //if (!DataTypeUtils::isZ(zType)) // throw nd4j::datatype_exception("NativeOpExecutioner::execIndexReduceScalar requires Z operand to have one of integer types") - if (zType != nd4j::DataType::INT64) - throw nd4j::datatype_exception::build("NativeOpExecutioner::execIndexReduceScalar requires Z operand to have INT64 data type", zType); + if (zType != nd4j::DataType::INT64 && zType != nd4j::DataType::INT32) + throw nd4j::datatype_exception::build("NativeOpExecutioner::execIndexReduceScalar requires Z operand to have INT32/INT64 data type", zType); auto dz = reinterpret_cast(dZ); - BUILD_SINGLE_SELECTOR(xType, functions::indexreduce::IndexReduce, ::executeIndexReduceScalar(launchDims, stream, + BUILD_DOUBLE_SELECTOR(xType, zType, functions::indexreduce::IndexReduce, ::executeIndexReduceScalar(launchDims, stream, opNum, dX, dXShapeInfo, shape::rank(hXShapeInfo), extraParams, @@ -580,7 +580,7 @@ void NativeOpExecutioner::execIndexReduceScalar(nd4j::LaunchContext *lc, nullptr, 0, 1, allocationPointer, reductionPointer, - nullptr, nullptr), LIBND4J_TYPES); + nullptr, nullptr), LIBND4J_TYPES, INDEXING_TYPES); // TODO: remove after the release auto res = cudaStreamSynchronize(*stream); if (res != 0) diff --git a/libnd4j/include/helpers/Loops.h b/libnd4j/include/helpers/Loops.h index bda04414f..d04d3315d 100644 --- a/libnd4j/include/helpers/Loops.h +++ b/libnd4j/include/helpers/Loops.h @@ -80,14 +80,14 @@ namespace nd4j { }; - template + template class ND4J_EXPORT IndexReductionLoops { private: public: - static void wrapIndexReduce(const int opNum, void* x, Nd4jLong* xShapeInfo, Nd4jLong* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* extraParams); + static void wrapIndexReduce(const int opNum, void* x, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* extraParams); template - static void loopIndexReduce(X* x, Nd4jLong* xShapeInfo, Nd4jLong* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, X* extraParams); + static void loopIndexReduce(X* x, Nd4jLong* xShapeInfo, Z* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, X* extraParams); }; diff --git a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops.cpp b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops.cpp index 33e230bd5..0a096b65f 100644 --- a/libnd4j/include/helpers/cpu/loops/IndexReductionLoops.cpp +++ b/libnd4j/include/helpers/cpu/loops/IndexReductionLoops.cpp @@ -24,10 +24,10 @@ using namespace simdOps; ////////////////////////////////////////////////////////////////////////////// -template +template template -void nd4j::IndexReductionLoops::loopIndexReduce(X* x, Nd4jLong* xShapeInfo, - Nd4jLong* z, Nd4jLong* zShapeInfo, +void nd4j::IndexReductionLoops::loopIndexReduce(X* x, Nd4jLong* xShapeInfo, + Z* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, X* extraParams) { @@ -62,7 +62,7 @@ void nd4j::IndexReductionLoops::loopIndexReduce(X* x, Nd4jLong* xShapeInfo, indexValue = OpType::update(indexValue, comp, extraParams); } - z[i] = indexValue.index; + z[i] = (Z) 
indexValue.index; } } break; @@ -80,7 +80,7 @@ void nd4j::IndexReductionLoops::loopIndexReduce(X* x, Nd4jLong* xShapeInfo, indexValue = OpType::update(indexValue, comp, extraParams); } - z[i * zEws] = indexValue.index; + z[i * zEws] = (Z) indexValue.index; } } break; @@ -98,7 +98,7 @@ void nd4j::IndexReductionLoops::loopIndexReduce(X* x, Nd4jLong* xShapeInfo, indexValue = OpType::update(indexValue, comp, extraParams); } - z[i] = indexValue.index; + z[i] = (Z) indexValue.index; } } break; @@ -122,7 +122,7 @@ void nd4j::IndexReductionLoops::loopIndexReduce(X* x, Nd4jLong* xShapeInfo, } } - z[i] = indexValue.index; + z[i] = (Z) indexValue.index; } } break; @@ -148,7 +148,7 @@ void nd4j::IndexReductionLoops::loopIndexReduce(X* x, Nd4jLong* xShapeInfo, } } - z[i] = indexValue.index; + z[i] = (Z) indexValue.index; } } break; @@ -176,7 +176,7 @@ void nd4j::IndexReductionLoops::loopIndexReduce(X* x, Nd4jLong* xShapeInfo, } } - z[i] = indexValue.index; + z[i] = (Z) indexValue.index; } } break; @@ -206,7 +206,7 @@ void nd4j::IndexReductionLoops::loopIndexReduce(X* x, Nd4jLong* xShapeInfo, } } - z[i] = indexValue.index; + z[i] = (Z) indexValue.index; } } break; @@ -227,7 +227,7 @@ void nd4j::IndexReductionLoops::loopIndexReduce(X* x, Nd4jLong* xShapeInfo, } auto zOffset = shape::indexOffset(i, zShapeInfo, castZShapeInfo, zLen, canCastZ); - z[zOffset] = indexValue.index; + z[zOffset] = (Z) indexValue.index; } } break; @@ -248,7 +248,7 @@ void nd4j::IndexReductionLoops::loopIndexReduce(X* x, Nd4jLong* xShapeInfo, indexValue = OpType::update(indexValue, comp, extraParams); } - z[i * zEws] = indexValue.index; + z[i * zEws] = (Z) indexValue.index; } } break; @@ -272,18 +272,19 @@ void nd4j::IndexReductionLoops::loopIndexReduce(X* x, Nd4jLong* xShapeInfo, } auto zOffset = shape::indexOffset(i, zShapeInfo, castZShapeInfo, zLen, canCastZ); - z[zOffset] = indexValue.index; + z[zOffset] = (Z) indexValue.index; } } } } -template -void nd4j::IndexReductionLoops::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, Nd4jLong* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams) { +template +void nd4j::IndexReductionLoops::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* vz, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams) { auto x = reinterpret_cast(vx); + auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); - DISPATCH_BY_OPNUM_T(loopIndexReduce, PARAMS(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams), INDEX_REDUCE_OPS); + DISPATCH_BY_OPNUM_TT(loopIndexReduce, PARAMS(x, xShapeInfo, z, zShapeInfo, tadShapeInfo, tadOffsets, extraParams), INDEX_REDUCE_OPS); } -BUILD_SINGLE_TEMPLATE(template void nd4j::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, Nd4jLong* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES); \ No newline at end of file +BUILD_DOUBLE_TEMPLATE(template void nd4j::IndexReductionLoops, ::wrapIndexReduce(const int opNum, void* vx, Nd4jLong* xShapeInfo, void* z, Nd4jLong* zShapeInfo, Nd4jLong* tadShapeInfo, Nd4jLong* tadOffsets, void* vextraParams), LIBND4J_TYPES, INDEXING_TYPES); \ No newline at end of file diff --git a/libnd4j/include/loops/cpu/indexreduce.cpp b/libnd4j/include/loops/cpu/indexreduce.cpp index 951ac287b..5a7beee24 100644 --- a/libnd4j/include/loops/cpu/indexreduce.cpp +++ b/libnd4j/include/loops/cpu/indexreduce.cpp @@ -31,26 +31,27 @@ 
namespace functions { namespace indexreduce { //////////////////////////////////////////////////////////////////////// -template Nd4jLong IndexReduce::execScalar( const int opNum, void *x, Nd4jLong *xShapeInfo, void *extraParams) { - RETURNING_DISPATCH_BY_OPNUM_T(execScalar, PARAMS(x, xShapeInfo, extraParams), INDEX_REDUCE_OPS); +template +Nd4jLong IndexReduce::execScalar( const int opNum, void *x, Nd4jLong *xShapeInfo, void *extraParams) { + RETURNING_DISPATCH_BY_OPNUM_TT(execScalar, PARAMS(x, xShapeInfo, extraParams), INDEX_REDUCE_OPS); } //////////////////////////////////////////////////////////////////////// -template -void IndexReduce::exec(const int opNum, +template +void IndexReduce::exec(const int opNum, void *x, Nd4jLong *xShapeInfo, void *extraParams, - Nd4jLong *z, Nd4jLong *zShapeInfo, + void *z, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffset) { -DISPATCH_BY_OPNUM_T(exec, PARAMS(x, xShapeInfo, extraParams, z, zShapeInfo, dimension, dimensionLength, tadShapeInfo, tadOffset), INDEX_REDUCE_OPS); +DISPATCH_BY_OPNUM_TT(exec, PARAMS(x, xShapeInfo, extraParams, z, zShapeInfo, dimension, dimensionLength, tadShapeInfo, tadOffset), INDEX_REDUCE_OPS); } //////////////////////////////////////////////////////////////////////// -template +template template -Nd4jLong IndexReduce::execScalar(void *vx, Nd4jLong *xShapeInfo, void *vextraParams) { +Nd4jLong IndexReduce::execScalar(void *vx, Nd4jLong *xShapeInfo, void *vextraParams) { auto x = reinterpret_cast(vx); auto extraParams = reinterpret_cast(vextraParams); @@ -105,15 +106,16 @@ Nd4jLong IndexReduce::execScalar(void *vx, Nd4jLong *xShapeInfo, void *vextra //////////////////////////////////////////////////////////////////////// -template +template template -void IndexReduce::exec(void *vx, Nd4jLong *xShapeInfo, +void IndexReduce::exec(void *vx, Nd4jLong *xShapeInfo, void *vextraParams, - Nd4jLong *z, Nd4jLong *zShapeInfo, + void *vz, Nd4jLong *zShapeInfo, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffset) { auto x = reinterpret_cast(vx); + auto z = reinterpret_cast(vz); auto extraParams = reinterpret_cast(vextraParams); const Nd4jLong zLen = shape::length(zShapeInfo); @@ -124,12 +126,12 @@ void IndexReduce::exec(void *vx, Nd4jLong *xShapeInfo, const auto indexValue = OpType::startingIndexValue(x); PRAGMA_OMP_PARALLEL_FOR_IF(zLen > nd4j::Environment::getInstance()->elementwiseThreshold()) for (uint i = 0; i < zLen; i++) - z[i] = indexValue.index;; + z[i] = (Z) indexValue.index;; return; } if(shape::isScalar(zShapeInfo)) { - z[0] = execScalar(x,xShapeInfo,extraParams); + z[0] = (Z) execScalar(x,xShapeInfo,extraParams); return; } @@ -146,11 +148,11 @@ void IndexReduce::exec(void *vx, Nd4jLong *xShapeInfo, tadOffsets = tadPack.primaryOffsets(); } - nd4j::IndexReductionLoops::template loopIndexReduce(x, xShapeInfo, z, zShapeInfo, tadOnlyShapeInfo, tadOffsets, extraParams); + nd4j::IndexReductionLoops::template loopIndexReduce(x, xShapeInfo, z, zShapeInfo, tadOnlyShapeInfo, tadOffsets, extraParams); } -BUILD_SINGLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES); +BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES, INDEXING_TYPES); } } \ No newline at end of file diff --git a/libnd4j/include/loops/cuda/indexreduce.cu b/libnd4j/include/loops/cuda/indexreduce.cu index 18e5b1432..5f0cf07ae 100644 --- a/libnd4j/include/loops/cuda/indexreduce.cu +++ b/libnd4j/include/loops/cuda/indexreduce.cu @@ -29,37 +29,37 @@ using 
namespace simdOps; -template +template static __global__ void simpleIndexReduceGeneric(const int op, void *dx, Nd4jLong *xShapeInfo, int xRank, void *extraParams, - Nd4jLong *result, + void *result, Nd4jLong *resultShapeInfo, int zRank, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationBuffer, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets) { - functions::indexreduce::IndexReduce::transform(op,dx,xShapeInfo,extraParams,result,resultShapeInfo,dimension,dimensionLength,postProcessOrNot,allocationBuffer,reductionBuffer,tadOnlyShapeInfo,tadOffsets); + functions::indexreduce::IndexReduce::transform(op,dx,xShapeInfo,extraParams,result,resultShapeInfo,dimension,dimensionLength,postProcessOrNot,allocationBuffer,reductionBuffer,tadOnlyShapeInfo,tadOffsets); } namespace functions { namespace indexreduce { - template - _CUDA_H void IndexReduce::executeIndexReduceScalar(dim3 launchDims, cudaStream_t *stream, + template + _CUDA_H void IndexReduce::executeIndexReduceScalar(dim3 launchDims, cudaStream_t *stream, const int opNum, void *dx, Nd4jLong *xShapeInfo, int xRank, void *extraParams, - Nd4jLong *result, Nd4jLong *resultShapeInfo, + void *result, Nd4jLong *resultShapeInfo, int zRank, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationBuffer, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets) { - simpleIndexReduceGeneric<<>>(opNum, + simpleIndexReduceGeneric<<>>(opNum, dx, xShapeInfo, xRank, extraParams, result, resultShapeInfo, 0, @@ -67,13 +67,11 @@ namespace functions { 1, allocationBuffer, reductionBuffer, tadOnlyShapeInfo, tadOffsets); - - nd4j::DebugHelper::checkErrorCode(stream, "execIndexReduceScalar(...) failed"); } - template - _CUDA_H void IndexReduce::executeIndexReduce(dim3 launchDims, cudaStream_t *stream, const int opNum, void *dx, Nd4jLong *xShapeInfo, int xRank, void *extraParams, Nd4jLong *result, Nd4jLong *resultShapeInfo, int zRank, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationBuffer, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets) { - simpleIndexReduceGeneric<<>>( + template + _CUDA_H void IndexReduce::executeIndexReduce(dim3 launchDims, cudaStream_t *stream, const int opNum, void *dx, Nd4jLong *xShapeInfo, int xRank, void *extraParams, void *result, Nd4jLong *resultShapeInfo, int zRank, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationBuffer, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets) { + simpleIndexReduceGeneric<<>>( opNum, dx, xShapeInfo, xRank, @@ -83,8 +81,6 @@ namespace functions { dimension, dimensionLength, 1, allocationBuffer, reductionBuffer, tadOnlyShapeInfo, tadOffsets); - - DEBUG_KERNEL(stream, opNum); } // This is the un-specialized struct. Note that we prevent instantiation of this @@ -122,14 +118,14 @@ namespace functions { } }; - template + template template - __device__ void IndexReduce::aggregatePartials(IndexValue **sPartialsRef, Nd4jLong tid, Nd4jLong numElements, void *vextraParams) { + __device__ void IndexReduce::aggregatePartials(IndexValue **sPartialsRef, Nd4jLong tid, Nd4jLong numElements, void *vextraParams) { // start the shared memory loop on the next power of 2 less // than the block size. If block size is not a power of 2, // accumulate the intermediate sums in the remainder range. 
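 // Concretely: any thread with tid >= floorPow2 (the largest power of two not exceeding
 // blockDim.x) first folds its IndexValue into sPartials[tid - floorPow2]; the loop below
 // then halves activeThreads each pass until sPartials[0] holds the block-wide reduction.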
- auto extraParams = static_cast(vextraParams); - IndexValue *sPartials = *sPartialsRef; + auto extraParams = static_cast(vextraParams); + IndexValue *sPartials = *sPartialsRef; Nd4jLong floorPow2 = blockDim.x; if (floorPow2 & (floorPow2 - 1)) { @@ -138,8 +134,8 @@ namespace functions { } if (tid >= floorPow2) { - IndexValue prev = sPartials[tid - floorPow2]; - IndexValue curr = sPartials[tid]; + IndexValue prev = sPartials[tid - floorPow2]; + IndexValue curr = sPartials[tid]; sPartials[tid - floorPow2] = OpType::update(prev,curr,extraParams); } __syncthreads(); @@ -147,21 +143,21 @@ namespace functions { for (int activeThreads = floorPow2 >> 1;activeThreads; activeThreads >>= 1) { if (tid < activeThreads && tid + activeThreads < numElements) { - IndexValue curr = sPartials[tid]; - IndexValue next = sPartials[tid + activeThreads]; + IndexValue curr = sPartials[tid]; + IndexValue next = sPartials[tid + activeThreads]; sPartials[tid] = OpType::update(curr,next,extraParams); } __syncthreads(); } } - template - __device__ void IndexReduce::transform( + template + __device__ void IndexReduce::transform( const int opNum, void *x, Nd4jLong *xShapeInfo, void *extraParams, - Nd4jLong *result, + void *result, Nd4jLong *resultShapeInfo, int *dimension, int dimensionLength, @@ -170,15 +166,15 @@ namespace functions { void *reductionBuffer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffset) { - DISPATCH_BY_OPNUM_T(transform, PARAMS(x, xShapeInfo, extraParams, result, resultShapeInfo, dimension, dimensionLength, postProcessOrNot, allocationBuffer, reductionBuffer, tadShapeInfo, tadOffset), INDEX_REDUCE_OPS); + DISPATCH_BY_OPNUM_TT(transform, PARAMS(x, xShapeInfo, extraParams, result, resultShapeInfo, dimension, dimensionLength, postProcessOrNot, allocationBuffer, reductionBuffer, tadShapeInfo, tadOffset), INDEX_REDUCE_OPS); } - template + template template - __device__ void IndexReduce::transform(void *vdx, Nd4jLong *xShapeInfo, + __device__ void IndexReduce::transform(void *vdx, Nd4jLong *xShapeInfo, void *vextraParams, - Nd4jLong *result, Nd4jLong *resultShapeInfo, + void *vresult, Nd4jLong *resultShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationBuffer, void *vreductionBuffer, @@ -186,18 +182,19 @@ namespace functions { /**int * Gpu information for the problem */ - auto dx = static_cast(vdx); - auto extraParams = static_cast(vextraParams); - auto reductionBuffer = static_cast(vreductionBuffer); + auto dx = reinterpret_cast(vdx); + auto result = reinterpret_cast(vresult); + auto extraParams = static_cast(vextraParams); + auto reductionBuffer = static_cast(vreductionBuffer); auto order = shape::order(xShapeInfo); int tid = blockIdx.x * blockDim.x + threadIdx.x; __shared__ volatile int resultScalar; //shared memory space for storing intermediate results - __shared__ IndexValue* sPartials; + __shared__ IndexValue* sPartials; if(threadIdx.x == 0) { extern __shared__ unsigned char shmem[]; - sPartials = reinterpret_cast*>(shmem); + sPartials = reinterpret_cast*>(shmem); } __syncthreads(); @@ -210,7 +207,7 @@ namespace functions { //only compute the tad indexes once - IndexValue reduction = OpType::startingIndexValue(dx); + IndexValue reduction = OpType::startingIndexValue(dx); if (threadIdx.x == 0) { if (resultShapeInfo != nullptr) @@ -255,7 +252,7 @@ namespace functions { for(int i = threadIdx.x;i < tadLength; i += blockDim.x) { auto xOffset = tadOffsetForBlock + shape::getIndexOffset(i, tadOnlyShapeInfo, tadLength); - IndexValue comp {dx[xOffset], i}; + IndexValue comp 
{dx[xOffset], i}; sPartials[threadIdx.x] = OpType::update(sPartials[threadIdx.x], comp, extraParams); } @@ -264,7 +261,7 @@ namespace functions { __syncthreads(); if (threadIdx.x == 0) { - result[r] = sPartials[threadIdx.x].index; + result[r] = (Z) sPartials[threadIdx.x].index; } __syncthreads(); } @@ -276,7 +273,7 @@ namespace functions { sPartials[threadIdx.x] = OpType::startingIndexValue(dx); for (int x = threadIdx.x; x < tadLength; x+= blockDim.x) { - IndexValue comp {dx[tadOffsetForBlock + x * tadEWS], x}; + IndexValue comp {dx[tadOffsetForBlock + x * tadEWS], x}; sPartials[threadIdx.x] = OpType::update(sPartials[threadIdx.x], comp, extraParams); } @@ -285,7 +282,7 @@ namespace functions { __syncthreads(); if (threadIdx.x == 0) { - result[i] = sPartials[threadIdx.x].index; //postProcess(sPartials[0],tadLength ,extraParams); + result[i] = (Z) sPartials[threadIdx.x].index; //postProcess(sPartials[0],tadLength ,extraParams); } __syncthreads(); } @@ -296,14 +293,14 @@ namespace functions { if(xElementWiseStride >= 1 && order == 'c') { for(Nd4jLong i = tid;i < n; i += (blockDim.x * gridDim.x)) { - IndexValue indexVal = {dx[i * xElementWiseStride], i}; + IndexValue indexVal = {dx[i * xElementWiseStride], i}; reduction = OpType::update(reduction, indexVal, extraParams); } } else { for(Nd4jLong i = tid;i < n; i += blockDim.x * gridDim.x) { auto offset = shape::getIndexOffset(i, xShapeInfo, n); - IndexValue indexVal = {dx[offset], i}; + IndexValue indexVal = {dx[offset], i}; reduction = OpType::update(reduction, indexVal, extraParams); } } @@ -320,7 +317,7 @@ namespace functions { unsigned int *tc = (unsigned int *) reductionBuffer; tid = threadIdx.x; if (threadIdx.x == 0) { - auto pBuffer = reinterpret_cast *>(reductionBuffer); + auto pBuffer = reinterpret_cast *>(reductionBuffer); pBuffer[blockIdx.x] = {sPartials[0].value, sPartials[0].index}; } __threadfence(); @@ -335,7 +332,7 @@ namespace functions { if (amLast) { tc[16384] = 0; - IndexValue *pBuffer = (IndexValue *) reductionBuffer; + IndexValue *pBuffer = (IndexValue *) reductionBuffer; sPartials[threadIdx.x] = OpType::startingIndexValue(dx); @@ -348,14 +345,14 @@ namespace functions { __syncthreads(); if (tid == 0) { - result[0] = sPartials[0].index; + result[0] = (Z) sPartials[0].index; } } } else { if (tid == 0) { auto tc = reinterpret_cast(reductionBuffer); tc[16384] = 0; - result[0] = sPartials[0].index; + result[0] = (Z) sPartials[0].index; } } @@ -365,30 +362,30 @@ namespace functions { - template - Nd4jLong IndexReduce::execScalar(const int opNum, void *x, Nd4jLong *xShapeInfo, void *extraParams) { + template + Nd4jLong IndexReduce::execScalar(const int opNum, void *x, Nd4jLong *xShapeInfo, void *extraParams) { return 0; } - template - void IndexReduce::exec(const int opNum, void *x, Nd4jLong *xShapeInfo, void *extraParams, Nd4jLong *result, Nd4jLong *resultShapeInfoBuffer, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffset) { + template + void IndexReduce::exec(const int opNum, void *x, Nd4jLong *xShapeInfo, void *extraParams, void *result, Nd4jLong *resultShapeInfoBuffer, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffset) { } - template + template template - Nd4jLong IndexReduce:: execScalar(void *x, Nd4jLong *xShapeInfo, void *extraParams) { + Nd4jLong IndexReduce:: execScalar(void *x, Nd4jLong *xShapeInfo, void *extraParams) { return 0; } - template + template template - _CUDA_H void IndexReduce::exec(void *x, Nd4jLong *xShapeInfo, void *extraParams, Nd4jLong 
*result, Nd4jLong *resultShapeInfoBuffer, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffset) { + _CUDA_H void IndexReduce::exec(void *x, Nd4jLong *xShapeInfo, void *extraParams, void *result, Nd4jLong *resultShapeInfoBuffer, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffset) { } - BUILD_SINGLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES); + BUILD_DOUBLE_TEMPLATE(template class ND4J_EXPORT IndexReduce, , LIBND4J_TYPES, INDEXING_TYPES); } } diff --git a/libnd4j/include/loops/indexreduce.h b/libnd4j/include/loops/indexreduce.h index 40f98c692..792ed16a9 100755 --- a/libnd4j/include/loops/indexreduce.h +++ b/libnd4j/include/loops/indexreduce.h @@ -52,35 +52,35 @@ namespace functions { namespace indexreduce { - template + template class IndexReduce { public: #ifdef __CUDACC__ - static __device__ void transform(const int opNum, void *x, Nd4jLong *xShapeInfo, void *extraParams, Nd4jLong *result, Nd4jLong *resultShapeInfo, int *dimension,int dimensionLength, int postProcessOrNot, int *allocationBuffer, void *reductionBuffer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffset); + static __device__ void transform(const int opNum, void *x, Nd4jLong *xShapeInfo, void *extraParams, void *result, Nd4jLong *resultShapeInfo, int *dimension,int dimensionLength, int postProcessOrNot, int *allocationBuffer, void *reductionBuffer, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffset); template - static __device__ void aggregatePartials(IndexValue **sPartialsRef, Nd4jLong tid, Nd4jLong numElements,void *extraParams); + static __device__ void aggregatePartials(IndexValue **sPartialsRef, Nd4jLong tid, Nd4jLong numElements,void *extraParams); template - static __device__ void transform(void *dx, Nd4jLong *xShapeInfo, void *extraParams, Nd4jLong *result, Nd4jLong *resultShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationBuffer, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets); + static __device__ void transform(void *dx, Nd4jLong *xShapeInfo, void *extraParams, void *result, Nd4jLong *resultShapeInfo, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationBuffer, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets); - static _CUDA_H void executeIndexReduceScalar(dim3 launchDims, cudaStream_t *stream, const int op, void *dx, Nd4jLong *xShapeInfo, int xRank, void *extraParams, Nd4jLong *result, Nd4jLong *resultShapeInfo, int zRank, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationBuffer, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets); + static _CUDA_H void executeIndexReduceScalar(dim3 launchDims, cudaStream_t *stream, const int op, void *dx, Nd4jLong *xShapeInfo, int xRank, void *extraParams, void *result, Nd4jLong *resultShapeInfo, int zRank, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationBuffer, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets); - static _CUDA_H void executeIndexReduce(dim3 launchDims, cudaStream_t *stream, const int op, void *dx, Nd4jLong *xShapeInfo, int xRank, void *extraParams, Nd4jLong *result, Nd4jLong *resultShapeInfo, int zRank, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationBuffer, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets); + static _CUDA_H void executeIndexReduce(dim3 launchDims, cudaStream_t *stream, const int op, void *dx, Nd4jLong *xShapeInfo, int xRank, void 
*extraParams, void *result, Nd4jLong *resultShapeInfo, int zRank, int *dimension, int dimensionLength, int postProcessOrNot, int *allocationBuffer, void *reductionBuffer, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets); #endif static Nd4jLong execScalar(const int opNum, void *x, Nd4jLong *xShapeInfo, void *extraParams); - static void exec(const int opNum, void *x, Nd4jLong *xShapeInfo, void *extraParams, Nd4jLong *result, Nd4jLong *resultShapeInfoBuffer, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffset); + static void exec(const int opNum, void *x, Nd4jLong *xShapeInfo, void *extraParams, void *result, Nd4jLong *resultShapeInfoBuffer, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffset); template static _CUDA_H Nd4jLong execScalar(void *x, Nd4jLong *xShapeInfo, void *extraParams); template - static _CUDA_H void exec(void *x, Nd4jLong *xShapeInfo, void *extraParams, Nd4jLong *result, Nd4jLong *resultShapeInfoBuffer, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffset); + static _CUDA_H void exec(void *x, Nd4jLong *xShapeInfo, void *extraParams, void *result, Nd4jLong *resultShapeInfoBuffer, int *dimension, int dimensionLength, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffset); }; } } diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/top_k.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/top_k.cpp index ea2e3330a..bd16cdd79 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/top_k.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/top_k.cpp @@ -87,7 +87,7 @@ namespace nd4j { getOpDescriptor() ->setAllowedInputTypes(nd4j::DataType::ANY) ->setAllowedOutputTypes(0, nd4j::DataType::ANY) - ->setAllowedOutputTypes(1, {ALL_INTS}); + ->setAllowedOutputTypes(1, {ALL_INDICES}); } } } diff --git a/libnd4j/include/ops/declarable/helpers/impl/listdiff.cpp b/libnd4j/include/ops/declarable/helpers/impl/listdiff.cpp index baa08dad9..c840f6960 100644 --- a/libnd4j/include/ops/declarable/helpers/impl/listdiff.cpp +++ b/libnd4j/include/ops/declarable/helpers/impl/listdiff.cpp @@ -42,10 +42,11 @@ namespace helpers { Nd4jLong listDiffCount(nd4j::LaunchContext * context, NDArray* values, NDArray* keep) { auto xType = values->dataType(); - values->syncToHost(); - keep->syncToHost(); + NDArray::preparePrimaryUse({},{values, keep}); BUILD_SINGLE_SELECTOR(xType, return listDiffCount_, (values, keep), LIBND4J_TYPES); + + NDArray::registerPrimaryUse({},{values, keep}); } BUILD_SINGLE_TEMPLATE(template Nd4jLong listDiffCount_, (NDArray* values, NDArray* keep);, LIBND4J_TYPES); @@ -97,16 +98,7 @@ namespace helpers { int listDiffFunctor(nd4j::LaunchContext * context, NDArray* values, NDArray* keep, NDArray* output1, NDArray* output2) { auto xType = values->dataType(); - values->syncToHost(); - - if (keep != nullptr) - keep->syncToHost(); - - if (output1 != nullptr) - output1->syncToHost(); - - if (output2 != nullptr) - output2->syncToHost(); + NDArray::preparePrimaryUse({output1, output2}, {values, keep}); int result = 0; @@ -118,14 +110,7 @@ namespace helpers { throw std::runtime_error("ListDiff: Only integer and floating point data types are supported"); } - if (keep != nullptr) - keep->syncToDevice(); - - if (output1 != nullptr) - output1->syncToDevice(); - - if (output2 != nullptr) - output2->syncToDevice(); + NDArray::registerPrimaryUse({output1, output2}, {values, keep}); return result; } diff --git a/libnd4j/include/ops/ops.h b/libnd4j/include/ops/ops.h index 38122f985..fe6bfae81 100644 --- 
a/libnd4j/include/ops/ops.h +++ b/libnd4j/include/ops/ops.h @@ -3746,7 +3746,7 @@ namespace simdOps { }; - template + template class IndexAbsoluteMax { public: static _CUDA_HD inline functions::indexreduce::IndexValue op(functions::indexreduce::IndexValue val, X *extraParams) { @@ -3799,7 +3799,7 @@ namespace simdOps { } }; - template + template class FirstIndex { public: static _CUDA_HD inline functions::indexreduce::IndexValue op(functions::indexreduce::IndexValue val, X *extraParams) { @@ -3861,7 +3861,7 @@ namespace simdOps { }; - template + template class LastIndex { public: static _CUDA_HD inline functions::indexreduce::IndexValue op(functions::indexreduce::IndexValue val, X *extraParams) { @@ -3920,7 +3920,7 @@ namespace simdOps { }; - template + template class IndexMax { public: @@ -3974,7 +3974,7 @@ namespace simdOps { }; - template + template class IndexAbsoluteMin { public: static _CUDA_HD inline functions::indexreduce::IndexValue op( @@ -4030,7 +4030,7 @@ namespace simdOps { }; - template + template class IndexMin { public: static _CUDA_HD inline functions::indexreduce::IndexValue op( diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaExecutioner.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaExecutioner.java index 38a1ba382..54649692c 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaExecutioner.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaExecutioner.java @@ -226,6 +226,21 @@ public class CudaExecutioner extends DefaultOpExecutioner { */ protected INDArray naiveExec(ReduceOp op, int... dimension) { long st = profilingConfigurableHookIn(op); + + if(op instanceof BaseReduceOp && ((BaseReduceOp)op).isEmptyReduce()){ + //Edge case for TF import compatibility: [x,y].reduce(empty) = [x,y] + //Note that "empty" axis is NOT the same as length 0, as in INDArray.sum(new int[0]), which means "all dimensions" + if(op.z() != null){ + Preconditions.checkState(op.x().equalShapes(op.z()), "For empty reductions, result (z) array must have same shape as x shape." + + " Got: x=%ndShape, z=%ndShape", op.x(), op.z()); + op.z().assign(op.x()); + return op.z(); + } else { + op.setZ(op.x().dup()); + return op.z(); + } + } + INDArray ret = op.z(); checkForCompression(op); @@ -482,6 +497,20 @@ public class CudaExecutioner extends DefaultOpExecutioner { public INDArray exec(ReduceOp op) { checkForCompression(op); + if(op instanceof BaseReduceOp && ((BaseReduceOp)op).isEmptyReduce()){ + //Edge case for TF import compatibility: [x,y].reduce(empty) = [x,y] + //Note that "empty" axis is NOT the same as length 0, as in INDArray.sum(new int[0]), which means "all dimensions" + if(op.z() != null){ + Preconditions.checkState(op.x().equalShapes(op.z()), "For empty reductions, result (z) array must have same shape as x shape." 
+ + " Got: x=%ndShape, z=%ndShape", op.x(), op.z()); + op.z().assign(op.x()); + return op.z(); + } else { + op.setZ(op.x().dup()); + return op.z(); + } + } + val dimension = op.dimensions().toIntVector(); if (extraz.get() == null) @@ -890,6 +919,22 @@ public class CudaExecutioner extends DefaultOpExecutioner { protected CudaContext invoke(ReduceOp op, int[] dimension) { + CudaContext context = AtomicAllocator.getInstance().getFlowController().prepareAction(op.z(), op.x(), op.y()); + + if(op instanceof BaseReduceOp && ((BaseReduceOp)op).isEmptyReduce()){ + //Edge case for TF import compatibility: [x,y].reduce(empty) = [x,y] + //Note that "empty" axis is NOT the same as length 0, as in INDArray.sum(new int[0]), which means "all dimensions" + if(op.z() != null){ + Preconditions.checkState(op.x().equalShapes(op.z()), "For empty reductions, result (z) array must have same shape as x shape." + + " Got: x=%ndShape, z=%ndShape", op.x(), op.z()); + op.z().assign(op.x()); + return context; + } else { + op.setZ(op.x().dup()); + return context; + } + } + long st = profilingConfigurableHookIn(op); checkForCompression(op); @@ -913,8 +958,6 @@ public class CudaExecutioner extends DefaultOpExecutioner { throw new ND4JIllegalStateException("Op target dimension " + Arrays.toString(dimension) + " contains element that higher then rank of op.X: [" + op.x().rank() + "]"); - CudaContext context = AtomicAllocator.getInstance().getFlowController().prepareAction(op.z(), op.x(), op.y()); - if (CudaEnvironment.getInstance().getConfiguration().isDebug()) lastOp.set(op.opName()); diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/custom/CustomOpsTests.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/custom/CustomOpsTests.java index 6c4595a0c..f325348fb 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/custom/CustomOpsTests.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/custom/CustomOpsTests.java @@ -733,6 +733,46 @@ public class CustomOpsTests extends BaseNd4jTest { } } + @Test + public void testListDiff(){ + INDArray x = Nd4j.createFromArray(0, 1, 2, 3); + INDArray y = Nd4j.createFromArray(3, 1); + + INDArray out = Nd4j.create(DataType.INT, 2); + INDArray outIdx = Nd4j.create(DataType.INT, 2); + + Nd4j.exec(DynamicCustomOp.builder("listdiff") + .addInputs(x, y) + .addOutputs(out, outIdx) + .build()); + + INDArray exp = Nd4j.createFromArray(0, 2); + + assertEquals(exp, out); //Values in x not in y + assertEquals(exp, outIdx); //Indices of the values in x not in y + } + + @Test + public void testTopK1(){ + INDArray x = Nd4j.createFromArray(0.0, 0.0, 0.0, 10.0, 0.0); + INDArray k = Nd4j.scalar(1); + INDArray outValue = Nd4j.create(DataType.DOUBLE, 1); + INDArray outIdx = Nd4j.create(DataType.INT, 1); + + Nd4j.exec(DynamicCustomOp.builder("top_k") + .addInputs(x, k) + .addOutputs(outValue, outIdx) + .addBooleanArguments(false) //not sorted + .addIntegerArguments(1) + .build()); + + INDArray expValue = Nd4j.createFromArray(10.0); + INDArray expIdx = Nd4j.createFromArray(3); + + assertEquals(expValue, outValue); + assertEquals(expIdx, outIdx); + } + @Test public void testMaxPool2Dbp_1() { val x = Nd4j.create(DataType.HALF, 2,3,16,16).assign(Double.NaN); diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/shape/EmptyTests.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/shape/EmptyTests.java index 261e1e300..e7e8f8288 100644 --- 
a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/shape/EmptyTests.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/shape/EmptyTests.java @@ -25,6 +25,7 @@ import org.nd4j.linalg.BaseNd4jTest; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.DynamicCustomOp; +import org.nd4j.linalg.api.ops.impl.reduce.bool.All; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.factory.Nd4jBackend; @@ -299,6 +300,15 @@ public class EmptyTests extends BaseNd4jTest { assertNotNull(result[0].shapeInfoDataBuffer().asLong()); } + @Test + public void testAllEmptyReduce(){ + INDArray x = Nd4j.createFromArray(true, true, true); + val all = new All(x); + all.setEmptyReduce(true); //For TF compatibility - empty array for axis (which means no-op - and NOT all array reduction) + INDArray out = Nd4j.exec(all); + assertEquals(x, out); + } + @Override public char ordering() { return 'c'; From 05d45ec0508def2034510b373a0f61d1abe8bf0e Mon Sep 17 00:00:00 2001 From: raver119 Date: Tue, 27 Aug 2019 11:31:59 +0300 Subject: [PATCH 15/56] IndexReduce along dim CUDA fix Signed-off-by: raver119 --- .../nd4j/linalg/jcublas/ops/executioner/CudaExecutioner.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaExecutioner.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaExecutioner.java index 54649692c..afeff4d8b 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaExecutioner.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaExecutioner.java @@ -894,13 +894,13 @@ public class CudaExecutioner extends DefaultOpExecutioner { //long dimensionPointer = AtomicAllocator.getInstance().getPointer(Nd4j.createBuffer(dimension), context); Pointer dimensionPointer = AtomicAllocator.getInstance() - .getPointer(AtomicAllocator.getInstance().getConstantBuffer(dimension), context); + .getHostPointer(AtomicAllocator.getInstance().getConstantBuffer(dimension)); nativeOps.execIndexReduce(xShapeInfoHostPointer, op.opNum(), null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo, extraArgs, null, (LongPointer) hostZShapeInfo, z, (LongPointer) zShapeInfo, - null, + dimensionPointer, (LongPointer) op.dimensions().shapeInfoDataBuffer().addressPointer(), AtomicAllocator.getInstance().getPointer(op.dimensions(), context), null); From dce4751fc12065a285af294dda299e54a6468add Mon Sep 17 00:00:00 2001 From: Alex Black Date: Tue, 27 Aug 2019 18:34:53 +1000 Subject: [PATCH 16/56] Layer norm 4d case fixes (#174) Signed-off-by: AlexDBlack --- .../DifferentialFunctionFactory.java | 16 ++++---- .../org/nd4j/autodiff/samediff/ops/SDNN.java | 18 +++++---- .../nd4j/linalg/api/ops/DynamicCustomOp.java | 16 ++++++++ .../ops/impl/transforms/custom/LayerNorm.java | 21 +++++----- .../impl/transforms/custom/LayerNormBp.java | 31 ++++++++------- .../opvalidation/LayerOpValidation.java | 38 +++++++++++++++++-- 6 files changed, 96 insertions(+), 44 deletions(-) diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/functions/DifferentialFunctionFactory.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/functions/DifferentialFunctionFactory.java index 3bf1754db..e16bd3dc2 100644 
--- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/functions/DifferentialFunctionFactory.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/functions/DifferentialFunctionFactory.java @@ -790,20 +790,20 @@ public class DifferentialFunctionFactory { return new StandardizeBp(sameDiff(), stdInput, gradient, dimensions).outputVariable(); } - public SDVariable layerNorm(SDVariable input, SDVariable gain, SDVariable bias, int... dimensions) { - return new LayerNorm(sameDiff(), input, gain, bias, dimensions).outputVariable(); + public SDVariable layerNorm(SDVariable input, SDVariable gain, SDVariable bias, boolean channelsFirst, int... dimensions) { + return new LayerNorm(sameDiff(), input, gain, bias, channelsFirst, dimensions).outputVariable(); } - public SDVariable[] layerNormBp(SDVariable input, SDVariable gain, SDVariable bias, SDVariable gradient, int... dimensions) { - return new LayerNormBp(sameDiff(), input, gain, bias, gradient, dimensions).outputVariables(); + public SDVariable[] layerNormBp(SDVariable input, SDVariable gain, SDVariable bias, SDVariable gradient, boolean channelsFirst, int... dimensions) { + return new LayerNormBp(sameDiff(), input, gain, bias, gradient, channelsFirst, dimensions).outputVariables(); } - public SDVariable layerNorm(SDVariable input, SDVariable gain, int... dimensions) { - return new LayerNorm(sameDiff(), input, gain, dimensions).outputVariable(); + public SDVariable layerNorm(SDVariable input, SDVariable gain, boolean channelsFirst, int... dimensions) { + return new LayerNorm(sameDiff(), input, gain, channelsFirst, dimensions).outputVariable(); } - public SDVariable[] layerNormBp(SDVariable input, SDVariable gain, SDVariable gradient, int... dimensions) { - return new LayerNormBp(sameDiff(), input, gain, gradient, dimensions).outputVariables(); + public SDVariable[] layerNormBp(SDVariable input, SDVariable gain, SDVariable gradient, boolean channelsFirst, int... dimensions) { + return new LayerNormBp(sameDiff(), input, gain, gradient, channelsFirst, dimensions).outputVariables(); } public SDVariable squaredNorm(SDVariable input, boolean keepDims, int... dimensions) { diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/samediff/ops/SDNN.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/samediff/ops/SDNN.java index 928bf3e6e..eb89a0f3a 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/samediff/ops/SDNN.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/samediff/ops/SDNN.java @@ -759,8 +759,8 @@ public class SDNN extends SDOps { * * @return Output variable */ - public SDVariable layerNorm(SDVariable input, SDVariable gain, SDVariable bias, int... dimensions) { - return layerNorm(null, input, gain, bias, dimensions); + public SDVariable layerNorm(SDVariable input, SDVariable gain, SDVariable bias, boolean channelsFirst, int... dimensions) { + return layerNorm(null, input, gain, bias, channelsFirst, dimensions); } /** @@ -772,13 +772,15 @@ public class SDNN extends SDOps { * @param input Input variable * @param gain gain * @param bias bias + * @param channelsFirst For 2D input - unused. 
True for NCHW (minibatch, channels, height, width), false for NHWC data + * @param dimensions Dimensions to perform layer norm over - dimension=1 for 2d/MLP data, dimension=1,2,3 for CNNs * @return Output variable */ - public SDVariable layerNorm(String name, SDVariable input, SDVariable gain, SDVariable bias, int... dimensions) { + public SDVariable layerNorm(String name, SDVariable input, SDVariable gain, SDVariable bias, boolean channelsFirst, int... dimensions) { validateFloatingPoint("layerNorm", "input", input); validateFloatingPoint("layerNorm", "gain", gain); validateFloatingPoint("layerNorm", "bias", bias); - SDVariable result = f().layerNorm(input, gain, bias, dimensions); + SDVariable result = f().layerNorm(input, gain, bias, channelsFirst, dimensions); return updateVariableNameAndReference(result, name); } @@ -789,8 +791,8 @@ public class SDNN extends SDOps { * * @return Output variable */ - public SDVariable layerNorm(SDVariable input, SDVariable gain, int... dimensions) { - return layerNorm((String)null, input, gain, dimensions); + public SDVariable layerNorm(SDVariable input, SDVariable gain, boolean channelsFirst, int... dimensions) { + return layerNorm((String)null, input, gain, channelsFirst, dimensions); } /** @@ -803,10 +805,10 @@ public class SDNN extends SDOps { * @param gain gain * @return Output variable */ - public SDVariable layerNorm(String name, SDVariable input, SDVariable gain, int... dimensions) { + public SDVariable layerNorm(String name, SDVariable input, SDVariable gain, boolean channelsFirst, int... dimensions) { validateFloatingPoint("layerNorm", "input", input); validateFloatingPoint("layerNorm", "gain", gain); - SDVariable result = f().layerNorm(input, gain, dimensions); + SDVariable result = f().layerNorm(input, gain, channelsFirst, dimensions); return updateVariableNameAndReference(result, name); } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/DynamicCustomOp.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/DynamicCustomOp.java index f52450eee..27e8ae281 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/DynamicCustomOp.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/DynamicCustomOp.java @@ -35,6 +35,7 @@ import org.tensorflow.framework.AttrValue; import org.tensorflow.framework.GraphDef; import org.tensorflow.framework.NodeDef; +import java.lang.reflect.Array; import java.util.*; /** @@ -611,6 +612,21 @@ public class DynamicCustomOp extends DifferentialFunction implements CustomOp { return in == null ? null : new INDArray[]{in}; } + protected static T[] wrapFilterNull(T... 
in){ + int count = 0; + for( int i=0; i 0, "LayerNorm: You have to provide dimensions"); this.dimensions = dimensions; + this.iArguments.clear(); addIArgument(dimensions); + this.bArguments.clear(); + this.bArguments.add(channelsFirst); } @Override @@ -96,9 +99,9 @@ public class LayerNorm extends DynamicCustomOp { public List doDiff(List gradient) { SDVariable[] ret; if(noBias){ - ret = f().layerNormBp(arg(0), arg(1), gradient.get(0), dimensions); + ret = f().layerNormBp(arg(0), arg(1), gradient.get(0), channelsFirst, dimensions); }else{ - ret = f().layerNormBp(arg(0), arg(1), arg(2), gradient.get(0), dimensions); + ret = f().layerNormBp(arg(0), arg(1), arg(2), gradient.get(0), channelsFirst, dimensions); } return Arrays.asList(ret); } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/custom/LayerNormBp.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/custom/LayerNormBp.java index cfd4fff65..2168fd165 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/custom/LayerNormBp.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/custom/LayerNormBp.java @@ -17,6 +17,7 @@ package org.nd4j.linalg.api.ops.impl.transforms.custom; import lombok.NoArgsConstructor; +import lombok.NonNull; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.base.Preconditions; @@ -39,33 +40,28 @@ import java.util.List; public class LayerNormBp extends DynamicCustomOp { private boolean noBias = false; + private boolean channelsFirst; - public LayerNormBp(SameDiff sameDiff, SDVariable input, SDVariable gain, SDVariable bias, SDVariable gradient, int... dimensions) { - super(null, sameDiff, new SDVariable[] {input, gain, bias, gradient}, false); - Preconditions.checkArgument(bias != null, "LayerNormBp: Use constructor without bias argument if bias is null / not available."); - + public LayerNormBp(@NonNull SameDiff sameDiff, @NonNull SDVariable input, @NonNull SDVariable gain, SDVariable bias, @NonNull SDVariable gradient, boolean channelsFirst, int... dimensions) { + super(null, sameDiff, wrapFilterNull(input, gain, bias, gradient), false); + this.channelsFirst = channelsFirst; setDimensions(dimensions); } - public LayerNormBp(INDArray input, INDArray gain, INDArray bias, INDArray grad, INDArray dLdx, INDArray dLdg, INDArray dLdb, int... dimensions) { - super("layer_norm_bp", new INDArray[]{input, gain, bias, grad}, new INDArray[]{dLdx, dLdg, dLdb}); - Preconditions.checkArgument(bias != null, "LayerNormBp: Use constructor without bias argument if bias is null / not available."); - + public LayerNormBp(@NonNull INDArray input, @NonNull INDArray gain, INDArray bias, @NonNull INDArray grad, @NonNull INDArray dLdx, @NonNull INDArray dLdg, INDArray dLdb, boolean channelsFirst, int... dimensions) { + super("layer_norm_bp", wrapFilterNull(input, gain, bias, grad), wrapFilterNull(dLdx, dLdg, dLdb)); + this.channelsFirst = channelsFirst; setDimensions(dimensions); } - public LayerNormBp(SameDiff sameDiff, SDVariable input, SDVariable gain, SDVariable gradient, int... dimensions) { - super(null, sameDiff, new SDVariable[] {input, gain, gradient}, false); - noBias = true; - setDimensions(dimensions); + public LayerNormBp(SameDiff sameDiff, SDVariable input, SDVariable gain, SDVariable gradient, boolean channelsFirst, int... 
dimensions) { + this(sameDiff, input, gain, null, gradient, channelsFirst, dimensions); } - public LayerNormBp(INDArray input, INDArray gain, INDArray grad, INDArray dLdx, INDArray dLdg, int... dimensions) { - super("layer_norm_bp", new INDArray[]{input, gain, grad}, new INDArray[]{dLdx, dLdg}); - noBias = true; - setDimensions(dimensions); + public LayerNormBp(INDArray input, INDArray gain, INDArray grad, INDArray dLdx, INDArray dLdg, boolean channelsFirst, int... dimensions) { + this(input, gain, null, grad, dLdx, dLdg, null, channelsFirst, dimensions); } @Override @@ -74,7 +70,10 @@ public class LayerNormBp extends DynamicCustomOp { Preconditions.checkArgument(dimensions.length > 0, "LayerNormBp: You have to provide dimensions"); this.dimensions = dimensions; + this.iArguments.clear(); addIArgument(dimensions); + this.bArguments.clear(); + addBArgument(channelsFirst); } @Override diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/LayerOpValidation.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/LayerOpValidation.java index 84bd96ad6..fde2170a6 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/LayerOpValidation.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/LayerOpValidation.java @@ -1126,7 +1126,7 @@ public class LayerOpValidation extends BaseOpValidation { SDVariable sdInput = sd.var("input", standardized); SDVariable sdGain = sd.var("gain", gain); SDVariable sdBias = sd.var("bias", bias); - SDVariable out = sd.nn.layerNorm(sdInput, sdGain, sdBias, axis); + SDVariable out = sd.nn.layerNorm(sdInput, sdGain, sdBias, true, axis); out.norm1("out"); String err = OpValidation.validate(new TestCase(sd) @@ -1135,6 +1135,38 @@ public class LayerOpValidation extends BaseOpValidation { assertNull(err, err); } + @Test + public void testLayerNorm4d() { + int mb = 3; + int ch = 4; + for(boolean nchw : new boolean[]{true, false}) { + double eps = 0.0; + INDArray x = Nd4j.rand(DataType.FLOAT, nchw ? new long[]{mb, ch, 8, 8} : new long[]{mb, 8, 8, ch}); + INDArray gain4d = Nd4j.rand(DataType.FLOAT, nchw ? new long[]{1, ch, 1, 1} : new long[]{1, 1, 1, ch}); + INDArray bias4d = Nd4j.rand(DataType.FLOAT, nchw ? 
new long[]{1, ch, 1, 1} : new long[]{1, 1, 1, ch}); + INDArray mean = x.mean(true, 1, 2, 3); + INDArray std = Transforms.sqrt(x.var(false,1,2,3).addi(eps)).reshape(mb, 1, 1, 1); + + INDArray standardized = x.sub(mean).div(std); + INDArray exp = standardized.mul(gain4d).add(bias4d); + + final int[] axis = new int[]{1, 2, 3}; + SameDiff sd = SameDiff.create(); + SDVariable sdInput = sd.var("input", x); + SDVariable sdGain = sd.var("gain", gain4d.reshape(ch)); + SDVariable sdBias = sd.var("bias", bias4d.reshape(ch)); + SDVariable out = sd.nn.layerNorm("layernorm", sdInput, sdGain, sdBias, nchw, axis); + + SDVariable loss = sd.loss.l2Loss(out); + + String err = OpValidation.validate(new TestCase(sd) + .expectedOutput("layernorm", exp) + .gradientCheck(true)); + assertNull(err); + } + } + + @Test public void testLayerNormOP() { final INDArray random = Nd4j.rand(new int[]{10, 4}); @@ -1165,7 +1197,7 @@ public class LayerOpValidation extends BaseOpValidation { SameDiff sd = SameDiff.create(); SDVariable sdInput = sd.var("input", standardized); SDVariable sdGain = sd.var("gain", gain); - SDVariable out = sd.nn.layerNorm(sdInput, sdGain, axis); + SDVariable out = sd.nn.layerNorm(sdInput, sdGain, true, axis); out.norm1("out"); String err = OpValidation.validate(new TestCase(sd) @@ -1209,7 +1241,7 @@ public class LayerOpValidation extends BaseOpValidation { SDVariable sdInput = sd.var("input", standardized); SDVariable sdGain = sd.var("gain", gain); SDVariable sdBias = sd.var("bias", bias); - SDVariable out = sd.nn.layerNorm(sdInput, sdGain, sdBias, axis); + SDVariable out = sd.nn.layerNorm(sdInput, sdGain, sdBias, true, axis); out.norm1("out"); String err = OpValidation.validate(new TestCase(sd) From 5cfbeb64ac3d0e84a885719f146f1162d5a9de48 Mon Sep 17 00:00:00 2001 From: Alex Black Date: Tue, 27 Aug 2019 19:10:31 +1000 Subject: [PATCH 17/56] Another small fix (#175) * Layer norm 4d case fixes Signed-off-by: AlexDBlack * Small fix Signed-off-by: AlexDBlack --- .../main/java/org/nd4j/autodiff/validation/OpValidation.java | 4 ++-- .../imports/converters/DifferentialFunctionClassHolder.java | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/validation/OpValidation.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/validation/OpValidation.java index e9ad61c04..3e329ad13 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/validation/OpValidation.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/validation/OpValidation.java @@ -341,8 +341,8 @@ public class OpValidation { //Finally: check execution/output - Map outOrig = original.execAll(tc.placeholderValues()); - Map outDe = deserialized.execAll(tc.placeholderValues()); + Map outOrig = original.outputAll(tc.placeholderValues()); + Map outDe = deserialized.outputAll(tc.placeholderValues()); Preconditions.checkState(outOrig.keySet().equals(outDe.keySet()), "Keysets for execution after deserialization does not match key set for original model"); for(String s : outOrig.keySet()){ diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/converters/DifferentialFunctionClassHolder.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/converters/DifferentialFunctionClassHolder.java index a92066d7a..82bfdc843 100644 --- 
a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/converters/DifferentialFunctionClassHolder.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/imports/converters/DifferentialFunctionClassHolder.java @@ -64,6 +64,7 @@ public class DifferentialFunctionClassHolder { add("outputVariables"); add("tArguments"); add("iArguments"); + add("bArguments"); add("hash"); add("opName"); add("sameDiff"); From efbfafe3f7ebd0ef593b1a88a398e1fc4e3d9958 Mon Sep 17 00:00:00 2001 From: raver119 Date: Tue, 27 Aug 2019 12:35:14 +0300 Subject: [PATCH 18/56] [WIP] gatherND fix (#176) * one test for gather_nd Signed-off-by: raver119 * get rid of old concat tests Signed-off-by: raver119 * one printf Signed-off-by: raver119 * one more legacy test removed Signed-off-by: raver119 * gatherNd launch params fix Signed-off-by: raver119 * gatherNd launch params fix Signed-off-by: raver119 --- .../ops/declarable/helpers/cuda/gather_nd.cu | 3 +- .../layers_tests/DeclarableOpsTests5.cpp | 17 + .../layers_tests/NDArrayCudaBasicsTests.cu | 302 +----------------- 3 files changed, 20 insertions(+), 302 deletions(-) diff --git a/libnd4j/include/ops/declarable/helpers/cuda/gather_nd.cu b/libnd4j/include/ops/declarable/helpers/cuda/gather_nd.cu index 709f0ed2c..6587b4ca7 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/gather_nd.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/gather_nd.cu @@ -106,6 +106,7 @@ namespace nd4j { const auto xOffset = shape::getOffset(0, xShapeInfo + 1, xShapeInfo + xRank + 1, xCoordStart, xRank); z[zOffset] = x[xOffset]; + printf("z[%lld] = x[%lld] = %f\n", zOffset, xOffset, (float) z[zOffset]); } } @@ -124,7 +125,7 @@ namespace nd4j { const int maxRank = nd4j::math::nd4j_max(indices.rankOf(), nd4j::math::nd4j_max(input.rankOf(), output.rankOf())); - const int threadsPerBlock = MAX_NUM_THREADS; + const int threadsPerBlock = 256; const int blocksPerGrid = (output.lengthOf() + threadsPerBlock - 1) / threadsPerBlock; const int sharedMem = 8 * threadsPerBlock * maxRank + 128; diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests5.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests5.cpp index b596ebcd5..1fbe81046 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests5.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests5.cpp @@ -815,6 +815,23 @@ TEST_F(DeclarableOpsTests5, gatherNd_test7) { delete results; } +////////////////////////////////////////////////////////////////////// +TEST_F(DeclarableOpsTests5, gatherNd_test8) { + auto x = NDArrayFactory::create('c', {2, 2}, {1., 2., 3., 4.}); + auto y = NDArrayFactory::create('c', {2, 2}, {0, 0, 1, 1}); + auto e = NDArrayFactory::create('c', {2}, {1., 4.}); + + nd4j::ops::gather_nd op; + auto result = op.execute({&x, &y}, {}, {}); + ASSERT_EQ(Status::OK(), result->status()); + + auto z = result->at(0); + + ASSERT_EQ(e, *z); + + delete result; +} + ////////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests5, reverse_sequense_test1) { diff --git a/libnd4j/tests_cpu/layers_tests/NDArrayCudaBasicsTests.cu b/libnd4j/tests_cpu/layers_tests/NDArrayCudaBasicsTests.cu index 7b9e788f7..4ab884d28 100644 --- a/libnd4j/tests_cpu/layers_tests/NDArrayCudaBasicsTests.cu +++ b/libnd4j/tests_cpu/layers_tests/NDArrayCudaBasicsTests.cu @@ -2261,304 +2261,4 @@ TEST_F(NDArrayCudaBasicsTests, Test_Empty_4) { ASSERT_TRUE(x->isEmpty()); delete x; -} - -// printCudaGlobal<<<1,1,0,*stream>>>(dX, 6); -// printCudaGlobal<<<1,1,0,*stream>>>(dXShapeInfo, 8); -// 
printCudaGlobal<<<1,1,0,*stream>>>(dZ, 2); -// printCudaGlobal<<<1,1,0,*stream>>>(dZShapeInfo, 6); -// printCudaGlobal<<<1,1,0,*stream>>>(dimension, 1); -// printCudaGlobal<<<1,1,0,*stream>>>(tadShapeInfo, 6); -// printCudaGlobal<<<1,1,0,*stream>>>(tadOffsets, 2); -// cudaStreamSynchronize(*stream); - -TEST_F(NDArrayCudaBasicsTests, Test_ConcatNative_1) { - - auto x = NDArrayFactory::create('c', {5,2}, {0,1,2,3,4,5,6,7,8,9}); - x.syncToHost(); - auto z = NDArrayFactory::create('c', {5, 8}); - z.syncToHost(); - - std::vector buffers(4); - std::vector shapes(4); - std::vector hostShapes(4); - - for (size_t i = 0; i < buffers.size(); i++) { - buffers[i] = x.specialBuffer(); - shapes[i] = x.specialShapeInfo(); - hostShapes[i] = x.shapeInfo(); - } - Nd4jPointer extra[2]; - extra[1] = x.getContext()->getCudaStream(); - ::concat(extra, 1, 4, nullptr, (Nd4jPointer*)hostShapes.data(), (Nd4jPointer*)buffers.data(), (Nd4jPointer*)shapes.data(), nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr); -} - -TEST_F(NDArrayCudaBasicsTests, Test_ConcatNative_2) { - - auto x = NDArrayFactory::create('c', {5,2}, {0,1,2,3,4,5,6,7,8,9}); - auto z = NDArrayFactory::create('f', {5, 8}); - - std::vector buffers(4); - std::vector shapes(4); - std::vector hostShapes(4); - - x.syncToHost(); - z.syncToHost(); - - for (size_t i = 0; i < buffers.size(); i++) { - buffers[i] = x.specialBuffer(); - shapes[i] = x.specialShapeInfo(); - hostShapes[i] = x.shapeInfo(); - } - - Nd4jPointer extra[2]; - extra[1] = x.getContext()->getCudaStream(); - - ::concat(extra, 1, 4, nullptr, (Nd4jPointer*)hostShapes.data(), (Nd4jPointer*)buffers.data(), (Nd4jPointer*)shapes.data(), nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr); -} - -TEST_F(NDArrayCudaBasicsTests, Test_ConcatNative_3) { - - auto x = NDArrayFactory::create('c', {2,3}, {1,2,3,4,5,6}); - auto y = NDArrayFactory::create('c', {1,3}, {7,8,9}); - auto z = NDArrayFactory::create('f', {3, 3}); - - - std::vector buffers(2); - std::vector shapes(2); - std::vector hostShapes(2); - - x.syncToHost(); - y.syncToHost(); - z.syncToHost(); - - buffers[0] = x.specialBuffer(); shapes[0] = x.specialShapeInfo(); hostShapes[0] = x.shapeInfo(); - buffers[1] = y.specialBuffer(); shapes[1] = y.specialShapeInfo(); hostShapes[1] = y.shapeInfo(); - - Nd4jPointer extra[2]; - extra[1] = x.getContext()->getCudaStream(); - - ::concat(extra, 0, 2, nullptr, (Nd4jPointer*)hostShapes.data(), (Nd4jPointer*)buffers.data(), (Nd4jPointer*)shapes.data(), nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr); -} - -TEST_F(NDArrayCudaBasicsTests, Test_ConcatNative_4) { - - auto x = NDArrayFactory::create('c', {2,3}, {1,2,3,4,5,6}); - auto y = NDArrayFactory::create('c', {1,3}, {7,8,9}); - auto z = NDArrayFactory::create('c', {3, 3}); - - x.syncToHost(); - y.syncToHost(); - z.syncToHost(); - - std::vector buffers(2); - std::vector shapes(2); - std::vector hostShapes(2); - - buffers[0] = x.specialBuffer(); shapes[0] = x.specialShapeInfo(); hostShapes[0] = x.shapeInfo(); - buffers[1] = y.specialBuffer(); shapes[1] = y.specialShapeInfo(); hostShapes[1] = y.shapeInfo(); - - Nd4jPointer extra[2]; - extra[1] = x.getContext()->getCudaStream(); - - ::concat(extra, 0, 2, nullptr, (Nd4jPointer*)hostShapes.data(), (Nd4jPointer*)buffers.data(), (Nd4jPointer*)shapes.data(), nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr); -} - -TEST_F(NDArrayCudaBasicsTests, Test_ConcatNative_5) { - - auto x = 
NDArrayFactory::create('c', {1,2,3}, {1,2,3,4,5,6}); - auto y = NDArrayFactory::create('c', {1,2,3}, {7,8,9,10,11, 12}); - - auto z = NDArrayFactory::create('c', {2, 2, 3}); - auto stream = x.getContext()->getCudaStream();//reinterpret_cast(&nativeStream); - std::vector buffers(2); - std::vector shapes(2); - std::vector hostShapes(2); - - buffers[0] = x.specialBuffer(); shapes[0] = x.specialShapeInfo(); hostShapes[0] = x.shapeInfo(); - buffers[1] = y.specialBuffer(); shapes[1] = y.specialShapeInfo(); hostShapes[1] = y.shapeInfo(); - - Nd4jPointer extra[2]; - extra[1] = x.getContext()->getCudaStream(); - - ::concat(extra, 0, 2, nullptr, (Nd4jPointer*)hostShapes.data(), (Nd4jPointer*)buffers.data(), (Nd4jPointer*)shapes.data(), nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr); -} - -TEST_F(NDArrayCudaBasicsTests, Test_ConcatNative_6) { - - auto x1 = NDArrayFactory::create('c', {2,2,3}, {1,2,3,4,5,6,7,8, 9, 10,11,12}); - auto x2 = NDArrayFactory::create('c', {1,2,3}, {13,14,15,16,17, 18}); - auto x3 = NDArrayFactory::create('c', {1,2,3}, {19,20,21,22,23, 24}); - - x1.syncToHost(); - x2.syncToHost(); - x3.syncToHost(); - - auto z = NDArrayFactory::create('c', {4, 2, 3}); - - std::vector buffers(3); - std::vector shapes(3); - std::vector hostShapes(3); - - buffers[0] = x1.specialBuffer(); shapes[0] = x1.specialShapeInfo(); hostShapes[0] = x1.shapeInfo(); - buffers[1] = x2.specialBuffer(); shapes[1] = x2.specialShapeInfo(); hostShapes[1] = x2.shapeInfo(); - buffers[2] = x3.specialBuffer(); shapes[2] = x3.specialShapeInfo(); hostShapes[2] = x3.shapeInfo(); - - Nd4jPointer extra[2]; - extra[1] = x1.getContext()->getCudaStream(); - - ::concat(extra, 0, 3, nullptr, (Nd4jPointer*)hostShapes.data(), (Nd4jPointer*)buffers.data(), (Nd4jPointer*)shapes.data(), nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr); -} - -TEST_F(NDArrayCudaBasicsTests, Test_ConcatNative_7) { - - auto x1 = NDArrayFactory::create(1); - auto x2 = NDArrayFactory::create(2); - auto x3 = NDArrayFactory::create(3); - - auto z = NDArrayFactory::create('c', {3}, {1,2,3}); - - x1.syncToHost(); - x2.syncToHost(); - x3.syncToHost(); - - std::vector buffers(3); - std::vector shapes(3); - std::vector hostShapes(3); - - buffers[0] = x1.specialBuffer(); shapes[0] = x1.specialShapeInfo(); hostShapes[0] = x1.shapeInfo(); - buffers[1] = x2.specialBuffer(); shapes[1] = x2.specialShapeInfo(); hostShapes[1] = x2.shapeInfo(); - buffers[2] = x3.specialBuffer(); shapes[2] = x3.specialShapeInfo(); hostShapes[2] = x3.shapeInfo(); - - Nd4jPointer extra[2]; - extra[1] = x1.getContext()->getCudaStream(); - - ::concat(extra, 0, 3, nullptr, (Nd4jPointer*)hostShapes.data(), (Nd4jPointer*)buffers.data(), (Nd4jPointer*)shapes.data(), nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr); -} - -TEST_F(NDArrayCudaBasicsTests, Test_ConcatNative_8) { - - auto totalCount = 1000; - auto width = 300; - std::vector lx(totalCount); - for (int i = 0; i < totalCount; i++) { - lx[i] = NDArrayFactory::create('c', {1, width}); - lx[i].assign(i); - lx[i].syncToHost(); - } - - auto z = NDArrayFactory::create('c', {totalCount, width}); - - std::vector buffers(totalCount); - std::vector shapes(totalCount); - std::vector hostShapes(totalCount); - - for (size_t i = 0; i < lx.size(); i++) { - buffers[i] = lx[i].specialBuffer(); - shapes[i] = lx[i].specialShapeInfo(); - hostShapes[i] = lx[i].shapeInfo(); - } - - Nd4jPointer extra[2]; - extra[1] = 
nd4j::LaunchContext::defaultContext()->getCudaStream(); - - ::concat(extra, 0, totalCount, nullptr, (Nd4jPointer*)hostShapes.data(), (Nd4jPointer*)buffers.data(), (Nd4jPointer*)shapes.data(), nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr); -} - -TEST_F(NDArrayCudaBasicsTests, TestTear_1) { - auto input = NDArrayFactory::create('c', {1, 10, 10}); - std::vector arrays; // = {NDArrayFactory::create('c', {1, 10, 10}), NDArrayFactory::create('c', {1, 10, 10}), NDArrayFactory::create('c', {1, 10, 10}), NDArrayFactory::create('c', {1, 10, 10}), NDArrayFactory::create('c', {1, 10, 10})}; - int total = 151; - for (int e = 0; e < total; e++) { - input.assign(e); - arrays.emplace_back(input); - } - auto z = NDArrayFactory::create('c', {total, 10, 10}); - - Nd4jPointer extra[1]; - extra[1] = input.getContext()->getCudaStream(); - - std::vector buffers(total); - std::vector shapes(total); - std::vector hostShapes(total); - - for (size_t i = 0; i < buffers.size(); i++) { - buffers[i] = arrays[i].specialBuffer(); - shapes[i] = arrays[i].specialShapeInfo(); - hostShapes[i] = arrays[i].shapeInfo(); - } - - ::concat(extra, 0, total, nullptr, (Nd4jPointer*)hostShapes.data(), (Nd4jPointer*)buffers.data(), (Nd4jPointer*)shapes.data(), nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr); - nd4j::ops::tear op; - - auto result = op.execute({&z}, {}, {1, 2}); - //ASSERT_EQ(10, result->size()); - auto e = result->size() - 1; - //for (size_t e = 0; e < result->size(); e++) { -// arrays[e].printIndexedBuffer("Input list at 40"); -// result->at(e)->printIndexedBuffer("OUtput TEAR at 40"); - //} -// ASSERT_TRUE(tads->at(e)->equalsTo(result->at(e))); - - delete result; -// delete tads; -} - -TEST_F(NDArrayCudaBasicsTests, TestTear_2) { - - auto input = NDArrayFactory::create('c', {1, 10, 10}); - - std::vector arrays; // = {NDArrayFactory::create('c', {1, 10, 10}), NDArrayFactory::create('c', {1, 10, 10}), NDArrayFactory::create('c', {1, 10, 10}), NDArrayFactory::create('c', {1, 10, 10}), NDArrayFactory::create('c', {1, 10, 10})}; - for (int e = 0; e < 10; e++) { - input.assign(e); - arrays.emplace_back(input); - arrays[e].syncToHost(); - } - - auto z = NDArrayFactory::create('c', {10, 10, 10}); - - Nd4jPointer extra[2]; - extra[1] = input.getContext()->getCudaStream(); - - std::vector buffers(10); - std::vector shapes(10); - std::vector hostShapes(10); - - for (size_t i = 0; i < buffers.size(); i++) { - buffers[i] = arrays[i].specialBuffer(); - shapes[i] = arrays[i].specialShapeInfo(); - hostShapes[i] = arrays[i].shapeInfo(); - } - - std::vector dimsToExclude({1,2}); - - - ::concat(extra, 0, 10, nullptr, (Nd4jPointer*)hostShapes.data(), (Nd4jPointer*)buffers.data(), (Nd4jPointer*)shapes.data(), nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr); - - auto packX = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(input.getShapeInfo(), dimsToExclude); - //std::vector arraysData(arrays.size()); - Nd4jPointer* arraysData; - cudaError_t err = cudaMalloc(&arraysData, arrays.size() * sizeof(void*)); - if (err != 0) { - printf("Cannot allocate device memory for targets due error %d\n", err); - ASSERT_TRUE(false); - } - for (size_t i = 0; i < arrays.size(); i++) { - Nd4jPointer target = arrays[i].specialBuffer(); - cudaMemcpy(&arraysData[i], &target, sizeof(Nd4jPointer), cudaMemcpyHostToDevice); - } - ::tear(extra, z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), arraysData, input.specialShapeInfo(), 
packX.specialShapeInfo(), packX.specialOffsets()); -// auto result = op.execute({&z}, {}, {1, 2}); - - //ASSERT_EQ(10, result->size()); - err = cudaFree(arraysData); - if (err != 0) { - printf("Cannot deallocate device memory for targets due error %d\n", err); - ASSERT_TRUE(false); - } - -// ASSERT_TRUE(tads->at(e)->equalsTo(result->at(e))); - -// delete result; -// delete tads; -} +} \ No newline at end of file From fd22a8ecc753b4cbddc89e8eb8260fc439bbda90 Mon Sep 17 00:00:00 2001 From: Alex Black Date: Tue, 27 Aug 2019 19:46:26 +1000 Subject: [PATCH 19/56] Small build fix, after last PR (#177) Signed-off-by: Alex Black --- .../org/deeplearning4j/nn/layers/BaseLayer.java | 4 ++-- .../nn/layers/recurrent/SimpleRnn.java | 8 ++++---- .../api/ops/impl/transforms/custom/LayerNorm.java | 13 +++++-------- 3 files changed, 11 insertions(+), 14 deletions(-) diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java index 405889f0e..00ca7e7c4 100755 --- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java +++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/BaseLayer.java @@ -93,7 +93,7 @@ public abstract class BaseLayer end){ dldzNext = workspaceMgr.createUninitialized(ArrayType.BP_WORKING_MEM, dldzCurrent.dataType(), dldzCurrent.shape()); INDArray ggCur = workspaceMgr.createUninitialized(ArrayType.BP_WORKING_MEM, gg.dataType(), grg.shape()); - Nd4j.getExecutioner().exec(new LayerNormBp(rCurrent, gr, dldzCurrent, dldzNext, ggCur, 1)); + Nd4j.getExecutioner().exec(new LayerNormBp(rCurrent, gr, dldzCurrent, dldzNext, ggCur, true, 1)); grg.addi(ggCur); }else{ dldzNext = dldzCurrent; @@ -256,7 +256,7 @@ public class SimpleRnn extends BaseRecurrentLayer Date: Tue, 27 Aug 2019 13:21:01 +0300 Subject: [PATCH 20/56] [WIP] More fixes (#178) * skip string arrays for device validation Signed-off-by: raver119 * histogram_fixed_width now really supports indexing types Signed-off-by: raver119 --- .../transforms/histogram_fixed_width.cpp | 2 +- .../helpers/cuda/histogramFixedWidth.cu | 24 +++++++++---------- .../jita/allocator/impl/AtomicAllocator.java | 2 +- .../flow/impl/SynchronousFlowController.java | 5 ++-- .../ops/executioner/CudaOpContext.java | 4 ++-- 5 files changed, 19 insertions(+), 18 deletions(-) diff --git a/libnd4j/include/ops/declarable/generic/transforms/histogram_fixed_width.cpp b/libnd4j/include/ops/declarable/generic/transforms/histogram_fixed_width.cpp index b3063c75d..529446e12 100644 --- a/libnd4j/include/ops/declarable/generic/transforms/histogram_fixed_width.cpp +++ b/libnd4j/include/ops/declarable/generic/transforms/histogram_fixed_width.cpp @@ -49,7 +49,7 @@ CUSTOM_OP_IMPL(histogram_fixed_width, 2, 1, false, 0, 0) { DECLARE_TYPES(histogram_fixed_width) { getOpDescriptor() ->setAllowedInputTypes(nd4j::DataType::ANY) - ->setAllowedOutputTypes({ALL_INTS}); + ->setAllowedOutputTypes({ALL_INDICES}); } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/histogramFixedWidth.cu b/libnd4j/include/ops/declarable/helpers/cuda/histogramFixedWidth.cu index ebde4909c..317f1d857 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/histogramFixedWidth.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/histogramFixedWidth.cu @@ -27,16 +27,16 @@ namespace ops { namespace helpers { /////////////////////////////////////////////////////////////////// -template +template __global__ static void 
histogramFixedWidthCuda( const void* vx, const Nd4jLong* xShapeInfo, void* vz, const Nd4jLong* zShapeInfo, - const T leftEdge, const T rightEdge) { + const X leftEdge, const X rightEdge) { - const T* x = reinterpret_cast(vx); - Nd4jLong* z = reinterpret_cast(vz); + const auto x = reinterpret_cast(vx); + auto z = reinterpret_cast(vz); __shared__ Nd4jLong xLen, zLen, totalThreads, nbins; - __shared__ T binWidth, secondEdge, lastButOneEdge; + __shared__ X binWidth, secondEdge, lastButOneEdge; if (threadIdx.x == 0) { @@ -55,7 +55,7 @@ __global__ static void histogramFixedWidthCuda( const void* vx, const Nd4jLong* for (Nd4jLong i = tid; i < xLen; i += totalThreads) { - const T value = x[shape::getIndexOffset(i, xShapeInfo, xLen)]; + const X value = x[shape::getIndexOffset(i, xShapeInfo, xLen)]; Nd4jLong zIndex; @@ -66,18 +66,18 @@ __global__ static void histogramFixedWidthCuda( const void* vx, const Nd4jLong* else zIndex = static_cast((value - leftEdge) / binWidth); - nd4j::math::atomics::nd4j_atomicAdd(&z[shape::getIndexOffset(zIndex, zShapeInfo, nbins)], 1LL); + nd4j::math::atomics::nd4j_atomicAdd(&z[shape::getIndexOffset(zIndex, zShapeInfo, nbins)], 1); } } /////////////////////////////////////////////////////////////////// -template +template __host__ static void histogramFixedWidthCudaLauncher(const cudaStream_t *stream, const NDArray& input, const NDArray& range, NDArray& output) { - const T leftEdge = range.e(0); - const T rightEdge = range.e(1); + const X leftEdge = range.e(0); + const X rightEdge = range.e(1); - histogramFixedWidthCuda<<<512, MAX_NUM_THREADS / 2, 512, *stream>>>(input.getSpecialBuffer(), input.getSpecialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), leftEdge, rightEdge); + histogramFixedWidthCuda<<<256, 256, 1024, *stream>>>(input.getSpecialBuffer(), input.getSpecialShapeInfo(), output.specialBuffer(), output.specialShapeInfo(), leftEdge, rightEdge); } //////////////////////////////////////////////////////////////////////// @@ -89,7 +89,7 @@ void histogramFixedWidth(nd4j::LaunchContext* context, const NDArray& input, con PointersManager manager(context, "histogramFixedWidth"); NDArray::prepareSpecialUse({&output}, {&input}); - BUILD_SINGLE_SELECTOR(input.dataType(), histogramFixedWidthCudaLauncher, (context->getCudaStream(), input, range, output), LIBND4J_TYPES); + BUILD_DOUBLE_SELECTOR(input.dataType(), output.dataType(), histogramFixedWidthCudaLauncher, (context->getCudaStream(), input, range, output), LIBND4J_TYPES, INDEXING_TYPES); NDArray::registerSpecialUse({&output}, {&input}); manager.synchronize(); diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/jita/allocator/impl/AtomicAllocator.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/jita/allocator/impl/AtomicAllocator.java index 8fbf0a000..0ec1876ca 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/jita/allocator/impl/AtomicAllocator.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/jita/allocator/impl/AtomicAllocator.java @@ -312,7 +312,7 @@ public class AtomicAllocator implements Allocator { @Override public Pointer getPointer(INDArray array, CudaContext context) { // DataBuffer buffer = array.data().originalDataBuffer() == null ? 
array.data() : array.data().originalDataBuffer(); - if (array.isEmpty()) + if (array.isEmpty() || array.isS()) return null; return memoryHandler.getDevicePointer(array.data(), context); diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/jita/flow/impl/SynchronousFlowController.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/jita/flow/impl/SynchronousFlowController.java index fb4510f1b..d81de381a 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/jita/flow/impl/SynchronousFlowController.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/jita/flow/impl/SynchronousFlowController.java @@ -172,7 +172,7 @@ public class SynchronousFlowController implements FlowController { val cId = allocator.getDeviceId(); - if (result != null && !result.isEmpty()) { + if (result != null && !result.isEmpty() && !result.isS()) { Nd4j.getCompressor().autoDecompress(result); prepareDelayedMemory(result); val pointData = allocator.getAllocationPoint(result); @@ -198,7 +198,8 @@ public class SynchronousFlowController implements FlowController { return context; for (INDArray operand : operands) { - if (operand == null || operand.isEmpty()) + // empty or String arrays can be skipped + if (operand == null || operand.isEmpty() || operand.isS()) continue; Nd4j.getCompressor().autoDecompress(operand); diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaOpContext.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaOpContext.java index 26d363f32..cf779f537 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaOpContext.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaOpContext.java @@ -100,7 +100,7 @@ public class CudaOpContext extends BaseOpContext implements OpContext { @Override public Pointer contextPointer() { for (val v:fastpath_in.values()) { - if (v.isEmpty()) + if (v.isEmpty() || v.isS()) continue; AtomicAllocator.getInstance().getAllocationPoint(v).tickHostRead(); @@ -111,7 +111,7 @@ public class CudaOpContext extends BaseOpContext implements OpContext { } for (val v:fastpath_out.values()) { - if (v.isEmpty()) + if (v.isEmpty() || v.isS()) continue; AtomicAllocator.getInstance().getAllocationPoint(v).tickHostRead(); From dff599aa8fbbaa45765bb4ea82d494082c6300ac Mon Sep 17 00:00:00 2001 From: Alex Black Date: Tue, 27 Aug 2019 20:43:36 +1000 Subject: [PATCH 21/56] Test fix (#179) Signed-off-by: Alex Black --- .../org/nd4j/autodiff/opvalidation/LayerOpValidation.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/LayerOpValidation.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/LayerOpValidation.java index fde2170a6..760165b3b 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/LayerOpValidation.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/autodiff/opvalidation/LayerOpValidation.java @@ -1178,7 +1178,7 @@ public class LayerOpValidation extends BaseOpValidation { final INDArray res = standardized.mulRowVector(gain).addRowVector(bias); final INDArray output = Nd4j.zerosLike(res); - Nd4j.getExecutioner().exec(new LayerNorm(standardized, gain, bias, output, 
1)); + Nd4j.getExecutioner().exec(new LayerNorm(standardized, gain, bias, output, true, 1)); assertEquals(res, output); } @@ -1216,7 +1216,7 @@ public class LayerOpValidation extends BaseOpValidation { final INDArray res = standardized.mulRowVector(gain); final INDArray output = Nd4j.zerosLike(res); - Nd4j.getExecutioner().exec(new LayerNorm(standardized, gain, output, 1)); + Nd4j.getExecutioner().exec(new LayerNorm(standardized, gain, output, true, 1)); assertEquals(res, output); } From 0e523490e93d1b2be71a238edbf3ec84cfa8f988 Mon Sep 17 00:00:00 2001 From: raver119 Date: Tue, 27 Aug 2019 14:30:37 +0300 Subject: [PATCH 22/56] [WIP] confusion (#180) * skip string arrays for device validation Signed-off-by: raver119 * confusion_matrix fix Signed-off-by: raver119 --- .../ops/declarable/helpers/cuda/confusion.cu | 56 +++++++------------ 1 file changed, 20 insertions(+), 36 deletions(-) diff --git a/libnd4j/include/ops/declarable/helpers/cuda/confusion.cu b/libnd4j/include/ops/declarable/helpers/cuda/confusion.cu index 513911f97..12f14b20b 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/confusion.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/confusion.cu @@ -30,10 +30,10 @@ namespace helpers { template __global__ static void copyBuffers(Nd4jLong* destination, void const* source, Nd4jLong bufferLength) { - const auto tid = blockIdx.x * gridDim.x + threadIdx.x; + const auto tid = blockIdx.x * blockDim.x + threadIdx.x; const auto step = gridDim.x * blockDim.x; for (int t = tid; t < bufferLength; t += step) { - destination[t] = reinterpret_cast(source)[t]; + destination[t] = static_cast(reinterpret_cast(source)[t]); } } @@ -51,38 +51,24 @@ namespace helpers { } __syncthreads(); - const auto tid = blockIdx.x * gridDim.x + threadIdx.x; + const auto tid = blockIdx.x * blockDim.x + threadIdx.x; const auto step = gridDim.x * blockDim.x; for (int t = tid; t < bufferLength; t += step) { - //auto tX = reinterpret_cast(inputList[t]); - //auto xShape = reinterpret_cast(inputShapeList[t]); auto label = labelsBuffer[t]; //->e(j); auto pred = predictionBuffer[t]; //->e(j); auto tZ = z + tadOffsets[label]; T val = (weightsBuffer == nullptr ? (T)1.0f : w[t]); - //for (int e = threadIdx.x; e < arrLen; e += blockDim.x) { - - tZ[shape::getIndexOffset(pred, tadShape, arrLen)] = val; //tX[shape::getIndexOffset(e, , arrLen)]; + auto idx = shape::getIndexOffset(pred, tadShape, arrLen); + tZ[idx] = val; } } - template + template void _confusionFunctor(nd4j::LaunchContext * context, NDArray* labels, NDArray* predictions, NDArray* weights, NDArray* output) { -// std::unique_ptr arrs(output->allTensorsAlongDimension({1})); -// -//#pragma omp parallel for if(labels->lengthOf() > Environment::getInstance()->elementwiseThreshold()) schedule(static) -// for (int j = 0; j < labels->lengthOf(); ++j){ -// auto label = labels->e(j); -// auto pred = predictions->e(j); -// T value = (weights == nullptr ? 
(T)1.0f : weights->e(j)); -// (*arrs->at(label)).p(pred, value); -// } - - int dimension = 1; - - auto pack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), dimension); + auto stream = context->getCudaStream(); + auto pack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(output->shapeInfo(), 1); PointersManager manager(context, "helpers::confusion"); @@ -90,26 +76,26 @@ namespace helpers { Nd4jLong* predictionLongBuffer = predictions->dataType() == nd4j::DataType::INT64?(Nd4jLong*)predictions->specialBuffer():nullptr; if (labelsLongBuffer == nullptr) { - cudaError_t err = cudaMalloc(&labelsLongBuffer, labels->lengthOf() * sizeof(Nd4jLong)); + auto err = cudaMalloc(&labelsLongBuffer, labels->lengthOf() * sizeof(Nd4jLong)); if (err != 0) throw nd4j::cuda_exception::build("Cannot allocate memory for labels long buffer", err); // copy with type conversion - copyBuffers<<<256, 512, 8192>>>(labelsLongBuffer, labels->getSpecialBuffer(), labels->lengthOf()); + copyBuffers<<<256, 512, 1024, *stream>>>(labelsLongBuffer, labels->getSpecialBuffer(), labels->lengthOf()); } if (predictionLongBuffer == nullptr) { - cudaError_t err = cudaMalloc(&predictionLongBuffer, predictions->lengthOf() * sizeof(Nd4jLong)); + auto err = cudaMalloc(&predictionLongBuffer, predictions->lengthOf() * sizeof(Nd4jLong)); if (err != 0) throw nd4j::cuda_exception::build("Cannot allocate memory for predictions long buffer", err); // copy with type conversion - copyBuffers<<<256, 512, 8192>>>(predictionLongBuffer, predictions->getSpecialBuffer(), predictions->lengthOf()); + copyBuffers<<<256, 512, 1024, *stream>>>(predictionLongBuffer, predictions->getSpecialBuffer(), predictions->lengthOf()); } auto bufferLength = labels->lengthOf(); dim3 launchDims(32, 32, 1024); - auto stream = context->getCudaStream(); - confusionFunctorKernel<<>>(labelsLongBuffer, predictionLongBuffer, - bufferLength, weights != nullptr? weights->getSpecialBuffer():nullptr, output->specialBuffer(), pack.specialShapeInfo(), pack.specialOffsets()); + confusionFunctorKernel<<>>(labelsLongBuffer, predictionLongBuffer, bufferLength, weights != nullptr? 
weights->getSpecialBuffer():nullptr, output->specialBuffer(), pack.specialShapeInfo(), pack.specialOffsets()); + + manager.synchronize(); if (predictionLongBuffer != predictions->getSpecialBuffer()) { cudaError_t err = cudaFree(predictionLongBuffer); @@ -122,17 +108,15 @@ namespace helpers { if (err != 0) throw nd4j::cuda_exception::build("Cannot deallocate memory for labels long buffer", err); } - manager.synchronize(); } void confusionFunctor(nd4j::LaunchContext * context, NDArray* labels, NDArray* predictions, NDArray* weights, NDArray* output) { - auto xType = output->dataType(); // weights can be null - - BUILD_SINGLE_SELECTOR(xType, _confusionFunctor, (context, labels, predictions, weights, output), NUMERIC_TYPES); + auto xType = predictions->dataType(); + auto zType = output->dataType(); // weights can be null + NDArray::prepareSpecialUse({output}, {labels, predictions, weights}); + BUILD_DOUBLE_SELECTOR(xType, zType, _confusionFunctor, (context, labels, predictions, weights, output), INDEXING_TYPES, NUMERIC_TYPES); + NDArray::registerSpecialUse({output}, {labels, predictions, weights}); } - - BUILD_SINGLE_TEMPLATE(template void _confusionFunctor, (nd4j::LaunchContext * context, NDArray* labels, NDArray* predictions, NDArray* weights, NDArray* output);, NUMERIC_TYPES); - } } } \ No newline at end of file From 7f0c660d8b7ad69bcf4f5f733ea130ac1e1bc619 Mon Sep 17 00:00:00 2001 From: raver119 Date: Tue, 27 Aug 2019 15:05:43 +0300 Subject: [PATCH 23/56] [WIP] HGemm (#181) * skip string arrays for device validation Signed-off-by: raver119 * confusion_matrix fix Signed-off-by: raver119 * exclude cublasHGemm from archs < 530 Signed-off-by: raver119 --- libnd4j/include/helpers/cuda_off/MmulHelper.cu | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libnd4j/include/helpers/cuda_off/MmulHelper.cu b/libnd4j/include/helpers/cuda_off/MmulHelper.cu index dda709545..19e0d5baf 100644 --- a/libnd4j/include/helpers/cuda_off/MmulHelper.cu +++ b/libnd4j/include/helpers/cuda_off/MmulHelper.cu @@ -228,6 +228,7 @@ NDArray* MmulHelper::mmulMxM(const NDArray* A, const NDArray* B, NDArray* C, dou float alphaF(alpha), betaF(beta); status = cublasSgemm(*handle, transAblas, transBblas, M, N, K, &alphaF, (float*)pA->getSpecialBuffer(), lda, (float*)pB->getSpecialBuffer(), ldb, &betaF, (float*)pC->getSpecialBuffer(), ldc); } +#if __CUDA_ARCH__ >= 530 else if(ABC && aType == DataType::HALF) { float16 alphaH(alpha), betaH(beta); status = cublasHgemm(*handle, transAblas, transBblas, M, N, K, &alphaH.data, (__half*)pA->getSpecialBuffer(), lda, (__half*)pB->getSpecialBuffer(), ldb, &betaH.data, (__half*)pC->getSpecialBuffer(), ldc); @@ -240,6 +241,7 @@ NDArray* MmulHelper::mmulMxM(const NDArray* A, const NDArray* B, NDArray* C, dou float alphaF(alpha), betaF(beta); status = cublasSgemmEx(*handle, transAblas, transBblas, M, N, K, &alphaF, pA->getSpecialBuffer(), CUDA_R_16F, lda, pB->getSpecialBuffer(), CUDA_R_16F, ldb, &betaF, pC->getSpecialBuffer(), CUDA_R_32F, ldc); } +#endif else { dim3 threadsPerBlock(N, M); dim3 blocksPerGrid(1, 1); From 9d325ad0700f23754b54405241a2aa5f9d253219 Mon Sep 17 00:00:00 2001 From: Alex Black Date: Tue, 27 Aug 2019 23:27:41 +1000 Subject: [PATCH 24/56] Small optimization to Nd4j.readNumpy (#183) Signed-off-by: Alex Black --- .../src/main/java/org/nd4j/linalg/factory/Nd4j.java | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/Nd4j.java 
b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/Nd4j.java index d2a4f94a4..c8baedfa5 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/Nd4j.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/Nd4j.java @@ -2265,15 +2265,12 @@ public class Nd4j { Preconditions.checkState(data.length == numColumns, "Data has inconsistent number of columns: data length %s, numColumns %s", data.length, numColumns); data2.add(readSplit(data)); - - } - ret = Nd4j.create(dataType, data2.size(), numColumns); - for (int i = 0; i < data2.size(); i++) { - float[] row = data2.get(i); - INDArray arr = Nd4j.create(row, new long[]{1, row.length}, dataType); - ret.putRow(i, arr); + float[][] fArr = new float[data2.size()][0]; + for(int i=0; i Date: Tue, 27 Aug 2019 18:25:39 +0300 Subject: [PATCH 25/56] Shugeo segment fix2 (#185) * Added test for segment_mean. * Added another test for segment_mean. * Fixed segment_* ops helpers for cuda to proper use external data. --- .../declarable/helpers/cuda/segment_max.cu | 8 +++++ .../declarable/helpers/cuda/segment_mean.cu | 14 +++++++- .../declarable/helpers/cuda/segment_min.cu | 8 +++++ .../declarable/helpers/cuda/segment_prod.cu | 8 +++++ .../declarable/helpers/cuda/segment_sqrtn.cu | 4 +++ .../declarable/helpers/cuda/segment_sum.cu | 8 +++++ .../layers_tests/DeclarableOpsTests7.cpp | 36 +++++++++++++++++++ 7 files changed, 85 insertions(+), 1 deletion(-) diff --git a/libnd4j/include/ops/declarable/helpers/cuda/segment_max.cu b/libnd4j/include/ops/declarable/helpers/cuda/segment_max.cu index 180af41e1..dc91a2704 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/segment_max.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/segment_max.cu @@ -201,7 +201,9 @@ namespace nd4j { } // -------------------------------------------------------------------------------------------------------------- // void segmentMaxFunctor(nd4j::LaunchContext* context , NDArray* input, NDArray* indices, NDArray* output) { + NDArray::prepareSpecialUse({output}, {input, indices}); BUILD_DOUBLE_SELECTOR(input->dataType(), indices->dataType(), segmentMaxFunctor_, (context, input, indices, output), NUMERIC_TYPES, INDEXING_TYPES); + NDArray::registerSpecialUse({output}, {input, indices}); } // -------------------------------------------------------------------------------------------------------------- // @@ -240,7 +242,9 @@ namespace nd4j { } // -------------------------------------------------------------------------------------------------------------- // void unsortedSegmentMaxFunctor(nd4j::LaunchContext* context, NDArray* input, NDArray* indices, Nd4jLong numOfClasses, NDArray* output) { + NDArray::prepareSpecialUse({output}, {input, indices}); BUILD_DOUBLE_SELECTOR(input->dataType(), indices->dataType(), unsortedSegmentMaxFunctor_, (context, input, indices, numOfClasses, output), NUMERIC_TYPES, INDEXING_TYPES); + NDArray::registerSpecialUse({output}, {input, indices}); } // -------------------------------------------------------------------------------------------------------------- // @@ -370,8 +374,10 @@ namespace nd4j { } // -------------------------------------------------------------------------------------------------------------- // int segmentMaxFunctorBP(nd4j::LaunchContext* context , NDArray* input, NDArray* indices, NDArray* gradOut, NDArray* output) { + NDArray::prepareSpecialUse({output}, {input, indices, gradOut}); BUILD_DOUBLE_SELECTOR(output->dataType(), 
indices->dataType(), return segmentMaxFunctorBP_, (context, input, indices, gradOut, output), FLOAT_TYPES, INDEXING_TYPES); + NDArray::registerSpecialUse({output}, {input, indices, gradOut}); } // -------------------------------------------------------------------------------------------------------------- // @@ -416,7 +422,9 @@ namespace nd4j { } // -------------------------------------------------------------------------------------------------------------- // int unsortedSegmentMaxFunctorBP(nd4j::LaunchContext* context , NDArray* input, NDArray* indices, NDArray* gradOut, Nd4jLong numOfClasses, NDArray* output) { + NDArray::prepareSpecialUse({output}, {input, indices, gradOut}); BUILD_DOUBLE_SELECTOR(output->dataType(), indices->dataType(), return unsortedSegmentMaxFunctorBP_, (context, input, indices, gradOut, numOfClasses, output), FLOAT_TYPES, INDEXING_TYPES); + NDArray::registerSpecialUse({output}, {input, indices, gradOut}); } } } diff --git a/libnd4j/include/ops/declarable/helpers/cuda/segment_mean.cu b/libnd4j/include/ops/declarable/helpers/cuda/segment_mean.cu index 3f2168da4..fbb45a375 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/segment_mean.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/segment_mean.cu @@ -163,7 +163,7 @@ namespace helpers { classesRangesBegs.assign(indices->lengthOf()); classesRangesLens.assign(0); - + NDArray::prepareSpecialUse({output}, {input, indices}); dim3 dims(numClasses, indices->lengthOf(), numClasses * 32 + 32); int* begins = reinterpret_cast(classesRangesBegs.specialBuffer()); int* lengths = reinterpret_cast(classesRangesLens.specialBuffer()); @@ -182,11 +182,14 @@ namespace helpers { Nd4jLong* outputTadOffsets = packZ.specialOffsets(); segmentMeanTadKernel<<sizeAt(0), 512, 2048, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), inputTads, inputTadOffsets, reinterpret_cast(indices->specialBuffer()), begins, lengths, numClasses, output->specialBuffer(), output->specialShapeInfo(), outputTads, outputTadOffsets); } + NDArray::registerSpecialUse({output}, {input, indices}); } // -------------------------------------------------------------------------------------------------------------- // void segmentMeanFunctor(nd4j::LaunchContext* context , NDArray* input, NDArray* indices, NDArray* output) { + NDArray::prepareSpecialUse({output}, {input, indices}); BUILD_DOUBLE_SELECTOR(output->dataType(), indices->dataType(), segmentMeanFunctor_, (context, input, indices, output), NUMERIC_TYPES, INDEXING_TYPES); + NDArray::registerSpecialUse({output}, {input, indices}); } // -------------------------------------------------------------------------------------------------------------- // @@ -194,6 +197,8 @@ namespace helpers { static void unsortedSegmentMeanFunctor_(nd4j::LaunchContext* context, NDArray* input, NDArray* indices, Nd4jLong numOfClasses, NDArray* output) { auto stream = context->getCudaStream(); // NDArray classes = NDArrayFactory::create('c', {numOfClasses, 2}); + NDArray::prepareSpecialUse({output}, {input, indices}); + NDArray classesRangesBegs = NDArrayFactory::create('c', {numOfClasses}); NDArray classesRangesLens = NDArrayFactory::create('c', {numOfClasses}); // NDArray row = NDArrayFactory::create('c', {1, 2}, {(int)indices->lengthOf(), (int)0}); @@ -221,12 +226,15 @@ namespace helpers { dims.x = input->sizeAt(0); segmentMeanTadKernel<<>>(input->specialBuffer(), input->specialShapeInfo(), inputTads, inputTadOffsets, reinterpret_cast(indices->specialBuffer()), begins, lengths, numOfClasses, 
 output->specialBuffer(), output->specialShapeInfo(), outputTads, outputTadOffsets);
         }
+        NDArray::registerSpecialUse({output}, {input, indices});
     }
     // -------------------------------------------------------------------------------------------------------------- //
     void unsortedSegmentMeanFunctor(nd4j::LaunchContext* context , NDArray* input, NDArray* indices, Nd4jLong numOfClasses, NDArray* output) {
+        NDArray::prepareSpecialUse({output}, {input, indices});
         BUILD_DOUBLE_SELECTOR(input->dataType(), indices->dataType(), unsortedSegmentMeanFunctor_, (context, input, indices, numOfClasses, output), NUMERIC_TYPES, INDEXING_TYPES);
+        NDArray::registerSpecialUse({output}, {input, indices});
     }
     // -------------------------------------------------------------------------------------------------------------- //
@@ -349,8 +357,10 @@ namespace helpers {
     // -------------------------------------------------------------------------------------------------------------- //
     // segmen mean bp main
     int segmentMeanFunctorBP(nd4j::LaunchContext* context , NDArray* input, NDArray* indices, NDArray* gradOut, NDArray* output) {
+        NDArray::prepareSpecialUse({output}, {input, indices, gradOut});
         BUILD_DOUBLE_SELECTOR(output->dataType(), indices->dataType(), return segmentMeanFunctorBP_, (context, input, indices, gradOut, output), FLOAT_TYPES, INDEXING_TYPES);
+        NDArray::registerSpecialUse({output}, {input, indices, gradOut});
     }
     // -------------------------------------------------------------------------------------------------------------- //
@@ -399,7 +409,9 @@ namespace helpers {
     }
     // -------------------------------------------------------------------------------------------------------------- //
     int unsortedSegmentMeanFunctorBP(nd4j::LaunchContext* context , NDArray* input, NDArray* indices, NDArray* gradOut, Nd4jLong numOfClasses, NDArray* output) {
+        NDArray::prepareSpecialUse({output}, {input, indices, gradOut});
         BUILD_DOUBLE_SELECTOR(output->dataType(), indices->dataType(), return unsortedSegmentMeanFunctorBP_, (context, input, indices, gradOut, numOfClasses, output), FLOAT_TYPES, INDEXING_TYPES);
+        NDArray::registerSpecialUse({output}, {input, indices, gradOut});
     }
 }
diff --git a/libnd4j/include/ops/declarable/helpers/cuda/segment_min.cu b/libnd4j/include/ops/declarable/helpers/cuda/segment_min.cu
index 0c67b41d5..950abde67 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/segment_min.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/segment_min.cu
@@ -192,7 +192,9 @@ namespace helpers {
     }
     // -------------------------------------------------------------------------------------------------------------- //
     void segmentMinFunctor(nd4j::LaunchContext* context , NDArray* input, NDArray* indices, NDArray* output) {
+        NDArray::prepareSpecialUse({output}, {input, indices});
         BUILD_DOUBLE_SELECTOR(input->dataType(), indices->dataType(), segmentMinFunctor_, (context, input, indices, output), NUMERIC_TYPES, INDEXING_TYPES);
+        NDArray::registerSpecialUse({output}, {input, indices});
     }
     // -------------------------------------------------------------------------------------------------------------- //
@@ -233,8 +235,10 @@ namespace helpers {
     }
     // -------------------------------------------------------------------------------------------------------------- //
     void unsortedSegmentMinFunctor(nd4j::LaunchContext* context , NDArray* input, NDArray* indices, Nd4jLong numOfClasses, NDArray* output) {
+        NDArray::prepareSpecialUse({output}, {input, indices});
         BUILD_DOUBLE_SELECTOR(input->dataType(), indices->dataType(), unsortedSegmentMinFunctor_, (context, input, indices, numOfClasses, output), NUMERIC_TYPES, INDEXING_TYPES);
+        NDArray::registerSpecialUse({output}, {input, indices});
     }
     template
@@ -364,8 +368,10 @@ namespace helpers {
     // -------------------------------------------------------------------------------------------------------------- //
     // segmen min
     int segmentMinFunctorBP(nd4j::LaunchContext* context , NDArray* input, NDArray* indices, NDArray* gradOut, NDArray* output) {
+        NDArray::prepareSpecialUse({output}, {input, indices, gradOut});
         BUILD_DOUBLE_SELECTOR(output->dataType(), indices->dataType(), return segmentMinFunctorBP_, (context, input, indices, gradOut, output), FLOAT_TYPES, INDEXING_TYPES);
+        NDArray::registerSpecialUse({output}, {input, indices, gradOut});
     }
     template
@@ -409,7 +415,9 @@ namespace helpers {
     }
     // -------------------------------------------------------------------------------------------------------------- //
     int unsortedSegmentMinFunctorBP(nd4j::LaunchContext* context , NDArray* input, NDArray* indices, NDArray* gradOut, Nd4jLong numOfClasses, NDArray* output) {
+        NDArray::prepareSpecialUse({output}, {input, indices, gradOut});
         BUILD_DOUBLE_SELECTOR(output->dataType(), indices->dataType(), return unsortedSegmentMinFunctorBP_, (context, input, indices, gradOut, numOfClasses, output), FLOAT_TYPES, INDEXING_TYPES);
+        NDArray::registerSpecialUse({output}, {input, indices, gradOut});
     }
 }
 }
diff --git a/libnd4j/include/ops/declarable/helpers/cuda/segment_prod.cu b/libnd4j/include/ops/declarable/helpers/cuda/segment_prod.cu
index 78f21916d..3ae4ebcb8 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/segment_prod.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/segment_prod.cu
@@ -192,7 +192,9 @@ namespace helpers {
     }
     // -------------------------------------------------------------------------------------------------------------- //
     void segmentProdFunctor(nd4j::LaunchContext* context , NDArray* input, NDArray* indices, NDArray* output) {
+        NDArray::prepareSpecialUse({output}, {input, indices});
         BUILD_DOUBLE_SELECTOR(output->dataType(), indices->dataType(), segmentProdFunctor_, (context, input, indices, output), NUMERIC_TYPES, INDEXING_TYPES);
+        NDArray::registerSpecialUse({output}, {input, indices});
     }
     // -------------------------------------------------------------------------------------------------------------- //
@@ -231,8 +233,10 @@ namespace helpers {
     }
     // -------------------------------------------------------------------------------------------------------------- //
     void unsortedSegmentProdFunctor(nd4j::LaunchContext* context , NDArray* input, NDArray* indices, Nd4jLong numOfClasses, NDArray* output) {
+        NDArray::prepareSpecialUse({output}, {input, indices});
         BUILD_DOUBLE_SELECTOR(input->dataType(), indices->dataType(), unsortedSegmentProdFunctor_, (context, input, indices, numOfClasses, output), NUMERIC_TYPES, INDEXING_TYPES);
+        NDArray::registerSpecialUse({output}, {input, indices});
     }
     // -------------------------------------------------------------------------------------------------------------- //
@@ -358,8 +362,10 @@ namespace helpers {
     // -------------------------------------------------------------------------------------------------------------- //
     int segmentProdFunctorBP(nd4j::LaunchContext* context , NDArray* input, NDArray* indices, NDArray* gradOut, NDArray* output) {
+        NDArray::prepareSpecialUse({output}, {input, indices, gradOut});
         BUILD_DOUBLE_SELECTOR(output->dataType(), indices->dataType(), return segmentProdFunctorBP_, (context, input, indices, gradOut, output), FLOAT_TYPES, INDEXING_TYPES);
+        NDArray::registerSpecialUse({output}, {input, indices, gradOut});
     }
     // -------------------------------------------------------------------------------------------------------------- //
@@ -404,7 +410,9 @@ namespace helpers {
     // -------------------------------------------------------------------------------------------------------------- //
     int unsortedSegmentProdFunctorBP(nd4j::LaunchContext* context , NDArray* input, NDArray* indices, NDArray* gradOut, Nd4jLong numOfClasses, NDArray* output) {
+        NDArray::prepareSpecialUse({output}, {input, indices, gradOut});
         BUILD_DOUBLE_SELECTOR(output->dataType(), indices->dataType(), return unsortedSegmentProdFunctorBP_, (context, input, indices, gradOut, numOfClasses, output), FLOAT_TYPES, INDEXING_TYPES);
+        NDArray::registerSpecialUse({output}, {input, indices, gradOut});
     }
     // -------------------------------------------------------------------------------------------------------------- //
diff --git a/libnd4j/include/ops/declarable/helpers/cuda/segment_sqrtn.cu b/libnd4j/include/ops/declarable/helpers/cuda/segment_sqrtn.cu
index 4141cefba..229d41cc9 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/segment_sqrtn.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/segment_sqrtn.cu
@@ -146,8 +146,10 @@ namespace helpers {
     }
     // -------------------------------------------------------------------------------------------------------------- //
     void unsortedSegmentSqrtNFunctor(nd4j::LaunchContext* context , NDArray* input, NDArray* indices, Nd4jLong numOfClasses, NDArray* output) {
+        NDArray::prepareSpecialUse({output}, {input, indices});
         BUILD_DOUBLE_SELECTOR(input->dataType(), indices->dataType(), unsortedSegmentSqrtNFunctor_, (context, input, indices, numOfClasses, output), FLOAT_TYPES, INDEXING_TYPES);
+        NDArray::registerSpecialUse({output}, {input, indices});
     }
     // -------------------------------------------------------------------------------------------------------------- //
     template
@@ -270,7 +272,9 @@ namespace helpers {
     }
     // -------------------------------------------------------------------------------------------------------------- //
     int unsortedSegmentSqrtNFunctorBP(nd4j::LaunchContext* context , NDArray* input, NDArray* indices, NDArray* gradOut, Nd4jLong numOfClasses, NDArray* output) {
+        NDArray::prepareSpecialUse({output}, {input, indices, gradOut});
         BUILD_DOUBLE_SELECTOR(output->dataType(), indices->dataType(), return unsortedSegmentSqrtNFunctorBP_, (context, input, indices, gradOut, numOfClasses, output), FLOAT_TYPES, INDEXING_TYPES);
+        NDArray::registerSpecialUse({output}, {input, indices, gradOut});
     }
 }
 }
diff --git a/libnd4j/include/ops/declarable/helpers/cuda/segment_sum.cu b/libnd4j/include/ops/declarable/helpers/cuda/segment_sum.cu
index 37dacee09..08b36253a 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/segment_sum.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/segment_sum.cu
@@ -190,7 +190,9 @@ namespace helpers {
     }
     // -------------------------------------------------------------------------------------------------------------- //
     void segmentSumFunctor(nd4j::LaunchContext* context , NDArray* input, NDArray* indices, NDArray* output) {
+        NDArray::prepareSpecialUse({output}, {input, indices});
         BUILD_DOUBLE_SELECTOR(input->dataType(), indices->dataType(), segmentSumFunctor_, (context, input, indices, output), NUMERIC_TYPES, INDEXING_TYPES);
+        NDArray::registerSpecialUse({output}, {input, indices});
     }
     // -------------------------------------------------------------------------------------------------------------- //
@@ -229,8 +231,10 @@ namespace helpers {
     }
     // -------------------------------------------------------------------------------------------------------------- //
     void unsortedSegmentSumFunctor(nd4j::LaunchContext* context , NDArray* input, NDArray* indices, Nd4jLong numOfClasses, NDArray* output) {
+        NDArray::prepareSpecialUse({output}, {input, indices});
         BUILD_DOUBLE_SELECTOR(input->dataType(), indices->dataType(), unsortedSegmentSumFunctor_, (context, input, indices, numOfClasses, output), NUMERIC_TYPES, INDEXING_TYPES);
+        NDArray::registerSpecialUse({output}, {input, indices});
     }
@@ -343,8 +347,10 @@ namespace helpers {
     // -------------------------------------------------------------------------------------------------------------- //
     int segmentSumFunctorBP(nd4j::LaunchContext* context , NDArray* input, NDArray* indices, NDArray* gradOut, NDArray* output) {
+        NDArray::prepareSpecialUse({output}, {input, indices, gradOut});
         BUILD_DOUBLE_SELECTOR(output->dataType(), indices->dataType(), return segmentSumFunctorBP_, (context, input, indices, gradOut, output), FLOAT_TYPES, INDEXING_TYPES);
+        NDArray::registerSpecialUse({output}, {input, indices, gradOut});
     }
     template
@@ -381,7 +387,9 @@ namespace helpers {
     }
     // -------------------------------------------------------------------------------------------------------------- //
     int unsortedSegmentSumFunctorBP(nd4j::LaunchContext* context , NDArray* input, NDArray* indices, NDArray* gradOut, Nd4jLong numOfClasses, NDArray* output) {
+        NDArray::prepareSpecialUse({output}, {input, indices, gradOut});
         BUILD_DOUBLE_SELECTOR(output->dataType(), indices->dataType(), return unsortedSegmentSumFunctorBP_, (context, input, indices, gradOut, numOfClasses, output), FLOAT_TYPES, INDEXING_TYPES);
+        NDArray::registerSpecialUse({output}, {input, indices, gradOut});
     }
 }
diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests7.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests7.cpp
index 2e1dab1a3..b0488c23a 100644
--- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests7.cpp
+++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests7.cpp
@@ -1423,6 +1423,42 @@ TEST_F(DeclarableOpsTests7, TestSegmentMean_2) {
     delete result;
 }
+TEST_F(DeclarableOpsTests7, TestSegmentMean_02) {
+    auto x = NDArrayFactory::create('c', {6, 3}, {1, 2, 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18.});
+    auto idx = NDArrayFactory::create({0, 0, 1, 1, 2,2});
+    auto exp = NDArrayFactory::create('c', {3, 3}, { 2.5, 3.5, 4.5, 8.5, 9.5, 10.5, 14.5, 15.5, 16.5});
+
+    nd4j::ops::segment_mean op;
+
+    auto result = op.execute({&x, &idx}, {}, {});
+    ASSERT_EQ(result->status(), Status::OK());
+    ASSERT_EQ(result->size(), 1);
+    exp.printIndexedBuffer("Expect Mean");
+    result->at(0)->printIndexedBuffer("Output Mean");
+//    exp.printShapeInfo("Exp Shape");
+    ASSERT_TRUE(exp.equalsTo(result->at(0)));
+
+    delete result;
+}
+
+TEST_F(DeclarableOpsTests7, TestSegmentMean_021) {
+    auto x = NDArrayFactory::create('c', {6, 3});//, {1, 2, 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18.});
+    auto idx = NDArrayFactory::create({0, 0, 1, 1, 2,2});
+    auto exp = NDArrayFactory::create('c', {3, 3}, { 2.5, 3.5, 4.5, 8.5, 9.5, 10.5, 14.5, 15.5, 16.5});
+
+    nd4j::ops::segment_mean op;
+    x.linspace(1.);
+    auto result = op.execute({&x, &idx}, {}, {});
+    ASSERT_EQ(result->status(), Status::OK());
+    ASSERT_EQ(result->size(), 1);
+    exp.printIndexedBuffer("Expect Mean");
+    result->at(0)->printIndexedBuffer("Output Mean");
+//    exp.printShapeInfo("Exp Shape");
+    ASSERT_TRUE(exp.equalsTo(result->at(0)));
+
+    delete result;
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 TEST_F(DeclarableOpsTests7, TestSegmentMeanBP_2) {
     auto x = NDArrayFactory::create('c', {4, 4}, {1.8, 2.5, 4., 9.,2.1, 2.4, 3., 9.,2.1, 2.1, 0.7, 0.1,3., 4.2, 2.2, 1.});

From 650539528cc7e78aebd76e29d1d8cbb264407bf4 Mon Sep 17 00:00:00 2001
From: Alexander Stoyakin
Date: Tue, 27 Aug 2019 19:56:04 +0300
Subject: [PATCH 26/56] [WIP] Updating failed tests to reflect code changes (#184)

* Test updated to reflect changes in random generation

* Invalid resource removed and fixed tests

Signed-off-by: Alexander Stoyakin
---
 .../clustering/kmeans/KMeansTest.java     |   6 +++---
 .../models/fasttext/FastTextTest.java     |   2 --
 .../models/fasttext/supervised.model.bin  | Bin 99274 -> 0 bytes
 3 files changed, 3 insertions(+), 5 deletions(-)
 delete mode 100644 deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/test/resources/models/fasttext/supervised.model.bin

diff --git a/deeplearning4j/deeplearning4j-nearestneighbors-parent/nearestneighbor-core/src/test/java/org/deeplearning4j/clustering/kmeans/KMeansTest.java b/deeplearning4j/deeplearning4j-nearestneighbors-parent/nearestneighbor-core/src/test/java/org/deeplearning4j/clustering/kmeans/KMeansTest.java
index fe4fac1b7..c9140942d 100644
--- a/deeplearning4j/deeplearning4j-nearestneighbors-parent/nearestneighbor-core/src/test/java/org/deeplearning4j/clustering/kmeans/KMeansTest.java
+++ b/deeplearning4j/deeplearning4j-nearestneighbors-parent/nearestneighbor-core/src/test/java/org/deeplearning4j/clustering/kmeans/KMeansTest.java
@@ -269,9 +269,9 @@ public class KMeansTest extends BaseDL4JTest {
         double[] centroid1 = {2.44e8, 2.71e8, 2.98e8, 3.25e8};
         double[] centroid2 = {5.14e8, 5.41e8, 5.68e8, 5.95e8};
-        double[] centroid3 = {1.63e8, 1.9e8, 2.17e8, 2.44e8};
-        double[] centroid4 = {6.76e8, 7.03e8, 7.3e8, 7.57e8};
-        double[] centroid5 = {4.06e8, 4.33e8, 4.6e8, 4.87e8};
+        double[] centroid3 = {1000000.0, 2.8E7, 5.5E7, 8.2E7};
+        double[] centroid4 = {7.03E8, 7.3E8, 7.57E8, 7.84E8};
+        double[] centroid5 = {3.79E8, 4.06E8, 4.33E8, 4.6E8};
         assertArrayEquals(centroid1, clusterSet.getClusters().get(0).getCenter().getArray().toDoubleVector(), 1e-4);
         assertArrayEquals(centroid2, clusterSet.getClusters().get(1).getCenter().getArray().toDoubleVector(), 1e-4);
diff --git a/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/test/java/org/deeplearning4j/models/fasttext/FastTextTest.java b/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/test/java/org/deeplearning4j/models/fasttext/FastTextTest.java
index cd3737e04..b60af71d5 100644
--- a/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/test/java/org/deeplearning4j/models/fasttext/FastTextTest.java
+++ b/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/test/java/org/deeplearning4j/models/fasttext/FastTextTest.java
@@ -103,7 +103,6 @@ public class FastTextTest extends BaseDL4JTest {
     }
     @Test
-    @Ignore("AB 2019/06/24 - Failing: Ignored to get to all passing baseline to prevent regressions via CI - see issue #7912")
     public void testPredict() throws IOException {
         String text = "I like soccer";
@@ -119,7 +118,6 @@ public class FastTextTest extends BaseDL4JTest {
     }
     @Test
-    @Ignore("AB 2019/06/24 - Failing: Ignored to get to all passing baseline to prevent regressions via CI - see issue #7912")
public void testPredictProbability() throws IOException { String text = "I like soccer"; diff --git a/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/test/resources/models/fasttext/supervised.model.bin b/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/test/resources/models/fasttext/supervised.model.bin deleted file mode 100644 index dcf5646840872ba7d78b30769b65a84542ba67bc..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 99274 zcmZ6y2Rv5q8$WLEz4vZYQL5)$cT-DCODa*6N)bgvQ%Z@9tVoeUHc5ESbw?V=2$7Mj zLZzhAF!Ou9-{0^5{d`{k^LpLSbFXuq`&{Q9?`yFtRx_md`1tnllJNga{QvqF;N}05 zB6&%Wmqd8!KYdLsv_8L^qp{G8m!HW?|LL=wm;Q5&|G$gO@L2HQ8~$56oloe$|GXb( zkN?v8Z*iZ4tJ6Ov&;3sSCH3DX54YXBosRrdUieRTubZ2vql1gfKQ*)X{xj_VUZ!UM zlto) z|DoaJ;&jaE-^}|D&42S}Gw%!k@8Qe;zt8x;nl(-i`;Pvrhr_>UEjzT&#o1&3KexLd zaog+U;oN9Um$&y4~4I5@b|H5#xTwU_1PXaY=b^Iw&*QBIldL0S|#Y~wXabvR+Sny zPGx=P?15_|(zNB69G0(a!K6Ap^0ns&+*soXF1_8%v%uT<=+gir920@<2O_b4pAfxJ zXaf2EUASIZkr;L=)4lRHK)ri5Yt--%7wthNT~-`sK0OaxT5LeE<29DmCV;@VdN|US z2qVU>=$kT?i5c#}Hzh?7VyRy` zYQ&-868qtqH-0-PPh(r3gTWOYazj&*KJrb)Iez>!z-aOOpEyxGUFQb((C`;Az1mXe2GSb`VjKclIxJ(T1=hlNs|cw<1B zO$?4hxoUlq=;Q-dG4H|1Qkotgd4UU!^y#AoajbKg9NLwAMGLnC6gj+#9klX+$r6uo z{?-~q4+(5{>qU!>t1xk$4EtE#9B*APq~ZY@q{%rM(^@UT#8-|yUOd7)I4VSn#%(cX zi8sE;-V1{_FRI7F5hFVMvcs};DHU@z1} zv(PZeN91mW!}j~msO9ky6O7W)`Y*+-LoA-YRmmJLPK5l>LdaY24IiG-!XLXQ;QX2A zP`PG4+=!`zKa;qiF2l!MW()Dr$PAE_nghc(ZbCzj3k3fX#OPpsS|%5R*Hym4$93~z z#SH_<>ze?%#-A9e8){_funH{moXE-)=zu=A9UCY0;`KTG;Hf7>)g|58txkJz$F)e# zTGel8HY!E$PSKzvM|8-sbt>eOi6NaUtb@7gf7qtMr?}xF!TY7rP^)VMUmgO!>KjJc z)f-`>-Um=NZ9v(ES_IP>@W8a1YpjvWHXKcYI$I5T{ihh+5FrhBVL4<;eT3=TLb0fR z8Dt%mqEq+u;=>ugaDl7>xgjA-%5}wwNXICKn@7QAubUt;Q>kEL`!SIC5DA87gh`Cu z2)y}G3gp~3uxYGet&UW1y_@cX%vCO~snDS}+yzMQ=ATSwttjnMZ^5&rS8!^sE}6JN zlE}B$qq2F(CoGS>cP@b3S9$t-!5665vlsC5PLQ+LqrzoMbf*3m zxNt;{EIM)qM`e}B6v9Wl1!M>}zz7db=|Rsq$rzdwqD_Agapu;&Hlt=x&` zrZ&uN9YMmedWIfuRot>kCD5|09s=C`v7IUq{qiq3%P0uW?8<`s!TGpFs|~-E|Hc01 zap>8%0A@p`*%>Sy$ST2GvNJJ zMYj2;1Q~N2WuH6qko&Y}dovr*9)d-= z{h)E~CL1gq33FT;Vf`viny~l@PB{OVc^mr{ZwLKh!~bNURCyA2#=_Zfwm2B3ESSUe z>=vfWZ<_$0#9Fu$BTg^9c!%zXHJ~hU9`iOh6W!wSSr_-qs1YYkOfF1;2yJC1_VYy$ zeA`{{=b<=ZKHdcDR#j4L*9;bQ!|0!Wg>kT52aa2Xh^>1Dqz!42%ag?H?|qKPWWAV!xvL@IL>X@U?1P`gs7UN0MTzt~3#?TS!;M0!^qQ~**p+#~j_ayqyO}$fAN~d@rSC!N zZ#FC!k*5iF_-RDj7uL+;G$SkA0~rsMX#OKLBDeez1bZxIs@5DuiB3I~PaTAX9qL4~ zAsSpy8Pcj)M>aiMmJ~XSvB@(nk(-%}o2)W`Rs?~+PA2D8mIj$Ivmd&G&+SKZjWI-EWX0E-Yh8p7=edR$kF-&E;_FCgRT>oQEJ{Xw&VB@ z99rK6Mg?~;vhfPa3UwkW)}^QPB4Bfa9&Vq=N7H@AnHR^FG3&IJBiF#NnH2_}a+!`nuFT7Pj4O5T#B$6M5ig3_UR@kZyMG9wLjlAp8n3+hn+ z&3Tw-`x-6UL}`S}cc|txV(ghu_#%9m(Jik9k98KP%G)pNGG2jCvuy!m){GarM5(S$ z6V9CW4vm%C0airgpRZx;mg(M5q*(>Azdxhww?wqMB}Q)rDv&vUD^bAdIrrvJ5T3B! 
z3_ov*O3=boJ(}tL3MRM*!0s25Kz-sU!_@!b z#%1s^*WYTQD@an`%oCV)Rg!!=?}58_#=(Mep+%AymeBcqxOdu0(0w!smSxsL?e#~vedBS?lkrcmFZ)f=Q!W`DFjS2pfl!Ngik#uSiTE} zq`#7%4ysFm@0ASvJ9|F*{>ov3%T6-&Yc;r48wBXerF_)NeI+^y)x+VHW3Z<8GCZ1J zi=yFOaA;Z@20!@=!B4XwFi4U7bPeLH8-KI8b z;LRjqQn6BPisQ`?~-J}HxnHG?2DaNq0HpyGfc_Z7If~~4a;Uq z(~oZjIo`o)WV2s2O!m0T)-6oKl{M#>3x{MuHZ2|oPUJAA9}l66;afc6I2%S*7NBif zA+EBH!erl6n6WAfHIoz}*k=t}VelJ_PO6csmRI0gggTurl?WSW36jnWigb&pFp2Wj zgqHouuvoJICh%EsdMn>Ref}M=h~k~u!9$S89f8cc3JCq9O_o*E;bVCn*6>Fzc!o$2 zkzO5`u&Ia5df39*DelL-UHu4>I=&yIQ|cwszbomRvY=PvvyX8^0VNE6kNi*V4p zj>$QqO$0I}sCK6|6~C`VZiIXQ=`TM)f(X(ptB%5zH^~qOD^Ob9fW*8zgeUEzq22X0 zzES!CQFYC%^Tw~>XS$6INK~aZy;pGPcOslUH50o8s+gPI&%v-ppX?qs!E<}t@$f|% z^2aO+Wq5df_gP!qvvV)#K9ZwGE9KzGA2DLAt4ik07=WJ+b*LCq0yzefWa~>Ua(`Mi zdlD?MN^+1r@@p8YhvZ1a{7&fYOoj7qI#6}qkF&~50Yc?&vrE6nVwk2RnZUdR$t%&= z$fttU5A@Ocvj&;O{R6Mu4`5|sWXA)<3L7zf3>$;)~j76hh;!$tLrP2?} z#NOlD*&ng*`EAS@dkqs@v}oeFKR7F-3}q89;?pns^zw-b@UZU$9^Vy;iRTueY_m3e zK5_}SsbD3_quCPzpEuk`zBztaA@GKs*Y^;W z=SULYNgU=?fgOiDvw~;08{rTug&+J+!V9l(*2}+@ z=iP~uidJC47bUW1zboFhkA&;s4mT&H;{B+4#{T+K@K~t9(JLy1d8^mrrtOi;KH3Zc zPxD#FS}9Wa z!m_?oHHluaA>Em99fvxGuvz3c)V_7VV|&k_ylw|`Fj<#$d5+=5#3*juTr1r2tN{wm z1gL+6ZNVWqYqtJy3M*-$MDM+i!;^mIB-c2#@akg%W|hm3F6)a}pG2WV z!;s{6?q2*1S}(yOy0 zn9m<%nJb6q&b^&D4WEY1WOw)sF_yD;fzI(b2Su7RLJy{= z{DxoYV&wXMVZvAKkE+xaH?9u?EAKwMRUiU)f84|Kl^o_#mm+yteH&U_t8jnvT3CES z77G^|p~&q&FyWjWZ53?;S21t6FItVk76|^l{r}mA0yti#Kt>a5n9># zdr|mfg*4GSQ_4MP83q2QzcaITug48j2XJjg9S9v3BXWX#;=_&;By*hf|q}@1x+7ln&jq=m@JOcprO*tWh(Wi^Jxln6c9xy*q|ceQ78jIky!w zH&#PeVG1ahyR(6hh3JWWA3;S+j`sD86s&KIh8;E8_@mn&dcV|xt!xoYjlYGb7ge** zn}p~l+qE$HjuQF9x0P9<+Ydj*6PXDc-Y}7U97voOfo7YsK-lFxb4F$jld@kE-^wQA zctjY}pHjl8vHJ-`*zbn8;HS9ooe&Y$H-x?BdccXk1xD@C^yZWl80wjh<;pK$zg{-< z=82LU4%+nU3TxEvIZi9|iq{&pXG*!mU(Y$=By z&JXeIZ+U8dDTFDTash_y9dS&-giT-i3vA3~sp<3cV0J=_rfBP9rNtLaDZT;=Ul`I| zC&MA~hb)!Mxr=(MW`lFIK6#NWk8ADoaEnYF_UTzOYJYA)uq>e32UWUrT?s6b4u@a% z>)Cs;_t3Ox2p<>=lcPtKDWBO}+`Xuq9WJ{M4dTIYil3i`&+LXleP#T;;S)^#_zM(g z@lm5yOEBSG4qW}r!+T{2gD|8j&hZIJV zj!jkr!<rX>zZtKFmP7VqFXmTL4PG1|*AeMrLTvMRxuv z1~xx`4I_JeaIup#{qg+(SWc^f!JZo2`}+$PCwk-hXK&!c2^RkRkf-66-OLYz$uJ|P z0`yc3$)=05Fl$N`hxqm56zdPTdqkG#Su0ZVR*jxJph?1#FLRs?9O2}oY+RMBPTomr zQjhO>xGmEce#P^VGmq7YVsMOj5mlc9)$ z55D5)yHI$ct3vhsvYC($J$kEKliI${fZi0oMk~H z)(i~$&cNyzQ8?%7jlP#y=6-<-c)x3g42^0Wt~-x!Dd8`W2q+ z-p;AGcXb?8YU-06mMv`3&XbI4#R$GUphJs)y~U)YgRI4=6Uad&vfgYG8)(vxcDohG z@@bEtDzyumm-#{5DM@lnP>5};;)3e*kMM5$Id-e=bBMiQ02LLvOhaic23%2LOGzNR z@45zA@_Z&%KC5S6t-Z`RkDh_3HXZsdNtxVxEe)=oiXitb0O)*KQs>sjsN1B#wXh3V z{>u+d)?J3JM-++WhAL?GDS=O)Pje^wDF7c2+u26*lf3c**dhcfIOSf1-{M-aid;_Gdmg zI6i@*v&MM0;tSl0A4EmvZ46^B0o=^{?4cQoG-@^<6&0b-+M59+PM$8mq=vUo7=vkf zHAdO3TjBX`Kc+!I6nl!^ExiOSt!Y|~N|dO*Jx?@90zjmyg9!)JROdioQ3t8#$9K%LmNDPrW2 zFHq~#j#n-{hY7{WjNoP)Ca#fT&UDJtOZHk=H|a0*zW)pZE#hR*RhLvw(80i0LYSDL zP3uig!|7s4R&!zsGx@e4w&iw#owWqbb(SN^3mWk1ytiO8)fW1)D>*z%H6F7tMYEN1 z7||1lg=Sn>yQ3aHt`Q(tUVlKLaegY2RK<8~UU5 zf5F6)I@CtCk@>PghMt`7i62Ax=?88ZG#|N*j}4tso2L^itCE56VSZ{;DMYp1uS4{~ zhhQn8&F*X8#~hThL;rm*n9#54G%)EG)=ZQqV^U>o>(A>LKPE+s(v*m)zZl&-s{j(K z$1!m5C0jI_g}b8_$<%g1lJ!oJl>C$-`?7ktpGw5YvsY3?Xu$;b!qRzA`KS)`g^r=) z@gcPTwFH)BhhwALMb>e<7Z`mEWBrQub1&41Ku^b8@W`6Qh%dRoT-Ta}u9MT@#|%yS zebHUGEF{FZr%Mp8T5V#o=r-rfjXLOk2o_Q-nt*dypya{-@>FV`!M{ICOi9*BwaA@114TG z$MjFqbWck=n|?S9PHY&#R8a}CBPZ|3#(;yj z9t@}5=ae)jAyrBNy!#HeRWhg@m;rcLn0k)N(d@I%46{y&O1xy5k(EI(F0={xwuVB~ z&T$BwycnikS0>y1{-X4AK@5$K!L55skSt5b(JkZbwmpp?oKpg$nJhfsTFB;WN)b69 zWgz21%m&jUPD!dF%}jg(ojjf6!j&Oxv%@2ZJEu;!EAiyM_b=F5r)sds6r|Z~5Ss7& z&S}ypL{(uv;=RKRerMH!UAPRW*(uN;u6JQkm=@8q8AJ;kL&$GLi0M24dyi?7q3LqC 
zu_**=k7P1GCdYv0XgRp;wug@{W#E}R3oeb_#3Pw~@KT5OT@?&zteP!~gv{&cZNC^o=E=~CYxj`ZuSmA@@Zr)W z9nf<}nFtgKP;T#6CNOIfQ@c%qG&C1OWo9)@-5ZVFPrVp}X(zxiT$~671wf2X1J-t^ z(N~Ne@45M?+;$MHv;@e$^eH&e_!{c1sR9XC4Z8IJAKdQu!!2c*5SAxSB_g(9bFU5b zKh=hf;`+EN_cgwX)1e_dmB<9c_wb}ggQN*Jpp?ZhjP#d)&-7VrfY%|eyZtz}OnL*t zP2;dY%oo!Jf8m`q*I>GSG1G8r8Om|w$*Yobc8R(&Z6#Tdl&wSr{-!~-F+W%*$kM&% z@=#85CYpmT4N&Vxx0GwRV#Nx0Z^2JCwQa@&=S8Til>+QkFMvJ$x-`Q<57yYWB35Uk z;gVK%Taz?tGpS+!%JB4<68@Makp|CF;^FS+H_YM@ee$g<53{aGku&Pjbjnn1GNfF= zJy|!x4LlTs^@0?ddkL0k_X(7|(t4SgS2H}sPHB%ERPM&=J#bIY_(AR-cnBg-U zU;4_?>zP;KeRvA&2~{MAdI`qJW^i}NY=Uf~aXh7Y16nOsfR{3G%7Ub6_)xFm(^Uxva5N7mF#=SrOpxFyP*6E8qYf@2)>3<(`ll1~Xb|io?ACaTUC-tbt ziU+Xx@bP()oM(^_;RvuplUmkIWkca&KFKm-f*T@GuKYIS$7<2aEIsHk^JVPkE`dn* z37moGTiEO^3nrhxp{d{j(DaidsXq>KEOOK5)yxjS^2l}!i5vpCGdJ;%U@OW@PDi)H zDx~YHJ~dC`GBriFnPM>!;^174ckJh5)ZEuF|AZj<$nfk#_TqHupB7MBWXgFbN}y0l zgyfvM0wU2N+`*CqxM8mfM{{W_Q@H#qT2ToSR^J59HDBQQ#tvL-@dg}2pRgzDv`AKp z7M*kQC-3|`${aKGg&QABVQr@nDgQ1+>$ZBMQbjc^saMI%J8d~C|%{H zOjM_-5=)&R7|f9&1p@l`?x-R)2#Lb}Zc)Z>k!D(%f|g168!R{?}h`taoBHR#5Z?{^&)pfgWqGiSG+!DiJU?kl23UzF+K z+S4g)%&+;l{oGMd3^&J&00DT#ld(Rmn#Y|Wr%zfN*CCnpw!nP`Z$8}d=jmT$=<*NG z*|D7hRINV&I%3?xIM;~DfBBmeO_Sl|c`f>POBxDozXKBz6>-fo38K{~hUd;GlGzz7 zPUy=)Y&Z{=w|`>ehB)@}$0}TY&I;Z%=n%`r!Zc_=fGivP3A56L>8ITSbZqApFj&*e z3Z>3~3AWYv_1J92TtJ!5UzCWh30kz^p&jI}l!q(QfjGMFA+-I~p>JIJ;nR!?$Qm4k zD^pZx#4MgJO70WX`eot5P6M!@A{0YKXy-H?G|A%Wkv7c&k*~sJeMK1`Jt#QQrJo+a8KoN@!)YZC-=S+j3KI=F8ef%>l-oQr!PRSEmbrHXH zUO^50K-BzghAVb_#MjFt@bR@zIC@r}nm)Y>33Fqh@R&9p29`-_FlSV5gs?#itJvl_ zn)JbFAhzg~f%)dM1^kk^SbI7fe@!2Q_ha>tv0s_&^K)dN{tKMB!$nUXULVTmA^3g1 zc2!V1iUTi?-W<7-J0~FSu`VAld7Jz<;0Z3b^Q|V2juxEQLtWm6G zj)z2|ll=ocS||_G1HVF}%rN>^YJ+Kf4XgJmkTqs#9I6T zkFi5oQVw)OW?sgNgARP0G4Y7h+>HzQI$Q14m@3{ap_)+YB>h{{VVXM zlNCC+36r91f9xAKfL-%jV1B6?&Y`NrK~I|m?r%WJXZi49#2gbUE`we)A61#9Lzg~4 zY#J{GWzICtjl>JEa_T|6wrdg=J>#Ryv{xuHZIm^T_yjwTZ-j808?eIX20Hx-ge_=5 zvgX}ne1h}}Ja%nI9jg*(($S{E`}g6glaf?9|32qfUww*Vo2_@zp!>7h;q=sN zSaeB*J`qpFX-&0wYhD!os@a1p1r><3u^1`6X$f8j#mEJ*77V|d$6cah%lO#jF|WN^ zU_efb^7|EV%EDCPXs7{=6Vs-xZM?O9Y$t5pVgmg6m$61$j0#%>!S&6TI9A)g!M=Ca zu%vn-Kwy6YY!y)?#F?Otw(;w60S^czJb=CfzsWELD9qiA~bK8$YlL%Kr{`4@gg zsYGd-*)L1|+q*DTLWWx$dg{sJ$eRwa@TpUfG>w>raS5 zm8Ak1k30y%jltmTu#nBMtwM=Yk({(*14!1|3k756naZzA@pZB%3e-k3zm58E_K{?K zmZVMEpUc706|T6zd;$)OU&LMT3TLOtk@C17sAI$8tmq=-hU(FW?g?z2*JqqrsX{+B zEMhjR^~3nR8fbRT#aBHp%+v#U9QW>g_IFhk_kn6PZ0PyHzV&a1GF^YnIb97w!&>xV zKV{o5`Z2Y`X)qjQNSWyN<;m$oVXS7vZ&;l&7nd)LXR8=7QW2knk~*b~gNZubHARa?JrE^B zujE+i&#{c0)GD~pE=G=QRHt#px@4ot68M(c%-&fw#2j=NC&rE=OyAZm@NLIa?q5D( z@?-ch^zrz!_?!~<6$F6uhysngt4Px1M97WwRjgI76m6Il#pv`EFlVO*qR8Ynu&XS< z#deZ({SH~|cofMv4#*M+7bN3d!!T!H12eh*H`B0BgZ$jP6>HC=!N{jySSBorXsAXN zEG6jaK5Zthgy#$R`8+c}`Yw8Q&qufLLvYXhA{Zw4ad%qr_HIKi$lP7S)1}li>DDUf zuvebc2Ha&MB!viTs7!-0Q*c?2D7govt{%%vB_+%N5CL2R?EwR*3}hY+tCk3+wNq}HFoL{pU4_A+{NcyMr;tN($_pzq0<)%YZbbGn?sE z^xb(6q#6StL+C4{{VHM|*O+saelyHOXCW|hz6@Fg!MI$nmn~VbhIx~bgWpmU;fMHK zSZyW9z2|lwZoW#NXEeBu*|=JPY_zxvUmTqwcV;JQcgW#(qa?hTxsd6V*Q2lW_=#iv z8#v$ll-;__0z00B;gaFxf=T`(FoAy;`>6N=>@qZl$h9NTdh#o}@UX~ai`TGk!ZkG7 z-vi<91(?2Gl-{$L0moc?86W*{IQc;abcD31!CQ6G-!4FuTE3%x%6;51DHRsFX5-BM zDs*Xzg1U(YB&c7Ujz`CU>bXeFbBkb0y;Ml%-|w(Ys~WQ^A3=BQXEdKyh2eE=+zZ~9 zfPdOyDE>2xow>*qEW2NG3{(_oScN7L_cr9dKC*%f44xodRjJ)Z zO{&=N77{yE>BnUP_{vd%6liy&EZ+~b*}NU?cC~`Z*I$gfe*~kaXg2pCZ~hE?y$Jhx zcAJYv0<@)SHKbd90IQrrJe_a}*{-?lc{~RBYbLQJB78J}QGmv)q42&VA6RjB&enw! 
zaBrp}>8jt%*`m9!pz8Y=9De#0ukq}zcE?1S3ZZ=bF-eki52?VmNDD0bbg1Cx&x;^; zVlBGAOM>p2+weVG6r8<8h>b!GQ*hIxLhYd&_evjo|S72Or6Fc6oPWP#>;1ar8;y(TZX80Z3bRw>f#NER6zZ8)0b2`45-;o0#tv{g|i7t;CY#YYPCr(z@;_uhb( z(ihC9kY~*B^fxf2!+|?ELz}4iJ%I%agFz}pm-45}lYS*bwjrdJJvpfbobJV<{7VfI zaN#MKG^o*)Kl4Ch(?ys%=7uuEdbC-n3yMM$FnY&Re6(MQPHA|GmfxMRL0y7gc^n7k z+ht(ZXIc9Dl>|9HZzin2^91(1RHGIfSu`!nhog~_)Z~yD$=w@MV0Y>l#JAqSKdC(! ze{C+@5zl4v|L)Y_4V2dXAvB(n)+aYd_oLACIOu6M~yAfs2U>8PfMb|D|KF@>C95 zXn1ovdoN*&=N*jx69UsqR%1)A4j69#0b{3G$T8s~(pCM8=S^dLz9SenIhDXQl|nG= z6eQ&ZL7Y~;54d%1B4&A>g0TzrY&y>$*r)p&r*}3m8*OHArPzu=`iTHpTx8SLZoV~0XC>fQMo=ru#>1lokK4%4z8iRT^(+GRs~ZAglW^e zARHN2C+(5mcx{CU{ctl3LpJKu7k5i>g|<5V5q1Sf!$nY`QHib-??#2)nb4l)1NXh9 z$v3$R*mrjaDr~#XU6YoDC%Q^-HV@ms{?0|1DNp+cnRI5NvSzjFwM6;4{A8 z__8buAW^*FMnV#5-Yx*49qJg7polj{<;e*fK~f)f9oLL8tbbkwdc^zT`4UBVX;FsF z^LkJs#}D$<{KW?#KvJ2RO%R(R$ z@(uZnM2U6z0hlzk6IFUe$&zX3ph)={=wv0}^BE7I`KtlFH2fN3PTht5mL=#cR?IRV z4d@St3%E;Nm~`I=#O>#`h~PFgx=CD}KwUFz<@xn4SNMsBvdwUL6$2NU79Q@dSt3a%wrxShR8cyp(hS!=DA78>^T6fifuOxQ;j?gns#(Is zX6z(Fmpl!=-iDJt+k@IFSyF+ipqryc)K9fC@-9lmetikl9EoP~>;upzZYt;V8*ebX z#Iv;)6wK4lUJUJz-r{HD21YBJXP-0;XZJYQL-A5+>JuwORfcz?vDht4o#%n3$K|Qq z7e1!iaWC_hXHPmN?SP@CHDF(CKyPbpM%zCIWMytJyP&}ozFcdCu!~pm*>_L4BKw|A z-+KpqJ~6OAxdbP!@?-8@9szT&6}ZuT9I`(ig19P4Qrw;d7ag_fLJdXQzE_!8T3&@7 zzaGeZ&C@-n&Bysu9hm%_M_}mR3zaQtV-i>0_ylE-pR)JKehPH?-qVp z@&=N0Lg2_TYYe&n0NKTH5H>dvdanne`g%cnc;OxBJ$DF%i$8(DI&V09eH7cXdAha* zk8#d>AzEqmg(qJckoN|2LC#a3EEY>4os(mj-hl?3Gc1R_)v8qUQ7NjI{=t;<`%ocdfN3%Q4F_MuVg-Cc zBa=uxZT%erm#=}Cg+3Tkoq#!mcQBz;j>OmwW8g9uv>O&Bemh%mqlq?q*77Q99F!u{ zL~U@AhDpJ5zcHR+e>y9Yeh%jZsSs6r0dllp6gJKYL|>7+%(g%gB2ptk(#LP1s_!dM zO$f!sd_135b91(LM;BvumLK+>;>lGrxs0Rv9=6gljy>d{L|!;vfJ&Qq+`G*b>V0&m z)6x;(7Dq88ZYD!kd%*C5Czvwcg+-~_WXh!gnArcC8Jar+&p?neP7T<;r3_$g6XfaI zFdO^I(WbNvmK>_(%%5k4b>_MFV7el-UXZ3H$9yq<@>Ou(ElNUb|1n+JwG)H8WWnCWutYdVJ1Yzw+WNZy-&EgI>VTHPKQ1)35THFD{%YlD)w-M zK3#g9=WF_2jr5-#grFDh@XcU4qj-X!{#Cq=sm}mqgNotPcZBEN@!ax`Ga$6l6<4Vh zb2VD@Xq$2{$~1;Czt8xyiZ`ahe3M%^t0faV`Gugz`6J#d_GfSKa8}EwOuYC#1P0Ri zF!KCbsH&7F0grmv7FgR;gJULdmG0J`zX=0lu%3`kA!zpJRCO|gFDRXaM*GMzT@|WhBwLVw74u-6Y~|@ z2d2PafH3jjU5si`RXiVZJ`$oW4}8*DXuz{E9o#BTm+^~|<(mSrgn59EkJLjbPoMRo zM}j&?#o?V}GDLb^JI@b07c4i(62A|!Fn!-LTva1V9}DDw<)$9I%$EjdBYM%J^$7cW z#EBjF>W??$!(rvCepW;L1qO`E(AgeBe|c%(^<&#eW{?P|8p z+ki-C7?PW94bW7)7fqDnV46xdV^Vhq9{%Q|kH1x;@D+cMdDww7KHdejik}xB%vElp}*F~WE5PVcRZ_ZSCdTc-iG(L%=hLZ$s z$}fh2Fk8m^%V+pnI2rZAf8k{hSBN%KA?`jRNfJ8k*xXe&q^Nve1QzRVo|k=i z02?QpGha_s;D@C9vm~N?uzX=Uepyw=HZ+uj=7GEL$@?ur*%x=lX5>tEWvpl9Fmnn{Vzp2e+ANfY z3!()OCBchjh{*wwOG>2c(@HpBYt78Ed<0_7(>SZW{lLi29qYGM!a9euc+hGFm+wF_ z4vN3WWvaEX)%zsNDR>L|d{4pPU<;m}8x9FU0nC(2Mfz}5fUan+!6Vyh@p$Nc2snG6 z%|0$g!rouQp@&5n#`S}Da-!7pV8Gn72MQSTOS4#G$z||y@m|!5H9}i+Wg~GLSowO8Gkn#n4>kt+8hBGmK2;E@W=*fbj#4FrK(#7#J5D#;!y_Qf3`N0IXMY?WJv*=+e+tmyYM(0s6p zE$n^(T9rD)CI2(mVrCLlncv1kkEh|pnmOQj!ww%!xr3|E>(CL`Mo52S171A8@Xh9r zaB+SlaE}Z#LS1=`eX|N>mP(*TB*2+vcOiAxE}XPXo6I-3%$D)a8eejmS;+HHI2&jT zPN7Ti;w@?FpA(9;5x-bfdp{6!d50#_e<4*U4NAU8V^E(ATKNPr6D7sT{BcPVe_V#F zQXWVBX`aY8`V}sPCgZvZwrt(06HsW9h!<*d@xXCeVlXclrBVgpr12=q3T|g6BqhM` z<9e79twT0BEWktE286sh2sb44h?QsoE}5oGQcSF%+Kd-F@=J;MXgq;)?L8>4NuAop z?#9y6@8Gfa6PhISvx0|z!=2sYtk4xLxculcT(c2p=D6(TJ}PfStz$`$w`MvnR9T6W zc`-fi{a0YSsR4;xnSoa;^~hMR9AZ zG9N1<{$hi(2pNicg3~p{hyvup*o#qY&Y2Ibj})m~zZo>=uS3n=KXC9|0_Hx^q1Ipc z@!ZjLc8P*Axp>hL_Bm-2&3Zwc?{2`In-K%4r&}45<-rhXZwx2T`G9jvKL|w~!nKK~ zA?Rim*sXkz@n=~0;PDQ_-iCry!94tUj)lsX1&rKYO>*mmIQ%uQgafyH8S4cOm?|Ph zJqO>w*PRCRZJr?9_@f5*^K2o1%O}jfra*cfX5x&2SzPHmpIMVH$+$*jGeC|G&G?wa 
zRl5BHrqSnUef0+_HY$*>HCv&-UY`o(xS>UUE(SfE#E5mAlPBk~a!PXAbjvr!35_db*Mox#5UA3&qI2 z>W^5$-4Ue|1jy_;71%X)99Q?K)65CdBrK+iVGn50(^V>@Swo#_O?!cq%U+)RR097= zWPy^^Cul$J$b4J46OyMYlI0f7OlR6@ESow5wlA>6V~<-v*IR^sT4_Xfr9XrVO>sGf zZcbqI&=m{fCt~OJIs6H=E+|rv$n+JO!qLb|X3)+8zuiN2fl(tGE84=gJ>JZNnFauZ zO~_3Zq}f)0)C-7HquJq1^`3Z4`rU=`6%-cTHm3LH2!ZJ5Ay}vRj&Hk3i}f$b;$3C_ z;r`QxB&xU%SBWoXw6^mhu7=7M0?~*rT{xoX202nt`>(UFfbChc_HJ9slSb%s=u3 zj(kmJRtWWB!=_Fkt47f@(gVkL&V>$_HMp-vg8q4-M1NGqv&x+Q7}|G&Ikt5OQqC$u z;;1BeD#&oXp-^}ET(-5T zumO70r!dkBHSo8z8VQ}z26Og?Vq~=p{mOZFuarbd#q34UFQiR{-|J#V*a)0Z$OOgs zmuPAtL&b-;Gs5nI3%uldV2c@LP3ND1kgdX0Y33Dn&jC05??^2E!$M5ryk(V~AGrU7 zF)8iw2bGC3^g>Jldu&-2uIP`!sF%^~>g~!T_S9zn(Gdmsxw|7Eek>H zju`D~P@*xiD!9L1mbQOxg82)UgL2bv7~ZEy@?vXY6ElV)PW@<;{F7NzAWoys2trYc zubKY@V{(tX)6**RF{JK4ytP1wHcUSW#Z^2|x?sbiTOACZ{tT(-55ea;U7Q>5%eE){ zWj1lWht6ZsJmG2}19`hX<;USh=#MxA6n+N;pXZF^byx!*A*X)ZhWt})$o zq#Ymc$zU#hjRc`B3n6J+IlH>g6N6^HfKi?cbCAoezrXbsms;eaYNIk;8Qcg*%e3es z&+GW_?^pajSDG5gzQKM2LHeO-J`>SD&KMt)q;32Jv3ylD8qF5xl?mKm;|Hdg5>Y7C9vv(J3bp1CkjGc@HxE` zclSu6i_$X~slA8}_j5pENdwM}&?oQI<=~U40IGULfZVJ1_*Rw${V9`~;=X1^eWxJJ zdtid2$4218{*SowNk2+n41_onDU$CdOH*>W+|1fbaLd4$K1vzF@A~?5+*}-w#=eAu zd#A(qK3y8wDn#=Jajwx5{``vuL{?=ko0Jj`$G4@ROtB7K zo0N7QjvppVyYMClGMjK8oC)U%vXzP}DtwRSRbl5NlhH*vygYl!^p#;Rp! zq3YRnxKpVbr*E$WH`AHuJIfiYXA2Xxoh~rVS&!5#^WYVj?8J@r!8zhfWQa@he)#;~ zS(x>piU~KZ!PM{3_+r5f=r8<%f2VUlr)4vntX#%S)l7u+`?FxDrzV+K5zAMck&ee+ zh!TmvA0WoM6knAc$9)YSP;&oEtW(mVL28FEsyP5stcB^C=sd=I{0bVYC*ayrZ-mNP zyp!^ktsmAVJ-0uAG3SHXIZD%ZJ9{)1u|%6ladwmaYV^Iq$F&=yVTQp6X6hzSoN`W@ zcHX>&Mb9`NwWkWwMEJ0OQ$5!kj=(ORU0_urMvu-mpr2j8fJFNkzP9(rhtK87rc1*} zw)CKXlNQ~0cI$`%QRp z6^Ge4Z4p9LQZb7s&SdeWsPpyvoX+wBcE4zVxbSwE8@!u2XkdkE%^#U=n`f}#{Q%b$ zQYDpN_36dY{aDRwM%(o!C^UWtWsfQ{;gW}QV!aN*YR+dKc_~K1I`r9njDZ2phiczwDqO&n9FMp+SX-1jaTK7WK0Vx?&H=lyuZ zvL1R()8W4|b=u~Y3SI|R;7Y?@w3}7H24!(zWpy`>@}hvoL)26 zW)H#SOV1%?fjc-gNs&<7N_1c7ir=PgkAYG}UxMG?xJ$o<$duHE7 ztxH_rWJ^ElyQ@HS@-Aj*v;_ao7ox78Gnu}#(V(?ffcy}aBVUBu+2s{~vAF#*8Vr@g zy_ig>GEpUV?z*JjI0|OT7BUJ4ow=UbZT@q;DBfAy#rR`IFgq_ziX7kU41EWr$X5?R z@VL8!8B)pDW`jJTw_5Srq)3=t;)yZ5C`LJT3S4;n5$)ZZAzM{}UOXm=>3hRK z_)HcrUU3n+g~*a)No6p(*cJ+8_CwGoQLqn_<1#Dr*o4;veI1SH6aQA2);$w)cHV&U zCT;rmya+qUn&70RlHluFgVp(?kZ4;3Gqu9dAg&kBW{u&_uxu=lmZW35{h9P%t~}Lg z?hqjM71kbZW+KkWk=@x*>^m!Gys)DJZ+?#ged8qlRTC{Le0UmqIT_H-5DQ3^UW)IZ z%TimdZOqsyU-)mDGJRVjN7Bp(plHSbe(!Pwg~VrgB{CR-TIwMr@-h?|e}OakT#qLE z4|_*F7&G{9aIx4~Tx7L|*;N5EvQ+BePK=4`7E6ubgQlqT)#d9`$wu_QK>SCtI;Plu0@dKHiA!A7MuC6Y{3JL{b+Q|hzLFkU^m_S z#4kMc8wI&skbFmV_Jd!~V5<&yE;8AWBVh$iYzuGU0v+P<)&%D%t-ul-LKs?zYYo~k zY9N@OEb|p|3;&__Ks8VHYCB4g?tq6uLiD!&zN`TCVA$|@GxRlIv5(h3B5PsY<^eE!Ps(UQV@vB9ZNO$mx>BZr7YOH$r zRye8Ui#(qtn0X`zj+VbcZG$AdCV3vBCEhYqat-nIEGN8q={=4eSEPY(873Oa)Z|~c7Mz`cU$4E!MmX1S_hSP%E8w#g|XoBbN=T)L6R8a2}L8EmVF!TZ_0qmMIJjQ z8g_xF-I6o~wdoz6CjIv(l({}- z1Q~}am~~f?8Y>IZ<*Axf^rk4?Jzx*Dhrgmmn*@Cy=mm~fbm)!b8m9Z5BH3qZ1{$9; z@tnv#oN~_}1r6Q#n-8eatLih5m-`aNdoDBOJzVb5N{ifg>VtoIJ7K2t99X~TF8)3s zL#$Q`k@zpmpvgd-4!x7ZKY#atO`Zn4x)g&qE(F55ZWmS)4Vj6JNjPiKKCCTOAeE74 zS=Tww_$q?U80j^Oz4mz>YEI5(7TGA!4DSeV%QMIIk6Jhki3+aQ`i>1o55Ty!9$yXV z!#$r!m{nqpYlJG%Q_=^Cmkvq(?++}_J%$;vKjD~20)Eu`&DYq~&MrHe0pCw^os8o; z*dDOKWha0Oif}ZB1Y#7hCxwSEM&Uq zkSAM(i9(+~jXQf1lm3kJW;#_M?=6>$Xi8xeKi16d{{4 zN104@DXRKW0#BThCF0FraG!tzc{SY`RSo3nsSVjI`}!yR3fCr@Ypd|IRvRX3v@kKE z#f<%lS$I%SpI-MD2kE4rsL9u&vU}WF!%GU_^f4Yi2hw2GtW@mLl_9nFuE6x#7QEh) z!0y%-reZyZ*i*)L_#Gn0AfR814jbG`my=T$T$^dnF}u zMNF8=uiK8NJUZB|Cz5dVsw*^GhG5L#FbbArKqfc0#LIn!z=jqGJgQ1f6?I9T(0+dO zqbH#CUl6PMK#02B`^|pbIK-MBn+&JVB!EeLA_n9LktQV-;`~k@`%aa!1>TxuxqCid 
z7QTbAq9g3Vw^C&8(s@jv9UpAoN?;OSjhy&gff^gf;76Jw7$w-kWak#nbH0n?$6c7b zIt4a$e>t3gRRl((Qy^i&kDRqJRru%2Huj2SHMq>mhoxLDO2Va*XPqO632lbd+$|V& zCz&(Niw|S&OAjUt2I0@fYp}|*k-40G2L?4AnejjcQnh&zIt~9poy{(==+_<8+H(i4 zay*@_>Z6!+A%&Gp6eUx)$dLVJ#aORzimg{&m`?-$(BxYqES)=y{Sw+FZ$=}>V5z}@ zp)T-oufejqLiCciVdko2;b=+^@Tn`zOxGg2S_iP;^)d**--IQ%vN1^N9@JXt5Q#Sr zxW2Oo{GI&|TiQ*a&0#ZB=aPjnnknE-~i(fSyd=>3I_{ zTw0C#8Vgf;Ll;e-@!{C=r(jT;%tn`bzM2<7F4ID#YSZI9|bbu$A7#E{q5*EU5V@;*CUT_|Kz2u zk|7yBHn2SHIWKO}V^rQ9hmZY4X~APzYO#JQ1UL`k>bF|tqmvXh(p!i&+ss*|i_h_w z$4Pj4hRb0MzJbFZZ{X807Cp>(tP}eHI_rgq*Nb>Iew_||zO$3<{-O`pRvlqYK9sP7 zA3E5`sebHil}X^4t`D;YRp{X;H~hzS%f^RNzA;DN2&0OLCORVOED(`lA+B6w5KO z7kqes*M5bSy?ppP`8))s36go)Cd>xCFK~Fj0~C&_lZ~}1B=X1#_-|B=*qv(Sy4=4| z$MgNE`Xs>Dhs*kHLhX`JcF<-Y=chEnj*I%FE!o>_BtV14 zm?#q0KhmTmD#w5^u3iR{C9!}1LxH-bg>rsWuZ(Kp6I}w>8|`q zffE>MT>-M{v=)u(cn?p{N5R>RV)T;D2-YZ1zy*W%k>5a3dhR?%cBK!)vL4{8{Tky$ zzhLx&Ly+dC!D`xmhl`UFSk)v2I>kbP8fL$OQPHEQdRUGw2-G28@+HW_?iB3(qe$+Y z)~B+M`Y`8-6yptiUI>pl&Im3$j0KGX#KJcfOU@0$GMQ>#-gR#nD>a}`m`Eso zQwH}x*P>NjBNMq=il$dIL$7ES+L;N{o?$8S`a%(NGu8tq$L)bniVGmpaw0fIu&^T3 zfKJ+|Oe{8pv#+kku)Wu_nTf*P?B;^&{N4%$wqif`PBFO#^Ha9~$D0D(%onisK@q#J z<~`W0$bkDqnOrcBXV1TLhI*L~%$fbSL66IdTs_gt2*)Jhxq)Kl)T~kn*&|Gzz8-;B zTz8_@p_~8hL?y>6W(w_u7Rqujf$TZer+j7Q_V^{rS)8NpghfwMzLbNX!(~<%)YQkR# zs`aWQpJQk&v&{j$uglA{W>z5V$nCHQ;g58W@ z-w-b3vgPtF(d@HV@vvfdH7uDWMqYk4XL>G~<9Ue#IWwaaiME6^RczmmyZl7xDTli- zvMwEe^1OhT6$-n;^vH~Dv3T;-0}ve&BeO-*;nlP&xUbF!vdsD*n&-#3-2cXCxp-h~ zu|D~GCJR<@IV7Kx@?_S7myFVZ=NPUdLKd8P&F1V{02k(b14xsjh933s_-+I*_SkM< zJRf7;f_1Cx-3ot!^?sJ#j97(Il^g0Gny91Bo)_$svMDnqnE z4!oSjhsqCPRH}^Y{kPpjzfmnLcWlK~GA~(08*y5{WEg(E(_`WnxZv}%qGWh~9d0h1 z##DSNX6A8xiRiUiFnwhZ_S^OHtOl7FUV=QO`mQ>(M5Ygxp}Wkql+q zY#}xx9);Yb=n3`)^T~E09$Zz0VjNeXtpA}|#vl}J*CYRC{e(c3OgweN8<*iBJSZwm1H}f+UKHF$ z#e;@K{!KQvaO}j+ogYE)84p_2AEC<$8G7_o3rL=iz{M8?=`tpnv35{e=yz}cACJCf zSczM>DsVQ$zBI${W9gV6*oQZMJcK6|Us2CPl@4U2^A0VQB9mhi@C?`Ey|XP2rf@tY zbvJ#wJEDvIGb4#NC;0{}RgQ*q4?(irzL3e9*os?2SMkgK#Xy7uH|x&ixO)~0@#l?) z;C51y$@KGL-#s)Si}wFU`Tf1@bYU&{tR+t-PnIBZPg~&Ipdcx1R-hYOB(h^xrs2oG z4RE9UByUadUJP8H1Jx-ylo>E2Gn6Odt-bdkYri7$oPROq&y~op@)KT4-M8{+FA%Re`}Vs#}vZR|mH|1cCh_8bGjiM>1F2!Hyu2)LtSK=hhEGP~wQ zvpw56e6i1RGf^vbqOS52nxwrM-F@?M(pMIu>$z_HoCi?-{t1}QH6-nvX7KHh2pr7b zh22eDAHcx^E#^Dm##a*LCT!sM4v3Q8EK|%gAHEi;t^%sg1(!5oU2KqO_AMjKnaYW8qo<0OxUi&($uJm%kVda z^7NWj$?0SHOwk=>=v#3HUhX!e3v&jcBT|eyPFM#1V&!NRMZj}gH41*vrE`C9OrZt8 z;j@E3Ji0Rk^Y3e7%yR?oZBU5UY=AwY@r<3$gtNzQq$BTxEIFy5fteXjQ0H+UUPS5B zFSeR+K+K3JiiYA+oQ?sr-r;a@4^HxQfs5N6aIM)3*uJp_##ctb>DQbVWp@MoGV^h| zxdQnZwGr5_IF|VgQTlRj9Yl_;hKog4*`)?LMB1|yMxAD0euX=$n2>|90r{Zv zL7s~5c0kPw^5jLA0Lh9{q4SPPl6bL?;Ii-*|N4S>*7?3O!+UxPZtnwaEJ>{d3!Qmz z!(k47+G_yX1stn@V>AD*Q^tYuK=jHQfO9LA$=x4z4OyT3c2qq-5ZeD3gn#k^tvdl=4J zn4QMjBftDt}KE0}u!AsCm0y zM|&)vR|nC0Ms(lGZtUbTQKl2UnB1gxP>g+mXO2JTC(f})t>neb&mJ}Ey5<#r(euVD zhW|K@zbN^1PMMWUK8?+lhP2^u6*7_`%xf+`9Jy15>^5@eM_*Q;%hwx&xp+P7v-^!F zdYWK7GKI%nZq{u9su&2J95-;+#3kSnv=$9Y8*m`ri?JNyvcjXP zxZkuGtoAlA55G@LZ47Q*_&~@VuDZW+_BfX z5H^H#;Q7#TDBaWqL!lq}*E@B|**?N(G|9u^_^-IV?j*?WX=Wpee?iTe5G;v}$~i5} z^?Aa);21rEhWAP_)JGA|9I=K9?_jL?pihQB&Sw6t4aIUH4WgPDj|012;U|6Wtuq*i z!@=fobwvU=mT|gx)?;XVs7c=QWvTD3B6w!C2=2NH5WQRdd=E~Har>-Krt3I^zmg`E z3-p8EdO2wC>jgT01o$sU?t`YU64=&i!4x+^__9Wb)=UwkI{9%B+^oiUDslSUPZqCj zRHT!I|6y7{DSu0@Fts#PB)c**aL%|a{aC9;R+&vhsfWfK2)?8vXNK2u*`sAyDojG+gI{p`#Nzr&)sM+Z;B6Xwess-eL3B27cL!Y}{cM z1KlxOc=l7{aFXkErt?w2TTlM|G`b$1A(l=55l?%_~@)#?b?gzuu?}FJ38~6;~F!|kK z_$&Mbvj-JO?`Ls(P3H!*oQi~V2UcOB|3fTX*@WgRdojg!4JxS!(6jdE(dCE*?As|s z4*&6ECKrY=LTv@;mu1B)Jg-JxJ1oJV?=HAQ#)vEy;`+nu{84jKKOQOVWHHgwVcD{CM1%-OGK? 
z8glBiGV22bXNTk0OImQib{~G>m~O>x;$(BJAz3D&!Y1gnfScuC_@btdmT??EbCNb$ z7uo}x8XvL+Y3U&HT8^H{)g;a>nOvtshg!My!flC>oPx<*Ky}&1G1S{gP-~(iAZy|7l1Q zv7(hZZ&I#<{|yD|ZC#1SxjE(Y+BmjMnfn{9~y`3tg$PbY^TA@JvPEf)_(@s+Z@kon-lC_?*pGxKkzH9M9|Pa6`8d4pf$V(iiWdr z*en!Ol(pI2DTbt8<_Vr&z`gzFb#nbEKQ_iUgbh)SK>;Jmp3a(=t*4s-7i4F_r(j>m zU)%?02d%K==4%{UH^iDN=;97N3uu&YXCKJ_gso;`C^jz;?`=8+zGf~UO{16A=i5b=hwW3}Swk^V2``&=YUI#RNUJUu1pQ3q8 zgN8bou+;n=Th%W_;_m8_O&285@ioUDIG_dBR)0aw@-E!F{}FZ=RAP^lDjrT%!Jrc_ zKv30*t=yFlIft$BkS%wIwfuwe`XX4)WigHh+(Dk;YtVi34wqjELWd11Br(VtMg&`M z((X4{uiXW%90$WO<}*Yje!~XKC2(?s9eiItZmH4|?}y_XEmJ{*FJf_+%^SbbXQXw4plb`}O?9f4tE?GWscl0XS05BxuUDxY$})2s>*AHsHk@= zJ1ZO$Ui5?U(kBpm?J3@q@aH%O%|IinaK#2!tlD@QcbHV7@$@2g`O#5y=XO)b3MKMh zgg%C=uS`I{@eBK3O$ctBoUB;k-#B-Eb!ADe$6Svx~0?;q~<#`^2o6 z|BCC-n)uh?%#$Hl(>?`EFB&3LS2NC9Wl$mJ3M+ZWr18^cytw8Z)|b0u+Iu51^+i2) zO-#i)=jk9C)Q66CDNU5Mb@7>jg~U4oCe?x9e(BTRIr8p&1Jk?X5GUprNeMp zNtqaER=~5?B`|0ziAnpvp<;LyGsEmdI&b#R z1js!jMaR-!^K9lmz;DBKs5yHUK+FJh(@CAYxZ(ma4ff0y0|h!&@-Q}W8uj0tTOj-V z3mdSVo3H!j$S*%p5_(RPn4FU!8|<5y^3icPTNZ}ljjANzC#UP~&cZ~+dwBSl9_don z!0(kCnX75;Oq{n7S!C#sC({(j1I-4USUrYeoVOn@T!&>s5$vZarJ%dn6An8nW7