From 25e5c23eae642c21c0bf43fc0df16113ea6dea29 Mon Sep 17 00:00:00 2001 From: raver119 Date: Mon, 26 Aug 2019 19:57:51 +0300 Subject: [PATCH] [WIP] Error handling (#169) * CUDA reverse rewrite + couple of tests Signed-off-by: raver119 * don't throw exception on invalid pointer Signed-off-by: raver119 * data types validation for fastpath exec mode + 2 tests Signed-off-by: raver119 * data types validation for fastpath exec mode + 2 tests Signed-off-by: raver119 * ismax allowed dtypes tweak Signed-off-by: raver119 * lastErrorCode + lastErrorMessage for native exceptions handling Signed-off-by: raver119 * exportable ErrorReference Signed-off-by: raver119 * check error codes in java Signed-off-by: raver119 * - consume lastErrorCode - fast_in dtype validation fix Signed-off-by: raver119 * - sg/cb allowed output type change - minor logging fix for data type validation Signed-off-by: raver119 --- libnd4j/blas/NativeOps.h | 44 +- libnd4j/blas/cpu/NativeOps.cpp | 1798 ++++++------ libnd4j/blas/cuda/NativeOps.cu | 2417 +++++++++-------- libnd4j/include/execution/ContextBuffers.h | 4 + .../ErrorReference.h} | 40 +- libnd4j/include/execution/LaunchContext.h | 4 + .../include/execution/cpu/ContextBuffers.cpp | 4 + .../include/execution/cpu/LaunchContext.cpp | 8 + .../include/execution/cuda/ContextBuffers.cu | 4 + .../include/execution/cuda/LaunchContext.cu | 4 + .../impl/ErrorReference.cpp} | 51 +- .../ops/declarable/generic/convo/ismax.cpp | 2 +- .../ops/declarable/generic/nlp/cbow.cpp | 3 +- .../ops/declarable/generic/nlp/skipgram.cpp | 2 +- .../ops/declarable/generic/nn/softmax.cpp | 2 +- .../ops/declarable/helpers/cuda/reverse.cu | 114 +- .../ops/declarable/impl/DeclarableOp.cpp | 199 +- .../layers_tests/JavaInteropTests.cpp | 26 + .../tests_cpu/layers_tests/NativeOpsTests.cpp | 75 - .../java/org/nd4j/nativeblas/NativeOps.java | 40 +- .../allocator/pointers/cuda/cudaEvent_t.java | 9 +- .../allocator/pointers/cuda/cudaStream_t.java | 5 +- .../linalg/jcublas/JCublasNDArrayFactory.java | 261 +- .../ops/executioner/CudaExecutioner.java | 184 +- .../java/org/nd4j/nativeblas/Nd4jCuda.java | 139 +- .../org/nd4j/nativeblas/Nd4jCudaPresets.java | 1 + .../cpu/nativecpu/CpuNDArrayFactory.java | 187 +- .../nativecpu/ops/NativeOpExecutioner.java | 209 +- .../java/org/nd4j/nativeblas/Nd4jCpu.java | 139 +- .../org/nd4j/nativeblas/Nd4jCpuPresets.java | 1 + .../test/java/org/nd4j/linalg/Nd4jTestsC.java | 98 +- 31 files changed, 3002 insertions(+), 3072 deletions(-) rename libnd4j/include/{helpers/ProviderRNG.h => execution/ErrorReference.h} (57%) rename libnd4j/include/{helpers/impl/ProviderRNG.cpp => execution/impl/ErrorReference.cpp} (52%) diff --git a/libnd4j/blas/NativeOps.h b/libnd4j/blas/NativeOps.h index 9ce90176f..9bca7bb10 100755 --- a/libnd4j/blas/NativeOps.h +++ b/libnd4j/blas/NativeOps.h @@ -79,6 +79,18 @@ bool verbose = false; extern "C" { +/** + * This function returns the last error code stored. + * @return non-zero if an error occurred + */ +ND4J_EXPORT int lastErrorCode(); + +/** + * This function returns the last error message, if the last error code > 0. + * @return the error message + */ +ND4J_EXPORT const char* lastErrorMessage(); + /** * * @param p @@ -557,38 +569,6 @@ ND4J_EXPORT void execScalarBoolTad(Nd4jPointer *extraPointers, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ); - -/** -* Append an input array -* to the end of a flat array -* in a particular order -* @param offset the offset of the array to start at -* @param order the order -* @param result the result array 
-* @param resultShapeInfo the shape info for te array -* @param input the input for the array -* @param inputShapeInfo the shape information for that array -*/ -ND4J_EXPORT void flatten( - Nd4jPointer *extraPointers, - int offset, - char order, - void *result, Nd4jLong *resultShapeInfo, - void *dresult, Nd4jLong *dresultShapeInfo, - void *input, Nd4jLong *inputShapeInfo, - void *dinput, Nd4jLong *dinputShapeInfo); - -ND4J_EXPORT void concat( - Nd4jPointer *extraPointers, - int dimension, - int numArrays, - Nd4jPointer *data, Nd4jPointer *inputShapeInfo, - Nd4jPointer *ddata, Nd4jPointer *dinputShapeInfo, - void *result, Nd4jLong *resultShapeInfo, - void *dresult, Nd4jLong *dresultShapeInfo, - Nd4jPointer *tadPointers, Nd4jPointer *offsetPointers); - - ND4J_EXPORT void specialConcat ( Nd4jPointer *extraPointers, int dimension, diff --git a/libnd4j/blas/cpu/NativeOps.cpp b/libnd4j/blas/cpu/NativeOps.cpp index f5d4996e4..86bc04fc4 100644 --- a/libnd4j/blas/cpu/NativeOps.cpp +++ b/libnd4j/blas/cpu/NativeOps.cpp @@ -102,8 +102,12 @@ void execIndexReduceScalar(Nd4jPointer *extraPointers, void *extraParams, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo) { - - NativeOpExecutioner::execIndexReduceScalar(nullptr, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, hZShapeInfo, dZ, dZShapeInfo); + try { + NativeOpExecutioner::execIndexReduceScalar(nullptr, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, hZShapeInfo, dZ, dZShapeInfo); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } /** @@ -125,31 +129,36 @@ void execIndexReduce(Nd4jPointer *extraPointers,int opNum, void *dZ, Nd4jLong *dZShapeInfo, void *hDimension, Nd4jLong *hDimensionShape, void *dDimension, Nd4jLong *dDimensionShape) { + try { + auto dimension = reinterpret_cast(hDimension); + int dimensionLength = static_cast(shape::length(hDimensionShape)); - auto dimension = reinterpret_cast(hDimension); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, + dimensionLength); - auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength); + auto hTADShapeInfo = tadPack.primaryShapeInfo(); + auto hTADOffsets = tadPack.primaryOffsets(); - auto hTADShapeInfo = tadPack.primaryShapeInfo(); - auto hTADOffsets = tadPack.primaryOffsets(); + auto hz = reinterpret_cast(hZ); - auto hz = reinterpret_cast(hZ); - - NativeOpExecutioner::execIndexReduce(nullptr, opNum, - hX, - hXShapeInfo, - dX, - dXShapeInfo, - extraParams, - hz, - hZShapeInfo, - dZ, - dZShapeInfo, - dimension, - dimensionLength, - hTADShapeInfo, - hTADOffsets); + NativeOpExecutioner::execIndexReduce(nullptr, opNum, + hX, + hXShapeInfo, + dX, + dXShapeInfo, + extraParams, + hz, + hZShapeInfo, + dZ, + dZShapeInfo, + dimension, + dimensionLength, + hTADShapeInfo, + hTADOffsets); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } @@ -175,31 +184,38 @@ void execBroadcast(Nd4jPointer *extraPointers, void *dZ, Nd4jLong *dZShapeInfo, void *hDimension, Nd4jLong *hDimensionShape, void *dDimension, Nd4jLong *dDimensionShape) { - auto dimension = reinterpret_cast(hDimension); - int dimensionLength = 
static_cast(shape::length(hDimensionShape)); + try { + auto dimension = reinterpret_cast(hDimension); + int dimensionLength = static_cast(shape::length(hDimensionShape)); - auto tadPackX = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength); - auto tadPackZ = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hZShapeInfo, dimension, dimensionLength); + auto tadPackX = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, + dimensionLength); + auto tadPackZ = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hZShapeInfo, dimension, + dimensionLength); - auto hTADShapeInfo = tadPackX.primaryShapeInfo(); - auto hTADOffsets = tadPackX.primaryOffsets(); - auto hTADShapeInfoZ = tadPackZ.primaryShapeInfo(); - auto hTADOffsetsZ = tadPackZ.primaryOffsets(); + auto hTADShapeInfo = tadPackX.primaryShapeInfo(); + auto hTADOffsets = tadPackX.primaryOffsets(); + auto hTADShapeInfoZ = tadPackZ.primaryShapeInfo(); + auto hTADOffsetsZ = tadPackZ.primaryOffsets(); - NativeOpExecutioner::execBroadcast(nullptr, - opNum, - hX, - hXShapeInfo, - dX, - dXShapeInfo, - hY, - hYShapeInfo, - dY, - dYShapeInfo, - hZ, hZShapeInfo, - dZ, dZShapeInfo, - dimension, - dimensionLength, hTADShapeInfo, hTADOffsets, hTADShapeInfoZ, hTADOffsetsZ); + NativeOpExecutioner::execBroadcast(nullptr, + opNum, + hX, + hXShapeInfo, + dX, + dXShapeInfo, + hY, + hYShapeInfo, + dY, + dYShapeInfo, + hZ, hZShapeInfo, + dZ, dZShapeInfo, + dimension, + dimensionLength, hTADShapeInfo, hTADOffsets, hTADShapeInfoZ, hTADOffsetsZ); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void execBroadcastBool(Nd4jPointer *extraPointers, @@ -212,31 +228,39 @@ void execBroadcastBool(Nd4jPointer *extraPointers, void *dZ, Nd4jLong *dZShapeInfo, void *hDimension, Nd4jLong *hDimensionShape, void *dDimension, Nd4jLong *dDimensionShape) { - auto dimension = reinterpret_cast(hDimension); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + try { + auto dimension = reinterpret_cast(hDimension); + int dimensionLength = static_cast(shape::length(hDimensionShape)); - auto tadPackX = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength); - auto tadPackZ = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hZShapeInfo, dimension, dimensionLength); + auto tadPackX = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, + dimensionLength); + auto tadPackZ = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hZShapeInfo, dimension, + dimensionLength); - auto hTADShapeInfo = tadPackX.primaryShapeInfo(); - auto hTADOffsets = tadPackX.primaryOffsets(); - auto hTADShapeInfoZ = tadPackZ.primaryShapeInfo(); - auto hTADOffsetsZ = tadPackZ.primaryOffsets(); + auto hTADShapeInfo = tadPackX.primaryShapeInfo(); + auto hTADOffsets = tadPackX.primaryOffsets(); + auto hTADShapeInfoZ = tadPackZ.primaryShapeInfo(); + auto hTADOffsetsZ = tadPackZ.primaryOffsets(); - NativeOpExecutioner::execBroadcastBool(nullptr, - opNum, - hX, - hXShapeInfo, - dX, - dXShapeInfo, - hY, - hYShapeInfo, - dY, - dYShapeInfo, - hZ, hZShapeInfo, - dZ, dZShapeInfo, - dimension, - dimensionLength, hTADShapeInfo, hTADOffsets, hTADShapeInfoZ, hTADOffsetsZ); + NativeOpExecutioner::execBroadcastBool(nullptr, + opNum, + hX, + hXShapeInfo, + dX, + dXShapeInfo, + hY, + hYShapeInfo, + dY, + 
dYShapeInfo, + hZ, hZShapeInfo, + dZ, dZShapeInfo, + dimension, + dimensionLength, hTADShapeInfo, hTADOffsets, hTADShapeInfoZ, + hTADOffsetsZ); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } /** @@ -261,21 +285,26 @@ void execPairwiseTransform( void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, void *extraParams) { - NativeOpExecutioner::execPairwiseTransform(nullptr, - opNum, - hX, - hXShapeInfo, - dX, - dXShapeInfo, - hY, - hYShapeInfo, - dY, - dYShapeInfo, - hZ, - hZShapeInfo, - dZ, - dZShapeInfo, - extraParams); + try { + NativeOpExecutioner::execPairwiseTransform(nullptr, + opNum, + hX, + hXShapeInfo, + dX, + dXShapeInfo, + hY, + hYShapeInfo, + dY, + dYShapeInfo, + hZ, + hZShapeInfo, + dZ, + dZShapeInfo, + extraParams); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void execPairwiseTransformBool( @@ -288,21 +317,27 @@ void execPairwiseTransformBool( void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, void *extraParams) { - NativeOpExecutioner::execPairwiseBoolTransform(nullptr, - opNum, - hX, - hXShapeInfo, - dX, - dXShapeInfo, - hY, - hYShapeInfo, - dY, - dYShapeInfo, - hZ, - hZShapeInfo, - dZ, - dZShapeInfo, - extraParams); + + try { + NativeOpExecutioner::execPairwiseBoolTransform(nullptr, + opNum, + hX, + hXShapeInfo, + dX, + dXShapeInfo, + hY, + hYShapeInfo, + dY, + dYShapeInfo, + hZ, + hZShapeInfo, + dZ, + dZShapeInfo, + extraParams); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } /** @@ -323,18 +358,22 @@ void execReduceFloat( void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo) { - NativeOpExecutioner::execReduceFloatScalar(nullptr, - opNum, - hX, - hXShapeInfo, - dX, - dXShapeInfo, - extraParams, - hZ, - hZShapeInfo, - dZ, - dZShapeInfo); - + try { + NativeOpExecutioner::execReduceFloatScalar(nullptr, + opNum, + hX, + hXShapeInfo, + dX, + dXShapeInfo, + extraParams, + hZ, + hZShapeInfo, + dZ, + dZShapeInfo); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void execReduceSame( @@ -346,18 +385,22 @@ void execReduceSame( void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo) { - NativeOpExecutioner::execReduceSameScalar(nullptr, - opNum, - hX, - hXShapeInfo, - dX, - dXShapeInfo, - extraParams, - hZ, - hZShapeInfo, - dZ, - dZShapeInfo); - + try { + NativeOpExecutioner::execReduceSameScalar(nullptr, + opNum, + hX, + hXShapeInfo, + dX, + dXShapeInfo, + extraParams, + hZ, + hZShapeInfo, + dZ, + dZShapeInfo); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void execReduceBool( @@ -368,19 +411,22 @@ void execReduceBool( void *extraParams, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo) { - - NativeOpExecutioner::execReduceBoolScalar(nullptr, - opNum, - hX, - hXShapeInfo, - dX, - dXShapeInfo, - extraParams, - hZ, - hZShapeInfo, - dZ, - dZShapeInfo); - + try { + 
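// Every exported entry point below follows this same guard pattern: the
// original body runs inside try { ... }, and any std::exception is recorded
// on the default LaunchContext instead of unwinding across the C ABI.
// Callers then poll lastErrorCode()/lastErrorMessage() after each native
// call. A minimal caller-side sketch (checkNativeError is a hypothetical
// helper, not part of this patch):
//
//   void checkNativeError() {
//       if (lastErrorCode() != 0)
//           throw std::runtime_error(lastErrorMessage());
//   }
//
//   execReduceBool(nullptr, opNum, hX, hXShapeInfo, dX, dXShapeInfo,
//                  extraParams, hZ, hZShapeInfo, dZ, dZShapeInfo);
//   checkNativeError();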
NativeOpExecutioner::execReduceBoolScalar(nullptr, + opNum, + hX, + hXShapeInfo, + dX, + dXShapeInfo, + extraParams, + hZ, + hZShapeInfo, + dZ, + dZShapeInfo); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void execReduceLong( @@ -391,19 +437,22 @@ void execReduceLong( void *extraParams, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo) { - - NativeOpExecutioner::execReduceLongScalar(nullptr, - opNum, - hX, - hXShapeInfo, - dX, - dXShapeInfo, - extraParams, - hZ, - hZShapeInfo, - dZ, - dZShapeInfo); - + try { + NativeOpExecutioner::execReduceLongScalar(nullptr, + opNum, + hX, + hXShapeInfo, + dX, + dXShapeInfo, + extraParams, + hZ, + hZShapeInfo, + dZ, + dZShapeInfo); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } /** @@ -424,28 +473,34 @@ void execReduceFloat2(Nd4jPointer *extraPointers, void *dZ, Nd4jLong *dZShapeInfo, void *hDimension, Nd4jLong *hDimensionShape, void *dDimension, Nd4jLong *dDimensionShape) { - auto dimension = reinterpret_cast(hDimension); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + try { + auto dimension = reinterpret_cast(hDimension); + int dimensionLength = static_cast(shape::length(hDimensionShape)); - auto tadPackX = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength); + auto tadPackX = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, + dimensionLength); - auto hTADShapeInfo = tadPackX.primaryShapeInfo(); - auto hTADOffsets = tadPackX.primaryOffsets(); + auto hTADShapeInfo = tadPackX.primaryShapeInfo(); + auto hTADOffsets = tadPackX.primaryOffsets(); - NativeOpExecutioner::execReduceFloat(nullptr, opNum, - hX, - hXShapeInfo, - dX, - dXShapeInfo, - extraParams, - hZ, - hZShapeInfo, - dZ, - dZShapeInfo, - dimension, - dimensionLength, - hTADShapeInfo, - hTADOffsets); + NativeOpExecutioner::execReduceFloat(nullptr, opNum, + hX, + hXShapeInfo, + dX, + dXShapeInfo, + extraParams, + hZ, + hZShapeInfo, + dZ, + dZShapeInfo, + dimension, + dimensionLength, + hTADShapeInfo, + hTADOffsets); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void execReduceBool2(Nd4jPointer *extraPointers, @@ -457,28 +512,34 @@ void execReduceBool2(Nd4jPointer *extraPointers, void *dZ, Nd4jLong *dZShapeInfo, void *hDimension, Nd4jLong *hDimensionShape, void *dDimension, Nd4jLong *dDimensionShape) { - auto dimension = reinterpret_cast(hDimension); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + try { + auto dimension = reinterpret_cast(hDimension); + int dimensionLength = static_cast(shape::length(hDimensionShape)); - auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength); + auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, + dimensionLength); - auto hTADShapeInfo = tadPack.primaryShapeInfo(); - auto hTADOffsets = tadPack.primaryOffsets(); + auto hTADShapeInfo = tadPack.primaryShapeInfo(); + auto hTADOffsets = tadPack.primaryOffsets(); - NativeOpExecutioner::execReduceBool(nullptr, opNum, - hX, - 
hXShapeInfo, - dX, - dXShapeInfo, - extraParams, - hZ, - hZShapeInfo, - dZ, - dZShapeInfo, - dimension, - dimensionLength, - hTADShapeInfo, - hTADOffsets); + NativeOpExecutioner::execReduceBool(nullptr, opNum, + hX, + hXShapeInfo, + dX, + dXShapeInfo, + extraParams, + hZ, + hZShapeInfo, + dZ, + dZShapeInfo, + dimension, + dimensionLength, + hTADShapeInfo, + hTADOffsets); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void execReduceSame2(Nd4jPointer *extraPointers, @@ -490,28 +551,34 @@ void execReduceSame2(Nd4jPointer *extraPointers, void *dZ, Nd4jLong *dZShapeInfo, void *hDimension, Nd4jLong *hDimensionShape, void *dDimension, Nd4jLong *dDimensionShape) { - auto dimension = reinterpret_cast(hDimension); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + try { + auto dimension = reinterpret_cast(hDimension); + int dimensionLength = static_cast(shape::length(hDimensionShape)); - auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength); + auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, + dimensionLength); - auto hTADShapeInfo = tadPack.primaryShapeInfo(); - auto hTADOffsets = tadPack.primaryOffsets(); + auto hTADShapeInfo = tadPack.primaryShapeInfo(); + auto hTADOffsets = tadPack.primaryOffsets(); - NativeOpExecutioner::execReduceSame(nullptr, opNum, - hX, - hXShapeInfo, - dX, - dXShapeInfo, - extraParams, - hZ, - hZShapeInfo, - dZ, - dZShapeInfo, - dimension, - dimensionLength, - hTADShapeInfo, - hTADOffsets); + NativeOpExecutioner::execReduceSame(nullptr, opNum, + hX, + hXShapeInfo, + dX, + dXShapeInfo, + extraParams, + hZ, + hZShapeInfo, + dZ, + dZShapeInfo, + dimension, + dimensionLength, + hTADShapeInfo, + hTADOffsets); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void execReduceLong2(Nd4jPointer *extraPointers, @@ -523,28 +590,34 @@ void execReduceLong2(Nd4jPointer *extraPointers, void *dZ, Nd4jLong *dZShapeInfo, void *hDimension, Nd4jLong *hDimensionShape, void *dDimension, Nd4jLong *dDimensionShape) { - auto dimension = reinterpret_cast(hDimension); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + try { + auto dimension = reinterpret_cast(hDimension); + int dimensionLength = static_cast(shape::length(hDimensionShape)); - auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength); + auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, + dimensionLength); - auto hTADShapeInfo = tadPack.primaryShapeInfo(); - auto hTADOffsets = tadPack.primaryOffsets(); + auto hTADShapeInfo = tadPack.primaryShapeInfo(); + auto hTADOffsets = tadPack.primaryOffsets(); - NativeOpExecutioner::execReduceLong(nullptr, opNum, - hX, - hXShapeInfo, - dX, - dXShapeInfo, - extraParams, - hZ, - hZShapeInfo, - dZ, - dZShapeInfo, - dimension, - dimensionLength, - hTADShapeInfo, - hTADOffsets); + NativeOpExecutioner::execReduceLong(nullptr, opNum, + hX, + hXShapeInfo, + dX, + dXShapeInfo, + extraParams, + hZ, + hZShapeInfo, + dZ, + dZShapeInfo, + dimension, + dimensionLength, + hTADShapeInfo, + hTADOffsets); + } catch (std::exception &e) { + 
nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } /** @@ -567,8 +640,13 @@ void execReduce3(Nd4jPointer *extraPointers, void *dY, Nd4jLong *dYShapeInfo, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo) { - - NativeOpExecutioner::execReduce3(nullptr, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hY, hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo); + try { + NativeOpExecutioner::execReduce3(nullptr, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hY, hYShapeInfo, + dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } /** @@ -588,8 +666,13 @@ void execReduce3Scalar(Nd4jPointer *extraPointers,int opNum, void *dY, Nd4jLong *dYShapeInfo, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo) { - - NativeOpExecutioner::execReduce3Scalar(nullptr, opNum,hX,hXShapeInfo,dX, dXShapeInfo,extraParams,hY,hYShapeInfo,dY,dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo); + try { + NativeOpExecutioner::execReduce3Scalar(nullptr, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hY, + hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } /** * @@ -617,19 +700,31 @@ void execReduce3Tad(Nd4jPointer *extraPointers, void *dDimension, Nd4jLong *dDimensionShape, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *yTadOnlyShapeInfo, Nd4jLong *yTadOffsets) { - auto dimension = reinterpret_cast(hDimension); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + try { + auto dimension = reinterpret_cast(hDimension); + int dimensionLength = static_cast(shape::length(hDimensionShape)); - if (extraPointers == nullptr || extraPointers[2] == 0) { - NativeOpExecutioner::execReduce3(LaunchContext::defaultContext(), opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hY, hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, dimension, dimensionLength, tadOnlyShapeInfo, tadOffsets, yTadOnlyShapeInfo, yTadOffsets); - } else { - // going tad-way - auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength); + if (extraPointers == nullptr || extraPointers[2] == 0) { + NativeOpExecutioner::execReduce3(LaunchContext::defaultContext(), opNum, hX, hXShapeInfo, dX, dXShapeInfo, + extraParams, hY, hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, + dZShapeInfo, dimension, dimensionLength, tadOnlyShapeInfo, tadOffsets, + yTadOnlyShapeInfo, yTadOffsets); + } else { + // going tad-way + auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, + dimensionLength); - auto hTADShapeInfo = tadPack.primaryShapeInfo(); - auto hTADOffsets = tadPack.primaryOffsets(); + auto hTADShapeInfo = tadPack.primaryShapeInfo(); + auto hTADOffsets = tadPack.primaryOffsets(); - NativeOpExecutioner::execReduce3TAD(LaunchContext::defaultContext(), opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hY, hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, dimension, dimensionLength, hTADShapeInfo, hTADOffsets, nullptr, nullptr); + 
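// ConstantTadHelper caches one TAD (tensor-along-dimension) pack per
// (shapeInfo, dimensions) pair, so the shape info and offsets fetched above
// are shared constant buffers rather than fresh per-call allocations.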
NativeOpExecutioner::execReduce3TAD(LaunchContext::defaultContext(), opNum, hX, hXShapeInfo, dX, + dXShapeInfo, extraParams, hY, hYShapeInfo, dY, dYShapeInfo, hZ, + hZShapeInfo, dZ, dZShapeInfo, dimension, dimensionLength, hTADShapeInfo, + hTADOffsets, nullptr, nullptr); + } + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); } } @@ -654,36 +749,9 @@ void execScalar( void *hScalar, Nd4jLong *hScalarShapeInfo, void *dScalar, Nd4jLong *dScalarShapeInfo, void *extraParams) { - NativeOpExecutioner::execScalar(nullptr, - opNum, - hX, - hXShapeInfo, - dX, - dXShapeInfo, - hZ, - hZShapeInfo, - dZ, - dZShapeInfo, - hScalar, - hScalarShapeInfo, - dScalar, - dScalarShapeInfo, - extraParams); -} - -void execScalarBool( - Nd4jPointer *extraPointers, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hScalar, Nd4jLong *hScalarShapeInfo, - void *dScalar, Nd4jLong *dScalarShapeInfo, - void *extraParams) { - - NativeOpExecutioner::execScalarBool(nullptr, - opNum, + try { + NativeOpExecutioner::execScalar(nullptr, + opNum, hX, hXShapeInfo, dX, @@ -696,7 +764,43 @@ void execScalarBool( hScalarShapeInfo, dScalar, dScalarShapeInfo, - extraParams); + extraParams); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } +} + +void execScalarBool( + Nd4jPointer *extraPointers, + int opNum, + void *hX, Nd4jLong *hXShapeInfo, + void *dX, Nd4jLong *dXShapeInfo, + void *hZ, Nd4jLong *hZShapeInfo, + void *dZ, Nd4jLong *dZShapeInfo, + void *hScalar, Nd4jLong *hScalarShapeInfo, + void *dScalar, Nd4jLong *dScalarShapeInfo, + void *extraParams) { + try { + NativeOpExecutioner::execScalarBool(nullptr, + opNum, + hX, + hXShapeInfo, + dX, + dXShapeInfo, + hZ, + hZShapeInfo, + dZ, + dZShapeInfo, + hScalar, + hScalarShapeInfo, + dScalar, + dScalarShapeInfo, + extraParams); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } /** @@ -714,18 +818,23 @@ void execSummaryStatsScalar(Nd4jPointer *extraPointers, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, bool biasCorrected) { - NativeOpExecutioner::execSummaryStatsScalar(nullptr, - opNum, - hX, - hXShapeInfo, - dX, - dXShapeInfo, - extraParams, - hZ, - hZShapeInfo, - dZ, - dZShapeInfo, - biasCorrected); + try { + NativeOpExecutioner::execSummaryStatsScalar(nullptr, + opNum, + hX, + hXShapeInfo, + dX, + dXShapeInfo, + extraParams, + hZ, + hZShapeInfo, + dZ, + dZShapeInfo, + biasCorrected); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } /** * @@ -744,18 +853,23 @@ void execSummaryStats(Nd4jPointer *extraPointers, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, bool biasCorrected) { - NativeOpExecutioner::execSummaryStats(nullptr, - opNum, - hX, - hXShapeInfo, - dX, - dXShapeInfo, - extraParams, - hZ, - hZShapeInfo, - dZ, - dZShapeInfo, - biasCorrected); + try { + NativeOpExecutioner::execSummaryStats(nullptr, + opNum, + hX, + hXShapeInfo, + dX, + dXShapeInfo, + 
extraParams, + hZ, + hZShapeInfo, + dZ, + dZShapeInfo, + biasCorrected); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } /** * @@ -779,27 +893,31 @@ void execSummaryStatsTad(Nd4jPointer *extraPointers, void *dDimension, Nd4jLong *dDimensionShape, bool biasCorrected, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { - auto dimension = reinterpret_cast(hDimension); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + try { + auto dimension = reinterpret_cast(hDimension); + int dimensionLength = static_cast(shape::length(hDimensionShape)); - NativeOpExecutioner::execSummaryStats(nullptr, - opNum, - hX, - hXShapeInfo, - dX, - dXShapeInfo, - extraParams, - hZ, - hZShapeInfo, - dZ, - dZShapeInfo, - dimension, - dimensionLength, - tadShapeInfo, - tadOffsets, - biasCorrected); - + NativeOpExecutioner::execSummaryStats(nullptr, + opNum, + hX, + hXShapeInfo, + dX, + dXShapeInfo, + extraParams, + hZ, + hZShapeInfo, + dZ, + dZShapeInfo, + dimension, + dimensionLength, + tadShapeInfo, + tadOffsets, + biasCorrected); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } /** @@ -820,20 +938,24 @@ void execTransformFloat( void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, void *extraParams) { - - NativeOpExecutioner::execTransformFloat(nullptr, - opNum, - hX, - hXShapeInfo, - dZ, - dXShapeInfo, - hZ, - hZShapeInfo, - dZ, - dZShapeInfo, - extraParams, - nullptr, - nullptr); + try { + NativeOpExecutioner::execTransformFloat(nullptr, + opNum, + hX, + hXShapeInfo, + dZ, + dXShapeInfo, + hZ, + hZShapeInfo, + dZ, + dZShapeInfo, + extraParams, + nullptr, + nullptr); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void execTransformSame( @@ -844,20 +966,24 @@ void execTransformSame( void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, void *extraParams) { - - NativeOpExecutioner::execTransformSame(nullptr, - opNum, - hX, - hXShapeInfo, - dX, - dXShapeInfo, - hZ, - hZShapeInfo, - dZ, - dZShapeInfo, - extraParams, - nullptr, - nullptr); + try { + NativeOpExecutioner::execTransformSame(nullptr, + opNum, + hX, + hXShapeInfo, + dX, + dXShapeInfo, + hZ, + hZShapeInfo, + dZ, + dZShapeInfo, + extraParams, + nullptr, + nullptr); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void execTransformBool( @@ -868,20 +994,24 @@ void execTransformBool( void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, void *extraParams) { - - NativeOpExecutioner::execTransformBool(nullptr, - opNum, - hX, - hXShapeInfo, - dX, - dXShapeInfo, - hZ, - hZShapeInfo, - dZ, - dZShapeInfo, - extraParams, - nullptr, - nullptr); + try { + NativeOpExecutioner::execTransformBool(nullptr, + opNum, + hX, + hXShapeInfo, + dX, + dXShapeInfo, + hZ, + hZShapeInfo, + dZ, + dZShapeInfo, + extraParams, + nullptr, + nullptr); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } 
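// ErrorReference (moved to execution/ErrorReference.h in this patch) is the
// per-LaunchContext slot these handlers write into; lastErrorCode() and
// lastErrorMessage() read it back for the Java side. Judging from the call
// sites alone, a minimal sketch of its interface would be:
//
//   class ND4J_EXPORT ErrorReference {
//   public:
//       int errorCode();              // 0 means no error recorded
//       const char* errorMessage();
//       void setErrorCode(int errorCode);
//       void setErrorMessage(std::string message);
//   };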
} void execTransformAny( @@ -892,20 +1022,24 @@ void execTransformAny( void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, void *extraParams) { - - NativeOpExecutioner::execTransformAny(nullptr, - opNum, - hX, - hXShapeInfo, - dX, - dXShapeInfo, - hZ, - hZShapeInfo, - dZ, - dZShapeInfo, - extraParams, - nullptr, - nullptr); + try { + NativeOpExecutioner::execTransformAny(nullptr, + opNum, + hX, + hXShapeInfo, + dX, + dXShapeInfo, + hZ, + hZShapeInfo, + dZ, + dZShapeInfo, + extraParams, + nullptr, + nullptr); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void execTransformStrict( @@ -916,20 +1050,24 @@ void execTransformStrict( void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, void *extraParams) { - - NativeOpExecutioner::execTransformStrict(nullptr, - opNum, - hX, - hXShapeInfo, - dX, - dXShapeInfo, - hZ, - hZShapeInfo, - dZ, - dZShapeInfo, - extraParams, - nullptr, - nullptr); + try { + NativeOpExecutioner::execTransformStrict(nullptr, + opNum, + hX, + hXShapeInfo, + dX, + dXShapeInfo, + hZ, + hZShapeInfo, + dZ, + dZShapeInfo, + extraParams, + nullptr, + nullptr); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void execReduce3All(Nd4jPointer *extraPointers, @@ -948,158 +1086,18 @@ void execReduce3All(Nd4jPointer *extraPointers, Nd4jLong *yTadShapeInfo, Nd4jLong *yOffsets) { - auto dimension = reinterpret_cast(hDimension); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + try { + auto dimension = reinterpret_cast(hDimension); + int dimensionLength = static_cast(shape::length(hDimensionShape)); - NativeOpExecutioner::execReduce3All(nullptr, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParamsVals, hY, hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, dimension, dimensionLength, xTadShapeInfo, xOffsets, yTadShapeInfo, yOffsets); -} - - -template -void flattenGeneric(Nd4jPointer *extraPointers, - int offset, - char order, - void *vresult, - Nd4jLong *hZShapeInfo, - void *vinput, - Nd4jLong *inputShapeInfo) { - - auto hZ = reinterpret_cast(vresult); - auto input = reinterpret_cast(vinput); - - int numOnes = 0; - auto shape = shape::shapeOf(inputShapeInfo); - int wholeRank = shape::rank(inputShapeInfo); - for(int i = 0; i < wholeRank; i++) { - if(shape[i] == 1) - numOnes++; + NativeOpExecutioner::execReduce3All(nullptr, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParamsVals, hY, + hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, dimension, + dimensionLength, xTadShapeInfo, xOffsets, yTadShapeInfo, yOffsets); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); } - - - - //start at the given offset - hZ += offset; - char inputOrder = shape::order(inputShapeInfo); - auto len = shape::length(inputShapeInfo); - auto resultEleStride = shape::elementWiseStride(hZShapeInfo); - auto inputEleStride = shape::elementWiseStride(inputShapeInfo); - Nd4jLong numTads, stride; - int dimension, dimensionLength; - int rank = shape::rank(inputShapeInfo); - auto xStride = shape::stride(inputShapeInfo); - auto xShape = shape::shapeOf(inputShapeInfo); - - dimensionLength = 1; - if(order == 'f') { 
- dimension = 0; - } - else { - dimension = rank - 1; - } - stride = xStride[dimension]; - // numTads is product of length of all dimensions excluding - // the one we do the tad on - numTads = 1; - for (int i = 0; i < rank; i++) { - if (i != dimension) - numTads *= xShape[i]; - } - - if (inputOrder == order) { - if (resultEleStride == 1 && inputEleStride == 1) { - memcpy(hZ, input, len* sizeof(T)); - } - else if (resultEleStride >= 1 && inputEleStride >= 1) { - if (len < ELEMENT_THRESHOLD) { - - PRAGMA_OMP_SIMD - for (Nd4jLong i = 0; i < len; i++) { - hZ[i * resultEleStride] = input[i * inputEleStride]; - } - } - else { - - PRAGMA_OMP_PARALLEL_FOR_SIMD - for (Nd4jLong i = 0; i < len; i++) { - hZ[i * resultEleStride] = input[i * inputEleStride]; - } - } - } - else { - int idx = 0; - for(Nd4jLong i = 0; i < len; i++) - hZ[idx++] = input[shape::getIndexOffset(i, inputShapeInfo, len)]; - } - } - else { - int rank = shape::rank(inputShapeInfo); - auto xShape = shape::shapeOf(inputShapeInfo); - auto tadShape = xShape[dimension]; - - auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(inputShapeInfo, dimension); - - PRAGMA_OMP_PARALLEL_FOR - for(int i = 0; i < numTads; i++) { - - Nd4jLong resultOffset; - - if (order == 'f') { - // 1. get c ordering coordinates - auto cIndexCoordinates = new Nd4jLong[rank - 1]; - Nd4jLong divisor = 1; - for (int dim = rank - 1; dim > 0; dim--) { - cIndexCoordinates[dim - 1] = (i / divisor) % xShape[dim]; - divisor *= xShape[dim]; - } - - - // 2. convert to f ordering index - int fIndex = 0; - Nd4jLong multiplier = 1; - for (int dim = 1; dim <= rank - 1; dim++) { - fIndex += cIndexCoordinates[dim - 1] * multiplier; - multiplier *= xShape[dim]; - } - - resultOffset = fIndex * tadShape; - delete[] cIndexCoordinates; - - } - else { - resultOffset = i * tadShape; - } - - auto tadOffset = tadPack.primaryOffsets()[i]; - for( int j = 0; j < tadShape; j++) { - - // TAD are returned in C ordering always - hZ[resultOffset + j] = input[tadOffset + j * stride]; - - } - } - } -} - - -/** - * Concatneate multi array of the same shape together - * along a particular dimension - */ -void concat( - Nd4jPointer *extraPointers, - int dimension, - int numArrays, - Nd4jPointer *data, Nd4jPointer *inputShapeInfo, - Nd4jPointer *ddata, Nd4jPointer *dinputShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - Nd4jPointer *tadPointers, - Nd4jPointer *offsetPointers) { - - auto zType = nd4j::ArrayOptions::dataType(hZShapeInfo); - - BUILD_SINGLE_SELECTOR(zType, nd4j::SpecialMethods, ::concatCpuGeneric(dimension, numArrays, data, inputShapeInfo, hZ, hZShapeInfo), LIBND4J_TYPES); } /** @@ -1116,39 +1114,14 @@ void specialConcat( Nd4jLong *hZShapeInfo, Nd4jPointer *tadPointers, Nd4jPointer *offsetPointers) { + try { + auto zType = nd4j::ArrayOptions::dataType(hZShapeInfo); - auto zType = nd4j::ArrayOptions::dataType(hZShapeInfo); - - BUILD_SINGLE_SELECTOR(zType, nd4j::SpecialMethods, ::concatCpuGeneric(dimension, numArrays, data, inputShapeInfo, hZ, hZShapeInfo), LIBND4J_TYPES); -} - -/** -* Append an input array -* to the end of a flat array -* in a particular order -* @param offset the offset of the array to start at -* @param order the order -* @param hZ the hZ array -* @param hZShapeInfo the shape info for te array -* @param input the input for the array -* @param inputShapeInfo the shape information for that array -*/ -void flatten( - Nd4jPointer *extraPointers, - int offset, - char order, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, 
Nd4jLong *dZShapeInfo, - void *input, Nd4jLong *inputShapeInfo, - void *dinput, Nd4jLong *dinputShapeInfo) { - - auto xType = nd4j::ArrayOptions::dataType(inputShapeInfo); - auto zType = nd4j::ArrayOptions::dataType(hZShapeInfo); - - if (xType != zType) - throw std::runtime_error("NativeOps::flatten requires all operands to have same data type"); - - BUILD_SINGLE_SELECTOR(xType, flattenGeneric, (extraPointers, offset, order, hZ, hZShapeInfo, input, inputShapeInfo), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR(zType, nd4j::SpecialMethods,::concatCpuGeneric(dimension, numArrays, data, inputShapeInfo, hZ, hZShapeInfo), LIBND4J_TYPES); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } /** @@ -1324,7 +1297,13 @@ void setGridLimit(int gridSize) { nd4j::TadPack* tadOnlyShapeInfo(Nd4jLong *hXShapeInfo, int *dimension, int dimensionLength) { auto pack = new TadPack(); - *pack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength); + try { + *pack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } + return pack; } @@ -1421,9 +1400,14 @@ void pullRows(Nd4jPointer *extraPointers, Nd4jLong *tadOffsets, Nd4jLong *zTadShapeInfo, Nd4jLong *zTadOffsets) { - auto xType = nd4j::ArrayOptions::dataType(hXShapeInfo); + try { + auto xType = nd4j::ArrayOptions::dataType(hXShapeInfo); - BUILD_SINGLE_SELECTOR(xType, pullRowsGeneric, (hX, hXShapeInfo, hZ, hZShapeInfo, n, indexes, tadShapeInfo, tadOffsets, zTadShapeInfo, zTadOffsets), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR(xType, pullRowsGeneric, (hX, hXShapeInfo, hZ, hZShapeInfo, n, indexes, tadShapeInfo, tadOffsets, zTadShapeInfo, zTadOffsets), LIBND4J_TYPES); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } template @@ -1474,9 +1458,14 @@ void tear(Nd4jPointer *extraPointers, Nd4jLong *hZShapeInfo, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { - auto xType = nd4j::ArrayOptions::dataType(hXShapeInfo); + try { + auto xType = nd4j::ArrayOptions::dataType(hXShapeInfo); - BUILD_SINGLE_SELECTOR(xType, tearGeneric, (hX, hXShapeInfo, targets, hZShapeInfo, tadShapeInfo, tadOffsets), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR(xType, tearGeneric, (hX, hXShapeInfo, targets, hZShapeInfo, tadShapeInfo, tadOffsets), LIBND4J_TYPES); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } @@ -1488,9 +1477,14 @@ void average(Nd4jPointer *extras, int n, Nd4jLong length, bool propagate) { - auto xType = nd4j::ArrayOptions::dataType(hXShapeInfo); + try { + auto xType = nd4j::ArrayOptions::dataType(hXShapeInfo); - BUILD_SINGLE_SELECTOR(xType, nd4j::SpecialMethods, ::averageGeneric(hX, z, hZShapeInfo, n, length, propagate), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR(xType, nd4j::SpecialMethods, ::averageGeneric(hX, z, hZShapeInfo, n, length, propagate), LIBND4J_TYPES); + } catch (std::exception &e) { + 
nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void accumulate(Nd4jPointer *extras, @@ -1500,10 +1494,14 @@ void accumulate(Nd4jPointer *extras, void *dz, Nd4jLong *dZShapeInfo, int n, Nd4jLong length) { + try { + auto xType = nd4j::ArrayOptions::dataType(hXShapeInfo); - auto xType = nd4j::ArrayOptions::dataType(hXShapeInfo); - - BUILD_SINGLE_SELECTOR(xType, nd4j::SpecialMethods, ::accumulateGeneric(hX, hz, hZShapeInfo, n, length), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR(xType, nd4j::SpecialMethods, ::accumulateGeneric(hX, hz, hZShapeInfo, n, length), LIBND4J_TYPES); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void enableP2P(bool enable) { @@ -1613,14 +1611,20 @@ void shuffle(Nd4jPointer *extras, int *shuffleMap, Nd4jPointer *tadShapeInfo, Nd4jPointer *tadOffsets) { - auto xShape = reinterpret_cast(hXShapeInfo); - auto zShape = reinterpret_cast(hZShapeInfo); - auto tadOnlyShapeInfo = reinterpret_cast(tadShapeInfo); - auto tadOffset = reinterpret_cast(tadOffsets); + try { + auto xShape = reinterpret_cast(hXShapeInfo); + auto zShape = reinterpret_cast(hZShapeInfo); + auto tadOnlyShapeInfo = reinterpret_cast(tadShapeInfo); + auto tadOffset = reinterpret_cast(tadOffsets); - auto xType = nd4j::ArrayOptions::dataType(xShape[0]); + auto xType = nd4j::ArrayOptions::dataType(xShape[0]); - BUILD_SINGLE_SELECTOR(xType, shuffleGeneric, (hX, xShape, hz, zShape, N, shuffleMap, tadOnlyShapeInfo, tadOffset), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR(xType, shuffleGeneric, + (hX, xShape, hz, zShape, N, shuffleMap, tadOnlyShapeInfo, tadOffset), LIBND4J_TYPES); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } @@ -1633,27 +1637,6 @@ void setOmpMinThreads(int threads) { // TODO: to be implemented } -/* -void execMetaPredicateShape(Nd4jPointer *extras, - const int opTypeA, - const int opNumA, - const int opTypeB, - const int opNumB, - Nd4jLong N, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraA, - void *extraB, - double scalarA, - double scalarB) { - // no-op; -} -*/ - int getDevice() { return 0; } @@ -1671,31 +1654,35 @@ void execScalarTad(Nd4jPointer *extraPointers, void *dDimension, Nd4jLong *dDimensionShape, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { + try { + auto dimension = reinterpret_cast(hDimension); + int dimensionLength = static_cast(shape::length(hDimensionShape)); - auto dimension = reinterpret_cast(hDimension); - int dimensionLength = static_cast(shape::length(hDimensionShape)); - - NativeOpExecutioner::execScalar(nullptr, - opNum, - hX, - hXShapeInfo, - dX, - dXShapeInfo, - extraParams, - hZ, - hZShapeInfo, - dZ, - dZShapeInfo, - hScalars, - hScalarShapeInfo, - dScalars, - dScalarShapeInfo, - dimension, - shape::length(hDimensionShape), - tadShapeInfo, - tadOffsets, - tadShapeInfoZ, - tadOffsetsZ); + NativeOpExecutioner::execScalar(nullptr, + opNum, + hX, + hXShapeInfo, + dX, + dXShapeInfo, + extraParams, + hZ, + hZShapeInfo, + dZ, + dZShapeInfo, + 
hScalars, + hScalarShapeInfo, + dScalars, + dScalarShapeInfo, + dimension, + shape::length(hDimensionShape), + tadShapeInfo, + tadOffsets, + tadShapeInfoZ, + tadOffsetsZ); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void execScalarBoolTad(Nd4jPointer *extraPointers, @@ -1711,44 +1698,53 @@ void execScalarBoolTad(Nd4jPointer *extraPointers, void *dDimension, Nd4jLong *dDimensionShape, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { + try { + auto dimension = reinterpret_cast(hDimension); + int dimensionLength = static_cast(shape::length(hDimensionShape)); - auto dimension = reinterpret_cast(hDimension); - int dimensionLength = static_cast(shape::length(hDimensionShape)); - - NativeOpExecutioner::execScalarBool(nullptr, - opNum, - hX, - hXShapeInfo, - dX, - dXShapeInfo, - extraParams, - hZ, - hZShapeInfo, - dZ, - dZShapeInfo, - hScalars, - hScalarShapeInfo, - dScalars, - dScalarShapeInfo, - dimension, - dimensionLength, - tadShapeInfo, - tadOffsets, - tadShapeInfoZ, - tadOffsetsZ); + NativeOpExecutioner::execScalarBool(nullptr, + opNum, + hX, + hXShapeInfo, + dX, + dXShapeInfo, + extraParams, + hZ, + hZShapeInfo, + dZ, + dZShapeInfo, + hScalars, + hScalarShapeInfo, + dScalars, + dScalarShapeInfo, + dimension, + dimensionLength, + tadShapeInfo, + tadOffsets, + tadShapeInfoZ, + tadOffsetsZ); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } const char * getDeviceName(int deviceId) { - if (!nameSet) { - name = reinterpret_cast(malloc(256 * sizeof(char))); + try { + if (!nameSet) { + name = reinterpret_cast(malloc(256 * sizeof(char))); - CHECK_ALLOC(name, "Failed to allocate new string buffer", 256); + CHECK_ALLOC(name, "Failed to allocate new string buffer", 256); - std::memset(name, 0, 256 * sizeof(char)); - nameSet = true; + std::memset(name, 0, 256 * sizeof(char)); + nameSet = true; - // TODO: provide proper CPU model name here - sprintf(name, "x86-compatible CPU"); + // TODO: provide proper CPU model name here + sprintf(name, "x86-compatible CPU"); + } + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); } @@ -1768,8 +1764,12 @@ void execAggregate(Nd4jPointer *extraPointers,int opNum, void *realArguments, int numRealArguments, nd4j::DataType dtype) { - - BUILD_SINGLE_SELECTOR(dtype, NativeOpExecutioner::execAggregate, (nullptr, opNum, arguments, numArguments, shapeArguments, numShapeArguments, indexArguments, numIndexArguments, intArrays, numIntArrays, realArguments, numRealArguments), FLOAT_TYPES); + try { + BUILD_SINGLE_SELECTOR(dtype, NativeOpExecutioner::execAggregate, (nullptr, opNum, arguments, numArguments, shapeArguments, numShapeArguments, indexArguments, numIndexArguments, intArrays, numIntArrays, realArguments, numRealArguments), FLOAT_TYPES); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } @@ -1841,7 +1841,12 @@ void batchExecutor(Nd4jPointer *extraPointers, int maxReals, void *ptrToArguments, nd4j::DataType dtype) { - 
BUILD_SINGLE_SELECTOR(dtype, _batchExecutor, (extraPointers, numAggregates, opNum, maxArgs, maxShapes, maxIntArrays, maxIntArraySize, maxIdx, maxReals, ptrToArguments, dtype), FLOAT_TYPES); + try { + BUILD_SINGLE_SELECTOR(dtype, _batchExecutor, (extraPointers, numAggregates, opNum, maxArgs, maxShapes, maxIntArrays, maxIntArraySize, maxIdx, maxReals, ptrToArguments, dtype), FLOAT_TYPES); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void execAggregateBatch(Nd4jPointer *extraPointers, @@ -1855,7 +1860,12 @@ void execAggregateBatch(Nd4jPointer *extraPointers, int maxReals, void *ptrToArguments, nd4j::DataType dtype) { - BUILD_SINGLE_SELECTOR(dtype, _batchExecutor, (extraPointers, numAggregates, opNum, maxArgs, maxShapes, maxIntArrays, maxIntArraySize, maxIdx, maxReals, ptrToArguments, dtype), FLOAT_TYPES); + try { + BUILD_SINGLE_SELECTOR(dtype, _batchExecutor, (extraPointers, numAggregates, opNum, maxArgs, maxShapes, maxIntArrays, maxIntArraySize, maxIdx, maxReals, ptrToArguments, dtype), FLOAT_TYPES); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } @@ -1865,7 +1875,12 @@ void execRandom(Nd4jPointer *extraPointers, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, void *extraArguments) { - NativeOpExecutioner::execRandom(nullptr, opNum, state, hZ, hZShapeInfo, dZ, dZShapeInfo, extraArguments); + try { + NativeOpExecutioner::execRandom(nullptr, opNum, state, hZ, hZShapeInfo, dZ, dZShapeInfo, extraArguments); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void execRandom3(Nd4jPointer *extraPointers, @@ -1878,8 +1893,12 @@ void execRandom3(Nd4jPointer *extraPointers, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, void *extraArguments) { - - NativeOpExecutioner::execRandom(nullptr, opNum, state, hX, hXShapeInfo, dX, dXShapeInfo, hY, hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, extraArguments); + try { + NativeOpExecutioner::execRandom(nullptr, opNum, state, hX, hXShapeInfo, dX, dXShapeInfo, hY, hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, extraArguments); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void execRandom2(Nd4jPointer *extraPointers, @@ -1890,19 +1909,25 @@ void execRandom2(Nd4jPointer *extraPointers, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, void *extraArguments) { - - NativeOpExecutioner::execRandom(nullptr, opNum, state, hX, hXShapeInfo, dX, dXShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, extraArguments); + try { + NativeOpExecutioner::execRandom(nullptr, opNum, state, hX, hXShapeInfo, dX, dXShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, extraArguments); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } Nd4jPointer initRandom(Nd4jPointer *extraPointers, long seed, long bufferSize, Nd4jPointer ptrToBuffer) { - graph::RandomGenerator* generator 
= new graph::RandomGenerator(seed, seed); -// auto ptrBuf = reinterpret_cast(ptrToBuffer); -// auto buffer = new nd4j::random::RandomBuffer(seed, bufferSize, reinterpret_cast(ptrBuf)); -// -// nd4j::random::Xoroshiro128 generator(buffer); -// generator.refreshBuffer(); -// - return (Nd4jPointer) generator; + try { + auto generator = new graph::RandomGenerator(seed, seed); + + return (Nd4jPointer) generator; + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + + return nullptr; + } } void refreshBuffer(Nd4jPointer *extraPointers, long seed, Nd4jPointer ptrRandom) { @@ -1953,7 +1978,12 @@ void sort(Nd4jPointer *extraPointers, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, bool descending) { - NativeOpExecutioner::execSort(hX, hXShapeInfo, descending); + try { + NativeOpExecutioner::execSort(hX, hXShapeInfo, descending); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void sortTad(Nd4jPointer *extraPointers, @@ -1964,7 +1994,12 @@ void sortTad(Nd4jPointer *extraPointers, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, bool descending) { - NativeOpExecutioner::execSort(hX, hXShapeInfo, dimension, dimensionLength, tadShapeInfo, tadOffsets, descending); + try { + NativeOpExecutioner::execSort(hX, hXShapeInfo, dimension, dimensionLength, tadShapeInfo, tadOffsets, descending); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void sortCooIndices(Nd4jPointer *extraPointers, @@ -1972,7 +2007,12 @@ void sortCooIndices(Nd4jPointer *extraPointers, void *values, Nd4jLong length, int rank) { - NativeOpExecutioner::execSortCooIndices(indices, values, length, rank); + try { + NativeOpExecutioner::execSortCooIndices(indices, values, length, rank); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } Nd4jLong encodeBitmap(Nd4jPointer *extraPointers, void *hX, Nd4jLong *hXShapeInfo, Nd4jLong N, int *dz, float threshold) { @@ -1983,7 +2023,7 @@ Nd4jLong encodeBitmap(Nd4jPointer *extraPointers, void *hX, Nd4jLong *hXShapeInf Nd4jLong* mmapFile(Nd4jPointer *extraPointers, const char *fileName, Nd4jLong length) { auto hZ = new Nd4jLong[2];errno = 0; - +try { #if defined(_WIN32) || defined(_WIN64) _mmap(hZ, static_cast(length), fileName); #else @@ -1992,7 +2032,7 @@ Nd4jLong* mmapFile(Nd4jPointer *extraPointers, const char *fileName, Nd4jLong le nd4j_printf("Errno: %i\n", errno); throw std::runtime_error("Failed to open file for MMAP"); } - void * ptr = mmap(NULL, length, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + void *ptr = mmap(NULL, length, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); // check for failed allocation if (ptr == MAP_FAILED) @@ -2004,7 +2044,11 @@ Nd4jLong* mmapFile(Nd4jPointer *extraPointers, const char *fileName, Nd4jLong le #endif return hZ; - +} catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return nullptr; +} } void munmapFile(Nd4jPointer 
*extraPointers, Nd4jLong *ptrMap, Nd4jLong length) { @@ -2019,7 +2063,13 @@ void munmapFile(Nd4jPointer *extraPointers, Nd4jLong *ptrMap, Nd4jLong length) { } nd4j::graph::ResultWrapper* executeFlatGraph(Nd4jPointer *extraPointers, Nd4jPointer flatBufferPointer) { - return nd4j::graph::GraphExecutioner::executeFlatBuffer(flatBufferPointer); + try { + return nd4j::graph::GraphExecutioner::executeFlatBuffer(flatBufferPointer); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return nullptr; + } } Nd4jLong getResultWrapperSize(nd4j::graph::ResultWrapper* ptr) { @@ -2061,8 +2111,14 @@ FORCEINLINE int estimateThresholdGeneric(Nd4jPointer *extraPointers, Nd4jPointer int estimateThreshold(Nd4jPointer *extraPointers, Nd4jPointer hX, Nd4jLong *hXShapeInfo, int N, float threshold) { - auto xType = ArrayOptions::dataType(hXShapeInfo); - BUILD_SINGLE_SELECTOR(xType, return estimateThresholdGeneric, (extraPointers, hX, N, threshold), FLOAT_TYPES); + try { + auto xType = ArrayOptions::dataType(hXShapeInfo); + BUILD_SINGLE_SELECTOR(xType, return estimateThresholdGeneric, (extraPointers, hX, N, threshold), FLOAT_TYPES); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return 0; + } } Nd4jLong getShapeListSize(nd4j::ShapeList* list) { @@ -2122,9 +2178,15 @@ nd4j::ShapeList* _calculateOutputShapes(Nd4jPointer* extraPointers, nd4j::ops::D } nd4j::ShapeList* calculateOutputShapes2(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputBuffers, Nd4jPointer* inputShapes, int numInputShapes, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs, bool *bArgs, int numBArgs) { - auto op = nd4j::ops::OpRegistrator::getInstance()->getOperation(hash); + try { + auto op = nd4j::ops::OpRegistrator::getInstance()->getOperation(hash); - return _calculateOutputShapes(extraPointers, op, inputBuffers, inputShapes, numInputShapes, tArgs, numTArgs, iArgs, numIArgs, bArgs, numBArgs); + return _calculateOutputShapes(extraPointers, op, inputBuffers, inputShapes, numInputShapes, tArgs, numTArgs, iArgs, numIArgs, bArgs, numBArgs); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return nullptr; + } } nd4j::ShapeList* _calculateOutputShapes(Nd4jPointer* extraPointers, nd4j::ops::DeclarableOp *op, Nd4jPointer* inputShapes, int numInputShapes, double *tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs) { @@ -2147,16 +2209,28 @@ nd4j::ShapeList* _calculateOutputShapes(Nd4jPointer* extraPointers, nd4j::ops::D } nd4j::ShapeList* calculateOutputShapes(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputShapes, int numInputShapes, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs) { - auto op = nd4j::ops::OpRegistrator::getInstance()->getOperation(hash); + try { + auto op = nd4j::ops::OpRegistrator::getInstance()->getOperation(hash); - return _calculateOutputShapes(extraPointers, op, inputShapes, numInputShapes, tArgs, numTArgs, iArgs, numIArgs); + return _calculateOutputShapes(extraPointers, op, inputShapes, numInputShapes, tArgs, numTArgs, iArgs, numIArgs); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + 
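// Convention for fallible wrappers like this one: after recording the error,
// pointer-returning entry points return nullptr and status-returning ones
// return a non-zero code, so callers get a usable sentinel even before they
// consult lastErrorCode().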
nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return nullptr; + } } int execCustomOp2(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer opContext) { - auto op = nd4j::ops::OpRegistrator::getInstance()->getOperation(hash); - auto context = reinterpret_cast(opContext); + try { + auto op = nd4j::ops::OpRegistrator::getInstance()->getOperation(hash); + auto context = reinterpret_cast(opContext); - return op->execute(context); + return op->execute(context); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return 20; + } } Nd4jStatus realExec(nd4j::ops::DeclarableOp* op, Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputBuffers, Nd4jPointer* inputShapes, int numInputs, Nd4jPointer* outputBuffers, Nd4jPointer* outputShapes, int numOutputs, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs, bool* bArgs, int numBArgs, bool isInplace) { @@ -2234,34 +2308,6 @@ Nd4jStatus realExec(nd4j::ops::DeclarableOp* op, Nd4jPointer* extraPointers, Nd4 outputs[e]->streamline(shape::order(reinterpret_cast(outputShapes[e]))); } -/* - if (!isInplace) { - if (hZ->size() != numOutputs) { - return ND4J_STATUS_BAD_OUTPUT; - } - - for (int e = 0; e < numOutputs; e++) { - auto buffer = (T *) outputBuffers[e]; - auto shape = (int *) outputShapes[e]; - nd4j::NDArray tmp(buffer, shape); - - if (tmp.lengthOf() != hZ->at(e)->lengthOf()) { - nd4j_printf("Provided output array for [%s] has length of %i, but actual hZ has length of %i\n", op->getOpName()->c_str(), tmp.lengthOf(), hZ->at(e)->lengthOf()); - return ND4J_STATUS_BAD_OUTPUT; - } - - tmp.assign(hZ->at(e)); - } - } else { - // if op is inplace, our ResultSet holds pointers - hZ->purge(); - } - - - delete hZ; - -*/ - for (auto v: inputs) delete v; @@ -2273,16 +2319,28 @@ Nd4jStatus realExec(nd4j::ops::DeclarableOp* op, Nd4jPointer* extraPointers, Nd4 int execCustomOp(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputBuffers, Nd4jPointer* inputShapes, int numInputs, Nd4jPointer* outputBuffers, Nd4jPointer* outputShapes, int numOutputs, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs, bool* bArgs, int numBArgs, bool isInplace) { - auto op = nd4j::ops::OpRegistrator::getInstance()->getOperation(hash); - return realExec(op, extraPointers, hash, inputBuffers, inputShapes, numInputs, outputBuffers, outputShapes, numOutputs, tArgs, numTArgs, iArgs, numIArgs, bArgs, numBArgs, isInplace); + try { + auto op = nd4j::ops::OpRegistrator::getInstance()->getOperation(hash); + return realExec(op, extraPointers, hash, inputBuffers, inputShapes, numInputs, outputBuffers, outputShapes, numOutputs, tArgs, numTArgs, iArgs, numIArgs, bArgs, numBArgs, isInplace); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return 1; + } } int registerGraph(Nd4jPointer *extraPointers, Nd4jLong graphId, Nd4jPointer flatBufferPointer) { - auto graph = nd4j::graph::GraphExecutioner::importFromFlatPointer(flatBufferPointer); + try { + auto graph = nd4j::graph::GraphExecutioner::importFromFlatPointer(flatBufferPointer); - nd4j::graph::GraphHolder::getInstance()->registerGraph(graphId, graph); + nd4j::graph::GraphHolder::getInstance()->registerGraph(graphId, graph); - return ND4J_STATUS_OK; + return ND4J_STATUS_OK; + } catch 
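// The wrappers above fix the failure-status convention for this file:
// ND4J_STATUS_OK on success, a non-zero int (20 for execCustomOp2, 1 for
// execCustomOp and registerGraph) when a native exception was trapped.
// A native-side caller would combine that status with the stored error state
// roughly like this -- `ctx` and `opHash` are placeholders:
//
//     int status = execCustomOp2(nullptr, opHash, ctx);
//     if (status != ND4J_STATUS_OK || lastErrorCode() != 0)
//         fprintf(stderr, "op failed (%i): %s\n",
//                 lastErrorCode(), lastErrorMessage());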
(std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return 1; + } } static VariablesSet* executeStoredGraphT(Nd4jPointer *extraPointers, Nd4jLong graphId, Nd4jPointer *inputBuffers, Nd4jPointer *inputShapes, int* inputIndices, int numInputs) { @@ -2478,7 +2536,13 @@ Nd4jStatus execCustomOpWithScope_(Nd4jPointer *extraPointers, nd4j::graph::Graph } Nd4jStatus execCustomOpWithScope(Nd4jPointer *extraPointers, Nd4jPointer state, Nd4jLong opHash, Nd4jLong *scopes, int numScopes, Nd4jPointer *inputBuffers, Nd4jPointer *inputShapes, int numInputs, Nd4jPointer *outputBuffers, Nd4jPointer *outputShapes, int numOutputs) { - return execCustomOpWithScope_(extraPointers, reinterpret_cast(state), opHash, scopes, numScopes, inputBuffers, inputShapes, numInputs, outputBuffers, outputShapes, numOutputs); + try { + return execCustomOpWithScope_(extraPointers, reinterpret_cast(state), opHash, scopes, numScopes, inputBuffers, inputShapes, numInputs, outputBuffers, outputShapes, numOutputs); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return 1; + } } void deleteResultWrapper(Nd4jPointer ptr) { @@ -2704,73 +2768,98 @@ void scatterUpdate(Nd4jPointer *extraPointers, int opCode, int numOfSubArrs, void* dY, Nd4jLong* dYShapeInfo, Nd4jLong* dYOffsets, int* hIindexes, int* dIindexes) { + try { - int numThreads = omp_get_max_threads(); + int numThreads = omp_get_max_threads(); - PRAGMA_OMP_PARALLEL_THREADS(numThreads) - { - for (int i = 0; i < numOfSubArrs; ++i) { + PRAGMA_OMP_PARALLEL_THREADS(numThreads) + { + for (int i = 0; i < numOfSubArrs; ++i) { - int threadIndex = omp_get_thread_num(); - const auto xIndex = hIindexes[i]; - const bool isOwner = xIndex < numThreads ? threadIndex == xIndex : threadIndex == xIndex % numThreads; + int threadIndex = omp_get_thread_num(); + const auto xIndex = hIindexes[i]; + const bool isOwner = xIndex < numThreads ? 
threadIndex == xIndex : threadIndex == xIndex % numThreads; - if (!isOwner) - continue; - - NDArray inSubArr(reinterpret_cast(hX) + (hXOffsets[hIindexes[i]] * DataTypeUtils::sizeOf(hXShapeInfo)), hXShapeInfo); - NDArray updSubArr(reinterpret_cast(hY) + (hYOffsets[i] * DataTypeUtils::sizeOf(hXShapeInfo)), hYShapeInfo); - - if (inSubArr.lengthOf() != updSubArr.lengthOf()) { - continue; - } - - switch (opCode) { - case 0: - inSubArr.applyPairwiseTransform(pairwise::Add, &updSubArr, &inSubArr, nullptr); - break; - case 1: - inSubArr.applyPairwiseTransform(pairwise::Subtract, &updSubArr, &inSubArr, nullptr); - break; - case 2: - inSubArr.applyPairwiseTransform(pairwise::Multiply, &updSubArr, &inSubArr, nullptr); - break; - case 3: - inSubArr.applyPairwiseTransform(pairwise::Divide, &updSubArr, &inSubArr, nullptr); - break; - case 4: - inSubArr.applyPairwiseTransform(pairwise::ReverseSubtract, &updSubArr, &inSubArr, nullptr); - break; - case 5: - inSubArr.applyPairwiseTransform(pairwise::ReverseDivide, &updSubArr, &inSubArr, nullptr); - break; - case 6: - inSubArr.applyPairwiseTransform(pairwise::CopyPws, &updSubArr, &inSubArr, nullptr); - break; - default: + if (!isOwner) continue; + + NDArray inSubArr( + reinterpret_cast(hX) + (hXOffsets[hIindexes[i]] * DataTypeUtils::sizeOf(hXShapeInfo)), + hXShapeInfo); + NDArray updSubArr(reinterpret_cast(hY) + (hYOffsets[i] * DataTypeUtils::sizeOf(hXShapeInfo)), + hYShapeInfo); + + if (inSubArr.lengthOf() != updSubArr.lengthOf()) { + continue; + } + + switch (opCode) { + case 0: + inSubArr.applyPairwiseTransform(pairwise::Add, &updSubArr, &inSubArr, nullptr); + break; + case 1: + inSubArr.applyPairwiseTransform(pairwise::Subtract, &updSubArr, &inSubArr, nullptr); + break; + case 2: + inSubArr.applyPairwiseTransform(pairwise::Multiply, &updSubArr, &inSubArr, nullptr); + break; + case 3: + inSubArr.applyPairwiseTransform(pairwise::Divide, &updSubArr, &inSubArr, nullptr); + break; + case 4: + inSubArr.applyPairwiseTransform(pairwise::ReverseSubtract, &updSubArr, &inSubArr, nullptr); + break; + case 5: + inSubArr.applyPairwiseTransform(pairwise::ReverseDivide, &updSubArr, &inSubArr, nullptr); + break; + case 6: + inSubArr.applyPairwiseTransform(pairwise::CopyPws, &updSubArr, &inSubArr, nullptr); + break; + default: + continue; + } } } + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); } } void inspectArray(Nd4jPointer *extraPointers, Nd4jPointer buffer, Nd4jLong *shapeInfo, Nd4jPointer specialBuffer, Nd4jLong *specialShapeInfo, Nd4jPointer debugInfo) { - auto p = reinterpret_cast(debugInfo); - NDArray array(buffer, shapeInfo); - nd4j::DebugHelper::retrieveDebugStatistics(p, &array); + try { + auto p = reinterpret_cast(debugInfo); + NDArray array(buffer, shapeInfo); + nd4j::DebugHelper::retrieveDebugStatistics(p, &array); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void tryPointer(Nd4jPointer extra, Nd4jPointer p, int len) { - auto buf = reinterpret_cast(p); - int cnt = 0; - for (int i = 0; i < len; i++) - cnt += buf[cnt]; + try { + auto buf = reinterpret_cast(p); + int cnt = 0; + for (int i = 0; i < len; i++) + cnt += buf[cnt]; + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + 
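// The ownership test in scatterUpdate (above) guarantees each input sub-array
// is written by exactly one OMP thread: low indices map 1:1 to threads,
// higher ones wrap around via modulo. A self-contained model of the rule
// (function name illustrative only):
//
//     bool ownsIndex(int threadIndex, int xIndex, int numThreads) {
//         return xIndex < numThreads ? threadIndex == xIndex
//                                    : threadIndex == xIndex % numThreads;
//     }
//
// e.g. with 4 threads, updates targeting sub-array 6 always run on thread 2
// (6 % 4), so two updates to the same sub-array can never race.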
nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } nd4j::ConstantDataBuffer* shapeBuffer(int rank, Nd4jLong *shape, Nd4jLong *strides, nd4j::DataType dtype, char order, Nd4jLong ews, bool empty) { - auto buffer = new ConstantDataBuffer(); - *buffer = nd4j::ConstantShapeHelper::getInstance()->bufferForShapeInfo(ShapeDescriptor(dtype, order, shape, strides, rank, ews, empty)); - return buffer; + try { + auto buffer = new ConstantDataBuffer(); + *buffer = nd4j::ConstantShapeHelper::getInstance()->bufferForShapeInfo( + ShapeDescriptor(dtype, order, shape, strides, rank, ews, empty)); + return buffer; + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return nullptr; + } } void deleteShapeBuffer(nd4j::ConstantDataBuffer* ptr) { @@ -2790,7 +2879,13 @@ nd4j::ConstantDataBuffer* constantBufferDouble(nd4j::DataType dtype, double *dat } nd4j::ConstantDataBuffer* constantBuffer(nd4j::DataType dtype, nd4j::ConstantDescriptor *descriptor) { - return nd4j::ConstantHelper::getInstance()->constantBuffer(*descriptor, dtype); + try { + return nd4j::ConstantHelper::getInstance()->constantBuffer(*descriptor, dtype); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return nullptr; + } } Nd4jPointer getConstantDataBufferPrimary(nd4j::ConstantDataBuffer* dbf) { @@ -2808,7 +2903,13 @@ Nd4jLong getConstantDataBufferSizeOf(nd4j::ConstantDataBuffer* dbf) { nd4j::graph::Context* createGraphContext(int nodeId) { - return new nd4j::graph::Context(nodeId); + try { + return new nd4j::graph::Context(nodeId); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return nullptr; + } } nd4j::graph::RandomGenerator* getGraphContextRandomGenerator(nd4j::graph::Context* ptr) { return &ptr->randomGenerator(); @@ -2872,32 +2973,38 @@ int dataTypeFromNpyHeader(void *header) { } Nd4jPointer shapeBufferForNumpy(Nd4jPointer npyArray) { - cnpy::NpyArray arr = cnpy::loadNpyFromPointer(reinterpret_cast(npyArray)); - unsigned int shapeSize = arr.shape.size(); - std::vector shape(shapeSize); - bool _empty = false; - for(unsigned int i = 0; i < shapeSize; i++) { - shape[i] = arr.shape[i]; + try { + cnpy::NpyArray arr = cnpy::loadNpyFromPointer(reinterpret_cast(npyArray)); + unsigned int shapeSize = arr.shape.size(); + std::vector shape(shapeSize); + bool _empty = false; + for (unsigned int i = 0; i < shapeSize; i++) { + shape[i] = arr.shape[i]; - if (arr.shape[i] == 0) - _empty = true; + if (arr.shape[i] == 0) + _empty = true; + } + + auto dtype = cnpy::dataTypeFromHeader(reinterpret_cast(npyArray)); + + Nd4jLong *shapeBuffer; + if (shape.size() == 1 && shape[0] == 0) { + // scalar case + shapeBuffer = nd4j::ShapeBuilders::createScalarShapeInfo(dtype); + } else if (_empty) { + if (shapeSize > 0) + shapeBuffer = nd4j::ShapeBuilders::emptyShapeInfo(dtype, arr.fortranOrder ? 'f' : 'c', shape); + else + shapeBuffer = nd4j::ShapeBuilders::emptyShapeInfo(dtype); + } else { + shapeBuffer = nd4j::ShapeBuilders::createShapeInfo(dtype, arr.fortranOrder ? 
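// shapeBufferForNumpy distinguishes three cases when turning an npy header
// into an nd4j shape buffer (summarized from the branches above; the order
// char is 'f' when the header flags fortranOrder, otherwise 'c'):
//
//     shape == {0}       -> ShapeBuilders::createScalarShapeInfo(dtype)
//     any dimension == 0 -> ShapeBuilders::emptyShapeInfo(dtype, order, shape)
//     otherwise          -> ShapeBuilders::createShapeInfo(dtype, order, shape)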
'f' : 'c', shape); + } + return reinterpret_cast(nd4j::ConstantShapeHelper::getInstance()->createFromExisting(shapeBuffer, true)); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return nullptr; } - - auto dtype = cnpy::dataTypeFromHeader(reinterpret_cast(npyArray)); - - Nd4jLong *shapeBuffer; - if (shape.size() == 1 && shape[0] == 0) { - // scalar case - shapeBuffer = nd4j::ShapeBuilders::createScalarShapeInfo(dtype); - } else if (_empty) { - if (shapeSize > 0) - shapeBuffer = nd4j::ShapeBuilders::emptyShapeInfo(dtype, arr.fortranOrder ? 'f' : 'c', shape); - else - shapeBuffer = nd4j::ShapeBuilders::emptyShapeInfo(dtype); - } else { - shapeBuffer = nd4j::ShapeBuilders::createShapeInfo(dtype, arr.fortranOrder ? 'f' : 'c', shape); - } - return reinterpret_cast(nd4j::ConstantShapeHelper::getInstance()->createFromExisting(shapeBuffer, true)); } void sortByKey(Nd4jPointer *extraPointers, @@ -2906,10 +3013,15 @@ void sortByKey(Nd4jPointer *extraPointers, void *y, Nd4jLong *yShapeInfo, void *dy, Nd4jLong *dyShapeInfo, bool descending) { - auto xType = ArrayOptions::dataType(xShapeInfo); - auto yType = ArrayOptions::dataType(yShapeInfo); + try { + auto xType = ArrayOptions::dataType(xShapeInfo); + auto yType = ArrayOptions::dataType(yShapeInfo); - BUILD_DOUBLE_SELECTOR(xType, yType, nd4j::DoubleMethods, ::sortByKey(x, xShapeInfo, y, yShapeInfo, descending), LIBND4J_TYPES, LIBND4J_TYPES); + BUILD_DOUBLE_SELECTOR(xType, yType, nd4j::DoubleMethods, ::sortByKey(x, xShapeInfo, y, yShapeInfo, descending), LIBND4J_TYPES, LIBND4J_TYPES); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void sortByValue(Nd4jPointer *extraPointers, @@ -2918,11 +3030,15 @@ void sortByValue(Nd4jPointer *extraPointers, void *y, Nd4jLong *yShapeInfo, void *dy, Nd4jLong *dyShapeInfo, bool descending) { + try { + auto xType = ArrayOptions::dataType(xShapeInfo); + auto yType = ArrayOptions::dataType(yShapeInfo); - auto xType = ArrayOptions::dataType(xShapeInfo); - auto yType = ArrayOptions::dataType(yShapeInfo); - - BUILD_DOUBLE_SELECTOR(xType, yType, nd4j::DoubleMethods, ::sortByValue(x, xShapeInfo, y, yShapeInfo, descending), LIBND4J_TYPES, LIBND4J_TYPES); + BUILD_DOUBLE_SELECTOR(xType, yType, nd4j::DoubleMethods, ::sortByValue(x, xShapeInfo, y, yShapeInfo, descending), LIBND4J_TYPES, LIBND4J_TYPES); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void sortTadByKey(Nd4jPointer *extraPointers, @@ -2933,10 +3049,15 @@ void sortTadByKey(Nd4jPointer *extraPointers, int *dimension, int dimensionLength, bool descending) { - auto xType = ArrayOptions::dataType(xShapeInfo); - auto yType = ArrayOptions::dataType(yShapeInfo); + try { + auto xType = ArrayOptions::dataType(xShapeInfo); + auto yType = ArrayOptions::dataType(yShapeInfo); - BUILD_DOUBLE_SELECTOR(xType, yType, nd4j::DoubleMethods, ::sortTadByKey(x, xShapeInfo, y, yShapeInfo, dimension, dimensionLength, descending), LIBND4J_TYPES, LIBND4J_TYPES); + BUILD_DOUBLE_SELECTOR(xType, yType, nd4j::DoubleMethods, ::sortTadByKey(x, xShapeInfo, y, yShapeInfo, dimension, dimensionLength, descending), LIBND4J_TYPES, LIBND4J_TYPES); + } catch 
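// sortByKey/sortByValue/sortTadByKey above all dispatch through
// BUILD_DOUBLE_SELECTOR on the (key, value) data-type pair. Conceptually the
// macro expands to a nested type switch; a hand-written model of the idea
// (the real expansion is generated, this is only a sketch):
//
//     switch (xType) {
//         case nd4j::DataType::FLOAT32:
//             switch (yType) {
//                 case nd4j::DataType::INT32:
//                     nd4j::DoubleMethods<float, int>::sortByKey(x, xShapeInfo, y, yShapeInfo, descending);
//                     break;
//                 // ... one case per type in LIBND4J_TYPES ...
//             }
//             break;
//         // ... one outer case per type in LIBND4J_TYPES ...
//     }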
(std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void sortTadByValue(Nd4jPointer *extraPointers, @@ -2947,24 +3068,35 @@ void sortTadByValue(Nd4jPointer *extraPointers, int *dimension, int dimensionLength, bool descending) { - auto xType = ArrayOptions::dataType(xShapeInfo); - auto yType = ArrayOptions::dataType(yShapeInfo); + try { + auto xType = ArrayOptions::dataType(xShapeInfo); + auto yType = ArrayOptions::dataType(yShapeInfo); - BUILD_DOUBLE_SELECTOR(xType, yType, nd4j::DoubleMethods, ::sortTadByValue(x, xShapeInfo, y, yShapeInfo, dimension, dimensionLength, descending), LIBND4J_TYPES, LIBND4J_TYPES); + BUILD_DOUBLE_SELECTOR(xType, yType, nd4j::DoubleMethods, ::sortTadByValue(x, xShapeInfo, y, yShapeInfo, dimension, dimensionLength, descending), LIBND4J_TYPES, LIBND4J_TYPES); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } const char* runLightBenchmarkSuit(bool printOut) { - nd4j::LightBenchmarkSuit suit; - auto result = suit.runSuit(); + try { + nd4j::LightBenchmarkSuit suit; + auto result = suit.runSuit(); - if (printOut) - nd4j_printf("%s\n", result.data()); + if (printOut) + nd4j_printf("%s\n", result.data()); - auto chars = new char[result.length()+1]; - std::memcpy(chars, result.data(), result.length()); - chars[result.length()] = (char) 0x0; + auto chars = new char[result.length() + 1]; + std::memcpy(chars, result.data(), result.length()); + chars[result.length()] = (char) 0x0; - return chars; + return chars; + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return nullptr; + } } Nd4jLong getCachedMemory(int deviceId) { @@ -2972,17 +3104,23 @@ Nd4jLong getCachedMemory(int deviceId) { } const char* runFullBenchmarkSuit(bool printOut) { - nd4j::FullBenchmarkSuit suit; - auto result = suit.runSuit(); + try { + nd4j::FullBenchmarkSuit suit; + auto result = suit.runSuit(); - if (printOut) - nd4j_printf("%s\n", result.data()); + if (printOut) + nd4j_printf("%s\n", result.data()); - auto chars = new char[result.length()+1]; - std::memcpy(chars, result.data(), result.length()); - chars[result.length()] = (char) 0x0; + auto chars = new char[result.length() + 1]; + std::memcpy(chars, result.data(), result.length()); + chars[result.length()] = (char) 0x0; - return chars; + return chars; + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return nullptr; + } } nd4j::LaunchContext* defaultLaunchContext() { @@ -3017,8 +3155,14 @@ Nd4jPointer lcSolverHandle(OpaqueLaunchContext* lc) { return nullptr; } +int lastErrorCode() { + return nd4j::LaunchContext::defaultContext()->errorReference()->errorCode(); +} + +const char* lastErrorMessage() { + return nd4j::LaunchContext::defaultContext()->errorReference()->errorMessage(); +} -BUILD_SINGLE_TEMPLATE(template void flattenGeneric,(Nd4jPointer*, int, char, void*, Nd4jLong*, void*, Nd4jLong*), LIBND4J_TYPES); BUILD_SINGLE_TEMPLATE(template void pullRowsGeneric, (void *, Nd4jLong*, void*, Nd4jLong*, const int, Nd4jLong*, Nd4jLong*, Nd4jLong*, Nd4jLong*, Nd4jLong*), 
LIBND4J_TYPES); BUILD_SINGLE_TEMPLATE(template void tearGeneric, (void *, Nd4jLong*, Nd4jPointer*, Nd4jLong*, Nd4jLong*, Nd4jLong*), LIBND4J_TYPES); BUILD_SINGLE_TEMPLATE(template void shuffleGeneric, (void**, Nd4jLong**, void**, Nd4jLong**, int, int*, Nd4jLong**, Nd4jLong**), LIBND4J_TYPES); diff --git a/libnd4j/blas/cuda/NativeOps.cu b/libnd4j/blas/cuda/NativeOps.cu index e75aa422c..626b0ea26 100755 --- a/libnd4j/blas/cuda/NativeOps.cu +++ b/libnd4j/blas/cuda/NativeOps.cu @@ -68,21 +68,6 @@ int minThreads = 32; __constant__ char deviceConstantMemory[49152]; -typedef struct { - long streamId; - long callId; -} __syncInfo; - -typedef __syncInfo SyncInfo; - - -// this method isn't used, left here for legacy and caution purposes -// TLDR: don't use this way, it sucks -void CUDART_CB syncCallback(cudaStream_t stream, cudaError_t status, void *data){ - SyncInfo *sync = reinterpret_cast(data); - - //printf("Finished stream: [%i], kernel call: [%i]\n", sync->streamId, sync->callId); -} // this method just does type conversion in fancy way int getDeviceId(Nd4jPointer ptrToDeviceId) { @@ -250,9 +235,14 @@ void execPairwiseTransform( Nd4jPointer *extraPointers, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, void *extraParams) { - - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execPairwiseTransform(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, hY, hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, extraParams); + try { + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execPairwiseTransform(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, hY, hYShapeInfo, dY, + dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, extraParams); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -265,9 +255,14 @@ void execPairwiseTransformBool(Nd4jPointer *extraPointers, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, void *extraParams) { - - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execPairwiseBoolTransform(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, hY, hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, extraParams); + try { + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execPairwiseBoolTransform(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, hY, hYShapeInfo, + dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, extraParams); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -279,9 +274,14 @@ void execSummaryStatsScalar(Nd4jPointer *extraPointers, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, bool biasCorrected) { - - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execSummaryStatsScalar(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, hZShapeInfo, dZ, dZShapeInfo, biasCorrected); + try { + LaunchContext lc(extraPointers[1], extraPointers[4], 
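// Every CUDA wrapper below rebuilds its LaunchContext from the same
// extraPointers slots. The layout, as inferred from the calls in this file
// (an observation, not a documented contract):
//
//     extraPointers[1]     main stream (cudaStream_t *)
//     extraPointers[3]     allocation pointer
//     extraPointers[4]     reduction pointer / special stream, depending on op
//     extraPointers[5]     scalar pointer
//     extraPointers[9..13] host TAD shape, device TAD shape/offsets for X,
//                          device TAD shape/offsets for Z (broadcast ops)
//
// so a typical wrapper reduces to:
//
//     LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]);
//     NativeOpExecutioner::exec...(&lc, opNum, /* host + device operands */ ...);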
extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execSummaryStatsScalar(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, + hZShapeInfo, dZ, dZShapeInfo, biasCorrected); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -295,24 +295,30 @@ void execBroadcastBool(Nd4jPointer *extraPointers, void *dZ, Nd4jLong *dZShapeInfo, void *hDimension, Nd4jLong *hDimensionShape, void *dDimension, Nd4jLong *dDimensionShape) { + try { + //Nd4jLong *tadOnlyShapeInfo = reinterpret_cast(extraPointers[0]); + //Nd4jLong *tadOffsets = reinterpret_cast(extraPointers[1]); + //Nd4jLong *tadOnlyShapeInfoZ = reinterpret_cast(extraPointers[2]); + //Nd4jLong *tadOffsetsZ = reinterpret_cast(extraPointers[3]); - //Nd4jLong *tadOnlyShapeInfo = reinterpret_cast(extraPointers[0]); - //Nd4jLong *tadOffsets = reinterpret_cast(extraPointers[1]); - //Nd4jLong *tadOnlyShapeInfoZ = reinterpret_cast(extraPointers[2]); - //Nd4jLong *tadOffsetsZ = reinterpret_cast(extraPointers[3]); + auto dimension = reinterpret_cast(dDimension); + int dimensionLength = static_cast(shape::length(hDimensionShape)); - auto dimension = reinterpret_cast(dDimension); - int dimensionLength = static_cast(shape::length(hDimensionShape)); - - auto hTADShapeInfo = reinterpret_cast(extraPointers[9]); - auto tadOnlyShapeInfo = reinterpret_cast(extraPointers[10]); - auto tadOffsets = reinterpret_cast(extraPointers[11]); - auto tadOnlyShapeInfoZ = reinterpret_cast(extraPointers[12]); - auto tadOffsetsZ = reinterpret_cast(extraPointers[13]); - - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execBroadcastBool(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, hY, hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, dimension, dimensionLength, tadOnlyShapeInfo, tadOffsets, tadOnlyShapeInfoZ, tadOffsetsZ); + auto hTADShapeInfo = reinterpret_cast(extraPointers[9]); + auto tadOnlyShapeInfo = reinterpret_cast(extraPointers[10]); + auto tadOffsets = reinterpret_cast(extraPointers[11]); + auto tadOnlyShapeInfoZ = reinterpret_cast(extraPointers[12]); + auto tadOffsetsZ = reinterpret_cast(extraPointers[13]); + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execBroadcastBool(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, hY, hYShapeInfo, dY, + dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, dimension, + dimensionLength, tadOnlyShapeInfo, tadOffsets, tadOnlyShapeInfoZ, + tadOffsetsZ); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } /** @@ -338,38 +344,33 @@ void execBroadcast( void *dZ, Nd4jLong *dZShapeInfo, void *hDimension, Nd4jLong *hDimensionShape, void *dDimension, Nd4jLong *dDimensionShape) { -/* - cudaEvent_t start; - cudaEventCreateWithFlags(&start, cudaEventDisableTiming); - timespec tsX; - timespec tsY; - clock_gettime(CLOCK_REALTIME, &tsX); -*/ - auto dimension = reinterpret_cast(dDimension); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + try { + auto dimension = reinterpret_cast(dDimension); + int dimensionLength = static_cast(shape::length(hDimensionShape)); - cudaStream_t *stream = 
reinterpret_cast(extraPointers[1]); + cudaStream_t *stream = reinterpret_cast(extraPointers[1]); - auto hTADShapeInfo = reinterpret_cast(extraPointers[9]); - auto tadOnlyShapeInfo = reinterpret_cast(extraPointers[10]); - auto tadOffsets = reinterpret_cast(extraPointers[11]); - auto tadOnlyShapeInfoZ = reinterpret_cast(extraPointers[12]); - auto tadOffsetsZ = reinterpret_cast(extraPointers[13]); + auto hTADShapeInfo = reinterpret_cast(extraPointers[9]); + auto tadOnlyShapeInfo = reinterpret_cast(extraPointers[10]); + auto tadOffsets = reinterpret_cast(extraPointers[11]); + auto tadOnlyShapeInfoZ = reinterpret_cast(extraPointers[12]); + auto tadOffsetsZ = reinterpret_cast(extraPointers[13]); - auto xType = nd4j::ArrayOptions::dataType(hXShapeInfo); - auto yType = nd4j::ArrayOptions::dataType(hYShapeInfo); - auto zType = nd4j::ArrayOptions::dataType(hZShapeInfo); + auto xType = nd4j::ArrayOptions::dataType(hXShapeInfo); + auto yType = nd4j::ArrayOptions::dataType(hYShapeInfo); + auto zType = nd4j::ArrayOptions::dataType(hZShapeInfo); - if (nd4j::Environment::getInstance()->isDebugAndVerbose()) - printf("F3 opNum:[%i]\n", opNum); + if (nd4j::Environment::getInstance()->isDebugAndVerbose()) + printf("F3 opNum:[%i]\n", opNum); - //Nd4jLong *tadOnlyShapeInfo = reinterpret_cast(extraPointers[0]); - //Nd4jLong *tadOffsets = reinterpret_cast(extraPointers[1]); - //Nd4jLong *tadOnlyShapeInfoZ = reinterpret_cast(extraPointers[2]); - //Nd4jLong *tadOffsetsZ = reinterpret_cast(extraPointers[3]); - - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execBroadcast(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, hY, hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, dimension, dimensionLength, tadOnlyShapeInfo, tadOffsets, tadOnlyShapeInfoZ, tadOffsetsZ); + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execBroadcast(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, hY, hYShapeInfo, dY, + dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, dimension, dimensionLength, + tadOnlyShapeInfo, tadOffsets, tadOnlyShapeInfoZ, tadOffsetsZ); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } @@ -390,9 +391,14 @@ void execReduceFloat(Nd4jPointer *extraPointers, void *extraParams, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo) { - - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execReduceFloatScalar(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, hZShapeInfo, dZ, dZShapeInfo); + try { + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execReduceFloatScalar(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, + hZShapeInfo, dZ, dZShapeInfo); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -403,9 +409,14 @@ void execReduceSame(Nd4jPointer *extraPointers, void *extraParams, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo) { - - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - 
NativeOpExecutioner::execReduceSameScalar(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, hZShapeInfo, dZ, dZShapeInfo); + try { + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execReduceSameScalar(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, + hZShapeInfo, dZ, dZShapeInfo); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -418,13 +429,22 @@ void execReduceSame2(Nd4jPointer *extraPointers, void *dZ, Nd4jLong *dZShapeInfo, void *hDimension, Nd4jLong *hDimensionShape, void *dDimension, Nd4jLong *dDimensionShape) { - auto dimension = reinterpret_cast(dDimension); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + try { + auto dimension = reinterpret_cast(dDimension); + int dimensionLength = static_cast(shape::length(hDimensionShape)); - auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, reinterpret_cast(hDimension), shape::length(hDimensionShape)); + auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, + reinterpret_cast(hDimension), + shape::length(hDimensionShape)); - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execReduceSame(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, hZShapeInfo, dZ, dZShapeInfo, dimension, dimensionLength, tadPack.specialShapeInfo(), tadPack.specialOffsets()); + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execReduceSame(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, hZShapeInfo, + dZ, dZShapeInfo, dimension, dimensionLength, tadPack.specialShapeInfo(), + tadPack.specialOffsets()); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -437,13 +457,22 @@ void execReduceLong2(Nd4jPointer *extraPointers, void *dZ, Nd4jLong *dZShapeInfo, void *hDimension, Nd4jLong *hDimensionShape, void *dDimension, Nd4jLong *dDimensionShape) { - auto dimension = reinterpret_cast(dDimension); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + try { + auto dimension = reinterpret_cast(dDimension); + int dimensionLength = static_cast(shape::length(hDimensionShape)); - auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, reinterpret_cast(hDimension), shape::length(hDimensionShape)); + auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, + reinterpret_cast(hDimension), + shape::length(hDimensionShape)); - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execReduceLong(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, hZShapeInfo, dZ, dZShapeInfo, dimension, dimensionLength, tadPack.specialShapeInfo(), tadPack.specialOffsets()); + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execReduceLong(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, hZShapeInfo, + dZ, 
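// All of the *2 reduction variants share one shape-preparation step:
// ConstantTadHelper::tadForDimensions() builds (and caches) the
// tensor-along-dimension views for the requested axes, and the kernel then
// consumes the pack's device-side shape info and offsets. In isolation
// (`dimensionPtr` is a placeholder for the int* dimension buffer):
//
//     auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(
//             hXShapeInfo, dimensionPtr, dimensionLength);
//     auto dTadShape   = tadPack.specialShapeInfo();  // device shapeInfo, shared by all TADs
//     auto dTadOffsets = tadPack.specialOffsets();    // one offset per sub-array
//
// e.g. reducing a 4x3 matrix along dimension 1 yields 4 TADs of length 3.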
dZShapeInfo, dimension, dimensionLength, tadPack.specialShapeInfo(), + tadPack.specialOffsets()); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -454,30 +483,37 @@ void execReduceLong(Nd4jPointer *extraPointers, void *extraParams, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo) { + try { + auto stream = reinterpret_cast(extraPointers[1]); + auto hTADShapeInfo = reinterpret_cast(extraPointers[9]); + auto dTADShapeInfo = reinterpret_cast(extraPointers[10]); - auto stream = reinterpret_cast(extraPointers[1]); - auto hTADShapeInfo = reinterpret_cast(extraPointers[9]); - auto dTADShapeInfo = reinterpret_cast(extraPointers[10]); + if (nd4j::Environment::getInstance()->isDebugAndVerbose()) + printf("LF7 opNum:[%i]\n", opNum); - if (nd4j::Environment::getInstance()->isDebugAndVerbose()) - printf("LF7 opNum:[%i]\n", opNum); + auto reductionPointer = reinterpret_cast(extraPointers[4]); - auto reductionPointer = reinterpret_cast(extraPointers[4]); + auto xType = nd4j::ArrayOptions::dataType(hXShapeInfo); + auto zType = nd4j::ArrayOptions::dataType(hZShapeInfo); - auto xType = nd4j::ArrayOptions::dataType(hXShapeInfo); - auto zType = nd4j::ArrayOptions::dataType(hZShapeInfo); + if (zType != nd4j::DataType::INT64) + throw datatype_exception::build("execReduceLong wrong Z data type", nd4j::DataType::INT64, zType); - if (zType != nd4j::DataType::INT64) - throw datatype_exception::build("execReduceLong wrong Z data type", nd4j::DataType::INT64, zType); + auto xLength = shape::length(hXShapeInfo); + auto blockWidth = 256; + auto numBlocks = CudaLaunchHelper::getReductionBlocks(xLength, blockWidth); + dim3 launchDims(numBlocks, blockWidth, 32768); - auto xLength = shape::length(hXShapeInfo); - auto blockWidth = 256; - auto numBlocks = CudaLaunchHelper::getReductionBlocks(xLength, blockWidth); - dim3 launchDims(numBlocks, blockWidth, 32768); + BUILD_DOUBLE_SELECTOR(xType, zType, functions::reduce::ReduceLongFunction, + ::execReduceScalar(launchDims, stream, opNum, dX, dXShapeInfo, hXShapeInfo, extraParams, + dZ, dZShapeInfo, hXShapeInfo, nullptr, 0, reductionPointer, + dTADShapeInfo), LIBND4J_TYPES, LONG_TYPES); - BUILD_DOUBLE_SELECTOR(xType, zType, functions::reduce::ReduceLongFunction, ::execReduceScalar(launchDims, stream, opNum, dX, dXShapeInfo, hXShapeInfo, extraParams, dZ, dZShapeInfo, hXShapeInfo, nullptr, 0, reductionPointer, dTADShapeInfo), LIBND4J_TYPES, LONG_TYPES); - - nd4j::DebugHelper::checkErrorCode(stream, "execReduceLong(...) failed"); + nd4j::DebugHelper::checkErrorCode(stream, "execReduceLong(...) 
failed"); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -490,13 +526,22 @@ void execReduceBool2(Nd4jPointer *extraPointers, void *dZ, Nd4jLong *dZShapeInfo, void *hDimension, Nd4jLong *hDimensionShape, void *dDimension, Nd4jLong *dDimensionShape) { - auto dimension = reinterpret_cast(dDimension); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + try { + auto dimension = reinterpret_cast(dDimension); + int dimensionLength = static_cast(shape::length(hDimensionShape)); - auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, reinterpret_cast(hDimension), shape::length(hDimensionShape)); + auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, + reinterpret_cast(hDimension), + shape::length(hDimensionShape)); - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execReduceBool(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, hZShapeInfo, dZ, dZShapeInfo, dimension, dimensionLength, tadPack.specialShapeInfo(), tadPack.specialOffsets()); + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execReduceBool(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, hZShapeInfo, + dZ, dZShapeInfo, dimension, dimensionLength, tadPack.specialShapeInfo(), + tadPack.specialOffsets()); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -507,30 +552,37 @@ void execReduceBool(Nd4jPointer *extraPointers, void *extraParams, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo) { + try { + auto stream = reinterpret_cast(extraPointers[1]); + auto hTADShapeInfo = reinterpret_cast(extraPointers[9]); + auto dTADShapeInfo = reinterpret_cast(extraPointers[10]); - auto stream = reinterpret_cast(extraPointers[1]); - auto hTADShapeInfo = reinterpret_cast(extraPointers[9]); - auto dTADShapeInfo = reinterpret_cast(extraPointers[10]); + if (nd4j::Environment::getInstance()->isDebugAndVerbose()) + printf("BF7 opNum:[%i]\n", opNum); - if (nd4j::Environment::getInstance()->isDebugAndVerbose()) - printf("BF7 opNum:[%i]\n", opNum); + auto reductionPointer = reinterpret_cast(extraPointers[4]); - auto reductionPointer = reinterpret_cast(extraPointers[4]); + auto xType = nd4j::ArrayOptions::dataType(hXShapeInfo); + auto zType = nd4j::ArrayOptions::dataType(hZShapeInfo); - auto xType = nd4j::ArrayOptions::dataType(hXShapeInfo); - auto zType = nd4j::ArrayOptions::dataType(hZShapeInfo); + if (zType != nd4j::DataType::BOOL) + throw std::runtime_error("execReduceBool requires Z operand to have BOOL type"); - if (zType != nd4j::DataType::BOOL) - throw std::runtime_error("execReduceBool requires Z operand to have BOOL type"); + auto xLength = shape::length(hXShapeInfo); + auto blockWidth = 256; + auto numBlocks = CudaLaunchHelper::getReductionBlocks(xLength, blockWidth); + dim3 launchDims(numBlocks, blockWidth, 32768); - auto xLength = shape::length(hXShapeInfo); - auto blockWidth = 256; - auto numBlocks = CudaLaunchHelper::getReductionBlocks(xLength, 
blockWidth); - dim3 launchDims(numBlocks, blockWidth, 32768); + BUILD_DOUBLE_SELECTOR(xType, zType, functions::reduce::ReduceBoolFunction, + ::execReduceScalar(launchDims, stream, opNum, dX, dXShapeInfo, hXShapeInfo, extraParams, + dZ, dZShapeInfo, hZShapeInfo, nullptr, 0, reductionPointer, + dTADShapeInfo), LIBND4J_TYPES, BOOL_TYPES); - BUILD_DOUBLE_SELECTOR(xType, zType, functions::reduce::ReduceBoolFunction, ::execReduceScalar(launchDims, stream, opNum, dX, dXShapeInfo, hXShapeInfo, extraParams, dZ, dZShapeInfo, hZShapeInfo, nullptr, 0, reductionPointer, dTADShapeInfo), LIBND4J_TYPES, BOOL_TYPES); - - nd4j::DebugHelper::checkErrorCode(stream, "execReduceBool(...) failed"); + nd4j::DebugHelper::checkErrorCode(stream, "execReduceBool(...) failed"); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } /** @@ -554,13 +606,22 @@ void execIndexReduce(Nd4jPointer *extraPointers, void *dZ, Nd4jLong *dZShapeInfo, void *hDimension, Nd4jLong *hDimensionShape, void *dDimension, Nd4jLong *dDimensionShape) { - auto dimension = reinterpret_cast(dDimension); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + try { + auto dimension = reinterpret_cast(dDimension); + int dimensionLength = static_cast(shape::length(hDimensionShape)); - auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, reinterpret_cast(hDimension), shape::length(hDimensionShape)); + auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, + reinterpret_cast(hDimension), + shape::length(hDimensionShape)); - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execIndexReduce(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, hZShapeInfo, dZ, dZShapeInfo, dimension, dimensionLength, tadPack.specialShapeInfo(), tadPack.specialOffsets()); + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execIndexReduce(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, hZShapeInfo, + dZ, dZShapeInfo, dimension, dimensionLength, tadPack.specialShapeInfo(), + tadPack.specialOffsets()); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } /** @@ -582,13 +643,22 @@ void execReduceFloat2(Nd4jPointer *extraPointers, void *dZ, Nd4jLong *dZShapeInfo, void *hDimension, Nd4jLong *hDimensionShape, void *dDimension, Nd4jLong *dDimensionShape) { - auto dimension = reinterpret_cast(dDimension); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + try { + auto dimension = reinterpret_cast(dDimension); + int dimensionLength = static_cast(shape::length(hDimensionShape)); - auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, reinterpret_cast(hDimension), shape::length(hDimensionShape)); + auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, + reinterpret_cast(hDimension), + shape::length(hDimensionShape)); - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execReduceFloat(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, hZShapeInfo, dZ, dZShapeInfo, dimension, dimensionLength, tadPack.specialShapeInfo(), 
tadPack.specialOffsets()); + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execReduceFloat(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, hZShapeInfo, + dZ, dZShapeInfo, dimension, dimensionLength, tadPack.specialShapeInfo(), + tadPack.specialOffsets()); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } /** @@ -607,9 +677,14 @@ void execIndexReduceScalar( void *extraParams, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo){ - - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execIndexReduceScalar(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, hZShapeInfo, dZ, dZShapeInfo); + try { + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execIndexReduceScalar(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, + hZShapeInfo, dZ, dZShapeInfo); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -619,12 +694,17 @@ void execTransformSame(Nd4jPointer *extraPointers,int opNum, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, void *extraParams) { + try { + auto tadShapeInfo = reinterpret_cast(extraPointers != nullptr ? extraPointers[0] : nullptr); + auto tadOffsets = reinterpret_cast(extraPointers != nullptr ? extraPointers[1] : nullptr); - auto tadShapeInfo = reinterpret_cast(extraPointers != nullptr ? extraPointers[0] : nullptr); - auto tadOffsets = reinterpret_cast(extraPointers != nullptr ? extraPointers[1] : nullptr); - - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execTransformSame(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, extraParams, tadShapeInfo, tadOffsets); + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execTransformSame(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, hZ, hZShapeInfo, dZ, + dZShapeInfo, extraParams, tadShapeInfo, tadOffsets); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -634,12 +714,17 @@ void execTransformBool(Nd4jPointer *extraPointers,int opNum, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, void *extraParams) { + try { + auto tadShapeInfo = reinterpret_cast(extraPointers != nullptr ? extraPointers[0] : nullptr); + auto tadOffsets = reinterpret_cast(extraPointers != nullptr ? extraPointers[1] : nullptr); - auto tadShapeInfo = reinterpret_cast(extraPointers != nullptr ? extraPointers[0] : nullptr); - auto tadOffsets = reinterpret_cast(extraPointers != nullptr ? 
extraPointers[1] : nullptr); - - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execTransformBool(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, extraParams, tadShapeInfo, tadOffsets); + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execTransformBool(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, hZ, hZShapeInfo, dZ, + dZShapeInfo, extraParams, tadShapeInfo, tadOffsets); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -649,12 +734,18 @@ void execTransformAny(Nd4jPointer *extraPointers,int opNum, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, void *extraParams) { + try { + auto stream = reinterpret_cast(extraPointers[1]); + auto streamSpecial = reinterpret_cast(extraPointers[4]); + LaunchContext lc(stream, streamSpecial, extraPointers[5], extraPointers[3], + reinterpret_cast(extraPointers[6])); - auto stream = reinterpret_cast(extraPointers[1]); - auto streamSpecial = reinterpret_cast(extraPointers[4]); - LaunchContext lc(stream, streamSpecial, extraPointers[5], extraPointers[3], reinterpret_cast(extraPointers[6])); - - NativeOpExecutioner::execTransformAny(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, extraParams, nullptr, nullptr); + NativeOpExecutioner::execTransformAny(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, hZ, hZShapeInfo, dZ, + dZShapeInfo, extraParams, nullptr, nullptr); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -664,12 +755,17 @@ void execTransformStrict(Nd4jPointer *extraPointers,int opNum, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, void *extraParams) { + try { + auto tadShapeInfo = reinterpret_cast(extraPointers != nullptr ? extraPointers[10] : nullptr); + auto tadOffsets = reinterpret_cast(extraPointers != nullptr ? extraPointers[11] : nullptr); - auto tadShapeInfo = reinterpret_cast(extraPointers != nullptr ? extraPointers[10] : nullptr); - auto tadOffsets = reinterpret_cast(extraPointers != nullptr ? 
extraPointers[11] : nullptr); - - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execTransformStrict(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, extraParams, tadShapeInfo, tadOffsets); + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execTransformStrict(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, hZ, hZShapeInfo, dZ, + dZShapeInfo, extraParams, tadShapeInfo, tadOffsets); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -679,55 +775,19 @@ void execTransformFloat(Nd4jPointer *extraPointers,int opNum, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, void *extraParams) { + try { + auto tadShapeInfo = reinterpret_cast(extraPointers != nullptr ? extraPointers[10] : nullptr); + auto tadOffsets = reinterpret_cast(extraPointers != nullptr ? extraPointers[11] : nullptr); - auto tadShapeInfo = reinterpret_cast(extraPointers != nullptr ? extraPointers[10] : nullptr); - auto tadOffsets = reinterpret_cast(extraPointers != nullptr ? extraPointers[11] : nullptr); - - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execTransformFloat(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, extraParams, tadShapeInfo, tadOffsets); + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execTransformFloat(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, hZ, hZShapeInfo, dZ, + dZShapeInfo, extraParams, tadShapeInfo, tadOffsets); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } - -/** - * Append an input array - * to the end of a flat array - * in a particular order - * @param offset the offset of the array to start at - * @param order the order - * @param dZ the dZ array - * @param dZShapeInfo the shape info for te array - * @param input the input for the array - * @param inputShapeInfo the shape information for that array - */ -void flatten(Nd4jPointer *extraPointers, - int offset, - char order, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hInput, Nd4jLong *hInputShapeInfo, - void *dInput, Nd4jLong *dInputShapeInfo) { - - cudaStream_t *stream = reinterpret_cast(extraPointers[1]); - auto hYShapeInfo = reinterpret_cast(extraPointers[7]); - - if (nd4j::Environment::getInstance()->isDebugAndVerbose()) - printf("F22 opNum:[7]\n"); - - // int *allocPointer = reinterpret_cast(extraPointers[3]); - - dim3 launchDims(256, 256, 2048); - - if (nd4j::Environment::getInstance()->isVerbose() && launchDims.x == 1) - printf("AF222 opNum:[7]\n"); - - auto type = nd4j::ArrayOptions::dataType(hInputShapeInfo); - BUILD_SINGLE_SELECTOR(type, flattenKernelGeneric, (launchDims, stream, extraPointers, offset, order, dZ, dZShapeInfo, dInput, dInputShapeInfo), LIBND4J_TYPES); - - DEBUG_KERNEL(stream, -1); -} - - - void checkP2P() { int curDevice = 0; @@ -821,23 +881,28 @@ bool isP2PAvailable() { void initializeDevicesAndFunctions() { - int devCnt = 0; - cudaGetDeviceCount(&devCnt); - deviceProperties = new 
cudaDeviceProp[devCnt];
-    for (int i = 0; i < devCnt; i++) {
-        cudaSetDevice(i);
-        cudaGetDeviceProperties(&deviceProperties[i], i);
+    try {
+        int devCnt = 0;
+        cudaGetDeviceCount(&devCnt);
+        deviceProperties = new cudaDeviceProp[devCnt];
+        for (int i = 0; i < devCnt; i++) {
+            cudaSetDevice(i);
+            cudaGetDeviceProperties(&deviceProperties[i], i);
 
-        cudaDeviceSetLimit(cudaLimitStackSize, 4096);
-    }
+            cudaDeviceSetLimit(cudaLimitStackSize, 4096);
+        }
 
-    cudaSetDevice(0);
+        cudaSetDevice(0);
 
-    checkP2P();
+        checkP2P();
 
-    // enabling p2p gpu access if it's supported
-    if (supportedP2P && devCnt > 1)
-        enableP2P(allowedP2P);
+        // enabling p2p gpu access if it's supported
+        if (supportedP2P && devCnt > 1)
+            enableP2P(allowedP2P);
+    } catch (std::exception &e) {
+        nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1);
+        nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what());
+    }
 }
 
 void initializeFunctions(Nd4jPointer *functions) {
@@ -867,8 +932,10 @@ Nd4jPointer mallocHost(Nd4jLong memorySize, int flags) {
     Nd4jPointer pointer;
     // cudaHostAllocMapped |cudaHostAllocPortable
     auto res = cudaHostAlloc(reinterpret_cast<void **>(&pointer), memorySize, cudaHostAllocDefault);
-    if (res != 0)
-        throw nd4j::cuda_exception::build("cudaHostAlloc(...) failed", res);
+    if (res != 0) {
+        nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(res);
+        nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage("cudaHostAlloc failed");
+    }
 
     return pointer;
 }
@@ -884,8 +951,11 @@ Nd4jPointer mallocHost(Nd4jLong memorySize, int flags) {
 Nd4jPointer mallocDevice(Nd4jLong memorySize, int deviceId, int flags) {
     Nd4jPointer pointer;
     auto res = cudaMalloc(reinterpret_cast<void **>(&pointer), memorySize);
-    if (res != 0)
-        throw nd4j::cuda_exception::build("cudaMalloc(...) failed", res);
+    if (res != 0) {
+        nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(res);
+        nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage("cudaMalloc failed");
+    }
+
     return pointer;
 }
@@ -896,8 +966,11 @@ Nd4jPointer mallocDevice(Nd4jLong memorySize, int deviceId, int flags) {
  */
 int freeHost(Nd4jPointer pointer) {
     auto res = cudaFreeHost(reinterpret_cast<void *>(pointer));
-    if (res != 0)
-        throw nd4j::cuda_exception::build("cudaFreeHost(...) failed", res);
+    if (res != 0) {
+        nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(res);
+        nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage("cudaFreeHost failed");
+    }
+
    return 1L;
 }
@@ -909,10 +982,14 @@ int freeHost(Nd4jPointer pointer) {
  */
 int freeDevice(Nd4jPointer pointer, int deviceId) {
    auto res = cudaFree(reinterpret_cast<void *>(pointer));
-    if (res != 0)
-        throw nd4j::cuda_exception::build("cudaFree(...) failed", res);
-    return 1L;
+    // we're intentionally skipping result code 1 (cudaErrorInvalidValue) here:
+    // freeing a pointer CUDA doesn't recognize shouldn't be treated as fatal
+    if (res != 0 && res != 1) {
+        nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(res);
+        nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage("cudaFree failed");
+    }
+
+    return res == 0 ? 1L : 0L;
 }
@@ -921,22 +998,13 @@ Nd4jPointer createContext() {
 }
 
 Nd4jPointer createStream() {
-    /*
-    Nd4jPointer nativeStream = (Nd4jPointer) malloc(sizeof(cudaStream_t));
-    CHECK_ALLOC(nativeStream, "Failed to allocate memory for new CUDA stream", sizeof(cudaStream_t));
-
-    cudaError_t dZ = cudaStreamCreate(reinterpret_cast<cudaStream_t *>(&nativeStream));
-    checkCudaErrors(dZ);
-    if (dZ != 0)
-        throw std::runtime_error("cudaStreamCreate(...) failed");
-
-    return nativeStream;
-    */
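// Note the CUDA-specific wrappers here store the raw cudaError_t as the last
// error code (setErrorCode(res) / setErrorCode(dZ)) instead of the generic 1,
// so lastErrorCode() surfaces CUDA's own numbering, e.g. 2 for
// cudaErrorMemoryAllocation. Caller-side sketch:
//
//     auto stream = createStream();
//     if (lastErrorCode() != 0)
//         fprintf(stderr, "stream creation failed (%i): %s\n",
//                 lastErrorCode(), lastErrorMessage());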
failed"); - - return nativeStream; - */ auto stream = new cudaStream_t(); auto dZ = cudaStreamCreate(stream); - if (dZ != 0) - throw nd4j::cuda_exception::build("cudaStreamCreate(...) failed", dZ); + if (dZ != 0) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(dZ); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage("cudaStreamCreate failed"); + } return stream; } @@ -947,9 +1015,10 @@ Nd4jPointer createEvent() { CHECK_ALLOC(nativeEvent, "Failed to allocate new CUDA event buffer", sizeof(cudaEvent_t)); auto dZ = cudaEventCreateWithFlags(reinterpret_cast(&nativeEvent), cudaEventDisableTiming); - if (dZ != 0) - throw nd4j::cuda_exception::build("cudaEventCreateWithFlags(...) failed", dZ); - + if (dZ != 0) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(dZ); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage("cudaEventCreateWithFlags failed"); + } return nativeEvent; } @@ -959,8 +1028,10 @@ int registerEvent(Nd4jPointer event, Nd4jPointer stream) { auto pStream = reinterpret_cast(stream); auto dZ = cudaEventRecord(*pEvent, *pStream); - if (dZ != 0) - throw nd4j::cuda_exception::build("cudaEventRecord(...) failed", dZ); + if (dZ != 0) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(dZ); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage("cudaEventRecord failed"); + } return 1; } @@ -1048,8 +1119,11 @@ int memcpyAsync(Nd4jPointer dst, Nd4jPointer src, Nd4jLong size, int flags, Nd4j kind = cudaMemcpyDeviceToDevice; } break; - default: - throw nd4j::cuda_exception::build("UNDEFINED MEMCPY!\n", 119); + default: { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage("UNDEFNED MEMCPY"); + return 0; + } } auto dZ = cudaMemcpyAsync(reinterpret_cast(dst), const_cast(reinterpret_cast(src)), static_cast(size), kind, *pStream); @@ -1058,7 +1132,8 @@ int memcpyAsync(Nd4jPointer dst, Nd4jPointer src, Nd4jLong size, int flags, Nd4j printf("Failed on [%lu] -> [%lu], size: [%i], direction: [%i], dZ: [%i]\n", src, dst, size, flags, static_cast(dZ)); fflush(stdout); fflush(stderr); - throw nd4j::cuda_exception::build("cudaMemcpyAsync(...) failed", dZ); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(dZ); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage("cudaMemcpyAsync failed"); } return 1; @@ -1066,8 +1141,10 @@ int memcpyAsync(Nd4jPointer dst, Nd4jPointer src, Nd4jLong size, int flags, Nd4j int memsetSync(Nd4jPointer dst, int value, Nd4jLong size, int flags, Nd4jPointer reserved) { auto dZ = cudaMemset(reinterpret_cast(dst), value, static_cast(size)); - if (dZ != 0) - throw nd4j::cuda_exception::build("cudaMemset(...) failed", dZ); + if (dZ != 0) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(dZ); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage("cudaMemset failed"); + } return 1; } @@ -1076,8 +1153,10 @@ int memsetAsync(Nd4jPointer dst, int value, Nd4jLong size, int flags, Nd4jPointe auto pStream = reinterpret_cast(reserved); auto dZ = cudaMemsetAsync(reinterpret_cast(dst), value, static_cast(size), *pStream); - if (dZ != 0) - throw nd4j::cuda_exception::build("cudaMemsetAsync(...) 
failed", dZ); + if (dZ != 0) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(dZ); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage("cudaMemsetAsync failed"); + } return 1; } @@ -1085,8 +1164,10 @@ int memsetAsync(Nd4jPointer dst, int value, Nd4jLong size, int flags, Nd4jPointe int destroyEvent(Nd4jPointer event) { auto pEvent = reinterpret_cast(&event); auto dZ = cudaEventDestroy(*pEvent); - if (dZ != 0) - throw nd4j::cuda_exception::build("cudaEvenDestroy(...) failed", dZ); + if (dZ != 0) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(dZ); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage("cudaEventDestroy failed"); + } return 1; } @@ -1095,8 +1176,10 @@ int streamSynchronize(Nd4jPointer stream) { auto pStream = reinterpret_cast(stream); auto dZ = cudaStreamSynchronize(*pStream); - if (dZ != 0) - throw nd4j::cuda_exception::build("cudaStreamSynchronize(...) failed", dZ); + if (dZ != 0) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(dZ); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage("cudaStreamSynchronize failed"); + } return 1L; } @@ -1105,8 +1188,10 @@ int eventSynchronize(Nd4jPointer event) { auto pEvent = reinterpret_cast(&event); auto dZ = cudaEventSynchronize(*pEvent); - if (dZ != 0) - throw nd4j::cuda_exception::build("cudaEventSynchronize(...) failed", dZ); + if (dZ != 0) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(dZ); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage("cudaEventSynchronize failed"); + } return 1L; } @@ -1162,268 +1247,6 @@ const char * getDeviceName(int device) { return deviceProperties[device].name; } -/////////////////////////////////////////////////////////////////// -template -__global__ static void concatCuda(const int numOfArrs, void* pVx, void* pxShapeInfo, void* pVz, void* pzShapeInfo) { - - __shared__ int arrIdx, blocksPerArr; - __shared__ T *x, *z; - __shared__ Nd4jLong *zShapeInfo, *xShapeInfo, arrLen, arrLenZ, arrLenPerBlock, start, end; - - if (threadIdx.x == 0) { - blocksPerArr = (gridDim.x - gridDim.x % numOfArrs) / numOfArrs; // floor - arrIdx = blockIdx.x / blocksPerArr; - if (arrIdx >= numOfArrs) - arrIdx = numOfArrs - 1; - x = reinterpret_cast(reinterpret_cast(pVx)[arrIdx]); - z = reinterpret_cast(reinterpret_cast(pVz)[arrIdx]); - xShapeInfo = reinterpret_cast(pxShapeInfo)[arrIdx]; - zShapeInfo = reinterpret_cast(pzShapeInfo)[arrIdx]; - - arrLen = shape::length(xShapeInfo); - arrLenZ = shape::length(zShapeInfo); - arrLenPerBlock = (arrLen + blocksPerArr - arrLen % blocksPerArr) / blocksPerArr; // ceil - - start = arrLenPerBlock * (blockIdx.x % blocksPerArr); - end = (start + arrLenPerBlock) > arrLen ? 
arrLen : (start + arrLenPerBlock); - } - __syncthreads(); - - for (Nd4jLong i = threadIdx.x + start; i < end; i += blockDim.x) { - auto zOffset = shape::getIndexOffset(i, zShapeInfo, arrLenZ); - auto xOffset = shape::getIndexOffset(i, xShapeInfo, arrLen); - //printf("z[%i][%lld] = x[%i][%lld]\n", arrIdx, zOffset, arrIdx, xOffset); - z[zOffset] = x[xOffset]; - } -} -template -__host__ static void concatCudaLauncher(const int numOfArrs, cudaStream_t *stream, void* pVx, void* pxShapeInfo, void* pVz, void* pzShapeInfo) { - //int blocks = numOfArrs * 16; // >> 1 << 2); - //nd4j_printf("gridDim.x is %i\n", blocks); - //if (blocks > 8192) - // blocks = 8192; // restrict grid dims to 8K max - concatCuda<<>>(numOfArrs, pVx, pxShapeInfo, pVz, pzShapeInfo); - nd4j::DebugHelper::checkErrorCode(stream, "concat(...) failed"); -} -BUILD_SINGLE_TEMPLATE(template void concatCudaLauncher, (const int numOfArrs, cudaStream_t *stream, void* pVx, void* pxShapeInfo, void* pVz, void* pzShapeInfo), LIBND4J_TYPES); - -static void -specialBufferAndShapeWithOffset(void* vZ, Nd4jLong* hZShapeInfo, Nd4jLong* dZShapeInfo, std::vector const& idx, void*& outBuffer, Nd4jLong*& outShape) { - auto zType = nd4j::ArrayOptions::dataType(hZShapeInfo); - const int rank = shape::rank(hZShapeInfo); - Nd4jLong* newShape = new Nd4jLong[shape::shapeInfoLength(rank)]; - //ALLOCATE(newShape, nullptr, , Nd4jLong) - auto shapeSize = shape::shapeInfoByteLength(rank); - memcpy(newShape, hZShapeInfo, shapeSize); - - auto shapeOf = shape::shapeOf(newShape); - auto stridesOf = shape::stride(newShape); - - Nd4jLong offset(0), subArrLen(1); - int n(2), first, last, stride; - - for (int d = rank - 1; d >= 0; --d) { - - if (idx[n * d] != idx[n * d + 1]) { - auto axeDim = shape::sizeAt(hZShapeInfo, d); - first = idx[n * d] >= 0 ? idx[n * d] : idx[n * d] + axeDim + 1; - last = idx[n * d + 1] >= 0 ? 
idx[n * d + 1] : idx[n * d + 1] + axeDim + 1; - stride = 1; - - shapeOf[d] = (last - first + stride - 1) / stride; // ceil (last - first) / stride; - offset += first * stridesOf[d]; - - if(shapeOf[d] != 1) - stridesOf[d] *= stride; - } - - subArrLen *= shapeOf[d]; - } - - // check if there is possibility to set ews = 1 - //shape::setEws(newShape, subArrLen); - - //makeBothBuffersActual(); - outBuffer = (void*)((int8_t*)vZ + offset * DataTypeUtils::sizeOfElement(zType)); - cudaError_t err = cudaMalloc(&outShape, shapeSize); - if (err != 0) { - printf("Cannot allocate memory with error %d\n", err); - throw std::runtime_error("Cannot allocate memory for shape"); - } - cudaMemcpy(outShape, newShape, shapeSize, cudaMemcpyHostToDevice); - delete [] newShape; -} - -/** - * Concatneate multi array of the same shape together - * along a particular dimension - */ -void concat( - Nd4jPointer *extraPointers, - int dimension, - int numArrays, - Nd4jPointer *data, Nd4jPointer *inputShapeInfo, - Nd4jPointer *ddata, Nd4jPointer *dinputShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - Nd4jPointer *tadPointers, Nd4jPointer *offsetPointers) { - - auto stream = reinterpret_cast(extraPointers[1]); - - auto hXShapeInfo = hZShapeInfo; - auto hShapePointers = reinterpret_cast(inputShapeInfo); - auto dShapePointers = reinterpret_cast(dinputShapeInfo); - // numArrays will be used as number of TADs, so each block process 1 input - auto zType = nd4j::ArrayOptions::dataType(hZShapeInfo); - auto axis = dimension; - - const int rank = shape::rank(hZShapeInfo); //reinterpret_cast(inputShapeInfo[0])); - const int rank2 = 2 * rank; - std::vector> indices(numArrays, std::vector(rank2,0)); - - // take into account indices for first array - auto axisSize = shape::sizeAt(reinterpret_cast(inputShapeInfo[0]), axis); - indices[0][2 * axis + 1] = axisSize; - - for(int i = 1; i < numArrays; ++i) { - indices[i][2 * axis] = indices[i-1][2 * axis + 1]; // index start from - indices[i][2 * axis + 1] = indices[i-1][2 * axis + 1] + shape::sizeAt(reinterpret_cast(inputShapeInfo[i]), axis); // index end with (excluding) - } - - std::vector outSubArrsBuffs(numArrays); - std::vector outSubArrsShapes(numArrays); - for(int i = 0; i < numArrays; ++i) { - specialBufferAndShapeWithOffset(dZ, hZShapeInfo, dZShapeInfo, indices[i], outSubArrsBuffs[i], outSubArrsShapes[i]); - } - - LaunchContext context(stream); - PointersManager manager(&context, "concat"); - void* dOutBuffers = manager.replicatePointer(outSubArrsBuffs.data(), outSubArrsBuffs.size() * sizeof(void*)); - void* dInBuffers = manager.replicatePointer(ddata, numArrays * sizeof(void*)); - void* dInShapeInfo = manager.replicatePointer(dShapePointers, numArrays * sizeof(Nd4jLong*)); - void* dOutShapeInfo = manager.replicatePointer(outSubArrsShapes.data(), outSubArrsShapes.size() * sizeof(Nd4jLong*)); - - BUILD_SINGLE_SELECTOR(zType, concatCudaLauncher, (numArrays, stream, dInBuffers, dInShapeInfo, dOutBuffers, dOutShapeInfo), LIBND4J_TYPES); - manager.synchronize(); - - cudaError_t err; - for(int i = 0; i < numArrays; ++i) { - err = cudaFree(outSubArrsShapes[i]); - if (err != 0) { - printf("Error %d occured when shape %i was deallocating.\n", err, i); - throw std::runtime_error("Cannot deallocate memory for shapes."); - } - } -} - -/** - * Concatneate multi array of the same shape together - * along a particular dimension - */ -// void concat( -// Nd4jPointer *extraPointers, -// int dimension, -// int numArrays, -// Nd4jPointer *data, Nd4jPointer 
*inputShapeInfo, -// Nd4jPointer *ddata, Nd4jPointer *dinputShapeInfo, -// void *hZ, Nd4jLong *hZShapeInfo, -// void *dZ, Nd4jLong *dZShapeInfo, -// Nd4jPointer *tadPointers, Nd4jPointer *offsetPointers) { -// -// cudaStream_t *stream = reinterpret_cast(extraPointers[1]); -// auto hXShapeInfo = hZShapeInfo; -// auto hShapePointers = reinterpret_cast(inputShapeInfo); -// // numArrays will be used as number of TADs, so each block process 1 input -// -// int smem = 8192; -// bool isVstack = false; -// bool isScalar = true; -// bool isHstack = false; -// -// for (int i = 0; i < numArrays; i++) { -// if (!shape::isScalar(hShapePointers[i])) { -// isScalar = false; -// break; -// } -// } -// -// if (!isScalar && dimension == 0 && shape::rank(hZShapeInfo) == 2 && shape::order(hZShapeInfo) == 'c' ) { -// isVstack = true; -// for (int i = 0; i < numArrays; i++) { -// if (!shape::isVector(hShapePointers[i]) || shape::elementWiseStride(hShapePointers[i]) <= 0 || -// shape::order(hShapePointers[i]) != 'c') { -// isVstack = false; -// break; -// } -// } -// } -// -// // let's try to fit N-dimensional vstack -// if (!isVstack && !isScalar && dimension == 0 && shape::order(hXShapeInfo) == 'c') { -// auto length0 = shape::length(hShapePointers[0]); -// isVstack = true; -// for (int i = 0; i < numArrays; i++) { -// if (shape::elementWiseStride(hShapePointers[i]) <= 0 || shape::order(hShapePointers[i]) != 'c' || length0 != shape::length(hShapePointers[i])) { -// isVstack = false; -// break; -// } -// } -// } -// -// if (!isScalar && !isVstack && dimension == 1 && shape::isVector(hZShapeInfo)) { -// isHstack = true; -// for (int i = 0; i < numArrays; i++) { -// if (!shape::isVector(hShapePointers[i]) || shape::elementWiseStride(hShapePointers[i]) <= 0) { -// isHstack = false; -// break; -// } -// } -// } -// -// if (isScalar) { -// if (nd4j::Environment::getInstance()->isDebugAndVerbose()) -// printf("Going scalar concat\n"); -// -// dim3 launchDims(128, 128, 16384); -// auto zType = nd4j::ArrayOptions::dataType(hZShapeInfo); -// BUILD_SINGLE_SELECTOR(zType, concatKernelScalarGeneric, (launchDims, stream, numArrays, reinterpret_cast(ddata[0]), dZ), LIBND4J_TYPES); -// -// } else if (isVstack) { -// if (nd4j::Environment::getInstance()->isDebugAndVerbose()) -// printf("Going VStack concat\n"); -// -// dim3 launchDims(128, 512, 16384); -// auto zType = nd4j::ArrayOptions::dataType(hZShapeInfo); -// BUILD_SINGLE_SELECTOR(zType, concatKernelVStackGeneric, (launchDims, stream, numArrays, reinterpret_cast(ddata[0]), reinterpret_cast(dinputShapeInfo[0]), dZ, dZShapeInfo), LIBND4J_TYPES); -// -// } else if (isHstack) { -// if (nd4j::Environment::getInstance()->isDebugAndVerbose()) -// printf("Going HStack concat\n"); -// -// dim3 launchDims(128, 128, 16384); -// auto zType = nd4j::ArrayOptions::dataType(hZShapeInfo); -// BUILD_SINGLE_SELECTOR(zType, concatKernelHStackGeneric, (launchDims, stream, numArrays, reinterpret_cast(ddata[0]), reinterpret_cast(dinputShapeInfo[0]), dZ, dZShapeInfo), LIBND4J_TYPES); -// } else { -// if (nd4j::Environment::getInstance()->isDebugAndVerbose()) -// printf("Going generic concat\n"); -// -// auto devZTadShape = reinterpret_cast(extraPointers[10]); -// auto devZOffsets = reinterpret_cast(extraPointers[11]); -// -// dim3 launchDims(128, 128, 8192); -// auto zType = nd4j::ArrayOptions::dataType(hZShapeInfo); -// BUILD_SINGLE_SELECTOR(zType, concatKernelGeneric, (launchDims, stream, numArrays, reinterpret_cast(ddata[0]), reinterpret_cast(dinputShapeInfo[0]), dZ, dZShapeInfo, 
reinterpret_cast(tadPointers[0]), reinterpret_cast(offsetPointers[0]), devZTadShape, devZOffsets), LIBND4J_TYPES); -// } -// if (nd4j::Environment::getInstance()->isDebugAndVerbose()) -// printf("sharedMemory requested for concatFloat: [%i], registers: [%i]\n", smem, funcAttributes[31].numRegs); -// -// cudaError_t res = cudaStreamSynchronize(*stream); -// checkCudaErrors(res); -// nd4j::DebugHelper::checkErrorCode(stream, "Legacy ConcatFloat(...) failed"); -//} - - - void specialConcat( Nd4jPointer *extraPointers, int dimension, @@ -1432,8 +1255,14 @@ void specialConcat( Nd4jPointer *inputShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, Nd4jPointer *tadPointers, Nd4jPointer *offsetPointers) { - - BUILD_SINGLE_SELECTOR(ArrayOptions::dataType(dZShapeInfo), nd4j::SpecialMethods ,::concatCpuGeneric(dimension, numArrays, data, inputShapeInfo, dZ, dZShapeInfo), LIBND4J_TYPES); + try { + BUILD_SINGLE_SELECTOR(ArrayOptions::dataType(dZShapeInfo), nd4j::SpecialMethods, + ::concatCpuGeneric(dimension, numArrays, data, inputShapeInfo, dZ, dZShapeInfo), + LIBND4J_TYPES); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } @@ -1441,9 +1270,15 @@ void specialConcat( * This method saves */ nd4j::TadPack* tadOnlyShapeInfo(Nd4jLong *dXShapeInfo, int *dimension, int dimensionLength) { - auto pack = new TadPack(); - *pack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(dXShapeInfo, dimension, dimensionLength); - return pack; + try { + auto pack = new TadPack(); + *pack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(dXShapeInfo, dimension, dimensionLength); + return pack; + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return nullptr; + } } Nd4jLong* getPrimaryShapeInfo(nd4j::TadPack* pack) { @@ -1489,11 +1324,11 @@ int memcpyConstantAsync(Nd4jLong dst, Nd4jPointer src, Nd4jLong size, int flags, } break; } - //cudaError_t dZ = cudaMemcpyAsync((void *) dst, (const void *) src, (size_t) size, kind, *pStream); - cudaError_t dZ = cudaMemcpyToSymbolAsync(deviceConstantMemory, const_cast(src), size, dst, kind, *pStream); - checkCudaErrors(dZ); - if (dZ != 0) - throw std::runtime_error("cudaMemcpyToSymbolAsync(...) failed"); + auto dZ = cudaMemcpyToSymbolAsync(deviceConstantMemory, const_cast(src), size, dst, kind, *pStream); + if (dZ != 0) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(dZ); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage("cudaMemcpyToSymbolAsync failed"); + } return 1; } @@ -1502,8 +1337,10 @@ Nd4jPointer getConstantSpace() { Nd4jPointer dConstAddr; cudaError_t dZ = cudaGetSymbolAddress(reinterpret_cast(&dConstAddr), deviceConstantMemory); - if (dZ != 0) - throw std::runtime_error("cudaGetSymbolAddress(...) 
failed"); + if (dZ != 0) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(dZ); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage("cudaGetSymbolAddress failed"); + } return dConstAddr; } @@ -1519,13 +1356,19 @@ void pullRows(Nd4jPointer *extraPointers, Nd4jLong *tadOffsets, Nd4jLong *zTadShapeInfo, Nd4jLong *zTadOffsets) { + try { + cudaStream_t *stream = reinterpret_cast(extraPointers[1]); + dim3 launchDims(64, 256, 1024); + auto xType = nd4j::ArrayOptions::dataType(xShapeInfo); + BUILD_SINGLE_SELECTOR(xType, pullRowsKernelGeneric, + (launchDims, stream, dX, dZ, n, indexes, tadShapeInfo, tadOffsets, zTadShapeInfo, zTadOffsets), + LIBND4J_TYPES); - cudaStream_t *stream = reinterpret_cast(extraPointers[1]); - dim3 launchDims(64, 256, 1024); - auto xType = nd4j::ArrayOptions::dataType(xShapeInfo); - BUILD_SINGLE_SELECTOR(xType, pullRowsKernelGeneric, (launchDims, stream, dX, dZ, n, indexes, tadShapeInfo, tadOffsets, zTadShapeInfo, zTadOffsets), LIBND4J_TYPES); - - DEBUG_KERNEL(stream, -1); + DEBUG_KERNEL(stream, -1); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } @@ -1537,25 +1380,31 @@ void average(Nd4jPointer *extras, int n, Nd4jLong length, bool propagate) { + try { + cudaStream_t *stream = reinterpret_cast(extras[1]); + int mode = getDeviceId(extras[3]); - cudaStream_t * stream = reinterpret_cast(extras[1]); - int mode = getDeviceId(extras[3]); + auto dX = reinterpret_cast(dx); - auto dX = reinterpret_cast(dx); + if (nd4j::Environment::getInstance()->isDebugAndVerbose()) + printf("averageFloat called\n"); - if (nd4j::Environment::getInstance()->isDebugAndVerbose()) - printf("averageFloat called\n"); - - auto xType = nd4j::ArrayOptions::dataType(xShapeInfo); - // launching on gpu - if (mode == 0) { - dim3 launchDims(256, 256, 4096); - BUILD_SINGLE_SELECTOR(xType, averagingKernelGeneric, (launchDims, stream, dX, dz, n, length, propagate), LIBND4J_TYPES); - nd4j::DebugHelper::checkErrorCode(stream, "AverageFloat(...) failed"); - } else { - // launching on host memory - BUILD_SINGLE_SELECTOR(xType, nd4j::SpecialMethods, ::averageGeneric(x, z, zShapeInfo, n, length, propagate), LIBND4J_TYPES); - } + auto xType = nd4j::ArrayOptions::dataType(xShapeInfo); + // launching on gpu + if (mode == 0) { + dim3 launchDims(256, 256, 4096); + BUILD_SINGLE_SELECTOR(xType, averagingKernelGeneric, (launchDims, stream, dX, dz, n, length, propagate), + LIBND4J_TYPES); + nd4j::DebugHelper::checkErrorCode(stream, "AverageFloat(...) 
failed"); + } else { + // launching on host memory + BUILD_SINGLE_SELECTOR(xType, nd4j::SpecialMethods, ::averageGeneric(x, z, zShapeInfo, n, length, propagate), + LIBND4J_TYPES); + } + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void accumulate(Nd4jPointer *extras, @@ -1565,25 +1414,31 @@ void accumulate(Nd4jPointer *extras, void *dz, Nd4jLong *dzShapeInfo, int n, Nd4jLong length) { + try { + auto stream = reinterpret_cast(extras[1]); + int mode = getDeviceId(extras[3]); - auto stream = reinterpret_cast(extras[1]); - int mode = getDeviceId(extras[3]); + auto dX = reinterpret_cast(dx); - auto dX = reinterpret_cast(dx); + if (nd4j::Environment::getInstance()->isDebugAndVerbose()) + printf("accumulateFloat called\n"); + auto xType = nd4j::ArrayOptions::dataType(xShapeInfo); - if (nd4j::Environment::getInstance()->isDebugAndVerbose()) - printf("accumulateFloat called\n"); - auto xType = nd4j::ArrayOptions::dataType(xShapeInfo); - - // launching on gpu - if (mode == 0) { - dim3 launchDims(n, 256, 16384); - BUILD_SINGLE_SELECTOR(xType, accumulateKernelGeneric, (launchDims, stream, dX, dz, n,length), LIBND4J_TYPES); - nd4j::DebugHelper::checkErrorCode(stream, "AccumulateFloat(...) failed"); - } else { - // launching on host memory - BUILD_SINGLE_SELECTOR(xType, nd4j::SpecialMethods, ::accumulateGeneric(x, z, zShapeInfo, n, length), LIBND4J_TYPES); - } + // launching on gpu + if (mode == 0) { + dim3 launchDims(n, 256, 16384); + BUILD_SINGLE_SELECTOR(xType, accumulateKernelGeneric, (launchDims, stream, dX, dz, n, length), + LIBND4J_TYPES); + nd4j::DebugHelper::checkErrorCode(stream, "AccumulateFloat(...) failed"); + } else { + // launching on host memory + BUILD_SINGLE_SELECTOR(xType, nd4j::SpecialMethods, ::accumulateGeneric(x, z, zShapeInfo, n, length), + LIBND4J_TYPES); + } + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } @@ -1596,50 +1451,29 @@ void shuffle(Nd4jPointer *extras, int *shuffleMap, Nd4jPointer *tadShapeInfo, Nd4jPointer *tadOffsets) { + try { + cudaStream_t *stream = reinterpret_cast(extras[1]); - cudaStream_t *stream = reinterpret_cast(extras[1]); + auto dX = reinterpret_cast(dx); + auto dZ = reinterpret_cast(dz); + auto xShape = reinterpret_cast(xShapeInfo); + auto dxShape = reinterpret_cast(dXShapeInfo); + auto tadOnlyShapeInfo = reinterpret_cast(tadShapeInfo); + auto tadOffset = reinterpret_cast(tadOffsets); - auto dX = reinterpret_cast(dx); - auto dZ = reinterpret_cast(dz); - auto xShape = reinterpret_cast(xShapeInfo); - auto dxShape = reinterpret_cast(dXShapeInfo); - auto tadOnlyShapeInfo = reinterpret_cast(tadShapeInfo); - auto tadOffset = reinterpret_cast(tadOffsets); + auto xType = nd4j::ArrayOptions::dataType(xShape[0]); + dim3 launchDims(256, 512, 8192); + BUILD_SINGLE_SELECTOR(xType, shuffleKernelGeneric, + (launchDims, stream, dX, dxShape, dZ, N, shuffleMap, tadOnlyShapeInfo, tadOffset), + LIBND4J_TYPES); - auto xType = nd4j::ArrayOptions::dataType(xShape[0]); - dim3 launchDims(256, 512, 8192); - BUILD_SINGLE_SELECTOR(xType, shuffleKernelGeneric, (launchDims, stream, dX, dxShape, dZ, N, shuffleMap, tadOnlyShapeInfo, tadOffset), LIBND4J_TYPES); - - nd4j::DebugHelper::checkErrorCode(stream, "shuffle(...) failed"); + nd4j::DebugHelper::checkErrorCode(stream, "shuffle(...) 
failed"); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } -/* -void execMetaPredicateShape(Nd4jPointer *extras, - const int opTypeA, - const int opNumA, - const int opTypeB, - const int opNumB, - Nd4jLong N, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraA, - void *extraB, - double scalarA, - double scalarB) { - - cudaStream_t *stream = reinterpret_cast(extras[1]); - auto xType = nd4j::ArrayOptions::dataType(hXShapeInfo); - BUILD_SINGLE_SELECTOR(xType, functions::grid::GRIDShaped, ::execMetaPredicateShaped(stream, extras, opTypeA, opNumA, opTypeB, opNumB, N, dX, dXShapeInfo, dY, dYShapeInfo, dZ, dZShapeInfo, extraA, extraB, scalarA, scalarB), LIBND4J_TYPES); - // functions::grid::GRIDShaped::execMetaPredicateShaped(stream, extras, opTypeA, opNumA, opTypeB, opNumB, N, dX, dXShapeInfo, dy, dYShapeInfo, dz, zShapeInfo, extraA, extraB, scalarA, scalarB); - - DEBUG_KERNEL(stream, opNumA); -} -*/ - bool isExperimentalEnabled() { return nd4j::Environment::getInstance()->isExperimentalBuild(); } @@ -1670,9 +1504,14 @@ void execSummaryStats(Nd4jPointer *extraPointers, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, bool biasCorrected) { - - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execSummaryStats(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, hZShapeInfo, dZ, dZShapeInfo, biasCorrected); + try { + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execSummaryStats(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, + hZShapeInfo, dZ, dZShapeInfo, biasCorrected); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -1686,11 +1525,18 @@ void execSummaryStatsTad(Nd4jPointer *extraPointers, void *hDimension, Nd4jLong *hDimensionShape, void *dDimension, Nd4jLong *dDimensionShape, bool biasCorrected, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { - auto dimension = reinterpret_cast(dDimension); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + try { + auto dimension = reinterpret_cast(dDimension); + int dimensionLength = static_cast(shape::length(hDimensionShape)); - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execSummaryStats(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, hZShapeInfo, dZ, dZShapeInfo, dimension, dimensionLength, tadShapeInfo, tadOffsets, biasCorrected); + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execSummaryStats(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, + hZShapeInfo, dZ, dZShapeInfo, dimension, dimensionLength, tadShapeInfo, + tadOffsets, biasCorrected); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } 
//////////////////////////////////////////////////////////////////////// @@ -1703,9 +1549,14 @@ void execReduce3(Nd4jPointer *extraPointers, void *dY, Nd4jLong *dYShapeInfo, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo) { - - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execReduce3(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hY, hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo); + try { + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execReduce3(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hY, hYShapeInfo, dY, + dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -1721,35 +1572,35 @@ void execReduce3Tad(Nd4jPointer *extraPointers, void *hDimension, Nd4jLong *hDimensionShape, void *dDimension, Nd4jLong *dDimensionShape, Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *yTadOnlyShapeInfo, Nd4jLong *yTadOffsets) { - auto dimension = reinterpret_cast(dDimension); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + try { + auto dimension = reinterpret_cast(dDimension); + int dimensionLength = static_cast(shape::length(hDimensionShape)); - // if (extraPointers == nullptr || extraPointers[2] == 0) - // NativeOpExecutioner::execReduce3(nullptr, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hY, hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, dimension, dimensionLength, tadOnlyShapeInfo, tadOffsets, yTadOnlyShapeInfo, yTadOffsets); - // else { - // // going tad-ways - // auto tadShapeInfo = reinterpret_cast (extraPointers[0]); - // auto tadOffsets = reinterpret_cast(extraPointers[1]); + auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, + reinterpret_cast(hDimension), + shape::length(hDimensionShape)); + auto tadLength = shape::length(tadPack.primaryShapeInfo()); + auto yLength = shape::length(hYShapeInfo); + auto xLength = shape::length(hXShapeInfo); - // NativeOpExecutioner::execReduce3TAD(nullptr, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hY, hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, dimension, dimensionLength, tadShapeInfo, tadOffsets); - // } + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - // nd4j_printf("Starting...\n",""); + if (tadLength == yLength || tadLength == xLength) { + // nd4j_printf("== way\n",""); + NativeOpExecutioner::execReduce3(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hY, hYShapeInfo, + dY, + dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, dimension, dimensionLength, + tadOnlyShapeInfo, tadOffsets, yTadOnlyShapeInfo, yTadOffsets); + } else + NativeOpExecutioner::execReduce3TAD(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hY, + hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, + dimension, dimensionLength, tadOnlyShapeInfo, yTadOffsets, + yTadOnlyShapeInfo, yTadOffsets); - auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, reinterpret_cast(hDimension), shape::length(hDimensionShape)); - auto tadLength = shape::length(tadPack.primaryShapeInfo()); - auto yLength = 
shape::length(hYShapeInfo); - auto xLength = shape::length(hXShapeInfo); - - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - - if (tadLength == yLength || tadLength == xLength) { - // nd4j_printf("== way\n",""); - NativeOpExecutioner::execReduce3(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hY, hYShapeInfo, dY, - dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, dimension, dimensionLength, - tadOnlyShapeInfo, tadOffsets, yTadOnlyShapeInfo, yTadOffsets); - } else - NativeOpExecutioner::execReduce3TAD(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hY, hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, dimension, dimensionLength, tadOnlyShapeInfo, yTadOffsets, yTadOnlyShapeInfo, yTadOffsets); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -1761,9 +1612,14 @@ void execReduce3Scalar(Nd4jPointer *extraPointers,int opNum, void *dY, Nd4jLong *dYShapeInfo, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo) { - - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execReduce3Scalar(&lc, opNum,hX,hXShapeInfo,dX, dXShapeInfo,extraParams,hY,hYShapeInfo,dY,dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo); + try { + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execReduce3Scalar(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hY, + hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -1776,9 +1632,15 @@ void execScalarBool(Nd4jPointer *extraPointers, void *hScalar, Nd4jLong *hScalarShapeInfo, void *dScalar, Nd4jLong *dScalarShapeInfo, void *extraParams) { - - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execScalarBool(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, hScalar, hScalarShapeInfo, dScalar, dScalarShapeInfo, extraParams); + try { + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execScalarBool(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, hZ, hZShapeInfo, dZ, + dZShapeInfo, hScalar, hScalarShapeInfo, dScalar, dScalarShapeInfo, + extraParams); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -1794,11 +1656,19 @@ void execScalarBoolTad(Nd4jPointer *extraPointers, void *hDimension, Nd4jLong *hDimensionShape, void *dDimension, Nd4jLong *dDimensionShape, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { - auto dimension = reinterpret_cast(dDimension); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + try { + auto dimension = reinterpret_cast(dDimension); + int dimensionLength = 
static_cast(shape::length(hDimensionShape)); - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execScalarBool(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, hZShapeInfo, dZ, dZShapeInfo, hScalars, hScalarShapeInfo, dScalars, dScalarShapeInfo, dimension, dimensionLength, tadShapeInfo, tadOffsets, tadShapeInfoZ, tadOffsetsZ); + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execScalarBool(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParams, hZ, hZShapeInfo, + dZ, dZShapeInfo, hScalars, hScalarShapeInfo, dScalars, dScalarShapeInfo, + dimension, dimensionLength, tadShapeInfo, tadOffsets, tadShapeInfoZ, + tadOffsetsZ); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -1811,9 +1681,14 @@ void execScalar(Nd4jPointer *extraPointers, void *hScalar, Nd4jLong *hScalarShapeInfo, void *dScalar, Nd4jLong *dScalarShapeInfo, void *extraParams) { - - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execScalar(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, hScalar, hScalarShapeInfo, dScalar, dScalarShapeInfo, extraParams); + try { + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execScalar(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, + hScalar, hScalarShapeInfo, dScalar, dScalarShapeInfo, extraParams); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -1830,27 +1705,36 @@ void execScalarTad(Nd4jPointer *extraPointers, void *dDimension, Nd4jLong *dDimensionShape, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ) { - auto dimension = reinterpret_cast(dDimension); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + try { + auto dimension = reinterpret_cast(dDimension); + int dimensionLength = static_cast(shape::length(hDimensionShape)); - cudaStream_t *stream = reinterpret_cast(extraPointers[1]); + cudaStream_t *stream = reinterpret_cast(extraPointers[1]); - auto xType = nd4j::ArrayOptions::dataType(hXShapeInfo); - auto yType = nd4j::ArrayOptions::dataType(hScalarShapeInfo); - auto zType = nd4j::ArrayOptions::dataType(hZShapeInfo); + auto xType = nd4j::ArrayOptions::dataType(hXShapeInfo); + auto yType = nd4j::ArrayOptions::dataType(hScalarShapeInfo); + auto zType = nd4j::ArrayOptions::dataType(hZShapeInfo); - if (yType != xType && yType != nd4j::DataType::BOOL && !isExperimentalEnabled()) - throw nd4j::datatype_exception::build("execScalar both operands must have same data type", xType, yType); + if (yType != xType && yType != nd4j::DataType::BOOL && !isExperimentalEnabled()) + throw nd4j::datatype_exception::build("execScalar both operands must have same data type", xType, yType); - dim3 launchDims(256, 256, 16384); + dim3 launchDims(256, 256, 16384); #ifdef __ND4J_EXPERIMENTAL__ - BUILD_PAIRWISE_SELECTOR(xType, yType, zType, 
functions::scalar::ScalarTransform, ::executeCudaAlongDimension(launchDims, stream, opNum, dX, dXShapeInfo, dZ, dZShapeInfo, dScalars, extraParams, dimension, dimensionLength, tadShapeInfo, tadOffsets, tadShapeInfoZ, tadOffsetsZ), LIBND4J_TYPES, LIBND4J_TYPES); + BUILD_PAIRWISE_SELECTOR(xType, yType, zType, functions::scalar::ScalarTransform, ::executeCudaAlongDimension(launchDims, stream, opNum, dX, dXShapeInfo, dZ, dZShapeInfo, dScalars, extraParams, dimension, dimensionLength, tadShapeInfo, tadOffsets, tadShapeInfoZ, tadOffsetsZ), LIBND4J_TYPES, LIBND4J_TYPES); #else - BUILD_SINGLE_SELECTOR_THRICE(xType, functions::scalar::ScalarTransform, ::executeCudaAlongDimension(launchDims, stream, opNum, dX, dXShapeInfo, dZ, dZShapeInfo, dScalars, extraParams, dimension, dimensionLength, tadShapeInfo, tadOffsets, tadShapeInfoZ, tadOffsetsZ), LIBND4J_TYPES); + BUILD_SINGLE_SELECTOR_THRICE(xType, functions::scalar::ScalarTransform, + ::executeCudaAlongDimension(launchDims, stream, opNum, dX, dXShapeInfo, dZ, + dZShapeInfo, dScalars, extraParams, dimension, + dimensionLength, tadShapeInfo, tadOffsets, + tadShapeInfoZ, tadOffsetsZ), LIBND4J_TYPES); #endif - DEBUG_KERNEL(stream, opNum); + DEBUG_KERNEL(stream, opNum); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void execAggregate(Nd4jPointer *extraPointers, @@ -1866,16 +1750,23 @@ void execAggregate(Nd4jPointer *extraPointers, void *realArguments, int numRealArguments, nd4j::DataType dtype) { + try { + cudaStream_t *stream = reinterpret_cast(extraPointers[1]); + int numBlocks = getDeviceId(extraPointers[2]); + int numThreads = getDeviceId(extraPointers[3]); + int shmem = getDeviceId(extraPointers[4]); - cudaStream_t *stream = reinterpret_cast(extraPointers[1]); - int numBlocks = getDeviceId(extraPointers[2]); - int numThreads = getDeviceId(extraPointers[3]); - int shmem = getDeviceId(extraPointers[4]); + dim3 launchDims = dim3(numBlocks, numThreads, shmem); - dim3 launchDims = dim3(numBlocks, numThreads, shmem); - - BUILD_SINGLE_SELECTOR(dtype, functions::aggregate::AggregatedFunction, ::aggregateKernelGeneric(launchDims, stream, opNum, arguments, numArguments, shapes, numShapes, indexArguments, numIndexArguments, intArrays, numIntArrays, realArguments, numRealArguments), FLOAT_TYPES); - nd4j::DebugHelper::checkErrorCode(stream, "execAggregateFloat(...) failed"); + BUILD_SINGLE_SELECTOR(dtype, functions::aggregate::AggregatedFunction, + ::aggregateKernelGeneric(launchDims, stream, opNum, arguments, numArguments, shapes, + numShapes, indexArguments, numIndexArguments, intArrays, + numIntArrays, realArguments, numRealArguments), FLOAT_TYPES); + nd4j::DebugHelper::checkErrorCode(stream, "execAggregateFloat(...) 
failed"); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void batchExecutor(Nd4jPointer *extraPointers, @@ -1897,17 +1788,25 @@ void execAggregateBatch(Nd4jPointer *extraPointers, int maxIntArrays, int maxIntArraySize, int maxIdx, int maxReals, void *ptrToArguments, nd4j::DataType dtype) { - // not implemented yet - cudaStream_t *stream = reinterpret_cast(extraPointers[1]); - int numBlocks = getDeviceId(extraPointers[2]); - int numThreads = getDeviceId(extraPointers[3]); - int shmem = getDeviceId(extraPointers[4]); + try { + // not implemented yet + cudaStream_t *stream = reinterpret_cast(extraPointers[1]); + int numBlocks = getDeviceId(extraPointers[2]); + int numThreads = getDeviceId(extraPointers[3]); + int shmem = getDeviceId(extraPointers[4]); - dim3 launchDims = dim3(numAggregates, numThreads, shmem); + dim3 launchDims = dim3(numAggregates, numThreads, shmem); - BUILD_SINGLE_SELECTOR(dtype, functions::aggregate::AggregatedFunction, ::aggregateBatchKernelGeneric(launchDims, stream, opNum, numAggregates, maxArgs, maxShapes, maxIntArrays, maxIntArraySize, maxIdx, maxReals, ptrToArguments), FLOAT_TYPES); + BUILD_SINGLE_SELECTOR(dtype, functions::aggregate::AggregatedFunction, + ::aggregateBatchKernelGeneric(launchDims, stream, opNum, numAggregates, maxArgs, + maxShapes, maxIntArrays, maxIntArraySize, maxIdx, maxReals, + ptrToArguments), FLOAT_TYPES); - DEBUG_KERNEL(stream, opNum); + DEBUG_KERNEL(stream, opNum); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -1917,9 +1816,13 @@ void execRandom(Nd4jPointer *extraPointers, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, void *extraArguments) { - - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execRandom(&lc, opNum, stateHost, hZ, hZShapeInfo, dZ, dZShapeInfo, extraArguments); + try { + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execRandom(&lc, opNum, stateHost, hZ, hZShapeInfo, dZ, dZShapeInfo, extraArguments); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -1929,9 +1832,14 @@ void execRandom2(Nd4jPointer *extraPointers, int opNum, Nd4jPointer stateHost, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, void *extraArguments) { - - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execRandom(&lc, opNum, stateHost, hX, hXShapeInfo, dX, dXShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, extraArguments); + try { + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execRandom(&lc, opNum, stateHost, hX, hXShapeInfo, dX, dXShapeInfo, hZ, hZShapeInfo, dZ, + dZShapeInfo, extraArguments); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + 
nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -1943,9 +1851,14 @@ void execRandom3(Nd4jPointer *extraPointers, int opNum, Nd4jPointer stateHost, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, void *extraArguments) { - - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execRandom(&lc, opNum, stateHost, hX, hXShapeInfo, dX, dXShapeInfo, hY, hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, extraArguments); + try { + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execRandom(&lc, opNum, stateHost, hX, hXShapeInfo, dX, dXShapeInfo, hY, hYShapeInfo, dY, + dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, extraArguments); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } @@ -2053,13 +1966,19 @@ void tear(Nd4jPointer *extras, Nd4jLong *zShapeInfo, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets) { + try { + cudaStream_t *stream = reinterpret_cast(extras[1]); + dim3 launchDims(512, 512, 512); + auto xType = nd4j::ArrayOptions::dataType(xShapeInfo); + BUILD_SINGLE_SELECTOR(xType, tearKernelGeneric, + (launchDims, stream, dX, dXShapeInfo, targets, zShapeInfo, tadShapeInfo, tadOffsets), + LIBND4J_TYPES); - cudaStream_t *stream = reinterpret_cast(extras[1]); - dim3 launchDims(512, 512, 512); - auto xType = nd4j::ArrayOptions::dataType(xShapeInfo); - BUILD_SINGLE_SELECTOR(xType, tearKernelGeneric, (launchDims, stream, dX, dXShapeInfo, targets, zShapeInfo, tadShapeInfo, tadOffsets), LIBND4J_TYPES); - - nd4j::DebugHelper::checkErrorCode(stream, "tearFloat(...) failed"); + nd4j::DebugHelper::checkErrorCode(stream, "tearFloat(...) failed"); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } @@ -2146,56 +2065,72 @@ void prescanArrayRecursive(Nd4jPointer *extras, int *dZ, int *dX, int numElement void encodeThresholdP1(Nd4jPointer *extras, void *dx, Nd4jLong *hXShapeInfo, Nd4jLong N, int *dz, float threshold) { + try { + cudaStream_t *stream = reinterpret_cast(extras[1]); - cudaStream_t *stream = reinterpret_cast(extras[1]); + int blockSize = 1024; + int numBlocks = N / blockSize + (N % blockSize ? 1 : 0); - int blockSize = 1024; - int numBlocks = N / blockSize + (N % blockSize ? 1 : 0); + dim3 launchDims(numBlocks, blockSize, 1024); + auto xType = nd4j::ArrayOptions::dataType(hXShapeInfo); + BUILD_SINGLE_SELECTOR(xType, encoderKernelP1Generic, (launchDims, stream, dx, N, dz, threshold), LIBND4J_TYPES); - dim3 launchDims(numBlocks, blockSize, 1024); - auto xType = nd4j::ArrayOptions::dataType(hXShapeInfo); - BUILD_SINGLE_SELECTOR(xType, encoderKernelP1Generic, (launchDims, stream, dx, N, dz, threshold), LIBND4J_TYPES); - - nd4j::DebugHelper::checkErrorCode(stream, "encodeThresholdP1Float(...) failed"); + nd4j::DebugHelper::checkErrorCode(stream, "encodeThresholdP1Float(...) 
failed"); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void encodeThresholdP2Int(Nd4jPointer *extraPointers, int *dx, Nd4jLong N, int *dz) { - - cudaStream_t *stream = reinterpret_cast(extraPointers[1]); - //encoderKernelP2Float<<>>(dx, N, dz); - prescanArrayRecursive(extraPointers, dz, dx + 1, (int) N, 0); - nd4j::DebugHelper::checkErrorCode(stream, "encodeThresholdP2Int(...) failed"); + try { + cudaStream_t *stream = reinterpret_cast(extraPointers[1]); + //encoderKernelP2Float<<>>(dx, N, dz); + prescanArrayRecursive(extraPointers, dz, dx + 1, (int) N, 0); + nd4j::DebugHelper::checkErrorCode(stream, "encodeThresholdP2Int(...) failed"); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void encodeThresholdP3(Nd4jPointer *extraPointers, void *dx, Nd4jLong *hXShapeInfo, int *offsets, Nd4jLong N, int *dz){ + try { + cudaStream_t *stream = reinterpret_cast(extraPointers[1]); - cudaStream_t *stream = reinterpret_cast(extraPointers[1]); + int blockSize = 1024; + int numBlocks = N / blockSize + (N % blockSize ? 1 : 0); - int blockSize = 1024; - int numBlocks = N / blockSize + (N % blockSize ? 1 : 0); + dim3 launchDims(numBlocks, blockSize, 4096); + auto xType = nd4j::ArrayOptions::dataType(hXShapeInfo); + BUILD_SINGLE_SELECTOR(xType, encoderKernelP3Generic, (launchDims, stream, dx, offsets, N, dz), LIBND4J_TYPES); - dim3 launchDims(numBlocks, blockSize, 4096); - auto xType = nd4j::ArrayOptions::dataType(hXShapeInfo); - BUILD_SINGLE_SELECTOR(xType, encoderKernelP3Generic, (launchDims, stream, dx, offsets, N, dz), LIBND4J_TYPES); - - nd4j::DebugHelper::checkErrorCode(stream, "encodeThresholdP3Float(...) failed"); + nd4j::DebugHelper::checkErrorCode(stream, "encodeThresholdP3Float(...) failed"); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void decodeThreshold(Nd4jPointer *extraPointers, void *dx, Nd4jLong N, void *dz, Nd4jLong *zShapeInfo){ + try { + cudaStream_t *stream = reinterpret_cast(extraPointers[1]); - cudaStream_t *stream = reinterpret_cast(extraPointers[1]); + // we probably want to have smaller blocks here, memory writes are misaligned anyway + int blockSize = 128; + int numBlocks = N / blockSize + (N % blockSize ? 1 : 0); - // we probably want to have smaller blocks here, memory writes are misaligned anyway - int blockSize = 128; - int numBlocks = N / blockSize + (N % blockSize ? 1 : 0); + dim3 launchDims(numBlocks, blockSize, 1024); + auto zType = nd4j::ArrayOptions::dataType(zShapeInfo); + BUILD_SINGLE_SELECTOR(zType, decoderKernelGeneric, (launchDims, stream, dx, N, dz), LIBND4J_TYPES); - dim3 launchDims(numBlocks, blockSize, 1024); - auto zType = nd4j::ArrayOptions::dataType(zShapeInfo); - BUILD_SINGLE_SELECTOR(zType, decoderKernelGeneric, (launchDims, stream, dx, N, dz), LIBND4J_TYPES); - - nd4j::DebugHelper::checkErrorCode(stream, "decodeThresholdFloat(...) failed"); + nd4j::DebugHelper::checkErrorCode(stream, "decodeThresholdFloat(...) 
failed"); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } //////////////////////////////////////////////////////////////////////// @@ -2212,11 +2147,18 @@ void execReduce3All(Nd4jPointer *extraPointers, void *dDimension, Nd4jLong *dDimensionShape, Nd4jLong *xTadShapeInfo, Nd4jLong *xOffsets, Nd4jLong *yTadShapeInfo, Nd4jLong *yOffsets) { - auto dimension = reinterpret_cast(dDimension); - int dimensionLength = static_cast(shape::length(hDimensionShape)); + try { + auto dimension = reinterpret_cast(dDimension); + int dimensionLength = static_cast(shape::length(hDimensionShape)); - LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); - NativeOpExecutioner::execReduce3All(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParamsVals, hY, hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, dimension, dimensionLength, xTadShapeInfo, xOffsets, yTadShapeInfo, yOffsets); + LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); + NativeOpExecutioner::execReduce3All(&lc, opNum, hX, hXShapeInfo, dX, dXShapeInfo, extraParamsVals, hY, + hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, dimension, + dimensionLength, xTadShapeInfo, xOffsets, yTadShapeInfo, yOffsets); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } @@ -2224,57 +2166,65 @@ void sort(Nd4jPointer *extraPointers, void *x, Nd4jLong *xShapeInfo, void *dX, Nd4jLong *dXShapeInfo, bool descending) { + try { + cudaStream_t *stream = reinterpret_cast(extraPointers[1]); - cudaStream_t *stream = reinterpret_cast(extraPointers[1]); - - auto xLength = shape::length(xShapeInfo); - auto xEWS = shape::elementWiseStride(xShapeInfo); - auto xType = nd4j::ArrayOptions::dataType(xShapeInfo); + auto xLength = shape::length(xShapeInfo); + auto xEWS = shape::elementWiseStride(xShapeInfo); + auto xType = nd4j::ArrayOptions::dataType(xShapeInfo); - // check if xLength is a power of 2, and use bitonic sort, if that's the case - if ((xLength != 0) && ((xLength & (xLength - 1)) == 0) && (xLength <= 1024 * 1024 * 10)) { - int numThreads = nd4j::math::nd4j_min(512, xLength); - int numBlocks = xLength / numThreads; - if (xLength % numThreads > 0 || numBlocks == 0) - numBlocks++; + // check if xLength is a power of 2, and use bitonic sort, if that's the case + if ((xLength != 0) && ((xLength & (xLength - 1)) == 0) && (xLength <= 1024 * 1024 * 10)) { + int numThreads = nd4j::math::nd4j_min(512, xLength); + int numBlocks = xLength / numThreads; + if (xLength % numThreads > 0 || numBlocks == 0) + numBlocks++; - dim3 launchDims(numBlocks, numThreads, 32768); + dim3 launchDims(numBlocks, numThreads, 32768); - for (int k = 2; k <= xLength; k = 2*k) { - for (int j = k >> 1; j > 0; j = j >> 1) { - BUILD_SINGLE_SELECTOR(xType, bitonicSortStepGeneric, (launchDims, stream, dX, dXShapeInfo, j, k, xLength, descending), LIBND4J_TYPES); - } + for (int k = 2; k <= xLength; k = 2 * k) { + for (int j = k >> 1; j > 0; j = j >> 1) { + BUILD_SINGLE_SELECTOR(xType, bitonicSortStepGeneric, + (launchDims, stream, dX, dXShapeInfo, j, k, xLength, descending), + LIBND4J_TYPES); + } + } + } else { + int numThreads = nd4j::math::nd4j_min(512, xLength); + int numBlocks = xLength / numThreads; + if (xLength % 
numThreads > 0 || numBlocks == 0) + numBlocks++; + + numBlocks = nd4j::math::nd4j_min(512, numBlocks); + dim3 launchDims(numBlocks, numThreads, 32768); + + int max = 2, dg = 0; + while (max < xLength) { + max <<= 1; + dg++; + } + max <<= 1; + + for (int window = 2; window < max; window <<= 1) { + int n = window; + int rev = 0; + do { + int half = n >> 1; + BUILD_SINGLE_SELECTOR(xType, bitonicArbitraryStepGeneric, + (launchDims, stream, dX, dXShapeInfo, n, xLength, rev, descending), + LIBND4J_TYPES); + n >>= 1; + rev = 1; + } while (n > 1); + } } - } else { - int numThreads = nd4j::math::nd4j_min(512, xLength); - int numBlocks = xLength / numThreads; - if (xLength % numThreads > 0 || numBlocks == 0) - numBlocks++; - numBlocks = nd4j::math::nd4j_min(512, numBlocks); - dim3 launchDims(numBlocks, numThreads, 32768); - - int max = 2, dg = 0; - while (max < xLength) { - max <<= 1; - dg++; - } - max <<= 1; - - for (int window = 2; window < max; window<<=1) { - int n = window; - int rev = 0; - do{ - int half = n >> 1; - BUILD_SINGLE_SELECTOR(xType, bitonicArbitraryStepGeneric, (launchDims, stream, dX, dXShapeInfo, n, xLength, rev, descending), LIBND4J_TYPES); - n>>=1; - rev = 1; - } while(n > 1); - } + nd4j::DebugHelper::checkErrorCode(stream, "sort(...) failed"); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); } - - nd4j::DebugHelper::checkErrorCode(stream, "sort(...) failed"); } @@ -2284,55 +2234,64 @@ void sortByKey(Nd4jPointer *extraPointers, void *y, Nd4jLong *yShapeInfo, void *dy, Nd4jLong *dyShapeInfo, bool descending) { + try { + auto stream = reinterpret_cast(extraPointers[1]); - auto stream = reinterpret_cast(extraPointers[1]); - - auto xLength = shape::length(xShapeInfo); - auto xEWS = shape::elementWiseStride(xShapeInfo); - auto xType = nd4j::ArrayOptions::dataType(xShapeInfo); - auto yType = nd4j::ArrayOptions::dataType(yShapeInfo); + auto xLength = shape::length(xShapeInfo); + auto xEWS = shape::elementWiseStride(xShapeInfo); + auto xType = nd4j::ArrayOptions::dataType(xShapeInfo); + auto yType = nd4j::ArrayOptions::dataType(yShapeInfo); - // check if xLength is a power of 2, and use bitonic sort, if that's the case - if ((xLength != 0) && ((xLength & (xLength - 1)) == 0) && (xLength <= 1024 * 1024 * 10)) { - int numThreads = nd4j::math::nd4j_min(512, xLength); - int numBlocks = xLength / numThreads; - if (xLength % numThreads > 0 || numBlocks == 0) - numBlocks++; + // check if xLength is a power of 2, and use bitonic sort, if that's the case + if ((xLength != 0) && ((xLength & (xLength - 1)) == 0) && (xLength <= 1024 * 1024 * 10)) { + int numThreads = nd4j::math::nd4j_min(512, xLength); + int numBlocks = xLength / numThreads; + if (xLength % numThreads > 0 || numBlocks == 0) + numBlocks++; - dim3 launchDims(numBlocks, numThreads, 32768); + dim3 launchDims(numBlocks, numThreads, 32768); - for (int k = 2; k <= xLength; k = 2*k) { - for (int j = k >> 1; j > 0; j = j >> 1) { - BUILD_DOUBLE_SELECTOR(xType, yType, bitonicSortStepGenericKey, (launchDims, stream, dX, dXShapeInfo, dy, dyShapeInfo, j, k, xLength, descending), LIBND4J_TYPES, LIBND4J_TYPES); + for (int k = 2; k <= xLength; k = 2 * k) { + for (int j = k >> 1; j > 0; j = j >> 1) { + BUILD_DOUBLE_SELECTOR(xType, yType, bitonicSortStepGenericKey, + (launchDims, stream, dX, dXShapeInfo, dy, dyShapeInfo, j, k, xLength, descending), + LIBND4J_TYPES, LIBND4J_TYPES); + } + } + } else { + int 
numThreads = nd4j::math::nd4j_min(512, xLength); + int numBlocks = xLength / numThreads; + if (xLength % numThreads > 0 || numBlocks == 0) + numBlocks++; + + numBlocks = nd4j::math::nd4j_min(512, numBlocks); + dim3 launchDims(numBlocks, numThreads, 32768); + + int max = 2, dg = 0; + while (max < xLength) { + max <<= 1; + dg++; + } + max <<= 1; + + for (int window = 2; window < max; window <<= 1) { + int n = window; + int rev = 0; + do { + int half = n >> 1; + BUILD_DOUBLE_SELECTOR(xType, yType, bitonicArbitraryStepGenericKey, + (launchDims, stream, dX, dXShapeInfo, dy, dyShapeInfo, n, xLength, rev, descending), + LIBND4J_TYPES, LIBND4J_TYPES); + n >>= 1; + rev = 1; + } while (n > 1); } } - } else { - int numThreads = nd4j::math::nd4j_min(512, xLength); - int numBlocks = xLength / numThreads; - if (xLength % numThreads > 0 || numBlocks == 0) - numBlocks++; - numBlocks = nd4j::math::nd4j_min(512, numBlocks); - dim3 launchDims(numBlocks, numThreads, 32768); - - int max = 2, dg = 0; - while (max < xLength) { - max <<= 1; - dg++; - } - max <<= 1; - - for (int window = 2; window < max; window<<=1) { - int n = window; - int rev = 0; - do{ - int half = n >> 1; - BUILD_DOUBLE_SELECTOR(xType, yType, bitonicArbitraryStepGenericKey, (launchDims, stream, dX, dXShapeInfo, dy, dyShapeInfo, n, xLength, rev, descending), LIBND4J_TYPES, LIBND4J_TYPES); - n>>=1; - rev = 1; - } while(n > 1); - } + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); } } @@ -2342,54 +2301,63 @@ void sortByValue(Nd4jPointer *extraPointers, void *y, Nd4jLong *yShapeInfo, void *dy, Nd4jLong *dyShapeInfo, bool descending) { - auto stream = reinterpret_cast(extraPointers[1]); + try { + auto stream = reinterpret_cast(extraPointers[1]); - auto xLength = shape::length(xShapeInfo); - auto xEWS = shape::elementWiseStride(xShapeInfo); - auto xType = nd4j::ArrayOptions::dataType(yShapeInfo); - auto yType = nd4j::ArrayOptions::dataType(xShapeInfo); + auto xLength = shape::length(xShapeInfo); + auto xEWS = shape::elementWiseStride(xShapeInfo); + auto xType = nd4j::ArrayOptions::dataType(yShapeInfo); + auto yType = nd4j::ArrayOptions::dataType(xShapeInfo); - // check if xLength is a power of 2, and use bitonic sort, if that's the case - if ((xLength != 0) && ((xLength & (xLength - 1)) == 0) && (xLength <= 1024 * 1024 * 10)) { - int numThreads = nd4j::math::nd4j_min(512, xLength); - int numBlocks = xLength / numThreads; - if (xLength % numThreads > 0 || numBlocks == 0) - numBlocks++; + // check if xLength is a power of 2, and use bitonic sort, if that's the case + if ((xLength != 0) && ((xLength & (xLength - 1)) == 0) && (xLength <= 1024 * 1024 * 10)) { + int numThreads = nd4j::math::nd4j_min(512, xLength); + int numBlocks = xLength / numThreads; + if (xLength % numThreads > 0 || numBlocks == 0) + numBlocks++; - dim3 launchDims(numBlocks, numThreads, 32768); + dim3 launchDims(numBlocks, numThreads, 32768); - for (int k = 2; k <= xLength; k = 2*k) { - for (int j = k >> 1; j > 0; j = j >> 1) { - BUILD_DOUBLE_SELECTOR(xType, yType, bitonicSortStepGenericKey, (launchDims, stream, dy, dyShapeInfo, dX, dXShapeInfo, j, k, xLength, descending), LIBND4J_TYPES, LIBND4J_TYPES); + for (int k = 2; k <= xLength; k = 2 * k) { + for (int j = k >> 1; j > 0; j = j >> 1) { + BUILD_DOUBLE_SELECTOR(xType, yType, bitonicSortStepGenericKey, + (launchDims, stream, dy, dyShapeInfo, dX, dXShapeInfo, j, k, xLength, descending), + 
+                                          LIBND4J_TYPES, LIBND4J_TYPES);
+                }
+            }
+        } else {
+            int numThreads = nd4j::math::nd4j_min(512, xLength);
+            int numBlocks = xLength / numThreads;
+            if (xLength % numThreads > 0 || numBlocks == 0)
+                numBlocks++;
+
+            numBlocks = nd4j::math::nd4j_min(512, numBlocks);
+            dim3 launchDims(numBlocks, numThreads, 32768);
+
+            int max = 2, dg = 0;
+            while (max < xLength) {
+                max <<= 1;
+                dg++;
+            }
+            max <<= 1;
+
+            for (int window = 2; window < max; window <<= 1) {
+                int n = window;
+                int rev = 0;
+                do {
+                    int half = n >> 1;
+                    BUILD_DOUBLE_SELECTOR(xType, yType, bitonicArbitraryStepGenericKey,
+                                          (launchDims, stream, dy, dyShapeInfo, dX, dXShapeInfo, n, xLength, rev, descending),
+                                          LIBND4J_TYPES, LIBND4J_TYPES);
+                    n >>= 1;
+                    rev = 1;
+                } while (n > 1);
            }
        }
-    } else {
-        int numThreads = nd4j::math::nd4j_min(512, xLength);
-        int numBlocks = xLength / numThreads;
-        if (xLength % numThreads > 0 || numBlocks == 0)
-            numBlocks++;
-
-        numBlocks = nd4j::math::nd4j_min(512, numBlocks);
-        dim3 launchDims(numBlocks, numThreads, 32768);
-
-        int max = 2, dg = 0;
-        while (max < xLength) {
-            max <<= 1;
-            dg++;
-        }
-        max <<= 1;
-
-        for (int window = 2; window < max; window<<=1) {
-            int n = window;
-            int rev = 0;
-            do{
-                int half = n >> 1;
-                BUILD_DOUBLE_SELECTOR(xType, yType, bitonicArbitraryStepGenericKey, (launchDims, stream, dy, dyShapeInfo, dX, dXShapeInfo, n, xLength, rev, descending), LIBND4J_TYPES, LIBND4J_TYPES);
-                n>>=1;
-                rev = 1;
-            } while(n > 1);
-        }
+    } catch (std::exception &e) {
+        nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1);
+        nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what());
    }
 }
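The launch-geometry arithmetic repeated by sort/sortByKey/sortByValue above can be restated on its own. A sketch under the same constants; isPowerOfTwo and bitonicLaunchSizes are illustrative names, not part of the patch:

    #include <algorithm>

    // The bitonic fast path above requires a power-of-two length (plus a size cap).
    static inline bool isPowerOfTwo(long long n) {
        return n != 0 && (n & (n - 1)) == 0;
    }

    // Up to 512 threads per block, grid rounded up for the tail; the
    // arbitrary-step (non power-of-two) path additionally caps the grid at 512.
    static inline void bitonicLaunchSizes(long long xLength, int &numThreads, int &numBlocks) {
        numThreads = (int) std::min<long long>(512, xLength);
        numBlocks = (int) (xLength / numThreads);
        if (xLength % numThreads > 0 || numBlocks == 0)
            numBlocks++;
        numBlocks = std::min(512, numBlocks);
    }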
failed"); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void sortTadByValue(Nd4jPointer *extraPointers, @@ -2422,16 +2398,24 @@ void sortTadByValue(Nd4jPointer *extraPointers, int *dimension, int dimensionLength, bool descending) { - auto stream = reinterpret_cast(extraPointers[1]); - auto context = extraPointers[0] == 0 ? LaunchContext::defaultContext(): reinterpret_cast(extraPointers[0]); - auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dimension, dimensionLength); - dim3 launchDims((int) tadPack.numberOfTads(), 256, 2048); - auto xType = nd4j::ArrayOptions::dataType(yShapeInfo); - auto yType = nd4j::ArrayOptions::dataType(xShapeInfo); + try { + auto stream = reinterpret_cast(extraPointers[1]); + auto context = extraPointers[0] == 0 ? LaunchContext::defaultContext() + : reinterpret_cast(extraPointers[0]); + auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dimension, dimensionLength); + dim3 launchDims((int) tadPack.numberOfTads(), 256, 2048); + auto xType = nd4j::ArrayOptions::dataType(yShapeInfo); + auto yType = nd4j::ArrayOptions::dataType(xShapeInfo); - BUILD_DOUBLE_SELECTOR(xType, yType, oesTadGenericKey, (launchDims, stream, dy, dyShapeInfo, dX, dXShapeInfo, nullptr, dimensionLength, tadPack.platformShapeInfo(), tadPack.platformOffsets(), descending), LIBND4J_TYPES, LIBND4J_TYPES); + BUILD_DOUBLE_SELECTOR(xType, yType, oesTadGenericKey, + (launchDims, stream, dy, dyShapeInfo, dX, dXShapeInfo, nullptr, dimensionLength, tadPack.platformShapeInfo(), tadPack.platformOffsets(), descending), + LIBND4J_TYPES, LIBND4J_TYPES); - nd4j::DebugHelper::checkErrorCode(stream, "sortTadValue(...) failed"); + nd4j::DebugHelper::checkErrorCode(stream, "sortTadValue(...) failed"); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } @@ -2443,15 +2427,23 @@ void sortTad(Nd4jPointer *extraPointers, Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, bool descending) { - // to be implemented - auto stream = reinterpret_cast(extraPointers[1]); - auto context = extraPointers[0] == 0 ? LaunchContext::defaultContext(): reinterpret_cast(extraPointers[0]); - auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dimension, dimensionLength); - dim3 launchDims((int) tadPack.numberOfTads(), 512, 33768); - auto xType = nd4j::ArrayOptions::dataType(xShapeInfo); - BUILD_SINGLE_SELECTOR(xType, oesTadGeneric, (launchDims, stream, dX, dXShapeInfo, nullptr, dimensionLength, tadShapeInfo, tadOffsets, descending), LIBND4J_TYPES); + try { + // to be implemented + auto stream = reinterpret_cast(extraPointers[1]); + auto context = extraPointers[0] == 0 ? LaunchContext::defaultContext() + : reinterpret_cast(extraPointers[0]); + auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(xShapeInfo, dimension, dimensionLength); + dim3 launchDims((int) tadPack.numberOfTads(), 512, 33768); + auto xType = nd4j::ArrayOptions::dataType(xShapeInfo); + BUILD_SINGLE_SELECTOR(xType, oesTadGeneric, + (launchDims, stream, dX, dXShapeInfo, nullptr, dimensionLength, tadShapeInfo, tadOffsets, descending), + LIBND4J_TYPES); - nd4j::DebugHelper::checkErrorCode(stream, "sortTad(...) 
failed"); + nd4j::DebugHelper::checkErrorCode(stream, "sortTad(...) failed"); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } void sortCooIndices(Nd4jPointer *extraPointers, Nd4jLong *indices, void *values, Nd4jLong length, int rank) { @@ -2464,21 +2456,29 @@ Nd4jLong encodeBitmap(Nd4jPointer *extraPointers, Nd4jLong N, int *dz, float threshold) { + try { - cudaStream_t *stream = reinterpret_cast(extraPointers[1]); - int *resultPointer = reinterpret_cast(extraPointers[2]); - int *reductionPointer = reinterpret_cast(extraPointers[3]); + cudaStream_t *stream = reinterpret_cast(extraPointers[1]); + int *resultPointer = reinterpret_cast(extraPointers[2]); + int *reductionPointer = reinterpret_cast(extraPointers[3]); - dim3 launchDims(512, 512, 32768); - auto xType = nd4j::ArrayOptions::dataType(hXShapeInfo); - BUILD_SINGLE_SELECTOR(xType, cudaEncodeBitmapGeneric, (launchDims, stream, dx, N, dz, resultPointer, reductionPointer, threshold), LIBND4J_TYPES); + dim3 launchDims(512, 512, 32768); + auto xType = nd4j::ArrayOptions::dataType(hXShapeInfo); + BUILD_SINGLE_SELECTOR(xType, cudaEncodeBitmapGeneric, + (launchDims, stream, dx, N, dz, resultPointer, reductionPointer, threshold), + LIBND4J_TYPES); - nd4j::DebugHelper::checkErrorCode(stream, "encodeBitmapFloat(...) failed"); + nd4j::DebugHelper::checkErrorCode(stream, "encodeBitmapFloat(...) failed"); - Nd4jLong dZ = (Nd4jLong) resultPointer[0]; - resultPointer[0] = 0; + Nd4jLong dZ = (Nd4jLong) resultPointer[0]; + resultPointer[0] = 0; - return dZ; + return dZ; + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return 0; + } } @@ -2486,13 +2486,17 @@ void decodeBitmap(Nd4jPointer *extraPointers, void *dx, Nd4jLong N, void *dz, Nd4jLong *zShapeInfo) { + try { + cudaStream_t *stream = reinterpret_cast(extraPointers[1]); + dim3 launchDims(512, 512, 16384); + auto xType = nd4j::ArrayOptions::dataType(zShapeInfo); + BUILD_SINGLE_SELECTOR(xType, cudaDecodeBitmapGeneric, (launchDims, stream, dx, N, dz), LIBND4J_TYPES); - cudaStream_t *stream = reinterpret_cast(extraPointers[1]); - dim3 launchDims(512, 512, 16384); - auto xType = nd4j::ArrayOptions::dataType(zShapeInfo); - BUILD_SINGLE_SELECTOR(xType, cudaDecodeBitmapGeneric, (launchDims, stream, dx, N, dz), LIBND4J_TYPES); - - nd4j::DebugHelper::checkErrorCode(stream, "decodeBitmapFloat(...) failed"); + nd4j::DebugHelper::checkErrorCode(stream, "decodeBitmapFloat(...) 
failed"); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + } } Nd4jLong* mmapFile(Nd4jPointer *extraPointers, const char *fileName, Nd4jLong length) { @@ -2505,7 +2509,13 @@ void munmapFile(Nd4jPointer *extraPointers, Nd4jLong* ptrMap, Nd4jLong length) { nd4j::graph::ResultWrapper* executeFlatGraph(Nd4jPointer *extraPointers, Nd4jPointer flatBufferPointer) { - return nd4j::graph::GraphExecutioner::executeFlatBuffer(flatBufferPointer); + try { + return nd4j::graph::GraphExecutioner::executeFlatBuffer(flatBufferPointer); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return nullptr; + } } Nd4jLong getResultWrapperSize(nd4j::graph::ResultWrapper* ptr) { @@ -2560,9 +2570,16 @@ nd4j::ShapeList* _calculateOutputShapes(Nd4jPointer* extraPointers, nd4j::ops::D } nd4j::ShapeList* calculateOutputShapes2(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputBuffers, Nd4jPointer* inputShapes, int numInputShapes, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs, bool *bArgs, int numBArgs) { - auto op = nd4j::ops::OpRegistrator::getInstance()->getOperation(hash); + try { + auto op = nd4j::ops::OpRegistrator::getInstance()->getOperation(hash); - return _calculateOutputShapes(extraPointers, op, inputBuffers, inputShapes, numInputShapes, tArgs, numTArgs, iArgs, numIArgs, bArgs, numBArgs); + return _calculateOutputShapes(extraPointers, op, inputBuffers, inputShapes, numInputShapes, tArgs, numTArgs, + iArgs, numIArgs, bArgs, numBArgs); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return nullptr; + } } nd4j::ShapeList* _calculateOutputShapes(Nd4jPointer* extraPointers, nd4j::ops::DeclarableOp* op, Nd4jPointer* inputShapes, int numInputShapes, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs) { @@ -2584,9 +2601,15 @@ nd4j::ShapeList* _calculateOutputShapes(Nd4jPointer* extraPointers, nd4j::ops::D } nd4j::ShapeList* calculateOutputShapes(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputShapes, int numInputShapes, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs) { - auto op = nd4j::ops::OpRegistrator::getInstance()->getOperation(hash); + try { + auto op = nd4j::ops::OpRegistrator::getInstance()->getOperation(hash); - return _calculateOutputShapes(extraPointers, op, inputShapes, numInputShapes, tArgs, numTArgs, iArgs, numIArgs); + return _calculateOutputShapes(extraPointers, op, inputShapes, numInputShapes, tArgs, numTArgs, iArgs, numIArgs); + } catch (std::exception &e) { + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1); + nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what()); + return nullptr; + } } Nd4jLong getShapeListSize(nd4j::ShapeList* list) { @@ -2681,39 +2704,57 @@ static FORCEINLINE Nd4jStatus realExec(nd4j::ops::DeclarableOp* op, Nd4jPointer* int execCustomOp(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputBuffers, Nd4jPointer* inputShapes, int numInputs, Nd4jPointer* outputBuffers, Nd4jPointer* outputShapes, int numOutputs, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs, bool* bArgs, int numBArgs, bool isInplace) { - auto op = 
@@ -2681,39 +2704,57 @@ static FORCEINLINE Nd4jStatus realExec(nd4j::ops::DeclarableOp* op, Nd4jPointer*

 int execCustomOp(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputBuffers, Nd4jPointer* inputShapes, int numInputs, Nd4jPointer* outputBuffers, Nd4jPointer* outputShapes, int numOutputs, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs, bool* bArgs, int numBArgs, bool isInplace) {
-    auto op = nd4j::ops::OpRegistrator::getInstance()->getOperation(hash);
+    try {
+        auto op = nd4j::ops::OpRegistrator::getInstance()->getOperation(hash);
-    return realExec(op, extraPointers, hash, inputBuffers, inputShapes, numInputs, outputBuffers, outputShapes, numOutputs, tArgs, numTArgs, iArgs, numIArgs, bArgs, numBArgs, isInplace);
+        return realExec(op, extraPointers, hash, inputBuffers, inputShapes, numInputs, outputBuffers, outputShapes,
+                        numOutputs, tArgs, numTArgs, iArgs, numIArgs, bArgs, numBArgs, isInplace);
+    } catch (std::exception &e) {
+        nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1);
+        nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what());
+        return 1;
+    }
 }

 int execCustomOp2(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer opContext) {
-    auto op = nd4j::ops::OpRegistrator::getInstance()->getOperation(hash);
-    auto context = reinterpret_cast<nd4j::graph::Context *>(opContext);
+    try {
+        auto op = nd4j::ops::OpRegistrator::getInstance()->getOperation(hash);
+        auto context = reinterpret_cast<nd4j::graph::Context *>(opContext);
-    auto result = op->execute(context);
+        auto result = op->execute(context);
-    auto res = cudaStreamSynchronize(*context->launchContext()->getCudaStream());
-    if (res != 0)
-        throw nd4j::cuda_exception::build("customOp execution failed", res);
+        auto res = cudaStreamSynchronize(*context->launchContext()->getCudaStream());
+        if (res != 0)
+            throw nd4j::cuda_exception::build("customOp execution failed", res);
-    for (auto v:context->fastpath_in()) {
-        v->syncToDevice();
+        for (auto v:context->fastpath_in()) {
+            v->syncToDevice();
+        }
+
+        for (auto v:context->fastpath_out()) {
+            v->syncToDevice();
+        }
+
+        return result;
+    } catch (std::exception &e) {
+        nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1);
+        nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what());
+        return 1;
    }
-
-    for (auto v:context->fastpath_out()) {
-        v->syncToDevice();
-    }
-
-    return result;
 }

 int registerGraph(Nd4jPointer *extraPointers, Nd4jLong graphId, Nd4jPointer flatBufferPointer) {
+    try {
+        auto graph = nd4j::graph::GraphExecutioner::importFromFlatPointer(flatBufferPointer);
-    auto graph = nd4j::graph::GraphExecutioner::importFromFlatPointer(flatBufferPointer);
+        nd4j::graph::GraphHolder::getInstance()->registerGraph(graphId, graph);
-    nd4j::graph::GraphHolder::getInstance()->registerGraph(graphId, graph);
-
-    return ND4J_STATUS_OK;
+        return ND4J_STATUS_OK;
+    } catch (std::exception &e) {
+        nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1);
+        nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what());
+        return 1;
+    }
 }

@@ -2764,7 +2805,13 @@ static VariablesSet* executeStoredGraphT(Nd4jPointer *extraPointers, Nd4jLong gr
 }

 VariablesSet* executeStoredGraph(Nd4jPointer *extraPointers, Nd4jLong graphId, Nd4jPointer *inputBuffers, Nd4jPointer *inputShapes, int* inputIndices, int numInputs) {
-    return executeStoredGraphT(extraPointers, graphId, inputBuffers, inputShapes, inputIndices, numInputs);
+    try {
+        return executeStoredGraphT(extraPointers, graphId, inputBuffers, inputShapes, inputIndices, numInputs);
+    } catch (std::exception &e) {
+        nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1);
+        nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what());
+        return nullptr;
+    }
 }

 Nd4jLong getVariablesSetSize(nd4j::graph::VariablesSet* set) {

@@ -2800,10 +2847,15 @@ void* getVariableBuffer(nd4j::graph::Variable* variable) {
 }
 int unregisterGraph(Nd4jPointer *extraPointers, Nd4jLong graphId) {
+    try {
+        nd4j::graph::GraphHolder::getInstance()->dropGraphAny(graphId);
-    nd4j::graph::GraphHolder::getInstance()->dropGraphAny(graphId);
-
-    return ND4J_STATUS_OK;
+        return ND4J_STATUS_OK;
+    } catch (std::exception &e) {
+        nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1);
+        nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what());
+        return 1;
+    }
 }

 void deletePointerArray(Nd4jPointer pointer) {

@@ -2918,8 +2970,15 @@ Nd4jStatus execCustomOpWithScope(Nd4jPointer *extraPointers, nd4j::graph::GraphS

 Nd4jStatus execCustomOpWithScope(Nd4jPointer *extraPointers, Nd4jPointer state, Nd4jLong opHash, Nd4jLong *scopes, int numScopes, Nd4jPointer *inputBuffers, Nd4jPointer *inputShapes, int numInputs, Nd4jPointer *outputBuffers, Nd4jPointer *outputShapes, int numOutputs) {
-
-    return execCustomOpWithScope(extraPointers, reinterpret_cast<nd4j::graph::GraphState *>(state), opHash, scopes, numScopes, inputBuffers, inputShapes, numInputs, outputBuffers, outputShapes, numOutputs);
+    try {
+        return execCustomOpWithScope(extraPointers, reinterpret_cast<nd4j::graph::GraphState *>(state), opHash, scopes,
+                                     numScopes, inputBuffers, inputShapes, numInputs, outputBuffers, outputShapes,
+                                     numOutputs);
+    } catch (std::exception &e) {
+        nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1);
+        nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what());
+        return 1;
+    }
 }

 void deleteResultWrapper(Nd4jPointer ptr) {

@@ -2937,181 +2996,186 @@ int estimateThreshold(Nd4jPointer *extraPointers, Nd4jPointer dX, Nd4jLong *dXSh
  * void convertTypes(Nd4jPointer *extras, int srcType, Nd4jPointer dX, long N, int dstType, Nd4jPointer dZ);
  */
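Each branch of convertTypes below dispatches to nd4j::TypeCast::convertGenericCuda, whose two template arguments track the (srcType, dstType) pair of the enclosing branch conditions. As a worked instance, the int8 to uint8 case reads:

    // Worked instance of the dispatch below: ND4J_INT8 source, ND4J_UINT8 destination.
    // The template arguments mirror the branch conditions.
    nd4j::TypeCast::convertGenericCuda<nd4j::int8, nd4j::uint8>(extras, dx, N, dz);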
 void convertTypes(Nd4jPointer *extras, int srcType, Nd4jPointer dX, Nd4jLong N, int dstType, Nd4jPointer dZ) {
-    auto dx = reinterpret_cast<void *>(dX);
-    auto dz = reinterpret_cast<void *>(dZ);
+    try {
+        auto dx = reinterpret_cast<void *>(dX);
+        auto dz = reinterpret_cast<void *>(dZ);
-    if (srcType == ND4J_FLOAT8) {
-        if (dstType == ND4J_FLOAT8) {
-            // convertKernel(extras, dx, N, dz);
-        } else if (dstType == ND4J_INT8) {
-            //nd4j::TypeCast::convertGenericCuda<nd4j::float8, nd4j::int8>(extras, dx, N, dz);
-        } else if (dstType == ND4J_UINT8) {
-            //nd4j::TypeCast::convertGenericCuda<nd4j::float8, nd4j::uint8>(extras, dx, N, dz);
-        } else if (dstType == ND4J_FLOAT16) {
-            //nd4j::TypeCast::convertGenericCuda<nd4j::float8, float16>(extras, dx, N, dz);
-        } else if (dstType == ND4J_INT16) {
-            //nd4j::TypeCast::convertGenericCuda<nd4j::float8, nd4j::int16>(extras, dx, N, dz);
-        } else if (dstType == ND4J_UINT16) {
-            //nd4j::TypeCast::convertGenericCuda<nd4j::float8, nd4j::uint16>(extras, dx, N, dz);
-        } else if (dstType == ND4J_FLOAT24) {
+        if (srcType == ND4J_FLOAT8) {
+            if (dstType == ND4J_FLOAT8) {
+                // convertKernel(extras, dx, N, dz);
+            } else if (dstType == ND4J_INT8) {
+                //nd4j::TypeCast::convertGenericCuda<nd4j::float8, nd4j::int8>(extras, dx, N, dz);
+            } else if (dstType == ND4J_UINT8) {
+                //nd4j::TypeCast::convertGenericCuda<nd4j::float8, nd4j::uint8>(extras, dx, N, dz);
+            } else if (dstType == ND4J_FLOAT16) {
+                //nd4j::TypeCast::convertGenericCuda<nd4j::float8, float16>(extras, dx, N, dz);
+            } else if (dstType == ND4J_INT16) {
+                //nd4j::TypeCast::convertGenericCuda<nd4j::float8, nd4j::int16>(extras, dx, N, dz);
+            } else if (dstType == ND4J_UINT16) {
+                //nd4j::TypeCast::convertGenericCuda<nd4j::float8, nd4j::uint16>(extras, dx, N, dz);
+            } else if (dstType == ND4J_FLOAT24) {

-        } else if (dstType == ND4J_FLOAT32) {
-            //nd4j::TypeCast::convertGenericCuda<nd4j::float8, float>(extras, dx, N, dz);
-        } else if (dstType == ND4J_DOUBLE) {
-            //nd4j::TypeCast::convertGenericCuda<nd4j::float8, double>(extras, dx, N, dz);
-        } else {
-            nd4j_printf("Unsupported types conversion: [%i] -> [%i]\n", srcType, dstType);
-        }
-    } else if (srcType == ND4J_INT8) {
-        if (dstType == ND4J_FLOAT8) {
-            //nd4j::TypeCast::convertGenericCuda<nd4j::int8, nd4j::float8>(extras, dx, N, dz);
-        } else if (dstType == ND4J_INT8) {
-            //convertKernel(extras, dx, N, dz);
-        } else if (dstType == ND4J_UINT8) {
-            nd4j::TypeCast::convertGenericCuda<nd4j::int8, nd4j::uint8>(extras, dx, N, dz);
-        } else if (dstType == ND4J_FLOAT16) {
-            nd4j::TypeCast::convertGenericCuda<nd4j::int8, float16>(extras, dx, N, dz);
-        } else if (dstType == ND4J_INT16) {
-            nd4j::TypeCast::convertGenericCuda<nd4j::int8, nd4j::int16>(extras, dx, N, dz);
-        } else if (dstType == ND4J_UINT16) {
-            nd4j::TypeCast::convertGenericCuda<nd4j::int8, nd4j::uint16>(extras, dx, N, dz);
-        } else if (dstType == ND4J_FLOAT24) {
-            // TODO: eventually we might want to add it
-        } else if (dstType == ND4J_FLOAT32) {
-            nd4j::TypeCast::convertGenericCuda<nd4j::int8, float>(extras, dx, N, dz);
-        } else if (dstType == ND4J_DOUBLE) {
-            nd4j::TypeCast::convertGenericCuda<nd4j::int8, double>(extras, dx, N, dz);
-        } else {
-            nd4j_printf("Unsupported types conversion: [%i] -> [%i]\n", srcType, dstType);
-        }
-    } else if (srcType == ND4J_UINT8) {
-        if (dstType == ND4J_FLOAT8) {
-            //nd4j::TypeCast::convertGenericCuda<nd4j::uint8, nd4j::float8>(extras, dx, N, dz);
-        } else if (dstType == ND4J_INT8) {
-            nd4j::TypeCast::convertGenericCuda<nd4j::uint8, nd4j::int8>(extras, dx, N, dz);
-        } else if (dstType == ND4J_UINT8) {
-            nd4j::TypeCast::convertGenericCuda<nd4j::uint8, nd4j::uint8>(extras, dx, N, dz);
-        } else if (dstType == ND4J_FLOAT16) {
-            nd4j::TypeCast::convertGenericCuda<nd4j::uint8, float16>(extras, dx, N, dz);
-        } else if (dstType == ND4J_INT16) {
-            nd4j::TypeCast::convertGenericCuda<nd4j::uint8, nd4j::int16>(extras, dx, N, dz);
-        } else if (dstType == ND4J_UINT16) {
-            nd4j::TypeCast::convertGenericCuda<nd4j::uint8, nd4j::uint16>(extras, dx, N, dz);
-        } else if (dstType == ND4J_FLOAT24) {
-            // TODO: still might want to add
-        } else if (dstType == ND4J_FLOAT32) {
-            nd4j::TypeCast::convertGenericCuda<nd4j::uint8, float>(extras, dx, N, dz);
-        } else if (dstType == ND4J_DOUBLE) {
-            nd4j::TypeCast::convertGenericCuda<nd4j::uint8, double>(extras, dx, N, dz);
-        } else {
-            nd4j_printf("Unsupported types conversion: [%i] -> [%i]\n", srcType, dstType);
-        }
-    } else if (srcType == ND4J_FLOAT16) {
-        if (dstType == ND4J_FLOAT8) {
-            //nd4j::TypeCast::convertGenericCuda<float16, nd4j::float8>(extras, dx, N, dz);
-        } else if (dstType == ND4J_INT8) {
-            nd4j::TypeCast::convertGenericCuda<float16, nd4j::int8>(extras, dx, N, dz);
-        } else if (dstType == ND4J_UINT8) {
-            nd4j::TypeCast::convertGenericCuda<float16, nd4j::uint8>(extras, dx, N, dz);
-        } else if (dstType == ND4J_FLOAT16) {
-            nd4j::TypeCast::convertGenericCuda<float16, float16>(extras, dx, N, dz);
-        } else if (dstType == ND4J_INT16) {
-            nd4j::TypeCast::convertGenericCuda<float16, nd4j::int16>(extras, dx, N, dz);
-        } else if (dstType == ND4J_UINT16) {
-            nd4j::TypeCast::convertGenericCuda<float16, nd4j::uint16>(extras, dx, N, dz);
-        } else if (dstType == ND4J_FLOAT24) {
-            // TODO: .... ^^^
-        } else if (dstType == ND4J_FLOAT32) {
-            nd4j::TypeCast::convertGenericCuda<float16, float>(extras, dx, N, dz);
-        } else if (dstType == ND4J_DOUBLE) {
-            nd4j::TypeCast::convertGenericCuda<float16, double>(extras, dx, N, dz);
-        } else if (dstType == ND4J_THRESHOLD) {
-            //nd4j::convertToThreshold<float16>(nullptr, dx, N, dz);
-        } else {
-            nd4j_printf("Unsupported types conversion: [%i] -> [%i]\n", srcType, dstType);
-        }
-    } else if (srcType == ND4J_INT16) {
-        if (dstType == ND4J_FLOAT8) {
-            //nd4j::TypeCast::convertGenericCuda<nd4j::int16, nd4j::float8>(extras, dx, N, dz);
-        } else if (dstType == ND4J_INT8) {
-            nd4j::TypeCast::convertGenericCuda<nd4j::int16, nd4j::int8>(extras, dx, N, dz);
-        } else if (dstType == ND4J_UINT8) {
-            nd4j::TypeCast::convertGenericCuda<nd4j::int16, nd4j::uint8>(extras, dx, N, dz);
-        } else if (dstType == ND4J_FLOAT16) {
-            nd4j::TypeCast::convertGenericCuda<nd4j::int16, float16>(extras, dx, N, dz);
-        } else if (dstType == ND4J_INT16) {
-            nd4j::TypeCast::convertGenericCuda<nd4j::int16, nd4j::int16>(extras, dx, N, dz);
-        } else if (dstType == ND4J_UINT16) {
-            nd4j::TypeCast::convertGenericCuda<nd4j::int16, nd4j::uint16>(extras, dx, N, dz);
-        } else if (dstType == ND4J_FLOAT24) {
-            // TODO...
-        } else if (dstType == ND4J_FLOAT32) {
-            nd4j::TypeCast::convertGenericCuda<nd4j::int16, float>(extras, dx, N, dz);
-        } else if (dstType == ND4J_DOUBLE) {
-            nd4j::TypeCast::convertGenericCuda<nd4j::int16, double>(extras, dx, N, dz);
-        } else {
-            printf("Unsupported types conversion: [%i] -> [%i]\n", srcType, dstType);
-        }
-    } else if (srcType == ND4J_FLOAT24) {
+            } else if (dstType == ND4J_FLOAT32) {
+                //nd4j::TypeCast::convertGenericCuda<nd4j::float8, float>(extras, dx, N, dz);
+            } else if (dstType == ND4J_DOUBLE) {
+                //nd4j::TypeCast::convertGenericCuda<nd4j::float8, double>(extras, dx, N, dz);
+            } else {
+                nd4j_printf("Unsupported types conversion: [%i] -> [%i]\n", srcType, dstType);
+            }
+        } else if (srcType == ND4J_INT8) {
+            if (dstType == ND4J_FLOAT8) {
+                //nd4j::TypeCast::convertGenericCuda<nd4j::int8, nd4j::float8>(extras, dx, N, dz);
+            } else if (dstType == ND4J_INT8) {
+                //convertKernel(extras, dx, N, dz);
+            } else if (dstType == ND4J_UINT8) {
+                nd4j::TypeCast::convertGenericCuda<nd4j::int8, nd4j::uint8>(extras, dx, N, dz);
+            } else if (dstType == ND4J_FLOAT16) {
+                nd4j::TypeCast::convertGenericCuda<nd4j::int8, float16>(extras, dx, N, dz);
+            } else if (dstType == ND4J_INT16) {
+                nd4j::TypeCast::convertGenericCuda<nd4j::int8, nd4j::int16>(extras, dx, N, dz);
+            } else if (dstType == ND4J_UINT16) {
+                nd4j::TypeCast::convertGenericCuda<nd4j::int8, nd4j::uint16>(extras, dx, N, dz);
+            } else if (dstType == ND4J_FLOAT24) {
+                // TODO: eventually we might want to add it
+            } else if (dstType == ND4J_FLOAT32) {
+                nd4j::TypeCast::convertGenericCuda<nd4j::int8, float>(extras, dx, N, dz);
+            } else if (dstType == ND4J_DOUBLE) {
+                nd4j::TypeCast::convertGenericCuda<nd4j::int8, double>(extras, dx, N, dz);
+            } else {
+                nd4j_printf("Unsupported types conversion: [%i] -> [%i]\n", srcType, dstType);
+            }
+        } else if (srcType == ND4J_UINT8) {
+            if (dstType == ND4J_FLOAT8) {
+                //nd4j::TypeCast::convertGenericCuda<nd4j::uint8, nd4j::float8>(extras, dx, N, dz);
+            } else if (dstType == ND4J_INT8) {
+                nd4j::TypeCast::convertGenericCuda<nd4j::uint8, nd4j::int8>(extras, dx, N, dz);
+            } else if (dstType == ND4J_UINT8) {
+                nd4j::TypeCast::convertGenericCuda<nd4j::uint8, nd4j::uint8>(extras, dx, N, dz);
+            } else if (dstType == ND4J_FLOAT16) {
+                nd4j::TypeCast::convertGenericCuda<nd4j::uint8, float16>(extras, dx, N, dz);
+            } else if (dstType == ND4J_INT16) {
+                nd4j::TypeCast::convertGenericCuda<nd4j::uint8, nd4j::int16>(extras, dx, N, dz);
+            } else if (dstType == ND4J_UINT16) {
+                nd4j::TypeCast::convertGenericCuda<nd4j::uint8, nd4j::uint16>(extras, dx, N, dz);
+            } else if (dstType == ND4J_FLOAT24) {
+                // TODO: still might want to add
+            } else if (dstType == ND4J_FLOAT32) {
+                nd4j::TypeCast::convertGenericCuda<nd4j::uint8, float>(extras, dx, N, dz);
+            } else if (dstType == ND4J_DOUBLE) {
+                nd4j::TypeCast::convertGenericCuda<nd4j::uint8, double>(extras, dx, N, dz);
+            } else {
+                nd4j_printf("Unsupported types conversion: [%i] -> [%i]\n", srcType, dstType);
+            }
+        } else if (srcType == ND4J_FLOAT16) {
+            if (dstType == ND4J_FLOAT8) {
+                //nd4j::TypeCast::convertGenericCuda<float16, nd4j::float8>(extras, dx, N, dz);
+            } else if (dstType == ND4J_INT8) {
+                nd4j::TypeCast::convertGenericCuda<float16, nd4j::int8>(extras, dx, N, dz);
+            } else if (dstType == ND4J_UINT8) {
+                nd4j::TypeCast::convertGenericCuda<float16, nd4j::uint8>(extras, dx, N, dz);
+            } else if (dstType == ND4J_FLOAT16) {
+                nd4j::TypeCast::convertGenericCuda<float16, float16>(extras, dx, N, dz);
+            } else if (dstType == ND4J_INT16) {
+                nd4j::TypeCast::convertGenericCuda<float16, nd4j::int16>(extras, dx, N, dz);
+            } else if (dstType == ND4J_UINT16) {
+                nd4j::TypeCast::convertGenericCuda<float16, nd4j::uint16>(extras, dx, N, dz);
+            } else if (dstType == ND4J_FLOAT24) {
+                // TODO: .... ^^^
+            } else if (dstType == ND4J_FLOAT32) {
+                nd4j::TypeCast::convertGenericCuda<float16, float>(extras, dx, N, dz);
+            } else if (dstType == ND4J_DOUBLE) {
+                nd4j::TypeCast::convertGenericCuda<float16, double>(extras, dx, N, dz);
+            } else if (dstType == ND4J_THRESHOLD) {
+                //nd4j::convertToThreshold<float16>(nullptr, dx, N, dz);
+            } else {
+                nd4j_printf("Unsupported types conversion: [%i] -> [%i]\n", srcType, dstType);
+            }
+        } else if (srcType == ND4J_INT16) {
+            if (dstType == ND4J_FLOAT8) {
+                //nd4j::TypeCast::convertGenericCuda<nd4j::int16, nd4j::float8>(extras, dx, N, dz);
+            } else if (dstType == ND4J_INT8) {
+                nd4j::TypeCast::convertGenericCuda<nd4j::int16, nd4j::int8>(extras, dx, N, dz);
+            } else if (dstType == ND4J_UINT8) {
+                nd4j::TypeCast::convertGenericCuda<nd4j::int16, nd4j::uint8>(extras, dx, N, dz);
+            } else if (dstType == ND4J_FLOAT16) {
+                nd4j::TypeCast::convertGenericCuda<nd4j::int16, float16>(extras, dx, N, dz);
+            } else if (dstType == ND4J_INT16) {
+                nd4j::TypeCast::convertGenericCuda<nd4j::int16, nd4j::int16>(extras, dx, N, dz);
+            } else if (dstType == ND4J_UINT16) {
+                nd4j::TypeCast::convertGenericCuda<nd4j::int16, nd4j::uint16>(extras, dx, N, dz);
+            } else if (dstType == ND4J_FLOAT24) {
+                // TODO...
+            } else if (dstType == ND4J_FLOAT32) {
+                nd4j::TypeCast::convertGenericCuda<nd4j::int16, float>(extras, dx, N, dz);
+            } else if (dstType == ND4J_DOUBLE) {
+                nd4j::TypeCast::convertGenericCuda<nd4j::int16, double>(extras, dx, N, dz);
+            } else {
+                printf("Unsupported types conversion: [%i] -> [%i]\n", srcType, dstType);
+            }
+        } else if (srcType == ND4J_FLOAT24) {

-    } else if (srcType == ND4J_FLOAT32) {
-        if (dstType == ND4J_FLOAT8) {
-            //nd4j::TypeCast::convertGenericCuda<float, nd4j::float8>(extras, dx, N, dz);
-        } else if (dstType == ND4J_INT8) {
-            nd4j::TypeCast::convertGenericCuda<float, nd4j::int8>(extras, dx, N, dz);
-        } else if (dstType == ND4J_UINT8) {
-            nd4j::TypeCast::convertGenericCuda<float, nd4j::uint8>(extras, dx, N, dz);
-        } else if (dstType == ND4J_FLOAT16) {
-            nd4j::TypeCast::convertGenericCuda<float, float16>(extras, dx, N, dz);
-        } else if (dstType == ND4J_INT16) {
-            nd4j::TypeCast::convertGenericCuda<float, nd4j::int16>(extras, dx, N, dz);
-        } else if (dstType == ND4J_UINT16) {
-            nd4j::TypeCast::convertGenericCuda<float, nd4j::uint16>(extras, dx, N, dz);
-        } else if (dstType == ND4J_FLOAT24) {
+        } else if (srcType == ND4J_FLOAT32) {
+            if (dstType == ND4J_FLOAT8) {
+                //nd4j::TypeCast::convertGenericCuda<float, nd4j::float8>(extras, dx, N, dz);
+            } else if (dstType == ND4J_INT8) {
+                nd4j::TypeCast::convertGenericCuda<float, nd4j::int8>(extras, dx, N, dz);
+            } else if (dstType == ND4J_UINT8) {
+                nd4j::TypeCast::convertGenericCuda<float, nd4j::uint8>(extras, dx, N, dz);
+            } else if (dstType == ND4J_FLOAT16) {
+                nd4j::TypeCast::convertGenericCuda<float, float16>(extras, dx, N, dz);
+            } else if (dstType == ND4J_INT16) {
+                nd4j::TypeCast::convertGenericCuda<float, nd4j::int16>(extras, dx, N, dz);
+            } else if (dstType == ND4J_UINT16) {
+                nd4j::TypeCast::convertGenericCuda<float, nd4j::uint16>(extras, dx, N, dz);
+            } else if (dstType == ND4J_FLOAT24) {

-        } else if (dstType == ND4J_DOUBLE) {
-            nd4j::TypeCast::convertGenericCuda<float, double>(extras, dx, N, dz);
-        } else if (dstType == ND4J_THRESHOLD) {
-            //nd4j::convertToThreshold<float>(nullptr, dx, N, dz);
-        } else {
-            nd4j_printf("Unsupported types conversion: [%i] -> [%i]\n", srcType, dstType);
-        }
-    } else if (srcType == ND4J_DOUBLE) {
-        if (dstType == ND4J_FLOAT8) {
-            //nd4j::TypeCast::convertGenericCuda<double, nd4j::float8>(extras, dx, N, dz);
-        } else if (dstType == ND4J_INT8) {
-            nd4j::TypeCast::convertGenericCuda<double, nd4j::int8>(extras, dx, N, dz);
-        } else if (dstType == ND4J_UINT8) {
-            nd4j::TypeCast::convertGenericCuda<double, nd4j::uint8>(extras, dx, N, dz);
-        } else if (dstType == ND4J_FLOAT16) {
-            nd4j::TypeCast::convertGenericCuda<double, float16>(extras, dx, N, dz);
-        } else if (dstType == ND4J_INT16) {
-            nd4j::TypeCast::convertGenericCuda<double, nd4j::int16>(extras, dx, N, dz);
-        } else if (dstType == ND4J_UINT16) {
-            nd4j::TypeCast::convertGenericCuda<double, nd4j::uint16>(extras, dx, N, dz);
-        } else if (dstType == ND4J_FLOAT24) {
+            } else if (dstType == ND4J_FLOAT32) {
+                nd4j::TypeCast::convertGenericCuda<double, float>(extras, dx, N, dz);
+            } else if (dstType == ND4J_DOUBLE) {
+                //
+            } else if (dstType == ND4J_THRESHOLD) {
+                //nd4j::convertToThreshold<double>(nullptr, dx, N, dz);
+            } else {
+                nd4j_printf("Unsupported types conversion: [%i] -> [%i]\n", srcType, dstType);
+            }
+        } else if (srcType == ND4J_THRESHOLD) {
+            if (dstType == ND4J_FLOAT16) {
+                //nd4j::convertFromThreshold<float16>(nullptr, dx, N, dz);
+            } else if (dstType == ND4J_FLOAT32) {
+                //nd4j::convertFromThreshold<float>(nullptr, dx, N, dz);
+            } else if (dstType == ND4J_DOUBLE) {
+                //nd4j::convertFromThreshold<double>(nullptr, dx, N, dz);
+            } else {
+                nd4j_printf("Unsupported types conversion: [%i] -> [%i]\n", srcType, dstType);
+            }
        } else {
            nd4j_printf("Unsupported types conversion: [%i] -> [%i]\n", srcType, dstType);
        }
-    } else if (srcType == ND4J_THRESHOLD) {
-        if (dstType == ND4J_FLOAT16) {
-            //nd4j::convertFromThreshold<float16>(nullptr, dx, N, dz);
-        } else if (dstType == ND4J_FLOAT32) {
-            //nd4j::convertFromThreshold<float>(nullptr, dx, N, dz);
-        } else if (dstType == ND4J_DOUBLE) {
-            //nd4j::convertFromThreshold<double>(nullptr, dx, N, dz);
-        } else {
-            nd4j_printf("Unsupported types conversion: [%i] -> [%i]\n", srcType, dstType);
-        }
-    } else {
-        nd4j_printf("Unsupported types conversion: [%i] -> [%i]\n", srcType, dstType);
+    } catch (std::exception &e) {
+        nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1);
+        nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what());
    }
 }
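The identical catch tail now trails every exported wrapper in this file; if it keeps spreading it could be factored into one helper. A sketch only, not part of this patch:

    // Illustrative: one possible factoring of the recurring catch block.
    static void recordNativeException(const std::exception &e) {
        auto ref = nd4j::LaunchContext::defaultContext()->errorReference();
        ref->setErrorCode(1);
        ref->setErrorMessage(e.what());
    }

Each wrapper body would then reduce to try { ... } catch (std::exception &e) { recordNativeException(e); }.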
@@ -3209,20 +3273,31 @@ void scatterUpdate(Nd4jPointer *extraPointers, int opCode, int numOfSubArrs,
                    void* hY, Nd4jLong* hYShapeInfo, Nd4jLong* hYOffsets,
                    void* dY, Nd4jLong* dYShapeInfo, Nd4jLong* dYOffsets,
                    int* hIindexes, int* dIndexes) {
+    try {
+        auto stream = reinterpret_cast<cudaStream_t *>(extraPointers[1]);
-    auto stream = reinterpret_cast<cudaStream_t *>(extraPointers[1]);
+        nd4j::DataType type = ArrayOptions::dataType(hXShapeInfo);
-    nd4j::DataType type = ArrayOptions::dataType(hXShapeInfo);
-
-    BUILD_SINGLE_SELECTOR(type, scatterUpdateCudaLauncher, (stream, opCode, numOfSubArrs, dX, dXShapeInfo, dXOffsets, dY, dYShapeInfo, dYOffsets, dIndexes), LIBND4J_TYPES);
-    nd4j::DebugHelper::checkErrorCode(stream, "scatterUpdate(...) failed");
+        BUILD_SINGLE_SELECTOR(type, scatterUpdateCudaLauncher,
+                              (stream, opCode, numOfSubArrs, dX, dXShapeInfo, dXOffsets, dY, dYShapeInfo, dYOffsets, dIndexes),
+                              LIBND4J_TYPES);
+        nd4j::DebugHelper::checkErrorCode(stream, "scatterUpdate(...) failed");
+    } catch (std::exception &e) {
+        nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1);
+        nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what());
+    }
 }

 void inspectArray(Nd4jPointer *extraPointers, Nd4jPointer buffer, Nd4jLong *shapeInfo, Nd4jPointer specialBuffer, Nd4jLong *specialShapeInfo, Nd4jPointer debugInfo) {
-    LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]);
-    auto p = reinterpret_cast<DebugInfo *>(debugInfo);
-    NDArray array(buffer, specialBuffer, shapeInfo, &lc);
-    nd4j::DebugHelper::retrieveDebugStatistics(p, &array);
+    try {
+        LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]);
+        auto p = reinterpret_cast<DebugInfo *>(debugInfo);
+        NDArray array(buffer, specialBuffer, shapeInfo, &lc);
+        nd4j::DebugHelper::retrieveDebugStatistics(p, &array);
+    } catch (std::exception &e) {
+        nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1);
+        nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what());
+    }
 }

 void __global__ tryPointerKernel(void* p, int len) {

@@ -3239,26 +3314,37 @@ void __global__ tryPointerKernel(void* p, int len) {
 }

 void tryPointer(Nd4jPointer extra, Nd4jPointer p, int len) {
+    try {
+        cudaStream_t stream;
+        cudaStreamCreate(&stream);
-    cudaStream_t stream;
-    cudaStreamCreate(&stream);
+        tryPointerKernel<<<256, 512, len + 64, stream>>>(p, len);
+        auto e = cudaStreamSynchronize(stream);
-    tryPointerKernel<<<256, 512, len+64, stream>>>(p, len);
-    auto e = cudaStreamSynchronize(stream);
+        if (e != 0)
+            throw nd4j::cuda_exception::build("tryPointer failed", e);
-    if (e != 0)
-        throw nd4j::cuda_exception::build("tryPointer failed", e);
-
-    cudaStreamDestroy(stream);
+        cudaStreamDestroy(stream);
+    } catch (std::exception &e) {
+        nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1);
+        nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what());
+    }
 }

 int dataTypeFromNpyHeader(void *header) {
     return (int) cnpy::dataTypeFromHeader(reinterpret_cast<char *>(header));
 }

 nd4j::ConstantDataBuffer* shapeBuffer(int rank, Nd4jLong *shape, Nd4jLong *strides, nd4j::DataType dtype, char order, Nd4jLong ews, bool empty) {
-    auto buffer = new ConstantDataBuffer();
-    *buffer = nd4j::ConstantShapeHelper::getInstance()->bufferForShapeInfo(ShapeDescriptor(dtype, order, shape, strides, rank, ews, empty));
-    return buffer;
+    try {
+        auto buffer = new ConstantDataBuffer();
+        *buffer = nd4j::ConstantShapeHelper::getInstance()->bufferForShapeInfo(
+                ShapeDescriptor(dtype, order, shape, strides, rank, ews, empty));
+        return buffer;
+    } catch (std::exception &e) {
+        nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1);
+        nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what());
+        return nullptr;
+    }
 }

 void deleteShapeBuffer(nd4j::ConstantDataBuffer* ptr) {

@@ -3359,60 +3445,79 @@ void deleteRandomGenerator(nd4j::graph::RandomGenerator* ptr) {

 Nd4jPointer shapeBufferForNumpy(Nd4jPointer npyArray) {
-    cnpy::NpyArray arr = cnpy::loadNpyFromPointer(reinterpret_cast<char *>(npyArray));
-    unsigned int shapeSize = arr.shape.size();
-    std::vector<Nd4jLong> shape(shapeSize);
-    bool _empty = false;
-    for(unsigned int i = 0; i < shapeSize; i++) {
-        shape[i] = arr.shape[i];
+    try {
+        cnpy::NpyArray arr = cnpy::loadNpyFromPointer(reinterpret_cast<char *>(npyArray));
+        unsigned int shapeSize = arr.shape.size();
+        std::vector<Nd4jLong> shape(shapeSize);
+        bool _empty = false;
+        for (unsigned int i = 0; i < shapeSize; i++) {
+            shape[i] = arr.shape[i];
-        if (arr.shape[i] == 0)
-            _empty = true;
+            if (arr.shape[i] == 0)
+                _empty = true;
+        }
+
+        auto dtype = cnpy::dataTypeFromHeader(reinterpret_cast<char *>(npyArray));
+
+        Nd4jLong *shapeBuffer;
+        if (shape.size() == 1 && shape[0] == 0) {
+            // scalar case
+            shapeBuffer = nd4j::ShapeBuilders::createScalarShapeInfo(dtype);
+        } else if (_empty) {
+            if (shapeSize > 0)
+                shapeBuffer = nd4j::ShapeBuilders::emptyShapeInfo(dtype, arr.fortranOrder ? 'f' : 'c', shape);
+            else
+                shapeBuffer = nd4j::ShapeBuilders::emptyShapeInfo(dtype);
+        } else {
+            shapeBuffer = nd4j::ShapeBuilders::createShapeInfo(dtype, arr.fortranOrder ? 'f' : 'c', shape);
+        }
+        return reinterpret_cast<Nd4jPointer>(nd4j::ConstantShapeHelper::getInstance()->createFromExisting(shapeBuffer,
+                                                                                                          true));
+    } catch (std::exception &e) {
+        nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1);
+        nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what());
+        return nullptr;
    }
-
-    auto dtype = cnpy::dataTypeFromHeader(reinterpret_cast<char *>(npyArray));
-
-    Nd4jLong *shapeBuffer;
-    if (shape.size() == 1 && shape[0] == 0) {
-        // scalar case
-        shapeBuffer = nd4j::ShapeBuilders::createScalarShapeInfo(dtype);
-    } else if (_empty) {
-        if (shapeSize > 0)
-            shapeBuffer = nd4j::ShapeBuilders::emptyShapeInfo(dtype, arr.fortranOrder ? 'f' : 'c', shape);
-        else
-            shapeBuffer = nd4j::ShapeBuilders::emptyShapeInfo(dtype);
-    } else {
-        shapeBuffer = nd4j::ShapeBuilders::createShapeInfo(dtype, arr.fortranOrder ? 'f' : 'c', shape);
-    }
-    return reinterpret_cast<Nd4jPointer>(nd4j::ConstantShapeHelper::getInstance()->createFromExisting(shapeBuffer, true));
 }
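shapeBufferForNumpy distinguishes three shape cases from the npy header. Restated compactly, assuming the ShapeBuilders declarations used above; npyShapeInfo is an illustrative name:

    // Illustrative restatement of the three-way shape decision above.
    static Nd4jLong* npyShapeInfo(const std::vector<Nd4jLong> &shape, nd4j::DataType dtype,
                                  bool fortranOrder, bool empty) {
        if (shape.size() == 1 && shape[0] == 0)      // rank-1 with extent 0: treated as scalar
            return nd4j::ShapeBuilders::createScalarShapeInfo(dtype);
        if (empty)                                   // any zero extent: empty array, rank kept if known
            return shape.empty() ? nd4j::ShapeBuilders::emptyShapeInfo(dtype)
                                 : nd4j::ShapeBuilders::emptyShapeInfo(dtype, fortranOrder ? 'f' : 'c', shape);
        return nd4j::ShapeBuilders::createShapeInfo(dtype, fortranOrder ? 'f' : 'c', shape);
    }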
 const char* runLightBenchmarkSuit(bool printOut) {
-    nd4j::LightBenchmarkSuit suit;
-    auto result = suit.runSuit();
+    try {
+        nd4j::LightBenchmarkSuit suit;
+        auto result = suit.runSuit();
-    if (printOut)
-        nd4j_printf("%s\n", result.data());
+        if (printOut)
+            nd4j_printf("%s\n", result.data());
-    auto chars = new char[result.length()+1];
-    std::memcpy(chars, result.data(), result.length());
-    chars[result.length()] = (char) 0x0;
+        auto chars = new char[result.length() + 1];
+        std::memcpy(chars, result.data(), result.length());
+        chars[result.length()] = (char) 0x0;
-    return chars;
+        return chars;
+    } catch (std::exception &e) {
+        nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1);
+        nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what());
+        return nullptr;
+    }
 }

 const char* runFullBenchmarkSuit(bool printOut) {
-    nd4j::FullBenchmarkSuit suit;
-    auto result = suit.runSuit();
+    try {
+        nd4j::FullBenchmarkSuit suit;
+        auto result = suit.runSuit();
-    if (printOut)
-        nd4j_printf("%s\n", result.data());
+        if (printOut)
+            nd4j_printf("%s\n", result.data());
-    auto chars = new char[result.length()+1];
-    std::memcpy(chars, result.data(), result.length());
-    chars[result.length()] = (char) 0x0;
+        auto chars = new char[result.length() + 1];
+        std::memcpy(chars, result.data(), result.length());
+        chars[result.length()] = (char) 0x0;
-    return chars;
+        return chars;
+    } catch (std::exception &e) {
+        nd4j::LaunchContext::defaultContext()->errorReference()->setErrorCode(1);
+        nd4j::LaunchContext::defaultContext()->errorReference()->setErrorMessage(e.what());
+        return nullptr;
+    }
 }

 Nd4jLong getCachedMemory(int deviceId) {

@@ -3449,4 +3554,12 @@ Nd4jPointer lcBlasHandle(OpaqueLaunchContext* lc) {

 Nd4jPointer lcSolverHandle(OpaqueLaunchContext* lc) {
     return lc->getCusolverHandle();
+}
+
+int lastErrorCode() {
+    return nd4j::LaunchContext::defaultContext()->errorReference()->errorCode();
+}
+
+const char* lastErrorMessage() {
+    return nd4j::LaunchContext::defaultContext()->errorReference()->errorMessage();
 }
\ No newline at end of file
diff --git a/libnd4j/include/execution/ContextBuffers.h b/libnd4j/include/execution/ContextBuffers.h
index 130354070..67c428d27 100644
--- a/libnd4j/include/execution/ContextBuffers.h
+++ b/libnd4j/include/execution/ContextBuffers.h
@@ -23,6 +23,7 @@
 #include
 #include
+#include <execution/ErrorReference.h>

 namespace nd4j {
     class ND4J_EXPORT ContextBuffers {
@@ -32,6 +33,7 @@ namespace nd4j {
         void* _allocationPointer = nullptr;
         void* _execStream = nullptr;
         void* _specialStream = nullptr;
+        sd::ErrorReference _errorReference;
         bool _allocated = false;
         bool _initialized = false;
@@ -60,6 +62,8 @@ namespace nd4j {
         void setScalarBuffer(void* pointer);
         void setAllocationBuffer(void* pointer);

+        sd::ErrorReference* errorReference();
+
         void triggerOwnership(bool isOwner);

         int deviceId();
diff --git a/libnd4j/include/helpers/ProviderRNG.h b/libnd4j/include/execution/ErrorReference.h
similarity index 57%
rename from libnd4j/include/helpers/ProviderRNG.h
rename to libnd4j/include/execution/ErrorReference.h
index e82f6ac98..2b68d5855 100644
--- a/libnd4j/include/helpers/ProviderRNG.h
+++ b/libnd4j/include/execution/ErrorReference.h
@@ -15,32 +15,32 @@
 ******************************************************************************/

 //
-// Created by Yurii Shyrma on 27.01.2018
+// @author raver119@gmail.com
 //

-#ifndef LIBND4J_PROVIDERRNG_H
-#define LIBND4J_PROVIDERRNG_H +#ifndef DEV_TESTS_ERRORREFERENCE_H +#define DEV_TESTS_ERRORREFERENCE_H -#include -#include - -namespace nd4j { - -class ProviderRNG { - - protected: - random::RandomBuffer* _rng; - static std::mutex _mutex; - ProviderRNG(); +#include +#include +namespace sd { + class ND4J_EXPORT ErrorReference { + private: + int _errorCode = 0; + std::string _errorMessage; public: - ProviderRNG(const ProviderRNG&) = delete; - void operator=(const ProviderRNG&) = delete; - random::RandomBuffer* getRNG() const; - static ProviderRNG& getInstance(); -}; + ErrorReference() = default; + ~ErrorReference() = default; + int errorCode(); + const char* errorMessage(); + void setErrorCode(int errorCode); + void setErrorMessage(std::string message); + void setErrorMessage(const char* message); + }; } -#endif //LIBND4J_PROVIDERRNG_H + +#endif //DEV_TESTS_ERRORREFERENCE_H diff --git a/libnd4j/include/execution/LaunchContext.h b/libnd4j/include/execution/LaunchContext.h index 23165fa0e..5fae2162c 100644 --- a/libnd4j/include/execution/LaunchContext.h +++ b/libnd4j/include/execution/LaunchContext.h @@ -37,6 +37,7 @@ #include #include #include +#include @@ -97,9 +98,12 @@ class ND4J_EXPORT LaunchContext { int getDeviceID() const {return _deviceID;} void setDeviceID(int deviceID) { _deviceID = deviceID; } + sd::ErrorReference* errorReference(); static bool isInitialized(); static void releaseBuffers(); + + static LaunchContext* defaultContext(); diff --git a/libnd4j/include/execution/cpu/ContextBuffers.cpp b/libnd4j/include/execution/cpu/ContextBuffers.cpp index 3bf0a01eb..0038990c2 100644 --- a/libnd4j/include/execution/cpu/ContextBuffers.cpp +++ b/libnd4j/include/execution/cpu/ContextBuffers.cpp @@ -99,4 +99,8 @@ namespace nd4j { ContextBuffers& ContextBuffers::operator=(ContextBuffers&& other) { return *this; } + + sd::ErrorReference* ContextBuffers::errorReference() { + return &_errorReference; + } } \ No newline at end of file diff --git a/libnd4j/include/execution/cpu/LaunchContext.cpp b/libnd4j/include/execution/cpu/LaunchContext.cpp index 3ee460350..60e29c7ca 100644 --- a/libnd4j/include/execution/cpu/LaunchContext.cpp +++ b/libnd4j/include/execution/cpu/LaunchContext.cpp @@ -23,7 +23,11 @@ #include #include +#ifdef IOS_BUILD nd4j::ContextBuffers contextBuffers = nd4j::ContextBuffers(); +#else +thread_local nd4j::ContextBuffers contextBuffers = nd4j::ContextBuffers(); +#endif namespace nd4j { @@ -65,4 +69,8 @@ namespace nd4j { void LaunchContext::releaseBuffers() { // } + + sd::ErrorReference* LaunchContext::errorReference() { + return contextBuffers.errorReference(); + } } \ No newline at end of file diff --git a/libnd4j/include/execution/cuda/ContextBuffers.cu b/libnd4j/include/execution/cuda/ContextBuffers.cu index 84db0c284..895bb6623 100644 --- a/libnd4j/include/execution/cuda/ContextBuffers.cu +++ b/libnd4j/include/execution/cuda/ContextBuffers.cu @@ -220,5 +220,9 @@ namespace nd4j { bool ContextBuffers::isInitialized() { return _initialized; } + + sd::ErrorReference* ContextBuffers::errorReference() { + return &_errorReference; + } } diff --git a/libnd4j/include/execution/cuda/LaunchContext.cu b/libnd4j/include/execution/cuda/LaunchContext.cu index 1292f756c..9d9f2c506 100644 --- a/libnd4j/include/execution/cuda/LaunchContext.cu +++ b/libnd4j/include/execution/cuda/LaunchContext.cu @@ -168,4 +168,8 @@ LaunchContext::LaunchContext() { bool LaunchContext::isInitialized() { return contextBuffers.isInitialized(); } + + sd::ErrorReference* LaunchContext::errorReference() { + return 
contextBuffers.errorReference(); + } } \ No newline at end of file diff --git a/libnd4j/include/helpers/impl/ProviderRNG.cpp b/libnd4j/include/execution/impl/ErrorReference.cpp similarity index 52% rename from libnd4j/include/helpers/impl/ProviderRNG.cpp rename to libnd4j/include/execution/impl/ErrorReference.cpp index 216aa3a32..7b3409aa1 100644 --- a/libnd4j/include/helpers/impl/ProviderRNG.cpp +++ b/libnd4j/include/execution/impl/ErrorReference.cpp @@ -15,37 +15,32 @@ ******************************************************************************/ // -// Created by Yurii Shyrma on 27.01.2018 +// @author raver119@gmail.com // -#include -#include +#include -namespace nd4j { - -ProviderRNG::ProviderRNG() { +namespace sd { + int ErrorReference::errorCode() { + return _errorCode; + } - Nd4jLong *buffer = new Nd4jLong[100000]; - std::lock_guard lock(_mutex); - #ifndef __CUDABLAS__ - // at this moment we don't have streams etc, so let's just skip this for now - _rng = (nd4j::random::RandomBuffer *) initRandom(nullptr, 123, 100000, (Nd4jPointer) buffer); - #endif - // if(_rng != nullptr) -} - -ProviderRNG& ProviderRNG::getInstance() { - - static ProviderRNG instance; - return instance; -} - -random::RandomBuffer* ProviderRNG::getRNG() const { - - return _rng; -} - -std::mutex ProviderRNG::_mutex; - + const char* ErrorReference::errorMessage() { + // since we're fetching error message - error code will be assumed consumed & nullified + _errorCode = 0; + return _errorMessage.c_str(); + } + + void ErrorReference::setErrorCode(int errorCode) { + _errorCode = errorCode; + } + + void ErrorReference::setErrorMessage(std::string message) { + _errorMessage = message; + } + + void ErrorReference::setErrorMessage(const char* message) { + _errorMessage = std::string(message); + } } diff --git a/libnd4j/include/ops/declarable/generic/convo/ismax.cpp b/libnd4j/include/ops/declarable/generic/convo/ismax.cpp index ad5a485e1..13de73e81 100644 --- a/libnd4j/include/ops/declarable/generic/convo/ismax.cpp +++ b/libnd4j/include/ops/declarable/generic/convo/ismax.cpp @@ -45,7 +45,7 @@ DECLARE_SYN(IsMax, ismax); DECLARE_TYPES(ismax) { getOpDescriptor() ->setAllowedInputTypes(0, DataType::ANY) - ->setAllowedOutputTypes(0, DataType::BOOL); + ->setAllowedOutputTypes(0, DataType::ANY); } diff --git a/libnd4j/include/ops/declarable/generic/nlp/cbow.cpp b/libnd4j/include/ops/declarable/generic/nlp/cbow.cpp index 2ae69e296..21906f4eb 100644 --- a/libnd4j/include/ops/declarable/generic/nlp/cbow.cpp +++ b/libnd4j/include/ops/declarable/generic/nlp/cbow.cpp @@ -84,7 +84,8 @@ namespace nd4j { ->setAllowedInputTypes(11, nd4j::DataType::INT64) ->setAllowedInputTypes(12, nd4j::DataType::INT32) ->setAllowedInputTypes(13, nd4j::DataType::INT32) - ->setAllowedInputTypes(14, {ALL_FLOATS}); + ->setAllowedInputTypes(14, {ALL_FLOATS}) + ->setAllowedOutputTypes(nd4j::DataType::ANY); } } } diff --git a/libnd4j/include/ops/declarable/generic/nlp/skipgram.cpp b/libnd4j/include/ops/declarable/generic/nlp/skipgram.cpp index 78c6e3818..a97e1a79e 100644 --- a/libnd4j/include/ops/declarable/generic/nlp/skipgram.cpp +++ b/libnd4j/include/ops/declarable/generic/nlp/skipgram.cpp @@ -79,7 +79,7 @@ namespace nd4j { ->setAllowedInputTypes(9, {ALL_FLOATS}) ->setAllowedInputTypes(10, nd4j::DataType::INT64) ->setAllowedInputTypes(11, {ALL_FLOATS}) - ->setAllowedOutputTypes(nd4j::DataType::INT8); + ->setAllowedOutputTypes(nd4j::DataType::ANY); } /* diff --git a/libnd4j/include/ops/declarable/generic/nn/softmax.cpp 
b/libnd4j/include/ops/declarable/generic/nn/softmax.cpp
index 08dba09f2..d96f97c10 100644
--- a/libnd4j/include/ops/declarable/generic/nn/softmax.cpp
+++ b/libnd4j/include/ops/declarable/generic/nn/softmax.cpp
@@ -70,7 +70,7 @@ CONFIGURABLE_OP_IMPL(softmax_bp, 2, 1, true, 0, 0) {

 DECLARE_TYPES(softmax_bp) {
     getOpDescriptor()
-        ->setAllowedInputTypes(DataType::ANY)
+        ->setAllowedInputTypes({ALL_FLOATS})
         ->setAllowedOutputTypes({ALL_FLOATS});
 }

diff --git a/libnd4j/include/ops/declarable/helpers/cuda/reverse.cu b/libnd4j/include/ops/declarable/helpers/cuda/reverse.cu
index fab9577d6..cf891feab 100644
--- a/libnd4j/include/ops/declarable/helpers/cuda/reverse.cu
+++ b/libnd4j/include/ops/declarable/helpers/cuda/reverse.cu
@@ -30,51 +30,9 @@ namespace nd4j {
 namespace ops {
 namespace helpers {

-    template <typename T>
-    inline void __device__ indexSwap(T* arr, Nd4jLong idx1, Nd4jLong idx2) {
-        T tmp = arr[idx1];
-        arr[idx1] = arr[idx2];
-        arr[idx2] = tmp;
-    }
-//    template <typename T>
-//    void reverseArray(nd4j::LaunchContext * context, void* inArr, Nd4jLong *inShapeBuffer, void *result, Nd4jLong *zShapeBuffer, int numOfElemsToReverse = 0);

    /////////////////////////////////////////////////////////////////////////////////////
-    template <typename T>
-    static __global__ void reverseArrayInplaceKernel(void *input, Nd4jLong *inputShape, Nd4jLong numOfElemsToReverse) {
-        const auto tid = blockIdx.x * gridDim.x + threadIdx.x;
-        const auto step = gridDim.x * blockDim.x;
-        __shared__ Nd4jLong length;
-        __shared__ int linearStatus;
-        __shared__ T* inputArr;
-        if (threadIdx.x == 0) {
-            length = shape::length(inputShape);
-            linearStatus = shape::elementWiseStride(inputShape);
-            inputArr = reinterpret_cast<T*>(input);
-        }
-        __syncthreads();
-
-        for (Nd4jLong e = tid; e < numOfElemsToReverse / 2; e += step) {
-            if (linearStatus == 1) {
-                auto idx = numOfElemsToReverse - e - 1;
-                indexSwap(inputArr, e, idx);
-            }
-            else if (linearStatus > 1) {
-                auto idx1 = (numOfElemsToReverse - e - 1) * linearStatus;
-                Nd4jLong idx2 = e * linearStatus;
-                indexSwap(inputArr, idx1, idx2);
-            }
-            else {
-                auto inOffset = shape::getIndexOffset(e, inputShape, length);
-                auto outOffset = shape::getIndexOffset(numOfElemsToReverse - e - 1, inputShape, length);
-                indexSwap(inputArr, inOffset, outOffset);
-            }
-        }
-    }

    template <typename T>
    static __global__ void reverseArrayKernel(void* input, Nd4jLong *inputShape, void* output, Nd4jLong *outputShape, Nd4jLong numOfElemsToReverse) {
-        const auto tid = blockIdx.x * gridDim.x + threadIdx.x;
+        const auto tid = blockIdx.x * blockDim.x + threadIdx.x;
        const auto step = gridDim.x * blockDim.x;
        __shared__ Nd4jLong length;
        __shared__ int linearStatus;
@@ -93,51 +51,47 @@
        }
        __syncthreads();

-        for (Nd4jLong e = tid; e < length; e += step) {
-            if (e < numOfElemsToReverse ) {
-                if (linearStatus == 1) {
-                    auto idx = numOfElemsToReverse - e - 1;
-                    outputArr[idx] = inputArr[e];
-                } else if (linearStatus > 1) {
-                    auto idx1 = (numOfElemsToReverse - e - 1) * linearStatus;
-                    Nd4jLong idx2 = e * linearStatus;
-                    outputArr[idx1] = inputArr[idx2];
-                } else {
-                    auto inOffset = shape::getIndexOffset(e, inputShape, length);
-                    auto outOffset = shape::getIndexOffset(numOfElemsToReverse - e - 1, outputShape, length);
-                    outputArr[outOffset] = inputArr[inOffset];
-                }
-            }
-            else {
-                if (linearStatus == 1) {
-                    outputArr[e] = inputArr[e];
-                } else if (linearStatus > 1) {
-                    auto idx1 = e * linearStatus;
-                    Nd4jLong idx2 = e * linearStatus;
-                    outputArr[idx1] = inputArr[idx2];
-                } else {
-                    auto inOffset = shape::getIndexOffset(e, inputShape, length);
-                    auto outOffset = shape::getIndexOffset(e, outputShape, length);
-                    outputArr[outOffset] = inputArr[inOffset];
-                }
-            }
+        auto odd = length % 2 != 0;
+        auto limit = length / 2;
+
+        for (Nd4jLong e = tid; e < limit; e += step) {
+            // we're calculating offsets within input array
+            auto fOffset = shape::getIndexOffset(e, inputShape, length);
+            auto lOffset = shape::getIndexOffset(numOfElemsToReverse - e - 1, inputShape, length);
+
+            // now we're storing input values
+            auto v1 = inputArr[fOffset];
+            auto v2 = inputArr[lOffset];
+
+            // now we're calculating offsets within output array
+            auto zfOffset = shape::getIndexOffset(e, outputShape, length);
+            auto zlOffset = shape::getIndexOffset(numOfElemsToReverse - e - 1, outputShape, length);
+
+            // and saving values to output arrays
+            outputArr[zfOffset] = v2;
+            outputArr[zlOffset] = v1;
+
+            //printf("TID: %i; E: %lld; z[%lld], z[%lld] = x[%lld], x[%lld];\n", tid, e, zfOffset, zlOffset, lOffset, fOffset);
        }
-        //printf("\n");

+        // in case of odd array we'll have to move middle value
+        if (odd && tid == 0) {
+            auto xOffset = shape::getIndexOffset(limit, inputShape, length);
+            auto zOffset = shape::getIndexOffset(limit, outputShape, length);
+
+            outputArr[zOffset] = inputArr[xOffset];
+            //printf("TID: %i; E: %lld; z[%lld] = x[%lld];\n", tid, limit, zOffset, xOffset);
+        }
    }
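The rewritten kernel pairs element e with element numOfElemsToReverse - e - 1 and swaps through the output offsets, with thread 0 copying the middle element when the length is odd. The same index pattern on the host, as an illustrative sketch only:

    #include <utility>
    #include <vector>

    // Host-side restatement of the kernel's swap pattern (illustrative only).
    template <typename T>
    static void reverseSpan(std::vector<T> &arr, size_t n) {
        for (size_t e = 0; e < n / 2; e++)
            std::swap(arr[e], arr[n - e - 1]);   // pairwise swap from both ends
        // odd n: the middle element arr[n / 2] keeps its place,
        // matching the kernel's tid == 0 branch
    }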
    template <typename T>
-    static void reverseArray(nd4j::LaunchContext * context, NDArray* input, NDArray* output, int numOfElemsToReverse) {
+    static void reverseArray(nd4j::LaunchContext * context, NDArray* input, NDArray* output, Nd4jLong numOfElemsToReverse) {
        auto stream = context->getCudaStream();
        Nd4jLong numOfReverse = numOfElemsToReverse;
        if (numOfElemsToReverse == 0)
            numOfReverse = input->lengthOf();
-        if (input == output) {
-            reverseArrayInplaceKernel<T><<<256, 512, 8192, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), numOfReverse);
-        }
-        else {
-            reverseArrayKernel<T><<<256, 512, 8192, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), numOfReverse);
-        }
+
+        reverseArrayKernel<T><<<256, 512, 8192, *stream>>>(input->specialBuffer(), input->specialShapeInfo(), output->specialBuffer(), output->specialShapeInfo(), numOfReverse);
    }

@@ -221,7 +175,7 @@
        delete listIn;
    }

-BUILD_SINGLE_TEMPLATE(template void reverseArray, (nd4j::LaunchContext * context, NDArray *inArr, NDArray *outArr, int numOfElemsToReverse), LIBND4J_TYPES);
+BUILD_SINGLE_TEMPLATE(template void reverseArray, (nd4j::LaunchContext * context, NDArray *inArr, NDArray *outArr, Nd4jLong numOfElemsToReverse), LIBND4J_TYPES);

 }
 }
diff --git a/libnd4j/include/ops/declarable/impl/DeclarableOp.cpp b/libnd4j/include/ops/declarable/impl/DeclarableOp.cpp
index 5d29ed826..4fe28df8c 100644
--- a/libnd4j/include/ops/declarable/impl/DeclarableOp.cpp
+++ b/libnd4j/include/ops/declarable/impl/DeclarableOp.cpp
@@ -19,7 +19,6 @@
 //

 #include
-#include
 #include
 #include
 #include
@@ -190,32 +189,6 @@
            auto outSha = this->calculateOutputShape(&inSha, ctx);
            results = outSha->size();

-            // we must "validate" our output shapes
-            /*
-            for (int e = 0; e < results; e++) {
-                auto ptr = outSha->at(e);
-
-                // checking for the same pointer used twice
-                for (int i = 0; i < results; i++){
-                    if (i == e)
-                        continue;
-
-                    auto com = outSha->at(i);
-
-                    if (ptr == com)
-                        throw std::runtime_error("ShapeFunction returned same shape instance twice [" + *_descriptor->getOpName() + "]");
-                }
-
-                // checking for input pointer returned back
-                for (int i = 0; i < inSha.size();
i++){ - auto com = inSha.at(i); - - if (ptr == com) - throw std::runtime_error("ShapeFunction returned input shape instance as output [" + *_descriptor->getOpName() + "]"); - } - } - */ - // optionally saving shapeTime if (Environment::getInstance()->isProfiling() && node != nullptr) { shapeEnd = std::chrono::system_clock::now(); @@ -355,75 +328,139 @@ namespace nd4j { // rolling over inputs first int cnt = 0, inT = 0; std::vector inputTypes(block.width()); - for (auto &p: *(block.inputs())) { - auto var = block.variable(p); - - // we're not checking validity, if ANY types were explicitly allowed - //if (block.dataType(cnt) == nd4j::DataType::ANY) - // continue; - - // only validating non-null variables - if (var != nullptr && var->hasNDArray()) { - auto array = var->getNDArray(); - + if (block.isFastPath()) { + for (auto array: block.fastpath_in()) { inputTypes[inT++] = array->dataType(); if (!_descriptor->checkInputMatch(cnt, array->dataType())) { auto ctype = DataTypeUtils::asString(array->dataType()); - nd4j_printf("Op [%s] failed check for input [%i], DataType: [%s]\n", _descriptor->getOpName()->data(), cnt, ctype.c_str()); + nd4j_printf("Op [%s] failed check for input [%i], DataType: [%s]\n", + _descriptor->getOpName()->data(), cnt, ctype.c_str()); return ND4J_STATUS_BAD_ARGUMENTS; } + cnt++; } + } else { + for (auto &p: *(block.inputs())) { + auto var = block.variable(p); - cnt++; - } - - // checking optionally available outputs - auto varSpace = block.getVariableSpace(); - for (int index = 0; index < DataTypeUtils::max(); index++) { - if (varSpace != nullptr && varSpace->hasVariable(block.nodeId(), index)) { - auto var = block.variable(block.nodeId(), index); + // we're not checking validity, if ANY types were explicitly allowed + //if (block.dataType(cnt) == nd4j::DataType::ANY) + // continue; // only validating non-null variables if (var != nullptr && var->hasNDArray()) { auto array = var->getNDArray(); - auto cType = array->dataType(); - if (_descriptor->isSameMode()) { - - if (index >= block.width()) { - auto iv = block.variable(0); - - if (iv->getNDArray()->dataType() != cType) { - auto t = DataTypeUtils::asString(cType); - nd4j_printf("Op [%s] failed check for output [%i], DataType: [%s]\n", _descriptor->getOpName()->data(), index, t.c_str()); - return ND4J_STATUS_BAD_ARGUMENTS; - } - } else { - // for same mode, output type must be the same as input type - auto iv = block.variable(index); - - if (iv->getNDArray()->dataType() != cType) { - auto t = DataTypeUtils::asString(cType); - nd4j_printf("Op [%s] failed check for output [%i], DataType: [%s]\n", _descriptor->getOpName()->data(), index, t.c_str()); - return ND4J_STATUS_BAD_ARGUMENTS; - } - } - } else if (_descriptor->isInherit(index)) { - // in inherit mode, output type must be the same as one of input types - if (std::find(inputTypes.begin(), inputTypes.end(), cType) == inputTypes.end()) { - auto t = DataTypeUtils::asString(cType); - nd4j_printf("Op [%s] failed check for output [%i], DataType: [%s].\n", _descriptor->getOpName()->data(), index, t.c_str()); - return ND4J_STATUS_BAD_ARGUMENTS; - } - - } else if (!_descriptor->checkOutputMatch(index, cType)) { - auto t = DataTypeUtils::asString(cType); - nd4j_printf("Op [%s] failed check for output [%i], DataType: [%i];\n", _descriptor->getOpName()->data(), index, t.c_str()); + inputTypes[inT++] = array->dataType(); + if (!_descriptor->checkInputMatch(cnt, array->dataType())) { + auto ctype = DataTypeUtils::asString(array->dataType()); + nd4j_printf("Op [%s] failed check for 
input [%i], DataType: [%s]\n", + _descriptor->getOpName()->data(), cnt, ctype.c_str()); return ND4J_STATUS_BAD_ARGUMENTS; } } - } else - break; + + cnt++; + } + } + + if (block.isFastPath()) { + int index = 0; + for (auto array: block.fastpath_out()) { + auto cType = array->dataType(); + + if (_descriptor->isSameMode()) { + + if (index >= block.width()) { + auto ia = block.fastpath_in()[0]; + + if (ia->dataType() != cType) { + auto t = DataTypeUtils::asString(cType); + nd4j_printf("Op [%s] failed check for output [%i], DataType: [%s]\n", + _descriptor->getOpName()->data(), index, t.c_str()); + return ND4J_STATUS_BAD_ARGUMENTS; + } + } else { + // for same mode, output type must be the same as input type + auto ia = block.fastpath_in()[index]; + + if (ia->dataType() != cType) { + auto t = DataTypeUtils::asString(cType); + nd4j_printf("Op [%s] failed check for output [%i], DataType: [%s]\n", + _descriptor->getOpName()->data(), index, t.c_str()); + return ND4J_STATUS_BAD_ARGUMENTS; + } + } + } else if (_descriptor->isInherit(index)) { + // in inherit mode, output type must be the same as one of input types + if (std::find(inputTypes.begin(), inputTypes.end(), cType) == inputTypes.end()) { + auto t = DataTypeUtils::asString(cType); + nd4j_printf("Op [%s] failed check for output [%i], DataType: [%s].\n", + _descriptor->getOpName()->data(), index, t.c_str()); + return ND4J_STATUS_BAD_ARGUMENTS; + } + + } else if (!_descriptor->checkOutputMatch(index, cType)) { + auto t = DataTypeUtils::asString(cType); + nd4j_printf("Op [%s] failed check for output [%i], DataType: [%s];\n", + _descriptor->getOpName()->data(), index, t.c_str()); + return ND4J_STATUS_BAD_ARGUMENTS; + } + index++; + } + } else { + // checking optionally available outputs + auto varSpace = block.getVariableSpace(); + for (int index = 0; index < DataTypeUtils::max(); index++) { + if (varSpace != nullptr && varSpace->hasVariable(block.nodeId(), index)) { + auto var = block.variable(block.nodeId(), index); + + // only validating non-null variables + if (var != nullptr && var->hasNDArray()) { + auto array = var->getNDArray(); + auto cType = array->dataType(); + + if (_descriptor->isSameMode()) { + + if (index >= block.width()) { + auto iv = block.variable(0); + + if (iv->getNDArray()->dataType() != cType) { + auto t = DataTypeUtils::asString(cType); + nd4j_printf("Op [%s] failed check for output [%i], DataType: [%s]\n", + _descriptor->getOpName()->data(), index, t.c_str()); + return ND4J_STATUS_BAD_ARGUMENTS; + } + } else { + // for same mode, output type must be the same as input type + auto iv = block.variable(index); + + if (iv->getNDArray()->dataType() != cType) { + auto t = DataTypeUtils::asString(cType); + nd4j_printf("Op [%s] failed check for output [%i], DataType: [%s]\n", + _descriptor->getOpName()->data(), index, t.c_str()); + return ND4J_STATUS_BAD_ARGUMENTS; + } + } + } else if (_descriptor->isInherit(index)) { + // in inherit mode, output type must be the same as one of input types + if (std::find(inputTypes.begin(), inputTypes.end(), cType) == inputTypes.end()) { + auto t = DataTypeUtils::asString(cType); + nd4j_printf("Op [%s] failed check for output [%i], DataType: [%s].\n", + _descriptor->getOpName()->data(), index, t.c_str()); + return ND4J_STATUS_BAD_ARGUMENTS; + } + + } else if (!_descriptor->checkOutputMatch(index, cType)) { + auto t = DataTypeUtils::asString(cType); + nd4j_printf("Op [%s] failed check for output [%i], DataType: [%s];\n", + _descriptor->getOpName()->data(), index, t.c_str()); + return 
ND4J_STATUS_BAD_ARGUMENTS; + } + } + } else + break; + } } diff --git a/libnd4j/tests_cpu/layers_tests/JavaInteropTests.cpp b/libnd4j/tests_cpu/layers_tests/JavaInteropTests.cpp index ef3710371..d0d67000b 100644 --- a/libnd4j/tests_cpu/layers_tests/JavaInteropTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/JavaInteropTests.cpp @@ -400,6 +400,32 @@ TEST_F(JavaInteropTests, Test_Synonyms_3) { ASSERT_EQ(nameRef, name); } +TEST_F(JavaInteropTests, Test_FastPath_Validation_1) { + auto x = NDArrayFactory::create('c', {4}, {1, 2, 3, 4}); + auto z = NDArrayFactory::create('c', {4}, {1, 2, 3, 4}); + + Context ctx(1); + ctx.setInputArray(0, x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo()); + ctx.setOutputArray(0, z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo()); + + nd4j::ops::softmax op; + auto status = op.execute(&ctx); + ASSERT_NE(Status::OK(), status); +} + +TEST_F(JavaInteropTests, Test_FastPath_Validation_2) { + auto x = NDArrayFactory::create('c', {4}, {1.f, 2.f, 3.f, 4.f}); + auto z = NDArrayFactory::create('c', {4}, {1, 2, 3, 4}); + + Context ctx(1); + ctx.setInputArray(0, x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo()); + ctx.setOutputArray(0, z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo()); + + nd4j::ops::softmax op; + auto status = op.execute(&ctx); + ASSERT_NE(Status::OK(), status); +} + /* TEST_F(JavaInteropTests, test_avgpooling_edge_1) { int inOutH = 35; diff --git a/libnd4j/tests_cpu/layers_tests/NativeOpsTests.cpp b/libnd4j/tests_cpu/layers_tests/NativeOpsTests.cpp index fe190d9bb..9aac42ddf 100644 --- a/libnd4j/tests_cpu/layers_tests/NativeOpsTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/NativeOpsTests.cpp @@ -992,81 +992,6 @@ TEST_F(NativeOpsTests, ScalarTadTest_2) { ASSERT_TRUE(exp.e(5) == z.e(5) && exp.e(15)); } -TEST_F(NativeOpsTests, FlattenTest_1) { - auto x = NDArrayFactory::create('c', {5, 5}); - auto y = NDArrayFactory::create('c', {5, 5}); - auto exp = NDArrayFactory::create('c', {2, 5,5}); - auto z = NDArrayFactory::create('c', {2, 5,5}); - - Nd4jPointer extra[6]; -#ifdef __CUDABLAS__ - extra[1] = x.getContext()->getCudaStream(); - extra[0] = extra[2] = extra[3] = extra[4] = extra[5] = nullptr; - x.syncToHost(); - y.syncToHost(); - printf("Unsupported for CUDA platform yet.\n"); - return; -#endif - x.linspace(1.0,2); - y.linspace(2,2); - - //y.assign(2.); - x.syncToDevice(); - z.syncToDevice(); - auto dimension = NDArrayFactory::create({0, 1}); - auto dimensions = reinterpret_cast(dimension.buffer()); - auto tadPackX = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), dimensions, dimension.lengthOf()); - auto tadPackZ = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(z.shapeInfo(), dimensions, dimension.lengthOf()); - exp(1, {0}).linspace(1,2); - ::flatten(extra, - 25, 'c', z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), - x.buffer(), x.shapeInfo(), - x.specialBuffer(), x.specialShapeInfo()); - -// exp.printIndexedBuffer("Exp"); -// z.printIndexedBuffer("Flatten"); - ASSERT_TRUE(exp.equalsTo(z)); -} - -TEST_F(NativeOpsTests, ConcatTest_1) { - auto x = NDArrayFactory::create('c', {5, 5}); - auto y = NDArrayFactory::create('c', {5, 5}); - auto exp = NDArrayFactory::create('c', {10,5}); - auto z = NDArrayFactory::create('c', {10,5}); - - Nd4jPointer extra[6]; -#ifdef __CUDABLAS__ - extra[1] = x.getContext()->getCudaStream(); - extra[0] = extra[2] = extra[3] = extra[4] = extra[5] = nullptr; - x.syncToHost(); - y.syncToHost(); - printf("Unsupported 
for CUDA platform yet.\n"); - return; -#endif - x.linspace(1.0); - y.linspace(26); - - //y.assign(2.); - x.syncToDevice(); - z.syncToDevice(); - int d = 0; - auto dimension = NDArrayFactory::create('c', {1}, {d}); - auto dimensions = reinterpret_cast(dimension.buffer()); - //auto tadPackX = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), dimensions, dimension.lengthOf()); - auto tadPackZ = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(z.shapeInfo(), dimensions, dimension.lengthOf()); - exp.linspace(1); - Nd4jPointer datas[] = {x.buffer(), y.buffer()}; - Nd4jPointer shapes[] = {x.shapeInfo(), y.shapeInfo()}; - - ::concat(extra, - 0, 2, datas, shapes, nullptr, nullptr, z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), - nullptr, nullptr); - -// exp.printIndexedBuffer("Exp"); -// z.printIndexedBuffer("Concat"); - ASSERT_TRUE(exp.equalsTo(z)); -} - TEST_F(NativeOpsTests, ConcatTest_2) { auto x = NDArrayFactory::create('c', {5, 5}); auto y = NDArrayFactory::create('c', {5, 5}); diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/nativeblas/NativeOps.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/nativeblas/NativeOps.java index 174be9a7d..576cea78a 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/nativeblas/NativeOps.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/nativeblas/NativeOps.java @@ -557,43 +557,6 @@ public interface NativeOps { @Cast("Nd4jLong *") LongPointer tadShapeInfo, @Cast("Nd4jLong *") LongPointer tadOffsets, @Cast("Nd4jLong *") LongPointer tadShapeInfoZ, @Cast("Nd4jLong *") LongPointer tadOffsetsZ); - /** - * @param extraPointers - * @param offset - * @param order - * @param results - * @param resultShapeInfo - * @param input - * @param inputShapeInfo - */ - void flatten(PointerPointer extraPointers, - int offset, - char order, - Pointer results, @Cast("Nd4jLong *") LongPointer resultShapeInfo, - Pointer dresults, @Cast("Nd4jLong *") LongPointer dresultShapeInfo, - Pointer input, @Cast("Nd4jLong *") LongPointer inputShapeInfo, - Pointer dinput, @Cast("Nd4jLong *") LongPointer dinputShapeInfo); - - /** - * @param extraPointers - * @param dimension - * @param numArrays - * @param data - * @param inputShapeInfo - * @param results - * @param resultShapeInfo - * @param tadPointers - * @param tadOffsets - */ - void concat(PointerPointer extraPointers, - int dimension, - int numArrays, - PointerPointer data, PointerPointer inputShapeInfo, - PointerPointer ddata, PointerPointer dinputShapeInfo, - Pointer results, @Cast("Nd4jLong *") LongPointer resultShapeInfo, - Pointer dresults, @Cast("Nd4jLong *") LongPointer dresultShapeInfo, - PointerPointer tadPointers, - PointerPointer tadOffsets); void specialConcat(PointerPointer extraPointers, int dimension, @@ -1185,4 +1148,7 @@ public interface NativeOps { Pointer lcCopyStream(OpaqueLaunchContext lc); Pointer lcBlasHandle(OpaqueLaunchContext lc); Pointer lcSolverHandle(OpaqueLaunchContext lc); + + int lastErrorCode(); + String lastErrorMessage(); } diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/jita/allocator/pointers/cuda/cudaEvent_t.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/jita/allocator/pointers/cuda/cudaEvent_t.java index 1650e08ac..52b7d7332 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/jita/allocator/pointers/cuda/cudaEvent_t.java 
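
From this point on, the Java-side changes all follow one contract: a native call no longer signals failure through its return value alone; the C++ side records a sticky error code and message, and Java polls lastErrorCode()/lastErrorMessage() immediately after the call. The repeated inline checks in this diff could be captured once by a helper like the sketch below; NativeOps and NativeOpsHolder are the real types from this patch, while NativeErrors.checkNativeError is a hypothetical convenience, not code from the patch.

    import org.nd4j.nativeblas.NativeOps;
    import org.nd4j.nativeblas.NativeOpsHolder;

    final class NativeErrors {
        // Hypothetical helper mirroring the inline checks this patch adds after
        // each native call: poll the sticky error state and rethrow in Java.
        static void checkNativeError() {
            NativeOps ops = NativeOpsHolder.getInstance().getDeviceNativeOps();
            if (ops.lastErrorCode() != 0)
                throw new RuntimeException(ops.lastErrorMessage());
        }
    }

Checking immediately after the call matters: the error slot can be overwritten by the next failing native call, so the throw should stay next to the call that caused it.
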
+++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/jita/allocator/pointers/cuda/cudaEvent_t.java @@ -22,6 +22,7 @@ import org.bytedeco.javacpp.Pointer; import org.nd4j.jita.allocator.pointers.CudaPointer; import org.nd4j.linalg.exception.ND4JException; import org.nd4j.linalg.factory.Nd4j; +import org.nd4j.nativeblas.NativeOps; import org.nd4j.nativeblas.NativeOpsHolder; /** @@ -67,14 +68,18 @@ public class cudaEvent_t extends CudaPointer { int res = NativeOpsHolder.getInstance().getDeviceNativeOps().eventSynchronize(this); if (res == 0) throw new ND4JException("CUDA exception happened. Terminating. Last op: [" + Nd4j.getExecutioner().getLastOp() +"]"); + + if (NativeOpsHolder.getInstance().getDeviceNativeOps().lastErrorCode() != 0) + throw new RuntimeException(NativeOpsHolder.getInstance().getDeviceNativeOps().lastErrorMessage()); } } public void register(cudaStream_t stream) { if (!isDestroyed()) { int res = NativeOpsHolder.getInstance().getDeviceNativeOps().registerEvent(this, stream); - if (res == 0) - throw new ND4JException("CUDA exception happened. Terminating. Last op: [" + Nd4j.getExecutioner().getLastOp() +"]"); + + if (NativeOpsHolder.getInstance().getDeviceNativeOps().lastErrorCode() != 0) + throw new RuntimeException(NativeOpsHolder.getInstance().getDeviceNativeOps().lastErrorMessage()); } } } diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/jita/allocator/pointers/cuda/cudaStream_t.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/jita/allocator/pointers/cuda/cudaStream_t.java index b18ceb2fa..8d78ee950 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/jita/allocator/pointers/cuda/cudaStream_t.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/jita/allocator/pointers/cuda/cudaStream_t.java @@ -36,8 +36,9 @@ public class cudaStream_t extends CudaPointer { public int synchronize() { NativeOps nativeOps = NativeOpsHolder.getInstance().getDeviceNativeOps(); int res = nativeOps.streamSynchronize(this); - if (res == 0) - throw new ND4JException("CUDA exception happened. Terminating. 
Last op: [" + Nd4j.getExecutioner().getLastOp() +"]"); + + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); return res; } diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/JCublasNDArrayFactory.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/JCublasNDArrayFactory.java index 44c361d87..9e9dc34b2 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/JCublasNDArrayFactory.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/JCublasNDArrayFactory.java @@ -24,6 +24,7 @@ import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.buffer.DataTypeEx; import org.nd4j.linalg.api.buffer.Utf8Buffer; import org.nd4j.linalg.api.memory.enums.MemoryKind; +import org.nd4j.linalg.api.ops.custom.Flatten; import org.nd4j.linalg.api.ops.impl.shape.Concat; import org.nd4j.linalg.api.ops.performance.PerformanceTracker; import org.nd4j.linalg.api.shape.options.ArrayOptionsHelper; @@ -104,6 +105,9 @@ public class JCublasNDArrayFactory extends BaseNativeNDArrayFactory { functions.put(11, Loader.addressof("cusolverDnSgesvd")); functions.put(12, Loader.addressof("cusolverDnDgesvd")); nativeOps.initializeFunctions(functions); + + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); } @Override @@ -335,75 +339,7 @@ public class JCublasNDArrayFactory extends BaseNativeNDArrayFactory { if (Nd4j.getExecutioner() instanceof GridExecutioner) ((GridExecutioner) Nd4j.getExecutioner()).flushQueue(); - int length = 0; - DataType t = null; - for (INDArray m : matrices) { - length += m.length(); - if (t == null) - t = m.dataType(); - - Preconditions.checkArgument(t == m.dataType(), "Arrays must have same data type"); - } - - INDArray ret = Nd4j.create(t, new long[] {length}, order); - int linearIndex = 0; - - AtomicAllocator allocator = AtomicAllocator.getInstance(); - - - for (INDArray m : matrices) { - if (m.isEmpty()) - continue; - - CudaContext context = allocator.getFlowController().prepareAction(ret, m); - - if (m.ordering() == order && ret.elementWiseStride() == m.elementWiseStride() - && ret.elementWiseStride() == 1) { - // do memcpy in proper direction and forget about that - // FIXME: get rid of this - ((BaseCudaDataBuffer) m.data()).lazyAllocateHostPointer(); - allocator.memcpyAsync(ret.data(), new CudaPointer(allocator.getHostPointer(m).address()), - AllocationUtils.getRequiredMemory(AllocationUtils.buildAllocationShape(m)), - linearIndex * (m.data().dataType() == DataType.DOUBLE ? 8 - : m.data().dataType() == DataType.FLOAT ? 
4 : 2)); - linearIndex += m.length(); - } else { - Pointer hostYShapeInfo = AddressRetriever.retrieveHostPointer(m.shapeInfoDataBuffer()); - - PointerPointer extras = new PointerPointer( - AddressRetriever.retrieveHostPointer(ret.shapeInfoDataBuffer()), context.getOldStream(), - allocator.getDeviceIdPointer(), null, - context.getBufferReduction(), context.getBufferScalar(), null, - hostYShapeInfo, AddressRetriever.retrieveHostPointer(ret.shapeInfoDataBuffer())); - - - nativeOps.flatten(extras, linearIndex, order, - null, - (LongPointer) allocator.getHostPointer(ret.shapeInfoDataBuffer()), - allocator.getPointer(ret, context), - (LongPointer) allocator.getPointer(ret.shapeInfoDataBuffer(), context), - null, - (LongPointer) allocator.getHostPointer(m.shapeInfoDataBuffer()), - allocator.getPointer(m, context), - (LongPointer) allocator.getPointer(m.shapeInfoDataBuffer(), context)); - - - - - //Works for all cases... - - /* NdIndexIterator iter = new NdIndexIterator(order, m.shape()); - while (iter.hasNext()) { - ret.putScalar(linearIndex++, m.getDouble(iter.next())); - }*/ - - linearIndex += m.length(); - } - - if (ret != null) - allocator.registerAction(context, ret, m); - } - return ret; + return Nd4j.exec(new Flatten(order, matrices.toArray(new INDArray[0])))[0]; } @Override @@ -412,131 +348,6 @@ public class JCublasNDArrayFactory extends BaseNativeNDArrayFactory { ((GridExecutioner) Nd4j.getExecutioner()).flushQueue(); return Nd4j.exec(new Concat(dimension, toConcat))[0]; - - // legacy implementation -/* - boolean allScalars = true; - - var outputShape = ArrayUtil.copy(toConcat[0].shape()); - - if (toConcat.length == 1) - return toConcat[0]; - - int sumAlongDim = 0; - for (int i = 0; i < toConcat.length; i++) { - if (toConcat[i].isCompressed()) - Nd4j.getCompressor().decompressi(toConcat[i]); - - allScalars &= toConcat[i].rank() == 0; - - sumAlongDim += toConcat[i].size(dimension); - } - - if (allScalars) { - outputShape = new long[]{sumAlongDim}; - } else { - outputShape[dimension] = sumAlongDim; - } - - INDArray ret = Nd4j.createUninitialized(toConcat[0].dataType(), outputShape, Nd4j.order()); - - AtomicAllocator allocator = AtomicAllocator.getInstance(); - - CudaContext context = allocator.getFlowController().prepareAction(ret, toConcat); - - val shapeInfoPointers = new long[toConcat.length]; - val dataPointers = new long[toConcat.length]; - val tadPointers = new long[toConcat.length]; - val offsetsPointers = new long[toConcat.length]; - val hostShapeInfoPointers = new long[toConcat.length]; - - TADManager tadManager = Nd4j.getExecutioner().getTADManager(); - for (int i = 0; i < toConcat.length; i++) { - shapeInfoPointers[i] = AddressRetriever.retrieveDeviceAddress(toConcat[i].shapeInfoDataBuffer(), context); - dataPointers[i] = AtomicAllocator.getInstance().getPointer(toConcat[i], context).address(); - hostShapeInfoPointers[i] = AtomicAllocator.getInstance().getHostPointer(toConcat[i].shapeInfoDataBuffer()).address(); - - sumAlongDim += toConcat[i].size(dimension); - for (int j = 0; j < toConcat[i].rank(); j++) - if (j != dimension && toConcat[i].size(j) != outputShape[j]) { - throw new IllegalArgumentException( - "Illegal concatenation at array " + i + " and shape element " + j); - } - - if (!allScalars) { - val tadBuffers = tadManager.getTADOnlyShapeInfo(toConcat[i], new int[]{dimension}); - - long devTadShapeInfo = AtomicAllocator.getInstance().getPointer(tadBuffers.getFirst(), context).address(); - - val offsets = tadBuffers.getSecond(); - long devTadOffsets = 
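
The net effect of the changes in this file is that JCublasNDArrayFactory.toFlattened and concat now simply dispatch to the Flatten and Concat custom ops, and the hand-rolled memcpy/TAD code paths (including the commented-out legacy concat being deleted below) disappear. A short usage sketch with the types this patch imports; the wrapper class and method names are illustrative only.

    import org.nd4j.linalg.api.ndarray.INDArray;
    import org.nd4j.linalg.api.ops.custom.Flatten;
    import org.nd4j.linalg.api.ops.impl.shape.Concat;
    import org.nd4j.linalg.factory.Nd4j;

    class FlattenConcatExample {
        static INDArray flattenAll(char order, INDArray... arrays) {
            // One op call replaces the per-array memcpy/flatten loop.
            return Nd4j.exec(new Flatten(order, arrays))[0];
        }

        static INDArray concatAlong(int dimension, INDArray... arrays) {
            // Shape validation and TAD handling now live inside the op itself.
            return Nd4j.exec(new Concat(dimension, arrays))[0];
        }
    }

Routing through Nd4j.exec also means both paths pass through the op executioners, which perform the lastErrorCode checks added elsewhere in this diff.
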
AtomicAllocator.getInstance().getPointer(offsets, context).address(); - - tadPointers[i] = devTadShapeInfo; - offsetsPointers[i] = devTadOffsets; - } - } - - // getting tadOnlyShape for result - val zBuffers = tadManager.getTADOnlyShapeInfo(ret, new int[] {dimension}); - val hostPointers = new LongPointer(hostShapeInfoPointers); - val hosthost = new PointerPointerWrapper(hostPointers); - - //System.out.println("shapePointers: " + Arrays.toString(shapeInfoPointers)); - - val dZ = AtomicAllocator.getInstance().getPointer(ret, context); - val dZShapeInfo = AddressRetriever.retrieveDevicePointer(ret.shapeInfoDataBuffer(), context); - - - - //val tempData = new CudaDoubleDataBuffer(toConcat.length); - //val tempShapes = new CudaDoubleDataBuffer(toConcat.length); - //val tempTAD = new CudaDoubleDataBuffer(toConcat.length); - //val tempOffsets = new CudaDoubleDataBuffer(toConcat.length); - - //AtomicAllocator.getInstance().memcpyBlocking(tempData, new LongPointer(dataPointers), dataPointers.length * 8,0); - //AtomicAllocator.getInstance().memcpyBlocking(tempShapes, new LongPointer(shapeInfoPointers), shapeInfoPointers.length * 8, 0); - //AtomicAllocator.getInstance().memcpyBlocking(tempTAD, new LongPointer(tadPointers), tadPointers.length * 8, 0); - //AtomicAllocator.getInstance().memcpyBlocking(tempOffsets, new LongPointer(offsetsPointers), offsetsPointers.length * 8, 0); - - val dataPointer = new PointerPointerWrapper(new LongPointer(dataPointers)); //AtomicAllocator.getInstance().getPointer(tempData, context); - val shapesPointer = new PointerPointerWrapper(new LongPointer(shapeInfoPointers));//AtomicAllocator.getInstance().getPointer(tempShapes, context); - //val tadPointer = AtomicAllocator.getInstance().getPointer(tempTAD, context); - //val offsetPointer = AtomicAllocator.getInstance().getPointer(tempOffsets, context); - - - // System.out.println("ShapesPointer after conversion: " + shapesPointer); - - val extras = new PointerPointer(AddressRetriever.retrieveHostPointer(ret.shapeInfoDataBuffer()), - context.getOldStream(), allocator.getDeviceIdPointer(), null, - context.getBufferReduction(), context.getBufferScalar(), null, - AddressRetriever.retrieveHostPointer(toConcat[0].shapeInfoDataBuffer()), - AddressRetriever.retrieveHostPointer(ret.shapeInfoDataBuffer()), - new LongPointer(hostShapeInfoPointers), - AtomicAllocator.getInstance().getPointer(zBuffers.getFirst(), context), // getting zTADShape - AtomicAllocator.getInstance().getPointer(zBuffers.getSecond(), context) // getting zOffset - ); - - - nativeOps.concat(extras, - dimension, - toConcat.length, - null, - hosthost, - dataPointer, - shapesPointer, - null, - (LongPointer) ret.shapeInfoDataBuffer().addressPointer(), - dZ, - (LongPointer) dZShapeInfo, - null, - null); - - - allocator.registerAction(context, ret, toConcat); - - return ret; - //return super.concat(dimension, toConcat); - */ } @@ -590,6 +401,8 @@ public class JCublasNDArrayFactory extends BaseNativeNDArrayFactory { (LongPointer) ret.shapeInfoDataBuffer().addressPointer(), null, null); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); AllocationPoint point = allocator.getAllocationPoint(ret); @@ -598,6 +411,9 @@ public class JCublasNDArrayFactory extends BaseNativeNDArrayFactory { nativeOps.memcpyAsync(point.getDevicePointer(), point.getHostPointer(), ret.lengthLong() * Nd4j.sizeOfDataType(ret.data().dataType()), CudaConstants.cudaMemcpyHostToDevice, context.getSpecialStream()); context.getSpecialStream().synchronize(); + if 
(nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + PerformanceTracker.getInstance().helperRegisterTransaction(point.getDeviceId(), perfD, point.getNumberOfBytes(), MemcpyDirection.HOST_TO_DEVICE); point.tickHostRead(); @@ -729,6 +545,8 @@ public class JCublasNDArrayFactory extends BaseNativeNDArrayFactory { (LongPointer) zTadShapeInfo, new LongPointerWrapper(zTadOffsets)); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); allocator.registerAction(context, ret, source); @@ -743,7 +561,6 @@ public class JCublasNDArrayFactory extends BaseNativeNDArrayFactory { return target.assign(arrays[0]); // we do averaging on GPU only if ALL devices have p2p links - //if (CudaEnvironment.getInstance().getConfiguration().isCrossDeviceAccessAllowed() && nativeOps.isP2PAvailable()) { if (true) { Nd4j.getExecutioner().push(); @@ -781,6 +598,9 @@ public class JCublasNDArrayFactory extends BaseNativeNDArrayFactory { nativeOps.accumulate(extras, null, (LongPointer) arrays[0].shapeInfoDataBuffer().addressPointer(), x, null, null, (LongPointer) allocator.getHostPointer(target.shapeInfoDataBuffer()) , z, (LongPointer) allocator.getPointer(target.shapeInfoDataBuffer()), arrays.length, len); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + allocator.getFlowController().registerAction(context, target, arrays); return target; @@ -824,6 +644,8 @@ public class JCublasNDArrayFactory extends BaseNativeNDArrayFactory { arrays.length, len); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); AtomicAllocator.getInstance().getAllocationPoint(target).tickHostWrite(); @@ -895,6 +717,9 @@ public class JCublasNDArrayFactory extends BaseNativeNDArrayFactory { arrays.length, len, true); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + allocator.getFlowController().registerAction(context, target, arrays); return target; @@ -940,6 +765,9 @@ public class JCublasNDArrayFactory extends BaseNativeNDArrayFactory { arrays.length, len, true); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + if (target != null) AtomicAllocator.getInstance().getAllocationPoint(target).tickHostWrite(); @@ -1115,6 +943,9 @@ public class JCublasNDArrayFactory extends BaseNativeNDArrayFactory { (IntPointer) shuffleMap, new PointerPointer(allocator.getPointer(tempTAD, context)), new PointerPointer(allocator.getPointer(tempOffsets, context))); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + for (int f = 0; f < arrays.size(); f++) { allocator.getFlowController().registerAction(context, arrays.get(f)); } @@ -1260,6 +1091,9 @@ public class JCublasNDArrayFactory extends BaseNativeNDArrayFactory { val p = new PointerPointer<>(new Pointer[]{null, stream}); nativeOps.convertTypes(p, typeSrc.ordinal(), source, length, typeDst.ordinal(), target); + + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); } @Override @@ -1277,7 +1111,13 @@ public class JCublasNDArrayFactory extends BaseNativeNDArrayFactory { srcPtr = nativeOps.mallocDevice(ssize, 0, 0); dstPtr = nativeOps.mallocDevice(size, 0, 0); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + nativeOps.memcpyAsync(srcPtr, source, ssize, CudaConstants.cudaMemcpyHostToDevice, stream); + + if 
(nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); } else { // decompressing throw new UnsupportedOperationException(); @@ -1288,9 +1128,15 @@ public class JCublasNDArrayFactory extends BaseNativeNDArrayFactory { stream.synchronize(); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + if (buffer instanceof CompressedDataBuffer) { nativeOps.freeDevice(srcPtr, 0); nativeOps.freeDevice(dstPtr, 0); + + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); } } @@ -1309,13 +1155,15 @@ public class JCublasNDArrayFactory extends BaseNativeNDArrayFactory { val size = ((CompressedDataBuffer) source).getCompressionDescriptor().getCompressedLength(); srcPtr = ws.alloc(size, MemoryKind.DEVICE, DataType.HALF, false); nativeOps.memcpyAsync(srcPtr, source.addressPointer(), size, CudaConstants.cudaMemcpyHostToHost, stream); + + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); } // if true - we're compressing into host memory if (target instanceof CompressedDataBuffer) { val size = ((CompressedDataBuffer) target).getCompressionDescriptor().getCompressedLength(); dstPtr = ws.alloc(size, MemoryKind.DEVICE, DataType.HALF, false); - //nativeOps.memcpyAsync(dstPtr, target.addressPointer(), size, CudaConstants.cudaMemcpyHostToHost, stream); } } else { // if true - we're decompressing from host memory @@ -1325,6 +1173,9 @@ public class JCublasNDArrayFactory extends BaseNativeNDArrayFactory { srcPtr = nativeOps.mallocDevice(size, 0, 0); nativeOps.memcpyAsync(srcPtr, source.addressPointer(), size, CudaConstants.cudaMemcpyHostToHost, stream); stream.synchronize(); + + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); } else srcPtr = AtomicAllocator.getInstance().getPointer(source); @@ -1333,8 +1184,9 @@ public class JCublasNDArrayFactory extends BaseNativeNDArrayFactory { log.info("Replacing target ptr"); val size = ((CompressedDataBuffer) target).getCompressionDescriptor().getCompressedLength(); dstPtr = nativeOps.mallocDevice(size, 0, 0); - //nativeOps.memcpyAsync(dstPtr, source.addressPointer(), size, CudaConstants.cudaMemcpyHostToHost, stream); - //stream.synchronize(); + + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); } else dstPtr = AtomicAllocator.getInstance().getPointer(target); } @@ -1342,6 +1194,9 @@ public class JCublasNDArrayFactory extends BaseNativeNDArrayFactory { convertDataEx(typeSrc, srcPtr, typeDst, dstPtr, target.length()); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + Nd4j.getExecutioner().commit(); @@ -1364,6 +1219,9 @@ public class JCublasNDArrayFactory extends BaseNativeNDArrayFactory { } + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + Nd4j.getExecutioner().commit(); } @@ -1462,6 +1320,9 @@ public class JCublasNDArrayFactory extends BaseNativeNDArrayFactory { new LongPointerWrapper(AtomicAllocator.getInstance().getPointer(tadBuffers.getSecond(), context)) ); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + AtomicAllocator.getInstance().getFlowController().registerActionAllWrite(context, result); AtomicAllocator.getInstance().getFlowController().registerAction(context,null, result); @@ -1517,6 +1378,8 @@ public class JCublasNDArrayFactory extends 
BaseNativeNDArrayFactory { descending ); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); AtomicAllocator.getInstance().getFlowController().registerAction(context, x); @@ -1565,6 +1428,8 @@ public class JCublasNDArrayFactory extends BaseNativeNDArrayFactory { descending ); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); AtomicAllocator.getInstance().getFlowController().registerAction(context, x); diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaExecutioner.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaExecutioner.java index 789f0f1a3..38a1ba382 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaExecutioner.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaExecutioner.java @@ -207,6 +207,10 @@ public class CudaExecutioner extends DefaultOpExecutioner { throw new UnsupportedOperationException("Unknown op type: " + op.getOpType()); } + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + + AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y()); profilingConfigurableHookOut(op, st); @@ -461,6 +465,9 @@ public class CudaExecutioner extends DefaultOpExecutioner { } } + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + profilingConfigurableHookOut(op, st); return op.z(); @@ -619,7 +626,8 @@ public class CudaExecutioner extends DefaultOpExecutioner { AtomicAllocator.getInstance().getPointer(op.dimensions(), context), null); - + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y()); @@ -777,6 +785,9 @@ public class CudaExecutioner extends DefaultOpExecutioner { throw new UnsupportedOperationException("Unknown opType: " + op.getOpType()); } + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y()); profilingConfigurableHookOut(op, st); @@ -868,6 +879,9 @@ public class CudaExecutioner extends DefaultOpExecutioner { AtomicAllocator.getInstance().registerAction(context, null, op.x(), op.y()); } + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + profilingConfigurableHookOut(op, st); return null; @@ -1105,6 +1119,8 @@ public class CudaExecutioner extends DefaultOpExecutioner { AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y()); } + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); profilingConfigurableHookOut(op, st); @@ -1194,6 +1210,9 @@ public class CudaExecutioner extends DefaultOpExecutioner { throw new UnsupportedOperationException(); } + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + AtomicAllocator.getInstance().getFlowController().registerAction(context, op.z(), op.x(), op.y()); profilingConfigurableHookOut(op, st); @@ -1268,6 +1287,9 @@ public class CudaExecutioner extends DefaultOpExecutioner { throw new UnsupportedOperationException("Unknown op type: " + op.getOpType()); } + if (nativeOps.lastErrorCode() != 0) 
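
CudaExecutioner interleaves these checks with native calls whose return values it then interprets (calculateOutputShapes2, execCustomOp2, shapeBuffer and others). The ordering is deliberate: consult the error state before trusting what the native call returned, since a failed call may hand back an unusable pointer. A hypothetical generic wrapper for that pattern, not part of the patch:

    import java.util.function.Supplier;
    import org.nd4j.nativeblas.NativeOps;
    import org.nd4j.nativeblas.NativeOpsHolder;

    final class GuardedNative {
        // Illustration only: run a native call, surface any recorded native
        // error, and only then hand the result back to the caller.
        static <T> T call(Supplier<T> nativeCall) {
            T result = nativeCall.get();
            NativeOps ops = NativeOpsHolder.getInstance().getDeviceNativeOps();
            if (ops.lastErrorCode() != 0)
                throw new RuntimeException(ops.lastErrorMessage());
            return result;
        }
    }
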
+ throw new RuntimeException(nativeOps.lastErrorMessage()); + AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.scalar()); profilingConfigurableHookOut(op, st); @@ -1423,6 +1445,8 @@ public class CudaExecutioner extends DefaultOpExecutioner { } } + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y()); @@ -1582,6 +1606,9 @@ public class CudaExecutioner extends DefaultOpExecutioner { batch.getSample().maxIndexArguments(), batch.getSample().maxRealArguments(), AtomicAllocator.getInstance().getPointer(surfaceBuffer, context), FlatBuffersMapper.getDataTypeAsByte(dataType)); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + surfacePoint.tickHostWrite(); } @@ -1676,6 +1703,9 @@ public class CudaExecutioner extends DefaultOpExecutioner { numIndexArguments, iPtr, numIntArrays, AtomicAllocator.getInstance().getPointer(realsBuffer.data(), context), numRealArguments, FlatBuffersMapper.getDataTypeAsByte(dataType)); + + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); } /** @@ -1739,6 +1769,9 @@ public class CudaExecutioner extends DefaultOpExecutioner { AtomicAllocator.getInstance().getPointer(op.extraArgsDataBuff(op.z().dataType()), context)); } + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + AtomicAllocator.getInstance().getFlowController().registerAction(context, op.z(), op.x(), op.y()); profilingConfigurableHookOut(op, st); @@ -1969,6 +2002,9 @@ public class CudaExecutioner extends DefaultOpExecutioner { nativeOps.decodeThreshold(extras, AtomicAllocator.getInstance().getPointer(buffer), compressedLength, AtomicAllocator.getInstance().getPointer(result), (LongPointer) AtomicAllocator.getInstance().getHostPointer(target.shapeInfoDataBuffer())); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + AtomicAllocator.getInstance().getAllocationPoint(result).tickDeviceWrite(); return target; @@ -2013,7 +2049,8 @@ public class CudaExecutioner extends DefaultOpExecutioner { (IntPointer) AtomicAllocator.getInstance().getPointer(buffer, context), (float) threshold); - + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); AtomicAllocator.getInstance().getFlowController().registerAction(context, indArray); @@ -2039,6 +2076,8 @@ public class CudaExecutioner extends DefaultOpExecutioner { nativeOps.decodeBitmap(extras, AtomicAllocator.getInstance().getPointer(encoded.data(), context), target.lengthLong(), AtomicAllocator.getInstance().getPointer(target, context), (LongPointer) AtomicAllocator.getInstance().getHostPointer(target.shapeInfoDataBuffer())); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); AtomicAllocator.getInstance().getFlowController().registerAction(context, target); @@ -2151,6 +2190,9 @@ public class CudaExecutioner extends DefaultOpExecutioner { OpaqueShapeList ptrptr = nativeOps.calculateOutputShapes2(null, hash, inputBuffers, inputShapes, op.inputArguments().length, tArgs, op.tArgs().length, iArgs, op.iArgs().length, bArgs, op.numBArguments()); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + if (ptrptr == null) throw new RuntimeException(); @@ -2221,109 +2263,6 @@ public class CudaExecutioner extends 
DefaultOpExecutioner { } catch (Exception e) { throw new RuntimeException("Op [" + name + "] execution failed", e); } - - /* - long st = profilingConfigurableHookIn(op); - - CudaContext context =(CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext(); - //AtomicAllocator.getInstance().getFlowController().prepareActionAllWrite(op.outputArguments()); - - if (extraz.get() == null) - extraz.set(new PointerPointer(32)); - - - PointerPointer extras = extraz.get().put( - new CudaPointer(1), - context.getOldStream(), - context.getBufferScalar(), - context.getBufferReduction()); - - val outputArgs = op.outputArguments(); - val inputArgs = op.inputArguments(); - - if (outputArgs.length == 0 && !op.isInplaceCall()) - throw new ND4JIllegalStateException("You can't execute non-inplace CustomOp without outputs being specified"); - - val lc = op.opName().toLowerCase(); - val hash = op.opHash(); - - - val inputShapes = new PointerPointer<>(inputArgs.length * 2); - val inputBuffers = new PointerPointer<>(inputArgs.length * 2); - - int cnt= 0; - for (val in: inputArgs) { - val hp = AtomicAllocator.getInstance().getHostPointer(in.shapeInfoDataBuffer()); - inputBuffers.put(cnt, AtomicAllocator.getInstance().getHostPointer(in)); - inputShapes.put(cnt, hp); - - - val dp = AtomicAllocator.getInstance().getPointer(in.shapeInfoDataBuffer(), context); - - inputBuffers.put(cnt + inputArgs.length, AtomicAllocator.getInstance().getPointer(in, context)); - inputShapes.put(cnt+ inputArgs.length, dp); - - if (op.isInplaceCall()) { - val ap = AtomicAllocator.getInstance().getAllocationPoint(in); - if (ap != null) - ap.tickHostWrite(); - } - - cnt++; - } - - - val outputShapes = new PointerPointer<>(outputArgs.length * 2); - val outputBuffers = new PointerPointer<>(outputArgs.length * 2); - - cnt= 0; - for (val out: outputArgs) { - outputBuffers.put(cnt, AtomicAllocator.getInstance().getHostPointer(out)); - outputShapes.put(cnt, AtomicAllocator.getInstance().getHostPointer(out.shapeInfoDataBuffer())); - - outputBuffers.put(cnt + outputArgs.length, AtomicAllocator.getInstance().getPointer(out, context)); - outputShapes.put(cnt + outputArgs.length, AtomicAllocator.getInstance().getPointer(out.shapeInfoDataBuffer(), context)); - - val ap = AtomicAllocator.getInstance().getAllocationPoint(out); - - if (ap != null) - ap.tickHostWrite(); - - cnt++; - } - - val iArgs = op.iArgs().length > 0 ? new LongPointer(op.iArgs().length) : null; - - cnt = 0; - for (val i: op.iArgs()) - iArgs.put(cnt++, i); - - - val tArgs = op.tArgs().length > 0 ? new DoublePointer(op.tArgs().length) : null; - - val bArgs = op.bArgs().length > 0 ? 
new BooleanPointer(op.numBArguments()) : null; - - cnt = 0; - for (val t: op.tArgs()) - tArgs.put(cnt++, t); - - cnt = 0; - for (val b: op.bArgs()) - bArgs.put(cnt++, b); - - try { - val status = OpStatus.byNumber(nativeOps.execCustomOp(extras, hash, inputBuffers, inputShapes, inputArgs.length, outputBuffers, outputShapes, outputArgs.length, tArgs, op.tArgs().length, iArgs, op.iArgs().length, bArgs, op.numBArguments(), op.isInplaceCall())); - if (status != OpStatus.ND4J_STATUS_OK) - throw new ND4JIllegalStateException("Op execution failed: " + status); - } catch (Exception e) { - throw new RuntimeException("Op [" + op.opName() + "] execution failed"); - } - - //AtomicAllocator.getInstance().getFlowController().prepareActionAllWrite(op.outputArguments()); - - profilingConfigurableHookOut(op, st); - return op.outputArguments(); - */ } @Override @@ -2341,6 +2280,9 @@ public class CudaExecutioner extends DefaultOpExecutioner { @Override public void registerGraph(long id, Pointer graph) { nativeOps.registerGraph(null, id, graph); + + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); } @Override @@ -2368,6 +2310,9 @@ public class CudaExecutioner extends DefaultOpExecutioner { OpaqueVariablesSet result = nativeOps.executeStoredGraph(null, id, ptrBuffers, ptrShapes, ptrIndices, map.size()); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + OpStatus status = OpStatus.byNumber(nativeOps.getVariablesSetStatus(result)); if (status != OpStatus.ND4J_STATUS_OK) @@ -2398,6 +2343,9 @@ public class CudaExecutioner extends DefaultOpExecutioner { newMap.put(nodeName, array); } + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + nativeOps.deleteVariablesSet(result); return newMap; @@ -2406,6 +2354,9 @@ public class CudaExecutioner extends DefaultOpExecutioner { @Override public void forgetGraph(long id) { nativeOps.unregisterGraph(null, id); + + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); } /** @@ -2474,6 +2425,9 @@ public class CudaExecutioner extends DefaultOpExecutioner { null, (LongPointer) AtomicAllocator.getInstance().getHostPointer(tadY.getFirst()), null, AtomicAllocator.getInstance().getPointer(updates, context), (LongPointer) AtomicAllocator.getInstance().getPointer(tadY.getFirst()), (LongPointer) AtomicAllocator.getInstance().getPointer(tadY.getSecond()), null, (IntPointer) AtomicAllocator.getInstance().getPointer(indices, context)); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + AtomicAllocator.getInstance().getFlowController().registerAction(context, array, indices, updates); } @@ -2490,9 +2444,14 @@ public class CudaExecutioner extends DefaultOpExecutioner { ((CudaOpContext) context).setCudaStream(ctx.getOldStream(), ctx.getBufferReduction(), ctx.getBufferAllocation()); val status = nativeOps.execCustomOp2(null, op.opHash(), context.contextPointer()); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + if (status != 0) throw new RuntimeException("Op [" + op.opName() + "] execution failed"); + + for (val arr:op.outputArguments()) AtomicAllocator.getInstance().registerAction(ctx, arr); @@ -2527,6 +2486,9 @@ public class CudaExecutioner extends DefaultOpExecutioner { nativeOps.inspectArray(extras, AtomicAllocator.getInstance().getHostPointer(array), (LongPointer) 
AtomicAllocator.getInstance().getHostPointer(array.shapeInfoDataBuffer()), AtomicAllocator.getInstance().getPointer(array, ctx), (LongPointer) AtomicAllocator.getInstance().getPointer(array.shapeInfoDataBuffer()), debugInfo); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + return INDArrayStatistics.builder() .minValue(debugInfo._minValue()) .maxValue(debugInfo._maxValue()) @@ -2545,6 +2507,9 @@ public class CudaExecutioner extends DefaultOpExecutioner { public DataBuffer createShapeInfo(long[] shape, long[] stride, long elementWiseStride, char order, DataType dtype, boolean empty) { OpaqueConstantDataBuffer dbf = nativeOps.shapeBuffer(shape.length, new LongPointer(shape), new LongPointer(stride), dtype.toInt(), order, elementWiseStride, empty); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + val result = new CudaLongDataBuffer(nativeOps.getConstantDataBufferPrimary(dbf), nativeOps.getConstantDataBufferSpecial(dbf), Shape.shapeInfoLength(shape.length)); nativeOps.deleteShapeBuffer(dbf); @@ -2556,6 +2521,9 @@ public class CudaExecutioner extends DefaultOpExecutioner { public TadPack tadShapeInfoAndOffsets(INDArray array, int[] dimension) { OpaqueTadPack pack = nativeOps.tadOnlyShapeInfo((LongPointer) array.shapeInfoDataBuffer().addressPointer(), new IntPointer(dimension), dimension.length); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + val tadShape = new CudaLongDataBuffer(nativeOps.getPrimaryShapeInfo(pack), nativeOps.getSpecialShapeInfo(pack), nativeOps.getShapeInfoLength(pack)); val tadOffsets = new CudaLongDataBuffer(nativeOps.getPrimaryOffsets(pack), nativeOps.getSpecialOffsets(pack), nativeOps.getNumberOfTads(pack)); @@ -2568,6 +2536,9 @@ public class CudaExecutioner extends DefaultOpExecutioner { public DataBuffer createConstantBuffer(long[] values, DataType desiredType) { OpaqueConstantDataBuffer dbf = nativeOps.constantBufferLong(desiredType.toInt(), new LongPointer(values), values.length); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + val buffer = Nd4j.createBuffer(nativeOps.getConstantDataBufferPrimary(dbf), nativeOps.getConstantDataBufferSpecial(dbf), values.length, desiredType); buffer.setConstant(true); @@ -2578,6 +2549,9 @@ public class CudaExecutioner extends DefaultOpExecutioner { public DataBuffer createConstantBuffer(double[] values, DataType desiredType) { OpaqueConstantDataBuffer dbf = nativeOps.constantBufferDouble(desiredType.toInt(), new DoublePointer(values), values.length); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + val buffer = Nd4j.createBuffer(nativeOps.getConstantDataBufferPrimary(dbf), nativeOps.getConstantDataBufferSpecial(dbf), values.length, desiredType); buffer.setConstant(true); diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java index 47cfa2584..603413fd6 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java @@ -449,6 +449,60 @@ public class Nd4jCuda extends org.nd4j.nativeblas.Nd4jCudaHelper { // #endif //DEV_TESTS_TADPACK_H +// Parsed from execution/ErrorReference.h + 
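
The block that follows is the JavaCPP mapping of the new execution/ErrorReference.h header: a plain holder with errorCode/errorMessage accessors that ContextBuffers and LaunchContext expose through errorReference(). A minimal sketch of the mapped API is below; direct allocation is for illustration only, since natively the holder lives inside a launch context, and the ErrorReferenceDemo class is hypothetical.

    import org.nd4j.nativeblas.Nd4jCuda.ErrorReference;

    class ErrorReferenceDemo {
        static void demo() {
            ErrorReference err = new ErrorReference();
            err.setErrorCode(0);                 // zero means "no error"
            err.setErrorMessage("");
            if (err.errorCode() != 0)
                System.err.println(err.errorMessage());
        }
    }
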
+/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@gmail.com +// + +// #ifndef DEV_TESTS_ERRORREFERENCE_H +// #define DEV_TESTS_ERRORREFERENCE_H + +// #include +// #include + @Namespace("sd") @NoOffset public static class ErrorReference extends Pointer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public ErrorReference(Pointer p) { super(p); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public ErrorReference(long size) { super((Pointer)null); allocateArray(size); } + private native void allocateArray(long size); + @Override public ErrorReference position(long position) { + return (ErrorReference)super.position(position); + } + + public ErrorReference() { super((Pointer)null); allocate(); } + private native void allocate(); + + public native int errorCode(); + public native @Cast("char*") String errorMessage(); + + public native void setErrorCode(int errorCode); + public native void setErrorMessage(@StdString BytePointer message); + public native void setErrorMessage(@StdString String message); + } + + + +// #endif //DEV_TESTS_ERRORREFERENCE_H + + // Parsed from memory/MemoryType.h // @@ -688,6 +742,18 @@ bool verbose = false; // #include // #include +/** + * This function returns last error code stored, + * @return non-zero if something bad happened + */ +public native int lastErrorCode(); + +/** + * This function returns last error message, if last error code > 0 + * @return + */ +public native @Cast("char*") String lastErrorMessage(); + /** * * @param p @@ -1710,72 +1776,6 @@ public native void execScalarBoolTad(@Cast("Nd4jPointer*") PointerPointer extraP @Cast("Nd4jLong*") long[] tadShapeInfo, @Cast("Nd4jLong*") long[] tadOffsets, @Cast("Nd4jLong*") long[] tadShapeInfoZ, @Cast("Nd4jLong*") long[] tadOffsetsZ); - -/** -* Append an input array -* to the end of a flat array -* in a particular order -* @param offset the offset of the array to start at -* @param order the order -* @param result the result array -* @param resultShapeInfo the shape info for te array -* @param input the input for the array -* @param inputShapeInfo the shape information for that array -*/ -public native void flatten( - @Cast("Nd4jPointer*") PointerPointer extraPointers, - int offset, - char order, - Pointer result, @Cast("Nd4jLong*") LongPointer resultShapeInfo, - Pointer dresult, @Cast("Nd4jLong*") LongPointer dresultShapeInfo, - Pointer input, @Cast("Nd4jLong*") LongPointer inputShapeInfo, - Pointer dinput, @Cast("Nd4jLong*") LongPointer dinputShapeInfo); -public native void flatten( - @Cast("Nd4jPointer*") PointerPointer extraPointers, - int offset, - char order, - Pointer result, @Cast("Nd4jLong*") LongBuffer resultShapeInfo, - Pointer dresult, @Cast("Nd4jLong*") LongBuffer 
dresultShapeInfo, - Pointer input, @Cast("Nd4jLong*") LongBuffer inputShapeInfo, - Pointer dinput, @Cast("Nd4jLong*") LongBuffer dinputShapeInfo); -public native void flatten( - @Cast("Nd4jPointer*") PointerPointer extraPointers, - int offset, - char order, - Pointer result, @Cast("Nd4jLong*") long[] resultShapeInfo, - Pointer dresult, @Cast("Nd4jLong*") long[] dresultShapeInfo, - Pointer input, @Cast("Nd4jLong*") long[] inputShapeInfo, - Pointer dinput, @Cast("Nd4jLong*") long[] dinputShapeInfo); - -public native void concat( - @Cast("Nd4jPointer*") PointerPointer extraPointers, - int dimension, - int numArrays, - @Cast("Nd4jPointer*") PointerPointer data, @Cast("Nd4jPointer*") PointerPointer inputShapeInfo, - @Cast("Nd4jPointer*") PointerPointer ddata, @Cast("Nd4jPointer*") PointerPointer dinputShapeInfo, - Pointer result, @Cast("Nd4jLong*") LongPointer resultShapeInfo, - Pointer dresult, @Cast("Nd4jLong*") LongPointer dresultShapeInfo, - @Cast("Nd4jPointer*") PointerPointer tadPointers, @Cast("Nd4jPointer*") PointerPointer offsetPointers); -public native void concat( - @Cast("Nd4jPointer*") PointerPointer extraPointers, - int dimension, - int numArrays, - @Cast("Nd4jPointer*") PointerPointer data, @Cast("Nd4jPointer*") PointerPointer inputShapeInfo, - @Cast("Nd4jPointer*") PointerPointer ddata, @Cast("Nd4jPointer*") PointerPointer dinputShapeInfo, - Pointer result, @Cast("Nd4jLong*") LongBuffer resultShapeInfo, - Pointer dresult, @Cast("Nd4jLong*") LongBuffer dresultShapeInfo, - @Cast("Nd4jPointer*") PointerPointer tadPointers, @Cast("Nd4jPointer*") PointerPointer offsetPointers); -public native void concat( - @Cast("Nd4jPointer*") PointerPointer extraPointers, - int dimension, - int numArrays, - @Cast("Nd4jPointer*") PointerPointer data, @Cast("Nd4jPointer*") PointerPointer inputShapeInfo, - @Cast("Nd4jPointer*") PointerPointer ddata, @Cast("Nd4jPointer*") PointerPointer dinputShapeInfo, - Pointer result, @Cast("Nd4jLong*") long[] resultShapeInfo, - Pointer dresult, @Cast("Nd4jLong*") long[] dresultShapeInfo, - @Cast("Nd4jPointer*") PointerPointer tadPointers, @Cast("Nd4jPointer*") PointerPointer offsetPointers); - - public native void specialConcat( @Cast("Nd4jPointer*") PointerPointer extraPointers, int dimension, @@ -9950,6 +9950,7 @@ public static final int PREALLOC_SIZE = 33554432; // #include // #include +// #include @Namespace("nd4j") @NoOffset public static class ContextBuffers extends Pointer { static { Loader.load(); } /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ @@ -9985,6 +9986,8 @@ public static final int PREALLOC_SIZE = 33554432; public native void setScalarBuffer(Pointer pointer); public native void setAllocationBuffer(Pointer pointer); + public native ErrorReference errorReference(); + public native void triggerOwnership(@Cast("bool") boolean isOwner); public native int deviceId(); @@ -10038,6 +10041,7 @@ public static final int PREALLOC_SIZE = 33554432; // #include // #include // #include +// #include @Namespace("nd4j") @NoOffset public static class LaunchContext extends Pointer { static { Loader.load(); } @@ -10067,9 +10071,12 @@ public static final int PREALLOC_SIZE = 33554432; public native int getDeviceID(); public native void setDeviceID(int deviceID); + public native ErrorReference errorReference(); public static native @Cast("bool") boolean isInitialized(); public static native void releaseBuffers(); + + public static native LaunchContext defaultContext(); diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCudaPresets.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCudaPresets.java index 4466cf4b5..8f95fe5cb 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCudaPresets.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCudaPresets.java @@ -32,6 +32,7 @@ import org.bytedeco.javacpp.tools.InfoMapper; "array/ConstantDescriptor.h", "array/ConstantDataBuffer.h", "array/TadPack.h", + "execution/ErrorReference.h", "memory/MemoryType.h", "Environment.h", "types/utf8string.h", diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/linalg/cpu/nativecpu/CpuNDArrayFactory.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/linalg/cpu/nativecpu/CpuNDArrayFactory.java index 2b47103c3..cacf32b38 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/linalg/cpu/nativecpu/CpuNDArrayFactory.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/linalg/cpu/nativecpu/CpuNDArrayFactory.java @@ -106,6 +106,9 @@ public class CpuNDArrayFactory extends BaseNativeNDArrayFactory { functions.put(8, Loader.addressof("LAPACKE_sgesdd")); functions.put(9, Loader.addressof("LAPACKE_dgesdd")); nativeOps.initializeFunctions(functions); + + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); } @Override @@ -489,32 +492,7 @@ public class CpuNDArrayFactory extends BaseNativeNDArrayFactory { @Override public INDArray toFlattened(char order, Collection matrices) { Preconditions.checkArgument(matrices.size() > 0, "toFlattened expects > 0 operands"); -/* - int length = 0; - val list = new ArrayList(matrices); - val t = list.get(0).dataType(); - for (INDArray m : matrices) { - length += m.length(); - Preconditions.checkArgument(m.dataType() == t, "All operands must have same data type"); - } - INDArray ret = Nd4j.create(t, new long[] {length}, order); - int linearIndex = 0; - PointerPointer dummy = new PointerPointer(new Pointer[] {null}); - for (INDArray m : matrices) { - Nd4j.getCompressor().autoDecompress(m); - - nativeOps.flatten(dummy, linearIndex, order, - ret.data().addressPointer(), (LongPointer) ret.shapeInfoDataBuffer().addressPointer(), - null, null, - m.data().addressPointer(), - (LongPointer) m.shapeInfoDataBuffer().addressPointer(), - null, null); - - linearIndex += m.length(); - } - return ret; - */ return 
Nd4j.exec(new Flatten(order, matrices.toArray(new INDArray[matrices.size()])))[0]; } @@ -555,6 +533,9 @@ public class CpuNDArrayFactory extends BaseNativeNDArrayFactory { new LongPointerWrapper(tadBuffers.getSecond().pointer()) ); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + return result; } @@ -574,65 +555,6 @@ public class CpuNDArrayFactory extends BaseNativeNDArrayFactory { return toConcat[0]; return Nd4j.exec(new Concat(dimension, toConcat))[0]; - - // legacy implementation -/* - // if reusable var wasn't created for this thread, or is smaller then needed - set it to new value - if (extrazA.get() == null || extrazB.get() == null || extrazSize.get() == null || extrazSize.get() < toConcat.length) { - extrazA.set(new PointerPointer(toConcat.length)); - extrazB.set(new PointerPointer(toConcat.length)); - extrazSize.set(toConcat.length); - } - - PointerPointer shapeInfoPointers = extrazA.get(); - PointerPointer dataPointers = extrazB.get(); - int sumAlongDim = 0; - - long[] outputShape = ArrayUtil.copy(toConcat[0].shape()); - - boolean allScalars = true; - - for (int i = 0; i < toConcat.length; i++) { - Preconditions.checkState(toConcat[i].rank() == outputShape.length, "Encountered different array ranks for concat: input[0].shape()=%ndShape, input[%s].shape()=%ndShape", - toConcat[0], i, toConcat[i]); - - if (toConcat[i].isCompressed()) - Nd4j.getCompressor().decompressi(toConcat[i]); - - Preconditions.checkArgument(toConcat[i].dataType() == toConcat[0].dataType(), "All operands must have same data type: input 0 has type %s, input %s has type %s", - toConcat[0].dataType(), i, toConcat[i].dataType()); - - allScalars &= toConcat[i].rank() == 0; - - shapeInfoPointers.put(i, toConcat[i].shapeInfoDataBuffer().addressPointer()); - dataPointers.put(i, toConcat[i].data().addressPointer()); - sumAlongDim += toConcat[i].size(dimension); - for (int j = 0; j < toConcat[i].rank(); j++) { - - if (j != dimension && toConcat[i].size(j) != outputShape[j]) { - throw new IllegalArgumentException( - "Illegal concatenation at array " + i + " and shape element " + j); - } - } - } - - if (allScalars) { - outputShape = new long[]{sumAlongDim}; - } else { - outputShape[dimension] = sumAlongDim; - } - - INDArray ret = Nd4j.createUninitialized(toConcat[0].dataType(), outputShape, Nd4j.order()); - - nativeOps.concat(null, dimension, toConcat.length, - dataPointers, shapeInfoPointers, - null, null, - ret.data().addressPointer(), (LongPointer) ret.shapeInfoDataBuffer().addressPointer(), - null, null, - null, null); - - return ret; - */ } @@ -757,6 +679,8 @@ public class CpuNDArrayFactory extends BaseNativeNDArrayFactory { (LongPointer) zTadShapeInfo, new LongPointerWrapper(zTadOffsets)); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); return ret; } @@ -794,6 +718,9 @@ public class CpuNDArrayFactory extends BaseNativeNDArrayFactory { arrays.length, len); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + return target; } @@ -846,6 +773,9 @@ public class CpuNDArrayFactory extends BaseNativeNDArrayFactory { len, true); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + return target; } @@ -983,6 +913,8 @@ public class CpuNDArrayFactory extends BaseNativeNDArrayFactory { arrays.size(), ptrMap, tadPointers, offsetPointers); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); 
dataPointers.address(); shapePointers.address(); @@ -990,84 +922,6 @@ public class CpuNDArrayFactory extends BaseNativeNDArrayFactory { offsetPointers.address(); } - - /** - * This method converts Half-precision databuffer to current dType buffer. - * - * @param buffer - * @return - */ - /* - @Override - public DataBuffer restoreFromHalfs(DataBuffer buffer) { - if (buffer.dataType() != DataType.COMPRESSED) - throw new IllegalStateException("DataBuffer contains wrong data: " + buffer.dataType()); - - CompressedDataBuffer comp = (CompressedDataBuffer) buffer; - CompressionDescriptor descriptor = comp.getCompressionDescriptor(); - - DataBuffer targetBuffer = Nd4j.createBuffer(descriptor.getCompressedLength() / 2); - - if (Nd4j.dataType() == DataType.DOUBLE) { - nativeOps.convertHalfsToDoubles( - null, - comp.addressPointer(), - (int) descriptor.getCompressedLength() / 2, - targetBuffer.addressPointer() - ); - } else if (Nd4j.dataType() == DataType.FLOAT) { - nativeOps.convertHalfsToFloats( - null, - comp.addressPointer(), - (int) descriptor.getCompressedLength() / 2, - targetBuffer.addressPointer() - ); - } else { - throw new UnsupportedOperationException("Target dtype isn't supported: " + Nd4j.dataType()); - } - - return targetBuffer; - } - */ - - /** - * This method converts Single/Double precision databuffer to Half-precision databuffer - * - * @param buffer - * @return - */ - /*@Override - public DataBuffer convertToHalfs(DataBuffer buffer) { - // we allocate pointer - ShortPointer pointer = new ShortPointer(buffer.length()); - - if (buffer.dataType() == DataType.DOUBLE) { - nativeOps.convertDoublesToHalfs( - null, - buffer.addressPointer(), - (int) buffer.length(), - pointer - ); - } else if (buffer.dataType() == DataType.FLOAT) { - nativeOps.convertFloatsToHalfs( - null, - buffer.addressPointer(), - (int) buffer.length(), - pointer - ); - } else { - throw new UnsupportedOperationException("Source dtype isn't supported: " + buffer.dataType()); - } - - CompressionDescriptor descriptor = new CompressionDescriptor(buffer, new Float16()); - descriptor.setCompressedLength(buffer.length() * 2); - - - CompressedDataBuffer result = new CompressedDataBuffer(pointer, descriptor); - return result; - } - */ - /** * This method converts Single/Double precision databuffer to Half-precision databuffer * @@ -1081,6 +935,9 @@ public class CpuNDArrayFactory extends BaseNativeNDArrayFactory { throw new UnsupportedOperationException("Impossible to compress View. Consider using dup() before. 
"); DataBuffer buffer = convertDataEx(typeSrc, source.data(), typeDst); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + source.setData(buffer); if (buffer instanceof CompressedDataBuffer) @@ -1125,6 +982,9 @@ public class CpuNDArrayFactory extends BaseNativeNDArrayFactory { convertDataEx(typeSrc, source, typeDst, buffer); + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); + return buffer; } @@ -1132,6 +992,9 @@ public class CpuNDArrayFactory extends BaseNativeNDArrayFactory { public void convertDataEx(DataTypeEx typeSrc, Pointer source, DataTypeEx typeDst, Pointer target, long length) { nativeOps.convertTypes(null, typeSrc.ordinal(), source, length, typeDst.ordinal(), target); + + if (nativeOps.lastErrorCode() != 0) + throw new RuntimeException(nativeOps.lastErrorMessage()); } @Override diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/linalg/cpu/nativecpu/ops/NativeOpExecutioner.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/linalg/cpu/nativecpu/ops/NativeOpExecutioner.java index 11373c440..e79c21feb 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/linalg/cpu/nativecpu/ops/NativeOpExecutioner.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/linalg/cpu/nativecpu/ops/NativeOpExecutioner.java @@ -234,6 +234,9 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { null); } + if (loop.lastErrorCode() != 0) + throw new RuntimeException(loop.lastErrorMessage()); + profilingConfigurableHookOut(op, st); return op.z(); } @@ -563,6 +566,9 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { } } + if (loop.lastErrorCode() != 0) + throw new RuntimeException(loop.lastErrorMessage()); + return ret; } @@ -644,6 +650,8 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { throw new UnsupportedOperationException(); } + if (loop.lastErrorCode() != 0) + throw new RuntimeException(loop.lastErrorMessage()); } public INDArray exec(ScalarOp op) { @@ -690,6 +698,9 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { throw new ND4JIllegalStateException("Unknown op type: [" + op.getOpType() +"]"); } + if (loop.lastErrorCode() != 0) + throw new RuntimeException(loop.lastErrorMessage()); + profilingConfigurableHookOut(op, st); return op.z(); @@ -886,6 +897,9 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { } + if (loop.lastErrorCode() != 0) + throw new RuntimeException(loop.lastErrorMessage()); + profilingConfigurableHookOut(op, st); } @@ -962,6 +976,8 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { throw new UnsupportedOperationException("Unknown operation type: [" + op.getOpType() + "]"); } + if (loop.lastErrorCode() != 0) + throw new RuntimeException(loop.lastErrorMessage()); return op.z(); } @@ -1091,6 +1107,9 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { batch.getSample().maxIntArrays(), batch.getSample().maxIntArraySize(), batch.getSample().maxIndexArguments(), batch.getSample().maxRealArguments(), pointer, FlatBuffersMapper.getDataTypeAsByte(dataType)); + if (loop.lastErrorCode() != 0) + throw new RuntimeException(loop.lastErrorMessage()); + } /** @@ -1197,6 +1216,8 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { numIndexArguments, intArrays, numIntArrays, block.getRealArgumentsPointer(), numRealArguments, 
FlatBuffersMapper.getDataTypeAsByte(dataType)); + if (loop.lastErrorCode() != 0) + throw new RuntimeException(loop.lastErrorMessage()); } /** @@ -1284,6 +1305,9 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { op.extraArgsDataBuff(op.z().dataType()).addressPointer()); } + if (loop.lastErrorCode() != 0) + throw new RuntimeException(loop.lastErrorMessage()); + profilingConfigurableHookOut(op, st); return op.z(); @@ -1370,6 +1394,9 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { (float) threshold); //long t2 = System.currentTimeMillis(); + if (loop.lastErrorCode() != 0) + throw new RuntimeException(loop.lastErrorMessage()); + if (cntAbs < 2) return null; @@ -1429,6 +1456,9 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { loop.convertTypes(null, DataTypeEx.THRESHOLD.ordinal(), buffer.addressPointer(), target.length(), typeDst.ordinal(), target.data().addressPointer()); + if (loop.lastErrorCode() != 0) + throw new RuntimeException(loop.lastErrorMessage()); + return target; } @@ -1460,6 +1490,9 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { (IntPointer) buffer.addressPointer(), (float) threshold); + if (loop.lastErrorCode() != 0) + throw new RuntimeException(loop.lastErrorMessage()); + return affected; } @@ -1473,6 +1506,9 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { (LongPointer) target.shapeInfoDataBuffer().addressPointer() ); + if (loop.lastErrorCode() != 0) + throw new RuntimeException(loop.lastErrorMessage()); + return target; } @@ -1673,136 +1709,6 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { } catch (Exception e) { throw new RuntimeException("Op [" + name + "] execution failed", e); } -/* - val name = op.opName().toLowerCase(); - val hash = op.opHash(); - - if (name.equals("noop")) { - return op.outputArguments(); - } - - val inputShapes = getInputShapes(op.numInputArguments()); - val inputBuffers = getInputBuffers(op.numInputArguments()); - - int cnt= 0; - val inputArgs = op.inputArguments(); - for (val in: inputArgs) { - if(in == null) - throw new NullPointerException("Input argument is null for op " + op.getClass().getName()); - - if (!in.isEmpty()) - inputBuffers.put(cnt, in.data().addressPointer()); - - inputShapes.put(cnt++, in.shapeInfoDataBuffer().addressPointer()); - } - - val outputArgs = op.outputArguments(); - for(int i = 0; i < outputArgs.length; i++) { - if(outputArgs[i] == null) - throw new ND4JIllegalStateException("Op output arguments must not be null! Op " + op.getClass().getName()); - } - - - val outputShapes = getOutputShapes(op.numOutputArguments()); - val outputBuffers = getOutputBuffers(op.numOutputArguments()); - - cnt= 0; - for (val out: outputArgs) { - if(out.isEmpty()){ - outputBuffers.put(cnt, null); - } else { - outputBuffers.put(cnt, out.data().addressPointer()); - } - outputShapes.put(cnt++, out.shapeInfoDataBuffer().addressPointer()); - } - - val iArgs = op.numIArguments() > 0 ? getLongPointerFrom(iArgsPointer,op.numIArguments()) : null; - val tArgs = op.numTArguments() > 0 ? getDoublePointerFrom(tArgsPointer,op.numTArguments()) : null; - val bArgs = op.numBArguments() > 0 ? 
getBooleanPointerFrom(bArgsPointer,op.numBArguments()) : null;
-
-        cnt = 0;
-        val iArgs1 = op.iArgs();
-        for (val i: iArgs1)
-            iArgs.put(cnt++, i);
-
-        cnt = 0;
-        val bArgs1 = op.bArgs();
-        for (val b: bArgs1)
-            bArgs.put(cnt++, b);
-
-        cnt = 0;
-        val tArgs1 = op.tArgs();
-        for (val t: tArgs1)
-            tArgs.put(cnt++, t);
-
-        val t = op.numInputArguments();
-
-        OpStatus status = OpStatus.ND4J_STATUS_OK;
-        try {
-            val code = loop.execCustomOp(
-                    null,
-                    hash,
-                    inputBuffers,
-                    inputShapes,
-                    op.numInputArguments(),
-                    outputBuffers,
-                    outputShapes,
-                    op.numOutputArguments(),
-                    tArgs, op.numTArguments(),
-                    iArgs, op.numIArguments(),
-                    bArgs, op.numBArguments(),
-                    op.isInplaceCall());
-
-            status = OpStatus.byNumber(code);
-
-            if (status != OpStatus.ND4J_STATUS_OK)
-                throw new ND4JIllegalStateException("Failed to execute op [" + name + "] with error code [" + status +"]");
-        }catch(Exception e) {
-            val sb = new StringBuilder();
-            sb.append("Inputs: [(");
-            for( int i=0; i<inputArgs.length; i++ ){
-                if(i > 0)
-                    sb.append("), (");
-                sb.append(Shape.shapeToStringShort(inputArgs[i]));
-            }
-            sb.append(")]. Outputs: [(");
-            for( int i=0; i<outputArgs.length; i++ ){
-                if(i > 0)
-                    sb.append("), (");
-                sb.append(Shape.shapeToStringShort(outputArgs[i]));
-            }
-            sb.append(")]. tArgs: ");
-            if(op.numTArguments() > 0){
-                sb.append(Arrays.toString(op.tArgs()));
-            } else {
-                sb.append("-");
-            }
-            sb.append(". iArgs: ");
-            if(op.numIArguments() > 0){
-                sb.append(Arrays.toString(op.iArgs()));
-            } else {
-                sb.append("-");
-            }
-            if(op instanceof DifferentialFunction){
-                String n = ((DifferentialFunction) op).getOwnName();
-                if(n != null && !n.equals(op.opName())){
-                    sb.append(". Op own name: \"").append(n).append("\"");
-                }
-            }
-            log.error("Failed to execute op " + op.opName() + ". Attempted to execute with " +
-                    String.valueOf(op.numInputArguments()) + " inputs, " +
-                    String.valueOf(op.numOutputArguments()) + " outputs, "+
-                    String.valueOf(op.numTArguments()) + " targs and " +
-                    String.valueOf(op.numIArguments()) + " iargs.
" + - sb.toString() + - " - Please see above message (printed out from c++) for a possible cause of error."); - throw e; - } - - profilingConfigurableHookOut(op, st); - - return op.outputArguments(); - */ } protected LongShapeDescriptor getShapeFromPointer(LongPointer ptr) { @@ -1870,6 +1776,9 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { ptrptr = loop.calculateOutputShapes2(null, hash, inputBuffers, inputShapes, op.numInputArguments(), tArgs, op.numTArguments(), iArgs, op.numIArguments(), bArgs, op.numBArguments()); + + if (loop.lastErrorCode() != 0) + throw new RuntimeException(loop.lastErrorMessage()); } catch (Throwable t){ StringBuilder sb = new StringBuilder(); sb.append("Inputs: [("); @@ -1893,6 +1802,9 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { throw t; } + if (loop.lastErrorCode() != 0) + throw new RuntimeException(loop.lastErrorMessage()); + if (ptrptr == null) throw new RuntimeException(); @@ -1929,6 +1841,9 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { @Override public void registerGraph(long id, Pointer graph) { loop.registerGraph(null, id, graph); + + if (loop.lastErrorCode() != 0) + throw new RuntimeException(loop.lastErrorMessage()); } @Override @@ -1952,7 +1867,10 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { val newMap = new LinkedHashMap(); - OpaqueVariablesSet result = loop.executeStoredGraph(null, id, ptrBuffers, ptrShapes, ptrIndices, map.size()); + OpaqueVariablesSet result = loop.executeStoredGraph(null, id, ptrBuffers, ptrShapes, ptrIndices, map.size()); + + if (loop.lastErrorCode() != 0) + throw new RuntimeException(loop.lastErrorMessage()); OpStatus status = OpStatus.byNumber(loop.getVariablesSetStatus(result)); @@ -1996,6 +1914,8 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { @Override public void forgetGraph(long id) { loop.unregisterGraph(null, id); + if (loop.lastErrorCode() != 0) + throw new RuntimeException(loop.lastErrorMessage()); } /** @@ -2055,6 +1975,9 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { array.data().addressPointer(), (LongPointer) tadX.getFirst().addressPointer(), (LongPointer) tadX.getSecond().addressPointer(), null, null, null, updates.data().addressPointer(), (LongPointer) tadY.getFirst().addressPointer(), (LongPointer) tadY.getSecond().addressPointer(), null, null, null, (IntPointer) indices.data().addressPointer(), null); + + if (loop.lastErrorCode() != 0) + throw new RuntimeException(loop.lastErrorMessage()); } @Override @@ -2078,6 +2001,10 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { val status = loop.execCustomOp2(null, op.opHash(), context.contextPointer()); + + if (loop.lastErrorCode() != 0) + throw new RuntimeException(loop.lastErrorMessage()); + if (status != 0) throw new RuntimeException("Op [" + op.opName() + "] execution failed"); @@ -2155,6 +2082,9 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { loop.inspectArray(null, array.data().addressPointer(), (LongPointer) array.shapeInfoDataBuffer().addressPointer(), null, null, debugInfo); + if (loop.lastErrorCode() != 0) + throw new RuntimeException(loop.lastErrorMessage()); + return INDArrayStatistics.builder() .minValue(debugInfo._minValue()) .maxValue(debugInfo._maxValue()) @@ -2171,6 +2101,8 @@ public class NativeOpExecutioner extends DefaultOpExecutioner { @Override public DataBuffer createShapeInfo(long[] shape, long[] stride, long elementWiseStride, char order, DataType dtype, boolean empty) { 
OpaqueConstantDataBuffer dbf = loop.shapeBuffer(shape.length, new LongPointer(shape), new LongPointer(stride), dtype.toInt(), order, elementWiseStride, empty);
+        if (loop.lastErrorCode() != 0)
+            throw new RuntimeException(loop.lastErrorMessage());
 
         val result = new LongBuffer(loop.getConstantDataBufferPrimary(dbf), Shape.shapeInfoLength(shape.length));
@@ -2183,6 +2115,9 @@
     public TadPack tadShapeInfoAndOffsets(INDArray array, int[] dimension) {
         OpaqueTadPack pack = loop.tadOnlyShapeInfo((LongPointer) array.shapeInfoDataBuffer().addressPointer(), new IntPointer(dimension), dimension.length);
 
+        if (loop.lastErrorCode() != 0)
+            throw new RuntimeException(loop.lastErrorMessage());
+
         val tadShape = new LongBuffer(loop.getPrimaryShapeInfo(pack), loop.getShapeInfoLength(pack));
         val tadOffsets = new LongBuffer(loop.getPrimaryOffsets(pack), loop.getNumberOfTads(pack));
@@ -2205,11 +2140,19 @@
 
     @Override
     public String runLightBenchmarkSuit(boolean printOut) {
-        return loop.runLightBenchmarkSuit(printOut);
+        val s = loop.runLightBenchmarkSuit(printOut);
+        if (loop.lastErrorCode() != 0)
+            throw new RuntimeException(loop.lastErrorMessage());
+
+        return s;
     }
 
     @Override
     public String runFullBenchmarkSuit(boolean printOut) {
-        return loop.runFullBenchmarkSuit(printOut);
+        val s = loop.runFullBenchmarkSuit(printOut);
+        if (loop.lastErrorCode() != 0)
+            throw new RuntimeException(loop.lastErrorMessage());
+
+        return s;
     }
 }
diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java
index 8e71816f8..38c0cb8c4 100644
--- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java
+++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java
@@ -467,6 +467,60 @@ public class Nd4jCpu extends org.nd4j.nativeblas.Nd4jCpuHelper {
 
 // #endif //DEV_TESTS_TADPACK_H
 
+// Parsed from execution/ErrorReference.h
+
+/*******************************************************************************
+ * Copyright (c) 2015-2018 Skymind, Inc.
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Apache License, Version 2.0 which is available at
+ * https://www.apache.org/licenses/LICENSE-2.0.
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ ******************************************************************************/
+
+//
+// @author raver119@gmail.com
+//
+
+// #ifndef DEV_TESTS_ERRORREFERENCE_H
+// #define DEV_TESTS_ERRORREFERENCE_H
+
+// #include
+// #include
+  @Namespace("nd4j") @NoOffset public static class ErrorReference extends Pointer {
+      static { Loader.load(); }
+      /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */
+      public ErrorReference(Pointer p) { super(p); }
+      /** Native array allocator. Access with {@link Pointer#position(long)}.
*/
+      public ErrorReference(long size) { super((Pointer)null); allocateArray(size); }
+      private native void allocateArray(long size);
+      @Override public ErrorReference position(long position) {
+          return (ErrorReference)super.position(position);
+      }
+
+      public ErrorReference() { super((Pointer)null); allocate(); }
+      private native void allocate();
+
+      public native int errorCode();
+      public native @Cast("char*") String errorMessage();
+
+      public native void setErrorCode(int errorCode);
+      public native void setErrorMessage(@StdString BytePointer message);
+      public native void setErrorMessage(@StdString String message);
+  }
+
+
+
+// #endif //DEV_TESTS_ERRORREFERENCE_H
+
+
 // Parsed from Environment.h
 
 /*******************************************************************************
@@ -688,6 +742,18 @@ bool verbose = false;
 // #include
 // #include
 
+/**
+ * This function returns the last error code stored.
+ * @return non-zero if something went wrong
+ */
+public native int lastErrorCode();
+
+/**
+ * This function returns the last error message, if the last error code > 0.
+ * @return the last error message stored
+ */
+public native @Cast("char*") String lastErrorMessage();
+
 /**
  *
  * @param p
@@ -1710,72 +1776,6 @@ public native void execScalarBoolTad(@Cast("Nd4jPointer*") PointerPointer extraP
                                @Cast("Nd4jLong*") long[] tadShapeInfo, @Cast("Nd4jLong*") long[] tadOffsets,
                                @Cast("Nd4jLong*") long[] tadShapeInfoZ, @Cast("Nd4jLong*") long[] tadOffsetsZ);
-
-/**
-* Append an input array
-* to the end of a flat array
-* in a particular order
-* @param offset the offset of the array to start at
-* @param order the order
-* @param result the result array
-* @param resultShapeInfo the shape info for te array
-* @param input the input for the array
-* @param inputShapeInfo the shape information for that array
-*/
-public native void flatten(
-        @Cast("Nd4jPointer*") PointerPointer extraPointers,
-        int offset,
-        char order,
-        Pointer result, @Cast("Nd4jLong*") LongPointer resultShapeInfo,
-        Pointer dresult, @Cast("Nd4jLong*") LongPointer dresultShapeInfo,
-        Pointer input, @Cast("Nd4jLong*") LongPointer inputShapeInfo,
-        Pointer dinput, @Cast("Nd4jLong*") LongPointer dinputShapeInfo);
-public native void flatten(
-        @Cast("Nd4jPointer*") PointerPointer extraPointers,
-        int offset,
-        char order,
-        Pointer result, @Cast("Nd4jLong*") LongBuffer resultShapeInfo,
-        Pointer dresult, @Cast("Nd4jLong*") LongBuffer dresultShapeInfo,
-        Pointer input, @Cast("Nd4jLong*") LongBuffer inputShapeInfo,
-        Pointer dinput, @Cast("Nd4jLong*") LongBuffer dinputShapeInfo);
-public native void flatten(
-        @Cast("Nd4jPointer*") PointerPointer extraPointers,
-        int offset,
-        char order,
-        Pointer result, @Cast("Nd4jLong*") long[] resultShapeInfo,
-        Pointer dresult, @Cast("Nd4jLong*") long[] dresultShapeInfo,
-        Pointer input, @Cast("Nd4jLong*") long[] inputShapeInfo,
-        Pointer dinput, @Cast("Nd4jLong*") long[] dinputShapeInfo);
-
-public native void concat(
-        @Cast("Nd4jPointer*") PointerPointer extraPointers,
-        int dimension,
-        int numArrays,
-        @Cast("Nd4jPointer*") PointerPointer data, @Cast("Nd4jPointer*") PointerPointer inputShapeInfo,
-        @Cast("Nd4jPointer*") PointerPointer ddata, @Cast("Nd4jPointer*") PointerPointer dinputShapeInfo,
-        Pointer result, @Cast("Nd4jLong*") LongPointer resultShapeInfo,
-        Pointer dresult, @Cast("Nd4jLong*") LongPointer dresultShapeInfo,
-        @Cast("Nd4jPointer*") PointerPointer tadPointers, @Cast("Nd4jPointer*") PointerPointer offsetPointers);
-public native void concat(
-        @Cast("Nd4jPointer*") PointerPointer extraPointers,
-        int dimension,
-        int
numArrays,
-        @Cast("Nd4jPointer*") PointerPointer data, @Cast("Nd4jPointer*") PointerPointer inputShapeInfo,
-        @Cast("Nd4jPointer*") PointerPointer ddata, @Cast("Nd4jPointer*") PointerPointer dinputShapeInfo,
-        Pointer result, @Cast("Nd4jLong*") LongBuffer resultShapeInfo,
-        Pointer dresult, @Cast("Nd4jLong*") LongBuffer dresultShapeInfo,
-        @Cast("Nd4jPointer*") PointerPointer tadPointers, @Cast("Nd4jPointer*") PointerPointer offsetPointers);
-public native void concat(
-        @Cast("Nd4jPointer*") PointerPointer extraPointers,
-        int dimension,
-        int numArrays,
-        @Cast("Nd4jPointer*") PointerPointer data, @Cast("Nd4jPointer*") PointerPointer inputShapeInfo,
-        @Cast("Nd4jPointer*") PointerPointer ddata, @Cast("Nd4jPointer*") PointerPointer dinputShapeInfo,
-        Pointer result, @Cast("Nd4jLong*") long[] resultShapeInfo,
-        Pointer dresult, @Cast("Nd4jLong*") long[] dresultShapeInfo,
-        @Cast("Nd4jPointer*") PointerPointer tadPointers, @Cast("Nd4jPointer*") PointerPointer offsetPointers);
-
-
 public native void specialConcat(
         @Cast("Nd4jPointer*") PointerPointer extraPointers,
         int dimension,
@@ -22877,6 +22877,7 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD();
 
 // #include
 // #include
+// #include <execution/ErrorReference.h>
 
 @Namespace("nd4j") @NoOffset public static class ContextBuffers extends Pointer {
     static { Loader.load(); }
     /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */
@@ -22912,6 +22913,8 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD();
     public native void setScalarBuffer(Pointer pointer);
     public native void setAllocationBuffer(Pointer pointer);
 
+    public native ErrorReference errorReference();
+
     public native void triggerOwnership(@Cast("bool") boolean isOwner);
 
     public native int deviceId();
@@ -22961,6 +22964,7 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD();
 // #include
 // #include
 // #include
+// #include <execution/ErrorReference.h>
 
 @Namespace("nd4j") @NoOffset public static class LaunchContext extends Pointer {
     static { Loader.load(); }
@@ -22985,9 +22989,12 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD();
 
     public native int getDeviceID();
     public native void setDeviceID(int deviceID);
+    public native ErrorReference errorReference();
 
     public static native @Cast("bool") boolean isInitialized();
     public static native void releaseBuffers();
+
+    public static native LaunchContext defaultContext();
diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpuPresets.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpuPresets.java
index 58a2a7d02..554016686 100644
--- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpuPresets.java
+++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpuPresets.java
@@ -38,6 +38,7 @@ import java.util.Scanner;
                 "array/ConstantDataBuffer.h",
                 "array/ConstantDescriptor.h",
                 "array/TadPack.h",
+                "execution/ErrorReference.h",
                 "Environment.h",
                 "types/utf8string.h",
                 "NativeOps.h",
diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/Nd4jTestsC.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/Nd4jTestsC.java
index b302c8c0f..915d6f650 100644
--- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/Nd4jTestsC.java
+++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/Nd4jTestsC.java
@@ -5216,6 +5216,8 @@ public class Nd4jTestsC extends BaseNd4jTest {
         INDArray array = Nd4j.create(new double[] {10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0});
         INDArray
exp = Nd4j.create(new double[] {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}); + log.info("Array shapeInfo: {}", array.shapeInfoJava()); + INDArray rev = Nd4j.reverse(array); assertEquals(exp, rev); @@ -5226,7 +5228,7 @@ public class Nd4jTestsC extends BaseNd4jTest { INDArray array = Nd4j.create(new double[] {9, 8, 7, 6, 5, 4, 3, 2, 1, 0}); INDArray exp = Nd4j.create(new double[] {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}); - INDArray rev = Nd4j.getExecutioner().exec(new Reverse(array, Nd4j.createUninitialized(array.length())))[0]; + INDArray rev = Nd4j.getExecutioner().exec(new Reverse(array, array.ulike()))[0]; assertEquals(exp, rev); } @@ -5236,7 +5238,7 @@ public class Nd4jTestsC extends BaseNd4jTest { INDArray array = Nd4j.create(new double[] {10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}); INDArray exp = Nd4j.create(new double[] {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}); - INDArray rev = Nd4j.getExecutioner().exec(new Reverse(array, Nd4j.createUninitialized(array.length())))[0]; + INDArray rev = Nd4j.getExecutioner().exec(new Reverse(array,array.ulike()))[0]; assertEquals(exp, rev); } @@ -5335,11 +5337,103 @@ public class Nd4jTestsC extends BaseNd4jTest { assertNotNull(lsd); //Fails here on CUDA, OK on native/cpu } + @Test + public void testReverseSmall_1() { + val array = Nd4j.linspace(1, 10, 10, DataType.INT); + val exp = array.dup(array.ordering()); + + Transforms.reverse(array, false); + Transforms.reverse(array, false); + + val jexp = exp.data().asInt(); + val jarr = array.data().asInt(); + assertArrayEquals(jexp, jarr); + assertEquals(exp, array); + } + + @Test + public void testReverseSmall_2() { + val array = Nd4j.linspace(1, 10, 10, DataType.INT); + val exp = array.dup(array.ordering()); + + val reversed = Transforms.reverse(array, true); + val rereversed = Transforms.reverse(reversed, true); + + val jexp = exp.data().asInt(); + val jarr = rereversed.data().asInt(); + assertArrayEquals(jexp, jarr); + assertEquals(exp, rereversed); + } + + @Test + public void testReverseSmall_3() { + val array = Nd4j.linspace(1, 11, 11, DataType.INT); + val exp = array.dup(array.ordering()); + + Transforms.reverse(array, false); + + log.info("Reversed shapeInfo: {}", array.shapeInfoJava()); + log.info("Reversed: {}", array); + + Transforms.reverse(array, false); + + val jexp = exp.data().asInt(); + val jarr = array.data().asInt(); + assertArrayEquals(jexp, jarr); + assertEquals(exp, array); + } + + @Test + public void testReverseSmall_4() { + val array = Nd4j.linspace(1, 11, 11, DataType.INT); + val exp = array.dup(array.ordering()); + + val reversed = Transforms.reverse(array, true); + + log.info("Reversed: {}", reversed); + + val rereversed = Transforms.reverse(reversed, true); + + val jexp = exp.data().asInt(); + val jarr = rereversed.data().asInt(); + assertArrayEquals(jexp, jarr); + assertEquals(exp, rereversed); + } + + @Test + public void testReverse_1() { + val array = Nd4j.linspace(1, 2017152, 2017152, DataType.INT); + val exp = array.dup(array.ordering()); + + Transforms.reverse(array, false); + Transforms.reverse(array, false); + + val jexp = exp.data().asInt(); + val jarr = array.data().asInt(); + assertArrayEquals(jexp, jarr); + assertEquals(exp, array); + } + + @Test + public void testReverse_2() { + val array = Nd4j.linspace(1, 2017152, 2017152, DataType.INT); + val exp = array.dup(array.ordering()); + + val reversed = Transforms.reverse(array, true); + val rereversed = Transforms.reverse(reversed, true); + + val jexp = exp.data().asInt(); + val jarr = rereversed.data().asInt(); + assertArrayEquals(jexp, jarr); + 
assertEquals(exp, rereversed); + } + @Test public void testNativeSort3_1() { INDArray array = Nd4j.linspace(1, 2017152, 2017152, DataType.DOUBLE).reshape(1, -1); INDArray exp = array.dup(); Transforms.reverse(array, false); + log.info("Reverse: {}", array); long time1 = System.currentTimeMillis();
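
[Editorial note, not part of the patch: every Java-side hunk above repeats one idiom: invoke a native
entry point, then immediately read back the error state exposed by the two new functions,
lastErrorCode() and lastErrorMessage(). A minimal sketch of that pattern follows, assuming the standard
NativeOpsHolder accessor from nd4j; the helper name checkForNativeErrors is illustrative only, since the
patch inlines the check at each call site rather than routing it through a shared method.]

    import org.nd4j.nativeblas.NativeOps;
    import org.nd4j.nativeblas.NativeOpsHolder;

    public class NativeErrorCheckSketch {
        // Illustrative helper: surface the last native error, if any, as a Java exception.
        // lastErrorCode() and lastErrorMessage() are the entry points added by this patch.
        static void checkForNativeErrors(NativeOps nativeOps) {
            if (nativeOps.lastErrorCode() != 0)
                throw new RuntimeException(nativeOps.lastErrorMessage());
        }

        public static void main(String[] args) {
            NativeOps nativeOps = NativeOpsHolder.getInstance().getDeviceNativeOps();

            // ... any native call goes here ...

            // Check immediately after the call, so a failure is attributed
            // to the call site that actually produced it.
            checkForNativeErrors(nativeOps);
        }
    }

[Checking right after each call, as the patch does, keeps C++ exceptions from being silently swallowed
at the JNI boundary while avoiding a throw from native code into Java.]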