diff --git a/libnd4j/blas/NativeOps.h b/libnd4j/blas/NativeOps.h index 01a9d900a..d8b8b939d 100755 --- a/libnd4j/blas/NativeOps.h +++ b/libnd4j/blas/NativeOps.h @@ -77,441 +77,406 @@ bool verbose = false; #include #include -class ND4J_EXPORT NativeOps { +extern "C" { -public: - NativeOps(); +/** + * + * @param p + * @param len + */ +void tryPointer(Nd4jPointer extra, Nd4jPointer p, int len); - /** - * - * @param p - * @param len - */ - void tryPointer(Nd4jPointer extra, Nd4jPointer p, int len); +/** + * + * @param num + */ +void setElementThreshold(int num); - /** - * - * @param num - */ - void setElementThreshold(int num); +/** + * + * @param num + */ +void setTADThreshold(int num); - /** - * - * @param num - */ - void setTADThreshold(int num); +/** + * + * @param opNum + * @param x + * @param xShapeInfo + * @param extraParams + */ +void execIndexReduceScalar(Nd4jPointer *extraPointers, + int opNum, + void *hX, Nd4jLong *hXShapeInfo, + void *dX, Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, Nd4jLong *hZShapeInfo, + void *dZ, Nd4jLong *dZShapeInfo); - /** - * - * @param opNum - * @param x - * @param xShapeInfo - * @param extraParams - */ - void execIndexReduceScalar(Nd4jPointer *extraPointers, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo); +/** + * + * @param opNum + * @param x + * @param xShapeInfo + * @param extraParams + * @param result + * @param resultShapeInfoBuffer + * @param dimension + * @param dimensionLength + */ +void execIndexReduce(Nd4jPointer *extraPointers, + int opNum, + void *hX, Nd4jLong *hXShapeInfo, + void *dX, Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, Nd4jLong *hZShapeInfo, + void *dZ, Nd4jLong *dZShapeInfo, + void *hDimension, Nd4jLong *hDimensionShape, + void *dDimension, Nd4jLong *dDimensionShape); - /** - * - * @param opNum - * @param x - * @param xShapeInfo - * @param extraParams - * @param result - * @param resultShapeInfoBuffer - * @param dimension - * @param dimensionLength - */ - void execIndexReduce(Nd4jPointer *extraPointers, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hDimension, Nd4jLong *hDimensionShape, - void *dDimension, Nd4jLong *dDimensionShape); - - /** - * - * @param opNum - * @param x - * @param xShapeInfo - * @param y - * @param yShapeInfo - * @param result - * @param resultShapeInfo - * @param dimension - * @param dimensionLength - */ - void execBroadcast( - Nd4jPointer *extraPointers, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hDimension, Nd4jLong *hDimensionShape, - void *dDimension, Nd4jLong *dDimensionShape); +/** + * + * @param opNum + * @param x + * @param xShapeInfo + * @param y + * @param yShapeInfo + * @param result + * @param resultShapeInfo + * @param dimension + * @param dimensionLength + */ +void execBroadcast( + Nd4jPointer *extraPointers, + int opNum, + void *hX, Nd4jLong *hXShapeInfo, + void *dX, Nd4jLong *dXShapeInfo, + void *hY, Nd4jLong *hYShapeInfo, + void *dY, Nd4jLong *dYShapeInfo, + void *hZ, Nd4jLong *hZShapeInfo, + void *dZ, Nd4jLong *dZShapeInfo, + void *hDimension, Nd4jLong *hDimensionShape, + void *dDimension, Nd4jLong *dDimensionShape); - void 
execBroadcastBool( - Nd4jPointer *extraPointers, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hDimension, Nd4jLong *hDimensionShape, - void *dDimension, Nd4jLong *dDimensionShape); +void execBroadcastBool( + Nd4jPointer *extraPointers, + int opNum, + void *hX, Nd4jLong *hXShapeInfo, + void *dX, Nd4jLong *dXShapeInfo, + void *hY, Nd4jLong *hYShapeInfo, + void *dY, Nd4jLong *dYShapeInfo, + void *hZ, Nd4jLong *hZShapeInfo, + void *dZ, Nd4jLong *dZShapeInfo, + void *hDimension, Nd4jLong *hDimensionShape, + void *dDimension, Nd4jLong *dDimensionShape); - /** - * - * @param opNum - * @param dx - * @param xShapeInfo - * @param y - * @param yShapeInfo - * @param result - * @param resultShapeInfo - * @param extraParams - * @param n - */ - void execPairwiseTransform( - Nd4jPointer *extraPointers, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraParams); +/** + * + * @param opNum + * @param dx + * @param xShapeInfo + * @param y + * @param yShapeInfo + * @param result + * @param resultShapeInfo + * @param extraParams + * @param n + */ +void execPairwiseTransform( + Nd4jPointer *extraPointers, + int opNum, + void *hX, Nd4jLong *hXShapeInfo, + void *dX, Nd4jLong *dXShapeInfo, + void *hY, Nd4jLong *hYShapeInfo, + void *dY, Nd4jLong *dYShapeInfo, + void *hZ, Nd4jLong *hZShapeInfo, + void *dZ, Nd4jLong *dZShapeInfo, + void *extraParams); - void execPairwiseTransformBool( - Nd4jPointer *extraPointers, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraParams); +void execPairwiseTransformBool( + Nd4jPointer *extraPointers, + int opNum, + void *hX, Nd4jLong *hXShapeInfo, + void *dX, Nd4jLong *dXShapeInfo, + void *hY, Nd4jLong *hYShapeInfo, + void *dY, Nd4jLong *dYShapeInfo, + void *hZ, Nd4jLong *hZShapeInfo, + void *dZ, Nd4jLong *dZShapeInfo, + void *extraParams); - /** - * - * @param opNum - * @param x - * @param xShapeInfo - * @param extraParams - * @param result - * @param resultShapeInfo - */ - void execReduceFloat(Nd4jPointer *extraPointers, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo); +/** + * + * @param opNum + * @param x + * @param xShapeInfo + * @param extraParams + * @param result + * @param resultShapeInfo + */ +void execReduceFloat(Nd4jPointer *extraPointers, + int opNum, + void *hX, Nd4jLong *hXShapeInfo, + void *dX, Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, Nd4jLong *hZShapeInfo, + void *dZ, Nd4jLong *dZShapeInfo); - void execReduceSame(Nd4jPointer *extraPointers, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo); - - void execReduceBool(Nd4jPointer *extraPointers, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo); - - - void execReduceLong(Nd4jPointer 
*extraPointers, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo); - - /** - * - * @param opNum - * @param x - * @param xShapeInfo - * @param extraParams - * @param result - * @param resultShapeInfo - */ - void execReduceFloat(Nd4jPointer *extraPointers, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hDimension, Nd4jLong *hDimensionShape, - void *dDimension, Nd4jLong *dDimensionShape); - - - void execReduceSame(Nd4jPointer *extraPointers, +void execReduceSame(Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, void *extraParams, void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hDimension, Nd4jLong *hDimensionShape, - void *dDimension, Nd4jLong *dDimensionShape); + void *dZ, Nd4jLong *dZShapeInfo); - - void execReduceBool(Nd4jPointer *extraPointers, +void execReduceBool(Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, void *extraParams, void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hDimension, Nd4jLong *hDimensionShape, - void *dDimension, Nd4jLong *dDimensionShape); + void *dZ, Nd4jLong *dZShapeInfo); - void execReduceLong(Nd4jPointer *extraPointers, +void execReduceLong(Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, void *extraParams, void *hZ, Nd4jLong *hZShapeInfo, + void *dZ, Nd4jLong *dZShapeInfo); + +/** + * + * @param opNum + * @param x + * @param xShapeInfo + * @param extraParams + * @param result + * @param resultShapeInfo + */ +void execReduceFloat2(Nd4jPointer *extraPointers, + int opNum, + void *hX, Nd4jLong *hXShapeInfo, + void *dX, Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, Nd4jLong *hZShapeInfo, + void *dZ, Nd4jLong *dZShapeInfo, + void *hDimension, Nd4jLong *hDimensionShape, + void *dDimension, Nd4jLong *dDimensionShape); + + +void execReduceSame2(Nd4jPointer *extraPointers, + int opNum, + void *hX, Nd4jLong *hXShapeInfo, + void *dX, Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, Nd4jLong *hZShapeInfo, + void *dZ, Nd4jLong *dZShapeInfo, + void *hDimension, Nd4jLong *hDimensionShape, + void *dDimension, Nd4jLong *dDimensionShape); + + +void execReduceBool2(Nd4jPointer *extraPointers, + int opNum, + void *hX, Nd4jLong *hXShapeInfo, + void *dX, Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, Nd4jLong *hZShapeInfo, + void *dZ, Nd4jLong *dZShapeInfo, + void *hDimension, Nd4jLong *hDimensionShape, + void *dDimension, Nd4jLong *dDimensionShape); + + +void execReduceLong2(Nd4jPointer *extraPointers, + int opNum, + void *hX, Nd4jLong *hXShapeInfo, + void *dX, Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, Nd4jLong *hZShapeInfo, + void *dZ, Nd4jLong *dZShapeInfo, + void *hDimension, Nd4jLong *hDimensionShape, + void *dDimension, Nd4jLong *dDimensionShape); + +/** + * + * @param opNum + * @param x + * @param xShapeInfo + * @param extraParamsVals + * @param y + * @param yShapeInfo + * @param result + * @param resultShapeInfo + */ +void execReduce3(Nd4jPointer *extraPointers, + int opNum, + void *hX, Nd4jLong *hXShapeInfo, + void *dX, Nd4jLong *dXShapeInfo, + void *extraParamsVals, + void *hY, Nd4jLong *hYShapeInfo, + void *dY, Nd4jLong *dYShapeInfo, + void *hZ, Nd4jLong 
*hZShapeInfo, + void *dZ, Nd4jLong *dZShapeInfo); + +/** + * + * @param opNum + * @param x + * @param xShapeInfo + * @param extraParamsVals + * @param y + * @param yShapeInfo + */ +void execReduce3Scalar(Nd4jPointer *extraPointers, + int opNum, + void *hX, Nd4jLong *hXShapeInfo, + void *dX, Nd4jLong *dXShapeInfo, + void *extraParamsVals, + void *hY, Nd4jLong *hYShapeInfo, + void *dY, Nd4jLong *dYShapeInfo, + void *hZ, Nd4jLong *hZShapeInfo, + void *dZ, Nd4jLong *dZShapeInfo); +/** + * + * @param opNum + * @param x + * @param xShapeInfo + * @param extraParamsVals + * @param y + * @param yShapeInfo + * @param result + * @param resultShapeInfoBuffer + * @param dimension + * @param dimensionLength + */ +void execReduce3Tad(Nd4jPointer *extraPointers, + int opNum, + void *hX, Nd4jLong *hXShapeInfo, + void *dX, Nd4jLong *dXShapeInfo, + void *extraParamsVals, + void *hY, Nd4jLong *hYShapeInfo, + void *dY, Nd4jLong *dYShapeInfo, + void *hZ, Nd4jLong *hZShapeInfo, + void *dZ, Nd4jLong *dZShapeInfo, + void *hDimension, Nd4jLong *hDimensionShape, + void *dDimension, Nd4jLong *dDimensionShape, + Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, + Nd4jLong *yTadOnlyShapeInfo, Nd4jLong *yTadOffsets); + + +void execReduce3All(Nd4jPointer *extraPointers, + int opNum, + void *hX, Nd4jLong *hXShapeInfo, + void *dX, Nd4jLong *dXShapeInfo, + void *extraParamsVals, + void *hY, Nd4jLong *hYShapeInfo, + void *dY, Nd4jLong *dYShapeInfo, + void *hZ, Nd4jLong *hZShapeInfo, + void *dZ, Nd4jLong *dZShapeInfo, + void *hDimension, Nd4jLong *hDimensionShape, + void *dDimension, Nd4jLong *dDimensionShape, + Nd4jLong *xTadShapeInfo, Nd4jLong *xOffsets, + Nd4jLong *yTadShapeInfo, Nd4jLong *yOffsets); + +/** + * + * @param opNum + * @param x + * @param xShapeInfo + * @param result + * @param resultShapeInfo + * @param scalar + * @param extraParams + * @param n + */ +void execScalar(Nd4jPointer *extraPointers, + int opNum, + void *hX, Nd4jLong *hXShapeInfo, + void *dX, Nd4jLong *dXShapeInfo, + void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, - void *hDimension, Nd4jLong *hDimensionShape, - void *dDimension, Nd4jLong *dDimensionShape); + void *hScalar, Nd4jLong *hSscalarShapeInfo, + void *dScalar, Nd4jLong *dSscalarShapeInfo, + void *extraParams); - /** - * - * @param opNum - * @param x - * @param xShapeInfo - * @param extraParamsVals - * @param y - * @param yShapeInfo - * @param result - * @param resultShapeInfo - */ - void execReduce3(Nd4jPointer *extraPointers, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParamsVals, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo); +void execScalarBool(Nd4jPointer *extraPointers, + int opNum, + void *hX, Nd4jLong *hXShapeInfo, + void *dX, Nd4jLong *dXShapeInfo, + void *hZ, Nd4jLong *hZShapeInfo, + void *dZ, Nd4jLong *dZShapeInfo, + void *hScalar, Nd4jLong *hSscalarShapeInfo, + void *dScalar, Nd4jLong *dSscalarShapeInfo, + void *extraParams); - /** - * - * @param opNum - * @param x - * @param xShapeInfo - * @param extraParamsVals - * @param y - * @param yShapeInfo - */ - void execReduce3Scalar(Nd4jPointer *extraPointers, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParamsVals, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo); - /** - * - * @param opNum - * @param x - * @param xShapeInfo - * 
@param extraParamsVals - * @param y - * @param yShapeInfo - * @param result - * @param resultShapeInfoBuffer - * @param dimension - * @param dimensionLength - */ - void execReduce3(Nd4jPointer *extraPointers, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParamsVals, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hDimension, Nd4jLong *hDimensionShape, - void *dDimension, Nd4jLong *dDimensionShape, - Nd4jLong *tadOnlyShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *yTadOnlyShapeInfo, Nd4jLong *yTadOffsets); - - - void execReduce3All(Nd4jPointer *extraPointers, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParamsVals, - void *hY, Nd4jLong *hYShapeInfo, - void *dY, Nd4jLong *dYShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hDimension, Nd4jLong *hDimensionShape, - void *dDimension, Nd4jLong *dDimensionShape, - Nd4jLong *xTadShapeInfo, Nd4jLong *xOffsets, - Nd4jLong *yTadShapeInfo, Nd4jLong *yOffsets); - - /** - * - * @param opNum - * @param x - * @param xShapeInfo - * @param result - * @param resultShapeInfo - * @param scalar - * @param extraParams - * @param n - */ - void execScalar(Nd4jPointer *extraPointers, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hScalar, Nd4jLong *hSscalarShapeInfo, - void *dScalar, Nd4jLong *dSscalarShapeInfo, - void *extraParams); - - void execScalarBool(Nd4jPointer *extraPointers, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hScalar, Nd4jLong *hSscalarShapeInfo, - void *dScalar, Nd4jLong *dSscalarShapeInfo, - void *extraParams); - - /** - * - * @param opNum - * @param x - * @param xShapeInfo - * @param extraParams - */ - void execSummaryStatsScalar(Nd4jPointer *extraPointers, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - bool biasCorrected); - /** - * - * @param opNum - * @param x - * @param xShapeInfo - * @param extraParams - * @param result - * @param resultShapeInfo - */ - void execSummaryStats(Nd4jPointer *extraPointers, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - bool biasCorrected); - /** - * - * @param opNum - * @param x - * @param xShapeInfo - * @param extraParams - * @param result - * @param resultShapeInfoBuffer - * @param dimension - * @param dimensionLength - */ - void execSummaryStats(Nd4jPointer *extraPointers, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *extraParams, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hDimension, Nd4jLong *hDimensionShape, - void *dDimension, Nd4jLong *dDimensionShape, - bool biasCorrected, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); - - /** - * - * @param opNum - * @param dx - * @param xShapeInfo - * @param result - * @param resultShapeInfo - * @param extraParams - * @param n - */ - void execTransformFloat(Nd4jPointer *extraPointers, +/** + * + * @param opNum + * @param x + * @param xShapeInfo + * @param extraParams + */ 
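+/*
+ * Illustrative sketch (an editorial aside, not an existing API contract): the exec*
+ * entry points in this flattened C interface all follow the same calling convention -
+ * an opNum selecting the operation, paired host/device buffers (hX/dX, hZ/dZ, ...)
+ * with their shapeInfo buffers, and op-specific extraParams. Assuming a CPU backend
+ * where the caller already holds valid host buffers and shapeInfo (normally prepared
+ * on the ND4J/JavaCPP side), a call reduces to:
+ *
+ *   // hX, hXShapeInfo, hZ, hZShapeInfo and opNum are assumed to be set up by the
+ *   // caller; the device-side pointers and extraPointers are backend-specific and
+ *   // may simply mirror the host pointers (or be nullptr) on CPU builds.
+ *   execSummaryStatsScalar(nullptr, opNum,
+ *                          hX, hXShapeInfo, dX, dXShapeInfo,
+ *                          nullptr,
+ *                          hZ, hZShapeInfo, dZ, dZShapeInfo,
+ *                          true);   // biasCorrected
+ */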
+void execSummaryStatsScalar(Nd4jPointer *extraPointers, + int opNum, + void *hX, Nd4jLong *hXShapeInfo, + void *dX, Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, Nd4jLong *hZShapeInfo, + void *dZ, Nd4jLong *dZShapeInfo, + bool biasCorrected); +/** + * + * @param opNum + * @param x + * @param xShapeInfo + * @param extraParams + * @param result + * @param resultShapeInfo + */ +void execSummaryStats(Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, + void *extraParams, void *hZ, Nd4jLong *hZShapeInfo, void *dZ, Nd4jLong *dZShapeInfo, - void *extraParams); + bool biasCorrected); +/** + * + * @param opNum + * @param x + * @param xShapeInfo + * @param extraParams + * @param result + * @param resultShapeInfoBuffer + * @param dimension + * @param dimensionLength + */ +void execSummaryStatsTad(Nd4jPointer *extraPointers, + int opNum, + void *hX, Nd4jLong *hXShapeInfo, + void *dX, Nd4jLong *dXShapeInfo, + void *extraParams, + void *hZ, Nd4jLong *hZShapeInfo, + void *dZ, Nd4jLong *dZShapeInfo, + void *hDimension, Nd4jLong *hDimensionShape, + void *dDimension, Nd4jLong *dDimensionShape, + bool biasCorrected, + Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets); - void execTransformSame(Nd4jPointer *extraPointers, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraParams); - - void execTransformBool(Nd4jPointer *extraPointers, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraParams); - - void execTransformAny(Nd4jPointer *extraPointers, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *extraParams); - - void execTransformStrict(Nd4jPointer *extraPointers, +/** + * + * @param opNum + * @param dx + * @param xShapeInfo + * @param result + * @param resultShapeInfo + * @param extraParams + * @param n + */ +void execTransformFloat(Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, @@ -519,46 +484,78 @@ public: void *dZ, Nd4jLong *dZShapeInfo, void *extraParams); - /** - * - * @param extraPointers - * @param opNum - * @param x - * @param xShapeInfo - * @param z - * @param zShapeInfo - * @param scalars - * @param extraParams - * @param dimension - * @param dimensionLength - */ - void execScalar(Nd4jPointer *extraPointers, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hScalars, Nd4jLong *hScalarShapeInfo, - void *dScalars, Nd4jLong *dScalarShapeInfo, - void *extraParams, - void *hDimension, Nd4jLong *hDimensionShape, - void *dDimension, Nd4jLong *dDimensionShape, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ); +void execTransformSame(Nd4jPointer *extraPointers, + int opNum, + void *hX, Nd4jLong *hXShapeInfo, + void *dX, Nd4jLong *dXShapeInfo, + void *hZ, Nd4jLong *hZShapeInfo, + void *dZ, Nd4jLong *dZShapeInfo, + void *extraParams); - void execScalarBool(Nd4jPointer *extraPointers, - int opNum, - void *hX, Nd4jLong *hXShapeInfo, - void *dX, Nd4jLong *dXShapeInfo, - void *hZ, Nd4jLong *hZShapeInfo, - void *dZ, Nd4jLong *dZShapeInfo, - void *hScalars, Nd4jLong *hScalarShapeInfo, - void *dScalars, 
Nd4jLong *dScalarShapeInfo, - void *extraParams, - void *hDimension, Nd4jLong *hDimensionShape, - void *dDimension, Nd4jLong *dDimensionShape, - Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, - Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ); +void execTransformBool(Nd4jPointer *extraPointers, + int opNum, + void *hX, Nd4jLong *hXShapeInfo, + void *dX, Nd4jLong *dXShapeInfo, + void *hZ, Nd4jLong *hZShapeInfo, + void *dZ, Nd4jLong *dZShapeInfo, + void *extraParams); + +void execTransformAny(Nd4jPointer *extraPointers, + int opNum, + void *hX, Nd4jLong *hXShapeInfo, + void *dX, Nd4jLong *dXShapeInfo, + void *hZ, Nd4jLong *hZShapeInfo, + void *dZ, Nd4jLong *dZShapeInfo, + void *extraParams); + +void execTransformStrict(Nd4jPointer *extraPointers, + int opNum, + void *hX, Nd4jLong *hXShapeInfo, + void *dX, Nd4jLong *dXShapeInfo, + void *hZ, Nd4jLong *hZShapeInfo, + void *dZ, Nd4jLong *dZShapeInfo, + void *extraParams); + +/** + * + * @param extraPointers + * @param opNum + * @param x + * @param xShapeInfo + * @param z + * @param zShapeInfo + * @param scalars + * @param extraParams + * @param dimension + * @param dimensionLength + */ +void execScalarTad(Nd4jPointer *extraPointers, + int opNum, + void *hX, Nd4jLong *hXShapeInfo, + void *dX, Nd4jLong *dXShapeInfo, + void *hZ, Nd4jLong *hZShapeInfo, + void *dZ, Nd4jLong *dZShapeInfo, + void *hScalars, Nd4jLong *hScalarShapeInfo, + void *dScalars, Nd4jLong *dScalarShapeInfo, + void *extraParams, + void *hDimension, Nd4jLong *hDimensionShape, + void *dDimension, Nd4jLong *dDimensionShape, + Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, + Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ); + +void execScalarBoolTad(Nd4jPointer *extraPointers, + int opNum, + void *hX, Nd4jLong *hXShapeInfo, + void *dX, Nd4jLong *dXShapeInfo, + void *hZ, Nd4jLong *hZShapeInfo, + void *dZ, Nd4jLong *dZShapeInfo, + void *hScalars, Nd4jLong *hScalarShapeInfo, + void *dScalars, Nd4jLong *dScalarShapeInfo, + void *extraParams, + void *hDimension, Nd4jLong *hDimensionShape, + void *dDimension, Nd4jLong *dDimensionShape, + Nd4jLong *tadShapeInfo, Nd4jLong *tadOffsets, + Nd4jLong *tadShapeInfoZ, Nd4jLong *tadOffsetsZ); /** @@ -572,777 +569,783 @@ public: * @param input the input for the array * @param inputShapeInfo the shape information for that array */ - void flatten( - Nd4jPointer *extraPointers, - int offset, - char order, - void *result, Nd4jLong *resultShapeInfo, - void *dresult, Nd4jLong *dresultShapeInfo, - void *input, Nd4jLong *inputShapeInfo, - void *dinput, Nd4jLong *dinputShapeInfo); - - void concat( - Nd4jPointer *extraPointers, - int dimension, - int numArrays, - Nd4jPointer *data, Nd4jPointer *inputShapeInfo, - Nd4jPointer *ddata, Nd4jPointer *dinputShapeInfo, - void *result, Nd4jLong *resultShapeInfo, - void *dresult, Nd4jLong *dresultShapeInfo, - Nd4jPointer *tadPointers, Nd4jPointer *offsetPointers); - - - void specialConcat ( - Nd4jPointer *extraPointers, - int dimension, - int numArrays, - Nd4jPointer *data, - Nd4jPointer *inputShapeInfo, - void *result, - Nd4jLong *resultShapeInfo, - Nd4jPointer *tadPointers, - Nd4jPointer *offsetPointers); - - /** - * This method implementation exists only for cuda. - * The other backends should have dummy method for JNI compatibility reasons. 
- */ - void initializeDevicesAndFunctions(); - - void initializeFunctions(Nd4jPointer *functions); - - /** - * This method acquires memory chunk of requested size on host side - * - * @param pointer pointer that'll be used for allocation - * @param memorySize memory size, in bytes - * @param flags optional parameter - */ - Nd4jPointer mallocHost(Nd4jLong memorySize, int flags); - - /** - * This method acquires memory chunk of requested size on specified device - * - * @param pointer pointer that'll be used for allocation - * @param memorySize memory size, in bytes - * @param ptrToDeviceId pointer to deviceId. For cuda that's just and int, for OpenCL that's pointer to device_id, etc - * @param flags optional parameter - */ - Nd4jPointer mallocDevice(Nd4jLong memorySize, int deviceId, int flags); - - /** - * This method releases previously allocated host memory space - * - * @param pointer pointer that'll be freed - */ - int freeHost(Nd4jPointer pointer); - - /** - * This method releases previously allocated memory space on device - * - * @param pointer pointer that'll be freed - * @param ptrToDeviceId pointer to deviceId. - */ - int freeDevice(Nd4jPointer pointer, int deviceId); - - /** - * - * @return - */ - int ompGetMaxThreads(); - - /** - * - * @return - */ - int ompGetNumThreads(); - - /** - * - * @param threads - */ - void setOmpNumThreads(int threads); - - /** - * - * @param threads - */ - void setOmpMinThreads(int threads); - - - - - /** - * - * @return - */ - Nd4jPointer createContext(); - - /** - * - * @return - */ - Nd4jPointer createStream(); - - /** - * - * @return - */ - Nd4jPointer createEvent(); - - /** - * - * @param event - * @param stream - * @return - */ - int registerEvent(Nd4jPointer event, Nd4jPointer stream); - - /** - * - * @param event - * @return - */ - int destroyEvent(Nd4jPointer event); - - /** - * - * @param ptrToDeviceId - * @return - */ - int setDevice(int deviceId); - - /** - * - * @return - */ - int getDevice(); - - /** - * - * @param stream - * @return - */ - int streamSynchronize(Nd4jPointer stream); - - /** - * - * @param event - * @return - */ - int eventSynchronize(Nd4jPointer event); - - /** - * - * @param ptrToDeviceId - * @return - */ - Nd4jLong getDeviceFreeMemory(int deviceId); - - /** - * Returns amount of free memory for current device - * @return - */ - Nd4jLong getDeviceFreeMemory(); - - /** - * - * @param ptrToDeviceId - * @return - */ - Nd4jLong getDeviceTotalMemory(int deviceId); - - /** - * - * @param ptrToDeviceId - * @return - */ - int getDeviceMajor(int deviceId); - - /** - * This method returns amount of cached memory - * @param deviceId - * @return - */ - Nd4jLong getCachedMemory(int deviceId); - - /** - * - * @param ptrToDeviceId - * @return - */ - int getDeviceMinor(int deviceId); - - /** - * - * @param ptrToDeviceId - * @return - */ - const char * getDeviceName(int deviceId); - - /** - * - * @param dst - * @param src - * @param size - * @param flags - * @param reserved - * @return - */ - int memcpy(Nd4jPointer dst, - Nd4jPointer src, - Nd4jLong size, - int flags, - Nd4jPointer reserved); - - /** - * - * @param dst - * @param src - * @param size - * @param flags - * @param reserved - * @return - */ - int memcpyAsync(Nd4jPointer dst, - Nd4jPointer src, - Nd4jLong size, - int flags, - Nd4jPointer reserved); - - /** - * - * @param dst - * @param value - * @param size - * @param flags - * @param reserved - * @return - */ - int memset(Nd4jPointer dst, - int value, - Nd4jLong size, - int flags, - Nd4jPointer reserved); - - /** - * - * 
@param dst - * @param value - * @param size - * @param flags - * @param reserved - * @return - */ - int memsetAsync(Nd4jPointer dst, - int value, - Nd4jLong size, - int flags, - Nd4jPointer reserved); - - /** - * - * @param dst - * @param src - * @param size - * @param flags - * @param reserved - * @return - */ - int memcpyConstantAsync(Nd4jLong dst, - Nd4jPointer src, - Nd4jLong size, - int flags, - Nd4jPointer reserved); - - /** - * - * @return - */ - Nd4jPointer getConstantSpace(); - - /** - * - * @return - */ - int getAvailableDevices(); - - /** - * - * @param reallyEnable - */ - void enableDebugMode(bool reallyEnable); - - /** - * - * @param reallyEnable - */ - void enableVerboseMode(bool reallyEnable); - - /** - * - * @param gridSize - */ - void setGridLimit(int gridSize); - - /** - * - * @param xShapeInfo - * @param dimension - * @param dimensionLength - * @param targetBuffer - * @param offsetsBuffer - */ - nd4j::TadPack* tadOnlyShapeInfo(Nd4jLong *xShapeInfo, - int *dimension, - int dimensionLength); - - /* - * PullRow special op - */ - - /** - * - * @param extraPointers - * @param x - * @param xShapeInfo - * @param z - * @param zShapeInfo - * @param n - * @param indexes - * @param tadShapeInfo - * @param tadOffsets - * @param zTadShapeInfo - * @param zTadOffsets - */ - void pullRows(Nd4jPointer *extraPointers, - void *x, Nd4jLong *xShapeInfo, - void *dx, Nd4jLong *dxShapeInfo, - void *z, Nd4jLong *zShapeInfo, - void *dz, Nd4jLong *dzShapeInfo, - Nd4jLong n, - Nd4jLong *indexes, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets, - Nd4jLong *zTadShapeInfo, - Nd4jLong *zTadOffsets); - - /** - * - * @param extras - * @param dx - * @param dz - * @param n - * @param length - * @param propagate - */ - void average(Nd4jPointer *extras, - Nd4jPointer *x, Nd4jLong *xShapeInfo, - Nd4jPointer *dx, Nd4jLong *dxShapeInfo, - void *z, Nd4jLong *zShapeInfo, - void *dz, Nd4jLong *dzShapeInfo, - int n, - Nd4jLong length, - bool propagate); - - - void accumulate(Nd4jPointer *extras, - Nd4jPointer *x, Nd4jLong *xShapeInfo, - Nd4jPointer *dx, Nd4jLong *dxShapeInfo, - void *z, Nd4jLong *zShapeInfo, - void *dz, Nd4jLong *dzShapeInfo, - int n, - Nd4jLong length); - - - /** - * P2P enabler - */ - /** - * - * @param enable - */ - void enableP2P(bool enable); - - /** - * - */ - void checkP2P(); - - /** - * - * @return - */ - bool isP2PAvailable(); - - /** - * Shuffle methods - */ - - /** - * - * @param extras - * @param dx - * @param xShapeInfo - * @param dz - * @param zShapeInfo - * @param N - * @param shuffleMap - * @param tadShapeInfo - * @param tadOffsets - */ - void shuffle(Nd4jPointer *extras, - Nd4jPointer *x, Nd4jPointer *xShapeInfo, - Nd4jPointer *dx, Nd4jPointer *dxShapeInfo, - Nd4jPointer *z, Nd4jPointer *zShapeInfo, - Nd4jPointer *dz, Nd4jPointer *dzShapeInfo, - int N, - int *shuffleMap, - Nd4jPointer *tadShapeInfo, - Nd4jPointer *tadOffsets); - - - /** - * Type Conversions - */ - - /** - * - * @param extras - * @param srcType - * @param x - * @param N - * @param dstType - * @param z - */ - void convertTypes(Nd4jPointer *extras, int srcType, Nd4jPointer x, Nd4jLong N, int dstType, Nd4jPointer z); - - - /** - * - * @return - */ - bool isExperimentalEnabled(); - - /** - * Aggregate - */ - - /** - * - * @param extraPointers - * @param opNum - * @param arguments - * @param numArguments - * @param shapeArguments - * @param numShapeArguments - * @param indexArguments - * @param numIndexArguments - * @param intArrays - * @param numIntArrays - * @param realArguments - * @param numRealArguments - */ - void 
execAggregate(Nd4jPointer *extraPointers, - int opNum, - void **arguments, - int numArguments, - Nd4jLong **shapeArguments, - int numShapeArguments, - int *indexArguments, - int numIndexArguments, - int **intArrays, - int numIntArrays, - void *realArguments, - int numRealArguments, - nd4j::DataType dtype); - - - template - void _batchExecutor(Nd4jPointer *extraPointers, - int numAggregates, - int opNum, - int maxArgs, - int maxShapes, - int maxIntArrays, - int maxIntArraySize, - int maxIdx, - int maxReals, - void *ptrToArguments, - nd4j::DataType dtype); - - void execAggregateBatch(Nd4jPointer *extraPointers, - int numAggregates, - int opNum, - int maxArgs, - int maxShapes, - int maxIntArrays, - int maxIntArraySize, - int maxIdx, - int maxReals, - void *ptrToArguments, - nd4j::DataType dtype); - - /** - * Random operations - */ - - /** - * - * @param extraPointers - * @param opNum - * @param state - * @param z - * @param zShapeBuffer - * @param extraArguments - */ - void execRandom(Nd4jPointer *extraPointers, - int opNum, - Nd4jPointer state, - void *hZ, Nd4jLong *hZShapeBuffer, - void *dZ, Nd4jLong *dZShapeBuffer, - void *extraArguments); - - /** - * - * @param extraPointers - * @param opNum - * @param state - * @param x - * @param xShapeBuffer - * @param y - * @param yShapeBuffer - * @param z - * @param zShapeBuffer - * @param extraArguments - */ - void execRandom(Nd4jPointer *extraPointers, - int opNum, - Nd4jPointer state, - void *hX, Nd4jLong *hXShapeBuffer, - void *dX, Nd4jLong *dXShapeBuffer, - void *hY, Nd4jLong *hYShapeBuffer, - void *dY, Nd4jLong *dYShapeBuffer, - void *hZ, Nd4jLong *hZShapeBuffer, - void *dZ, Nd4jLong *dZShapeBuffer, - void *extraArguments); - - /** - * - * @param extraPointers - * @param opNum - * @param state - * @param x - * @param xShapeBuffer - * @param z - * @param zShapeBuffer - * @param extraArguments - */ - void execRandom(Nd4jPointer *extraPointers, - int opNum, - Nd4jPointer state, - void *hX, Nd4jLong *hXShapeBuffer, - void *dX, Nd4jLong *dXShapeBuffer, - void *hZ, Nd4jLong *hZShapeBuffer, - void *dZ, Nd4jLong *dZShapeBuffer, - void *extraArguments); - - - /** - * - * @param extraPointers - * @param seed - * @param bufferSize - * @param ptrToBuffer - * @return - */ - Nd4jPointer initRandom(Nd4jPointer *extraPointers, - long seed, - long bufferSize, - Nd4jPointer ptrToBuffer); - - /** - * - * @param extraPointers - * @param seed - * @param ptrRandom - */ - void refreshBuffer(Nd4jPointer *extraPointers, +void flatten( + Nd4jPointer *extraPointers, + int offset, + char order, + void *result, Nd4jLong *resultShapeInfo, + void *dresult, Nd4jLong *dresultShapeInfo, + void *input, Nd4jLong *inputShapeInfo, + void *dinput, Nd4jLong *dinputShapeInfo); + +void concat( + Nd4jPointer *extraPointers, + int dimension, + int numArrays, + Nd4jPointer *data, Nd4jPointer *inputShapeInfo, + Nd4jPointer *ddata, Nd4jPointer *dinputShapeInfo, + void *result, Nd4jLong *resultShapeInfo, + void *dresult, Nd4jLong *dresultShapeInfo, + Nd4jPointer *tadPointers, Nd4jPointer *offsetPointers); + + +void specialConcat ( + Nd4jPointer *extraPointers, + int dimension, + int numArrays, + Nd4jPointer *data, + Nd4jPointer *inputShapeInfo, + void *result, + Nd4jLong *resultShapeInfo, + Nd4jPointer *tadPointers, + Nd4jPointer *offsetPointers); + +/** + * This method implementation exists only for cuda. + * The other backends should have dummy method for JNI compatibility reasons. 
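+ * In practice that means a CPU build can ship an empty body for this function, but
+ * the symbol itself still has to be exported so the JNI/JavaCPP bindings resolve it.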
+ */ +void initializeDevicesAndFunctions(); + +void initializeFunctions(Nd4jPointer *functions); + +/** + * This method acquires memory chunk of requested size on host side + * + * @param pointer pointer that'll be used for allocation + * @param memorySize memory size, in bytes + * @param flags optional parameter + */ +Nd4jPointer mallocHost(Nd4jLong memorySize, int flags); + +/** + * This method acquires memory chunk of requested size on specified device + * + * @param pointer pointer that'll be used for allocation + * @param memorySize memory size, in bytes + * @param ptrToDeviceId pointer to deviceId. For cuda that's just and int, for OpenCL that's pointer to device_id, etc + * @param flags optional parameter + */ +Nd4jPointer mallocDevice(Nd4jLong memorySize, int deviceId, int flags); + +/** + * This method releases previously allocated host memory space + * + * @param pointer pointer that'll be freed + */ +int freeHost(Nd4jPointer pointer); + +/** + * This method releases previously allocated memory space on device + * + * @param pointer pointer that'll be freed + * @param ptrToDeviceId pointer to deviceId. + */ +int freeDevice(Nd4jPointer pointer, int deviceId); + +/** + * + * @return + */ +int ompGetMaxThreads(); + +/** + * + * @return + */ +int ompGetNumThreads(); + +/** + * + * @param threads + */ +void setOmpNumThreads(int threads); + +/** + * + * @param threads + */ +void setOmpMinThreads(int threads); + + + + +/** + * + * @return + */ +Nd4jPointer createContext(); + +/** + * + * @return + */ +Nd4jPointer createStream(); + +/** + * + * @return + */ +Nd4jPointer createEvent(); + +/** + * + * @param event + * @param stream + * @return + */ +int registerEvent(Nd4jPointer event, Nd4jPointer stream); + +/** + * + * @param event + * @return + */ +int destroyEvent(Nd4jPointer event); + +/** + * + * @param ptrToDeviceId + * @return + */ +int setDevice(int deviceId); + +/** + * + * @return + */ +int getDevice(); + +/** + * + * @param stream + * @return + */ +int streamSynchronize(Nd4jPointer stream); + +/** + * + * @param event + * @return + */ +int eventSynchronize(Nd4jPointer event); + +/** + * + * @param ptrToDeviceId + * @return + */ +Nd4jLong getDeviceFreeMemory(int deviceId); + +/** + * Returns amount of free memory for current device + * @return + */ +Nd4jLong getDeviceFreeMemoryDefault(); + +/** + * + * @param ptrToDeviceId + * @return + */ +Nd4jLong getDeviceTotalMemory(int deviceId); + +/** + * + * @param ptrToDeviceId + * @return + */ +int getDeviceMajor(int deviceId); + +/** + * This method returns amount of cached memory + * @param deviceId + * @return + */ +Nd4jLong getCachedMemory(int deviceId); + +/** + * + * @param ptrToDeviceId + * @return + */ +int getDeviceMinor(int deviceId); + +/** + * + * @param ptrToDeviceId + * @return + */ +const char * getDeviceName(int deviceId); + +/** + * + * @param dst + * @param src + * @param size + * @param flags + * @param reserved + * @return + */ +int memcpySync(Nd4jPointer dst, + Nd4jPointer src, + Nd4jLong size, + int flags, + Nd4jPointer reserved); + +/** + * + * @param dst + * @param src + * @param size + * @param flags + * @param reserved + * @return + */ +int memcpyAsync(Nd4jPointer dst, + Nd4jPointer src, + Nd4jLong size, + int flags, + Nd4jPointer reserved); + +/** + * + * @param dst + * @param value + * @param size + * @param flags + * @param reserved + * @return + */ +int memsetSync(Nd4jPointer dst, + int value, + Nd4jLong size, + int flags, + Nd4jPointer reserved); + +/** + * + * @param dst + * @param value + * @param size 
+ * @param flags + * @param reserved + * @return + */ +int memsetAsync(Nd4jPointer dst, + int value, + Nd4jLong size, + int flags, + Nd4jPointer reserved); + +/** + * + * @param dst + * @param src + * @param size + * @param flags + * @param reserved + * @return + */ +int memcpyConstantAsync(Nd4jLong dst, + Nd4jPointer src, + Nd4jLong size, + int flags, + Nd4jPointer reserved); + +/** + * + * @return + */ +Nd4jPointer getConstantSpace(); + +/** + * + * @return + */ +int getAvailableDevices(); + +/** + * + * @param reallyEnable + */ +void enableDebugMode(bool reallyEnable); + +/** + * + * @param reallyEnable + */ +void enableVerboseMode(bool reallyEnable); + +/** + * + * @param gridSize + */ +void setGridLimit(int gridSize); + +/** + * + * @param xShapeInfo + * @param dimension + * @param dimensionLength + * @param targetBuffer + * @param offsetsBuffer + */ +nd4j::TadPack* tadOnlyShapeInfo(Nd4jLong *xShapeInfo, + int *dimension, + int dimensionLength); + +/* + * PullRow special op + */ + +/** + * + * @param extraPointers + * @param x + * @param xShapeInfo + * @param z + * @param zShapeInfo + * @param n + * @param indexes + * @param tadShapeInfo + * @param tadOffsets + * @param zTadShapeInfo + * @param zTadOffsets + */ +void pullRows(Nd4jPointer *extraPointers, + void *x, Nd4jLong *xShapeInfo, + void *dx, Nd4jLong *dxShapeInfo, + void *z, Nd4jLong *zShapeInfo, + void *dz, Nd4jLong *dzShapeInfo, + Nd4jLong n, + Nd4jLong *indexes, + Nd4jLong *tadShapeInfo, + Nd4jLong *tadOffsets, + Nd4jLong *zTadShapeInfo, + Nd4jLong *zTadOffsets); + +/** + * + * @param extras + * @param dx + * @param dz + * @param n + * @param length + * @param propagate + */ +void average(Nd4jPointer *extras, + Nd4jPointer *x, Nd4jLong *xShapeInfo, + Nd4jPointer *dx, Nd4jLong *dxShapeInfo, + void *z, Nd4jLong *zShapeInfo, + void *dz, Nd4jLong *dzShapeInfo, + int n, + Nd4jLong length, + bool propagate); + + +void accumulate(Nd4jPointer *extras, + Nd4jPointer *x, Nd4jLong *xShapeInfo, + Nd4jPointer *dx, Nd4jLong *dxShapeInfo, + void *z, Nd4jLong *zShapeInfo, + void *dz, Nd4jLong *dzShapeInfo, + int n, + Nd4jLong length); + + +/** + * P2P enabler + */ +/** + * + * @param enable + */ +void enableP2P(bool enable); + +/** + * + */ +void checkP2P(); + +/** + * + * @return + */ +bool isP2PAvailable(); + +/** + * Shuffle methods + */ + +/** + * + * @param extras + * @param dx + * @param xShapeInfo + * @param dz + * @param zShapeInfo + * @param N + * @param shuffleMap + * @param tadShapeInfo + * @param tadOffsets + */ +void shuffle(Nd4jPointer *extras, + Nd4jPointer *x, Nd4jPointer *xShapeInfo, + Nd4jPointer *dx, Nd4jPointer *dxShapeInfo, + Nd4jPointer *z, Nd4jPointer *zShapeInfo, + Nd4jPointer *dz, Nd4jPointer *dzShapeInfo, + int N, + int *shuffleMap, + Nd4jPointer *tadShapeInfo, + Nd4jPointer *tadOffsets); + + +/** + * Type Conversions + */ + +/** + * + * @param extras + * @param srcType + * @param x + * @param N + * @param dstType + * @param z + */ +void convertTypes(Nd4jPointer *extras, int srcType, Nd4jPointer x, Nd4jLong N, int dstType, Nd4jPointer z); + + +/** + * + * @return + */ +bool isExperimentalEnabled(); + +/** + * Aggregate + */ + +/** + * + * @param extraPointers + * @param opNum + * @param arguments + * @param numArguments + * @param shapeArguments + * @param numShapeArguments + * @param indexArguments + * @param numIndexArguments + * @param intArrays + * @param numIntArrays + * @param realArguments + * @param numRealArguments + */ +void execAggregate(Nd4jPointer *extraPointers, + int opNum, + void **arguments, + int 
numArguments, + Nd4jLong **shapeArguments, + int numShapeArguments, + int *indexArguments, + int numIndexArguments, + int **intArrays, + int numIntArrays, + void *realArguments, + int numRealArguments, + nd4j::DataType dtype); + + +void batchExecutor(Nd4jPointer *extraPointers, + int numAggregates, + int opNum, + int maxArgs, + int maxShapes, + int maxIntArrays, + int maxIntArraySize, + int maxIdx, + int maxReals, + void *ptrToArguments, + nd4j::DataType dtype); + +void execAggregateBatch(Nd4jPointer *extraPointers, + int numAggregates, + int opNum, + int maxArgs, + int maxShapes, + int maxIntArrays, + int maxIntArraySize, + int maxIdx, + int maxReals, + void *ptrToArguments, + nd4j::DataType dtype); + +/** + * Random operations + */ + +/** + * + * @param extraPointers + * @param opNum + * @param state + * @param z + * @param zShapeBuffer + * @param extraArguments + */ +void execRandom(Nd4jPointer *extraPointers, + int opNum, + Nd4jPointer state, + void *hZ, Nd4jLong *hZShapeBuffer, + void *dZ, Nd4jLong *dZShapeBuffer, + void *extraArguments); + +/** + * + * @param extraPointers + * @param opNum + * @param state + * @param x + * @param xShapeBuffer + * @param y + * @param yShapeBuffer + * @param z + * @param zShapeBuffer + * @param extraArguments + */ +void execRandom3(Nd4jPointer *extraPointers, + int opNum, + Nd4jPointer state, + void *hX, Nd4jLong *hXShapeBuffer, + void *dX, Nd4jLong *dXShapeBuffer, + void *hY, Nd4jLong *hYShapeBuffer, + void *dY, Nd4jLong *dYShapeBuffer, + void *hZ, Nd4jLong *hZShapeBuffer, + void *dZ, Nd4jLong *dZShapeBuffer, + void *extraArguments); + +/** + * + * @param extraPointers + * @param opNum + * @param state + * @param x + * @param xShapeBuffer + * @param z + * @param zShapeBuffer + * @param extraArguments + */ +void execRandom2(Nd4jPointer *extraPointers, + int opNum, + Nd4jPointer state, + void *hX, Nd4jLong *hXShapeBuffer, + void *dX, Nd4jLong *dXShapeBuffer, + void *hZ, Nd4jLong *hZShapeBuffer, + void *dZ, Nd4jLong *dZShapeBuffer, + void *extraArguments); + + +/** + * + * @param extraPointers + * @param seed + * @param bufferSize + * @param ptrToBuffer + * @return + */ +Nd4jPointer initRandom(Nd4jPointer *extraPointers, long seed, - Nd4jPointer ptrRandom); + long bufferSize, + Nd4jPointer ptrToBuffer); - /** - * - * @param extraPointers - * @param seed - * @param ptrRandom - */ - void reSeedBuffer(Nd4jPointer *extraPointers, - long seed, - Nd4jPointer ptrRandom); +/** + * + * @param extraPointers + * @param seed + * @param ptrRandom + */ +void refreshBuffer(Nd4jPointer *extraPointers, + long seed, + Nd4jPointer ptrRandom); - /** - * - * @param ptrRandom - */ - void destroyRandom(Nd4jPointer ptrRandom); +/** + * + * @param extraPointers + * @param seed + * @param ptrRandom + */ +void reSeedBuffer(Nd4jPointer *extraPointers, + long seed, + Nd4jPointer ptrRandom); - /** - * Grid operations - */ +/** + * + * @param ptrRandom + */ +void destroyRandom(Nd4jPointer ptrRandom); + +/** + * Grid operations + */ - /** - * - * @param extras - * @param opTypeA - * @param opNumA - * @param opTypeB - * @param opNumB - * @param N - * @param dx - * @param xShapeInfo - * @param dy - * @param yShapeInfo - * @param dz - * @param zShapeInfo - * @param extraA - * @param extraB - * @param scalarA - * @param scalarB - */ - /* - void execMetaPredicateShape(Nd4jPointer *extras, - const int opTypeA, - const int opNumA, - const int opTypeB, - const int opNumB, - Nd4jLong N, - void *hX, Nd4jLong *hXShapeBuffer, - void *dX, Nd4jLong *dXShapeBuffer, - void *hY, Nd4jLong 
*hYShapeBuffer, - void *dY, Nd4jLong *dYShapeBuffer, - void *hZ, Nd4jLong *hZShapeBuffer, - void *dZ, Nd4jLong *dZShapeBuffer, - void *extraA, - void *extraB, - double scalarA, - double scalarB); +/** + * + * @param extras + * @param opTypeA + * @param opNumA + * @param opTypeB + * @param opNumB + * @param N + * @param dx + * @param xShapeInfo + * @param dy + * @param yShapeInfo + * @param dz + * @param zShapeInfo + * @param extraA + * @param extraB + * @param scalarA + * @param scalarB + */ + /* +void execMetaPredicateShape(Nd4jPointer *extras, + const int opTypeA, + const int opNumA, + const int opTypeB, + const int opNumB, + Nd4jLong N, + void *hX, Nd4jLong *hXShapeBuffer, + void *dX, Nd4jLong *dXShapeBuffer, + void *hY, Nd4jLong *hYShapeBuffer, + void *dY, Nd4jLong *dYShapeBuffer, + void *hZ, Nd4jLong *hZShapeBuffer, + void *dZ, Nd4jLong *dZShapeBuffer, + void *extraA, + void *extraB, + double scalarA, + double scalarB); */ -/** - * - * @param data - * @param shapeBuffer - * @param wordSize - * @param headerSize - * @return - */ - template - static Nd4jPointer _numpyHeaderForNd4j(Nd4jPointer data,Nd4jPointer shapeBuffer,Nd4jLong wordSize,Nd4jLong *headerSize) { - Nd4jLong *shapeBufferCast = reinterpret_cast(shapeBuffer); - int rank = shape::rank(shapeBufferCast); - Nd4jLong *shape = shape::shapeOf(shapeBufferCast); - unsigned int *npShape = new unsigned int[rank]; - for(int i = 0; i < rank; i++) { - npShape[i] = shape[i]; - } - - Nd4jLong length = shape::prodLong(shape,rank); - auto npHeader = cnpy::createNpyHeader(data,npShape,rank,wordSize); - char *ret = new char[npHeader.size() + 1]; - int count = 0; - for(int i = 0; i < npHeader.size(); i++) { - ret[count] = npHeader[i]; - count++; - } - - ret[count] = '\0'; - count++; - - *headerSize = count; - return reinterpret_cast(ret); - } - - Nd4jPointer numpyHeaderForNd4j(Nd4jPointer data,Nd4jPointer shapeBuffer,Nd4jLong wordSize,Nd4jLong *headerSize) { - auto shapeBufferCast = reinterpret_cast(shapeBuffer); - auto type = nd4j::ArrayOptions::dataType(shapeBufferCast); - BUILD_SINGLE_SELECTOR(type, return _numpyHeaderForNd4j, (data, shapeBuffer, wordSize, headerSize), LIBND4J_TYPES); - } +} /** - * Load numpy from a header - * based on the cnpy parse from header method. 
- * @param data the header data to parse - * @return a pointer to a numpy cnpy:NpyArray struct - */ - Nd4jPointer loadNpyFromHeader(Nd4jPointer data) { - char *header = reinterpret_cast(data); +* +* @param data +* @param shapeBuffer +* @param wordSize +* @param headerSize +* @return +*/ - cnpy::NpyArray arr = cnpy::loadNpyFromHeader(header); - cnpy::NpyArray *ret = new cnpy::NpyArray(); - int totalLengthOfShape = 1; - for(int i = 0; i < arr.shape.size(); i++) { - totalLengthOfShape *= arr.shape[i]; - } - - ret->data = arr.data; - ret->wordSize = arr.wordSize; - ret->shape = arr.shape; - return reinterpret_cast(ret); +template +static Nd4jPointer _numpyHeaderForNd4j(Nd4jPointer data,Nd4jPointer shapeBuffer,Nd4jLong wordSize,Nd4jLong *headerSize) { + Nd4jLong *shapeBufferCast = reinterpret_cast(shapeBuffer); + int rank = shape::rank(shapeBufferCast); + Nd4jLong *shape = shape::shapeOf(shapeBufferCast); + unsigned int *npShape = new unsigned int[rank]; + for(int i = 0; i < rank; i++) { + npShape[i] = shape[i]; } + Nd4jLong length = shape::prodLong(shape,rank); + auto npHeader = cnpy::createNpyHeader(data,npShape,rank,wordSize); + char *ret = new char[npHeader.size() + 1]; + int count = 0; + for(int i = 0; i < npHeader.size(); i++) { + ret[count] = npHeader[i]; + count++; + } + + ret[count] = '\0'; + count++; + + *headerSize = count; + return reinterpret_cast(ret); +} + +extern "C" { + +static Nd4jPointer numpyHeaderForNd4j(Nd4jPointer data,Nd4jPointer shapeBuffer,Nd4jLong wordSize,Nd4jLong *headerSize) { + auto shapeBufferCast = reinterpret_cast(shapeBuffer); + auto type = nd4j::ArrayOptions::dataType(shapeBufferCast); + BUILD_SINGLE_SELECTOR(type, return _numpyHeaderForNd4j, (data, shapeBuffer, wordSize, headerSize), LIBND4J_TYPES); +} + +/** +* Load numpy from a header +* based on the cnpy parse from header method. 
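+* A hypothetical round trip with the helpers declared further down in this header
+* (the headerPointer itself is assumed to be supplied by the caller):
+*
+*   void *npy  = loadNpyFromHeader(headerPointer);
+*   int rank   = getNpyArrayRank(npy);
+*   void *data = getNpyArrayData(npy);
+*   deleteNPArrayStruct(npy);   // the returned struct is heap-allocated
+*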
+* @param data the header data to parse
+* @return a pointer to a numpy cnpy::NpyArray struct
+*/
+static Nd4jPointer loadNpyFromHeader(Nd4jPointer data) {
+    char *header = reinterpret_cast<char *>(data);
+
+    cnpy::NpyArray arr = cnpy::loadNpyFromHeader(header);
+    cnpy::NpyArray *ret = new cnpy::NpyArray();
+    int totalLengthOfShape = 1;
+    for(int i = 0; i < arr.shape.size(); i++) {
+        totalLengthOfShape *= arr.shape[i];
+    }
+
+    ret->data = arr.data;
+    ret->wordSize = arr.wordSize;
+    ret->shape = arr.shape;
+    return reinterpret_cast<Nd4jPointer>(ret);
+}
+
+}
+
+/**
+* Create a numpy array from an nd4j
+* array
+* @param data a pointer to the data
+* @param shapeBuffer the shapebuffer for the nd4j array
+* @param wordSize the word size (4 for float, 8 for doubles)
+* @return a pointer to a numpy array
+*/
+
+template <typename T>
+static Nd4jPointer _numpyFromNd4j(Nd4jPointer data,Nd4jPointer shapeBuffer,Nd4jLong wordSize) {
+    Nd4jLong *shapeBufferCast = reinterpret_cast<Nd4jLong *>(shapeBuffer);
+    int rank = shape::rank(shapeBufferCast);
+    Nd4jLong *shape = shape::shapeOf(shapeBufferCast);
+    unsigned int *npShape = new unsigned int[rank];
+    for(int i = 0; i < rank; i++) {
+        npShape[i] = shape[i];
+    }
+
+    Nd4jLong length = shape::prodLong(shape,rank);
+    auto npHeader = cnpy::createNpyHeader(data,npShape,rank,wordSize);
+    char *dataChar = reinterpret_cast<char *>(data);
+    char *npHeaderData = npHeader.data();
+    char *ret = new char[(wordSize * length) + npHeader.size()];
+    char *cursorStart = ret;
+    std::memcpy(reinterpret_cast<void *>(ret), reinterpret_cast<void *>(npHeaderData), npHeader.size() * sizeof(Nd4jLong));
+    //move to next
+    cursorStart += npHeader.size();
+    std::memcpy(reinterpret_cast<void *>(ret), reinterpret_cast<void *>(dataChar), length * wordSize * sizeof(Nd4jLong));
+    Nd4jPointer rettPointer = reinterpret_cast<Nd4jPointer>(ret);
+    return rettPointer;
+}
+
+extern "C" {
+
+static Nd4jPointer numpyFromNd4j(Nd4jPointer data,Nd4jPointer shapeBuffer,Nd4jLong wordSize) {
+    auto shapeBufferCast = reinterpret_cast<Nd4jLong *>(shapeBuffer);
+    auto type = nd4j::ArrayOptions::dataType(shapeBufferCast);
+    BUILD_SINGLE_SELECTOR(type, return _numpyFromNd4j, (data, shapeBuffer, wordSize), LIBND4J_TYPES);
+}
+
 /**
- * Create a numpy array from an nd4j
- * array
- * @param data a pointer to the data
- * @param shapeBuffer the shapebuffer for the nd4j array
- * @param wordSize the word size (4 for float, 8 for doubles)
- * @return a pointer to a numpy array
- */
-
-    template <typename T>
-    static Nd4jPointer _numpyFromNd4j(Nd4jPointer data,Nd4jPointer shapeBuffer,Nd4jLong wordSize) {
-        Nd4jLong *shapeBufferCast = reinterpret_cast<Nd4jLong *>(shapeBuffer);
-        int rank = shape::rank(shapeBufferCast);
-        Nd4jLong *shape = shape::shapeOf(shapeBufferCast);
-        unsigned int *npShape = new unsigned int[rank];
-        for(int i = 0; i < rank; i++) {
-            npShape[i] = shape[i];
-        }
-
-        Nd4jLong length = shape::prodLong(shape,rank);
-        auto npHeader = cnpy::createNpyHeader(data,npShape,rank,wordSize);
-        char *dataChar = reinterpret_cast<char *>(data);
-        char *npHeaderData = npHeader.data();
-        char *ret = new char[(wordSize * length) + npHeader.size()];
-        char *cursorStart = ret;
-        std::memcpy(reinterpret_cast<void *>(ret), reinterpret_cast<void *>(npHeaderData), npHeader.size() * sizeof(Nd4jLong));
-        //move to next
-        cursorStart += npHeader.size();
-        std::memcpy(reinterpret_cast<void *>(ret), reinterpret_cast<void *>(dataChar), length * wordSize * sizeof(Nd4jLong));
-        Nd4jPointer rettPointer = reinterpret_cast<Nd4jPointer>(ret);
-        return rettPointer;
-    }
-
-
-    Nd4jPointer numpyFromNd4j(Nd4jPointer data,Nd4jPointer shapeBuffer,Nd4jLong wordSize) {
-        auto shapeBufferCast = reinterpret_cast<Nd4jLong *>(shapeBuffer);
- auto type = nd4j::ArrayOptions::dataType(shapeBufferCast); - BUILD_SINGLE_SELECTOR(type, return _numpyFromNd4j, (data, shapeBuffer, wordSize), LIBND4J_TYPES); - } - - -/** - * - * @param npyArray - * @return - */ - Nd4jPointer shapeBufferForNumpy(Nd4jPointer npyArray); +* +* @param npyArray +* @return +*/ +Nd4jPointer shapeBufferForNumpy(Nd4jPointer npyArray); /** @@ -1352,168 +1355,168 @@ public: * @param npyArray * @return */ - Nd4jPointer shapeBufferForNumpyHeader(Nd4jPointer npyArray) { - cnpy::NpyArray arr = cnpy::loadNpyFromHeader(reinterpret_cast(npyArray)); - auto shape = new unsigned int[arr.shape.size()]; - for(unsigned int i = 0; i < arr.shape.size(); i++) { - shape[i] = arr.shape[i]; +static Nd4jPointer shapeBufferForNumpyHeader(Nd4jPointer npyArray) { + cnpy::NpyArray arr = cnpy::loadNpyFromHeader(reinterpret_cast(npyArray)); + auto shape = new unsigned int[arr.shape.size()]; + for(unsigned int i = 0; i < arr.shape.size(); i++) { + shape[i] = arr.shape[i]; + } + + auto shapeBuffer = shape::shapeBufferOfNpy(arr.shape.size(), shape, arr.fortranOrder); + delete[] shape; + return reinterpret_cast(shapeBuffer); +} + + + +/** +* +* @param npyArray +* @return +*/ +static Nd4jPointer dataPointForNumpyHeader(Nd4jPointer npyArray) { + cnpy::NpyArray arr = cnpy::loadNpyFromHeader(reinterpret_cast(npyArray)); + unsigned char *dataToPrint = reinterpret_cast(arr.data); + return dataToPrint; +} + +/** +* +* @param npyArray +* @return +*/ +static Nd4jPointer dataPointForNumpyStruct(Nd4jPointer npyArrayStruct) { + cnpy::NpyArray *arrPointer = reinterpret_cast(npyArrayStruct); + unsigned char *dataToPrint = reinterpret_cast(arrPointer->data); + return reinterpret_cast(dataToPrint); +} + +/** +* +* @param npyArray +* @param fromFile +* @return +*/ +static Nd4jPointer dataPointForNumpy(Nd4jPointer npyArray) { + char *npyArrayBuffer = reinterpret_cast< char *>(npyArray); + cnpy::NpyArray arr = cnpy::loadNpyFromPointer(npyArrayBuffer); + return dataPointForNumpyStruct(reinterpret_cast(&arr)); +} + +/** +* Load a numpy array from a file +* and return it as an Nd4jPointer +* @param path +* @return +*/ +static Nd4jPointer numpyFromFile(std::string path) { + char *numpyBuffer = cnpy::loadFile(path.data()); + return reinterpret_cast(numpyBuffer); +} + + +////// NPZ ////// + +static void* mapFromNpzFile(std::string path){ + cnpy::npz_t* mapPtr = new cnpy::npz_t(); + cnpy::npz_t map = cnpy::npzLoad(path); + mapPtr->insert(map.begin(), map.end()); + return reinterpret_cast(mapPtr); +} + + +static int getNumNpyArraysInMap(void *map){ + cnpy::npz_t* arrays = reinterpret_cast(map); + int n = arrays->size(); + return n; +} + +static const char* getNpyArrayNameFromMap(void *map, int index){ + cnpy::npz_t* arrays = reinterpret_cast(map); + cnpy::npz_t::iterator it = arrays->begin(); + cnpy::npz_t::iterator end = arrays->end(); + int cnt = 0; + for(; it != end; ++it, ++cnt){ + if (cnt == index){ + // FIXME: @fariz, this is a leak! 
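+                // (editorial note, not a behavioural change) strdup() allocates a copy
+                // that nothing in this header ever frees, which is what the FIXME above
+                // refers to; until it is addressed, the caller would have to free() the
+                // returned name, or the names could be interned per map instead.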
+ return const_cast(strdup(it->first.c_str())); } - - auto shapeBuffer = shape::shapeBufferOfNpy(arr.shape.size(), shape, arr.fortranOrder); - delete[] shape; - return reinterpret_cast(shapeBuffer); } + throw std::runtime_error("No array at index."); +} - - -/** - * - * @param npyArray - * @return - */ - Nd4jPointer dataPointForNumpyHeader(Nd4jPointer npyArray) { - cnpy::NpyArray arr = cnpy::loadNpyFromHeader(reinterpret_cast(npyArray)); - unsigned char *dataToPrint = reinterpret_cast(arr.data); - return dataToPrint; - } - -/** - * - * @param npyArray - * @return - */ - Nd4jPointer dataPointForNumpyStruct(Nd4jPointer npyArrayStruct) { - cnpy::NpyArray *arrPointer = reinterpret_cast(npyArrayStruct); - unsigned char *dataToPrint = reinterpret_cast(arrPointer->data); - return reinterpret_cast(dataToPrint); - } - -/** - * - * @param npyArray - * @param fromFile - * @return - */ - Nd4jPointer dataPointForNumpy(Nd4jPointer npyArray) { - char *npyArrayBuffer = reinterpret_cast< char *>(npyArray); - cnpy::NpyArray arr = cnpy::loadNpyFromPointer(npyArrayBuffer); - return dataPointForNumpyStruct(reinterpret_cast(&arr)); - } - -/** - * Load a numpy array from a file - * and return it as an Nd4jPointer - * @param path - * @return - */ - Nd4jPointer numpyFromFile(std::string path) { - char *numpyBuffer = cnpy::loadFile(path.data()); - return reinterpret_cast(numpyBuffer); - } - - - ////// NPZ ////// - - void* mapFromNpzFile(std::string path){ - cnpy::npz_t* mapPtr = new cnpy::npz_t(); - cnpy::npz_t map = cnpy::npzLoad(path); - mapPtr->insert(map.begin(), map.end()); - return reinterpret_cast(mapPtr); - } - - - int getNumNpyArraysInMap(void *map){ - cnpy::npz_t* arrays = reinterpret_cast(map); - int n = arrays->size(); - return n; - } - - const char* getNpyArrayNameFromMap(void *map, int index){ - cnpy::npz_t* arrays = reinterpret_cast(map); - cnpy::npz_t::iterator it = arrays->begin(); - cnpy::npz_t::iterator end = arrays->end(); - int cnt = 0; - for(; it != end; ++it, ++cnt){ - if (cnt == index){ - // FIXME: @fariz, this is a leak! 
- return const_cast(strdup(it->first.c_str())); - } +static void* getNpyArrayFromMap(void *map, int index){ + cnpy::npz_t* arrays = reinterpret_cast(map); + cnpy::npz_t::iterator it = arrays->begin(); + cnpy::npz_t::iterator end = arrays->end(); + cnpy::NpyArray *arr = new cnpy::NpyArray(); + int cnt = 0; + for(; it != end; ++it, ++cnt){ + if (cnt == index){ + *arr = it->second; + return arr; } - throw std::runtime_error("No array at index."); } + throw std::runtime_error("No array at index."); +} - void* getNpyArrayFromMap(void *map, int index){ - cnpy::npz_t* arrays = reinterpret_cast(map); - cnpy::npz_t::iterator it = arrays->begin(); - cnpy::npz_t::iterator end = arrays->end(); - cnpy::NpyArray *arr = new cnpy::NpyArray(); - int cnt = 0; - for(; it != end; ++it, ++cnt){ - if (cnt == index){ - *arr = it->second; - return arr; - } - } - throw std::runtime_error("No array at index."); +int dataTypeFromNpyHeader(void *header); + +static void* getNpyArrayData(void *npArray){ + cnpy::NpyArray* npyArray2 = reinterpret_cast(npArray); + return reinterpret_cast(npyArray2->data); +} + +static int getNpyArrayRank(void *npArray){ + cnpy::NpyArray* arr = reinterpret_cast(npArray); + int rank = arr->shape.size(); + return rank; +} + +static Nd4jLong* getNpyArrayShape(void *npArray){ + cnpy::NpyArray* arr = reinterpret_cast(npArray); + int ndim = arr->shape.size(); + Nd4jLong* shape = new Nd4jLong[ndim]; + for (int i=0; ishape.at(i); } + return shape; +} - int dataTypeFromNpyHeader(void *header); +static char getNpyArrayOrder(void *npArray){ + cnpy::NpyArray* arr = reinterpret_cast(npArray); + return (arr->fortranOrder)?'f':'c'; +} - void* getNpyArrayData(void *npArray){ - cnpy::NpyArray* npyArray2 = reinterpret_cast(npArray); - return reinterpret_cast(npyArray2->data); - } +static int getNpyArrayElemSize(void *npArray){ + cnpy::NpyArray* arr = reinterpret_cast(npArray); + return arr->wordSize; +} - int getNpyArrayRank(void *npArray){ - cnpy::NpyArray* arr = reinterpret_cast(npArray); - int rank = arr->shape.size(); - return rank; - } +static void deleteNPArrayStruct(void *npArray){ + cnpy::NpyArray* arr = reinterpret_cast(npArray); + delete arr; +} - Nd4jLong* getNpyArrayShape(void *npArray){ - cnpy::NpyArray* arr = reinterpret_cast(npArray); - int ndim = arr->shape.size(); - Nd4jLong* shape = new Nd4jLong[ndim]; - for (int i=0; ishape.at(i); - } - return shape; - } - - char getNpyArrayOrder(void *npArray){ - cnpy::NpyArray* arr = reinterpret_cast(npArray); - return (arr->fortranOrder)?'f':'c'; - } - - int getNpyArrayElemSize(void *npArray){ - cnpy::NpyArray* arr = reinterpret_cast(npArray); - return arr->wordSize; - } - - void deleteNPArrayStruct(void *npArray){ - cnpy::NpyArray* arr = reinterpret_cast(npArray); - delete arr; - } - - void deleteNPArrayMap(void *map){ - cnpy::npz_t* arrays = reinterpret_cast(map); - delete arrays; - } - ////// +static void deleteNPArrayMap(void *map){ + cnpy::npz_t* arrays = reinterpret_cast(map); + delete arrays; +} +////// /** - * Get the element size for a numpy array - * @param npyArray the numpy array's address - * to get the length for - * @return - */ - int elementSizeForNpyArray(Nd4jPointer npyArray) { - cnpy::NpyArray arr = cnpy::loadNpyFromPointer(reinterpret_cast(npyArray)); - cnpy::NpyArray *arrPointer = &arr; - int size = arrPointer->wordSize; - // arrPointer->destruct(); - return size; - } +* Get the element size for a numpy array +* @param npyArray the numpy array's address +* to get the length for +* @return +*/ +static int 
elementSizeForNpyArray(Nd4jPointer npyArray) { + cnpy::NpyArray arr = cnpy::loadNpyFromPointer(reinterpret_cast(npyArray)); + cnpy::NpyArray *arrPointer = &arr; + int size = arrPointer->wordSize; + // arrPointer->destruct(); + return size; +} /** @@ -1522,193 +1525,190 @@ public: * to get the length for * @return */ - int elementSizeForNpyArrayHeader(Nd4jPointer npyArray) { - cnpy::NpyArray arr = cnpy::loadNpyFromHeader(reinterpret_cast(npyArray)); - cnpy::NpyArray *arrPointer = &arr; - int size = arrPointer->wordSize; - return size; - } +static int elementSizeForNpyArrayHeader(Nd4jPointer npyArray) { + cnpy::NpyArray arr = cnpy::loadNpyFromHeader(reinterpret_cast(npyArray)); + cnpy::NpyArray *arrPointer = &arr; + int size = arrPointer->wordSize; + return size; +} - void releaseNumpy(Nd4jPointer npyArray) { - free(reinterpret_cast(npyArray)); - } +static void releaseNumpy(Nd4jPointer npyArray) { + free(reinterpret_cast(npyArray)); +} - /** - * Return the length of a shape buffer - * based on the pointer - * @param buffer the buffer pointer to check - * @return - */ - int lengthForShapeBufferPointer(Nd4jPointer buffer); +/** + * Return the length of a shape buffer + * based on the pointer + * @param buffer the buffer pointer to check + * @return + */ +int lengthForShapeBufferPointer(Nd4jPointer buffer); - /** - * The pointer to get the address for - * - * @param address the address to get the pointer - * @return the pointer for the given address - */ + /** +* The pointer to get the address for +* +* @param address the address to get the pointer +* @return the pointer for the given address +*/ - Nd4jPointer pointerForAddress(Nd4jLong address); +Nd4jPointer pointerForAddress(Nd4jLong address); - /** - * This method takes single N-dimensional tensor, and copies its TADs to target arrays - * - * @param x - * @param xShapeInfo - * @param targets - * @param zShapeInfo - * @return - */ - void tear(Nd4jPointer *extraPointers, - void *x, Nd4jLong *xShapeInfo, - void *dx, Nd4jLong *dxShapeInfo, - Nd4jPointer *targets, Nd4jLong *zShapeInfo, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets); +/** + * This method takes single N-dimensional tensor, and copies its TADs to target arrays + * + * @param x + * @param xShapeInfo + * @param targets + * @param zShapeInfo + * @return + */ +void tear(Nd4jPointer *extraPointers, + void *x, Nd4jLong *xShapeInfo, + void *dx, Nd4jLong *dxShapeInfo, + Nd4jPointer *targets, Nd4jLong *zShapeInfo, + Nd4jLong *tadShapeInfo, + Nd4jLong *tadOffsets); - Nd4jLong encodeBitmap(Nd4jPointer *extraPointers, void *dx, Nd4jLong *xShapeInfo, Nd4jLong N, int *dz, float threshold); - void decodeBitmap(Nd4jPointer *extraPointers, void *dx, Nd4jLong N, void *dz, Nd4jLong *zShapeInfo); +Nd4jLong encodeBitmap(Nd4jPointer *extraPointers, void *dx, Nd4jLong *xShapeInfo, Nd4jLong N, int *dz, float threshold); +void decodeBitmap(Nd4jPointer *extraPointers, void *dx, Nd4jLong N, void *dz, Nd4jLong *zShapeInfo); - void encodeThresholdP1(Nd4jPointer *extraPointers, void *dx, Nd4jLong *xShapeInfo, Nd4jLong N, int *dz, float threshold); - void encodeThresholdP2Int(Nd4jPointer *extraPointers, int *dx, Nd4jLong N, int *dz); - void encodeThresholdP3(Nd4jPointer *extraPointers, void *dx, Nd4jLong *xShapeInfo, int *offsets, Nd4jLong N, int *dz); +void encodeThresholdP1(Nd4jPointer *extraPointers, void *dx, Nd4jLong *xShapeInfo, Nd4jLong N, int *dz, float threshold); +void encodeThresholdP2Int(Nd4jPointer *extraPointers, int *dx, Nd4jLong N, int *dz); +void encodeThresholdP3(Nd4jPointer *extraPointers, 
void *dx, Nd4jLong *xShapeInfo, int *offsets, Nd4jLong N, int *dz); - void decodeThreshold(Nd4jPointer *extraPointers, void *dx, Nd4jLong N, void *dz, Nd4jLong *zShapeInfo); +void decodeThreshold(Nd4jPointer *extraPointers, void *dx, Nd4jLong N, void *dz, Nd4jLong *zShapeInfo); - void sort(Nd4jPointer *extraPointers, - void *x, Nd4jLong *xShapeInfo, - void *dx, Nd4jLong *dxShapeInfo, - bool descending); +void sort(Nd4jPointer *extraPointers, + void *x, Nd4jLong *xShapeInfo, + void *dx, Nd4jLong *dxShapeInfo, + bool descending); - void sortByKey(Nd4jPointer *extraPointers, - void *x, Nd4jLong *xShapeInfo, - void *dx, Nd4jLong *dxShapeInfo, - void *y, Nd4jLong *yShapeInfo, - void *dy, Nd4jLong *dyShapeInfo, - bool descending); +void sortByKey(Nd4jPointer *extraPointers, + void *x, Nd4jLong *xShapeInfo, + void *dx, Nd4jLong *dxShapeInfo, + void *y, Nd4jLong *yShapeInfo, + void *dy, Nd4jLong *dyShapeInfo, + bool descending); - void sortByValue(Nd4jPointer *extraPointers, - void *x, Nd4jLong *xShapeInfo, - void *dx, Nd4jLong *dxShapeInfo, - void *y, Nd4jLong *yShapeInfo, - void *dy, Nd4jLong *dyShapeInfo, - bool descending); - - void sortTad(Nd4jPointer *extraPointers, - void *x, Nd4jLong *xShapeInfo, - void *dx, Nd4jLong *dxShapeInfo, - int *dimension, - int dimensionLength, - Nd4jLong *tadShapeInfo, - Nd4jLong *tadOffsets, - bool descending); - - void sortTadByKey(Nd4jPointer *extraPointers, +void sortByValue(Nd4jPointer *extraPointers, void *x, Nd4jLong *xShapeInfo, void *dx, Nd4jLong *dxShapeInfo, void *y, Nd4jLong *yShapeInfo, void *dy, Nd4jLong *dyShapeInfo, - int *dimension, - int dimensionLength, bool descending); - void sortTadByValue(Nd4jPointer *extraPointers, - void *x, Nd4jLong *xShapeInfo, - void *dx, Nd4jLong *dxShapeInfo, - void *y, Nd4jLong *yShapeInfo, - void *dy, Nd4jLong *dyShapeInfo, - int *dimension, - int dimensionLength, - bool descending); +void sortTad(Nd4jPointer *extraPointers, + void *x, Nd4jLong *xShapeInfo, + void *dx, Nd4jLong *dxShapeInfo, + int *dimension, + int dimensionLength, + Nd4jLong *tadShapeInfo, + Nd4jLong *tadOffsets, + bool descending); + +void sortTadByKey(Nd4jPointer *extraPointers, + void *x, Nd4jLong *xShapeInfo, + void *dx, Nd4jLong *dxShapeInfo, + void *y, Nd4jLong *yShapeInfo, + void *dy, Nd4jLong *dyShapeInfo, + int *dimension, + int dimensionLength, + bool descending); + +void sortTadByValue(Nd4jPointer *extraPointers, + void *x, Nd4jLong *xShapeInfo, + void *dx, Nd4jLong *dxShapeInfo, + void *y, Nd4jLong *yShapeInfo, + void *dy, Nd4jLong *dyShapeInfo, + int *dimension, + int dimensionLength, + bool descending); - // special sort impl for sorting out COO indices and values - void sortCooIndices(Nd4jPointer *extraPointers, Nd4jLong *indices, void *values, Nd4jLong length, int rank); +// special sort impl for sorting out COO indices and values +void sortCooIndices(Nd4jPointer *extraPointers, Nd4jLong *indices, void *values, Nd4jLong length, int rank); - Nd4jLong* mmapFile(Nd4jPointer *extraPointers, const char *fileName, Nd4jLong length); +Nd4jLong* mmapFile(Nd4jPointer *extraPointers, const char *fileName, Nd4jLong length); - void munmapFile(Nd4jPointer *extraPointers, Nd4jLong* ptrMap, Nd4jLong length); +void munmapFile(Nd4jPointer *extraPointers, Nd4jLong* ptrMap, Nd4jLong length); - // flatbuffers execution - nd4j::graph::ResultWrapper* executeFlatGraph(Nd4jPointer *extraPointers, Nd4jPointer flatBufferPointer); +// flatbuffers execution +nd4j::graph::ResultWrapper* executeFlatGraph(Nd4jPointer *extraPointers, Nd4jPointer 
flatBufferPointer); - const char* getAllCustomOps(); +const char* getAllCustomOps(); - const char* getAllOperations(); +const char* getAllOperations(); - // customOp executioner - int execCustomOp(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputBuffers, Nd4jPointer* inputShapes, int numInputs, Nd4jPointer* outputBuffers, Nd4jPointer* outputShapes, int numOutputs, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs, bool* bArgs, int numBArgs, bool isInplace); - int execCustomOp(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer opContext); +// customOp executioner +int execCustomOp(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputBuffers, Nd4jPointer* inputShapes, int numInputs, Nd4jPointer* outputBuffers, Nd4jPointer* outputShapes, int numOutputs, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs, bool* bArgs, int numBArgs, bool isInplace); +int execCustomOp2(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer opContext); - nd4j::ShapeList* calculateOutputShapes(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputShapes, int numInputShapes, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs); - nd4j::ShapeList* calculateOutputShapes(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputBuffers, Nd4jPointer* inputShapes, int numInputShapes, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs, bool *bArgs, int numBArgs); +nd4j::ShapeList* calculateOutputShapes(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputShapes, int numInputShapes, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs); +nd4j::ShapeList* calculateOutputShapes2(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputBuffers, Nd4jPointer* inputShapes, int numInputShapes, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs, bool *bArgs, int numBArgs); - void deleteShapeList(Nd4jPointer shapeList); +void deleteShapeList(Nd4jPointer shapeList); - int registerGraph(Nd4jPointer *extraPointers, Nd4jLong graphId, Nd4jPointer flatBufferPointer); +int registerGraph(Nd4jPointer *extraPointers, Nd4jLong graphId, Nd4jPointer flatBufferPointer); - nd4j::graph::VariablesSet *executeStoredGraph(Nd4jPointer *extraPointers, Nd4jLong graphId, Nd4jPointer *inputBuffers, Nd4jPointer *inputShapes, int* inputIndices, int numInputs); +nd4j::graph::VariablesSet *executeStoredGraph(Nd4jPointer *extraPointers, Nd4jLong graphId, Nd4jPointer *inputBuffers, Nd4jPointer *inputShapes, int* inputIndices, int numInputs); - int unregisterGraph(Nd4jPointer *extraPointers, Nd4jLong graphId); +int unregisterGraph(Nd4jPointer *extraPointers, Nd4jLong graphId); - void deleteCharArray(Nd4jPointer pointer); - void deleteIntArray(Nd4jPointer pointer); - void deleteLongArray(Nd4jPointer pointer); - void deletePointerArray(Nd4jPointer pointer); +void deleteCharArray(Nd4jPointer pointer); +void deleteIntArray(Nd4jPointer pointer); +void deleteLongArray(Nd4jPointer pointer); +void deletePointerArray(Nd4jPointer pointer); - void deleteVariablesSet(Nd4jPointer pointer); +void deleteVariablesSet(Nd4jPointer pointer); - // GraphState creation - Nd4jPointer getGraphState(Nd4jLong id); +// GraphState creation +Nd4jPointer getGraphState(Nd4jLong id); - void deleteGraphState(Nd4jPointer state); +void deleteGraphState(Nd4jPointer state); - void deleteResultWrapper(Nd4jPointer ptr); +void deleteResultWrapper(Nd4jPointer ptr); - int estimateThreshold(Nd4jPointer *extraPointers, Nd4jPointer x, Nd4jLong *xShapeInfo, int N, float threshold); +int estimateThreshold(Nd4jPointer 
*extraPointers, Nd4jPointer x, Nd4jLong *xShapeInfo, int N, float threshold); - // this method executes op that requires scope to be present: if/while/cond/whatever - Nd4jStatus execCustomOpWithScope(Nd4jPointer *extraPointers, Nd4jPointer state, Nd4jLong opHash, Nd4jLong *scopes, int numScopes, Nd4jPointer *inputBuffers, Nd4jPointer *inputShapes, int numInputs, Nd4jPointer *outputBuffers, Nd4jPointer *outputShapes, int numOutputs); +// this method executes op that requires scope to be present: if/while/cond/whatever +Nd4jStatus execCustomOpWithScope(Nd4jPointer *extraPointers, Nd4jPointer state, Nd4jLong opHash, Nd4jLong *scopes, int numScopes, Nd4jPointer *inputBuffers, Nd4jPointer *inputShapes, int numInputs, Nd4jPointer *outputBuffers, Nd4jPointer *outputShapes, int numOutputs); - //void fillUtf8String(Nd4jPointer *extraPointers, const char **string, int numStrings, Nd4jPointer buffer); - Nd4jPointer createUtf8String(Nd4jPointer *extraPointers, const char *string, int length); - void deleteUtf8String(Nd4jPointer *extraPointers, Nd4jPointer ptr); +//void fillUtf8String(Nd4jPointer *extraPointers, const char **string, int numStrings, Nd4jPointer buffer); +Nd4jPointer createUtf8String(Nd4jPointer *extraPointers, const char *string, int length); +void deleteUtf8String(Nd4jPointer *extraPointers, Nd4jPointer ptr); - void scatterUpdate(Nd4jPointer *extraPointers, int opCode, int numOfSubArrs, - void* hX, Nd4jLong* hXShapeInfo, Nd4jLong* hXOffsets, - void* dX, Nd4jLong* dXShapeInfo, Nd4jLong* dXOffsets, - void* hY, Nd4jLong* hYShapeInfo, Nd4jLong* hYOffsets, - void* dY, Nd4jLong* dYShapeInfo, Nd4jLong* dYOffsets, - int* hIindexes, int* dIindexes); +void scatterUpdate(Nd4jPointer *extraPointers, int opCode, int numOfSubArrs, + void* hX, Nd4jLong* hXShapeInfo, Nd4jLong* hXOffsets, + void* dX, Nd4jLong* dXShapeInfo, Nd4jLong* dXOffsets, + void* hY, Nd4jLong* hYShapeInfo, Nd4jLong* hYOffsets, + void* dY, Nd4jLong* dYShapeInfo, Nd4jLong* dYOffsets, + int* hIindexes, int* dIindexes); - void inspectArray(Nd4jPointer *extraPointers, Nd4jPointer buffer, Nd4jLong *shapeInfo, Nd4jPointer specialBuffer, Nd4jLong *specialShapeInfo, Nd4jPointer debugInfo); +void deleteShapeBuffer(Nd4jPointer ptr); +void deleteTadPack(Nd4jPointer ptr); + +void inspectArray(Nd4jPointer *extraPointers, Nd4jPointer buffer, Nd4jLong *shapeInfo, Nd4jPointer specialBuffer, Nd4jLong *specialShapeInfo, Nd4jPointer debugInfo); - nd4j::ConstantDataBuffer* shapeBuffer(int rank, Nd4jLong *shape, Nd4jLong *strides, nd4j::DataType dtype, char order, Nd4jLong ews, bool empty); - - nd4j::ConstantDataBuffer* constantBuffer(nd4j::DataType dtype, Nd4jLong *data, int length); - nd4j::ConstantDataBuffer* constantBuffer(nd4j::DataType dtype, double *data, int length); - nd4j::ConstantDataBuffer* constantBuffer(nd4j::DataType dtype, nd4j::ConstantDescriptor *descriptor); - - void deleteShapeBuffer(Nd4jPointer ptr); - void deleteTadPack(Nd4jPointer ptr); - - const char* runLightBenchmarkSuit(bool printOut); - const char* runFullBenchmarkSuit(bool printOut); -}; - +nd4j::ConstantDataBuffer* shapeBuffer(int rank, Nd4jLong *shape, Nd4jLong *strides, nd4j::DataType dtype, char order, Nd4jLong ews, bool empty); +nd4j::ConstantDataBuffer* constantBufferLong(nd4j::DataType dtype, Nd4jLong *data, int length); +nd4j::ConstantDataBuffer* constantBufferDouble(nd4j::DataType dtype, double *data, int length); +nd4j::ConstantDataBuffer* constantBuffer(nd4j::DataType dtype, nd4j::ConstantDescriptor *descriptor); +const char* runLightBenchmarkSuit(bool printOut); 
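// Illustrative usage sketch (editorial addition, not part of the patch): how a C++
// caller that includes this header might walk an .npz archive with the static NPZ
// helpers declared above (mapFromNpzFile, getNumNpyArraysInMap, getNpyArrayFromMap,
// getNpyArrayRank, getNpyArrayShape, getNpyArrayData, deleteNPArrayStruct,
// deleteNPArrayMap). The function name walkNpzArchive and the path argument are
// hypothetical; error handling is omitted. Each getter hands back a heap allocation,
// so every call is paired with the matching delete helper or delete[].
static int walkNpzArchive(const char *path) {
    void *map = mapFromNpzFile(std::string(path));   // heap-allocated cnpy::npz_t
    int count = getNumNpyArraysInMap(map);
    for (int i = 0; i < count; i++) {
        void *array = getNpyArrayFromMap(map, i);    // heap-allocated copy of the entry
        int rank = getNpyArrayRank(array);
        Nd4jLong *shape = getNpyArrayShape(array);   // allocated with new[]
        // ... consume rank, shape and getNpyArrayData(array) here ...
        delete[] shape;
        deleteNPArrayStruct(array);
    }
    deleteNPArrayMap(map);
    return count;
}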
+const char* runFullBenchmarkSuit(bool printOut); +} #endif //NATIVEOPERATIONS_NATIVEOPS_H diff --git a/libnd4j/blas/cpu/NativeOps.cpp b/libnd4j/blas/cpu/NativeOps.cpp index 871d7cdae..7e5560536 100644 --- a/libnd4j/blas/cpu/NativeOps.cpp +++ b/libnd4j/blas/cpu/NativeOps.cpp @@ -78,16 +78,12 @@ bool experimentalSupport = false; using namespace nd4j; -NativeOps::NativeOps() { - // -} - -void NativeOps::setElementThreshold(int num) { +void setElementThreshold(int num) { if (num > 0) nd4j::Environment::getInstance()->setElementwiseThreshold(num); } -void NativeOps::setTADThreshold(int num) { +void setTADThreshold(int num) { if (num > 0) nd4j::Environment::getInstance()->setTadThreshold(num); } @@ -99,7 +95,7 @@ void NativeOps::setTADThreshold(int num) { * @param hXShapeInfo * @param extraParams */ -void NativeOps::execIndexReduceScalar(Nd4jPointer *extraPointers, +void execIndexReduceScalar(Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, @@ -121,7 +117,7 @@ void NativeOps::execIndexReduceScalar(Nd4jPointer *extraPointers, * @param dimension * @param dimensionLength */ -void NativeOps::execIndexReduce(Nd4jPointer *extraPointers,int opNum, +void execIndexReduce(Nd4jPointer *extraPointers,int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, void *extraParams, @@ -169,7 +165,7 @@ void NativeOps::execIndexReduce(Nd4jPointer *extraPointers,int opNum, * @param dimension * @param dimensionLength */ -void NativeOps::execBroadcast(Nd4jPointer *extraPointers, +void execBroadcast(Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, @@ -206,7 +202,7 @@ void NativeOps::execBroadcast(Nd4jPointer *extraPointers, dimensionLength, hTADShapeInfo, hTADOffsets, hTADShapeInfoZ, hTADOffsetsZ); } -void NativeOps::execBroadcastBool(Nd4jPointer *extraPointers, +void execBroadcastBool(Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, @@ -255,7 +251,7 @@ void NativeOps::execBroadcastBool(Nd4jPointer *extraPointers, * @param extraParams * @param n */ -void NativeOps::execPairwiseTransform( +void execPairwiseTransform( Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, @@ -282,7 +278,7 @@ void NativeOps::execPairwiseTransform( extraParams); } -void NativeOps::execPairwiseTransformBool( +void execPairwiseTransformBool( Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, @@ -318,7 +314,7 @@ void NativeOps::execPairwiseTransformBool( * @param hZ * @param hZShapeInfo */ -void NativeOps::execReduceFloat( +void execReduceFloat( Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, @@ -341,7 +337,7 @@ void NativeOps::execReduceFloat( } -void NativeOps::execReduceSame( +void execReduceSame( Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, @@ -364,7 +360,7 @@ void NativeOps::execReduceSame( } -void NativeOps::execReduceBool( +void execReduceBool( Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, @@ -387,7 +383,7 @@ void NativeOps::execReduceBool( } -void NativeOps::execReduceLong( +void execReduceLong( Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, @@ -419,7 +415,7 @@ void NativeOps::execReduceLong( * @param hZ * @param hZShapeInfo */ -void NativeOps::execReduceFloat(Nd4jPointer *extraPointers, +void execReduceFloat2(Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, 
@@ -452,7 +448,7 @@ void NativeOps::execReduceFloat(Nd4jPointer *extraPointers, hTADOffsets); } -void NativeOps::execReduceBool(Nd4jPointer *extraPointers, +void execReduceBool2(Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, @@ -485,7 +481,7 @@ void NativeOps::execReduceBool(Nd4jPointer *extraPointers, hTADOffsets); } -void NativeOps::execReduceSame(Nd4jPointer *extraPointers, +void execReduceSame2(Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, @@ -518,7 +514,7 @@ void NativeOps::execReduceSame(Nd4jPointer *extraPointers, hTADOffsets); } -void NativeOps::execReduceLong(Nd4jPointer *extraPointers, +void execReduceLong2(Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, @@ -562,7 +558,7 @@ void NativeOps::execReduceLong(Nd4jPointer *extraPointers, * @param hZ * @param hZShapeInfo */ -void NativeOps::execReduce3(Nd4jPointer *extraPointers, +void execReduce3(Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, @@ -584,7 +580,7 @@ void NativeOps::execReduce3(Nd4jPointer *extraPointers, * @param hY * @param hYShapeInfo */ -void NativeOps::execReduce3Scalar(Nd4jPointer *extraPointers,int opNum, +void execReduce3Scalar(Nd4jPointer *extraPointers,int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, void *extraParams, @@ -608,7 +604,7 @@ void NativeOps::execReduce3Scalar(Nd4jPointer *extraPointers,int opNum, * @param dimension * @param dimensionLength */ -void NativeOps::execReduce3(Nd4jPointer *extraPointers, +void execReduce3Tad(Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, @@ -648,7 +644,7 @@ void NativeOps::execReduce3(Nd4jPointer *extraPointers, * @param extraParams * @param n */ -void NativeOps::execScalar( +void execScalar( Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, @@ -675,7 +671,7 @@ void NativeOps::execScalar( extraParams); } -void NativeOps::execScalarBool( +void execScalarBool( Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, @@ -710,7 +706,7 @@ void NativeOps::execScalarBool( * @param hXShapeInfo * @param extraParams */ -void NativeOps::execSummaryStatsScalar(Nd4jPointer *extraPointers, +void execSummaryStatsScalar(Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, @@ -740,7 +736,7 @@ void NativeOps::execSummaryStatsScalar(Nd4jPointer *extraPointers, * @param hZ * @param hZShapeInfo */ -void NativeOps::execSummaryStats(Nd4jPointer *extraPointers, +void execSummaryStats(Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, @@ -772,7 +768,7 @@ void NativeOps::execSummaryStats(Nd4jPointer *extraPointers, * @param dimension * @param dimensionLength */ -void NativeOps::execSummaryStats(Nd4jPointer *extraPointers, +void execSummaryStatsTad(Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, @@ -816,7 +812,7 @@ void NativeOps::execSummaryStats(Nd4jPointer *extraPointers, * @param extraParams * @param n */ -void NativeOps::execTransformFloat( +void execTransformFloat( Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, @@ -840,7 +836,7 @@ void NativeOps::execTransformFloat( nullptr); } -void NativeOps::execTransformSame( +void execTransformSame( Nd4jPointer 
*extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, @@ -864,7 +860,7 @@ void NativeOps::execTransformSame( nullptr); } -void NativeOps::execTransformBool( +void execTransformBool( Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, @@ -888,7 +884,7 @@ void NativeOps::execTransformBool( nullptr); } -void NativeOps::execTransformAny( +void execTransformAny( Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, @@ -912,7 +908,7 @@ void NativeOps::execTransformAny( nullptr); } -void NativeOps::execTransformStrict( +void execTransformStrict( Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, @@ -936,7 +932,7 @@ void NativeOps::execTransformStrict( nullptr); } -void NativeOps::execReduce3All(Nd4jPointer *extraPointers, +void execReduce3All(Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, @@ -1090,7 +1086,7 @@ void flattenGeneric(Nd4jPointer *extraPointers, * Concatneate multi array of the same shape together * along a particular dimension */ -void NativeOps::concat( +void concat( Nd4jPointer *extraPointers, int dimension, int numArrays, @@ -1110,7 +1106,7 @@ void NativeOps::concat( * Concatneate multi array of the same shape together * along a particular dimension */ -void NativeOps::specialConcat( +void specialConcat( Nd4jPointer *extraPointers, int dimension, int numArrays, @@ -1137,7 +1133,7 @@ void NativeOps::specialConcat( * @param input the input for the array * @param inputShapeInfo the shape information for that array */ -void NativeOps::flatten( +void flatten( Nd4jPointer *extraPointers, int offset, char order, @@ -1159,11 +1155,11 @@ void NativeOps::flatten( * This is dummy method for JNI compatibility * Since we'll use this from java, jni compiler would like to have method no matter what. */ -void NativeOps::initializeDevicesAndFunctions() { +void initializeDevicesAndFunctions() { } -void NativeOps::initializeFunctions(Nd4jPointer *functions) { +void initializeFunctions(Nd4jPointer *functions) { nd4j::BlasHelper::getInstance()->initializeFunctions(functions); } @@ -1174,7 +1170,7 @@ void NativeOps::initializeFunctions(Nd4jPointer *functions) { * @param memorySize memory size, in bytes * @param flags optional parameter */ -Nd4jPointer NativeOps::mallocHost(Nd4jLong memorySize, int flags) { +Nd4jPointer mallocHost(Nd4jLong memorySize, int flags) { Nd4jPointer pointer = (Nd4jPointer) malloc(memorySize); if (pointer == 0) return 0L; @@ -1191,7 +1187,7 @@ Nd4jPointer NativeOps::mallocHost(Nd4jLong memorySize, int flags) { * @param ptrToDeviceId pointer to deviceId. For cuda that's just and int, for OpenCL that's pointer to device_id, etc * @param flags optional parameter */ -Nd4jPointer NativeOps::mallocDevice(Nd4jLong memorySize, int deviceId, int flags) { +Nd4jPointer mallocDevice(Nd4jLong memorySize, int deviceId, int flags) { // not supported return 0L; } @@ -1201,7 +1197,7 @@ Nd4jPointer NativeOps::mallocDevice(Nd4jLong memorySize, int deviceId, int flags * * @param pointer pointer that'll be freed */ -int NativeOps::freeHost(Nd4jPointer pointer) { +int freeHost(Nd4jPointer pointer) { free(reinterpret_cast(pointer)); return 1L; } @@ -1214,7 +1210,7 @@ int NativeOps::freeHost(Nd4jPointer pointer) { * @param pointer pointer that'll be freed * @param ptrToDeviceId pointer to deviceId. 
*/ -int NativeOps::freeDevice(Nd4jPointer pointer, int deviceId) { +int freeDevice(Nd4jPointer pointer, int deviceId) { // not supported return 0L; } @@ -1223,121 +1219,121 @@ int NativeOps::freeDevice(Nd4jPointer pointer, int deviceId) { /** * Returns the maximum number open mp threads */ -int NativeOps::ompGetMaxThreads() { +int ompGetMaxThreads() { return omp_get_max_threads(); } /** * Returns the number open mp threads */ -int NativeOps::ompGetNumThreads() { +int ompGetNumThreads() { return omp_get_num_threads(); } /** * Sets the number of openmp threads */ -void NativeOps::setOmpNumThreads(int threads) { +void setOmpNumThreads(int threads) { omp_set_num_threads(threads); } -Nd4jPointer NativeOps::createContext() { +Nd4jPointer createContext() { return 0L; } -Nd4jPointer NativeOps::createStream() { +Nd4jPointer createStream() { return 0L; } -Nd4jPointer NativeOps::createEvent() { +Nd4jPointer createEvent() { return 0L; } -int NativeOps::getDeviceMajor(int deviceId ) { +int getDeviceMajor(int deviceId ) { return 0; } -int NativeOps::getDeviceMinor(int deviceId) { +int getDeviceMinor(int deviceId) { return 0; } -int NativeOps::registerEvent(Nd4jPointer event, Nd4jPointer stream) { +int registerEvent(Nd4jPointer event, Nd4jPointer stream) { return 0L; } -int NativeOps::setDevice(int deviceId) { +int setDevice(int deviceId) { return 0L; } -Nd4jLong NativeOps::getDeviceFreeMemory(int deviceId) { +Nd4jLong getDeviceFreeMemory(int deviceId) { return 0L; } -Nd4jLong NativeOps::getDeviceFreeMemory() { +Nd4jLong getDeviceFreeMemoryDefault() { return 0L; } -Nd4jLong NativeOps::getDeviceTotalMemory(int deviceId) { +Nd4jLong getDeviceTotalMemory(int deviceId) { return 0L; } -int NativeOps::memcpy(Nd4jPointer dst, Nd4jPointer src, Nd4jLong size, int flags, Nd4jPointer reserved) { +int memcpySync(Nd4jPointer dst, Nd4jPointer src, Nd4jLong size, int flags, Nd4jPointer reserved) { return 0L; } -int NativeOps::memcpyAsync(Nd4jPointer dst, Nd4jPointer src, Nd4jLong size, int flags, Nd4jPointer reserved) { +int memcpyAsync(Nd4jPointer dst, Nd4jPointer src, Nd4jLong size, int flags, Nd4jPointer reserved) { return 0L; } -int NativeOps::memset(Nd4jPointer dst, int value, Nd4jLong size, int flags, Nd4jPointer reserved) { +int memsetSync(Nd4jPointer dst, int value, Nd4jLong size, int flags, Nd4jPointer reserved) { return 0L; } -int NativeOps::memsetAsync(Nd4jPointer dst, int value, Nd4jLong size, int flags, Nd4jPointer reserved) { +int memsetAsync(Nd4jPointer dst, int value, Nd4jLong size, int flags, Nd4jPointer reserved) { return 0L; } -int NativeOps::destroyEvent(Nd4jPointer event) { +int destroyEvent(Nd4jPointer event) { return 0L; } -int NativeOps::streamSynchronize(Nd4jPointer stream) { +int streamSynchronize(Nd4jPointer stream) { return 0L; } -int NativeOps::eventSynchronize(Nd4jPointer event) { +int eventSynchronize(Nd4jPointer event) { return 0L; } -int NativeOps::getAvailableDevices() { +int getAvailableDevices() { return 0L; } -void NativeOps::enableDebugMode(bool reallyEnable) { +void enableDebugMode(bool reallyEnable) { nd4j::Environment::getInstance()->setDebug(reallyEnable); } -void NativeOps::enableVerboseMode(bool reallyEnable) { +void enableVerboseMode(bool reallyEnable) { nd4j::Environment::getInstance()->setVerbose(reallyEnable); } -void NativeOps::setGridLimit(int gridSize) { +void setGridLimit(int gridSize) { // no-op } -nd4j::TadPack* NativeOps::tadOnlyShapeInfo(Nd4jLong *hXShapeInfo, int *dimension, int dimensionLength) { +nd4j::TadPack* tadOnlyShapeInfo(Nd4jLong *hXShapeInfo, int 
*dimension, int dimensionLength) { auto pack = new TadPack(); *pack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(hXShapeInfo, dimension, dimensionLength); return pack; } -int NativeOps::memcpyConstantAsync(Nd4jLong dst, Nd4jPointer src, Nd4jLong size, int flags, Nd4jPointer reserved) { +int memcpyConstantAsync(Nd4jLong dst, Nd4jPointer src, Nd4jLong size, int flags, Nd4jPointer reserved) { // no-op return 0L; } -Nd4jPointer NativeOps::getConstantSpace() { +Nd4jPointer getConstantSpace() { // no-op return 0L; } @@ -1395,7 +1391,7 @@ void pullRowsGeneric(void *vx, } } -void NativeOps::pullRows(Nd4jPointer *extraPointers, +void pullRows(Nd4jPointer *extraPointers, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, void *hZ, Nd4jLong *hZShapeInfo, @@ -1452,7 +1448,7 @@ void tearGeneric(void *vx, } } -void NativeOps::tear(Nd4jPointer *extraPointers, +void tear(Nd4jPointer *extraPointers, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, Nd4jPointer *targets, @@ -1465,7 +1461,7 @@ void NativeOps::tear(Nd4jPointer *extraPointers, } -void NativeOps::average(Nd4jPointer *extras, +void average(Nd4jPointer *extras, Nd4jPointer *hX, Nd4jLong *hXShapeInfo, Nd4jPointer *dX, Nd4jLong *dXShapeInfo, void *z, Nd4jLong *hZShapeInfo, @@ -1478,7 +1474,7 @@ void NativeOps::average(Nd4jPointer *extras, BUILD_SINGLE_SELECTOR(xType, nd4j::SpecialMethods, ::averageGeneric(hX, z, hZShapeInfo, n, length, propagate), LIBND4J_TYPES); } -void NativeOps::accumulate(Nd4jPointer *extras, +void accumulate(Nd4jPointer *extras, Nd4jPointer *hX, Nd4jLong *hXShapeInfo, Nd4jPointer *dX, Nd4jLong *dXShapeInfo, void *hz, Nd4jLong *hZShapeInfo, @@ -1491,42 +1487,42 @@ void NativeOps::accumulate(Nd4jPointer *extras, BUILD_SINGLE_SELECTOR(xType, nd4j::SpecialMethods, ::accumulateGeneric(hX, hz, hZShapeInfo, n, length), LIBND4J_TYPES); } -void NativeOps::enableP2P(bool enable) { +void enableP2P(bool enable) { // no-op } -void NativeOps::encodeThresholdP1(Nd4jPointer *extraPointers, void *hX, Nd4jLong *hXShapeInfo, Nd4jLong N, int *dz, float threshold) { +void encodeThresholdP1(Nd4jPointer *extraPointers, void *hX, Nd4jLong *hXShapeInfo, Nd4jLong N, int *dz, float threshold) { // TODO: to be implemented } -void NativeOps::encodeThresholdP2Int(Nd4jPointer *extraPointers, int *hX, Nd4jLong N, int *dz) { +void encodeThresholdP2Int(Nd4jPointer *extraPointers, int *hX, Nd4jLong N, int *dz) { // TODO: to be implemented } -void NativeOps::encodeThresholdP3(Nd4jPointer *extraPointers, void *hX, Nd4jLong *hXShapeInfo, int *offsets, Nd4jLong N, int *dz){ +void encodeThresholdP3(Nd4jPointer *extraPointers, void *hX, Nd4jLong *hXShapeInfo, int *offsets, Nd4jLong N, int *dz){ // offsets won't be used here // TODO: to be implemented } -void NativeOps::decodeThreshold(Nd4jPointer *extraPointers, void *hX, Nd4jLong N, void *dz, Nd4jLong *hZShapeInfo){ +void decodeThreshold(Nd4jPointer *extraPointers, void *hX, Nd4jLong N, void *dz, Nd4jLong *hZShapeInfo){ // TODO: to be implemented } -bool NativeOps::isP2PAvailable() { +bool isP2PAvailable() { // always TRUE for cpu backend return true; } -void NativeOps::checkP2P() { +void checkP2P() { // no-op } -void NativeOps::decodeBitmap(Nd4jPointer *extraPointers, void *hX, Nd4jLong N, void *dz, Nd4jLong *hZShapeInfo) { +void decodeBitmap(Nd4jPointer *extraPointers, void *hX, Nd4jLong N, void *dz, Nd4jLong *hZShapeInfo) { NativeOpExecutioner::decodeBitmap(hX, N, dz, hZShapeInfo); } @@ -1589,7 +1585,7 @@ void shuffleGeneric(void **hX, Nd4jLong **hXShapeInfo, void 
**dz, Nd4jLong **hZS } } -void NativeOps::shuffle(Nd4jPointer *extras, +void shuffle(Nd4jPointer *extras, Nd4jPointer *hX, Nd4jPointer *hXShapeInfo, Nd4jPointer *dX, Nd4jPointer *dXShapeInfo, Nd4jPointer *hz, Nd4jPointer *hZShapeInfo, @@ -1609,17 +1605,17 @@ void NativeOps::shuffle(Nd4jPointer *extras, } -bool NativeOps::isExperimentalEnabled() { +bool isExperimentalEnabled() { return nd4j::Environment::getInstance()->isExperimentalBuild(); } -void NativeOps::setOmpMinThreads(int threads) { +void setOmpMinThreads(int threads) { // TODO: to be implemented } /* -void NativeOps::execMetaPredicateShape(Nd4jPointer *extras, +void execMetaPredicateShape(Nd4jPointer *extras, const int opTypeA, const int opNumA, const int opTypeB, @@ -1639,11 +1635,11 @@ void NativeOps::execMetaPredicateShape(Nd4jPointer *extras, } */ -int NativeOps::getDevice() { +int getDevice() { return 0; } -void NativeOps::execScalar(Nd4jPointer *extraPointers, +void execScalarTad(Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, @@ -1683,7 +1679,7 @@ void NativeOps::execScalar(Nd4jPointer *extraPointers, tadOffsetsZ); } -void NativeOps::execScalarBool(Nd4jPointer *extraPointers, +void execScalarBoolTad(Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, @@ -1723,7 +1719,7 @@ void NativeOps::execScalarBool(Nd4jPointer *extraPointers, tadOffsetsZ); } -const char * NativeOps::getDeviceName(int deviceId) { +const char * getDeviceName(int deviceId) { if (!nameSet) { name = reinterpret_cast(malloc(256 * sizeof(char))); @@ -1741,7 +1737,7 @@ const char * NativeOps::getDeviceName(int deviceId) { } -void NativeOps::execAggregate(Nd4jPointer *extraPointers,int opNum, +void execAggregate(Nd4jPointer *extraPointers,int opNum, void **arguments, int numArguments, Nd4jLong **shapeArguments, @@ -1759,7 +1755,7 @@ void NativeOps::execAggregate(Nd4jPointer *extraPointers,int opNum, } template -void NativeOps::_batchExecutor(Nd4jPointer *extraPointers, +void _batchExecutor(Nd4jPointer *extraPointers, int numAggregates, int opNum, int maxArgs, @@ -1813,9 +1809,23 @@ void NativeOps::_batchExecutor(Nd4jPointer *extraPointers, delete [] intArrays; } } -BUILD_SINGLE_TEMPLATE(template void NativeOps::_batchExecutor, (Nd4jPointer *extraPointers, int numAggregates, int opNum, int maxArgs, int maxShapes, int maxIntArrays, int maxIntArraySize, int maxIdx, int maxReals, void *ptrToArguments, nd4j::DataType dtype), FLOAT_TYPES); +BUILD_SINGLE_TEMPLATE(template void _batchExecutor, (Nd4jPointer *extraPointers, int numAggregates, int opNum, int maxArgs, int maxShapes, int maxIntArrays, int maxIntArraySize, int maxIdx, int maxReals, void *ptrToArguments, nd4j::DataType dtype), FLOAT_TYPES); -void NativeOps::execAggregateBatch(Nd4jPointer *extraPointers, +void batchExecutor(Nd4jPointer *extraPointers, + int numAggregates, + int opNum, + int maxArgs, + int maxShapes, + int maxIntArrays, + int maxIntArraySize, + int maxIdx, + int maxReals, + void *ptrToArguments, + nd4j::DataType dtype) { + BUILD_SINGLE_SELECTOR(dtype, _batchExecutor, (extraPointers, numAggregates, opNum, maxArgs, maxShapes, maxIntArrays, maxIntArraySize, maxIdx, maxReals, ptrToArguments, dtype), FLOAT_TYPES); +} + +void execAggregateBatch(Nd4jPointer *extraPointers, int numAggregates, int opNum, int maxArgs, @@ -1830,7 +1840,7 @@ void NativeOps::execAggregateBatch(Nd4jPointer *extraPointers, } -void NativeOps::execRandom(Nd4jPointer *extraPointers, +void execRandom(Nd4jPointer 
*extraPointers, int opNum, Nd4jPointer state, void *hZ, Nd4jLong *hZShapeInfo, @@ -1839,7 +1849,7 @@ void NativeOps::execRandom(Nd4jPointer *extraPointers, NativeOpExecutioner::execRandom(nullptr, opNum, state, hZ, hZShapeInfo, dZ, dZShapeInfo, extraArguments); } -void NativeOps::execRandom(Nd4jPointer *extraPointers, +void execRandom3(Nd4jPointer *extraPointers, int opNum, Nd4jPointer state, void *hX, Nd4jLong *hXShapeInfo, @@ -1853,7 +1863,7 @@ void NativeOps::execRandom(Nd4jPointer *extraPointers, NativeOpExecutioner::execRandom(nullptr, opNum, state, hX, hXShapeInfo, dX, dXShapeInfo, hY, hYShapeInfo, dY, dYShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, extraArguments); } -void NativeOps::execRandom(Nd4jPointer *extraPointers, +void execRandom2(Nd4jPointer *extraPointers, int opNum, Nd4jPointer state, void *hX, Nd4jLong *hXShapeInfo, @@ -1865,7 +1875,7 @@ void NativeOps::execRandom(Nd4jPointer *extraPointers, NativeOpExecutioner::execRandom(nullptr, opNum, state, hX, hXShapeInfo, dX, dXShapeInfo, hZ, hZShapeInfo, dZ, dZShapeInfo, extraArguments); } -Nd4jPointer NativeOps::initRandom(Nd4jPointer *extraPointers, long seed, long bufferSize, Nd4jPointer ptrToBuffer) { +Nd4jPointer initRandom(Nd4jPointer *extraPointers, long seed, long bufferSize, Nd4jPointer ptrToBuffer) { auto ptrBuf = reinterpret_cast(ptrToBuffer); auto buffer = new nd4j::random::RandomBuffer(seed, bufferSize, reinterpret_cast(ptrBuf)); @@ -1875,7 +1885,7 @@ Nd4jPointer NativeOps::initRandom(Nd4jPointer *extraPointers, long seed, long bu return (Nd4jPointer) buffer; } -void NativeOps::refreshBuffer(Nd4jPointer *extraPointers, long seed, Nd4jPointer ptrRandom) { +void refreshBuffer(Nd4jPointer *extraPointers, long seed, Nd4jPointer ptrRandom) { auto buffer = reinterpret_cast (ptrRandom); buffer->setSeed(seed); @@ -1884,14 +1894,14 @@ void NativeOps::refreshBuffer(Nd4jPointer *extraPointers, long seed, Nd4jPointer generator.refreshBuffer(); } -void NativeOps::reSeedBuffer(Nd4jPointer *extraPointers, long seed, Nd4jPointer ptrRandom) { +void reSeedBuffer(Nd4jPointer *extraPointers, long seed, Nd4jPointer ptrRandom) { auto buffer = reinterpret_cast (ptrRandom); buffer->reSeed(seed); } -void NativeOps::destroyRandom(Nd4jPointer ptrBuffer) { +void destroyRandom(Nd4jPointer ptrBuffer) { auto buffer = reinterpret_cast(ptrBuffer); delete buffer; } @@ -1905,7 +1915,7 @@ void NativeOps::destroyRandom(Nd4jPointer ptrBuffer) { * @param buffer the buffer pointer to check * @return */ -int NativeOps::lengthForShapeBufferPointer(Nd4jPointer buffer) { +int lengthForShapeBufferPointer(Nd4jPointer buffer) { auto shapeBuffer = reinterpret_cast(buffer); return shape::shapeInfoLength(shape::rank(shapeBuffer)); } @@ -1918,18 +1928,18 @@ int NativeOps::lengthForShapeBufferPointer(Nd4jPointer buffer) { * @return the pointer for the given address */ -Nd4jPointer NativeOps::pointerForAddress(Nd4jLong address) { +Nd4jPointer pointerForAddress(Nd4jLong address) { return reinterpret_cast(address); } -void NativeOps::sort(Nd4jPointer *extraPointers, +void sort(Nd4jPointer *extraPointers, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, bool descending) { NativeOpExecutioner::execSort(hX, hXShapeInfo, descending); } -void NativeOps::sortTad(Nd4jPointer *extraPointers, +void sortTad(Nd4jPointer *extraPointers, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, int *dimension, @@ -1940,7 +1950,7 @@ void NativeOps::sortTad(Nd4jPointer *extraPointers, NativeOpExecutioner::execSort(hX, hXShapeInfo, dimension, dimensionLength, 
tadShapeInfo, tadOffsets, descending); } -void NativeOps::sortCooIndices(Nd4jPointer *extraPointers, +void sortCooIndices(Nd4jPointer *extraPointers, Nd4jLong *indices, void *values, Nd4jLong length, @@ -1948,13 +1958,13 @@ void NativeOps::sortCooIndices(Nd4jPointer *extraPointers, NativeOpExecutioner::execSortCooIndices(indices, values, length, rank); } -Nd4jLong NativeOps::encodeBitmap(Nd4jPointer *extraPointers, void *hX, Nd4jLong *hXShapeInfo, Nd4jLong N, int *dz, float threshold) { +Nd4jLong encodeBitmap(Nd4jPointer *extraPointers, void *hX, Nd4jLong *hXShapeInfo, Nd4jLong N, int *dz, float threshold) { return NativeOpExecutioner::encodeBitmap(hX, hXShapeInfo, N, dz, threshold); } -Nd4jLong* NativeOps::mmapFile(Nd4jPointer *extraPointers, const char *fileName, Nd4jLong length) { +Nd4jLong* mmapFile(Nd4jPointer *extraPointers, const char *fileName, Nd4jLong length) { auto hZ = new Nd4jLong[2];errno = 0; #if defined(_WIN32) || defined(_WIN64) @@ -1980,7 +1990,7 @@ Nd4jLong* NativeOps::mmapFile(Nd4jPointer *extraPointers, const char *fileName, } -void NativeOps::munmapFile(Nd4jPointer *extraPointers, Nd4jLong *ptrMap, Nd4jLong length) { +void munmapFile(Nd4jPointer *extraPointers, Nd4jLong *ptrMap, Nd4jLong length) { munmap((Nd4jPointer) ptrMap[0], length); #if defined(_WIN32) || defined(_WIN64) CloseHandle(reinterpret_cast(ptrMap[1])); @@ -1991,11 +2001,11 @@ void NativeOps::munmapFile(Nd4jPointer *extraPointers, Nd4jLong *ptrMap, Nd4jLon delete[] ptrMap; } -nd4j::graph::ResultWrapper* NativeOps::executeFlatGraph(Nd4jPointer *extraPointers, Nd4jPointer flatBufferPointer) { +nd4j::graph::ResultWrapper* executeFlatGraph(Nd4jPointer *extraPointers, Nd4jPointer flatBufferPointer) { return nd4j::graph::GraphExecutioner::executeFlatBuffer(flatBufferPointer); } -const char* NativeOps::getAllCustomOps() { +const char* getAllCustomOps() { return nd4j::ops::OpRegistrator::getInstance()->getAllCustomOperations(); } @@ -2026,14 +2036,14 @@ FORCEINLINE int estimateThresholdGeneric(Nd4jPointer *extraPointers, Nd4jPointer } -int NativeOps::estimateThreshold(Nd4jPointer *extraPointers, Nd4jPointer hX, Nd4jLong *hXShapeInfo, int N, float threshold) { +int estimateThreshold(Nd4jPointer *extraPointers, Nd4jPointer hX, Nd4jLong *hXShapeInfo, int N, float threshold) { auto xType = ArrayOptions::dataType(hXShapeInfo); BUILD_SINGLE_SELECTOR(xType, return estimateThresholdGeneric, (extraPointers, hX, N, threshold), FLOAT_TYPES); } -void NativeOps::deleteShapeList(Nd4jPointer shapeList) { +void deleteShapeList(Nd4jPointer shapeList) { auto list = reinterpret_cast(shapeList); //list->destroy(); @@ -2081,7 +2091,7 @@ nd4j::ShapeList* _calculateOutputShapes(Nd4jPointer* extraPointers, nd4j::ops::D return shapeList; } -nd4j::ShapeList* NativeOps::calculateOutputShapes(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputBuffers, Nd4jPointer* inputShapes, int numInputShapes, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs, bool *bArgs, int numBArgs) { +nd4j::ShapeList* calculateOutputShapes2(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputBuffers, Nd4jPointer* inputShapes, int numInputShapes, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs, bool *bArgs, int numBArgs) { auto op = nd4j::ops::OpRegistrator::getInstance()->getOperation(hash); return _calculateOutputShapes(extraPointers, op, inputBuffers, inputShapes, numInputShapes, tArgs, numTArgs, iArgs, numIArgs, bArgs, numBArgs); @@ -2106,13 +2116,13 @@ nd4j::ShapeList* _calculateOutputShapes(Nd4jPointer* extraPointers, 
nd4j::ops::D return shapeList; } -nd4j::ShapeList* NativeOps::calculateOutputShapes(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputShapes, int numInputShapes, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs) { +nd4j::ShapeList* calculateOutputShapes(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputShapes, int numInputShapes, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs) { auto op = nd4j::ops::OpRegistrator::getInstance()->getOperation(hash); return _calculateOutputShapes(extraPointers, op, inputShapes, numInputShapes, tArgs, numTArgs, iArgs, numIArgs); } -int NativeOps::execCustomOp(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer opContext) { +int execCustomOp2(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer opContext) { auto op = nd4j::ops::OpRegistrator::getInstance()->getOperation(hash); auto context = reinterpret_cast(opContext); @@ -2232,12 +2242,12 @@ Nd4jStatus realExec(nd4j::ops::DeclarableOp* op, Nd4jPointer* extraPointers, Nd4 } -int NativeOps::execCustomOp(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputBuffers, Nd4jPointer* inputShapes, int numInputs, Nd4jPointer* outputBuffers, Nd4jPointer* outputShapes, int numOutputs, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs, bool* bArgs, int numBArgs, bool isInplace) { +int execCustomOp(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputBuffers, Nd4jPointer* inputShapes, int numInputs, Nd4jPointer* outputBuffers, Nd4jPointer* outputShapes, int numOutputs, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs, bool* bArgs, int numBArgs, bool isInplace) { auto op = nd4j::ops::OpRegistrator::getInstance()->getOperation(hash); return realExec(op, extraPointers, hash, inputBuffers, inputShapes, numInputs, outputBuffers, outputShapes, numOutputs, tArgs, numTArgs, iArgs, numIArgs, bArgs, numBArgs, isInplace); } -int NativeOps::registerGraph(Nd4jPointer *extraPointers, Nd4jLong graphId, Nd4jPointer flatBufferPointer) { +int registerGraph(Nd4jPointer *extraPointers, Nd4jLong graphId, Nd4jPointer flatBufferPointer) { auto graph = nd4j::graph::GraphExecutioner::importFromFlatPointer(flatBufferPointer); nd4j::graph::GraphHolder::getInstance()->registerGraph(graphId, graph); @@ -2291,33 +2301,33 @@ static VariablesSet* executeStoredGraphT(Nd4jPointer *extraPointers, Nd4jLong gr return varSet; } -nd4j::graph::VariablesSet* NativeOps::executeStoredGraph(Nd4jPointer *extraPointers, Nd4jLong graphId, Nd4jPointer *inputBuffers, Nd4jPointer *inputShapes, int* inputIndices, int numInputs) { +nd4j::graph::VariablesSet* executeStoredGraph(Nd4jPointer *extraPointers, Nd4jLong graphId, Nd4jPointer *inputBuffers, Nd4jPointer *inputShapes, int* inputIndices, int numInputs) { return nullptr; } -int NativeOps::unregisterGraph(Nd4jPointer *extraPointers, Nd4jLong graphId) { +int unregisterGraph(Nd4jPointer *extraPointers, Nd4jLong graphId) { nd4j::graph::GraphHolder::getInstance()->dropGraphAny(graphId); return nd4j::Status::OK(); } -void NativeOps::deletePointerArray(Nd4jPointer pointer) { +void deletePointerArray(Nd4jPointer pointer) { auto ptr = reinterpret_cast(pointer); delete[] ptr; } -void NativeOps::deleteCharArray(Nd4jPointer pointer) { +void deleteCharArray(Nd4jPointer pointer) { auto ptr = reinterpret_cast(pointer); delete[] ptr; } -void NativeOps::deleteIntArray(Nd4jPointer pointer) { +void deleteIntArray(Nd4jPointer pointer) { auto ptr = reinterpret_cast(pointer); delete[] ptr; } -void NativeOps::deleteLongArray(Nd4jPointer pointer) { +void 
deleteLongArray(Nd4jPointer pointer) { auto ptr = reinterpret_cast(pointer); delete[] ptr; } @@ -2328,20 +2338,20 @@ static void deleteVariablesSetT(Nd4jPointer pointer) { delete ptr; } -void NativeOps::deleteVariablesSet(Nd4jPointer pointer) { +void deleteVariablesSet(Nd4jPointer pointer) { deleteVariablesSetT(pointer); } -const char* NativeOps::getAllOperations() { +const char* getAllOperations() { return nd4j::OpTracker::getInstance()->exportOperations(); } -Nd4jPointer NativeOps::getGraphState(Nd4jLong id) { +Nd4jPointer getGraphState(Nd4jLong id) { return (Nd4jPointer) new nd4j::graph::GraphState(id); } -void NativeOps::deleteGraphState(Nd4jPointer state) { +void deleteGraphState(Nd4jPointer state) { auto stateP = reinterpret_cast(state); delete stateP; } @@ -2411,11 +2421,11 @@ Nd4jStatus execCustomOpWithScope_(Nd4jPointer *extraPointers, nd4j::graph::Graph return Status::OK(); } -Nd4jStatus NativeOps::execCustomOpWithScope(Nd4jPointer *extraPointers, Nd4jPointer state, Nd4jLong opHash, Nd4jLong *scopes, int numScopes, Nd4jPointer *inputBuffers, Nd4jPointer *inputShapes, int numInputs, Nd4jPointer *outputBuffers, Nd4jPointer *outputShapes, int numOutputs) { +Nd4jStatus execCustomOpWithScope(Nd4jPointer *extraPointers, Nd4jPointer state, Nd4jLong opHash, Nd4jLong *scopes, int numScopes, Nd4jPointer *inputBuffers, Nd4jPointer *inputShapes, int numInputs, Nd4jPointer *outputBuffers, Nd4jPointer *outputShapes, int numOutputs) { return execCustomOpWithScope_(extraPointers, reinterpret_cast(state), opHash, scopes, numScopes, inputBuffers, inputShapes, numInputs, outputBuffers, outputShapes, numOutputs); } -void NativeOps::deleteResultWrapper(Nd4jPointer ptr) { +void deleteResultWrapper(Nd4jPointer ptr) { // just 0 room for compiler s@!t auto p = reinterpret_cast(ptr); delete p; @@ -2425,7 +2435,7 @@ void NativeOps::deleteResultWrapper(Nd4jPointer ptr) { * TypeDef: * void convertTypes(Nd4jPointer *extras, int srcType, Nd4jPointer hX, long N, int dstType, Nd4jPointer hZ); */ -void NativeOps::convertTypes(Nd4jPointer *extras, int srcType, Nd4jPointer hX, Nd4jLong N, int dstType, Nd4jPointer hZ) { +void convertTypes(Nd4jPointer *extras, int srcType, Nd4jPointer hX, Nd4jLong N, int dstType, Nd4jPointer hZ) { auto hx = reinterpret_cast(hX); auto hz = reinterpret_cast(hZ); @@ -2605,7 +2615,7 @@ void NativeOps::convertTypes(Nd4jPointer *extras, int srcType, Nd4jPointer hX, N } /* -void NativeOps::fillUtf8String(Nd4jPointer *extraPointers, const char **strings, int numStrings, Nd4jPointer buffer) { +void fillUtf8String(Nd4jPointer *extraPointers, const char **strings, int numStrings, Nd4jPointer buffer) { auto hZ = reinterpret_cast(buffer); for (int e = 0; e < numStrings; e++) { hZ[e] = reinterpret_cast(createUtf8String(extraPointers, strings[e])); @@ -2613,18 +2623,18 @@ void NativeOps::fillUtf8String(Nd4jPointer *extraPointers, const char **strings, } */ -Nd4jPointer NativeOps::createUtf8String(Nd4jPointer *extraPointers, const char *string, int length) { +Nd4jPointer createUtf8String(Nd4jPointer *extraPointers, const char *string, int length) { auto u = new nd4j::utf8string(string, length); return reinterpret_cast(u); } -void NativeOps::deleteUtf8String(Nd4jPointer *extraPointers, Nd4jPointer ptr) { +void deleteUtf8String(Nd4jPointer *extraPointers, Nd4jPointer ptr) { delete(reinterpret_cast(ptr)); } //////////////////////////////////////////////////////////////////////// -void NativeOps::scatterUpdate(Nd4jPointer *extraPointers, int opCode, int numOfSubArrs, +void scatterUpdate(Nd4jPointer 
*extraPointers, int opCode, int numOfSubArrs, void* hX, Nd4jLong* hXShapeInfo, Nd4jLong* hXOffsets, void* dX, Nd4jLong* dXShapeInfo, Nd4jLong* dXOffsets, void* hY, Nd4jLong* hYShapeInfo, Nd4jLong* hYOffsets, @@ -2681,54 +2691,54 @@ void NativeOps::scatterUpdate(Nd4jPointer *extraPointers, int opCode, int numOfS } } -void NativeOps::inspectArray(Nd4jPointer *extraPointers, Nd4jPointer buffer, Nd4jLong *shapeInfo, Nd4jPointer specialBuffer, Nd4jLong *specialShapeInfo, Nd4jPointer debugInfo) { +void inspectArray(Nd4jPointer *extraPointers, Nd4jPointer buffer, Nd4jLong *shapeInfo, Nd4jPointer specialBuffer, Nd4jLong *specialShapeInfo, Nd4jPointer debugInfo) { auto p = reinterpret_cast(debugInfo); NDArray array(buffer, shapeInfo); nd4j::DebugHelper::retrieveDebugStatistics(p, &array); } -void NativeOps::tryPointer(Nd4jPointer extra, Nd4jPointer p, int len) { +void tryPointer(Nd4jPointer extra, Nd4jPointer p, int len) { auto buf = reinterpret_cast(p); int cnt = 0; for (int i = 0; i < len; i++) cnt += buf[cnt]; } -nd4j::ConstantDataBuffer* NativeOps::shapeBuffer(int rank, Nd4jLong *shape, Nd4jLong *strides, nd4j::DataType dtype, char order, Nd4jLong ews, bool empty) { +nd4j::ConstantDataBuffer* shapeBuffer(int rank, Nd4jLong *shape, Nd4jLong *strides, nd4j::DataType dtype, char order, Nd4jLong ews, bool empty) { auto buffer = new ConstantDataBuffer(); *buffer = nd4j::ConstantShapeHelper::getInstance()->bufferForShapeInfo(ShapeDescriptor(dtype, order, shape, strides, rank, ews, empty)); return buffer; } -void NativeOps::deleteShapeBuffer(Nd4jPointer ptr) { +void deleteShapeBuffer(Nd4jPointer ptr) { auto buffer = reinterpret_cast(ptr); delete buffer; } -void NativeOps::deleteTadPack(Nd4jPointer ptr) { +void deleteTadPack(Nd4jPointer ptr) { auto buffer = reinterpret_cast(ptr); delete buffer; } -nd4j::ConstantDataBuffer* NativeOps::constantBuffer(nd4j::DataType dtype, Nd4jLong *data, int length) { +nd4j::ConstantDataBuffer* constantBufferLong(nd4j::DataType dtype, Nd4jLong *data, int length) { return nullptr; } -nd4j::ConstantDataBuffer* NativeOps::constantBuffer(nd4j::DataType dtype, double *data, int length) { +nd4j::ConstantDataBuffer* constantBufferDouble(nd4j::DataType dtype, double *data, int length) { return nullptr; } -nd4j::ConstantDataBuffer* NativeOps::constantBuffer(nd4j::DataType dtype, nd4j::ConstantDescriptor *descriptor) { +nd4j::ConstantDataBuffer* constantBuffer(nd4j::DataType dtype, nd4j::ConstantDescriptor *descriptor) { return nd4j::ConstantHelper::getInstance()->constantBuffer(*descriptor, dtype); } -int NativeOps::dataTypeFromNpyHeader(void *header) { +int dataTypeFromNpyHeader(void *header) { return (int) cnpy::dataTypeFromHeader(reinterpret_cast(header)); } -Nd4jPointer NativeOps::shapeBufferForNumpy(Nd4jPointer npyArray) { +Nd4jPointer shapeBufferForNumpy(Nd4jPointer npyArray) { cnpy::NpyArray arr = cnpy::loadNpyFromPointer(reinterpret_cast(npyArray)); unsigned int shapeSize = arr.shape.size(); std::vector shape(shapeSize); @@ -2757,7 +2767,7 @@ Nd4jPointer NativeOps::shapeBufferForNumpy(Nd4jPointer npyArray) { return reinterpret_cast(nd4j::ConstantShapeHelper::getInstance()->createFromExisting(shapeBuffer, true)); } -void NativeOps::sortByKey(Nd4jPointer *extraPointers, +void sortByKey(Nd4jPointer *extraPointers, void *x, Nd4jLong *xShapeInfo, void *dx, Nd4jLong *dxShapeInfo, void *y, Nd4jLong *yShapeInfo, @@ -2769,7 +2779,7 @@ void NativeOps::sortByKey(Nd4jPointer *extraPointers, BUILD_DOUBLE_SELECTOR(xType, yType, nd4j::DoubleMethods, ::sortByKey(x, xShapeInfo, y, 
yShapeInfo, descending), LIBND4J_TYPES, LIBND4J_TYPES); } -void NativeOps::sortByValue(Nd4jPointer *extraPointers, +void sortByValue(Nd4jPointer *extraPointers, void *x, Nd4jLong *xShapeInfo, void *dx, Nd4jLong *dxShapeInfo, void *y, Nd4jLong *yShapeInfo, @@ -2782,7 +2792,7 @@ void NativeOps::sortByValue(Nd4jPointer *extraPointers, BUILD_DOUBLE_SELECTOR(xType, yType, nd4j::DoubleMethods, ::sortByValue(x, xShapeInfo, y, yShapeInfo, descending), LIBND4J_TYPES, LIBND4J_TYPES); } -void NativeOps::sortTadByKey(Nd4jPointer *extraPointers, +void sortTadByKey(Nd4jPointer *extraPointers, void *x, Nd4jLong *xShapeInfo, void *dx, Nd4jLong *dxShapeInfo, void *y, Nd4jLong *yShapeInfo, @@ -2796,7 +2806,7 @@ void NativeOps::sortTadByKey(Nd4jPointer *extraPointers, BUILD_DOUBLE_SELECTOR(xType, yType, nd4j::DoubleMethods, ::sortTadByKey(x, xShapeInfo, y, yShapeInfo, dimension, dimensionLength, descending), LIBND4J_TYPES, LIBND4J_TYPES); } -void NativeOps::sortTadByValue(Nd4jPointer *extraPointers, +void sortTadByValue(Nd4jPointer *extraPointers, void *x, Nd4jLong *xShapeInfo, void *dx, Nd4jLong *dxShapeInfo, void *y, Nd4jLong *yShapeInfo, @@ -2810,7 +2820,7 @@ void NativeOps::sortTadByValue(Nd4jPointer *extraPointers, BUILD_DOUBLE_SELECTOR(xType, yType, nd4j::DoubleMethods, ::sortTadByValue(x, xShapeInfo, y, yShapeInfo, dimension, dimensionLength, descending), LIBND4J_TYPES, LIBND4J_TYPES); } -const char* NativeOps::runLightBenchmarkSuit(bool printOut) { +const char* runLightBenchmarkSuit(bool printOut) { nd4j::LightBenchmarkSuit suit; auto result = suit.runSuit(); @@ -2824,11 +2834,11 @@ const char* NativeOps::runLightBenchmarkSuit(bool printOut) { return chars; } -Nd4jLong NativeOps::getCachedMemory(int deviceId) { +Nd4jLong getCachedMemory(int deviceId) { return nd4j::ConstantHelper::getInstance()->getCachedAmount(deviceId); } -const char* NativeOps::runFullBenchmarkSuit(bool printOut) { +const char* runFullBenchmarkSuit(bool printOut) { nd4j::FullBenchmarkSuit suit; auto result = suit.runSuit(); diff --git a/libnd4j/blas/cuda/NativeOps.cu b/libnd4j/blas/cuda/NativeOps.cu index d045ee16b..34177de87 100755 --- a/libnd4j/blas/cuda/NativeOps.cu +++ b/libnd4j/blas/cuda/NativeOps.cu @@ -239,11 +239,7 @@ public: } }; -NativeOps::NativeOps() { -// -} - -void NativeOps::execPairwiseTransform( Nd4jPointer *extraPointers, +void execPairwiseTransform( Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, @@ -258,7 +254,7 @@ void NativeOps::execPairwiseTransform( Nd4jPointer *extraPointers, } //////////////////////////////////////////////////////////////////////// -void NativeOps::execPairwiseTransformBool(Nd4jPointer *extraPointers, +void execPairwiseTransformBool(Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, @@ -273,7 +269,7 @@ void NativeOps::execPairwiseTransformBool(Nd4jPointer *extraPointers, } //////////////////////////////////////////////////////////////////////// -void NativeOps::execSummaryStatsScalar(Nd4jPointer *extraPointers, +void execSummaryStatsScalar(Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, @@ -287,7 +283,7 @@ void NativeOps::execSummaryStatsScalar(Nd4jPointer *extraPointers, } //////////////////////////////////////////////////////////////////////// -void NativeOps::execBroadcastBool(Nd4jPointer *extraPointers, +void execBroadcastBool(Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong 
*dXShapeInfo, @@ -329,7 +325,7 @@ void NativeOps::execBroadcastBool(Nd4jPointer *extraPointers, * @param dimension * @param dimensionLength */ -void NativeOps::execBroadcast( +void execBroadcast( Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, @@ -385,7 +381,7 @@ void NativeOps::execBroadcast( * @param dZShapeInfo */ //////////////////////////////////////////////////////////////////////// -void NativeOps::execReduceFloat(Nd4jPointer *extraPointers, +void execReduceFloat(Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, @@ -398,7 +394,7 @@ void NativeOps::execReduceFloat(Nd4jPointer *extraPointers, } //////////////////////////////////////////////////////////////////////// -void NativeOps::execReduceSame(Nd4jPointer *extraPointers, +void execReduceSame(Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, @@ -411,7 +407,7 @@ void NativeOps::execReduceSame(Nd4jPointer *extraPointers, } //////////////////////////////////////////////////////////////////////// -void NativeOps::execReduceSame(Nd4jPointer *extraPointers, +void execReduceSame2(Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, @@ -430,7 +426,7 @@ void NativeOps::execReduceSame(Nd4jPointer *extraPointers, } //////////////////////////////////////////////////////////////////////// -void NativeOps::execReduceLong(Nd4jPointer *extraPointers, +void execReduceLong2(Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, @@ -449,7 +445,7 @@ void NativeOps::execReduceLong(Nd4jPointer *extraPointers, } //////////////////////////////////////////////////////////////////////// -void NativeOps::execReduceLong(Nd4jPointer *extraPointers, +void execReduceLong(Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, @@ -470,7 +466,7 @@ void NativeOps::execReduceLong(Nd4jPointer *extraPointers, auto zType = nd4j::ArrayOptions::dataType(hZShapeInfo); if (zType != nd4j::DataType::INT64) - throw datatype_exception::build("NativeOps::execReduceLong wrong Z data type", nd4j::DataType::INT64, zType); + throw datatype_exception::build("execReduceLong wrong Z data type", nd4j::DataType::INT64, zType); auto xLength = shape::length(hXShapeInfo); auto blockWidth = 256; @@ -483,7 +479,7 @@ void NativeOps::execReduceLong(Nd4jPointer *extraPointers, } //////////////////////////////////////////////////////////////////////// -void NativeOps::execReduceBool(Nd4jPointer *extraPointers, +void execReduceBool(Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, @@ -502,7 +498,7 @@ void NativeOps::execReduceBool(Nd4jPointer *extraPointers, } //////////////////////////////////////////////////////////////////////// -void NativeOps::execReduceBool(Nd4jPointer *extraPointers, +void execReduceBool(Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, @@ -523,7 +519,7 @@ void NativeOps::execReduceBool(Nd4jPointer *extraPointers, auto zType = nd4j::ArrayOptions::dataType(hZShapeInfo); if (zType != nd4j::DataType::BOOL) - throw std::runtime_error("NativeOps::execReduceBool requires Z operand to have BOOL type"); + throw std::runtime_error("execReduceBool requires Z operand to have BOOL type"); auto xLength = shape::length(hXShapeInfo); auto blockWidth = 256; @@ -547,7 +543,7 @@ void 
NativeOps::execReduceBool(Nd4jPointer *extraPointers, * @param dimensionLength */ //////////////////////////////////////////////////////////////////////// -void NativeOps::execIndexReduce(Nd4jPointer *extraPointers, +void execIndexReduce(Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, @@ -575,7 +571,7 @@ void NativeOps::execIndexReduce(Nd4jPointer *extraPointers, * @param dZShapeInfo */ //////////////////////////////////////////////////////////////////////// -void NativeOps::execReduceFloat(Nd4jPointer *extraPointers, +void execReduceFloat2(Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, @@ -601,7 +597,7 @@ void NativeOps::execReduceFloat(Nd4jPointer *extraPointers, * @param extraParams */ //////////////////////////////////////////////////////////////////////// -void NativeOps::execIndexReduceScalar( +void execIndexReduceScalar( Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, @@ -615,7 +611,7 @@ void NativeOps::execIndexReduceScalar( } //////////////////////////////////////////////////////////////////////// -void NativeOps::execTransformSame(Nd4jPointer *extraPointers,int opNum, +void execTransformSame(Nd4jPointer *extraPointers,int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, void *hZ, Nd4jLong *hZShapeInfo, @@ -630,7 +626,7 @@ void NativeOps::execTransformSame(Nd4jPointer *extraPointers,int opNum, } //////////////////////////////////////////////////////////////////////// -void NativeOps::execTransformBool(Nd4jPointer *extraPointers,int opNum, +void execTransformBool(Nd4jPointer *extraPointers,int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, void *hZ, Nd4jLong *hZShapeInfo, @@ -645,7 +641,7 @@ void NativeOps::execTransformBool(Nd4jPointer *extraPointers,int opNum, } //////////////////////////////////////////////////////////////////////// -void NativeOps::execTransformAny(Nd4jPointer *extraPointers,int opNum, +void execTransformAny(Nd4jPointer *extraPointers,int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, void *hZ, Nd4jLong *hZShapeInfo, @@ -688,7 +684,7 @@ void NativeOps::execTransformAny(Nd4jPointer *extraPointers,int opNum, } //////////////////////////////////////////////////////////////////////// -void NativeOps::execTransformStrict(Nd4jPointer *extraPointers,int opNum, +void execTransformStrict(Nd4jPointer *extraPointers,int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, void *hZ, Nd4jLong *hZShapeInfo, @@ -703,7 +699,7 @@ void NativeOps::execTransformStrict(Nd4jPointer *extraPointers,int opNum, } //////////////////////////////////////////////////////////////////////// -void NativeOps::execTransformFloat(Nd4jPointer *extraPointers,int opNum, +void execTransformFloat(Nd4jPointer *extraPointers,int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, void *hZ, Nd4jLong *hZShapeInfo, @@ -729,7 +725,7 @@ void NativeOps::execTransformFloat(Nd4jPointer *extraPointers,int opNum, * @param input the input for the array * @param inputShapeInfo the shape information for that array */ -void NativeOps::flatten(Nd4jPointer *extraPointers, +void flatten(Nd4jPointer *extraPointers, int offset, char order, void *hZ, Nd4jLong *hZShapeInfo, @@ -758,7 +754,7 @@ void NativeOps::flatten(Nd4jPointer *extraPointers, -void NativeOps::checkP2P() { +void checkP2P() { int curDevice = 0; cudaGetDevice(&curDevice); @@ -799,7 +795,7 @@ void 
NativeOps::checkP2P() { } } -void NativeOps::enableP2P(bool enable) { +void enableP2P(bool enable) { if (enable == allowedP2P) return; @@ -845,12 +841,12 @@ void NativeOps::enableP2P(bool enable) { cudaSetDevice(curDevice); } -bool NativeOps::isP2PAvailable() { +bool isP2PAvailable() { return supportedP2P; } -void NativeOps::initializeDevicesAndFunctions() { +void initializeDevicesAndFunctions() { int devCnt = 0; cudaGetDeviceCount(&devCnt); deviceProperties = new cudaDeviceProp[devCnt]; @@ -870,18 +866,18 @@ void NativeOps::initializeDevicesAndFunctions() { enableP2P(allowedP2P); } -void NativeOps::initializeFunctions(Nd4jPointer *functions) { +void initializeFunctions(Nd4jPointer *functions) { nd4j::BlasHelper::getInstance()->initializeDeviceFunctions(functions); /* - this->cublasSgemv = (CublasSgemv)functions[0]; - this->cublasDgemv = (CublasDgemv)functions[1]; - this->cublasHgemm = (CublasHgemm)functions[2]; - this->cublasSgemm = (CublasSgemm)functions[3]; - this->cublasDgemm = (CublasDgemm)functions[4]; - this->cublasSgemmEx = (CublasSgemmEx)functions[5]; - this->cublasHgemmBatched = (CublasHgemmBatched)functions[6]; - this->cublasSgemmBatched = (CublasSgemmBatched)functions[7]; - this->cublasDgemmBatched = (CublasDgemmBatched)functions[8]; + cublasSgemv = (CublasSgemv)functions[0]; + cublasDgemv = (CublasDgemv)functions[1]; + cublasHgemm = (CublasHgemm)functions[2]; + cublasSgemm = (CublasSgemm)functions[3]; + cublasDgemm = (CublasDgemm)functions[4]; + cublasSgemmEx = (CublasSgemmEx)functions[5]; + cublasHgemmBatched = (CublasHgemmBatched)functions[6]; + cublasSgemmBatched = (CublasSgemmBatched)functions[7]; + cublasDgemmBatched = (CublasDgemmBatched)functions[8]; */ } @@ -893,7 +889,7 @@ void NativeOps::initializeFunctions(Nd4jPointer *functions) { * @param memorySize memory size, in bytes * @param flags optional parameter */ -Nd4jPointer NativeOps::mallocHost(Nd4jLong memorySize, int flags) { +Nd4jPointer mallocHost(Nd4jLong memorySize, int flags) { Nd4jPointer pointer; // cudaHostAllocMapped |cudaHostAllocPortable cudaError_t res = cudaHostAlloc(reinterpret_cast(&pointer), memorySize, cudaHostAllocDefault); @@ -910,7 +906,7 @@ Nd4jPointer NativeOps::mallocHost(Nd4jLong memorySize, int flags) { * @param ptrToDeviceId pointer to deviceId. For cuda that's just and int, for OpenCL that's pointer to device_id, etc * @param flags optional parameter */ -Nd4jPointer NativeOps::mallocDevice(Nd4jLong memorySize, int deviceId, int flags) { +Nd4jPointer mallocDevice(Nd4jLong memorySize, int deviceId, int flags) { Nd4jPointer pointer; auto res = cudaMalloc(reinterpret_cast(&pointer), memorySize); if (res != 0) @@ -923,7 +919,7 @@ Nd4jPointer NativeOps::mallocDevice(Nd4jLong memorySize, int deviceId, int flags * * @param pointer pointer that'll be freed */ -int NativeOps::freeHost(Nd4jPointer pointer) { +int freeHost(Nd4jPointer pointer) { cudaError_t res = cudaFreeHost(reinterpret_cast(pointer)); if (res != 0) pointer = 0L; @@ -936,7 +932,7 @@ int NativeOps::freeHost(Nd4jPointer pointer) { * @param pointer pointer that'll be freed * @param ptrToDeviceId pointer to deviceId. 
*/ -int NativeOps::freeDevice(Nd4jPointer pointer, int deviceId) { +int freeDevice(Nd4jPointer pointer, int deviceId) { cudaError_t res = cudaFree(reinterpret_cast(pointer)); if (res != 0) pointer = 0L; @@ -944,11 +940,11 @@ int NativeOps::freeDevice(Nd4jPointer pointer, int deviceId) { } -Nd4jPointer NativeOps::createContext() { +Nd4jPointer createContext() { return 0L; } -Nd4jPointer NativeOps::createStream() { +Nd4jPointer createStream() { /* Nd4jPointer nativeStream = (Nd4jPointer) malloc(sizeof(cudaStream_t)); @@ -969,7 +965,7 @@ Nd4jPointer NativeOps::createStream() { return stream; } -Nd4jPointer NativeOps::createEvent() { +Nd4jPointer createEvent() { Nd4jPointer nativeEvent= (Nd4jPointer) malloc(sizeof(cudaEvent_t)); CHECK_ALLOC(nativeEvent, "Failed to allocate new CUDA event buffer", sizeof(cudaEvent_t)); @@ -983,7 +979,7 @@ Nd4jPointer NativeOps::createEvent() { return nativeEvent; } -int NativeOps::registerEvent(Nd4jPointer event, Nd4jPointer stream) { +int registerEvent(Nd4jPointer event, Nd4jPointer stream) { cudaEvent_t *pEvent = reinterpret_cast(&event); cudaStream_t *pStream = reinterpret_cast(stream); @@ -995,7 +991,7 @@ int NativeOps::registerEvent(Nd4jPointer event, Nd4jPointer stream) { return 1; } -int NativeOps::setDevice(int deviceId) { +int setDevice(int deviceId) { auto dZ = cudaSetDevice(deviceId); checkCudaErrors(dZ); if (dZ != 0) @@ -1004,7 +1000,7 @@ int NativeOps::setDevice(int deviceId) { return 1; } -Nd4jLong NativeOps::getDeviceFreeMemory() { +Nd4jLong getDeviceFreeMemory() { size_t memFree = 0; size_t memTotal = 0; @@ -1013,7 +1009,7 @@ Nd4jLong NativeOps::getDeviceFreeMemory() { return (Nd4jLong) memFree; } -Nd4jLong NativeOps::getDeviceFreeMemory(int device) { +Nd4jLong getDeviceFreeMemory(int device) { int orig = -1; cudaGetDevice(&orig); @@ -1034,7 +1030,7 @@ Nd4jLong NativeOps::getDeviceFreeMemory(int device) { return (Nd4jLong) memFree; } -Nd4jLong NativeOps::getDeviceTotalMemory(int device) { +Nd4jLong getDeviceTotalMemory(int device) { int orig = -1; cudaGetDevice(&orig); @@ -1054,12 +1050,12 @@ Nd4jLong NativeOps::getDeviceTotalMemory(int device) { return (Nd4jLong) memTotal; } -int NativeOps::memcpy(Nd4jPointer dst, Nd4jPointer src, Nd4jLong size, int flags, Nd4jPointer reserved) { +int memcpySync(Nd4jPointer dst, Nd4jPointer src, Nd4jLong size, int flags, Nd4jPointer reserved) { return memcpyAsync(dst, src, size, flags, reserved); } -int NativeOps::memcpyAsync(Nd4jPointer dst, Nd4jPointer src, Nd4jLong size, int flags, Nd4jPointer reserved) { +int memcpyAsync(Nd4jPointer dst, Nd4jPointer src, Nd4jLong size, int flags, Nd4jPointer reserved) { cudaStream_t *pStream = reinterpret_cast(reserved); cudaMemcpyKind kind; @@ -1102,7 +1098,7 @@ int NativeOps::memcpyAsync(Nd4jPointer dst, Nd4jPointer src, Nd4jLong size, int return 1; } -int NativeOps::memset(Nd4jPointer dst, int value, Nd4jLong size, int flags, Nd4jPointer reserved) { +int memsetSync(Nd4jPointer dst, int value, Nd4jLong size, int flags, Nd4jPointer reserved) { cudaError_t dZ = cudaMemset(reinterpret_cast(dst), value, static_cast(size)); checkCudaErrors(dZ); if (dZ != 0) @@ -1111,7 +1107,7 @@ int NativeOps::memset(Nd4jPointer dst, int value, Nd4jLong size, int flags, Nd4j return 1; } -int NativeOps::memsetAsync(Nd4jPointer dst, int value, Nd4jLong size, int flags, Nd4jPointer reserved) { +int memsetAsync(Nd4jPointer dst, int value, Nd4jLong size, int flags, Nd4jPointer reserved) { cudaStream_t *pStream = reinterpret_cast(reserved); cudaError_t dZ = cudaMemsetAsync(reinterpret_cast(dst), 
value, static_cast(size), *pStream); @@ -1122,7 +1118,7 @@ int NativeOps::memsetAsync(Nd4jPointer dst, int value, Nd4jLong size, int flags, return 1; } -int NativeOps::destroyEvent(Nd4jPointer event) { +int destroyEvent(Nd4jPointer event) { cudaEvent_t *pEvent = reinterpret_cast(&event); cudaError_t dZ = cudaEventDestroy(*pEvent); checkCudaErrors(dZ); @@ -1132,7 +1128,7 @@ int NativeOps::destroyEvent(Nd4jPointer event) { return 1; } -int NativeOps::streamSynchronize(Nd4jPointer stream) { +int streamSynchronize(Nd4jPointer stream) { cudaStream_t *pStream = reinterpret_cast(stream); cudaError_t dZ = cudaStreamSynchronize(*pStream); @@ -1143,7 +1139,7 @@ int NativeOps::streamSynchronize(Nd4jPointer stream) { return 1L; } -int NativeOps::eventSynchronize(Nd4jPointer event) { +int eventSynchronize(Nd4jPointer event) { cudaEvent_t *pEvent = reinterpret_cast(&event); cudaError_t dZ = cudaEventSynchronize(*pEvent); @@ -1154,17 +1150,17 @@ int NativeOps::eventSynchronize(Nd4jPointer event) { return 1L; } -int NativeOps::getAvailableDevices() { +int getAvailableDevices() { int devCnt = 0; cudaGetDeviceCount(&devCnt); return devCnt; } -void NativeOps::enableDebugMode(bool reallyEnable) { +void enableDebugMode(bool reallyEnable) { nd4j::Environment::getInstance()->setDebug(reallyEnable); } -void NativeOps::setGridLimit(int gridSize) { +void setGridLimit(int gridSize) { if (gridSize > 8192) gridSize = 8192; if (gridSize < 1) @@ -1172,15 +1168,15 @@ void NativeOps::setGridLimit(int gridSize) { blockLimit = gridSize; } -int NativeOps::ompGetMaxThreads() { +int ompGetMaxThreads() { return maxThreads; } -int NativeOps::ompGetNumThreads() { +int ompGetNumThreads() { return maxThreads; } -void NativeOps::setOmpNumThreads(int threads) { +void setOmpNumThreads(int threads) { if (threads > 1024) threads = 1024; if (threads < 32) @@ -1188,20 +1184,20 @@ void NativeOps::setOmpNumThreads(int threads) { maxThreads = threads; } -void NativeOps::enableVerboseMode(bool reallyEnable) { +void enableVerboseMode(bool reallyEnable) { nd4j::Environment::getInstance()->setVerbose(reallyEnable); } -int NativeOps::getDeviceMajor(int device) { +int getDeviceMajor(int device) { return deviceProperties[device].major; } -int NativeOps::getDeviceMinor(int device) { +int getDeviceMinor(int device) { return deviceProperties[device].minor; } -const char * NativeOps::getDeviceName(int device) { +const char * getDeviceName(int device) { return deviceProperties[device].name; } @@ -1302,7 +1298,7 @@ specialBufferAndShapeWithOffset(void* vZ, Nd4jLong* hZShapeInfo, Nd4jLong* dZSha * Concatneate multi array of the same shape together * along a particular dimension */ -void NativeOps::concat( +void concat( Nd4jPointer *extraPointers, int dimension, int numArrays, @@ -1354,7 +1350,7 @@ void NativeOps::concat( // nd4j_printf(" done\n", ""); LaunchContext context(stream); // allocate and copy all buffers and shapes arrays to global memory - PointersManager manager(&context, "NativeOps::concat"); + PointersManager manager(&context, "concat"); void* dOutBuffers = manager.replicatePointer(hOutBuffers.data(), hOutBuffers.size() * sizeof(void*)); void* dInBuffers = manager.replicatePointer(hInBuffers.data(), hInBuffers.size() * sizeof(void*)); void* dInShapeInfo = manager.replicatePointer(hInShapeInfo.data(), hInShapeInfo.size() * sizeof(Nd4jLong*)); @@ -1377,7 +1373,7 @@ void NativeOps::concat( * Concatneate multi array of the same shape together * along a particular dimension */ -// void NativeOps::concat( +// void concat( // Nd4jPointer 
*extraPointers, // int dimension, // int numArrays, @@ -1481,7 +1477,7 @@ void NativeOps::concat( -void NativeOps::specialConcat( +void specialConcat( Nd4jPointer *extraPointers, int dimension, int numArrays, @@ -1497,13 +1493,13 @@ void NativeOps::specialConcat( /** * This method saves */ -nd4j::TadPack* NativeOps::tadOnlyShapeInfo(Nd4jLong *dXShapeInfo, int *dimension, int dimensionLength) { +nd4j::TadPack* tadOnlyShapeInfo(Nd4jLong *dXShapeInfo, int *dimension, int dimensionLength) { auto pack = new TadPack(); *pack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(dXShapeInfo, dimension, dimensionLength); return pack; } -int NativeOps::memcpyConstantAsync(Nd4jLong dst, Nd4jPointer src, Nd4jLong size, int flags, Nd4jPointer reserved) { +int memcpyConstantAsync(Nd4jLong dst, Nd4jPointer src, Nd4jLong size, int flags, Nd4jPointer reserved) { cudaStream_t *pStream = reinterpret_cast(reserved); cudaMemcpyKind kind; @@ -1536,7 +1532,7 @@ int NativeOps::memcpyConstantAsync(Nd4jLong dst, Nd4jPointer src, Nd4jLong size, return 1; } -Nd4jPointer NativeOps::getConstantSpace() { +Nd4jPointer getConstantSpace() { Nd4jPointer dConstAddr; cudaError_t dZ = cudaGetSymbolAddress(reinterpret_cast(&dConstAddr), deviceConstantMemory); @@ -1546,7 +1542,7 @@ Nd4jPointer NativeOps::getConstantSpace() { return dConstAddr; } -void NativeOps::pullRows(Nd4jPointer *extraPointers, +void pullRows(Nd4jPointer *extraPointers, void *x, Nd4jLong *xShapeInfo, void *dX, Nd4jLong *dXShapeInfo, void *z, Nd4jLong *zShapeInfo, @@ -1567,7 +1563,7 @@ void NativeOps::pullRows(Nd4jPointer *extraPointers, } -void NativeOps::average(Nd4jPointer *extras, +void average(Nd4jPointer *extras, Nd4jPointer *x, Nd4jLong *xShapeInfo, Nd4jPointer *dx, Nd4jLong *dXShapeInfo, void *z, Nd4jLong *zShapeInfo, @@ -1596,7 +1592,7 @@ void NativeOps::average(Nd4jPointer *extras, } } -void NativeOps::accumulate(Nd4jPointer *extras, +void accumulate(Nd4jPointer *extras, Nd4jPointer *x, Nd4jLong *xShapeInfo, Nd4jPointer *dx, Nd4jLong *dXShapeInfo, void *z, Nd4jLong *zShapeInfo, @@ -1625,7 +1621,7 @@ void NativeOps::accumulate(Nd4jPointer *extras, } -void NativeOps::shuffle(Nd4jPointer *extras, +void shuffle(Nd4jPointer *extras, Nd4jPointer *x, Nd4jPointer *xShapeInfo, Nd4jPointer *dx, Nd4jPointer *dXShapeInfo, Nd4jPointer *z, Nd4jPointer *zShapeInfo, @@ -1652,7 +1648,7 @@ void NativeOps::shuffle(Nd4jPointer *extras, } /* -void NativeOps::execMetaPredicateShape(Nd4jPointer *extras, +void execMetaPredicateShape(Nd4jPointer *extras, const int opTypeA, const int opNumA, const int opTypeB, @@ -1678,16 +1674,16 @@ void NativeOps::execMetaPredicateShape(Nd4jPointer *extras, } */ -bool NativeOps::isExperimentalEnabled() { +bool isExperimentalEnabled() { return nd4j::Environment::getInstance()->isExperimentalBuild(); } -void NativeOps::setOmpMinThreads(int threads) { +void setOmpMinThreads(int threads) { minThreads = nd4j::math::nd4j_max(32, threads); minThreads = nd4j::math::nd4j_min(maxThreads, minThreads); } -int NativeOps::getDevice() { +int getDevice() { int curDevice = -1; cudaGetDevice(&curDevice); @@ -1695,16 +1691,16 @@ int NativeOps::getDevice() { return curDevice; } -void NativeOps::setElementThreshold(int num) { +void setElementThreshold(int num) { // this is no-op for CUDA } -void NativeOps::setTADThreshold(int num) { +void setTADThreshold(int num) { // this is no-op for CUDA } //////////////////////////////////////////////////////////////////////// -void NativeOps::execSummaryStats(Nd4jPointer *extraPointers, +void 
execSummaryStats(Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, @@ -1718,7 +1714,7 @@ void NativeOps::execSummaryStats(Nd4jPointer *extraPointers, } //////////////////////////////////////////////////////////////////////// -void NativeOps::execSummaryStats(Nd4jPointer *extraPointers, +void execSummaryStatsTad(Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, @@ -1736,7 +1732,7 @@ void NativeOps::execSummaryStats(Nd4jPointer *extraPointers, } //////////////////////////////////////////////////////////////////////// -void NativeOps::execReduce3(Nd4jPointer *extraPointers, +void execReduce3(Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, @@ -1751,7 +1747,7 @@ void NativeOps::execReduce3(Nd4jPointer *extraPointers, } //////////////////////////////////////////////////////////////////////// -void NativeOps::execReduce3(Nd4jPointer *extraPointers, +void execReduce3Tad(Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, @@ -1795,7 +1791,7 @@ void NativeOps::execReduce3(Nd4jPointer *extraPointers, } //////////////////////////////////////////////////////////////////////// -void NativeOps::execReduce3Scalar(Nd4jPointer *extraPointers,int opNum, +void execReduce3Scalar(Nd4jPointer *extraPointers,int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, void *extraParams, @@ -1809,7 +1805,7 @@ void NativeOps::execReduce3Scalar(Nd4jPointer *extraPointers,int opNum, } //////////////////////////////////////////////////////////////////////// -void NativeOps::execScalarBool(Nd4jPointer *extraPointers, +void execScalarBool(Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, @@ -1824,7 +1820,7 @@ void NativeOps::execScalarBool(Nd4jPointer *extraPointers, } //////////////////////////////////////////////////////////////////////// -void NativeOps::execScalarBool(Nd4jPointer *extraPointers, +void execScalarBool(Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, @@ -1844,7 +1840,7 @@ void NativeOps::execScalarBool(Nd4jPointer *extraPointers, } //////////////////////////////////////////////////////////////////////// -void NativeOps::execScalar(Nd4jPointer *extraPointers, +void execScalar(Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, @@ -1859,7 +1855,7 @@ void NativeOps::execScalar(Nd4jPointer *extraPointers, } //////////////////////////////////////////////////////////////////////// -void NativeOps::execScalar(Nd4jPointer *extraPointers, +void execScalarTad(Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, @@ -1881,8 +1877,8 @@ void NativeOps::execScalar(Nd4jPointer *extraPointers, auto yType = nd4j::ArrayOptions::dataType(hScalarShapeInfo); auto zType = nd4j::ArrayOptions::dataType(hZShapeInfo); - if (yType != xType && yType != nd4j::DataType::BOOL && !this->isExperimentalEnabled()) - throw nd4j::datatype_exception::build("NativeOps::execScalar both operands must have same data type", xType, yType); + if (yType != xType && yType != nd4j::DataType::BOOL && !isExperimentalEnabled()) + throw nd4j::datatype_exception::build("execScalar both operands must have same data type", xType, yType); dim3 launchDims(256, 256, 16384); @@ -1895,7 +1891,7 @@ void 
NativeOps::execScalar(Nd4jPointer *extraPointers, DEBUG_KERNEL(stream, opNum); } -void NativeOps::execAggregate(Nd4jPointer *extraPointers, +void execAggregate(Nd4jPointer *extraPointers, int opNum, void **arguments, int numArguments, @@ -1920,7 +1916,7 @@ void NativeOps::execAggregate(Nd4jPointer *extraPointers, nd4j::DebugHelper::checkErrorCode(stream, "execAggregateFloat(...) failed"); } -void NativeOps::execAggregateBatch(Nd4jPointer *extraPointers, +void execAggregateBatch(Nd4jPointer *extraPointers, int numAggregates, int opNum, int maxArgs, int maxShapes, int maxIntArrays, int maxIntArraySize, @@ -1940,7 +1936,7 @@ void NativeOps::execAggregateBatch(Nd4jPointer *extraPointers, } //////////////////////////////////////////////////////////////////////// -void NativeOps::execRandom(Nd4jPointer *extraPointers, +void execRandom(Nd4jPointer *extraPointers, int opNum, Nd4jPointer stateHost, void *hZ, Nd4jLong *hZShapeInfo, @@ -1952,7 +1948,7 @@ void NativeOps::execRandom(Nd4jPointer *extraPointers, } //////////////////////////////////////////////////////////////////////// -void NativeOps::execRandom(Nd4jPointer *extraPointers, int opNum, Nd4jPointer stateHost, +void execRandom2(Nd4jPointer *extraPointers, int opNum, Nd4jPointer stateHost, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, void *hZ, Nd4jLong *hZShapeInfo, @@ -1964,7 +1960,7 @@ void NativeOps::execRandom(Nd4jPointer *extraPointers, int opNum, Nd4jPointer st } //////////////////////////////////////////////////////////////////////// -void NativeOps::execRandom(Nd4jPointer *extraPointers, int opNum, Nd4jPointer stateHost, +void execRandom3(Nd4jPointer *extraPointers, int opNum, Nd4jPointer stateHost, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, void *hY, Nd4jLong *hYShapeInfo, @@ -1978,7 +1974,7 @@ void NativeOps::execRandom(Nd4jPointer *extraPointers, int opNum, Nd4jPointer st } -Nd4jPointer NativeOps::initRandom(Nd4jPointer *extraPointers, long seed, long bufferSize, Nd4jPointer ptrToBuffer) { +Nd4jPointer initRandom(Nd4jPointer *extraPointers, long seed, long bufferSize, Nd4jPointer ptrToBuffer) { unsigned long long *ptrHost = reinterpret_cast(extraPointers[0]); cudaStream_t *stream = reinterpret_cast(extraPointers[1]); @@ -2004,7 +2000,7 @@ Nd4jPointer NativeOps::initRandom(Nd4jPointer *extraPointers, long seed, long bu } -void NativeOps::destroyRandom(Nd4jPointer ptrBuffer) { +void destroyRandom(Nd4jPointer ptrBuffer) { nd4j::random::RandomBuffer *buffer = reinterpret_cast (ptrBuffer); @@ -2014,7 +2010,7 @@ void NativeOps::destroyRandom(Nd4jPointer ptrBuffer) { delete buffer; } -void NativeOps::refreshBuffer(Nd4jPointer *extraPointers, long seed, Nd4jPointer ptrRandom) { +void refreshBuffer(Nd4jPointer *extraPointers, long seed, Nd4jPointer ptrRandom) { nd4j::random::RandomBuffer *buffer = reinterpret_cast (ptrRandom); @@ -2037,7 +2033,7 @@ void NativeOps::refreshBuffer(Nd4jPointer *extraPointers, long seed, Nd4jPointer cudaMemcpyAsync(ptrDev, ptrHost, buffer->getSize() * 8, cudaMemcpyHostToDevice, *stream); } -void NativeOps::reSeedBuffer(Nd4jPointer *extraPointers, long seed, Nd4jPointer ptrRandom) { +void reSeedBuffer(Nd4jPointer *extraPointers, long seed, Nd4jPointer ptrRandom) { nd4j::random::RandomBuffer *buffer = reinterpret_cast (ptrRandom); @@ -2058,7 +2054,7 @@ void NativeOps::reSeedBuffer(Nd4jPointer *extraPointers, long seed, Nd4jPointer * @param buffer the buffer pointer to check * @return */ -int NativeOps::lengthForShapeBufferPointer(Nd4jPointer buffer) { +int 
lengthForShapeBufferPointer(Nd4jPointer buffer) { auto shapeBuffer = reinterpret_cast(buffer); return shape::shapeInfoLength(shape::rank(shapeBuffer)); } @@ -2071,11 +2067,11 @@ int NativeOps::lengthForShapeBufferPointer(Nd4jPointer buffer) { * @return the pointer for the given address */ -Nd4jPointer NativeOps::pointerForAddress(Nd4jLong address) { +Nd4jPointer pointerForAddress(Nd4jLong address) { return reinterpret_cast(address); } -void NativeOps::tear(Nd4jPointer *extras, +void tear(Nd4jPointer *extras, void *x, Nd4jLong *xShapeInfo, void *dX, Nd4jLong *dXShapeInfo, Nd4jPointer *targets, @@ -2174,7 +2170,7 @@ void prescanArrayRecursive(Nd4jPointer *extras, int *dZ, int *dX, int numElement } -void NativeOps::encodeThresholdP1(Nd4jPointer *extras, void *dx, Nd4jLong *hXShapeInfo, Nd4jLong N, int *dz, float threshold) { +void encodeThresholdP1(Nd4jPointer *extras, void *dx, Nd4jLong *hXShapeInfo, Nd4jLong N, int *dz, float threshold) { cudaStream_t *stream = reinterpret_cast(extras[1]); @@ -2190,7 +2186,7 @@ void NativeOps::encodeThresholdP1(Nd4jPointer *extras, void *dx, Nd4jLong *hXSha -void NativeOps::encodeThresholdP2Int(Nd4jPointer *extraPointers, int *dx, Nd4jLong N, int *dz) { +void encodeThresholdP2Int(Nd4jPointer *extraPointers, int *dx, Nd4jLong N, int *dz) { cudaStream_t *stream = reinterpret_cast(extraPointers[1]); //encoderKernelP2Float<<>>(dx, N, dz); @@ -2198,7 +2194,7 @@ void NativeOps::encodeThresholdP2Int(Nd4jPointer *extraPointers, int *dx, Nd4jLo nd4j::DebugHelper::checkErrorCode(stream, "encodeThresholdP2Int(...) failed"); } -void NativeOps::encodeThresholdP3(Nd4jPointer *extraPointers, void *dx, Nd4jLong *hXShapeInfo, int *offsets, Nd4jLong N, int *dz){ +void encodeThresholdP3(Nd4jPointer *extraPointers, void *dx, Nd4jLong *hXShapeInfo, int *offsets, Nd4jLong N, int *dz){ cudaStream_t *stream = reinterpret_cast(extraPointers[1]); @@ -2212,7 +2208,7 @@ void NativeOps::encodeThresholdP3(Nd4jPointer *extraPointers, void *dx, Nd4jLong nd4j::DebugHelper::checkErrorCode(stream, "encodeThresholdP3Float(...) 
failed"); } -void NativeOps::decodeThreshold(Nd4jPointer *extraPointers, void *dx, Nd4jLong N, void *dz, Nd4jLong *zShapeInfo){ +void decodeThreshold(Nd4jPointer *extraPointers, void *dx, Nd4jLong N, void *dz, Nd4jLong *zShapeInfo){ cudaStream_t *stream = reinterpret_cast(extraPointers[1]); @@ -2228,7 +2224,7 @@ void NativeOps::decodeThreshold(Nd4jPointer *extraPointers, void *dx, Nd4jLong N } //////////////////////////////////////////////////////////////////////// -void NativeOps::execReduce3All(Nd4jPointer *extraPointers, +void execReduce3All(Nd4jPointer *extraPointers, int opNum, void *hX, Nd4jLong *hXShapeInfo, void *dX, Nd4jLong *dXShapeInfo, @@ -2249,7 +2245,7 @@ void NativeOps::execReduce3All(Nd4jPointer *extraPointers, } -void NativeOps::sort(Nd4jPointer *extraPointers, +void sort(Nd4jPointer *extraPointers, void *x, Nd4jLong *xShapeInfo, void *dX, Nd4jLong *dXShapeInfo, bool descending) { @@ -2307,7 +2303,7 @@ void NativeOps::sort(Nd4jPointer *extraPointers, } -void NativeOps::sortByKey(Nd4jPointer *extraPointers, +void sortByKey(Nd4jPointer *extraPointers, void *x, Nd4jLong *xShapeInfo, void *dX, Nd4jLong *dXShapeInfo, void *y, Nd4jLong *yShapeInfo, @@ -2365,7 +2361,7 @@ void NativeOps::sortByKey(Nd4jPointer *extraPointers, } } -void NativeOps::sortByValue(Nd4jPointer *extraPointers, +void sortByValue(Nd4jPointer *extraPointers, void *x, Nd4jLong *xShapeInfo, void *dX, Nd4jLong *dXShapeInfo, void *y, Nd4jLong *yShapeInfo, @@ -2424,7 +2420,7 @@ void NativeOps::sortByValue(Nd4jPointer *extraPointers, -void NativeOps::sortTadByKey(Nd4jPointer *extraPointers, +void sortTadByKey(Nd4jPointer *extraPointers, void *x, Nd4jLong *xShapeInfo, void *dX, Nd4jLong *dXShapeInfo, void *y, Nd4jLong *yShapeInfo, @@ -2443,7 +2439,7 @@ void NativeOps::sortTadByKey(Nd4jPointer *extraPointers, nd4j::DebugHelper::checkErrorCode(stream, "sortTadKey(...) failed"); } -void NativeOps::sortTadByValue(Nd4jPointer *extraPointers, +void sortTadByValue(Nd4jPointer *extraPointers, void *x, Nd4jLong *xShapeInfo, void *dX, Nd4jLong *dXShapeInfo, void *y, Nd4jLong *yShapeInfo, @@ -2464,7 +2460,7 @@ void NativeOps::sortTadByValue(Nd4jPointer *extraPointers, } -void NativeOps::sortTad(Nd4jPointer *extraPointers, +void sortTad(Nd4jPointer *extraPointers, void *x, Nd4jLong *xShapeInfo, void *dX, Nd4jLong *dXShapeInfo, int *dimension, @@ -2483,12 +2479,12 @@ void NativeOps::sortTad(Nd4jPointer *extraPointers, nd4j::DebugHelper::checkErrorCode(stream, "sortTad(...) failed"); } -void NativeOps::sortCooIndices(Nd4jPointer *extraPointers, Nd4jLong *indices, void *values, Nd4jLong length, int rank) { +void sortCooIndices(Nd4jPointer *extraPointers, Nd4jLong *indices, void *values, Nd4jLong length, int rank) { throw std::runtime_error("sortCooIndices:: Not implemented yet"); } -Nd4jLong NativeOps::encodeBitmap(Nd4jPointer *extraPointers, +Nd4jLong encodeBitmap(Nd4jPointer *extraPointers, void *dx, Nd4jLong *hXShapeInfo, Nd4jLong N, int *dz, @@ -2511,7 +2507,7 @@ Nd4jLong NativeOps::encodeBitmap(Nd4jPointer *extraPointers, } -void NativeOps::decodeBitmap(Nd4jPointer *extraPointers, +void decodeBitmap(Nd4jPointer *extraPointers, void *dx, Nd4jLong N, void *dz, Nd4jLong *zShapeInfo) { @@ -2524,21 +2520,21 @@ void NativeOps::decodeBitmap(Nd4jPointer *extraPointers, nd4j::DebugHelper::checkErrorCode(stream, "decodeBitmapFloat(...) 
failed"); } -Nd4jLong* NativeOps::mmapFile(Nd4jPointer *extraPointers, const char *fileName, Nd4jLong length) { +Nd4jLong* mmapFile(Nd4jPointer *extraPointers, const char *fileName, Nd4jLong length) { return nullptr; } -void NativeOps::munmapFile(Nd4jPointer *extraPointers, Nd4jLong* ptrMap, Nd4jLong length) { +void munmapFile(Nd4jPointer *extraPointers, Nd4jLong* ptrMap, Nd4jLong length) { } -nd4j::graph::ResultWrapper* NativeOps::executeFlatGraph(Nd4jPointer *extraPointers, Nd4jPointer flatBufferPointer) { +nd4j::graph::ResultWrapper* executeFlatGraph(Nd4jPointer *extraPointers, Nd4jPointer flatBufferPointer) { return nd4j::graph::GraphExecutioner::executeFlatBuffer(flatBufferPointer); } -const char* NativeOps::getAllCustomOps() { +const char* getAllCustomOps() { return nd4j::ops::OpRegistrator::getInstance()->getAllCustomOperations(); } @@ -2581,7 +2577,7 @@ nd4j::ShapeList* _calculateOutputShapes(Nd4jPointer* extraPointers, nd4j::ops::D return shapeList; } -nd4j::ShapeList* NativeOps::calculateOutputShapes(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputBuffers, Nd4jPointer* inputShapes, int numInputShapes, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs, bool *bArgs, int numBArgs) { +nd4j::ShapeList* calculateOutputShapes2(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputBuffers, Nd4jPointer* inputShapes, int numInputShapes, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs, bool *bArgs, int numBArgs) { auto op = nd4j::ops::OpRegistrator::getInstance()->getOperation(hash); return _calculateOutputShapes(extraPointers, op, inputBuffers, inputShapes, numInputShapes, tArgs, numTArgs, iArgs, numIArgs, bArgs, numBArgs); @@ -2605,7 +2601,7 @@ nd4j::ShapeList* _calculateOutputShapes(Nd4jPointer* extraPointers, nd4j::ops::D return shapeList; } -nd4j::ShapeList* NativeOps::calculateOutputShapes(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputShapes, int numInputShapes, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs) { +nd4j::ShapeList* calculateOutputShapes(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputShapes, int numInputShapes, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs) { auto op = nd4j::ops::OpRegistrator::getInstance()->getOperation(hash); return _calculateOutputShapes(extraPointers, op, inputShapes, numInputShapes, tArgs, numTArgs, iArgs, numIArgs); @@ -2695,13 +2691,13 @@ static FORCEINLINE Nd4jStatus realExec(nd4j::ops::DeclarableOp* op, Nd4jPointer* } -int NativeOps::execCustomOp(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputBuffers, Nd4jPointer* inputShapes, int numInputs, Nd4jPointer* outputBuffers, Nd4jPointer* outputShapes, int numOutputs, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs, bool* bArgs, int numBArgs, bool isInplace) { +int execCustomOp(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer* inputBuffers, Nd4jPointer* inputShapes, int numInputs, Nd4jPointer* outputBuffers, Nd4jPointer* outputShapes, int numOutputs, double* tArgs, int numTArgs, Nd4jLong *iArgs, int numIArgs, bool* bArgs, int numBArgs, bool isInplace) { auto op = nd4j::ops::OpRegistrator::getInstance()->getOperation(hash); return realExec(op, extraPointers, hash, inputBuffers, inputShapes, numInputs, outputBuffers, outputShapes, numOutputs, tArgs, numTArgs, iArgs, numIArgs, bArgs, numBArgs, isInplace); } -int NativeOps::execCustomOp(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer opContext) { +int execCustomOp2(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPointer 
opContext) { auto op = nd4j::ops::OpRegistrator::getInstance()->getOperation(hash); auto context = reinterpret_cast(opContext); @@ -2719,7 +2715,7 @@ int NativeOps::execCustomOp(Nd4jPointer* extraPointers, Nd4jLong hash, Nd4jPoint return result; } -int NativeOps::registerGraph(Nd4jPointer *extraPointers, Nd4jLong graphId, Nd4jPointer flatBufferPointer) { +int registerGraph(Nd4jPointer *extraPointers, Nd4jLong graphId, Nd4jPointer flatBufferPointer) { auto graph = nd4j::graph::GraphExecutioner::importFromFlatPointer(flatBufferPointer); @@ -2775,33 +2771,33 @@ static VariablesSet* executeStoredGraphT(Nd4jPointer *extraPointers, Nd4jLong gr return varSet; } -VariablesSet* NativeOps::executeStoredGraph(Nd4jPointer *extraPointers, Nd4jLong graphId, Nd4jPointer *inputBuffers, Nd4jPointer *inputShapes, int* inputIndices, int numInputs) { +VariablesSet* executeStoredGraph(Nd4jPointer *extraPointers, Nd4jLong graphId, Nd4jPointer *inputBuffers, Nd4jPointer *inputShapes, int* inputIndices, int numInputs) { return executeStoredGraphT(extraPointers, graphId, inputBuffers, inputShapes, inputIndices, numInputs); } -int NativeOps::unregisterGraph(Nd4jPointer *extraPointers, Nd4jLong graphId) { +int unregisterGraph(Nd4jPointer *extraPointers, Nd4jLong graphId) { nd4j::graph::GraphHolder::getInstance()->dropGraphAny(graphId); return ND4J_STATUS_OK; } -void NativeOps::deletePointerArray(Nd4jPointer pointer) { +void deletePointerArray(Nd4jPointer pointer) { Nd4jPointer *ptr = reinterpret_cast(pointer); delete[] ptr; } -void NativeOps::deleteCharArray(Nd4jPointer pointer) { +void deleteCharArray(Nd4jPointer pointer) { auto ptr = reinterpret_cast(pointer); delete[] ptr; } -void NativeOps::deleteIntArray(Nd4jPointer pointer) { +void deleteIntArray(Nd4jPointer pointer) { auto ptr = reinterpret_cast(pointer); delete[] ptr; } -void NativeOps::deleteLongArray(Nd4jPointer pointer) { +void deleteLongArray(Nd4jPointer pointer) { auto ptr = reinterpret_cast(pointer); delete[] ptr; } @@ -2812,27 +2808,27 @@ static void deleteVariablesSetT(Nd4jPointer pointer) { delete ptr; } -void NativeOps::deleteVariablesSet(Nd4jPointer pointer) { +void deleteVariablesSet(Nd4jPointer pointer) { deleteVariablesSetT(pointer); } -void NativeOps::deleteShapeList(Nd4jPointer shapeList) { +void deleteShapeList(Nd4jPointer shapeList) { nd4j::ShapeList* list = reinterpret_cast(shapeList); //list->destroy(); delete list; } -const char* NativeOps::getAllOperations() { +const char* getAllOperations() { return nd4j::OpTracker::getInstance()->exportOperations(); } -Nd4jPointer NativeOps::getGraphState(Nd4jLong id) { +Nd4jPointer getGraphState(Nd4jLong id) { return (Nd4jPointer) new nd4j::graph::GraphState(id); } -void NativeOps::deleteGraphState(Nd4jPointer state) { +void deleteGraphState(Nd4jPointer state) { auto stateP = reinterpret_cast(state); delete stateP; } @@ -2903,18 +2899,18 @@ Nd4jStatus execCustomOpWithScope(Nd4jPointer *extraPointers, nd4j::graph::GraphS } -Nd4jStatus NativeOps::execCustomOpWithScope(Nd4jPointer *extraPointers, Nd4jPointer state, Nd4jLong opHash, Nd4jLong *scopes, int numScopes, Nd4jPointer *inputBuffers, Nd4jPointer *inputShapes, int numInputs, Nd4jPointer *outputBuffers, Nd4jPointer *outputShapes, int numOutputs) { +Nd4jStatus execCustomOpWithScope(Nd4jPointer *extraPointers, Nd4jPointer state, Nd4jLong opHash, Nd4jLong *scopes, int numScopes, Nd4jPointer *inputBuffers, Nd4jPointer *inputShapes, int numInputs, Nd4jPointer *outputBuffers, Nd4jPointer *outputShapes, int numOutputs) { return 
execCustomOpWithScope(extraPointers, reinterpret_cast(state), opHash, scopes, numScopes, inputBuffers, inputShapes, numInputs, outputBuffers, outputShapes, numOutputs); } -void NativeOps::deleteResultWrapper(Nd4jPointer ptr) { +void deleteResultWrapper(Nd4jPointer ptr) { // just 0 room for compiler s@!t auto p = reinterpret_cast(ptr); delete p; } -int NativeOps::estimateThreshold(Nd4jPointer *extraPointers, Nd4jPointer dX, Nd4jLong *dXShapeInfo, int N, float threshold) { +int estimateThreshold(Nd4jPointer *extraPointers, Nd4jPointer dX, Nd4jLong *dXShapeInfo, int N, float threshold) { throw std::runtime_error("estimateThreshold: Not implemented yet"); } @@ -2922,7 +2918,7 @@ int NativeOps::estimateThreshold(Nd4jPointer *extraPointers, Nd4jPointer dX, Nd4 * TypeDef: * void convertTypes(Nd4jPointer *extras, int srcType, Nd4jPointer dX, long N, int dstType, Nd4jPointer dZ); */ -void NativeOps::convertTypes(Nd4jPointer *extras, int srcType, Nd4jPointer dX, Nd4jLong N, int dstType, Nd4jPointer dZ) { +void convertTypes(Nd4jPointer *extras, int srcType, Nd4jPointer dX, Nd4jLong N, int dstType, Nd4jPointer dZ) { auto dx = reinterpret_cast(dX); auto dz = reinterpret_cast(dZ); @@ -3101,12 +3097,12 @@ void NativeOps::convertTypes(Nd4jPointer *extras, int srcType, Nd4jPointer dX, N } } -Nd4jPointer NativeOps::createUtf8String(Nd4jPointer *extraPointers, const char *string, int length) { +Nd4jPointer createUtf8String(Nd4jPointer *extraPointers, const char *string, int length) { auto u = new nd4j::utf8string(string, length); return reinterpret_cast(u); } -void NativeOps::deleteUtf8String(Nd4jPointer *extraPointers, Nd4jPointer ptr) { +void deleteUtf8String(Nd4jPointer *extraPointers, Nd4jPointer ptr) { delete(reinterpret_cast(ptr)); } @@ -3183,7 +3179,7 @@ __host__ static void scatterUpdateCudaLauncher(const cudaStream_t* stream, const ////////////////////////////////////////////////////////////////////////// -void NativeOps::scatterUpdate(Nd4jPointer *extraPointers, int opCode, int numOfSubArrs, +void scatterUpdate(Nd4jPointer *extraPointers, int opCode, int numOfSubArrs, void* hX, Nd4jLong* hXShapeInfo, Nd4jLong* hXOffsets, void* dX, Nd4jLong* dXShapeInfo, Nd4jLong* dXOffsets, void* hY, Nd4jLong* hYShapeInfo, Nd4jLong* hYOffsets, @@ -3198,7 +3194,7 @@ void NativeOps::scatterUpdate(Nd4jPointer *extraPointers, int opCode, int numOfS nd4j::DebugHelper::checkErrorCode(stream, "scatterUpdate(...) 
failed"); } -void NativeOps::inspectArray(Nd4jPointer *extraPointers, Nd4jPointer buffer, Nd4jLong *shapeInfo, Nd4jPointer specialBuffer, Nd4jLong *specialShapeInfo, Nd4jPointer debugInfo) { +void inspectArray(Nd4jPointer *extraPointers, Nd4jPointer buffer, Nd4jLong *shapeInfo, Nd4jPointer specialBuffer, Nd4jLong *specialShapeInfo, Nd4jPointer debugInfo) { LaunchContext lc(extraPointers[1], extraPointers[4], extraPointers[5], extraPointers[3]); auto p = reinterpret_cast(debugInfo); NDArray array(buffer, specialBuffer, shapeInfo, &lc); @@ -3218,7 +3214,7 @@ void __global__ tryPointerKernel(void* p, int len) { printf("Pointer check complete: %i\n", b); } -void NativeOps::tryPointer(Nd4jPointer extra, Nd4jPointer p, int len) { +void tryPointer(Nd4jPointer extra, Nd4jPointer p, int len) { cudaStream_t stream; cudaStreamCreate(&stream); @@ -3232,38 +3228,38 @@ void NativeOps::tryPointer(Nd4jPointer extra, Nd4jPointer p, int len) { cudaStreamDestroy(stream); } -int NativeOps::dataTypeFromNpyHeader(void *header) { +int dataTypeFromNpyHeader(void *header) { return (int) cnpy::dataTypeFromHeader(reinterpret_cast(header)); } -nd4j::ConstantDataBuffer* NativeOps::shapeBuffer(int rank, Nd4jLong *shape, Nd4jLong *strides, nd4j::DataType dtype, char order, Nd4jLong ews, bool empty) { +nd4j::ConstantDataBuffer* shapeBuffer(int rank, Nd4jLong *shape, Nd4jLong *strides, nd4j::DataType dtype, char order, Nd4jLong ews, bool empty) { auto buffer = new ConstantDataBuffer(); *buffer = nd4j::ConstantShapeHelper::getInstance()->bufferForShapeInfo(ShapeDescriptor(dtype, order, shape, strides, rank, ews, empty)); return buffer; } -void NativeOps::deleteShapeBuffer(Nd4jPointer ptr) { +void deleteShapeBuffer(Nd4jPointer ptr) { auto buffer = reinterpret_cast(ptr); delete buffer; } -void NativeOps::deleteTadPack(Nd4jPointer ptr) { +void deleteTadPack(Nd4jPointer ptr) { auto buffer = reinterpret_cast(ptr); delete buffer; } -nd4j::ConstantDataBuffer* NativeOps::constantBuffer(nd4j::DataType dtype, Nd4jLong *data, int length) { +nd4j::ConstantDataBuffer* constantBuffer(nd4j::DataType dtype, Nd4jLong *data, int length) { return nd4j::ConstantHelper::getInstance()->constantBuffer(ConstantDescriptor(data, length), dtype); } -nd4j::ConstantDataBuffer* NativeOps::constantBuffer(nd4j::DataType dtype, double *data, int length) { +nd4j::ConstantDataBuffer* constantBuffer(nd4j::DataType dtype, double *data, int length) { return nd4j::ConstantHelper::getInstance()->constantBuffer(ConstantDescriptor(data, length), dtype); } -nd4j::ConstantDataBuffer* NativeOps::constantBuffer(nd4j::DataType dtype, nd4j::ConstantDescriptor *descriptor) { +nd4j::ConstantDataBuffer* constantBuffer(nd4j::DataType dtype, nd4j::ConstantDescriptor *descriptor) { return nd4j::ConstantHelper::getInstance()->constantBuffer(*descriptor, dtype); } -Nd4jPointer NativeOps::shapeBufferForNumpy(Nd4jPointer npyArray) { +Nd4jPointer shapeBufferForNumpy(Nd4jPointer npyArray) { cnpy::NpyArray arr = cnpy::loadNpyFromPointer(reinterpret_cast(npyArray)); unsigned int shapeSize = arr.shape.size(); std::vector shape(shapeSize); @@ -3292,7 +3288,7 @@ Nd4jPointer NativeOps::shapeBufferForNumpy(Nd4jPointer npyArray) { return reinterpret_cast(nd4j::ConstantShapeHelper::getInstance()->createFromExisting(shapeBuffer, true)); } -const char* NativeOps::runLightBenchmarkSuit(bool printOut) { +const char* runLightBenchmarkSuit(bool printOut) { nd4j::LightBenchmarkSuit suit; auto result = suit.runSuit(); @@ -3306,7 +3302,7 @@ const char* NativeOps::runLightBenchmarkSuit(bool printOut) 
{ return chars; } -const char* NativeOps::runFullBenchmarkSuit(bool printOut) { +const char* runFullBenchmarkSuit(bool printOut) { nd4j::FullBenchmarkSuit suit; auto result = suit.runSuit(); @@ -3320,6 +3316,6 @@ const char* NativeOps::runFullBenchmarkSuit(bool printOut) { return chars; } -Nd4jLong NativeOps::getCachedMemory(int deviceId) { +Nd4jLong getCachedMemory(int deviceId) { return nd4j::ConstantHelper::getInstance()->getCachedAmount(deviceId); -} \ No newline at end of file +} diff --git a/libnd4j/include/helpers/impl/ProviderRNG.cpp b/libnd4j/include/helpers/impl/ProviderRNG.cpp index 01a45984a..216aa3a32 100644 --- a/libnd4j/include/helpers/impl/ProviderRNG.cpp +++ b/libnd4j/include/helpers/impl/ProviderRNG.cpp @@ -27,11 +27,10 @@ namespace nd4j { ProviderRNG::ProviderRNG() { Nd4jLong *buffer = new Nd4jLong[100000]; - NativeOps nativeOps; std::lock_guard lock(_mutex); #ifndef __CUDABLAS__ // at this moment we don't have streams etc, so let's just skip this for now - _rng = (nd4j::random::RandomBuffer *) nativeOps.initRandom(nullptr, 123, 100000, (Nd4jPointer) buffer); + _rng = (nd4j::random::RandomBuffer *) initRandom(nullptr, 123, 100000, (Nd4jPointer) buffer); #endif // if(_rng != nullptr) } @@ -49,4 +48,4 @@ random::RandomBuffer* ProviderRNG::getRNG() const { std::mutex ProviderRNG::_mutex; -} \ No newline at end of file +} diff --git a/libnd4j/include/ops/declarable/generic/random/set_seed.cpp b/libnd4j/include/ops/declarable/generic/random/set_seed.cpp index 68768da92..b42c7c763 100644 --- a/libnd4j/include/ops/declarable/generic/random/set_seed.cpp +++ b/libnd4j/include/ops/declarable/generic/random/set_seed.cpp @@ -41,8 +41,7 @@ namespace nd4j { } // FIXME: this approach isn't really good for cuda, since it'll assume that CUDA might get nullptr instead of stream - NativeOps nativeOps; - nativeOps.refreshBuffer(nullptr, seed, (Nd4jPointer) rng); + refreshBuffer(nullptr, seed, (Nd4jPointer) rng); return Status::OK(); } @@ -60,4 +59,4 @@ namespace nd4j { } } -#endif \ No newline at end of file +#endif diff --git a/libnd4j/include/ops/declarable/helpers/cuda/image_suppression.cu b/libnd4j/include/ops/declarable/helpers/cuda/image_suppression.cu index 2faf44106..cd6887bf0 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/image_suppression.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/image_suppression.cu @@ -110,11 +110,9 @@ namespace helpers { indices->syncToDevice(); // linspace only on CPU, so sync to Device as well NDArray scores(*scales); - NativeOps nativeOps; - Nd4jPointer extras[2] = {nullptr, stream}; - nativeOps.sortByValue(extras, indices->buffer(), indices->shapeInfo(), indices->specialBuffer(), indices->specialShapeInfo(), scores.buffer(), scores.shapeInfo(), scores.specialBuffer(), scores.specialShapeInfo(), true); + sortByValue(extras, indices->buffer(), indices->shapeInfo(), indices->specialBuffer(), indices->specialShapeInfo(), scores.buffer(), scores.shapeInfo(), scores.specialBuffer(), scores.specialShapeInfo(), true); // TO DO: sort indices using scales as value row //std::sort(indices.begin(), indices.end(), [scales](int i, int j) {return scales->e(i) > scales->e(j);}); I* indexBuf = reinterpret_cast(indices->specialBuffer()); @@ -169,4 +167,4 @@ namespace helpers { } } -} \ No newline at end of file +} diff --git a/libnd4j/include/ops/declarable/helpers/cuda/nth_element.cu b/libnd4j/include/ops/declarable/helpers/cuda/nth_element.cu index c1c969c6f..80662a19b 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/nth_element.cu +++ 
b/libnd4j/include/ops/declarable/helpers/cuda/nth_element.cu @@ -60,8 +60,7 @@ namespace helpers { params[1] = context->getCudaStream(); if (input->isVector()) { - NativeOps ops; - ops.sort(params, nullptr, sortedVals.shapeInfo(), sortedVals.specialBuffer(), sortedVals.specialShapeInfo(), reverse); + sort(params, nullptr, sortedVals.shapeInfo(), sortedVals.specialBuffer(), sortedVals.specialShapeInfo(), reverse); cudaMemcpy(reinterpret_cast(output->specialBuffer()), reinterpret_cast(sortedVals.specialBuffer()) + n, sizeof(T), cudaMemcpyDeviceToDevice); } @@ -74,8 +73,7 @@ namespace helpers { auto pTadShapeH = packX.primaryShapeInfo(); auto pTadOffsets = packX.specialOffsets(); // auto pLastDimData = (int*) manager.replicatePointer(lastDims.data(), lastDims.size() * sizeof(int)); - NativeOps ops; - ops.sortTad(params, sortedVals.buffer(), sortedVals.shapeInfo(), sortedVals.specialBuffer(), sortedVals.specialShapeInfo(), lastDims.data(), lastDims.size(), pTadShape, pTadOffsets, reverse); + sortTad(params, sortedVals.buffer(), sortedVals.shapeInfo(), sortedVals.specialBuffer(), sortedVals.specialShapeInfo(), lastDims.data(), lastDims.size(), pTadShape, pTadOffsets, reverse); // manager.synchronize(); sortedVals.tickWriteDevice(); sortedVals.syncToHost(); diff --git a/libnd4j/tests_cpu/layers_tests/CnpyTests.cpp b/libnd4j/tests_cpu/layers_tests/CnpyTests.cpp index 82a83a550..086da26c5 100644 --- a/libnd4j/tests_cpu/layers_tests/CnpyTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/CnpyTests.cpp @@ -38,32 +38,28 @@ TEST_F(HeaderTest, test_dataTypes_1) { std::string header("0NUMPY6789{'descr': '>f4"); - NativeOps nativeOps; - ASSERT_EQ(nd4j::DataType::FLOAT32, nativeOps.dataTypeFromNpyHeader(const_cast(header.data()))); + ASSERT_EQ(nd4j::DataType::FLOAT32, dataTypeFromNpyHeader(const_cast(header.data()))); } TEST_F(HeaderTest, test_dataTypes_2) { std::string header("0NUMPY6789{'descr': '>f8"); - NativeOps nativeOps; - ASSERT_EQ(nd4j::DataType::DOUBLE, nativeOps.dataTypeFromNpyHeader(const_cast(header.data()))); + ASSERT_EQ(nd4j::DataType::DOUBLE, dataTypeFromNpyHeader(const_cast(header.data()))); } TEST_F(HeaderTest, test_dataTypes_3) { std::string header("0NUMPY6789{'descr': '(header.data()))); + ASSERT_EQ(nd4j::DataType::INT32, dataTypeFromNpyHeader(const_cast(header.data()))); } TEST_F(HeaderTest, test_dataTypes_4) { std::string header("0NUMPY6789{'descr': '>u2"); - NativeOps nativeOps; - ASSERT_EQ(nd4j::DataType::UINT16, nativeOps.dataTypeFromNpyHeader(const_cast(header.data()))); + ASSERT_EQ(nd4j::DataType::UINT16, dataTypeFromNpyHeader(const_cast(header.data()))); } /* @@ -88,12 +84,11 @@ TEST_F(LoadFromStringTest,PathTest) { ASSERT_EQ(4.0,data[3]); Nd4jPointer pointer = reinterpret_cast(&loadedArr); int *shapeBuffer = shape::shapeBufferOfNpy(loadedArr); - NativeOps nativeOps; - Nd4jPointer pointer1 = nativeOps.dataPointForNumpy(loaded); + Nd4jPointer pointer1 = dataPointForNumpy(loaded); delete[] shapeBuffer; double *data2 = reinterpret_cast(pointer1); delete[] loaded; } -*/ \ No newline at end of file +*/ diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests1.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests1.cpp index e4aa5a9e0..bbc69a449 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests1.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests1.cpp @@ -472,9 +472,7 @@ TEST_F(DeclarableOpsTests1, TestRng1) { /* Nd4jLong *buffer = new Nd4jLong[100000]; - NativeOps nativeOps; - - nd4j::random::RandomBuffer *rng = (nd4j::random::RandomBuffer *) 
nativeOps.initRandom(nullptr, 123, 100000, (Nd4jPointer) buffer); + nd4j::random::RandomBuffer *rng = (nd4j::random::RandomBuffer *) initRandom(nullptr, 123, 100000, (Nd4jPointer) buffer); if (rng == nullptr) throw std::runtime_error("RNG initialization failed"); @@ -496,7 +494,7 @@ TEST_F(DeclarableOpsTests1, TestRng1) { ASSERT_TRUE(x->sumNumber() > 0.0); - nativeOps.destroyRandom((Nd4jPointer) rng); + destroyRandom((Nd4jPointer) rng); delete[] buffer; delete variableSpace; @@ -1450,8 +1448,6 @@ TEST_F(DeclarableOpsTests1, TestRegistrator1) { // ////////////////////////////////////////////////////////////////////// // TEST_F(DeclarableOpsTests1, TestLegacyExecution1) { -// NativeOps nativeOps; - // auto x = NDArrayFactory::create_('c', {10, 10}); // x->assign(1.0f); @@ -1483,8 +1479,8 @@ TEST_F(DeclarableOpsTests1, TestRegistrator1) { // outputShapes[0] = (Nd4jPointer) z->getShapeInfo(); -// //auto status = nativeOps.execCustomOp(nullptr, hash, inputBuffers, inputShapes, 2, outputBuffers, outputShapes, 1, nullptr, 0, nullptr, 0, false); -// auto status = nativeOps.execCustomOp(nullptr, hash, inputBuffers, inputShapes, 2, outputBuffers, outputShapes, 1, nullptr, 0, nullptr, 0, nullptr, 0, false); +// //auto status = execCustomOp(nullptr, hash, inputBuffers, inputShapes, 2, outputBuffers, outputShapes, 1, nullptr, 0, nullptr, 0, false); +// auto status = execCustomOp(nullptr, hash, inputBuffers, inputShapes, 2, outputBuffers, outputShapes, 1, nullptr, 0, nullptr, 0, nullptr, 0, false); // ASSERT_EQ(ND4J_STATUS_OK, status); // // z->printIndexedBuffer("Output add"); // ASSERT_NEAR(2.0f, y->meanNumber().e(0), 1e-5); @@ -1503,8 +1499,6 @@ TEST_F(DeclarableOpsTests1, TestRegistrator1) { // ////////////////////////////////////////////////////////////////////// // TEST_F(DeclarableOpsTests1, TestLegacyExecution2) { -// NativeOps nativeOps; - // auto x = NDArrayFactory::create_('c', {10, 10}); // x->assign(1.0f); @@ -1532,7 +1526,7 @@ TEST_F(DeclarableOpsTests1, TestRegistrator1) { // auto outputBuffers = new Nd4jPointer[1]; // auto outputShapes = new Nd4jPointer[1]; -// nativeOps.execCustomOp(nullptr, hash, inputBuffers, inputShapes, 2, outputBuffers, outputShapes, 1, nullptr, 0, nullptr, 0, nullptr, 0, true); +// execCustomOp(nullptr, hash, inputBuffers, inputShapes, 2, outputBuffers, outputShapes, 1, nullptr, 0, nullptr, 0, nullptr, 0, true); // ASSERT_NEAR(2.0, y->meanNumber().e(0), 1e-5); // ASSERT_NEAR(3.0, x->meanNumber().e(0), 1e-5); diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests12.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests12.cpp index f9f525199..b4ffc0844 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests12.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests12.cpp @@ -876,14 +876,13 @@ TEST_F(DeclarableOpsTests12, pullRows_1) { auto xTadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(x.getShapeInfo(), dims); auto zTadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(z.getShapeInfo(), dims); - NativeOps op; Nd4jPointer nativeStart[2]; #ifdef __CUDABLAS__ nativeStart[1] = *(x.getContext()->getCudaStream()); #endif - op.pullRows(nativeStart, x.buffer(), x.getShapeInfo(), x.getSpecialBuffer(), x.getSpecialShapeInfo(), + pullRows(nativeStart, x.buffer(), x.getShapeInfo(), x.getSpecialBuffer(), x.getSpecialShapeInfo(), z.buffer(), z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), 4, pidx, xTadPack.platformShapeInfo(), xTadPack.platformOffsets(), @@ -912,12 +911,11 @@ TEST_F(DeclarableOpsTests12, pullRows_2) 
{ auto xTadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(x.getShapeInfo(), dims); auto zTadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(z.getShapeInfo(), dims); - NativeOps op; Nd4jPointer nativeStart[2]; #ifdef __CUDABLAS__ nativeStart[1] = *(x.getContext()->getCudaStream()); #endif - op.pullRows(nativeStart, x.buffer(), x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + pullRows(nativeStart, x.buffer(), x.getShapeInfo(), x.specialBuffer(), x.specialShapeInfo(), z.buffer(), z.getShapeInfo(), z.specialBuffer(), z.specialShapeInfo(), 4, pidx, xTadPack.platformShapeInfo(), xTadPack.platformOffsets(), diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests9.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests9.cpp index 8d97c660d..591ccf558 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests9.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests9.cpp @@ -110,8 +110,7 @@ TEST_F(DeclarableOpsTests9, exponentialDistributionInv_test1) { double extraParams[] = {lambda}; Nd4jLong *buffer = new Nd4jLong[N]; - NativeOps nativeOps; - auto rng = (nd4j::random::RandomBuffer *) nativeOps.initRandom(nullptr, 123, N, (Nd4jPointer) buffer); + auto rng = (nd4j::random::RandomBuffer *) initRandom(nullptr, 123, N, (Nd4jPointer) buffer); if (rng == nullptr) throw std::runtime_error("DeclarableOpsTests9.exponentialDistributionInv_test1: RNG initialization failed !"); @@ -122,7 +121,7 @@ TEST_F(DeclarableOpsTests9, exponentialDistributionInv_test1) { ASSERT_NEAR(mean, actualMean, 0.01); ASSERT_NEAR(std, actualStd, 0.01); - nativeOps.destroyRandom((Nd4jPointer) rng); + destroyRandom((Nd4jPointer) rng); delete[] buffer; } @@ -142,8 +141,7 @@ TEST_F(DeclarableOpsTests9, exponentialDistributionInv_test2) { Nd4jLong *buffer = new Nd4jLong[N]; - NativeOps nativeOps; - auto rng = (nd4j::random::RandomBuffer *) nativeOps.initRandom(nullptr, 123, N, (Nd4jPointer) buffer); + auto rng = (nd4j::random::RandomBuffer *) initRandom(nullptr, 123, N, (Nd4jPointer) buffer); if (rng == nullptr) throw std::runtime_error("DeclarableOpsTests9.exponentialDistributionInv_test2: RNG initialization failed !"); @@ -155,7 +153,7 @@ TEST_F(DeclarableOpsTests9, exponentialDistributionInv_test2) { ASSERT_NEAR(mean, actualMean, 0.01); ASSERT_NEAR(std, actualStd, 0.01); - nativeOps.destroyRandom((Nd4jPointer) rng); + destroyRandom((Nd4jPointer) rng); delete[] buffer; } @@ -172,8 +170,7 @@ TEST_F(DeclarableOpsTests9, exponentialDistribution_test1) { double extraParams[] = {lambda}; Nd4jLong *buffer = new Nd4jLong[N]; - NativeOps nativeOps; - auto rng = (nd4j::random::RandomBuffer *) nativeOps.initRandom(nullptr, 123, N, (Nd4jPointer) buffer); + auto rng = (nd4j::random::RandomBuffer *) initRandom(nullptr, 123, N, (Nd4jPointer) buffer); if (rng == nullptr) throw std::runtime_error("DeclarableOpsTests9.exponentialDistribution_test1: RNG initialization failed !"); @@ -184,7 +181,7 @@ TEST_F(DeclarableOpsTests9, exponentialDistribution_test1) { ASSERT_NEAR(mean, actualMean, 0.01); ASSERT_NEAR(std, actualStd, 0.01); - nativeOps.destroyRandom((Nd4jPointer) rng); + destroyRandom((Nd4jPointer) rng); delete[] buffer; } */ @@ -206,14 +203,13 @@ TEST_F(DeclarableOpsTests9, exponentialDistribution_test2) { Nd4jLong *buffer = new Nd4jLong[N]; // Nd4jPointer extra[2]; #ifndef __CUDABLAS__ - NativeOps nativeOps; - nd4j::random::RandomBuffer* rng = (nd4j::random::RandomBuffer *) nativeOps.initRandom(nullptr, 123, N, (Nd4jPointer) buffer); + nd4j::random::RandomBuffer* rng = 
(nd4j::random::RandomBuffer *) initRandom(nullptr, 123, N, (Nd4jPointer) buffer); if (rng == nullptr) throw std::runtime_error("DeclarableOpsTests9.exponentialDistribution_test2: RNG initialization failed !"); functions::random::RandomFunction::template execTransform>(rng, y.getBuffer(), y.getShapeInfo(), x.getBuffer(), x.getShapeInfo(), extraParams); - nativeOps.destroyRandom((Nd4jPointer) rng); + destroyRandom((Nd4jPointer) rng); #endif const double actualMean = x.meanNumber().e(0); const double actualStd = x.varianceNumber(variance::SummaryStatsStandardDeviation, true).e(0); @@ -1005,12 +1001,10 @@ TEST_F(DeclarableOpsTests9, Test_DropoutInverted_01) { x0.linspace(1); x1.linspace(1); /* - NativeOps nativeOps; - float prob[] = {0.5f}; Nd4jLong* _bufferA = new Nd4jLong[100000]; long _seed = 119L; - auto _rngA = (nd4j::random::RandomBuffer *) nativeOps.initRandom(nullptr, _seed, 100000, (Nd4jPointer) _bufferA); + auto _rngA = (nd4j::random::RandomBuffer *) initRandom(nullptr, _seed, 100000, (Nd4jPointer) _bufferA); x0. applyTransform(random::DropOutInverted, &x0, prob); // x1.template applyRandom>(_rngB, nullptr, &x1, prob); @@ -1026,7 +1020,7 @@ TEST_F(DeclarableOpsTests9, Test_DropoutInverted_01) { // ASSERT_FALSE(x0.equalsTo(nexp0)); // ASSERT_FALSE(x0.equalsTo(nexp1)); // ASSERT_FALSE(x0.equalsTo(nexp2)); - nativeOps.destroyRandom(_rngA); + destroyRandom(_rngA); delete [] _bufferA; */ nd4j::ops::dropout op; @@ -2911,4 +2905,4 @@ TEST_F(DeclarableOpsTests9, Cholesky_Test_3) { // ASSERT_TRUE(isGradCorrect); // } -// \ No newline at end of file +// diff --git a/libnd4j/tests_cpu/layers_tests/GraphStateTests.cpp b/libnd4j/tests_cpu/layers_tests/GraphStateTests.cpp index 714707076..81e2acd85 100644 --- a/libnd4j/tests_cpu/layers_tests/GraphStateTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/GraphStateTests.cpp @@ -51,9 +51,7 @@ public: */ TEST_F(GraphStateTests, Basic_Tests_1) { - NativeOps nativeOps; - - auto state = (GraphState *) nativeOps.getGraphState(117L); + auto state = (GraphState *) getGraphState(117L); ASSERT_EQ(117L, state->id()); // this call will create scope internally @@ -72,14 +70,12 @@ TEST_F(GraphStateTests, Basic_Tests_1) { ASSERT_TRUE(scope != nullptr); ASSERT_EQ(2, scope->size()); - nativeOps.deleteGraphState(state); + deleteGraphState(state); } // just separate case for doubles wrapper in NativeOps, nothing else TEST_F(GraphStateTests, Basic_Tests_2) { - NativeOps nativeOps; - - auto state = (GraphState *) nativeOps.getGraphState(117L); + auto state = (GraphState *) getGraphState(117L); ASSERT_EQ(117L, state->id()); // this call will create scope internally @@ -98,46 +94,40 @@ TEST_F(GraphStateTests, Basic_Tests_2) { ASSERT_TRUE(scope != nullptr); ASSERT_EQ(2, scope->size()); - nativeOps.deleteGraphState(state); + deleteGraphState(state); } TEST_F(GraphStateTests, Stateful_Execution_1) { - NativeOps nativeOps; - - auto state = nativeOps.getGraphState(117L); + auto state = getGraphState(117L); Nd4jLong scopes[] = {22, 33}; - //auto status = nativeOps.execCustomOpWithScope(nullptr, state, 10, scopes, 2, nullptr, nullptr, 0, nullptr, nullptr, 0); - auto status = nativeOps.execCustomOpWithScope(nullptr, state, 10, scopes, 2, nullptr, nullptr, 0, nullptr, nullptr, 0); + //auto status = execCustomOpWithScope(nullptr, state, 10, scopes, 2, nullptr, nullptr, 0, nullptr, nullptr, 0); + auto status = execCustomOpWithScope(nullptr, state, 10, scopes, 2, nullptr, nullptr, 0, nullptr, nullptr, 0); ASSERT_EQ(Status::THROW(), status); - nativeOps.deleteGraphState(state); + 
deleteGraphState(state); } TEST_F(GraphStateTests, Stateful_Execution_2) { - NativeOps nativeOps; - - auto state = (GraphState *) nativeOps.getGraphState(117L); + auto state = (GraphState *) getGraphState(117L); state->registerScope(22); state->registerScope(33); Nd4jLong scopes[] = {22, 33}; - auto status = nativeOps.execCustomOpWithScope(nullptr, state, 10, scopes, 2, nullptr, nullptr, 0, nullptr, nullptr, 0); + auto status = execCustomOpWithScope(nullptr, state, 10, scopes, 2, nullptr, nullptr, 0, nullptr, nullptr, 0); // it's no-op: just LogicScope ASSERT_EQ(Status::OK(), status); - nativeOps.deleteGraphState(state); + deleteGraphState(state); } /** * This test checks WHILE loop */ TEST_F(GraphStateTests, Stateful_Execution_3) { - NativeOps nativeOps; - auto var0 = NDArrayFactory::create('c', {2, 2}, {1, 2, 3, 4}); auto var1 = NDArrayFactory::create(11.0f); auto var2 = NDArrayFactory::create(2.0f); @@ -147,7 +137,7 @@ TEST_F(GraphStateTests, Stateful_Execution_3) { auto res2 = NDArrayFactory::create(0.0f); // registering our GraphState holder - auto state = (GraphState *) nativeOps.getGraphState(117L); + auto state = (GraphState *) getGraphState(117L); // we're prepping pointers to input/output buffers Nd4jPointer ptrBuffers[] = {(Nd4jPointer) var0.buffer(), (Nd4jPointer) var1.buffer(), (Nd4jPointer)var2.buffer()}; @@ -197,7 +187,7 @@ TEST_F(GraphStateTests, Stateful_Execution_3) { Nd4jLong scopes[] = {22, 33}; // we're executing while loop - auto status = nativeOps.execCustomOpWithScope(nullptr, state, 0, scopes, 2, ptrBuffers, ptrShapes, 3, outBuffers, outShapes, 3); + auto status = execCustomOpWithScope(nullptr, state, 0, scopes, 2, ptrBuffers, ptrShapes, 3, outBuffers, outShapes, 3); ASSERT_EQ(Status::OK(), status); // now we check provided result array @@ -211,7 +201,7 @@ TEST_F(GraphStateTests, Stateful_Execution_3) { // nd4j_printf("0 ------------------\n",""); - nativeOps.deleteGraphState(state); + deleteGraphState(state); // nd4j_printf("1 ------------------\n",""); } @@ -220,8 +210,6 @@ TEST_F(GraphStateTests, Stateful_Execution_3) { * This test checks CONDITIONAL execution for FALSE */ TEST_F(GraphStateTests, Stateful_Execution_4) { - NativeOps nativeOps; - auto var0 = NDArrayFactory::create('c', {2, 2}, {1, 2, 3, 4}); auto var1 = NDArrayFactory::create(5.0f); @@ -232,7 +220,7 @@ TEST_F(GraphStateTests, Stateful_Execution_4) { // registering our GraphState holder - auto state = (GraphState *) nativeOps.getGraphState(117L); + auto state = (GraphState *) getGraphState(117L); // we're prepping pointers to input/output buffers Nd4jPointer ptrBuffers[] = {(Nd4jPointer) var0.buffer(), (Nd4jPointer) var1.buffer()}; @@ -283,14 +271,14 @@ TEST_F(GraphStateTests, Stateful_Execution_4) { Nd4jLong scopes[] = {22, 33, 44}; // we're executing conditional op - auto status = nativeOps.execCustomOpWithScope(nullptr, state, 20, scopes, 3, ptrBuffers, ptrShapes, 2, outBuffers, outShapes, 2); + auto status = execCustomOpWithScope(nullptr, state, 20, scopes, 3, ptrBuffers, ptrShapes, 2, outBuffers, outShapes, 2); ASSERT_EQ(Status::OK(), status); ASSERT_TRUE(exp.isSameShape(&res0)); ASSERT_TRUE(exp.equalsTo(&res0)); - nativeOps.deleteGraphState(state); + deleteGraphState(state); } @@ -298,8 +286,6 @@ TEST_F(GraphStateTests, Stateful_Execution_4) { * This test checks CONDITIONAL execution for TRUE */ TEST_F(GraphStateTests, Stateful_Execution_5) { - NativeOps nativeOps; - auto var0 = NDArrayFactory::create('c', {2, 2}, {1, 2, 3, 4}); auto var1 = NDArrayFactory::create(5.0f); @@ -310,7 +296,7 @@ 
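A condensed sketch of the GraphState lifecycle as driven through the free functions above, mirroring Stateful_Execution_2; the graph id 117L, the scope ids, and the op index 10 are taken from those tests and are otherwise arbitrary.

    auto state = (GraphState *) getGraphState(117L);   // create a GraphState holder
    state->registerScope(22);
    state->registerScope(33);

    Nd4jLong scopes[] = {22, 33};
    // with both scopes registered but nothing wired into them, this call is a no-op and returns OK
    auto status = execCustomOpWithScope(nullptr, state, 10, scopes, 2, nullptr, nullptr, 0, nullptr, nullptr, 0);
    ASSERT_EQ(Status::OK(), status);

    deleteGraphState(state);                           // release the holder when done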
TEST_F(GraphStateTests, Stateful_Execution_5) { // registering our GraphState holder - auto state = (GraphState *) nativeOps.getGraphState(117L); + auto state = (GraphState *) getGraphState(117L); // we're prepping pointers to input/output buffers Nd4jPointer ptrBuffers[] = {(Nd4jPointer) var0.buffer(), (Nd4jPointer) var1.buffer()}; @@ -361,12 +347,11 @@ TEST_F(GraphStateTests, Stateful_Execution_5) { Nd4jLong scopes[] = {22, 33, 44}; // we're executing conditional op - auto status = nativeOps.execCustomOpWithScope(nullptr, state, 20, scopes, 3, ptrBuffers, ptrShapes, 2, outBuffers, outShapes, 2); + auto status = execCustomOpWithScope(nullptr, state, 20, scopes, 3, ptrBuffers, ptrShapes, 2, outBuffers, outShapes, 2); ASSERT_EQ(Status::OK(), status); ASSERT_TRUE(exp.isSameShape(&res0)); ASSERT_TRUE(exp.equalsTo(&res0)); - - nativeOps.deleteGraphState(state); + deleteGraphState(state); } diff --git a/libnd4j/tests_cpu/layers_tests/JavaInteropCudaTests.cu b/libnd4j/tests_cpu/layers_tests/JavaInteropCudaTests.cu index 19d5165b8..294e03c12 100644 --- a/libnd4j/tests_cpu/layers_tests/JavaInteropCudaTests.cu +++ b/libnd4j/tests_cpu/layers_tests/JavaInteropCudaTests.cu @@ -42,7 +42,6 @@ TEST_F(JavaInteropCudaTests, test_DeclarableOp_execution_1) { e.assign(2.f); nd4j::ops::add op; - NativeOps nativeOps; Context context(1); context.setCudaContext(LaunchContext::defaultContext()->getCudaStream(), LaunchContext::defaultContext()->getReductionPointer(), LaunchContext::defaultContext()->getAllocationPointer()); @@ -53,7 +52,7 @@ TEST_F(JavaInteropCudaTests, test_DeclarableOp_execution_1) { nd4j_printf("Starting execution...\n",""); PointersManager pm(LaunchContext::defaultContext(), "test_DeclarableOp_execution_1"); - nativeOps.execCustomOp(nullptr, op.getOpHash(), &context); + execCustomOp2(nullptr, op.getOpHash(), &context); pm.synchronize(); @@ -71,7 +70,6 @@ TEST_F(JavaInteropCudaTests, test_DeclarableOp_execution_2) { e.assign(false); nd4j::ops::equals op; - NativeOps nativeOps; Context context(1); context.setCudaContext(LaunchContext::defaultContext()->getCudaStream(), LaunchContext::defaultContext()->getReductionPointer(), LaunchContext::defaultContext()->getAllocationPointer()); @@ -82,7 +80,7 @@ TEST_F(JavaInteropCudaTests, test_DeclarableOp_execution_2) { nd4j_printf("Starting execution...\n",""); PointersManager pm(LaunchContext::defaultContext(), "test_DeclarableOp_execution_2"); - nativeOps.execCustomOp(nullptr, op.getOpHash(), &context); + execCustomOp2(nullptr, op.getOpHash(), &context); pm.synchronize(); diff --git a/libnd4j/tests_cpu/layers_tests/JavaInteropTests.cpp b/libnd4j/tests_cpu/layers_tests/JavaInteropTests.cpp index 0bb5a5b01..02eae35d8 100644 --- a/libnd4j/tests_cpu/layers_tests/JavaInteropTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/JavaInteropTests.cpp @@ -41,8 +41,6 @@ TEST_F(JavaInteropTests, TestShapeExposure1) { auto weights = NDArrayFactory::create('c', {2, 2, 2, 3}); auto exp = NDArrayFactory::create('c', {1, 3, 5, 4}); - NativeOps nativeOps; - nd4j::ops::conv2d op; std::vector tArgs({}); @@ -50,7 +48,7 @@ TEST_F(JavaInteropTests, TestShapeExposure1) { Nd4jPointer ptrs[] = {(Nd4jPointer) input.getShapeInfo(), (Nd4jPointer) weights.getShapeInfo()}; - auto shapeList = nativeOps.calculateOutputShapes(nullptr, op.getOpHash(), ptrs, 2, tArgs.data(), tArgs.size(), iArgs.data(), iArgs.size()); + auto shapeList = calculateOutputShapes(nullptr, op.getOpHash(), ptrs, 2, tArgs.data(), tArgs.size(), iArgs.data(), iArgs.size()); ASSERT_EQ(1, shapeList->size()); @@ -64,7 +62,7 
@@ TEST_F(JavaInteropTests, TestShapeExposure1) { //delete[] ptr; //delete shapeList; - nativeOps.deleteShapeList((Nd4jPointer) shapeList); + deleteShapeList((Nd4jPointer) shapeList); } @@ -72,9 +70,6 @@ TEST_F(JavaInteropTests, TestShapeExposure2) { auto input = NDArrayFactory::create('c', {1, 2, 5, 4}); auto exp = NDArrayFactory::create('c', {4}, {1, 2, 5, 4}); - - NativeOps nativeOps; - nd4j::ops::shape_of op; std::vector tArgs({}); @@ -83,14 +78,14 @@ TEST_F(JavaInteropTests, TestShapeExposure2) { Nd4jPointer ptrs[] = {(Nd4jPointer) input.getShapeInfo()}; - auto shapeList = nativeOps.calculateOutputShapes(nullptr, op.getOpHash(), ptrs, 1, tArgs.data(), tArgs.size(), iArgs.data(), iArgs.size()); + auto shapeList = calculateOutputShapes(nullptr, op.getOpHash(), ptrs, 1, tArgs.data(), tArgs.size(), iArgs.data(), iArgs.size()); ASSERT_EQ(1, shapeList->size()); ASSERT_EQ(exp.rankOf(), shape::rank((Nd4jLong *)shapeList->at(0))); ASSERT_EQ(exp.sizeAt(0), shape::shapeOf((Nd4jLong *)shapeList->at(0))[0]); - nativeOps.deleteShapeList((Nd4jPointer) shapeList); + deleteShapeList((Nd4jPointer) shapeList); } TEST_F(JavaInteropTests, TestShapeExposure3) { @@ -112,13 +107,12 @@ TEST_F(JavaInteropTests, TestShapeExposure3) { Nd4jPointer inputBuffers[] = {x.buffer(), sizes.buffer()}; Nd4jPointer inputShapes[] = {x.shapeInfo(), sizes.shapeInfo()}; - NativeOps nativeOps; nd4j::ops::split_v op; Nd4jLong iArgs[] = {1}; auto hash = op.getOpHash(); - auto shapeList = nativeOps.calculateOutputShapes(nullptr, hash, inputBuffers, inputShapes, 2, nullptr, 0, iArgs, 1, nullptr, 0); + auto shapeList = calculateOutputShapes2(nullptr, hash, inputBuffers, inputShapes, 2, nullptr, 0, iArgs, 1, nullptr, 0); ASSERT_EQ(3, shapeList->size()); @@ -126,7 +120,7 @@ TEST_F(JavaInteropTests, TestShapeExposure3) { ASSERT_TRUE(shape::equalsSoft(sub1.shapeInfo(), shapeList->at(1))); ASSERT_TRUE(shape::equalsSoft(sub2.shapeInfo(), shapeList->at(2))); - nativeOps.deleteShapeList((Nd4jPointer) shapeList); + deleteShapeList((Nd4jPointer) shapeList); } TEST_F(JavaInteropTests, Test_Squeeze_1) { @@ -143,10 +137,7 @@ TEST_F(JavaInteropTests, Test_Squeeze_1) { Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) z.getBuffer()}; Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) z.getShapeInfo()}; - - NativeOps nativeOps; - - auto status = nativeOps.execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 1, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, nullptr, 0, nullptr, 0, false); + auto status = execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 1, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, nullptr, 0, nullptr, 0, false); ASSERT_EQ(Status::OK(), status); ASSERT_EQ(e, z); @@ -167,10 +158,7 @@ TEST_F(JavaInteropTests, Test_RDiv_1) { Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) z.getBuffer()}; Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) z.getShapeInfo()}; - - NativeOps nativeOps; - - auto status = nativeOps.execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 2, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, nullptr, 0, nullptr, 0, false); + auto status = execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 2, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, nullptr, 0, nullptr, 0, false); ASSERT_EQ(Status::OK(), status); ASSERT_EQ(e, z); @@ -203,11 +191,9 @@ TEST_F(JavaInteropTests, TestSconv2d_1) { Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) output.getBuffer()}; Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) output.getShapeInfo()}; - NativeOps nativeOps; - Nd4jLong exp[] = {1, 1, 1, 1, 0, 0, 1, 1, 
0, 0}; - nativeOps.execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 4, ptrsOutBuffers, ptrsOutShapes, 1, + execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 4, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, exp, 9, nullptr, 0, false); //output.printBuffer("output"); @@ -238,11 +224,9 @@ TEST_F(JavaInteropTests, TestSconv2d_2) { Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) output.getBuffer()}; Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) output.getShapeInfo()}; - NativeOps nativeOps; - Nd4jLong exp[] = {1, 1, 1, 1, 0, 0, 1, 1, 0}; - nativeOps.execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 2, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, exp, 9, nullptr, 0, false); + execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 2, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, exp, 9, nullptr, 0, false); //output.printBuffer("output"); @@ -266,9 +250,7 @@ TEST_F(JavaInteropTests, TestMaxPooling2d_1) { nd4j::ops::maxpool2d op; - NativeOps nativeOps; - - Nd4jStatus status = nativeOps.execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 1, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, iArgs.data(), 9, nullptr, 0, false); + Nd4jStatus status = execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 1, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, iArgs.data(), 9, nullptr, 0, false); ASSERT_EQ(ND4J_STATUS_OK, status); } @@ -294,13 +276,11 @@ TEST_F(JavaInteropTests, TestCol2Im_1) { nd4j::ops::col2im op; - NativeOps nativeOps; - Nd4jLong exp[] = {1, 1, 1, 1, 4, 5, 1, 1, 1}; auto hash = op.getOpHash(); - nativeOps.execCustomOp(nullptr, hash, ptrsInBuffer, ptrsInShapes, 1, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, exp, 9, nullptr, 0, false); + execCustomOp(nullptr, hash, ptrsInBuffer, ptrsInShapes, 1, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, exp, 9, nullptr, 0, false); ASSERT_TRUE(output.meanNumber().e(0) > 0.0f); } @@ -320,8 +300,6 @@ TEST_F(JavaInteropTests, TestPNorm_1) { auto output = NDArrayFactory::create('c', {1, 3, 3, 3}); input.linspace(1); - NativeOps nativeOps; - nd4j::ops::pnormpool2d op; Nd4jLong exp[] = {2, 2, 1, 1, 0, 0, 1, 1, 0, 2, 0, 0}; @@ -332,7 +310,7 @@ TEST_F(JavaInteropTests, TestPNorm_1) { Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) output.getBuffer()}; Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) output.getShapeInfo()}; - nativeOps.execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 1, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, exp, 11, nullptr, 0, false); + execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 1, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, exp, 11, nullptr, 0, false); ASSERT_TRUE(output.meanNumber().e(0) > 0.0); } @@ -343,8 +321,6 @@ TEST_F(JavaInteropTests, TestInplace_1) { //auto exp('c', {10, 10}); input.linspace(1); - NativeOps nativeOps; - nd4j::ops::clipbyvalue op; double extras[] = {-1.0f, 1.0f}; @@ -353,7 +329,7 @@ TEST_F(JavaInteropTests, TestInplace_1) { Nd4jPointer ptrsInShapes[] = {(Nd4jPointer) input.getShapeInfo()}; - Nd4jStatus result = nativeOps.execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 1, nullptr, nullptr, 0, extras, 2, nullptr, 0, nullptr, 0, true); + Nd4jStatus result = execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 1, nullptr, nullptr, 0, extras, 2, nullptr, 0, nullptr, 0, true); ASSERT_EQ(ND4J_STATUS_OK, result); @@ -415,7 +391,6 @@ TEST_F(JavaInteropTests, test_avgpooling_edge_1) { x.linspace(1.0); z.linspace(1.0); - NativeOps nativeOps; nd4j::ops::avgpool2d op; //auto result 
= op.execute({&x}, {}, {3,3, 1,1, 0,0, 1,1, 1, 0, 1}); @@ -427,7 +402,7 @@ TEST_F(JavaInteropTests, test_avgpooling_edge_1) { Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) z.getBuffer()}; Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) z.getShapeInfo()}; - auto result = nativeOps.execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 1, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, exp, 11, nullptr, 0, false); + auto result = execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 1, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, exp, 11, nullptr, 0, false); ASSERT_EQ(Status::OK(), result); @@ -496,15 +471,13 @@ TEST_F(JavaInteropTests, test_avgpooling_edge_1) { /* TEST_F(JavaInteropTests, Test_GraphReuse_1) { - NativeOps nativeOps; - uint8_t* data = nd4j::graph::readFlatBuffers("./resources/reduce_dim_false.fb"); - nativeOps.registerGraph(nullptr, 119, (Nd4jPointer) data); + registerGraph(nullptr, 119, (Nd4jPointer) data); ASSERT_TRUE(GraphHolder::getInstance()->hasGraph(119)); - nativeOps.unregisterGraph(nullptr, 119); + unregisterGraph(nullptr, 119); ASSERT_FALSE(GraphHolder::getInstance()->hasGraph(119)); @@ -520,8 +493,6 @@ TEST_F(JavaInteropTests, Test_GraphReuse_2) { auto exp1 = NDArrayFactory::create('c', {3}, {6, 6, 6}); auto exp2 = NDArrayFactory::create('c', {3}, {9, 9, 9}); - NativeOps nativeOps; - // we load graph from file, because we're not in java here, and dont have buffer ready uint8_t* data = nd4j::graph::readFlatBuffers("./resources/reduce_dim_false.fb"); @@ -529,7 +500,7 @@ TEST_F(JavaInteropTests, Test_GraphReuse_2) { ASSERT_FALSE(GraphHolder::getInstance()->hasGraph(119)); // register the graph, to call for it later - nativeOps.registerGraph(nullptr, 119, (Nd4jPointer) data); + registerGraph(nullptr, 119, (Nd4jPointer) data); // and ensure we're ok ASSERT_TRUE(GraphHolder::getInstance()->hasGraph(119)); @@ -547,7 +518,7 @@ TEST_F(JavaInteropTests, Test_GraphReuse_2) { Nd4jPointer shapes_0[] = {(Nd4jPointer) input_0.shapeInfo()}; // now we're executing stored graph and providing replacement for input variable - auto res_0 = nativeOps.executeStoredGraph(nullptr, 119, inputs_0, shapes_0, idx, 1); + auto res_0 = executeStoredGraph(nullptr, 119, inputs_0, shapes_0, idx, 1); ASSERT_EQ(ND4J_STATUS_OK, res_0->status()); ASSERT_EQ(1, res_0->size()); @@ -562,7 +533,7 @@ TEST_F(JavaInteropTests, Test_GraphReuse_2) { Nd4jPointer shapes_1[] = {(Nd4jPointer) input_1.shapeInfo()}; // doing it again - auto res_1 = nativeOps.executeStoredGraph(nullptr, 119, inputs_1, shapes_1, idx, 1); + auto res_1 = executeStoredGraph(nullptr, 119, inputs_1, shapes_1, idx, 1); ASSERT_EQ(ND4J_STATUS_OK, res_1->status()); ASSERT_EQ(1, res_1->size()); @@ -577,7 +548,7 @@ TEST_F(JavaInteropTests, Test_GraphReuse_2) { Nd4jPointer shapes_2[] = {(Nd4jPointer) input_2.shapeInfo()}; // and again - auto res_2 = nativeOps.executeStoredGraph(nullptr, 119, inputs_2, shapes_2, idx, 1); + auto res_2 = executeStoredGraph(nullptr, 119, inputs_2, shapes_2, idx, 1); ASSERT_EQ(ND4J_STATUS_OK, res_1->status()); ASSERT_EQ(1, res_2->size()); @@ -586,7 +557,7 @@ TEST_F(JavaInteropTests, Test_GraphReuse_2) { //////// clean out - nativeOps.unregisterGraph(nullptr, 119); + unregisterGraph(nullptr, 119); ASSERT_FALSE(GraphHolder::getInstance()->hasGraph(119)); @@ -616,9 +587,7 @@ TEST_F(JavaInteropTests, Test_Greater_1) { Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) o.getBuffer()}; Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) o.getShapeInfo()}; - NativeOps nativeOps; - - nativeOps.execCustomOp(nullptr, 
op.getOpHash(), ptrsInBuffer, ptrsInShapes, 2, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, nullptr, 0, nullptr, 0, false); + execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 2, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, nullptr, 0, nullptr, 0, false); o.printIndexedBuffer("Greater JIT"); ASSERT_TRUE(exp.equalsTo(&o)); } @@ -641,9 +610,7 @@ TEST_F(JavaInteropTests, Test_Greater_2) { Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) o.getBuffer()}; Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) o.getShapeInfo()}; - NativeOps nativeOps; - - nativeOps.execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 2, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, nullptr, 0, nullptr, 0, false); + execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 2, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, nullptr, 0, nullptr, 0, false); ASSERT_TRUE(exp.equalsTo(&o)); } @@ -662,9 +629,8 @@ TEST_F(JavaInteropTests, Test_Boolean_Op_1) { Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) o.getBuffer()}; Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) o.getShapeInfo()}; - NativeOps nativeOps; auto hash = op.getOpHash(); - auto status = nativeOps.execCustomOp(nullptr, hash, ptrsInBuffer, ptrsInShapes, 1, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, nullptr, 0, nullptr, 0, false); + auto status = execCustomOp(nullptr, hash, ptrsInBuffer, ptrsInShapes, 1, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, nullptr, 0, nullptr, 0, false); ASSERT_EQ(Status::OK(), status); ASSERT_TRUE(exp.equalsTo(&o)); @@ -685,9 +651,8 @@ TEST_F(JavaInteropTests, Test_Inplace_Outputs_1) { Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) z.getBuffer()}; Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) z.getShapeInfo()}; - NativeOps nativeOps; auto hash = op.getOpHash(); - auto status = nativeOps.execCustomOp(nullptr, hash, ptrsInBuffer, ptrsInShapes, 1, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, nullptr, 0, nullptr, 0, false); + auto status = execCustomOp(nullptr, hash, ptrsInBuffer, ptrsInShapes, 1, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, nullptr, 0, nullptr, 0, false); ASSERT_EQ(Status::OK(), status); ASSERT_TRUE(exp.isSameShape(z)); @@ -710,9 +675,8 @@ TEST_F(JavaInteropTests, Test_Inplace_Outputs_2) { Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) z.getBuffer()}; Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) z.getShapeInfo()}; - NativeOps nativeOps; auto hash = op.getOpHash(); - auto status = nativeOps.execCustomOp(nullptr, hash, ptrsInBuffer, ptrsInShapes, 2, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, nullptr, 0, nullptr, 0, false); + auto status = execCustomOp(nullptr, hash, ptrsInBuffer, ptrsInShapes, 2, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, nullptr, 0, nullptr, 0, false); ASSERT_EQ(Status::OK(), status); ASSERT_TRUE(e.isSameShape(z)); @@ -736,9 +700,8 @@ TEST_F(JavaInteropTests, Test_Inplace_Outputs_3) { Nd4jLong iArgs[] = {1}; - NativeOps nativeOps; auto hash = op.getOpHash(); - auto status = nativeOps.execCustomOp(nullptr, hash, ptrsInBuffer, ptrsInShapes, 2, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, iArgs, 1, nullptr, 0, false); + auto status = execCustomOp(nullptr, hash, ptrsInBuffer, ptrsInShapes, 2, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, iArgs, 1, nullptr, 0, false); ASSERT_EQ(Status::OK(), status); ASSERT_TRUE(e.isSameShape(output)); @@ -753,8 +716,7 @@ TEST_F(JavaInteropTests, Test_Reduce3_EdgeCase) { auto dims = NDArrayFactory::create('c', {2}, {0, 1}); - NativeOps nativeOps; - nativeOps.execReduce3(nullptr, 2, x.buffer(), x.shapeInfo(), nullptr, nullptr, nullptr, + 
execReduce3Tad(nullptr, 2, x.buffer(), x.shapeInfo(), nullptr, nullptr, nullptr, y.buffer(), y.shapeInfo(), nullptr, nullptr, z.buffer(), z.shapeInfo(), nullptr, nullptr, dims.buffer(), dims.shapeInfo(), dims.specialBuffer(), dims.specialShapeInfo(), nullptr, nullptr, nullptr, nullptr); @@ -764,10 +726,8 @@ TEST_F(JavaInteropTests, Test_SimpleIf_Output) { Environment::getInstance()->setDebug(true); Environment::getInstance()->setVerbose(false); - NativeOps ops; - auto pl = nd4j::graph::readFlatBuffers("./resources/simpleif_0_1.fb"); - auto ptr = ops.executeFlatGraph(nullptr, pl); + auto ptr = executeFlatGraph(nullptr, pl); Environment::getInstance()->setDebug(false); Environment::getInstance()->setVerbose(false); @@ -792,9 +752,8 @@ TEST_F(JavaInteropTests, Test_AveragePooling_FF_TF_double) { Nd4jPointer ptrsOutShapes[] = {reinterpret_cast(z.shapeInfo())}; Nd4jLong iArgs[] = {3,3, 3,3, 0,0, 1,1,1, 0,1}; - NativeOps nativeOps; auto hash = op.getOpHash(); - auto status = nativeOps.execCustomOp(nullptr, hash, ptrsInBuffer, ptrsInShapes, 1, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, iArgs, 11, nullptr, 0, false); + auto status = execCustomOp(nullptr, hash, ptrsInBuffer, ptrsInShapes, 1, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, iArgs, 11, nullptr, 0, false); ASSERT_EQ(Status::OK(), status); @@ -818,9 +777,8 @@ TEST_F(JavaInteropTests, Test_MaxPool2D_float_1) { nd4j::ops::maxpool2d op; - NativeOps nativeOps; auto hash = op.getOpHash(); - auto status = nativeOps.execCustomOp(nullptr, hash, ptrsInBuffer, ptrsInShapes, 1, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, iArgs, 11, nullptr, 0, false); + auto status = execCustomOp(nullptr, hash, ptrsInBuffer, ptrsInShapes, 1, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, iArgs, 11, nullptr, 0, false); ASSERT_EQ(Status::OK(), status); } @@ -843,9 +801,8 @@ TEST_F(JavaInteropTests, Test_Unstack_1) { nd4j::ops::unstack op; - NativeOps nativeOps; auto hash = op.getOpHash(); - auto status = nativeOps.execCustomOp(nullptr, hash, ptrsInBuffer, ptrsInShapes, 1, ptrsOutBuffers, ptrsOutShapes, 5, nullptr, 0, iArgs, 1, nullptr, 0, false); + auto status = execCustomOp(nullptr, hash, ptrsInBuffer, ptrsInShapes, 1, ptrsOutBuffers, ptrsOutShapes, 5, nullptr, 0, iArgs, 1, nullptr, 0, false); ASSERT_EQ(Status::OK(), status); } @@ -864,9 +821,8 @@ TEST_F(JavaInteropTests, Test_AveragePooling_FF_TF_float) { Nd4jPointer ptrsOutShapes[] = {reinterpret_cast(z.shapeInfo())}; Nd4jLong iArgs[] = {3,3, 3,3, 0,0, 1,1,1, 0,1}; - NativeOps nativeOps; auto hash = op.getOpHash(); - auto status = nativeOps.execCustomOp(nullptr, hash, ptrsInBuffer, ptrsInShapes, 1, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, iArgs, 11, nullptr, 0, false); + auto status = execCustomOp(nullptr, hash, ptrsInBuffer, ptrsInShapes, 1, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, iArgs, 11, nullptr, 0, false); ASSERT_EQ(Status::OK(), status); @@ -883,8 +839,7 @@ TEST_F(JavaInteropTests, Test_Mixed_Add_1) { auto arrayZ = NDArrayFactory::create({0, 0, 0, 0}); auto arrayE = NDArrayFactory::create({2, 4, 6, 8}); - NativeOps ops; - ops.execPairwiseTransform(nullptr, pairwise::Add, + execPairwiseTransform(nullptr, pairwise::Add, arrayX.buffer(), arrayX.shapeInfo(), nullptr, nullptr, arrayY.buffer(), arrayY.shapeInfo(), nullptr, nullptr, arrayZ.buffer(), arrayZ.shapeInfo(), nullptr, nullptr, @@ -898,7 +853,6 @@ TEST_F(JavaInteropTests, Test_Add_1) { auto y = NDArrayFactory::create('c', {5}, {1, 1, 1, 1, 1}); auto e = NDArrayFactory::create('c', {5}, {2, 2, 2, 2, 2}); - NativeOps nativeOps; nd4j::ops::add 
op; Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) x.getBuffer(), y.getBuffer()}; @@ -907,7 +861,7 @@ TEST_F(JavaInteropTests, Test_Add_1) { Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) x.getBuffer()}; Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) x.getShapeInfo()}; - nativeOps.execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 2, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, nullptr, 0, nullptr, 0, false); + execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 2, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, nullptr, 0, nullptr, 0, false); ASSERT_EQ(e, x); } @@ -920,7 +874,6 @@ TEST_F(JavaInteropTests, zeta_test10) { auto e = NDArrayFactory::create('c', {3, 4}, {23.014574, 12.184081, 8.275731, 6.1532226, 4.776538, 3.7945523, 3.0541048, 2.4765317, 2.0163891, 205.27448, 21.090889, 19.477398}); - NativeOps nativeOps; nd4j::ops::zeta op; Nd4jPointer ptrsInBuffer[] = {(Nd4jPointer) x.getBuffer(), q.getBuffer()}; @@ -929,7 +882,7 @@ TEST_F(JavaInteropTests, zeta_test10) { Nd4jPointer ptrsOutBuffers[] = {(Nd4jPointer) z.getBuffer()}; Nd4jPointer ptrsOutShapes[] = {(Nd4jPointer) z.getShapeInfo()}; - nativeOps.execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 2, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, nullptr, 0, nullptr, 0, false); + execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 2, ptrsOutBuffers, ptrsOutShapes, 1, nullptr, 0, nullptr, 0, nullptr, 0, false); ASSERT_EQ(e, z); } @@ -939,8 +892,7 @@ TEST_F(JavaInteropTests, Test_Is_Max_1) { auto arrayZ = NDArrayFactory::create({0, 0, 0, 0}); auto arrayE = NDArrayFactory::create({0, 1, 0, 0}); - NativeOps ops; - ops.execTransformAny(nullptr, transform::IsMax, + execTransformAny(nullptr, transform::IsMax, arrayX.buffer(), arrayX.shapeInfo(), nullptr, nullptr, arrayZ.buffer(), arrayZ.shapeInfo(), nullptr, nullptr, nullptr); @@ -953,8 +905,7 @@ TEST_F(JavaInteropTests, Test_Is_Max_1_2) { auto arrayZ = NDArrayFactory::create({0, 0, 0, 0}); auto arrayE = NDArrayFactory::create({0, 1, 0, 0}); - NativeOps ops; - ops.execTransformAny(nullptr, transform::IsMax, + execTransformAny(nullptr, transform::IsMax, arrayX.buffer(), arrayX.shapeInfo(), nullptr, nullptr, arrayZ.buffer(), arrayZ.shapeInfo(), nullptr, nullptr, nullptr); @@ -970,8 +921,7 @@ TEST_F(JavaInteropTests, Test_Is_Max_2) { Nd4jLong *ex[] = {tad, off}; float ea[] = {2, 1, 2}; - NativeOps ops; - ops.execTransformBool(reinterpret_cast(ex), transform::IsMax, + execTransformBool(reinterpret_cast(ex), transform::IsMax, arrayX.buffer(), arrayX.shapeInfo(), nullptr, nullptr, arrayZ.buffer(), arrayZ.shapeInfo(), nullptr, nullptr, ea); @@ -995,8 +945,7 @@ TEST_F(JavaInteropTests, Test_Boolean_Broadcastables_1) { nd4j::ops::greater_equal op; - NativeOps ops; - auto shapeList = ops.calculateOutputShapes(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 2, nullptr, 0, nullptr, 0, nullptr, 0); + auto shapeList = calculateOutputShapes2(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 2, nullptr, 0, nullptr, 0, nullptr, 0); delete shapeList; } @@ -1013,8 +962,7 @@ TEST_F(JavaInteropTests, Test_L2_Loss_3) { Nd4jPointer ptrsOutShapes[] = {reinterpret_cast(z.shapeInfo())}; nd4j::ops::l2_loss op; - NativeOps ops; - auto status = ops.execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 1, ptrsOutBuffer, ptrsOutShapes, 1, nullptr, 0, nullptr, 0, nullptr, 0, false); + auto status = execCustomOp(nullptr, op.getOpHash(), ptrsInBuffer, ptrsInShapes, 1, ptrsOutBuffer, ptrsOutShapes, 1, nullptr, 0, nullptr, 0, nullptr, 0, false); 
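The buffer-based invocation pattern used throughout this file, written out once in isolation. A sketch modelled on Test_Add_1 above; the float specialization of NDArrayFactory::create is assumed, and the output is written back into x's buffer exactly as that test does.

    auto x = NDArrayFactory::create<float>('c', {5}, {1, 1, 1, 1, 1});
    auto y = NDArrayFactory::create<float>('c', {5}, {1, 1, 1, 1, 1});

    nd4j::ops::add op;

    Nd4jPointer inBuffers[]  = {(Nd4jPointer) x.getBuffer(), (Nd4jPointer) y.getBuffer()};
    Nd4jPointer inShapes[]   = {(Nd4jPointer) x.getShapeInfo(), (Nd4jPointer) y.getShapeInfo()};
    Nd4jPointer outBuffers[] = {(Nd4jPointer) x.getBuffer()};      // result goes back into x
    Nd4jPointer outShapes[]  = {(Nd4jPointer) x.getShapeInfo()};

    // extraPointers, opHash, inputs (2), outputs (1), tArgs (0), iArgs (0), bArgs (0), isInplace = false
    execCustomOp(nullptr, op.getOpHash(), inBuffers, inShapes, 2, outBuffers, outShapes, 1,
                 nullptr, 0, nullptr, 0, nullptr, 0, false);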
ASSERT_EQ(Status::OK(), status); z.printIndexedBuffer("z"); @@ -1036,9 +984,8 @@ TEST_F(JavaInteropTests, Test_Fastpath_3) { ASSERT_EQ(2, ctx.width()); - NativeOps nativeOps; nd4j::ops::add op; - nativeOps.execCustomOp(nullptr, op.getOpHash(), &ctx); + execCustomOp2(nullptr, op.getOpHash(), &ctx); ASSERT_EQ(exp, z); } @@ -1054,9 +1001,8 @@ TEST_F(JavaInteropTests, Test_Fastpath_4) { ctx.setOutputArray(0, z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo()); ctx.setIArguments(iArgs, 3); - NativeOps nativeOps; nd4j::ops::tri op; - nativeOps.execCustomOp(nullptr, op.getOpHash(), &ctx); + execCustomOp2(nullptr, op.getOpHash(), &ctx); ASSERT_EQ(exp, z); } @@ -1074,9 +1020,8 @@ TEST_F(JavaInteropTests, Test_Fastpath_5) { ctx.setInputArray(1, b.buffer(), b.shapeInfo(), b.specialBuffer(), b.specialShapeInfo()); ctx.setOutputArray(0, c.buffer(), c.shapeInfo(), c.specialBuffer(), c.specialShapeInfo()); - NativeOps nativeOps; nd4j::ops::matmul op; - auto status = nativeOps.execCustomOp(nullptr, op.getOpHash(), &ctx); + auto status = execCustomOp2(nullptr, op.getOpHash(), &ctx); ASSERT_EQ(Status::OK(), status); } @@ -1104,9 +1049,8 @@ TEST_F(JavaInteropTests, Test_Fastpath_6) { ctx.setIArguments(iArgs, 3); - NativeOps nativeOps; nd4j::ops::matmul_bp op; - auto status = nativeOps.execCustomOp(nullptr, op.getOpHash(), &ctx); + auto status = execCustomOp2(nullptr, op.getOpHash(), &ctx); ASSERT_EQ(Status::OK(), status); } @@ -1122,7 +1066,6 @@ TEST_F(JavaInteropTests, Test_Fastpath_7) { ctx.setIArguments(iArgs, 1); - NativeOps nativeOps; nd4j::ops::concat op; ctx.setInputArray(0, a.buffer(), a.shapeInfo(), a.specialBuffer(), a.specialShapeInfo()); @@ -1130,7 +1073,7 @@ TEST_F(JavaInteropTests, Test_Fastpath_7) { ctx.setOutputArray(0, z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo()); - auto status = nativeOps.execCustomOp(nullptr, op.getOpHash(), &ctx); + auto status = execCustomOp2(nullptr, op.getOpHash(), &ctx); ASSERT_EQ(Status::OK(), status); ASSERT_EQ(e, z); @@ -1138,10 +1081,8 @@ TEST_F(JavaInteropTests, Test_Fastpath_7) { /* TEST_F(JavaInteropTests, Test_Results_Conversion_1) { - NativeOps ops; - auto pl = nd4j::graph::readFlatBuffers("./resources/gru_dynamic_mnist.fb"); - auto ptr = ops.executeFlatGraph(nullptr, pl); + auto ptr = executeFlatGraph(nullptr, pl); // at this point we have FlatResults auto flatResult = GetFlatResult(ptr->pointer()); @@ -1190,8 +1131,6 @@ TEST_F(JavaInteropTests, Test_Results_Conversion_1) { } */ // TEST_F(JavaInteropTests, Test_NLP_Aggregations_1) { -// NativeOps ops; - // std::array syn0 = {-0.022756476f, 0.0126427775f, 0.011029151f, -0.013542821f, -0.012327666f, -0.0032439455f, -0.008405109f, -0.016651405f, 0.0015980572f, -0.007442479f, 0.019937921f, -0.016222188f, -0.016541665f, 0.013372547f, 0.006625724f, 0.0058958204f, -0.01281835f, -6.2343775E-4f, 0.0019826533f, 0.010253737f, -0.010291531f, 0.0019767822f, 0.018071089f, -0.0117441565f, 0.023176769f, 0.0032820583f, 0.0061427564f, -0.01696018f, 0.0054971874f, 0.0043818625f, 0.019323621f, 0.0036080598f, 0.024376748f, -0.0024499625f, 0.019496754f, 0.010563821f, -2.0503551E-4f, -0.0146056535f, 0.009949291f, 0.017604528f, -0.0050302492f, -0.022060446f, 0.016468976f, -0.0034482107f, 0.010270384f, -0.0063356445f, -0.019934833f, -0.02325993f, 0.016109904f, -0.0031106502f, -0.0020592287f, 0.024031803f, 0.005184144f, -0.024887865f, 0.02100272f, 3.395051E-4f, 0.018432347f, 5.673498E-4f, -0.020073576f, 0.010949242f}; // std::array syn1; // std::array exp; @@ -1283,5 +1222,5 @@ 
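For the fast-path tests the op is driven through a graph Context rather than raw pointer arrays. A minimal sketch mirroring Test_Fastpath_3; the float specialization, the 2x2 shapes, and the values are assumptions for illustration.

    auto a = NDArrayFactory::create<float>('c', {2, 2}, {1, 2, 3, 4});
    auto b = NDArrayFactory::create<float>('c', {2, 2}, {1, 1, 1, 1});
    auto z = NDArrayFactory::create<float>('c', {2, 2});

    Context ctx(1);
    ctx.setInputArray(0, a.buffer(), a.shapeInfo(), a.specialBuffer(), a.specialShapeInfo());
    ctx.setInputArray(1, b.buffer(), b.shapeInfo(), b.specialBuffer(), b.specialShapeInfo());
    ctx.setOutputArray(0, z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo());

    nd4j::ops::add op;
    // execCustomOp2 consumes a prepared Context instead of buffer/shape pointer arrays
    auto status = execCustomOp2(nullptr, op.getOpHash(), &ctx);
    ASSERT_EQ(Status::OK(), status);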
TEST_F(JavaInteropTests, Test_Results_Conversion_1) { // ptrptr[idx+2] = reinterpret_cast(exp.data()); -// ops.execAggregateBatchFloat(nullptr, numAggregates, opNum, maxArgs, maxShapes, maxIntArrays, maxIntArraySize, maxIndexArguments, maxRealArguments, pointer.data()); -// } \ No newline at end of file +// execAggregateBatchFloat(nullptr, numAggregates, opNum, maxArgs, maxShapes, maxIntArrays, maxIntArraySize, maxIndexArguments, maxRealArguments, pointer.data()); +// } diff --git a/libnd4j/tests_cpu/layers_tests/LegacyOpsCudaTests.cu b/libnd4j/tests_cpu/layers_tests/LegacyOpsCudaTests.cu index 44e4eb02b..354051f81 100644 --- a/libnd4j/tests_cpu/layers_tests/LegacyOpsCudaTests.cu +++ b/libnd4j/tests_cpu/layers_tests/LegacyOpsCudaTests.cu @@ -53,9 +53,8 @@ TEST_F(LegacyOpsCudaTests, test_sortTad_1) { Nd4jPointer extras[2] = {nullptr, LaunchContext::defaultContext()->getCudaStream()}; x.syncToDevice(); - NativeOps nativeOps; - nativeOps.sortTad(extras, x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), &axis, 1, packX.platformShapeInfo(), packX.platformOffsets(), false); + sortTad(extras, x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), &axis, 1, packX.platformShapeInfo(), packX.platformOffsets(), false); x.tickWriteDevice(); ASSERT_EQ(e, x); -} \ No newline at end of file +} diff --git a/libnd4j/tests_cpu/layers_tests/LegacyOpsTests.cpp b/libnd4j/tests_cpu/layers_tests/LegacyOpsTests.cpp index 090d89697..680cbd85b 100644 --- a/libnd4j/tests_cpu/layers_tests/LegacyOpsTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/LegacyOpsTests.cpp @@ -501,8 +501,7 @@ TEST_F(LegacyOpsTests, Reduce3_2) { auto dim = NDArrayFactory::create('c', {1}, {1}); - NativeOps nativeOps; - nativeOps.execReduce3(nullptr, reduce3::CosineSimilarity, x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, y.buffer(), y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), dim.buffer(), dim.shapeInfo(), dim.specialBuffer(), dim.specialShapeInfo(), + execReduce3Tad(nullptr, reduce3::CosineSimilarity, x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, y.buffer(), y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), dim.buffer(), dim.shapeInfo(), dim.specialBuffer(), dim.specialShapeInfo(), nullptr, nullptr, nullptr, nullptr); } @@ -517,9 +516,8 @@ TEST_F(LegacyOpsTests, Reduce3_3) { auto dim = NDArrayFactory::create('c', {1}, {1}); - NativeOps nativeOps; - nativeOps.execReduce3(nullptr, reduce3::CosineDistance, + execReduce3Tad(nullptr, reduce3::CosineDistance, x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, y.buffer(), y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), @@ -543,9 +541,8 @@ TEST_F(LegacyOpsTests, Reduce3_4) { auto dim = NDArrayFactory::create('c', {1}, {1}); - NativeOps nativeOps; - nativeOps.execReduce3(nullptr, reduce3::CosineDistance, + execReduce3Tad(nullptr, reduce3::CosineDistance, x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, y.buffer(), y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), @@ -569,9 +566,8 @@ TEST_F(LegacyOpsTests, Reduce3_5) { auto dim = NDArrayFactory::create('c', {1}, {1}); - NativeOps nativeOps; - nativeOps.execReduce3(nullptr, reduce3::CosineDistance, + execReduce3Tad(nullptr, reduce3::CosineDistance, x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, y.buffer(), 
y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), @@ -593,8 +589,7 @@ TEST_F(LegacyOpsTests, test_Reduce3_All_1) { auto tadPackX = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(x.shapeInfo(), -1); auto tadPackY = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(y.shapeInfo(), -1); - NativeOps ops; - ops.execReduce3All(nullptr, reduce3::EuclideanDistance, x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), + execReduce3All(nullptr, reduce3::EuclideanDistance, x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, y.buffer(), y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), dim.buffer(), dim.shapeInfo(), dim.specialBuffer(), dim.specialShapeInfo(), @@ -697,4 +692,4 @@ TEST_F(LegacyOpsTests, test_legacy_transform_float_1) { auto x = NDArrayFactory::create('c', {1, 0, 4}); NativeOpExecutioner::execTransformFloat(LaunchContext::defaultContext(), transform::FloatOps::RSqrt, x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, nullptr, nullptr); -} \ No newline at end of file +} diff --git a/libnd4j/tests_cpu/layers_tests/MmapTests.cpp b/libnd4j/tests_cpu/layers_tests/MmapTests.cpp index 663e6627b..cc0239c5d 100644 --- a/libnd4j/tests_cpu/layers_tests/MmapTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/MmapTests.cpp @@ -33,8 +33,6 @@ public: }; TEST_F(MmapTests, Test_Basic_Mmap_1) { - NativeOps nativeOps; - // just 10GB Nd4jLong size = 100000L; @@ -43,11 +41,11 @@ TEST_F(MmapTests, Test_Basic_Mmap_1) { ofs.write("", 1); ofs.close(); - auto result = nativeOps.mmapFile(nullptr, "file", size); + auto result = mmapFile(nullptr, "file", size); ASSERT_FALSE(result == nullptr); - nativeOps.munmapFile(nullptr, result, size); + munmapFile(nullptr, result, size); remove("file"); -} \ No newline at end of file +} diff --git a/libnd4j/tests_cpu/layers_tests/NDArrayCudaBasicsTests.cu b/libnd4j/tests_cpu/layers_tests/NDArrayCudaBasicsTests.cu index ada45dfa6..f26f2f35a 100644 --- a/libnd4j/tests_cpu/layers_tests/NDArrayCudaBasicsTests.cu +++ b/libnd4j/tests_cpu/layers_tests/NDArrayCudaBasicsTests.cu @@ -2258,7 +2258,6 @@ TEST_F(NDArrayCudaBasicsTests, Test_Empty_4) { TEST_F(NDArrayCudaBasicsTests, Test_ConcatNative_1) { auto x = NDArrayFactory::create('c', {5,2}, {0,1,2,3,4,5,6,7,8,9}); - NativeOps native; auto z = NDArrayFactory::create('c', {5, 8}); auto stream = x.getContext()->getCudaStream();//reinterpret_cast(&nativeStream); std::vector buffers(4); @@ -2272,7 +2271,7 @@ TEST_F(NDArrayCudaBasicsTests, Test_ConcatNative_1) { } Nd4jPointer extra[2]; extra[1] = *stream; - native.concat(extra, 1, 4, nullptr, (Nd4jPointer*)hostShapes.data(), (Nd4jPointer*)buffers.data(), (Nd4jPointer*)shapes.data(), nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr); + ::concat(extra, 1, 4, nullptr, (Nd4jPointer*)hostShapes.data(), (Nd4jPointer*)buffers.data(), (Nd4jPointer*)shapes.data(), nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr); z.syncToHost(); z.printIndexedBuffer("Concat result"); z.printBuffer("C Concat result linear"); @@ -2281,7 +2280,6 @@ TEST_F(NDArrayCudaBasicsTests, Test_ConcatNative_1) { TEST_F(NDArrayCudaBasicsTests, Test_ConcatNative_2) { auto x = NDArrayFactory::create('c', {5,2}, {0,1,2,3,4,5,6,7,8,9}); - NativeOps native; auto z = NDArrayFactory::create('f', {5, 8}); auto stream = 
x.getContext()->getCudaStream();//reinterpret_cast(&nativeStream); std::vector buffers(4); @@ -2295,7 +2293,7 @@ TEST_F(NDArrayCudaBasicsTests, Test_ConcatNative_2) { } Nd4jPointer extra[2]; extra[1] = *stream; - native.concat(extra, 1, 4, nullptr, (Nd4jPointer*)hostShapes.data(), (Nd4jPointer*)buffers.data(), (Nd4jPointer*)shapes.data(), nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr); + ::concat(extra, 1, 4, nullptr, (Nd4jPointer*)hostShapes.data(), (Nd4jPointer*)buffers.data(), (Nd4jPointer*)shapes.data(), nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr); z.syncToHost(); z.printIndexedBuffer("Concat result"); z.printBuffer("F Concat result linear"); @@ -2304,7 +2302,6 @@ TEST_F(NDArrayCudaBasicsTests, Test_ConcatNative_2) { TEST_F(NDArrayCudaBasicsTests, Test_ConcatNative_3) { auto x = NDArrayFactory::create('c', {2,3}, {1,2,3,4,5,6}); auto y = NDArrayFactory::create('c', {1,3}, {7,8,9}); - NativeOps native; auto z = NDArrayFactory::create('f', {3, 3}); auto stream = x.getContext()->getCudaStream();//reinterpret_cast(&nativeStream); std::vector buffers(2); @@ -2321,7 +2318,7 @@ TEST_F(NDArrayCudaBasicsTests, Test_ConcatNative_3) { //} Nd4jPointer extra[2]; extra[1] = *stream; - native.concat(extra, 0, 2, nullptr, (Nd4jPointer*)hostShapes.data(), (Nd4jPointer*)buffers.data(), (Nd4jPointer*)shapes.data(), nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr); + ::concat(extra, 0, 2, nullptr, (Nd4jPointer*)hostShapes.data(), (Nd4jPointer*)buffers.data(), (Nd4jPointer*)shapes.data(), nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr); z.syncToHost(); z.printIndexedBuffer("Concat result"); z.printBuffer("F Concat result linear"); @@ -2331,7 +2328,6 @@ TEST_F(NDArrayCudaBasicsTests, Test_ConcatNative_3) { TEST_F(NDArrayCudaBasicsTests, Test_ConcatNative_4) { auto x = NDArrayFactory::create('c', {2,3}, {1,2,3,4,5,6}); auto y = NDArrayFactory::create('c', {1,3}, {7,8,9}); - NativeOps native; auto z = NDArrayFactory::create('c', {3, 3}); auto stream = x.getContext()->getCudaStream();//reinterpret_cast(&nativeStream); std::vector buffers(2); @@ -2348,7 +2344,7 @@ TEST_F(NDArrayCudaBasicsTests, Test_ConcatNative_4) { //} Nd4jPointer extra[2]; extra[1] = *stream; - native.concat(extra, 0, 2, nullptr, (Nd4jPointer*)hostShapes.data(), (Nd4jPointer*)buffers.data(), (Nd4jPointer*)shapes.data(), nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr); + ::concat(extra, 0, 2, nullptr, (Nd4jPointer*)hostShapes.data(), (Nd4jPointer*)buffers.data(), (Nd4jPointer*)shapes.data(), nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr); z.syncToHost(); z.printIndexedBuffer("Concat result"); z.printBuffer("C Concat result linear"); @@ -2358,7 +2354,6 @@ TEST_F(NDArrayCudaBasicsTests, Test_ConcatNative_4) { TEST_F(NDArrayCudaBasicsTests, Test_ConcatNative_5) { auto x = NDArrayFactory::create('c', {1,2,3}, {1,2,3,4,5,6}); auto y = NDArrayFactory::create('c', {1,2,3}, {7,8,9,10,11, 12}); - NativeOps native; auto z = NDArrayFactory::create('c', {2, 2, 3}); auto stream = x.getContext()->getCudaStream();//reinterpret_cast(&nativeStream); std::vector buffers(2); @@ -2375,7 +2370,7 @@ TEST_F(NDArrayCudaBasicsTests, Test_ConcatNative_5) { //} Nd4jPointer extra[2]; extra[1] = *stream; - native.concat(extra, 0, 2, nullptr, (Nd4jPointer*)hostShapes.data(), (Nd4jPointer*)buffers.data(), (Nd4jPointer*)shapes.data(), nullptr, z.shapeInfo(), 
z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr); + ::concat(extra, 0, 2, nullptr, (Nd4jPointer*)hostShapes.data(), (Nd4jPointer*)buffers.data(), (Nd4jPointer*)shapes.data(), nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr); z.syncToHost(); z.printIndexedBuffer("Concat result"); z.printBuffer("C Concat result linear"); @@ -2385,7 +2380,6 @@ TEST_F(NDArrayCudaBasicsTests, Test_ConcatNative_6) { auto x1 = NDArrayFactory::create('c', {2,2,3}, {1,2,3,4,5,6,7,8, 9, 10,11,12}); auto x2 = NDArrayFactory::create('c', {1,2,3}, {13,14,15,16,17, 18}); auto x3 = NDArrayFactory::create('c', {1,2,3}, {19,20,21,22,23, 24}); - NativeOps native; auto z = NDArrayFactory::create('c', {4, 2, 3}); auto stream = x1.getContext()->getCudaStream();//reinterpret_cast(&nativeStream); std::vector buffers(3); @@ -2406,7 +2400,7 @@ TEST_F(NDArrayCudaBasicsTests, Test_ConcatNative_6) { printf("The third array is %p\n", buffers[2]); Nd4jPointer extra[2]; extra[1] = *stream; - native.concat(extra, 0, 3, nullptr, (Nd4jPointer*)hostShapes.data(), (Nd4jPointer*)buffers.data(), (Nd4jPointer*)shapes.data(), nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr); + ::concat(extra, 0, 3, nullptr, (Nd4jPointer*)hostShapes.data(), (Nd4jPointer*)buffers.data(), (Nd4jPointer*)shapes.data(), nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr); z.syncToHost(); z.printIndexedBuffer("Concat result"); z.printBuffer("C Concat3D result linear"); @@ -2417,7 +2411,6 @@ TEST_F(NDArrayCudaBasicsTests, Test_ConcatNative_7) { auto x1 = NDArrayFactory::create(1); auto x2 = NDArrayFactory::create(2); auto x3 = NDArrayFactory::create(3); - NativeOps native; auto z = NDArrayFactory::create('c', {3}, {1,2,3}); auto stream = x1.getContext()->getCudaStream();//reinterpret_cast(&nativeStream); std::vector buffers(3); @@ -2438,7 +2431,7 @@ TEST_F(NDArrayCudaBasicsTests, Test_ConcatNative_7) { printf("The third array is %p\n", buffers[2]); Nd4jPointer extra[2]; extra[1] = *stream; - native.concat(extra, 0, 3, nullptr, (Nd4jPointer*)hostShapes.data(), (Nd4jPointer*)buffers.data(), (Nd4jPointer*)shapes.data(), nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr); + ::concat(extra, 0, 3, nullptr, (Nd4jPointer*)hostShapes.data(), (Nd4jPointer*)buffers.data(), (Nd4jPointer*)shapes.data(), nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr); z.syncToHost(); z.printIndexedBuffer("Concat result"); z.printBuffer("C Concat scalar result linear"); @@ -2462,7 +2455,6 @@ TEST_F(NDArrayCudaBasicsTests, Test_ConcatNative_8) { lx[i].assign(i); } - NativeOps native; auto z = NDArrayFactory::create('c', {totalCount, width}); auto stream = nd4j::LaunchContext ::defaultContext()->getCudaStream();//reinterpret_cast(&nativeStream); std::vector buffers(totalCount); @@ -2478,7 +2470,7 @@ TEST_F(NDArrayCudaBasicsTests, Test_ConcatNative_8) { printf("The third array is %p\n", buffers[2]); Nd4jPointer extra[2]; extra[1] = *stream; - native.concat(extra, 0, totalCount, nullptr, (Nd4jPointer*)hostShapes.data(), (Nd4jPointer*)buffers.data(), (Nd4jPointer*)shapes.data(), nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr); + ::concat(extra, 0, totalCount, nullptr, (Nd4jPointer*)hostShapes.data(), (Nd4jPointer*)buffers.data(), (Nd4jPointer*)shapes.data(), nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr); z.syncToHost(); nd4j_printf("%f %f %f\n", z.e(0), 
z.e(width * totalCount / 2), z.e(width * (totalCount - 1))); //z.printIndexedBuffer("Concat result"); @@ -2496,7 +2488,6 @@ TEST_F(NDArrayCudaBasicsTests, TestTear_1) { arrays.emplace_back(input); } auto z = NDArrayFactory::create('c', {total, 10, 10}); - NativeOps native; auto stream = input.getContext()->getCudaStream();//reinterpret_cast(&nativeStream); Nd4jPointer extra[2]; @@ -2512,7 +2503,7 @@ TEST_F(NDArrayCudaBasicsTests, TestTear_1) { hostShapes[i] = arrays[i].shapeInfo(); } - native.concat(extra, 0, total, nullptr, (Nd4jPointer*)hostShapes.data(), (Nd4jPointer*)buffers.data(), (Nd4jPointer*)shapes.data(), nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr); + ::concat(extra, 0, total, nullptr, (Nd4jPointer*)hostShapes.data(), (Nd4jPointer*)buffers.data(), (Nd4jPointer*)shapes.data(), nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr); nd4j::ops::tear op; auto result = op.execute({&z}, {}, {1, 2}); @@ -2536,7 +2527,6 @@ TEST_F(NDArrayCudaBasicsTests, TestTear_2) { arrays.emplace_back(input); } auto z = NDArrayFactory::create('c', {10, 10, 10}); - NativeOps native; auto stream = input.getContext()->getCudaStream();//reinterpret_cast(&nativeStream); Nd4jPointer extra[2]; @@ -2552,7 +2542,7 @@ TEST_F(NDArrayCudaBasicsTests, TestTear_2) { hostShapes[i] = arrays[i].shapeInfo(); } std::vector dimsToExclude({1,2}); - native.concat(extra, 0, 10, nullptr, (Nd4jPointer*)hostShapes.data(), (Nd4jPointer*)buffers.data(), (Nd4jPointer*)shapes.data(), nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr); + ::concat(extra, 0, 10, nullptr, (Nd4jPointer*)hostShapes.data(), (Nd4jPointer*)buffers.data(), (Nd4jPointer*)shapes.data(), nullptr, z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), nullptr, nullptr); // z.syncToHost(); // z.printBuffer("Pile OK"); // z.printIndexedBuffer("Pile 10x10"); @@ -2569,7 +2559,7 @@ TEST_F(NDArrayCudaBasicsTests, TestTear_2) { Nd4jPointer target = arrays[i].specialBuffer(); cudaMemcpy(&arraysData[i], &target, sizeof(Nd4jPointer), cudaMemcpyHostToDevice); } - native.tear(extra, z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), arraysData, input.specialShapeInfo(), packX.specialShapeInfo(), packX.specialOffsets()); + ::tear(extra, z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), arraysData, input.specialShapeInfo(), packX.specialShapeInfo(), packX.specialOffsets()); // auto result = op.execute({&z}, {}, {1, 2}); // nd4j_printf("Result count is %lu\n", result->size()); //ASSERT_EQ(10, result->size()); diff --git a/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp b/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp index c94758c5a..c453f57d5 100644 --- a/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/PlaygroundTests.cpp @@ -313,12 +313,10 @@ TEST_F(PlaygroundTests, test_reduce_3) { Nd4jLong max = 0L; Nd4jLong min = DataTypeUtils::max(); - NativeOps nativeOps; - for (int e = 0; e < iterations; e++) { auto timeStart = std::chrono::system_clock::now(); - nativeOps.execReduce3(nullptr, reduce3::CosineDistance, x.buffer(), x.shapeInfo(), x.specialBuffer(), + execReduce3Tad(nullptr, reduce3::CosineDistance, x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(), nullptr, y.buffer(), y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(), z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(), dim.buffer(), dim.shapeInfo(), dim.specialBuffer(), dim.specialShapeInfo(), nullptr, 
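Dimension-wise reduce3 calls now go through execReduce3Tad; one call written out in full as a sketch. The 5x5 input shapes are hypothetical, and the trailing TAD shape/offset pointers are left as nullptr, exactly as in the benchmark and legacy-op tests above.

    auto x = NDArrayFactory::create<float>('c', {5, 5});
    auto y = NDArrayFactory::create<float>('c', {5, 5});
    auto z = NDArrayFactory::create<float>('c', {5});
    auto dim = NDArrayFactory::create<int>('c', {1}, {1});

    execReduce3Tad(nullptr, reduce3::CosineDistance,
                   x.buffer(), x.shapeInfo(), x.specialBuffer(), x.specialShapeInfo(),
                   nullptr,                                          // extraParams (none)
                   y.buffer(), y.shapeInfo(), y.specialBuffer(), y.specialShapeInfo(),
                   z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo(),
                   dim.buffer(), dim.shapeInfo(), dim.specialBuffer(), dim.specialShapeInfo(),
                   nullptr, nullptr, nullptr, nullptr);              // optional TAD shape/offset buffers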
@@ -964,8 +962,6 @@ TEST_F(PlaygroundTests, Test_Im2Col_1) {
     auto legacyPermTime = std::chrono::duration_cast (legacyPermEnd - legacyPermStart).count();
 
-    NativeOps nativeOps;
-
     Nd4jLong iArgs[] = {kH, kW, sH, sW, pH, pW, dH, dW, 0};
 
     Nd4jPointer inputBuffers[] = {input.buffer()};
     Nd4jPointer inputShapes[] = {input.shapeInfo()};
@@ -976,7 +972,7 @@ TEST_F(PlaygroundTests, Test_Im2Col_1) {
     auto javaStart = std::chrono::system_clock::now();
 
     for (int e = 0; e < iterations; e++) {
-        nativeOps.execCustomOp(nullptr, op.getOpHash(), inputBuffers, inputShapes, 1, outputBuffers, outputShapes, 1, nullptr, 0, iArgs, 9, nullptr, 0, false);
+        execCustomOp(nullptr, op.getOpHash(), inputBuffers, inputShapes, 1, outputBuffers, outputShapes, 1, nullptr, 0, iArgs, 9, nullptr, 0, false);
     }
 
     auto javaEnd = std::chrono::system_clock::now();
@@ -990,7 +986,7 @@ TEST_F(PlaygroundTests, Test_Im2Col_1) {
 
 
     for (int e = 0; e < iterations; e++) {
-        nativeOps.execCustomOp(nullptr, op.getOpHash(), inputBuffers, inputShapes, 1, outputPermBuffers, outputPermShapes, 1, nullptr, 0, iArgs, 9, nullptr, 0, false);
+        execCustomOp(nullptr, op.getOpHash(), inputBuffers, inputShapes, 1, outputPermBuffers, outputPermShapes, 1, nullptr, 0, iArgs, 9, nullptr, 0, false);
     }
 
     auto javaPermEnd = std::chrono::system_clock::now();
@@ -1020,9 +1016,7 @@ TEST_F(PlaygroundTests, Test_Im2Col_2) {
     Nd4jPointer outputPermBuffers[] = {outputPermuted.buffer()};
     Nd4jPointer outputPermShapes[] = {outputPermuted.shapeInfo()};
 
-    NativeOps nativeOps;
-
-    nativeOps.execCustomOp(nullptr, op.getOpHash(), inputBuffers, inputShapes, 1, outputPermBuffers, outputPermShapes, 1, nullptr, 0, iArgs, 9, nullptr, 0, false);
+    execCustomOp(nullptr, op.getOpHash(), inputBuffers, inputShapes, 1, outputPermBuffers, outputPermShapes, 1, nullptr, 0, iArgs, 9, nullptr, 0, false);
 }
 
 TEST_F(PlaygroundTests, Test_Col2Im_1) {
@@ -1140,8 +1134,6 @@ TEST_F(PlaygroundTests, loop_test_1) {
     int length = (int) array->lengthOf();
     int span = (int) (array->lengthOf() / 6) + 8;
 
-    NativeOps ops;
-
     auto t = new int[1000000];
 
 
@@ -1150,7 +1142,7 @@ TEST_F(PlaygroundTests, loop_test_1) {
     FloatBits fb;
     float threshold = 0.99f;
     fb.f_ = threshold;
-    int le = ops.estimateThreshold(nullptr, reinterpret_cast(array->buffer()), array->shapeInfo(), static_cast(array->lengthOf()), threshold);
+    int le = estimateThreshold(nullptr, reinterpret_cast(array->buffer()), array->shapeInfo(), static_cast(array->lengthOf()), threshold);
 
     t[0] = le;
     t[1] = length;
@@ -1162,7 +1154,7 @@ TEST_F(PlaygroundTests, loop_test_1) {
     for (int x = 0; x < iterations; x++) {
         auto permStart = std::chrono::system_clock::now();
 
-        ops.estimateThreshold(nullptr, reinterpret_cast(array->buffer()), array->shapeInfo(), static_cast(array->lengthOf()), threshold);
+        estimateThreshold(nullptr, reinterpret_cast(array->buffer()), array->shapeInfo(), static_cast(array->lengthOf()), threshold);
         TypeCast::convertToThreshold(nullptr, buffer, array->lengthOf(), t);
 
         auto permEnd = std::chrono::system_clock::now();
diff --git a/libnd4j/tests_cpu/layers_tests/RNGTests.cpp b/libnd4j/tests_cpu/layers_tests/RNGTests.cpp
index e563c8683..6621e8b6a 100644
--- a/libnd4j/tests_cpu/layers_tests/RNGTests.cpp
+++ b/libnd4j/tests_cpu/layers_tests/RNGTests.cpp
@@ -29,7 +29,6 @@ using namespace nd4j;
 
 class RNGTests : public testing::Test {
 private:
-    NativeOps nativeOps;
     //Nd4jLong *_bufferA;
     //Nd4jLong *_bufferB;
 
@@ -47,8 +46,8 @@ public:
     RNGTests() {
         //_bufferA = new Nd4jLong[100000];
         //_bufferB = new Nd4jLong[100000];
-        //_rngA = (nd4j::random::RandomBuffer *) nativeOps.initRandom(nullptr, _seed, 100000, (Nd4jPointer) _bufferA);
-        //_rngB = (nd4j::random::RandomBuffer *) nativeOps.initRandom(nullptr, _seed, 100000, (Nd4jPointer) _bufferB);
+        //_rngA = (nd4j::random::RandomBuffer *) initRandom(nullptr, _seed, 100000, (Nd4jPointer) _bufferA);
+        //_rngB = (nd4j::random::RandomBuffer *) initRandom(nullptr, _seed, 100000, (Nd4jPointer) _bufferB);
         _rngA.setStates(_seed, _seed);
         _rngB.setStates(_seed, _seed);
         nexp0->assign(-1.0f);
@@ -57,8 +56,8 @@ public:
     }
 
     ~RNGTests() {
-        //nativeOps.destroyRandom(_rngA);
-        //nativeOps.destroyRandom(_rngB);
+        //destroyRandom(_rngA);
+        //destroyRandom(_rngB);
 
         //delete[] _bufferA;
         //delete[] _bufferB;
@@ -791,14 +790,13 @@ namespace nd4j {
 }
 
 TEST_F(RNGTests, Test_Reproducibility_9) {
-    NativeOps ops;
     Nd4jLong seed = 123;
     std::vector shape = {32, 3, 28, 28};
 
     const int bufferSize = 10000;
     int64_t buffer[bufferSize];
 
-    auto rng = (nd4j::random::RandomBuffer *) ops.initRandom(nullptr, seed, bufferSize, buffer);
+    auto rng = (nd4j::random::RandomBuffer *) initRandom(nullptr, seed, bufferSize, buffer);
 
     const int length = 4000000;
     int *arrayE = new int[length];
@@ -809,7 +807,7 @@ TEST_F(RNGTests, Test_Reproducibility_9) {
 
     rng->rewindH(static_cast(length));
 
-    ops.refreshBuffer(nullptr, seed, reinterpret_cast(rng));
+    refreshBuffer(nullptr, seed, reinterpret_cast(rng));
 
     for (int e = 0; e < length; e++)
         arrayT[e] = rng->relativeInt(e);
@@ -825,18 +823,17 @@ TEST_F(RNGTests, Test_Reproducibility_9) {
     delete[] arrayE;
     delete[] arrayT;
 
-    ops.destroyRandom(reinterpret_cast(rng));
+    destroyRandom(reinterpret_cast(rng));
 }
 
 TEST_F(RNGTests, Test_Reproducibility_8) {
-    NativeOps ops;
     Nd4jLong seed = 123;
     std::vector shape = {32, 3, 28, 28};
 
     const int bufferSize = 10000;
     int64_t buffer[bufferSize];
 
-    auto rng = (nd4j::random::RandomBuffer *) ops.initRandom(nullptr, seed, bufferSize, buffer);
+    auto rng = (nd4j::random::RandomBuffer *) initRandom(nullptr, seed, bufferSize, buffer);
 
     const int length = 4000000;
     int *arrayE = new int[length];
@@ -847,7 +844,7 @@ TEST_F(RNGTests, Test_Reproducibility_8) {
 
     rng->rewindH(static_cast(length));
 
-    ops.refreshBuffer(nullptr, seed, reinterpret_cast(rng));
+    refreshBuffer(nullptr, seed, reinterpret_cast(rng));
 
     for (int e = 0; e < length; e++)
         arrayT[e] = static_cast(rng->relativeT(e));
@@ -863,29 +860,27 @@ TEST_F(RNGTests, Test_Reproducibility_8) {
     delete[] arrayE;
     delete[] arrayT;
 
-    ops.destroyRandom(reinterpret_cast(rng));
+    destroyRandom(reinterpret_cast(rng));
 }
 
 TEST_F(RNGTests, Test_RandomBuffer_Half_1) {
-    NativeOps ops;
     Nd4jLong seed = 123;
     std::vector shape = {32, 3, 28, 28};
 
     const int bufferSize = 10000;
     int64_t buffer[bufferSize];
 
-    auto rng = (nd4j::random::RandomBuffer *) ops.initRandom(nullptr, seed, bufferSize, buffer);
+    auto rng = (nd4j::random::RandomBuffer *) initRandom(nullptr, seed, bufferSize, buffer);
 
     auto r0 = rng->relativeT(12L);
     auto r1 = rng->relativeT(13L);
 
     ASSERT_NE(r0, r1);
 
-    ops.destroyRandom(reinterpret_cast(rng));
+    destroyRandom(reinterpret_cast(rng));
 }
 
 TEST_F(RNGTests, Test_Reproducibility_1) {
-    NativeOps ops;
     Nd4jLong seed = 123;
     std::vector shape = {32, 3, 28, 28};
 
@@ -918,7 +913,6 @@ TEST_F(RNGTests, Test_Reproducibility_1) {
 
 #ifndef DEBUG_BUILD
 TEST_F(RNGTests, Test_Reproducibility_2) {
-    NativeOps ops;
     Nd4jLong seed = 123;
     std::vector shape = {32, 3, 64, 64};
 
diff --git a/libnd4j/tests_cpu/layers_tests/SortCpuTests.cpp b/libnd4j/tests_cpu/layers_tests/SortCpuTests.cpp
index 708cb0482..122a16e45 100644
--- a/libnd4j/tests_cpu/layers_tests/SortCpuTests.cpp
+++ b/libnd4j/tests_cpu/layers_tests/SortCpuTests.cpp
@@ -44,8 +44,7 @@ TEST_F(SortCpuTests, test_linear_sort_by_key_1) {
 
     auto ev = NDArrayFactory::create('c', {10}, {0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5});
 
-    NativeOps nativeOps;
-    nativeOps.sortByKey(nullptr, k.buffer(), k.shapeInfo(), k.specialBuffer(), k.specialShapeInfo(), v.buffer(), v.shapeInfo(), v.specialBuffer(), v.specialShapeInfo(), false);
+    sortByKey(nullptr, k.buffer(), k.shapeInfo(), k.specialBuffer(), k.specialShapeInfo(), v.buffer(), v.shapeInfo(), v.specialBuffer(), v.specialShapeInfo(), false);
 
     ASSERT_EQ(ek, k);
     ASSERT_EQ(ev, v);
@@ -62,8 +61,7 @@ TEST_F(SortCpuTests, test_linear_sort_by_val_1) {
 
     auto ev = NDArrayFactory::create('c', {10}, {0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5});
 
-    NativeOps nativeOps;
-    nativeOps.sortByValue(nullptr, k.buffer(), k.shapeInfo(), k.specialBuffer(), k.specialShapeInfo(), v.buffer(), v.shapeInfo(), v.specialBuffer(), v.specialShapeInfo(), false);
+    sortByValue(nullptr, k.buffer(), k.shapeInfo(), k.specialBuffer(), k.specialShapeInfo(), v.buffer(), v.shapeInfo(), v.specialBuffer(), v.specialShapeInfo(), false);
 
     ASSERT_EQ(ek, k);
     ASSERT_EQ(ev, v);
@@ -81,8 +79,7 @@ TEST_F(SortCpuTests, test_tad_sort_by_key_1) {
 
     int axis = 1;
 
-    NativeOps nativeOps;
-    nativeOps.sortTadByKey(nullptr, k.buffer(), k.shapeInfo(), k.specialBuffer(), k.specialShapeInfo(), v.buffer(), v.shapeInfo(), v.specialBuffer(), v.specialShapeInfo(), &axis, 1, false);
+    sortTadByKey(nullptr, k.buffer(), k.shapeInfo(), k.specialBuffer(), k.specialShapeInfo(), v.buffer(), v.shapeInfo(), v.specialBuffer(), v.specialShapeInfo(), &axis, 1, false);
 
     ASSERT_EQ(ek, k);
     ASSERT_EQ(ev, v);
@@ -100,9 +97,8 @@ TEST_F(SortCpuTests, test_tad_sort_by_val_1) {
 
     int axis = 1;
 
-    NativeOps nativeOps;
-    nativeOps.sortTadByValue(nullptr, k.buffer(), k.shapeInfo(), k.specialBuffer(), k.specialShapeInfo(), v.buffer(), v.shapeInfo(), v.specialBuffer(), v.specialShapeInfo(), &axis, 1, false);
+    sortTadByValue(nullptr, k.buffer(), k.shapeInfo(), k.specialBuffer(), k.specialShapeInfo(), v.buffer(), v.shapeInfo(), v.specialBuffer(), v.specialShapeInfo(), &axis, 1, false);
 
     ASSERT_EQ(ek, k);
     ASSERT_EQ(ev, v);
-}
\ No newline at end of file
+}
diff --git a/libnd4j/tests_cpu/layers_tests/SortCudaTests.cu b/libnd4j/tests_cpu/layers_tests/SortCudaTests.cu
index 71144527a..49c1f7a95 100644
--- a/libnd4j/tests_cpu/layers_tests/SortCudaTests.cu
+++ b/libnd4j/tests_cpu/layers_tests/SortCudaTests.cu
@@ -42,8 +42,7 @@ TEST_F(SortCudaTests, test_linear_sort_by_key_1) {
 
     Nd4jPointer extras[2] = {nullptr, LaunchContext::defaultContext()->getCudaStream()};
 
-    NativeOps nativeOps;
-    nativeOps.sortByKey(extras, k.buffer(), k.shapeInfo(), k.specialBuffer(), k.specialShapeInfo(), v.buffer(), v.shapeInfo(), v.specialBuffer(), v.specialShapeInfo(), false);
+    sortByKey(extras, k.buffer(), k.shapeInfo(), k.specialBuffer(), k.specialShapeInfo(), v.buffer(), v.shapeInfo(), v.specialBuffer(), v.specialShapeInfo(), false);
     k.tickWriteDevice();
     v.tickWriteDevice();
 
@@ -60,8 +59,7 @@ TEST_F(SortCudaTests, test_linear_sort_by_val_1) {
 
     Nd4jPointer extras[2] = {nullptr, LaunchContext::defaultContext()->getCudaStream()};
 
-    NativeOps nativeOps;
-    nativeOps.sortByValue(extras, k.buffer(), k.shapeInfo(), k.specialBuffer(), k.specialShapeInfo(), v.buffer(), v.shapeInfo(), v.specialBuffer(), v.specialShapeInfo(), false);
+    sortByValue(extras, k.buffer(), k.shapeInfo(), k.specialBuffer(), k.specialShapeInfo(), v.buffer(), v.shapeInfo(), v.specialBuffer(), v.specialShapeInfo(), false);
     k.tickWriteDevice();
     v.tickWriteDevice();
 
@@ -78,8 +76,7 @@ TEST_F(SortCudaTests, test_linear_sort_by_val_2) {
 
     Nd4jPointer extras[2] = {nullptr, LaunchContext::defaultContext()->getCudaStream()};
 
-    NativeOps nativeOps;
-    nativeOps.sortByValue(extras, k.buffer(), k.shapeInfo(), k.specialBuffer(), k.specialShapeInfo(), v.buffer(), v.shapeInfo(), v.specialBuffer(), v.specialShapeInfo(), true);
+    sortByValue(extras, k.buffer(), k.shapeInfo(), k.specialBuffer(), k.specialShapeInfo(), v.buffer(), v.shapeInfo(), v.specialBuffer(), v.specialShapeInfo(), true);
     k.tickWriteDevice();
     v.tickWriteDevice();
     k.printIndexedBuffer("KEYS");
@@ -97,8 +94,7 @@ TEST_F(SortCudaTests, test_tad_sort_by_key_1) {
     Nd4jPointer extras[2] = {nullptr, LaunchContext::defaultContext()->getCudaStream()};
     int axis = 1;
 
-    NativeOps nativeOps;
-    nativeOps.sortTadByKey(extras, k.buffer(), k.shapeInfo(), k.specialBuffer(), k.specialShapeInfo(), v.buffer(), v.shapeInfo(), v.specialBuffer(), v.specialShapeInfo(), &axis, 1, false);
+    sortTadByKey(extras, k.buffer(), k.shapeInfo(), k.specialBuffer(), k.specialShapeInfo(), v.buffer(), v.shapeInfo(), v.specialBuffer(), v.specialShapeInfo(), &axis, 1, false);
     k.tickWriteDevice();
     v.tickWriteDevice();
 
@@ -119,11 +115,10 @@ TEST_F(SortCudaTests, test_tad_sort_by_val_1) {
     Nd4jPointer extras[2] = {nullptr, LaunchContext::defaultContext()->getCudaStream()};
     int axis = 1;
 
-    NativeOps nativeOps;
-    nativeOps.sortTadByValue(extras, k.buffer(), k.shapeInfo(), k.specialBuffer(), k.specialShapeInfo(), v.buffer(), v.shapeInfo(), v.specialBuffer(), v.specialShapeInfo(), &axis, 1, false);
+    sortTadByValue(extras, k.buffer(), k.shapeInfo(), k.specialBuffer(), k.specialShapeInfo(), v.buffer(), v.shapeInfo(), v.specialBuffer(), v.specialShapeInfo(), &axis, 1, false);
     k.tickWriteDevice();
     v.tickWriteDevice();
 
     ASSERT_EQ(ek, k);
     ASSERT_EQ(ev, v);
-}
\ No newline at end of file
+}
diff --git a/libnd4j/tests_cpu/layers_tests/TypeCastTests.cpp b/libnd4j/tests_cpu/layers_tests/TypeCastTests.cpp
index ea53cd906..6bab9cf6d 100644
--- a/libnd4j/tests_cpu/layers_tests/TypeCastTests.cpp
+++ b/libnd4j/tests_cpu/layers_tests/TypeCastTests.cpp
@@ -58,9 +58,8 @@ TEST_F(TypeCastTests, Test_ConvertDtype_1) {
     float16 dst[5];
     float16 exp[] = {(float16) 1.0f, (float16) 2.0f, (float16) 3.0f, (float16) 4.0f, (float16) 5.0f};
 
-    NativeOps ops;
-    ops.convertTypes(nullptr, ND4J_FLOAT32, src, 5, ND4J_FLOAT16, dst);
+    convertTypes(nullptr, ND4J_FLOAT32, src, 5, ND4J_FLOAT16, dst);
 
     for (int e = 0; e < 5; e++)
         ASSERT_NEAR(exp[e], dst[e], (float16) 0.01f);
-}
\ No newline at end of file
+}