Adapt the Java wrappers in ND4J generated with JavaCPP

master
Samuel Audet 2019-07-23 14:49:45 +09:00 committed by AlexDBlack
parent 780ae628a9
commit 8881bfe7aa
14 changed files with 4526 additions and 4510 deletions

View File

@ -28,58 +28,14 @@ import org.nd4j.linalg.api.buffer.Utf8Buffer;
* op execution on cpu * op execution on cpu
* @author Adam Gibson * @author Adam Gibson
*/ */
public abstract class NativeOps extends Pointer { public interface NativeOps {
/**
 * Creates a {@code NativeOps} instance from an existing pointer by handing it
 * straight to the superclass constructor.
 *
 * @param p pointer forwarded unchanged to {@code super(p)}
 */
public NativeOps(Pointer p) {
super(p);
}
/**
 * Derives a core count to use from a total (logical) processor count.
 *
 * <p>Rules, applied in order:
 * <ul>
 *   <li>{@code totals >= 256} yields 64 (treated as the Xeon Phi special case);</li>
 *   <li>the total is halved to discount HyperThreading;</li>
 *   <li>a halved value of 4 or less yields 4;</li>
 *   <li>a halved value of 5..24 is returned as is, except that 16 and 20 are halved once more;</li>
 *   <li>a halved value above 24 is halved repeatedly until it is 24 or less, and a
 *       result of exactly 20 reached in fewer than two halvings is halved once more.</li>
 * </ul>
 *
 * @param totals total number of processors reported for the machine
 * @return the derived number of cores
 */
public static int getCores(int totals) {
    // Special case for Xeon Phi.
    if (totals >= 256) {
        return 64;
    }

    // HyperThreading is always counted off.
    int cores = totals / 2;

    // Special case for small consumer CPUs (Intel i5 and below).
    if (cores <= 4) {
        return 4;
    }

    if (cores <= 24) {
        // Low-core models are known, but there is a gap between consumer CPUs and Xeons.
        // Values up to 6 are most likely consumer-grade and odd values are final results,
        // so both are left alone.
        if (cores <= 6 || isOdd(cores)) {
            return cores;
        }
        // 20 and 16 are the special cases in this range where the minimal value is used.
        return (cores == 20 || cores == 16) ? cores / 2 : cores;
    }

    // Keep halving until the value drops to 24 or below, the assumed upper bound
    // for physical cores.
    int halvings = 0;
    for (; cores > 24; cores /= 2) {
        halvings++;
    }

    // 20 is the special case in this branch, but only when reached in under two halvings.
    if (cores == 20 && halvings < 2) {
        return cores / 2;
    }
    return cores;
}

/**
 * Tells whether the given value is odd.
 *
 * @param value number to test
 * @return {@code true} when {@code value} is not divisible by two
 */
private static boolean isOdd(int value) {
    return (value & 1) != 0;
}
/** /**
* This method allows you to specify minimal number of elements per thread/block during op call * This method allows you to specify minimal number of elements per thread/block during op call
* PLEASE NOTE: Changing this value might and will affect performance. * PLEASE NOTE: Changing this value might and will affect performance.
* *
* @param value * @param value
*/ */
public native void setElementThreshold(int value); void setElementThreshold(int value);
/** /**
* This method allows you to specify minimal number of TADs per thread/block during op call * This method allows you to specify minimal number of TADs per thread/block during op call
@ -87,7 +43,7 @@ public abstract class NativeOps extends Pointer {
* *
* @param value * @param value
*/ */
public abstract void setTADThreshold(int value); void setTADThreshold(int value);
/** /**
* @param opNum * @param opNum
@ -95,7 +51,7 @@ public abstract class NativeOps extends Pointer {
* @param xShapeInfo * @param xShapeInfo
* @param extraParams * @param extraParams
*/ */
public abstract void execIndexReduceScalar(PointerPointer extraPointers, void execIndexReduceScalar(PointerPointer extraPointers,
int opNum, int opNum,
Pointer x, Pointer x,
@Cast("Nd4jLong *") LongPointer xShapeInfo, @Cast("Nd4jLong *") LongPointer xShapeInfo,
@ -117,7 +73,7 @@ public abstract class NativeOps extends Pointer {
* @param dimension * @param dimension
* @param dimensionLength * @param dimensionLength
*/ */
public abstract void execIndexReduce(PointerPointer extraPointers, void execIndexReduce(PointerPointer extraPointers,
int opNum, int opNum,
Pointer x, Pointer x,
@Cast("Nd4jLong *") LongPointer xShapeInfo, @Cast("Nd4jLong *") LongPointer xShapeInfo,
@ -142,7 +98,7 @@ public abstract class NativeOps extends Pointer {
* @param dimension * @param dimension
* @param dimensionLength * @param dimensionLength
*/ */
public abstract void execBroadcast(PointerPointer extraPointers, void execBroadcast(PointerPointer extraPointers,
int opNum, int opNum,
Pointer x, Pointer x,
@Cast("Nd4jLong *") LongPointer xShapeInfo, @Cast("Nd4jLong *") LongPointer xShapeInfo,
@ -159,7 +115,7 @@ public abstract class NativeOps extends Pointer {
Pointer hDimension, @Cast("Nd4jLong *") LongPointer hDimensionShape, Pointer hDimension, @Cast("Nd4jLong *") LongPointer hDimensionShape,
Pointer dDimension, @Cast("Nd4jLong *") LongPointer dDimensionShape); Pointer dDimension, @Cast("Nd4jLong *") LongPointer dDimensionShape);
public abstract void execBroadcastBool(PointerPointer extraPointers, void execBroadcastBool(PointerPointer extraPointers,
int opNum, int opNum,
Pointer x, Pointer x,
@Cast("Nd4jLong *") LongPointer xShapeInfo, @Cast("Nd4jLong *") LongPointer xShapeInfo,
@ -187,7 +143,7 @@ public abstract class NativeOps extends Pointer {
* @param resultShapeInfo * @param resultShapeInfo
* @param extraParams * @param extraParams
*/ */
public abstract void execPairwiseTransform(PointerPointer extraPointers, void execPairwiseTransform(PointerPointer extraPointers,
int opNum, int opNum,
Pointer x, Pointer x,
@Cast("Nd4jLong *") LongPointer xShapeInfo, @Cast("Nd4jLong *") LongPointer xShapeInfo,
@ -203,7 +159,7 @@ public abstract class NativeOps extends Pointer {
@Cast("Nd4jLong *") LongPointer dresultShapeInfo, @Cast("Nd4jLong *") LongPointer dresultShapeInfo,
Pointer extraParams); Pointer extraParams);
public abstract void execPairwiseTransformBool(PointerPointer extraPointers, void execPairwiseTransformBool(PointerPointer extraPointers,
int opNum, int opNum,
Pointer x, Pointer x,
@Cast("Nd4jLong *") LongPointer xShapeInfo, @Cast("Nd4jLong *") LongPointer xShapeInfo,
@ -227,7 +183,7 @@ public abstract class NativeOps extends Pointer {
* @param result * @param result
* @param resultShapeInfo * @param resultShapeInfo
*/ */
public abstract void execReduceFloat(PointerPointer extraPointers, void execReduceFloat(PointerPointer extraPointers,
int opNum, int opNum,
Pointer x, Pointer x,
@Cast("Nd4jLong *") LongPointer xShapeInfo, @Cast("Nd4jLong *") LongPointer xShapeInfo,
@ -240,7 +196,7 @@ public abstract class NativeOps extends Pointer {
@Cast("Nd4jLong *") LongPointer dresultShapeInfo); @Cast("Nd4jLong *") LongPointer dresultShapeInfo);
public abstract void execReduceSame(PointerPointer extraPointers, void execReduceSame(PointerPointer extraPointers,
int opNum, int opNum,
Pointer x, Pointer x,
@Cast("Nd4jLong *") LongPointer xShapeInfo, @Cast("Nd4jLong *") LongPointer xShapeInfo,
@ -253,7 +209,7 @@ public abstract class NativeOps extends Pointer {
@Cast("Nd4jLong *") LongPointer dresultShapeInfo); @Cast("Nd4jLong *") LongPointer dresultShapeInfo);
public abstract void execReduceBool(PointerPointer extraPointers, void execReduceBool(PointerPointer extraPointers,
int opNum, int opNum,
Pointer x, Pointer x,
@Cast("Nd4jLong *") LongPointer xShapeInfo, @Cast("Nd4jLong *") LongPointer xShapeInfo,
@ -266,7 +222,7 @@ public abstract class NativeOps extends Pointer {
@Cast("Nd4jLong *") LongPointer dresultShapeInfo); @Cast("Nd4jLong *") LongPointer dresultShapeInfo);
public abstract void execReduceLong(PointerPointer extraPointers, void execReduceLong(PointerPointer extraPointers,
int opNum, int opNum,
Pointer x, Pointer x,
@Cast("Nd4jLong *") LongPointer xShapeInfo, @Cast("Nd4jLong *") LongPointer xShapeInfo,
@ -286,7 +242,7 @@ public abstract class NativeOps extends Pointer {
* @param result * @param result
* @param resultShapeInfo * @param resultShapeInfo
*/ */
public abstract void execReduceFloat(PointerPointer extraPointers, void execReduceFloat2(PointerPointer extraPointers,
int opNum, int opNum,
Pointer x, Pointer x,
@Cast("Nd4jLong *") LongPointer xShapeInfo, @Cast("Nd4jLong *") LongPointer xShapeInfo,
@ -301,7 +257,7 @@ public abstract class NativeOps extends Pointer {
Pointer dDimension, @Cast("Nd4jLong *") LongPointer dDimensionShape); Pointer dDimension, @Cast("Nd4jLong *") LongPointer dDimensionShape);
public abstract void execReduceSame(PointerPointer extraPointers, void execReduceSame2(PointerPointer extraPointers,
int opNum, int opNum,
Pointer x, Pointer x,
@Cast("Nd4jLong *") LongPointer xShapeInfo, @Cast("Nd4jLong *") LongPointer xShapeInfo,
@ -315,7 +271,7 @@ public abstract class NativeOps extends Pointer {
Pointer hDimension, @Cast("Nd4jLong *") LongPointer hDimensionShape, Pointer hDimension, @Cast("Nd4jLong *") LongPointer hDimensionShape,
Pointer dDimension, @Cast("Nd4jLong *") LongPointer dDimensionShape); Pointer dDimension, @Cast("Nd4jLong *") LongPointer dDimensionShape);
public abstract void execReduceBool(PointerPointer extraPointers, void execReduceBool2(PointerPointer extraPointers,
int opNum, int opNum,
Pointer x, Pointer x,
@Cast("Nd4jLong *") LongPointer xShapeInfo, @Cast("Nd4jLong *") LongPointer xShapeInfo,
@ -329,7 +285,7 @@ public abstract class NativeOps extends Pointer {
Pointer hDimension, @Cast("Nd4jLong *") LongPointer hDimensionShape, Pointer hDimension, @Cast("Nd4jLong *") LongPointer hDimensionShape,
Pointer dDimension, @Cast("Nd4jLong *") LongPointer dDimensionShape); Pointer dDimension, @Cast("Nd4jLong *") LongPointer dDimensionShape);
public abstract void execReduceLong(PointerPointer extraPointers, void execReduceLong2(PointerPointer extraPointers,
int opNum, int opNum,
Pointer x, Pointer x,
@Cast("Nd4jLong *") LongPointer xShapeInfo, @Cast("Nd4jLong *") LongPointer xShapeInfo,
@ -353,7 +309,7 @@ public abstract class NativeOps extends Pointer {
* @param result * @param result
* @param resultShapeInfo * @param resultShapeInfo
*/ */
public abstract void execReduce3(PointerPointer extraPointers, void execReduce3(PointerPointer extraPointers,
int opNum, int opNum,
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo, Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo, Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
@ -371,7 +327,7 @@ public abstract class NativeOps extends Pointer {
* @param y * @param y
* @param yShapeInfo * @param yShapeInfo
*/ */
public abstract void execReduce3Scalar(PointerPointer extraPointers, int opNum, void execReduce3Scalar(PointerPointer extraPointers, int opNum,
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo, Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo, Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
Pointer extraParamsVals, Pointer extraParamsVals,
@ -392,7 +348,7 @@ public abstract class NativeOps extends Pointer {
* @param dimension * @param dimension
* @param dimensionLength * @param dimensionLength
*/ */
public abstract void execReduce3(PointerPointer extraPointers, void execReduce3Tad(PointerPointer extraPointers,
int opNum, int opNum,
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo, Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo, Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
@ -406,7 +362,7 @@ public abstract class NativeOps extends Pointer {
@Cast("Nd4jLong *") LongPointer tadOnlyShapeInfo, @Cast("Nd4jLong *") LongPointer tadOffsets, @Cast("Nd4jLong *") LongPointer tadOnlyShapeInfo, @Cast("Nd4jLong *") LongPointer tadOffsets,
@Cast("Nd4jLong *") LongPointer yTadOnlyShapeInfo, @Cast("Nd4jLong *") LongPointer yTadOffsets); @Cast("Nd4jLong *") LongPointer yTadOnlyShapeInfo, @Cast("Nd4jLong *") LongPointer yTadOffsets);
public abstract void execReduce3All(PointerPointer extraPointers, void execReduce3All(PointerPointer extraPointers,
int opNum, int opNum,
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo, Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo, Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
@ -432,7 +388,7 @@ public abstract class NativeOps extends Pointer {
* @param scalar * @param scalar
* @param extraParams * @param extraParams
*/ */
public abstract void execScalar(PointerPointer extraPointers, void execScalar(PointerPointer extraPointers,
int opNum, int opNum,
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo, Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo, Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
@ -442,7 +398,7 @@ public abstract class NativeOps extends Pointer {
Pointer dscalar, @Cast("Nd4jLong *") LongPointer dscalarShapeInfo, Pointer dscalar, @Cast("Nd4jLong *") LongPointer dscalarShapeInfo,
Pointer extraParams); Pointer extraParams);
public abstract void execScalarBool(PointerPointer extraPointers, void execScalarBool(PointerPointer extraPointers,
int opNum, int opNum,
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo, Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo, Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
@ -459,7 +415,7 @@ public abstract class NativeOps extends Pointer {
* @param extraParams * @param extraParams
* @param biasCorrected * @param biasCorrected
*/ */
public abstract void execSummaryStatsScalar(PointerPointer extraPointers, void execSummaryStatsScalar(PointerPointer extraPointers,
int opNum, int opNum,
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo, Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo, Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
@ -477,7 +433,7 @@ public abstract class NativeOps extends Pointer {
* @param resultShapeInfo * @param resultShapeInfo
* @param biasCorrected * @param biasCorrected
*/ */
public abstract void execSummaryStats(PointerPointer extraPointers, void execSummaryStats(PointerPointer extraPointers,
int opNum, int opNum,
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo, Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo, Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
@ -496,7 +452,7 @@ public abstract class NativeOps extends Pointer {
* @param dimension * @param dimension
* @param dimensionLength * @param dimensionLength
*/ */
public abstract void execSummaryStats(PointerPointer extraPointers, void execSummaryStatsTad(PointerPointer extraPointers,
int opNum, int opNum,
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo, Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo, Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
@ -519,7 +475,7 @@ public abstract class NativeOps extends Pointer {
* @param resultShapeInfo * @param resultShapeInfo
* @param extraParams * @param extraParams
*/ */
public abstract void execTransformFloat(PointerPointer extraPointers, void execTransformFloat(PointerPointer extraPointers,
int opNum, int opNum,
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo, Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo, Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
@ -527,7 +483,7 @@ public abstract class NativeOps extends Pointer {
Pointer dresult, @Cast("Nd4jLong *") LongPointer dresultShapeInfo, Pointer dresult, @Cast("Nd4jLong *") LongPointer dresultShapeInfo,
Pointer extraParams); Pointer extraParams);
public abstract void execTransformSame(PointerPointer extraPointers, void execTransformSame(PointerPointer extraPointers,
int opNum, int opNum,
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo, Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo, Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
@ -535,7 +491,7 @@ public abstract class NativeOps extends Pointer {
Pointer dresult, @Cast("Nd4jLong *") LongPointer dresultShapeInfo, Pointer dresult, @Cast("Nd4jLong *") LongPointer dresultShapeInfo,
Pointer extraParams); Pointer extraParams);
public abstract void execTransformStrict(PointerPointer extraPointers, void execTransformStrict(PointerPointer extraPointers,
int opNum, int opNum,
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo, Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo, Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
@ -543,7 +499,7 @@ public abstract class NativeOps extends Pointer {
Pointer dresult, @Cast("Nd4jLong *") LongPointer dresultShapeInfo, Pointer dresult, @Cast("Nd4jLong *") LongPointer dresultShapeInfo,
Pointer extraParams); Pointer extraParams);
public abstract void execTransformBool(PointerPointer extraPointers, void execTransformBool(PointerPointer extraPointers,
int opNum, int opNum,
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo, Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo, Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
@ -551,7 +507,7 @@ public abstract class NativeOps extends Pointer {
Pointer dresult, @Cast("Nd4jLong *") LongPointer dresultShapeInfo, Pointer dresult, @Cast("Nd4jLong *") LongPointer dresultShapeInfo,
Pointer extraParams); Pointer extraParams);
public abstract void execTransformAny(PointerPointer extraPointers, void execTransformAny(PointerPointer extraPointers,
int opNum, int opNum,
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo, Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo, Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
@ -573,7 +529,7 @@ public abstract class NativeOps extends Pointer {
* @param dimension * @param dimension
* @param dimensionLength * @param dimensionLength
*/ */
public abstract void execScalar(PointerPointer extraPointers, void execScalarTad(PointerPointer extraPointers,
int opNum, int opNum,
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo, Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo, Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
@ -587,7 +543,7 @@ public abstract class NativeOps extends Pointer {
@Cast("Nd4jLong *") LongPointer tadShapeInfo, @Cast("Nd4jLong *") LongPointer tadOffsets, @Cast("Nd4jLong *") LongPointer tadShapeInfo, @Cast("Nd4jLong *") LongPointer tadOffsets,
@Cast("Nd4jLong *") LongPointer tadShapeInfoZ, @Cast("Nd4jLong *") LongPointer tadOffsetsZ); @Cast("Nd4jLong *") LongPointer tadShapeInfoZ, @Cast("Nd4jLong *") LongPointer tadOffsetsZ);
public abstract void execScalarBool(PointerPointer extraPointers, void execScalarBoolTad(PointerPointer extraPointers,
int opNum, int opNum,
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo, Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo, Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
@ -610,7 +566,7 @@ public abstract class NativeOps extends Pointer {
* @param input * @param input
* @param inputShapeInfo * @param inputShapeInfo
*/ */
public abstract void flatten(PointerPointer extraPointers, void flatten(PointerPointer extraPointers,
int offset, int offset,
char order, char order,
Pointer results, @Cast("Nd4jLong *") LongPointer resultShapeInfo, Pointer results, @Cast("Nd4jLong *") LongPointer resultShapeInfo,
@ -629,7 +585,7 @@ public abstract class NativeOps extends Pointer {
* @param tadPointers * @param tadPointers
* @param tadOffsets * @param tadOffsets
*/ */
public abstract void concat(PointerPointer extraPointers, void concat(PointerPointer extraPointers,
int dimension, int dimension,
int numArrays, int numArrays,
PointerPointer data, PointerPointer inputShapeInfo, PointerPointer data, PointerPointer inputShapeInfo,
@ -639,7 +595,7 @@ public abstract class NativeOps extends Pointer {
PointerPointer tadPointers, PointerPointer tadPointers,
PointerPointer tadOffsets); PointerPointer tadOffsets);
public abstract void specialConcat(PointerPointer extraPointers, void specialConcat(PointerPointer extraPointers,
int dimension, int dimension,
int numArrays, int numArrays,
PointerPointer data, PointerPointer inputShapeInfo, PointerPointer data, PointerPointer inputShapeInfo,
@ -653,99 +609,99 @@ public abstract class NativeOps extends Pointer {
* *
* @return * @return
*/ */
public abstract int ompGetMaxThreads(); int ompGetMaxThreads();
/** /**
* Gets the number of OpenMP threads * Gets the number of OpenMP threads
* *
* @return * @return
*/ */
public abstract int ompGetNumThreads(); int ompGetNumThreads();
/** /**
* Sets the number of openmp threads * Sets the number of openmp threads
* *
* @param threads * @param threads
*/ */
public abstract void setOmpNumThreads(int threads); void setOmpNumThreads(int threads);
/** /**
* Sets the minimal number of openmp threads for variative methods * Sets the minimal number of openmp threads for variative methods
* *
* @param threads * @param threads
*/ */
public abstract void setOmpMinThreads(int threads); void setOmpMinThreads(int threads);
/** /**
* NEVER EVER USE THIS METHOD OUTSIDE OF CUDA * NEVER EVER USE THIS METHOD OUTSIDE OF CUDA
*/ */
public abstract void initializeDevicesAndFunctions(); void initializeDevicesAndFunctions();
public abstract void initializeFunctions(PointerPointer functions); void initializeFunctions(PointerPointer functions);
public abstract Pointer mallocHost(long memorySize, int flags); Pointer mallocHost(long memorySize, int flags);
public abstract Pointer mallocDevice(long memorySize, int ptrToDeviceId, int flags); Pointer mallocDevice(long memorySize, int ptrToDeviceId, int flags);
public abstract int freeHost(Pointer pointer); int freeHost(Pointer pointer);
public abstract int freeDevice(Pointer pointer, int deviceId); int freeDevice(Pointer pointer, int deviceId);
public abstract Pointer createContext(); Pointer createContext();
public abstract Pointer createStream(); Pointer createStream();
public abstract Pointer createEvent(); Pointer createEvent();
public abstract int registerEvent(Pointer event, Pointer stream); int registerEvent(Pointer event, Pointer stream);
public abstract int destroyEvent(Pointer event); int destroyEvent(Pointer event);
public abstract int setDevice(int ptrToDeviceId); int setDevice(int ptrToDeviceId);
public abstract int getDevice(); int getDevice();
public abstract int streamSynchronize(Pointer stream); int streamSynchronize(Pointer stream);
public abstract int eventSynchronize(Pointer event); int eventSynchronize(Pointer event);
public abstract long getDeviceFreeMemory(int ptrToDeviceId); long getDeviceFreeMemory(int ptrToDeviceId);
public abstract long getDeviceFreeMemory(); long getDeviceFreeMemoryDefault();
public abstract long getDeviceTotalMemory(int ptrToDeviceId); long getDeviceTotalMemory(int ptrToDeviceId);
public abstract int getDeviceMajor(int ptrToDeviceId); int getDeviceMajor(int ptrToDeviceId);
public abstract int getDeviceMinor(int ptrToDeviceId); int getDeviceMinor(int ptrToDeviceId);
public abstract String getDeviceName(int ptrToDeviceId); String getDeviceName(int ptrToDeviceId);
public abstract int memcpy(Pointer dst, Pointer src, long size, int flags, Pointer reserved); int memcpySync(Pointer dst, Pointer src, long size, int flags, Pointer reserved);
public abstract int memcpyAsync(Pointer dst, Pointer src, long size, int flags, Pointer reserved); int memcpyAsync(Pointer dst, Pointer src, long size, int flags, Pointer reserved);
public abstract int memcpyConstantAsync(long dst, Pointer src, long size, int flags, Pointer reserved); int memcpyConstantAsync(long dst, Pointer src, long size, int flags, Pointer reserved);
public abstract int memset(Pointer dst, int value, long size, int flags, Pointer reserved); int memsetSync(Pointer dst, int value, long size, int flags, Pointer reserved);
public abstract int memsetAsync(Pointer dst, int value, long size, int flags, Pointer reserved); int memsetAsync(Pointer dst, int value, long size, int flags, Pointer reserved);
public abstract Pointer getConstantSpace(); Pointer getConstantSpace();
public abstract int getAvailableDevices(); int getAvailableDevices();
public abstract void enableDebugMode(boolean reallyEnable); void enableDebugMode(boolean reallyEnable);
public abstract void enableVerboseMode(boolean reallyEnable); void enableVerboseMode(boolean reallyEnable);
public abstract void setGridLimit(int gridSize); void setGridLimit(int gridSize);
public abstract Pointer tadOnlyShapeInfo(@Cast("Nd4jLong *") LongPointer shapeInfo, IntPointer dimension, int dimensionLength); Pointer tadOnlyShapeInfo(@Cast("Nd4jLong *") LongPointer shapeInfo, IntPointer dimension, int dimensionLength);
/////////////// ///////////////
public abstract void pullRows(PointerPointer extraPointers, void pullRows(PointerPointer extraPointers,
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo, Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo, Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
Pointer z, @Cast("Nd4jLong *") LongPointer zShapeInfo, Pointer z, @Cast("Nd4jLong *") LongPointer zShapeInfo,
@ -760,7 +716,7 @@ public abstract class NativeOps extends Pointer {
/////////////////////// ///////////////////////
public abstract void average(PointerPointer extraPointers, void average(PointerPointer extraPointers,
PointerPointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo, PointerPointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
PointerPointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo, PointerPointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
Pointer z, @Cast("Nd4jLong *") LongPointer zShapeInfo, Pointer z, @Cast("Nd4jLong *") LongPointer zShapeInfo,
@ -771,7 +727,7 @@ public abstract class NativeOps extends Pointer {
/////////////////////// ///////////////////////
public abstract void accumulate(PointerPointer extraPointers, void accumulate(PointerPointer extraPointers,
PointerPointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo, PointerPointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
PointerPointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo, PointerPointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
Pointer z, @Cast("Nd4jLong *") LongPointer zShapeInfo, Pointer z, @Cast("Nd4jLong *") LongPointer zShapeInfo,
@ -781,15 +737,15 @@ public abstract class NativeOps extends Pointer {
/////////////////////// ///////////////////////
public abstract void enableP2P(boolean reallyEnable); void enableP2P(boolean reallyEnable);
public abstract void checkP2P(); void checkP2P();
public abstract boolean isP2PAvailable(); boolean isP2PAvailable();
// //
public abstract void shuffle(PointerPointer extraPointers, void shuffle(PointerPointer extraPointers,
PointerPointer x, @Cast("Nd4jLong *") PointerPointer xShapeInfo, PointerPointer x, @Cast("Nd4jLong *") PointerPointer xShapeInfo,
PointerPointer dx, @Cast("Nd4jLong *") PointerPointer dxShapeInfo, PointerPointer dx, @Cast("Nd4jLong *") PointerPointer dxShapeInfo,
PointerPointer z, @Cast("Nd4jLong *") PointerPointer zShapeInfo, PointerPointer z, @Cast("Nd4jLong *") PointerPointer zShapeInfo,
@ -802,15 +758,15 @@ public abstract class NativeOps extends Pointer {
// opType conversion // opType conversion
public abstract void convertTypes(PointerPointer extras, int srcType, Pointer x, long N, int dstType, Pointer z); void convertTypes(PointerPointer extras, int srcType, Pointer x, long N, int dstType, Pointer z);
public abstract boolean isExperimentalEnabled(); boolean isExperimentalEnabled();
// GridOps // GridOps
/* /*
// MetaOps // MetaOps
public abstract void execMetaPredicateShape(PointerPointer extras, void execMetaPredicateShape(PointerPointer extras,
int opTypeA, int opNumA, int opTypeA, int opNumA,
int opTypeB, int opNumB, int opTypeB, int opNumB,
long N, long N,
@ -826,7 +782,7 @@ public abstract class NativeOps extends Pointer {
*/ */
///////////////////////// /////////////////////////
public abstract void execAggregate(PointerPointer extras, int opNum, void execAggregate(PointerPointer extras, int opNum,
PointerPointer arguments, PointerPointer arguments,
int numArguments, int numArguments,
@Cast("Nd4jLong **") PointerPointer shapes, @Cast("Nd4jLong **") PointerPointer shapes,
@ -839,20 +795,20 @@ public abstract class NativeOps extends Pointer {
int numRealArguments, int numRealArguments,
@Cast("nd4j::DataType") int dataType); @Cast("nd4j::DataType") int dataType);
public abstract void execAggregateBatch(PointerPointer extras, int numAggregates, int opNum, int maxArgs, void execAggregateBatch(PointerPointer extras, int numAggregates, int opNum, int maxArgs,
int maxShapes, int maxIntArrays, int maxIntArraySize, int maxIdx, int maxReals, int maxShapes, int maxIntArrays, int maxIntArraySize, int maxIdx, int maxReals,
Pointer ptrToArguments, @Cast("nd4j::DataType") int dataType); Pointer ptrToArguments, @Cast("nd4j::DataType") int dataType);
////////////// //////////////
public abstract void execRandom(PointerPointer extraPointers, void execRandom(PointerPointer extraPointers,
int opNum, int opNum,
Pointer state, Pointer state,
Pointer z, @Cast("Nd4jLong *") LongPointer zShapeBuffer, Pointer z, @Cast("Nd4jLong *") LongPointer zShapeBuffer,
Pointer dz, @Cast("Nd4jLong *") LongPointer dzShapeBuffer, Pointer dz, @Cast("Nd4jLong *") LongPointer dzShapeBuffer,
Pointer extraArguments); Pointer extraArguments);
public abstract void execRandom(PointerPointer extraPointers, void execRandom3(PointerPointer extraPointers,
int opNum, int opNum,
Pointer state, Pointer state,
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeBuffer, Pointer x, @Cast("Nd4jLong *") LongPointer xShapeBuffer,
@ -863,7 +819,7 @@ public abstract class NativeOps extends Pointer {
Pointer dz, @Cast("Nd4jLong *") LongPointer dzShapeBuffer, Pointer dz, @Cast("Nd4jLong *") LongPointer dzShapeBuffer,
Pointer extraArguments); Pointer extraArguments);
public abstract void execRandom(PointerPointer extraPointers, void execRandom2(PointerPointer extraPointers,
int opNum, int opNum,
Pointer state, Pointer state,
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeBuffer, Pointer x, @Cast("Nd4jLong *") LongPointer xShapeBuffer,
@ -875,13 +831,13 @@ public abstract class NativeOps extends Pointer {
//////////////////// ////////////////////
public abstract Pointer initRandom(PointerPointer extraPointers, long seed, long numberOfElements, Pointer pointerToBuffer); Pointer initRandom(PointerPointer extraPointers, long seed, long numberOfElements, Pointer pointerToBuffer);
public abstract void refreshBuffer(PointerPointer extraPointers, long seed, Pointer pointer); void refreshBuffer(PointerPointer extraPointers, long seed, Pointer pointer);
public abstract void reSeedBuffer(PointerPointer extraPointers, long seed, Pointer pointer); void reSeedBuffer(PointerPointer extraPointers, long seed, Pointer pointer);
public abstract void destroyRandom(Pointer pointer); void destroyRandom(Pointer pointer);
/** /**
@ -893,7 +849,7 @@ public abstract class NativeOps extends Pointer {
* @param wordSize the word size (4 for float, 8 for doubles) * @param wordSize the word size (4 for float, 8 for doubles)
* @return a pointer to a numpy array * @return a pointer to a numpy array
*/ */
public abstract Pointer numpyFromNd4j(Pointer data, Pointer shapeBuffer, long wordSize); Pointer numpyFromNd4j(Pointer data, Pointer shapeBuffer, long wordSize);
/** /**
@ -903,14 +859,14 @@ public abstract class NativeOps extends Pointer {
* to get the length for * to get the length for
* @return * @return
*/ */
public abstract int elementSizeForNpyArrayHeader(Pointer npyArray); int elementSizeForNpyArrayHeader(Pointer npyArray);
/** /**
* @param npyArrayStruct * @param npyArrayStruct
* @return * @return
*/ */
public abstract Pointer dataPointForNumpyStruct(Pointer npyArrayStruct); Pointer dataPointForNumpyStruct(Pointer npyArrayStruct);
/** /**
@ -921,7 +877,7 @@ public abstract class NativeOps extends Pointer {
* @param wordSize the word size * @param wordSize the word size
* @return * @return
*/ */
public abstract Pointer numpyHeaderForNd4j(Pointer data, Pointer shapeBuffer, long wordSize, LongPointer length); Pointer numpyHeaderForNd4j(Pointer data, Pointer shapeBuffer, long wordSize, LongPointer length);
/** /**
* Load numpy from a header * Load numpy from a header
@ -930,13 +886,13 @@ public abstract class NativeOps extends Pointer {
* @param data the header data to parse * @param data the header data to parse
* @return a pointer to a numpy cnpy:NpyArray struct * @return a pointer to a numpy cnpy:NpyArray struct
*/ */
public abstract Pointer loadNpyFromHeader(Pointer data); Pointer loadNpyFromHeader(Pointer data);
/** /**
* @param npyArray * @param npyArray
* @return * @return
*/ */
public abstract Pointer dataPointForNumpyHeader(Pointer npyArray); Pointer dataPointForNumpyHeader(Pointer npyArray);
/** /**
* Get the shape buffer from a * Get the shape buffer from a
@ -946,7 +902,7 @@ public abstract class NativeOps extends Pointer {
* @param npyArray * @param npyArray
* @return * @return
*/ */
public abstract Pointer shapeBufferForNumpyHeader(Pointer npyArray); Pointer shapeBufferForNumpyHeader(Pointer npyArray);
/** /**
* Used in {@link org.nd4j.linalg.factory.NDArrayFactory#createFromNpyPointer(Pointer)} * Used in {@link org.nd4j.linalg.factory.NDArrayFactory#createFromNpyPointer(Pointer)}
@ -956,7 +912,7 @@ public abstract class NativeOps extends Pointer {
* @param npyArray the pointer to the numpy array to use * @param npyArray the pointer to the numpy array to use
* @return the pointer for the numpy array * @return the pointer for the numpy array
*/ */
public abstract Pointer dataPointForNumpy(Pointer npyArray); Pointer dataPointForNumpy(Pointer npyArray);
/** /**
* Get a shape buffer for a numpy array. * Get a shape buffer for a numpy array.
@ -965,7 +921,7 @@ public abstract class NativeOps extends Pointer {
* @param npyArray the numpy array to get the shape buffer for * @param npyArray the numpy array to get the shape buffer for
* @return a pointer representing the shape buffer for numpy * @return a pointer representing the shape buffer for numpy
*/ */
public abstract Pointer shapeBufferForNumpy(Pointer npyArray); Pointer shapeBufferForNumpy(Pointer npyArray);
/** /**
* This method releases numpy pointer * This method releases numpy pointer
@ -974,7 +930,7 @@ public abstract class NativeOps extends Pointer {
* *
* @param npyArray * @param npyArray
*/ */
public abstract void releaseNumpy(Pointer npyArray); void releaseNumpy(Pointer npyArray);
/** /**
@ -984,7 +940,7 @@ public abstract class NativeOps extends Pointer {
* @param path the path to the file * @param path the path to the file
* @return * @return
*/ */
public abstract Pointer numpyFromFile(BytePointer path); Pointer numpyFromFile(BytePointer path);
/** /**
@ -994,7 +950,7 @@ public abstract class NativeOps extends Pointer {
* @param buffer the buffer pointer to check * @param buffer the buffer pointer to check
* @return * @return
*/ */
public abstract int lengthForShapeBufferPointer(Pointer buffer); int lengthForShapeBufferPointer(Pointer buffer);
/** /**
* Calculate the element size * Calculate the element size
@ -1004,7 +960,7 @@ public abstract class NativeOps extends Pointer {
* element size for * element size for
* @return the element size for a given array * @return the element size for a given array
*/ */
public abstract int elementSizeForNpyArray(Pointer npyArray); int elementSizeForNpyArray(Pointer npyArray);
/** /**
@ -1013,31 +969,31 @@ public abstract class NativeOps extends Pointer {
* @param address the address to get the pointer * @param address the address to get the pointer
* @return the pointer for the given address * @return the pointer for the given address
*/ */
public abstract Pointer pointerForAddress(long address); Pointer pointerForAddress(long address);
////// NPZ /////// ////// NPZ ///////
public abstract Pointer mapFromNpzFile(BytePointer path); Pointer mapFromNpzFile(BytePointer path);
public abstract int getNumNpyArraysInMap(Pointer map); int getNumNpyArraysInMap(Pointer map);
public abstract String getNpyArrayNameFromMap(Pointer map, int index); String getNpyArrayNameFromMap(Pointer map, int index);
public abstract Pointer getNpyArrayFromMap(Pointer map, int index); Pointer getNpyArrayFromMap(Pointer map, int index);
public abstract Pointer getNpyArrayData(Pointer npArray); Pointer getNpyArrayData(Pointer npArray);
public abstract LongPointer getNpyArrayShape(Pointer npArray); LongPointer getNpyArrayShape(Pointer npArray);
public abstract int getNpyArrayRank(Pointer npArray); int getNpyArrayRank(Pointer npArray);
public abstract char getNpyArrayOrder(Pointer npArray); char getNpyArrayOrder(Pointer npArray);
public abstract int getNpyArrayElemSize(Pointer npArray); int getNpyArrayElemSize(Pointer npArray);
/////// ///////
public abstract void tear(PointerPointer extras, void tear(PointerPointer extras,
Pointer tensor, @Cast("Nd4jLong *") LongPointer xShapeInfo, Pointer tensor, @Cast("Nd4jLong *") LongPointer xShapeInfo,
Pointer dtensor, @Cast("Nd4jLong *") LongPointer dxShapeInfo, Pointer dtensor, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
PointerPointer targets, @Cast("Nd4jLong *") LongPointer zShapeInfo, PointerPointer targets, @Cast("Nd4jLong *") LongPointer zShapeInfo,
@ -1045,26 +1001,26 @@ public abstract class NativeOps extends Pointer {
@Cast("Nd4jLong *") LongPointer tadOffsets); @Cast("Nd4jLong *") LongPointer tadOffsets);
public abstract long encodeBitmap(PointerPointer extraPointers, Pointer dx, LongPointer xShapeInfo, long N, IntPointer dz, float threshold); long encodeBitmap(PointerPointer extraPointers, Pointer dx, LongPointer xShapeInfo, long N, IntPointer dz, float threshold);
public abstract void decodeBitmap(PointerPointer extraPointers, Pointer dx, long N, Pointer dz, LongPointer zShapeInfo); void decodeBitmap(PointerPointer extraPointers, Pointer dx, long N, Pointer dz, LongPointer zShapeInfo);
public abstract void encodeThresholdP1(PointerPointer extraPointers, Pointer dx, LongPointer xShapeInfo, long N, IntPointer dz, float threshold); void encodeThresholdP1(PointerPointer extraPointers, Pointer dx, LongPointer xShapeInfo, long N, IntPointer dz, float threshold);
public abstract void encodeThresholdP2Int(PointerPointer extraPointers, IntPointer dx, long N, IntPointer dz); void encodeThresholdP2Int(PointerPointer extraPointers, IntPointer dx, long N, IntPointer dz);
public abstract void encodeThresholdP3(PointerPointer extraPointers, Pointer dx, LongPointer xShapeInfo, IntPointer offsets, long N, IntPointer dz); void encodeThresholdP3(PointerPointer extraPointers, Pointer dx, LongPointer xShapeInfo, IntPointer offsets, long N, IntPointer dz);
public abstract void decodeThreshold(PointerPointer extraPointers, Pointer dx, long N, Pointer dz, LongPointer zShapeInfo); void decodeThreshold(PointerPointer extraPointers, Pointer dx, long N, Pointer dz, LongPointer zShapeInfo);
public abstract void sort(PointerPointer extraPointers, void sort(PointerPointer extraPointers,
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo, Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo, Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
boolean descending); boolean descending);
public abstract void sortTad(PointerPointer extraPointers, void sortTad(PointerPointer extraPointers,
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo, Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo, Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
IntPointer dimension, IntPointer dimension,
@ -1074,81 +1030,81 @@ public abstract class NativeOps extends Pointer {
boolean descending); boolean descending);
public abstract void sortCooIndices(PointerPointer extraPointers, @Cast("Nd4jLong *") LongPointer indices, Pointer values, long length, int rank); void sortCooIndices(PointerPointer extraPointers, @Cast("Nd4jLong *") LongPointer indices, Pointer values, long length, int rank);
public abstract LongPointer mmapFile(PointerPointer extraPointers, String fileName, long length); LongPointer mmapFile(PointerPointer extraPointers, String fileName, long length);
public abstract void munmapFile(PointerPointer extraPointers, LongPointer ptrMap, long length); void munmapFile(PointerPointer extraPointers, LongPointer ptrMap, long length);
public abstract ResultWrapperAbstraction executeFlatGraph(PointerPointer extraPointers, Pointer flatBufferPointer); ResultWrapperAbstraction executeFlatGraph(PointerPointer extraPointers, Pointer flatBufferPointer);
public abstract String getAllCustomOps(); String getAllCustomOps();
public abstract String getAllOperations(); String getAllOperations();
public abstract int execCustomOp(PointerPointer extraPointers, long opHashCode, Pointer context); int execCustomOp2(PointerPointer extraPointers, long opHashCode, Pointer context);
public abstract int execCustomOp(PointerPointer extraPointers, long opHashCode, PointerPointer inputBuffers, PointerPointer inputShapes, int numInput, PointerPointer outputBuffers, PointerPointer outputShapes, int numOutputs, DoublePointer tArgs, int numTArgs, @Cast("Nd4jLong *") LongPointer iArgs, int numIArgs, @Cast("bool *") BooleanPointer bArgs, int numBArgs, boolean isInplace); int execCustomOp(PointerPointer extraPointers, long opHashCode, PointerPointer inputBuffers, PointerPointer inputShapes, int numInput, PointerPointer outputBuffers, PointerPointer outputShapes, int numOutputs, DoublePointer tArgs, int numTArgs, @Cast("Nd4jLong *") LongPointer iArgs, int numIArgs, @Cast("bool *") BooleanPointer bArgs, int numBArgs, boolean isInplace);
public abstract Pointer calculateOutputShapes(PointerPointer extraPointers, long hash, PointerPointer inputShapes, int numInputShapes, DoublePointer tArgs, int numTArgs, @Cast("Nd4jLong *") LongPointer iArgs, int numIArgs); Pointer calculateOutputShapes(PointerPointer extraPointers, long hash, PointerPointer inputShapes, int numInputShapes, DoublePointer tArgs, int numTArgs, @Cast("Nd4jLong *") LongPointer iArgs, int numIArgs);
public abstract Pointer calculateOutputShapes(PointerPointer extraPointers, long hash, PointerPointer inputBunffers, PointerPointer inputShapes, int numInputShapes, DoublePointer tArgs, int numTArgs, @Cast("Nd4jLong *") LongPointer iArgs, int numIArgs, @Cast("bool *") BooleanPointer bArgs, int numBArgs); Pointer calculateOutputShapes2(PointerPointer extraPointers, long hash, PointerPointer inputBunffers, PointerPointer inputShapes, int numInputShapes, DoublePointer tArgs, int numTArgs, @Cast("Nd4jLong *") LongPointer iArgs, int numIArgs, @Cast("bool *") BooleanPointer bArgs, int numBArgs);
public abstract int registerGraph(PointerPointer extraPointers, long graphId, Pointer flatBufferPointer); int registerGraph(PointerPointer extraPointers, long graphId, Pointer flatBufferPointer);
public abstract Pointer executeStoredGraph(PointerPointer extraPointers, long graphId, PointerPointer inputBuffers, PointerPointer inputShapes, IntPointer inputIndices, int numInputs); Pointer executeStoredGraph(PointerPointer extraPointers, long graphId, PointerPointer inputBuffers, PointerPointer inputShapes, IntPointer inputIndices, int numInputs);
public abstract void deleteResultWrapper(Pointer ptr); void deleteResultWrapper(Pointer ptr);
public abstract void deleteShapeList(Pointer ptr); void deleteShapeList(Pointer ptr);
public abstract int unregisterGraph(PointerPointer extraPointers, long graphId); int unregisterGraph(PointerPointer extraPointers, long graphId);
public abstract void deleteIntArray(Pointer pointer); void deleteIntArray(Pointer pointer);
public abstract void deleteLongArray(Pointer pointer); void deleteLongArray(Pointer pointer);
public abstract void deletePointerArray(Pointer pointer); void deletePointerArray(Pointer pointer);
public abstract void deleteNPArrayStruct(Pointer pointer); void deleteNPArrayStruct(Pointer pointer);
public abstract void deleteNPArrayMap(Pointer pointer); void deleteNPArrayMap(Pointer pointer);
public abstract void deleteVariablesSet(Pointer pointer); void deleteVariablesSet(Pointer pointer);
// GraphState creation // GraphState creation
public abstract Pointer getGraphState(long id); Pointer getGraphState(long id);
public abstract void deleteShapeBuffer(Pointer state); void deleteShapeBuffer(Pointer state);
public abstract void deleteTadPack(Pointer pointer); void deleteTadPack(Pointer pointer);
public abstract void deleteGraphState(Pointer state); void deleteGraphState(Pointer state);
public abstract int estimateThreshold(PointerPointer extraPointers, Pointer x, LongPointer xShapeInfo, int N, float threshold); int estimateThreshold(PointerPointer extraPointers, Pointer x, LongPointer xShapeInfo, int N, float threshold);
// this method executes op that requires scope to be present: if/while/cond/whatever // this method executes op that requires scope to be present: if/while/cond/whatever
public abstract int execCustomOpWithScope(PointerPointer extraPointers, Pointer state, long opHash, long[] scopes, int numScopes, PointerPointer inputBuffers, PointerPointer inputShapes, int numInputs, PointerPointer outputBuffers, PointerPointer outputShapes, int numOutputs); int execCustomOpWithScope(PointerPointer extraPointers, Pointer state, long opHash, long[] scopes, int numScopes, PointerPointer inputBuffers, PointerPointer inputShapes, int numInputs, PointerPointer outputBuffers, PointerPointer outputShapes, int numOutputs);
public abstract void scatterUpdate(PointerPointer extraPointers, int opCode, int numOfUpdates, void scatterUpdate(PointerPointer extraPointers, int opCode, int numOfUpdates,
Pointer hX, @Cast("Nd4jLong *") LongPointer hXShapeInfo, @Cast("Nd4jLong *") LongPointer hxOffsets, Pointer hX, @Cast("Nd4jLong *") LongPointer hXShapeInfo, @Cast("Nd4jLong *") LongPointer hxOffsets,
Pointer dX, @Cast("Nd4jLong *") LongPointer dXShapeInfo, @Cast("Nd4jLong *") LongPointer dxOffsets, Pointer dX, @Cast("Nd4jLong *") LongPointer dXShapeInfo, @Cast("Nd4jLong *") LongPointer dxOffsets,
Pointer hY, @Cast("Nd4jLong *") LongPointer hYShapeInfo, @Cast("Nd4jLong *") LongPointer hyOffsets, Pointer hY, @Cast("Nd4jLong *") LongPointer hYShapeInfo, @Cast("Nd4jLong *") LongPointer hyOffsets,
Pointer dY, @Cast("Nd4jLong *") LongPointer dYShapeInfo, @Cast("Nd4jLong *") LongPointer dyOffsets, Pointer dY, @Cast("Nd4jLong *") LongPointer dYShapeInfo, @Cast("Nd4jLong *") LongPointer dyOffsets,
IntPointer hIndices, IntPointer dIndices); IntPointer hIndices, IntPointer dIndices);
//public abstract void fillUtf8String(PointerPointer extraPointers, String[] string, int numStrings, Pointer buffer); //void fillUtf8String(PointerPointer extraPointers, String[] string, int numStrings, Pointer buffer);
public abstract Pointer createUtf8String(PointerPointer extraPointers, String string, int length); Pointer createUtf8String(PointerPointer extraPointers, String string, int length);
public abstract void deleteUtf8String(PointerPointer extraPointers, Pointer ptr); void deleteUtf8String(PointerPointer extraPointers, Pointer ptr);
public abstract void inspectArray(PointerPointer extraPointers, Pointer buffer, @Cast("Nd4jLong *") LongPointer shapeInfo, Pointer specialBuffer, @Cast("Nd4jLong *") LongPointer specialShapeInfo, @Cast("nd4j::DebugInfo *") Pointer debugInfo); void inspectArray(PointerPointer extraPointers, Pointer buffer, @Cast("Nd4jLong *") LongPointer shapeInfo, Pointer specialBuffer, @Cast("Nd4jLong *") LongPointer specialShapeInfo, @Cast("nd4j::DebugInfo *") Pointer debugInfo);
/** /**
* this method tries to read numBytes bytes from buffer to provoke crash in certain scenarios * this method tries to read numBytes bytes from buffer to provoke crash in certain scenarios
*/ */
public abstract void tryPointer(Pointer extras, Pointer buffer, int numBytesToRead); void tryPointer(Pointer extras, Pointer buffer, int numBytesToRead);
/** /**
@ -1158,17 +1114,17 @@ public abstract class NativeOps extends Pointer {
* @param numpyHeader * @param numpyHeader
* @return * @return
*/ */
public abstract int dataTypeFromNpyHeader(Pointer numpyHeader); int dataTypeFromNpyHeader(Pointer numpyHeader);
public abstract Pointer shapeBuffer(int rank, @Cast("Nd4jLong *") LongPointer shape, @Cast("Nd4jLong *") LongPointer strides, int dtype, char order, long ews, boolean empty); Pointer shapeBuffer(int rank, @Cast("Nd4jLong *") LongPointer shape, @Cast("Nd4jLong *") LongPointer strides, int dtype, char order, long ews, boolean empty);
public abstract Pointer constantBuffer(int dtype, DoublePointer data, int length); Pointer constantBufferDouble(int dtype, DoublePointer data, int length);
public abstract Pointer constantBuffer(int dtype, @Cast("Nd4jLong *") LongPointer data, int length); Pointer constantBufferLong(int dtype, @Cast("Nd4jLong *") LongPointer data, int length);
public abstract String runLightBenchmarkSuit(boolean printOut); String runLightBenchmarkSuit(boolean printOut);
public abstract String runFullBenchmarkSuit(boolean printOut); String runFullBenchmarkSuit(boolean printOut);
public abstract long getCachedMemory(int deviceId); long getCachedMemory(int deviceId);
} }

View File

@ -36,6 +36,46 @@ public class NativeOpsHolder {
@Getter @Getter
private final NativeOps deviceNativeOps; private final NativeOps deviceNativeOps;
/**
 * Heuristically derives a thread count from a logical processor count.
 *
 * <p>The logical count is first halved (HyperThreading is always assumed), then
 * adjusted by a handful of hard-coded special cases.
 *
 * @param totals number of logical processors, e.g. {@code Runtime.getRuntime().availableProcessors()}
 * @return the number of threads to use; never less than 4 and never more than 64
 */
public static int getCores(int totals) {
    // Xeon Phi special case: very large logical counts are capped outright
    if (totals >= 256) {
        return 64;
    }

    // HyperThreading is counted off unconditionally
    int physical = totals / 2;

    // small machines (the "Intel i5" special case) are floored at 4
    if (physical <= 4) {
        return 4;
    }

    if (physical <= 24) {
        // 5..6 is most likely a consumer-grade CPU, and an odd count is taken as final
        if (physical <= 6 || isOdd(physical)) {
            return physical;
        }
        // 20 and 16 are the special cases in this range: use half of them
        boolean halve = physical == 20 || physical == 16;
        return halve ? physical / 2 : physical;
    }

    // above 24: keep halving until the value is at most 24, remembering how often we halved
    int halvings = 0;
    do {
        physical /= 2;
        halvings++;
    } while (physical > 24);

    // landing on 20 after a single halving is a special case: halve once more
    if (physical == 20 && halvings < 2) {
        physical /= 2;
    }
    return physical;
}

/**
 * Tells whether the given value is odd.
 *
 * @param value the number to test
 * @return {@code true} if {@code value} is not divisible by 2
 */
private static boolean isOdd(int value) {
    return value % 2 != 0;
}
private NativeOpsHolder() { private NativeOpsHolder() {
try { try {
Properties props = Nd4jContext.getInstance().getConf(); Properties props = Nd4jContext.getInstance().getConf();
@ -57,7 +97,7 @@ public class NativeOpsHolder {
deviceNativeOps.setOmpNumThreads(Math.max(1, cores / chips)); deviceNativeOps.setOmpNumThreads(Math.max(1, cores / chips));
} else } else
deviceNativeOps.setOmpNumThreads( deviceNativeOps.setOmpNumThreads(
deviceNativeOps.getCores(Runtime.getRuntime().availableProcessors())); getCores(Runtime.getRuntime().availableProcessors()));
} }
//deviceNativeOps.setOmpNumThreads(4); //deviceNativeOps.setOmpNumThreads(4);

View File

@ -48,7 +48,7 @@ public abstract class Nd4jBlas implements Blas {
if (cores > 0 && chips > 0) if (cores > 0 && chips > 0)
numThreads = Math.max(1, cores / chips); numThreads = Math.max(1, cores / chips);
else else
numThreads = NativeOps.getCores(Runtime.getRuntime().availableProcessors()); numThreads = NativeOpsHolder.getCores(Runtime.getRuntime().availableProcessors());
setMaxThreads(numThreads); setMaxThreads(numThreads);
} }
log.info("Number of threads used for BLAS: {}", getMaxThreads()); log.info("Number of threads used for BLAS: {}", getMaxThreads());

View File

@ -318,7 +318,7 @@ public class CudaExecutioner extends DefaultOpExecutioner {
AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y()); AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
} else { } else {
nativeOps.execSummaryStats(xShapeInfoHostPointer, op.opNum(), nativeOps.execSummaryStatsTad(xShapeInfoHostPointer, op.opNum(),
null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo, null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo,
extraArgs, extraArgs,
null, (LongPointer) hostZShapeInfo, AtomicAllocator.getInstance().getPointer(op.z(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context), null, (LongPointer) hostZShapeInfo, AtomicAllocator.getInstance().getPointer(op.z(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context),
@ -360,7 +360,7 @@ public class CudaExecutioner extends DefaultOpExecutioner {
null, (LongPointer) hostZShapeInfo, AtomicAllocator.getInstance().getPointer(op.z(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context)); null, (LongPointer) hostZShapeInfo, AtomicAllocator.getInstance().getPointer(op.z(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context));
AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y()); AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
} else { } else {
nativeOps.execReduce3(xShapeInfoHostPointer, op.opNum(), nativeOps.execReduce3Tad(xShapeInfoHostPointer, op.opNum(),
null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo, null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo,
extraArgs, extraArgs,
null, (LongPointer) hostYShapeInfo, AtomicAllocator.getInstance().getPointer(op.y(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context), null, (LongPointer) hostYShapeInfo, AtomicAllocator.getInstance().getPointer(op.y(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context),
@ -408,7 +408,7 @@ public class CudaExecutioner extends DefaultOpExecutioner {
} else { } else {
switch (op.getOpType()) { switch (op.getOpType()) {
case REDUCE_FLOAT: case REDUCE_FLOAT:
nativeOps.execReduceFloat(xShapeInfoHostPointer, op.opNum(), nativeOps.execReduceFloat2(xShapeInfoHostPointer, op.opNum(),
null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo, null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo,
extraArgs, extraArgs,
null, (LongPointer) hostZShapeInfo, AtomicAllocator.getInstance().getPointer(op.z(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context), null, (LongPointer) hostZShapeInfo, AtomicAllocator.getInstance().getPointer(op.z(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context),
@ -418,7 +418,7 @@ public class CudaExecutioner extends DefaultOpExecutioner {
null); null);
break; break;
case REDUCE_BOOL: case REDUCE_BOOL:
nativeOps.execReduceBool(xShapeInfoHostPointer, op.opNum(), nativeOps.execReduceBool2(xShapeInfoHostPointer, op.opNum(),
null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo, null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo,
extraArgs, extraArgs,
null, (LongPointer) hostZShapeInfo, AtomicAllocator.getInstance().getPointer(op.z(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context), null, (LongPointer) hostZShapeInfo, AtomicAllocator.getInstance().getPointer(op.z(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context),
@ -428,7 +428,7 @@ public class CudaExecutioner extends DefaultOpExecutioner {
null); null);
break; break;
case REDUCE_SAME: case REDUCE_SAME:
nativeOps.execReduceSame(xShapeInfoHostPointer, op.opNum(), nativeOps.execReduceSame2(xShapeInfoHostPointer, op.opNum(),
null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo, null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo,
extraArgs, extraArgs,
null, (LongPointer) hostZShapeInfo, AtomicAllocator.getInstance().getPointer(op.z(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context), null, (LongPointer) hostZShapeInfo, AtomicAllocator.getInstance().getPointer(op.z(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context),
@ -438,7 +438,7 @@ public class CudaExecutioner extends DefaultOpExecutioner {
null); null);
break; break;
case REDUCE_LONG: case REDUCE_LONG:
nativeOps.execReduceLong(xShapeInfoHostPointer, op.opNum(), nativeOps.execReduceLong2(xShapeInfoHostPointer, op.opNum(),
null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo, null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo,
extraArgs, extraArgs,
null, (LongPointer) hostZShapeInfo, AtomicAllocator.getInstance().getPointer(op.z(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context), null, (LongPointer) hostZShapeInfo, AtomicAllocator.getInstance().getPointer(op.z(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context),
@ -1027,7 +1027,7 @@ public class CudaExecutioner extends DefaultOpExecutioner {
if (op.y() != null) { if (op.y() != null) {
val y = AtomicAllocator.getInstance().getPointer(op.y(), context); val y = AtomicAllocator.getInstance().getPointer(op.y(), context);
val yShapeInfo = AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context); val yShapeInfo = AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context);
nativeOps.execReduce3(xShapeInfoHostPointer, op.opNum(), nativeOps.execReduce3Tad(xShapeInfoHostPointer, op.opNum(),
null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo, null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo,
extraArgs, extraArgs,
null, (LongPointer) hostYShapeInfo, y, (LongPointer) yShapeInfo, null, (LongPointer) hostYShapeInfo, y, (LongPointer) yShapeInfo,
@ -1037,7 +1037,7 @@ public class CudaExecutioner extends DefaultOpExecutioner {
dimensionPointer, null, (LongPointer) devTadShapeInfo, (LongPointer) devTadOffsets, (LongPointer) yDevTadShapeInfo, (LongPointer) yDevTadOffsets); dimensionPointer, null, (LongPointer) devTadShapeInfo, (LongPointer) devTadOffsets, (LongPointer) yDevTadShapeInfo, (LongPointer) yDevTadOffsets);
} else { } else {
if (op instanceof Variance) { if (op instanceof Variance) {
nativeOps.execSummaryStats(xShapeInfoHostPointer, op.opNum(), nativeOps.execSummaryStatsTad(xShapeInfoHostPointer, op.opNum(),
null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo, null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo,
extraArgs, extraArgs,
null, (LongPointer) hostZShapeInfo, z, (LongPointer) zShapeInfo, null, (LongPointer) hostZShapeInfo, z, (LongPointer) zShapeInfo,
@ -1051,7 +1051,7 @@ public class CudaExecutioner extends DefaultOpExecutioner {
} else { } else {
switch (op.getOpType()) { switch (op.getOpType()) {
case REDUCE_FLOAT: case REDUCE_FLOAT:
nativeOps.execReduceFloat(xShapeInfoHostPointer, op.opNum(), nativeOps.execReduceFloat2(xShapeInfoHostPointer, op.opNum(),
null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo, null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo,
extraArgs, extraArgs,
null, (LongPointer) hostZShapeInfo, z, (LongPointer) zShapeInfo, null, (LongPointer) hostZShapeInfo, z, (LongPointer) zShapeInfo,
@ -1061,7 +1061,7 @@ public class CudaExecutioner extends DefaultOpExecutioner {
null); null);
break; break;
case REDUCE_SAME: case REDUCE_SAME:
nativeOps.execReduceSame(xShapeInfoHostPointer, op.opNum(), nativeOps.execReduceSame2(xShapeInfoHostPointer, op.opNum(),
null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo, null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo,
extraArgs, extraArgs,
null, (LongPointer) hostZShapeInfo, z, (LongPointer) zShapeInfo, null, (LongPointer) hostZShapeInfo, z, (LongPointer) zShapeInfo,
@ -1071,7 +1071,7 @@ public class CudaExecutioner extends DefaultOpExecutioner {
null); null);
break; break;
case REDUCE_BOOL: case REDUCE_BOOL:
nativeOps.execReduceBool(xShapeInfoHostPointer, op.opNum(), nativeOps.execReduceBool2(xShapeInfoHostPointer, op.opNum(),
null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo, null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo,
extraArgs, extraArgs,
null, (LongPointer) hostZShapeInfo, z, (LongPointer) zShapeInfo, null, (LongPointer) hostZShapeInfo, z, (LongPointer) zShapeInfo,
@ -1081,7 +1081,7 @@ public class CudaExecutioner extends DefaultOpExecutioner {
null); null);
break; break;
case REDUCE_LONG: case REDUCE_LONG:
nativeOps.execReduceLong(xShapeInfoHostPointer, op.opNum(), nativeOps.execReduceLong2(xShapeInfoHostPointer, op.opNum(),
null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo, null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo,
extraArgs, extraArgs,
null, (LongPointer) hostZShapeInfo, z, (LongPointer) zShapeInfo, null, (LongPointer) hostZShapeInfo, z, (LongPointer) zShapeInfo,
@ -1159,7 +1159,7 @@ public class CudaExecutioner extends DefaultOpExecutioner {
switch (op.getOpType()) { switch (op.getOpType()) {
case SCALAR: case SCALAR:
nativeOps.execScalar(extraPointers, op.opNum(), nativeOps.execScalarTad(extraPointers, op.opNum(),
null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo, null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo,
null, (LongPointer) hostZShapeInfo, z, (LongPointer) zShapeInfo, null, (LongPointer) hostZShapeInfo, z, (LongPointer) zShapeInfo,
null, (LongPointer) hostYShapeInfo, y, (LongPointer) yShapeInfo, null, (LongPointer) hostYShapeInfo, y, (LongPointer) yShapeInfo,
@ -1172,7 +1172,7 @@ public class CudaExecutioner extends DefaultOpExecutioner {
(LongPointer) devTadShapeInfoZ, (LongPointer) devTadOffsetsZ); (LongPointer) devTadShapeInfoZ, (LongPointer) devTadOffsetsZ);
break; break;
case SCALAR_BOOL: case SCALAR_BOOL:
nativeOps.execScalarBool(extraPointers, op.opNum(), nativeOps.execScalarBoolTad(extraPointers, op.opNum(),
null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo, null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo,
null, (LongPointer) hostZShapeInfo, z, (LongPointer) zShapeInfo, null, (LongPointer) hostZShapeInfo, z, (LongPointer) zShapeInfo,
null, (LongPointer) hostYShapeInfo, y, (LongPointer) yShapeInfo, null, (LongPointer) hostYShapeInfo, y, (LongPointer) yShapeInfo,
@ -1777,7 +1777,7 @@ public class CudaExecutioner extends DefaultOpExecutioner {
if (op.x() != null && op.y() != null && op.z() != null) { if (op.x() != null && op.y() != null && op.z() != null) {
// triple arg call // triple arg call
nativeOps.execRandom(extraZZ, op.opNum(), rng.getStatePointer(), // rng state ptr nativeOps.execRandom3(extraZZ, op.opNum(), rng.getStatePointer(), // rng state ptr
null, (LongPointer) hostXShapeInfo, AtomicAllocator.getInstance().getPointer(op.x(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.x().shapeInfoDataBuffer(), context), null, (LongPointer) hostXShapeInfo, AtomicAllocator.getInstance().getPointer(op.x(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.x().shapeInfoDataBuffer(), context),
null, (LongPointer) hostYShapeInfo, AtomicAllocator.getInstance().getPointer(op.y(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context), null, (LongPointer) hostYShapeInfo, AtomicAllocator.getInstance().getPointer(op.y(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context),
null, (LongPointer) hostZShapeInfo, AtomicAllocator.getInstance().getPointer(op.z(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context), null, (LongPointer) hostZShapeInfo, AtomicAllocator.getInstance().getPointer(op.z(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context),
@ -1785,7 +1785,7 @@ public class CudaExecutioner extends DefaultOpExecutioner {
} else if (op.x() != null && op.z() != null) { } else if (op.x() != null && op.z() != null) {
//double arg call //double arg call
nativeOps.execRandom(extraZZ, op.opNum(), rng.getStatePointer(), // rng state ptr nativeOps.execRandom2(extraZZ, op.opNum(), rng.getStatePointer(), // rng state ptr
null, (LongPointer) hostXShapeInfo, AtomicAllocator.getInstance().getPointer(op.x(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.x().shapeInfoDataBuffer(), context), null, (LongPointer) hostXShapeInfo, AtomicAllocator.getInstance().getPointer(op.x(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.x().shapeInfoDataBuffer(), context),
null, (LongPointer) hostZShapeInfo, AtomicAllocator.getInstance().getPointer(op.z(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context), null, (LongPointer) hostZShapeInfo, AtomicAllocator.getInstance().getPointer(op.z(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context),
AtomicAllocator.getInstance().getPointer(op.extraArgsDataBuff(op.z().dataType()),context)); AtomicAllocator.getInstance().getPointer(op.extraArgsDataBuff(op.z().dataType()),context));
@ -2208,7 +2208,7 @@ public class CudaExecutioner extends DefaultOpExecutioner {
for (val t: op.tArgs()) for (val t: op.tArgs())
tArgs.put(cnt++, (float) t); tArgs.put(cnt++, (float) t);
val ptrptr = (Nd4jCuda.ShapeList) nativeOps.calculateOutputShapes(null, hash, inputBuffers, inputShapes, op.inputArguments().length, tArgs, op.tArgs().length, iArgs, op.iArgs().length, bArgs, op.numBArguments()); val ptrptr = (Nd4jCuda.ShapeList) nativeOps.calculateOutputShapes2(null, hash, inputBuffers, inputShapes, op.inputArguments().length, tArgs, op.tArgs().length, iArgs, op.iArgs().length, bArgs, op.numBArguments());
if (ptrptr == null) if (ptrptr == null)
throw new RuntimeException(); throw new RuntimeException();
@ -2539,7 +2539,7 @@ public class CudaExecutioner extends DefaultOpExecutioner {
val ctx = (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext(); val ctx = (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext();
((CudaOpContext) context).setCudaStream(ctx.getOldStream(), ctx.getBufferReduction(), ctx.getBufferAllocation()); ((CudaOpContext) context).setCudaStream(ctx.getOldStream(), ctx.getBufferReduction(), ctx.getBufferAllocation());
nativeOps.execCustomOp(null, op.opHash(), context.contextPointer()); nativeOps.execCustomOp2(null, op.opHash(), context.contextPointer());
if (context.getOutputArrays().isEmpty()) if (context.getOutputArrays().isEmpty())
return new INDArray[0]; return new INDArray[0];
@ -2607,7 +2607,7 @@ public class CudaExecutioner extends DefaultOpExecutioner {
@Override @Override
public DataBuffer createConstantBuffer(long[] values, DataType desiredType) { public DataBuffer createConstantBuffer(long[] values, DataType desiredType) {
val dbf = (Nd4jCuda.ConstantDataBuffer) nativeOps.constantBuffer(desiredType.toInt(), new LongPointer(values), values.length); val dbf = (Nd4jCuda.ConstantDataBuffer) nativeOps.constantBufferLong(desiredType.toInt(), new LongPointer(values), values.length);
val buffer = Nd4j.createBuffer(dbf.primary(), dbf.special(), values.length, desiredType); val buffer = Nd4j.createBuffer(dbf.primary(), dbf.special(), values.length, desiredType);
buffer.setConstant(true); buffer.setConstant(true);
@ -2617,7 +2617,7 @@ public class CudaExecutioner extends DefaultOpExecutioner {
@Override @Override
public DataBuffer createConstantBuffer(double[] values, DataType desiredType) { public DataBuffer createConstantBuffer(double[] values, DataType desiredType) {
val dbf = (Nd4jCuda.ConstantDataBuffer) nativeOps.constantBuffer(desiredType.toInt(), new DoublePointer(values), values.length); val dbf = (Nd4jCuda.ConstantDataBuffer) nativeOps.constantBufferDouble(desiredType.toInt(), new DoublePointer(values), values.length);
val buffer = Nd4j.createBuffer(dbf.primary(), dbf.special(), values.length, desiredType); val buffer = Nd4j.createBuffer(dbf.primary(), dbf.special(), values.length, desiredType);
buffer.setConstant(true); buffer.setConstant(true);

View File

@ -0,0 +1,20 @@
/*******************************************************************************
* Copyright (c) 2015-2019 Skymind, Inc.
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* SPDX-License-Identifier: Apache-2.0
******************************************************************************/
package org.nd4j.nativeblas;
/**
 * Abstract helper class that extends {@link Nd4jCudaPresets} and implements
 * {@link NativeOps}, declaring no members of its own.
 *
 * <p>This class is referenced by name through the {@code helper} attribute of the
 * JavaCPP {@code @Properties} annotation whose target is
 * {@code org.nd4j.nativeblas.Nd4jCuda}.
 *
 * <p>NOTE(review): presumably the generated {@code Nd4jCuda} class is made to extend
 * this helper so that it satisfies the {@code NativeOps} interface -- confirm against
 * the JavaCPP {@code @Properties(helper = ...)} documentation.
 */
public abstract class Nd4jCudaHelper extends Nd4jCudaPresets implements NativeOps {
}

View File

@ -26,7 +26,7 @@ import org.bytedeco.javacpp.tools.InfoMapper;
* *
* @author saudet * @author saudet
*/ */
@Properties(target = "org.nd4j.nativeblas.Nd4jCuda", @Properties(target = "org.nd4j.nativeblas.Nd4jCuda", helper = "org.nd4j.nativeblas.Nd4jCudaHelper",
value = {@Platform(define = "LIBND4J_ALL_OPS", include = { value = {@Platform(define = "LIBND4J_ALL_OPS", include = {
"array/DataType.h", "array/DataType.h",
"array/ConstantDescriptor.h", "array/ConstantDescriptor.h",
@ -112,7 +112,7 @@ public class Nd4jCudaPresets implements InfoMapper {
public void map(InfoMap infoMap) { public void map(InfoMap infoMap) {
infoMap.put(new Info("thread_local", "ND4J_EXPORT", "INLINEDEF", "CUBLASWINAPI", "FORCEINLINE", infoMap.put(new Info("thread_local", "ND4J_EXPORT", "INLINEDEF", "CUBLASWINAPI", "FORCEINLINE",
"_CUDA_H", "_CUDA_D", "_CUDA_G", "_CUDA_HD", "LIBND4J_ALL_OPS", "NOT_EXCLUDED").cppTypes().annotations()) "_CUDA_H", "_CUDA_D", "_CUDA_G", "_CUDA_HD", "LIBND4J_ALL_OPS", "NOT_EXCLUDED").cppTypes().annotations())
.put(new Info("NativeOps").base("org.nd4j.nativeblas.NativeOps")) .put(new Info("NativeOps.h").objectify())
.put(new Info("const char").valueTypes("byte").pointerTypes("@Cast(\"char*\") String", .put(new Info("const char").valueTypes("byte").pointerTypes("@Cast(\"char*\") String",
"@Cast(\"char*\") BytePointer")) "@Cast(\"char*\") BytePointer"))
.put(new Info("char").valueTypes("char").pointerTypes("@Cast(\"char*\") BytePointer", .put(new Info("char").valueTypes("char").pointerTypes("@Cast(\"char*\") BytePointer",

View File

@ -20,7 +20,7 @@ dtype = float
complex.double.class = org.nd4j.linalg.jcublas.complex.ComplexDouble complex.double.class = org.nd4j.linalg.jcublas.complex.ComplexDouble
complex.float.class = org.nd4j.linalg.jcublas.complex.ComplexFloat complex.float.class = org.nd4j.linalg.jcublas.complex.ComplexFloat
blas.ops= org.nd4j.linalg.jcublas.JCublasWrapper blas.ops= org.nd4j.linalg.jcublas.JCublasWrapper
native.ops= org.nd4j.nativeblas.Nd4jCuda$NativeOps native.ops= org.nd4j.nativeblas.Nd4jCuda
ndarrayfactory.class = org.nd4j.linalg.jcublas.JCublasNDArrayFactory ndarrayfactory.class = org.nd4j.linalg.jcublas.JCublasNDArrayFactory
affinitymanager = org.nd4j.jita.concurrency.CudaAffinityManager affinitymanager = org.nd4j.jita.concurrency.CudaAffinityManager
memorymanager = org.nd4j.jita.memory.CudaMemoryManager memorymanager = org.nd4j.jita.memory.CudaMemoryManager

View File

@ -392,7 +392,7 @@ public class NativeOpExecutioner extends DefaultOpExecutioner {
} else { } else {
Variance var = (Variance) op; Variance var = (Variance) op;
try { try {
loop.execSummaryStats(null, op.opNum(), loop.execSummaryStatsTad(null, op.opNum(),
op.x().data().addressPointer(), (LongPointer) op.x().shapeInfoDataBuffer().addressPointer(), op.x().data().addressPointer(), (LongPointer) op.x().shapeInfoDataBuffer().addressPointer(),
null, null, null, null,
getPointerForExtraArgs(op, op.z().dataType()), getPointerForExtraArgs(op, op.z().dataType()),
@ -445,7 +445,7 @@ public class NativeOpExecutioner extends DefaultOpExecutioner {
null, null); null, null);
} else { } else {
try { try {
loop.execReduce3(null, op.opNum(), loop.execReduce3Tad(null, op.opNum(),
op.x().data().addressPointer(), (LongPointer) op.x().shapeInfoDataBuffer().addressPointer(), op.x().data().addressPointer(), (LongPointer) op.x().shapeInfoDataBuffer().addressPointer(),
null, null, null, null,
getPointerForExtraArgs(op, op.z().dataType()), getPointerForExtraArgs(op, op.z().dataType()),
@ -505,7 +505,7 @@ public class NativeOpExecutioner extends DefaultOpExecutioner {
} else { } else {
switch (op.getOpType()) { switch (op.getOpType()) {
case REDUCE_FLOAT: case REDUCE_FLOAT:
loop.execReduceFloat(null, op.opNum(), loop.execReduceFloat2(null, op.opNum(),
op.x().data().addressPointer(), (LongPointer) op.x().shapeInfoDataBuffer().addressPointer(), op.x().data().addressPointer(), (LongPointer) op.x().shapeInfoDataBuffer().addressPointer(),
null, null, null, null,
getPointerForExtraArgs(op, op.z().dataType()), getPointerForExtraArgs(op, op.z().dataType()),
@ -517,7 +517,7 @@ public class NativeOpExecutioner extends DefaultOpExecutioner {
null); null);
break; break;
case REDUCE_LONG: case REDUCE_LONG:
loop.execReduceLong(null, op.opNum(), loop.execReduceLong2(null, op.opNum(),
op.x().data().addressPointer(), (LongPointer) op.x().shapeInfoDataBuffer().addressPointer(), op.x().data().addressPointer(), (LongPointer) op.x().shapeInfoDataBuffer().addressPointer(),
null, null, null, null,
getPointerForExtraArgs(op, op.x().dataType()), getPointerForExtraArgs(op, op.x().dataType()),
@ -529,7 +529,7 @@ public class NativeOpExecutioner extends DefaultOpExecutioner {
null); null);
break; break;
case REDUCE_SAME: case REDUCE_SAME:
loop.execReduceSame(null, op.opNum(), loop.execReduceSame2(null, op.opNum(),
op.x().data().addressPointer(), (LongPointer) op.x().shapeInfoDataBuffer().addressPointer(), op.x().data().addressPointer(), (LongPointer) op.x().shapeInfoDataBuffer().addressPointer(),
null, null, null, null,
getPointerForExtraArgs(op, op.z().dataType()), getPointerForExtraArgs(op, op.z().dataType()),
@ -541,7 +541,7 @@ public class NativeOpExecutioner extends DefaultOpExecutioner {
null); null);
break; break;
case REDUCE_BOOL: case REDUCE_BOOL:
loop.execReduceBool(null, op.opNum(), loop.execReduceBool2(null, op.opNum(),
op.x().data().addressPointer(), (LongPointer) op.x().shapeInfoDataBuffer().addressPointer(), op.x().data().addressPointer(), (LongPointer) op.x().shapeInfoDataBuffer().addressPointer(),
null, null, null, null,
getPointerForExtraArgs(op, op.x().dataType()), getPointerForExtraArgs(op, op.x().dataType()),
@ -604,7 +604,7 @@ public class NativeOpExecutioner extends DefaultOpExecutioner {
switch (op.getOpType()) { switch (op.getOpType()) {
case SCALAR: case SCALAR:
loop.execScalar(null, op.opNum(), loop.execScalarTad(null, op.opNum(),
op.x().data().addressPointer(), (LongPointer) op.x().shapeInfoDataBuffer().addressPointer(), op.x().data().addressPointer(), (LongPointer) op.x().shapeInfoDataBuffer().addressPointer(),
null, null, null, null,
op.z().data().addressPointer(), (LongPointer) op.z().shapeInfoDataBuffer().addressPointer(), op.z().data().addressPointer(), (LongPointer) op.z().shapeInfoDataBuffer().addressPointer(),
@ -620,7 +620,7 @@ public class NativeOpExecutioner extends DefaultOpExecutioner {
(LongPointer) devTadShapeInfoZ, (LongPointer) devTadOffsetsZ); (LongPointer) devTadShapeInfoZ, (LongPointer) devTadOffsetsZ);
break; break;
case SCALAR_BOOL: case SCALAR_BOOL:
loop.execScalarBool(null, op.opNum(), loop.execScalarBoolTad(null, op.opNum(),
op.x().data().addressPointer(), (LongPointer) op.x().shapeInfoDataBuffer().addressPointer(), op.x().data().addressPointer(), (LongPointer) op.x().shapeInfoDataBuffer().addressPointer(),
null, null, null, null,
op.z().data().addressPointer(), (LongPointer) op.z().shapeInfoDataBuffer().addressPointer(), op.z().data().addressPointer(), (LongPointer) op.z().shapeInfoDataBuffer().addressPointer(),
@ -1255,7 +1255,7 @@ public class NativeOpExecutioner extends DefaultOpExecutioner {
if (op.x() != null && op.y() != null && op.z() != null) { if (op.x() != null && op.y() != null && op.z() != null) {
// triple arg call // triple arg call
loop.execRandom(null, op.opNum(), rng.getStatePointer(), // rng state ptr loop.execRandom3(null, op.opNum(), rng.getStatePointer(), // rng state ptr
op.x().data().addressPointer(), (LongPointer) op.x().shapeInfoDataBuffer().addressPointer(), op.x().data().addressPointer(), (LongPointer) op.x().shapeInfoDataBuffer().addressPointer(),
null, null, null, null,
op.y().data().addressPointer(), (LongPointer) op.y().shapeInfoDataBuffer().addressPointer(), op.y().data().addressPointer(), (LongPointer) op.y().shapeInfoDataBuffer().addressPointer(),
@ -1265,7 +1265,7 @@ public class NativeOpExecutioner extends DefaultOpExecutioner {
op.extraArgsDataBuff(op.z().dataType()).addressPointer()); op.extraArgsDataBuff(op.z().dataType()).addressPointer());
} else if (op.x() != null && op.z() != null) { } else if (op.x() != null && op.z() != null) {
//double arg call //double arg call
loop.execRandom(null, op.opNum(), rng.getStatePointer(), // rng state ptr loop.execRandom2(null, op.opNum(), rng.getStatePointer(), // rng state ptr
op.x().data().addressPointer(), (LongPointer) op.x().shapeInfoDataBuffer().addressPointer(), op.x().data().addressPointer(), (LongPointer) op.x().shapeInfoDataBuffer().addressPointer(),
null, null, null, null,
op.z().data().addressPointer(), (LongPointer) op.z().shapeInfoDataBuffer().addressPointer(), op.z().data().addressPointer(), (LongPointer) op.z().shapeInfoDataBuffer().addressPointer(),
@ -1862,7 +1862,7 @@ public class NativeOpExecutioner extends DefaultOpExecutioner {
Nd4jCpu.ShapeList ptrptr; Nd4jCpu.ShapeList ptrptr;
try { try {
ptrptr = (Nd4jCpu.ShapeList) loop.calculateOutputShapes(null, ptrptr = (Nd4jCpu.ShapeList) loop.calculateOutputShapes2(null,
hash, inputBuffers, inputShapes, op.numInputArguments(), tArgs, hash, inputBuffers, inputShapes, op.numInputArguments(), tArgs,
op.numTArguments(), iArgs, op.numIArguments(), bArgs, op.numBArguments()); op.numTArguments(), iArgs, op.numIArguments(), bArgs, op.numBArguments());
} catch (Throwable t){ } catch (Throwable t){
@ -2070,7 +2070,7 @@ public class NativeOpExecutioner extends DefaultOpExecutioner {
} }
} }
loop.execCustomOp(null, op.opHash(), context.contextPointer()); loop.execCustomOp2(null, op.opHash(), context.contextPointer());
if (context.getOutputArrays().isEmpty()) if (context.getOutputArrays().isEmpty())
return new INDArray[0]; return new INDArray[0];

View File

@ -0,0 +1,20 @@
/*******************************************************************************
* Copyright (c) 2015-2019 Skymind, Inc.
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* SPDX-License-Identifier: Apache-2.0
******************************************************************************/
package org.nd4j.nativeblas;
/**
 * Abstract helper class that extends {@link Nd4jCpuPresets} and implements
 * {@link NativeOps}, declaring no members of its own.
 *
 * <p>This class is referenced by name through the {@code helper} attribute of the
 * JavaCPP {@code @Properties} annotation whose target is
 * {@code org.nd4j.nativeblas.Nd4jCpu}.
 *
 * <p>NOTE(review): presumably the generated {@code Nd4jCpu} class is made to extend
 * this helper so that it satisfies the {@code NativeOps} interface -- confirm against
 * the JavaCPP {@code @Properties(helper = ...)} documentation.
 */
public abstract class Nd4jCpuHelper extends Nd4jCpuPresets implements NativeOps {
}

View File

@ -31,7 +31,7 @@ import java.util.Scanner;
* *
* @author saudet * @author saudet
*/ */
@Properties(target = "org.nd4j.nativeblas.Nd4jCpu", @Properties(target = "org.nd4j.nativeblas.Nd4jCpu", helper = "org.nd4j.nativeblas.Nd4jCpuHelper",
value = {@Platform(define = "LIBND4J_ALL_OPS", include = { value = {@Platform(define = "LIBND4J_ALL_OPS", include = {
"memory/MemoryType.h", "memory/MemoryType.h",
"array/DataType.h", "array/DataType.h",
@ -155,7 +155,7 @@ public class Nd4jCpuPresets implements InfoMapper, BuildEnabled {
public void map(InfoMap infoMap) { public void map(InfoMap infoMap) {
infoMap.put(new Info("thread_local", "ND4J_EXPORT", "INLINEDEF", "CUBLASWINAPI", "FORCEINLINE", infoMap.put(new Info("thread_local", "ND4J_EXPORT", "INLINEDEF", "CUBLASWINAPI", "FORCEINLINE",
"_CUDA_H", "_CUDA_D", "_CUDA_G", "_CUDA_HD", "LIBND4J_ALL_OPS", "NOT_EXCLUDED").cppTypes().annotations()) "_CUDA_H", "_CUDA_D", "_CUDA_G", "_CUDA_HD", "LIBND4J_ALL_OPS", "NOT_EXCLUDED").cppTypes().annotations())
.put(new Info("NativeOps").base("org.nd4j.nativeblas.NativeOps")) .put(new Info("NativeOps.h").objectify())
.put(new Info("const char").valueTypes("byte").pointerTypes("@Cast(\"char*\") String", .put(new Info("const char").valueTypes("byte").pointerTypes("@Cast(\"char*\") String",
"@Cast(\"char*\") BytePointer")) "@Cast(\"char*\") BytePointer"))
.put(new Info("char").valueTypes("char").pointerTypes("@Cast(\"char*\") BytePointer", .put(new Info("char").valueTypes("char").pointerTypes("@Cast(\"char*\") BytePointer",

View File

@ -25,7 +25,7 @@ dtype = float
complex.double.class = org.nd4j.linalg.cpu.nativecpu.complex.ComplexDouble complex.double.class = org.nd4j.linalg.cpu.nativecpu.complex.ComplexDouble
blas.ops = org.nd4j.linalg.cpu.nativecpu.BlasWrapper blas.ops = org.nd4j.linalg.cpu.nativecpu.BlasWrapper
sparseblas.ops = org.nd4j.linalg.cpu.nativecpu.SparseBlasWrapper sparseblas.ops = org.nd4j.linalg.cpu.nativecpu.SparseBlasWrapper
native.ops= org.nd4j.nativeblas.Nd4jCpu$NativeOps native.ops= org.nd4j.nativeblas.Nd4jCpu
ndarrayfactory.class = org.nd4j.linalg.cpu.nativecpu.CpuNDArrayFactory ndarrayfactory.class = org.nd4j.linalg.cpu.nativecpu.CpuNDArrayFactory
sparsendarrayfactory.class = org.nd4j.linalg.cpu.nativecpu.CpuSparseNDArrayFactory sparsendarrayfactory.class = org.nd4j.linalg.cpu.nativecpu.CpuSparseNDArrayFactory
ndarray.order = c ndarray.order = c
@ -36,4 +36,4 @@ alloc = javacpp
fft = org.nd4j.linalg.fft.DefaultFFTInstance fft = org.nd4j.linalg.fft.DefaultFFTInstance
opexec= org.nd4j.linalg.cpu.nativecpu.ops.NativeOpExecutioner opexec= org.nd4j.linalg.cpu.nativecpu.ops.NativeOpExecutioner
opexec.mode= native opexec.mode= native
random=org.nd4j.linalg.cpu.nativecpu.rng.CpuNativeRandom random=org.nd4j.linalg.cpu.nativecpu.rng.CpuNativeRandom

View File

@ -288,7 +288,7 @@
<javacpp.platform.extension/> <!-- -Djavacpp.platform.extension=-avx512 --> <javacpp.platform.extension/> <!-- -Djavacpp.platform.extension=-avx512 -->
<javacpp.platform.properties>${javacpp.platform}</javacpp.platform.properties> <javacpp.platform.properties>${javacpp.platform}</javacpp.platform.properties>
<javacpp.version>1.5.1</javacpp.version> <javacpp.version>1.5.2-SNAPSHOT</javacpp.version>
<javacpp-presets.version>1.5.1</javacpp-presets.version> <javacpp-presets.version>1.5.1</javacpp-presets.version>
<javacv.version>1.5.1</javacv.version> <javacv.version>1.5.1</javacv.version>
@ -298,7 +298,7 @@
<openblas.version>0.3.6</openblas.version> <openblas.version>0.3.6</openblas.version>
<mkl.version>2019.4</mkl.version> <mkl.version>2019.4</mkl.version>
<mkl-dnn.version>0.20</mkl-dnn.version> <mkl-dnn.version>0.20</mkl-dnn.version>
<mkl-dnn.javacpp.version>${mkl-dnn.version}-${javacpp.version}</mkl-dnn.javacpp.version> <mkl-dnn.javacpp.version>${mkl-dnn.version}-${javacpp-presets.version}</mkl-dnn.javacpp.version>
<opencv.version>4.1.0</opencv.version> <opencv.version>4.1.0</opencv.version>
<ffmpeg.version>4.1.3</ffmpeg.version> <ffmpeg.version>4.1.3</ffmpeg.version>
<leptonica.version>1.78.0</leptonica.version> <leptonica.version>1.78.0</leptonica.version>