Adapt the Java wrappers in ND4J generated with JavaCPP
parent
780ae628a9
commit
8881bfe7aa
|
@ -28,58 +28,14 @@ import org.nd4j.linalg.api.buffer.Utf8Buffer;
|
||||||
* op execution on cpu
|
* op execution on cpu
|
||||||
* @author Adam Gibson
|
* @author Adam Gibson
|
||||||
*/
|
*/
|
||||||
public abstract class NativeOps extends Pointer {
|
public interface NativeOps {
|
||||||
public NativeOps(Pointer p) {
|
|
||||||
super(p);
|
|
||||||
}
|
|
||||||
|
|
||||||
public static int getCores(int totals) {
|
|
||||||
// that's special case for Xeon Phi
|
|
||||||
if (totals >= 256)
|
|
||||||
return 64;
|
|
||||||
|
|
||||||
int ht_off = totals / 2; // we count off HyperThreading without any excuses
|
|
||||||
if (ht_off <= 4)
|
|
||||||
return 4; // special case for Intel i5. and nobody likes i3 anyway
|
|
||||||
|
|
||||||
if (ht_off > 24) {
|
|
||||||
int rounds = 0;
|
|
||||||
while (ht_off > 24) { // we loop until the final value is at most 24 cores, since that's a reasonable threshold as of 2016
|
|
||||||
if (ht_off > 24) {
|
|
||||||
ht_off /= 2; // we don't have any CPUs that have a higher number than 24 physical cores
|
|
||||||
rounds++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// 20 threads is special case in this branch
|
|
||||||
if (ht_off == 20 && rounds < 2)
|
|
||||||
ht_off /= 2;
|
|
||||||
} else { // low-core models are known, but there's a gap, between consumer cpus and xeons
|
|
||||||
if (ht_off <= 6) {
|
|
||||||
// that's more likely consumer-grade cpu, so leave this value alone
|
|
||||||
return ht_off;
|
|
||||||
} else {
|
|
||||||
if (isOdd(ht_off)) // if that's odd number, it's final result
|
|
||||||
return ht_off;
|
|
||||||
|
|
||||||
// 20 threads & 16 threads are special cases in this branch, where we go with the min value
|
|
||||||
if (ht_off == 20 || ht_off == 16)
|
|
||||||
ht_off /= 2;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return ht_off;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static boolean isOdd(int value) {
|
|
||||||
return (value % 2 != 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This method allows you to specify the minimal number of elements per thread/block during an op call
|
* This method allows you to specify the minimal number of elements per thread/block during an op call
|
||||||
* PLEASE NOTE: Changing this value can and will affect performance.
|
* PLEASE NOTE: Changing this value can and will affect performance.
|
||||||
*
|
*
|
||||||
* @param value
|
* @param value
|
||||||
*/
|
*/
|
||||||
public native void setElementThreshold(int value);
|
void setElementThreshold(int value);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This method allows you to specify the minimal number of TADs per thread/block during an op call
|
* This method allows you to specify the minimal number of TADs per thread/block during an op call
|
||||||
|
@ -87,7 +43,7 @@ public abstract class NativeOps extends Pointer {
|
||||||
*
|
*
|
||||||
* @param value
|
* @param value
|
||||||
*/
|
*/
|
||||||
public abstract void setTADThreshold(int value);
|
void setTADThreshold(int value);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param opNum
|
* @param opNum
|
||||||
|
@ -95,7 +51,7 @@ public abstract class NativeOps extends Pointer {
|
||||||
* @param xShapeInfo
|
* @param xShapeInfo
|
||||||
* @param extraParams
|
* @param extraParams
|
||||||
*/
|
*/
|
||||||
public abstract void execIndexReduceScalar(PointerPointer extraPointers,
|
void execIndexReduceScalar(PointerPointer extraPointers,
|
||||||
int opNum,
|
int opNum,
|
||||||
Pointer x,
|
Pointer x,
|
||||||
@Cast("Nd4jLong *") LongPointer xShapeInfo,
|
@Cast("Nd4jLong *") LongPointer xShapeInfo,
|
||||||
|
@ -117,7 +73,7 @@ public abstract class NativeOps extends Pointer {
|
||||||
* @param dimension
|
* @param dimension
|
||||||
* @param dimensionLength
|
* @param dimensionLength
|
||||||
*/
|
*/
|
||||||
public abstract void execIndexReduce(PointerPointer extraPointers,
|
void execIndexReduce(PointerPointer extraPointers,
|
||||||
int opNum,
|
int opNum,
|
||||||
Pointer x,
|
Pointer x,
|
||||||
@Cast("Nd4jLong *") LongPointer xShapeInfo,
|
@Cast("Nd4jLong *") LongPointer xShapeInfo,
|
||||||
|
@ -142,7 +98,7 @@ public abstract class NativeOps extends Pointer {
|
||||||
* @param dimension
|
* @param dimension
|
||||||
* @param dimensionLength
|
* @param dimensionLength
|
||||||
*/
|
*/
|
||||||
public abstract void execBroadcast(PointerPointer extraPointers,
|
void execBroadcast(PointerPointer extraPointers,
|
||||||
int opNum,
|
int opNum,
|
||||||
Pointer x,
|
Pointer x,
|
||||||
@Cast("Nd4jLong *") LongPointer xShapeInfo,
|
@Cast("Nd4jLong *") LongPointer xShapeInfo,
|
||||||
|
@ -159,7 +115,7 @@ public abstract class NativeOps extends Pointer {
|
||||||
Pointer hDimension, @Cast("Nd4jLong *") LongPointer hDimensionShape,
|
Pointer hDimension, @Cast("Nd4jLong *") LongPointer hDimensionShape,
|
||||||
Pointer dDimension, @Cast("Nd4jLong *") LongPointer dDimensionShape);
|
Pointer dDimension, @Cast("Nd4jLong *") LongPointer dDimensionShape);
|
||||||
|
|
||||||
public abstract void execBroadcastBool(PointerPointer extraPointers,
|
void execBroadcastBool(PointerPointer extraPointers,
|
||||||
int opNum,
|
int opNum,
|
||||||
Pointer x,
|
Pointer x,
|
||||||
@Cast("Nd4jLong *") LongPointer xShapeInfo,
|
@Cast("Nd4jLong *") LongPointer xShapeInfo,
|
||||||
|
@ -187,7 +143,7 @@ public abstract class NativeOps extends Pointer {
|
||||||
* @param resultShapeInfo
|
* @param resultShapeInfo
|
||||||
* @param extraParams
|
* @param extraParams
|
||||||
*/
|
*/
|
||||||
public abstract void execPairwiseTransform(PointerPointer extraPointers,
|
void execPairwiseTransform(PointerPointer extraPointers,
|
||||||
int opNum,
|
int opNum,
|
||||||
Pointer x,
|
Pointer x,
|
||||||
@Cast("Nd4jLong *") LongPointer xShapeInfo,
|
@Cast("Nd4jLong *") LongPointer xShapeInfo,
|
||||||
|
@ -203,7 +159,7 @@ public abstract class NativeOps extends Pointer {
|
||||||
@Cast("Nd4jLong *") LongPointer dresultShapeInfo,
|
@Cast("Nd4jLong *") LongPointer dresultShapeInfo,
|
||||||
Pointer extraParams);
|
Pointer extraParams);
|
||||||
|
|
||||||
public abstract void execPairwiseTransformBool(PointerPointer extraPointers,
|
void execPairwiseTransformBool(PointerPointer extraPointers,
|
||||||
int opNum,
|
int opNum,
|
||||||
Pointer x,
|
Pointer x,
|
||||||
@Cast("Nd4jLong *") LongPointer xShapeInfo,
|
@Cast("Nd4jLong *") LongPointer xShapeInfo,
|
||||||
|
@ -227,7 +183,7 @@ public abstract class NativeOps extends Pointer {
|
||||||
* @param result
|
* @param result
|
||||||
* @param resultShapeInfo
|
* @param resultShapeInfo
|
||||||
*/
|
*/
|
||||||
public abstract void execReduceFloat(PointerPointer extraPointers,
|
void execReduceFloat(PointerPointer extraPointers,
|
||||||
int opNum,
|
int opNum,
|
||||||
Pointer x,
|
Pointer x,
|
||||||
@Cast("Nd4jLong *") LongPointer xShapeInfo,
|
@Cast("Nd4jLong *") LongPointer xShapeInfo,
|
||||||
|
@ -240,7 +196,7 @@ public abstract class NativeOps extends Pointer {
|
||||||
@Cast("Nd4jLong *") LongPointer dresultShapeInfo);
|
@Cast("Nd4jLong *") LongPointer dresultShapeInfo);
|
||||||
|
|
||||||
|
|
||||||
public abstract void execReduceSame(PointerPointer extraPointers,
|
void execReduceSame(PointerPointer extraPointers,
|
||||||
int opNum,
|
int opNum,
|
||||||
Pointer x,
|
Pointer x,
|
||||||
@Cast("Nd4jLong *") LongPointer xShapeInfo,
|
@Cast("Nd4jLong *") LongPointer xShapeInfo,
|
||||||
|
@ -253,7 +209,7 @@ public abstract class NativeOps extends Pointer {
|
||||||
@Cast("Nd4jLong *") LongPointer dresultShapeInfo);
|
@Cast("Nd4jLong *") LongPointer dresultShapeInfo);
|
||||||
|
|
||||||
|
|
||||||
public abstract void execReduceBool(PointerPointer extraPointers,
|
void execReduceBool(PointerPointer extraPointers,
|
||||||
int opNum,
|
int opNum,
|
||||||
Pointer x,
|
Pointer x,
|
||||||
@Cast("Nd4jLong *") LongPointer xShapeInfo,
|
@Cast("Nd4jLong *") LongPointer xShapeInfo,
|
||||||
|
@ -266,7 +222,7 @@ public abstract class NativeOps extends Pointer {
|
||||||
@Cast("Nd4jLong *") LongPointer dresultShapeInfo);
|
@Cast("Nd4jLong *") LongPointer dresultShapeInfo);
|
||||||
|
|
||||||
|
|
||||||
public abstract void execReduceLong(PointerPointer extraPointers,
|
void execReduceLong(PointerPointer extraPointers,
|
||||||
int opNum,
|
int opNum,
|
||||||
Pointer x,
|
Pointer x,
|
||||||
@Cast("Nd4jLong *") LongPointer xShapeInfo,
|
@Cast("Nd4jLong *") LongPointer xShapeInfo,
|
||||||
|
@ -286,7 +242,7 @@ public abstract class NativeOps extends Pointer {
|
||||||
* @param result
|
* @param result
|
||||||
* @param resultShapeInfo
|
* @param resultShapeInfo
|
||||||
*/
|
*/
|
||||||
public abstract void execReduceFloat(PointerPointer extraPointers,
|
void execReduceFloat2(PointerPointer extraPointers,
|
||||||
int opNum,
|
int opNum,
|
||||||
Pointer x,
|
Pointer x,
|
||||||
@Cast("Nd4jLong *") LongPointer xShapeInfo,
|
@Cast("Nd4jLong *") LongPointer xShapeInfo,
|
||||||
|
@ -301,7 +257,7 @@ public abstract class NativeOps extends Pointer {
|
||||||
Pointer dDimension, @Cast("Nd4jLong *") LongPointer dDimensionShape);
|
Pointer dDimension, @Cast("Nd4jLong *") LongPointer dDimensionShape);
|
||||||
|
|
||||||
|
|
||||||
public abstract void execReduceSame(PointerPointer extraPointers,
|
void execReduceSame2(PointerPointer extraPointers,
|
||||||
int opNum,
|
int opNum,
|
||||||
Pointer x,
|
Pointer x,
|
||||||
@Cast("Nd4jLong *") LongPointer xShapeInfo,
|
@Cast("Nd4jLong *") LongPointer xShapeInfo,
|
||||||
|
@ -315,7 +271,7 @@ public abstract class NativeOps extends Pointer {
|
||||||
Pointer hDimension, @Cast("Nd4jLong *") LongPointer hDimensionShape,
|
Pointer hDimension, @Cast("Nd4jLong *") LongPointer hDimensionShape,
|
||||||
Pointer dDimension, @Cast("Nd4jLong *") LongPointer dDimensionShape);
|
Pointer dDimension, @Cast("Nd4jLong *") LongPointer dDimensionShape);
|
||||||
|
|
||||||
public abstract void execReduceBool(PointerPointer extraPointers,
|
void execReduceBool2(PointerPointer extraPointers,
|
||||||
int opNum,
|
int opNum,
|
||||||
Pointer x,
|
Pointer x,
|
||||||
@Cast("Nd4jLong *") LongPointer xShapeInfo,
|
@Cast("Nd4jLong *") LongPointer xShapeInfo,
|
||||||
|
@ -329,7 +285,7 @@ public abstract class NativeOps extends Pointer {
|
||||||
Pointer hDimension, @Cast("Nd4jLong *") LongPointer hDimensionShape,
|
Pointer hDimension, @Cast("Nd4jLong *") LongPointer hDimensionShape,
|
||||||
Pointer dDimension, @Cast("Nd4jLong *") LongPointer dDimensionShape);
|
Pointer dDimension, @Cast("Nd4jLong *") LongPointer dDimensionShape);
|
||||||
|
|
||||||
public abstract void execReduceLong(PointerPointer extraPointers,
|
void execReduceLong2(PointerPointer extraPointers,
|
||||||
int opNum,
|
int opNum,
|
||||||
Pointer x,
|
Pointer x,
|
||||||
@Cast("Nd4jLong *") LongPointer xShapeInfo,
|
@Cast("Nd4jLong *") LongPointer xShapeInfo,
|
||||||
|
@ -353,7 +309,7 @@ public abstract class NativeOps extends Pointer {
|
||||||
* @param result
|
* @param result
|
||||||
* @param resultShapeInfo
|
* @param resultShapeInfo
|
||||||
*/
|
*/
|
||||||
public abstract void execReduce3(PointerPointer extraPointers,
|
void execReduce3(PointerPointer extraPointers,
|
||||||
int opNum,
|
int opNum,
|
||||||
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
|
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
|
||||||
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
|
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
|
||||||
|
@ -371,7 +327,7 @@ public abstract class NativeOps extends Pointer {
|
||||||
* @param y
|
* @param y
|
||||||
* @param yShapeInfo
|
* @param yShapeInfo
|
||||||
*/
|
*/
|
||||||
public abstract void execReduce3Scalar(PointerPointer extraPointers, int opNum,
|
void execReduce3Scalar(PointerPointer extraPointers, int opNum,
|
||||||
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
|
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
|
||||||
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
|
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
|
||||||
Pointer extraParamsVals,
|
Pointer extraParamsVals,
|
||||||
|
@ -392,7 +348,7 @@ public abstract class NativeOps extends Pointer {
|
||||||
* @param dimension
|
* @param dimension
|
||||||
* @param dimensionLength
|
* @param dimensionLength
|
||||||
*/
|
*/
|
||||||
public abstract void execReduce3(PointerPointer extraPointers,
|
void execReduce3Tad(PointerPointer extraPointers,
|
||||||
int opNum,
|
int opNum,
|
||||||
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
|
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
|
||||||
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
|
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
|
||||||
|
@ -406,7 +362,7 @@ public abstract class NativeOps extends Pointer {
|
||||||
@Cast("Nd4jLong *") LongPointer tadOnlyShapeInfo, @Cast("Nd4jLong *") LongPointer tadOffsets,
|
@Cast("Nd4jLong *") LongPointer tadOnlyShapeInfo, @Cast("Nd4jLong *") LongPointer tadOffsets,
|
||||||
@Cast("Nd4jLong *") LongPointer yTadOnlyShapeInfo, @Cast("Nd4jLong *") LongPointer yTadOffsets);
|
@Cast("Nd4jLong *") LongPointer yTadOnlyShapeInfo, @Cast("Nd4jLong *") LongPointer yTadOffsets);
|
||||||
|
|
||||||
public abstract void execReduce3All(PointerPointer extraPointers,
|
void execReduce3All(PointerPointer extraPointers,
|
||||||
int opNum,
|
int opNum,
|
||||||
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
|
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
|
||||||
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
|
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
|
||||||
|
@ -432,7 +388,7 @@ public abstract class NativeOps extends Pointer {
|
||||||
* @param scalar
|
* @param scalar
|
||||||
* @param extraParams
|
* @param extraParams
|
||||||
*/
|
*/
|
||||||
public abstract void execScalar(PointerPointer extraPointers,
|
void execScalar(PointerPointer extraPointers,
|
||||||
int opNum,
|
int opNum,
|
||||||
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
|
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
|
||||||
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
|
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
|
||||||
|
@ -442,7 +398,7 @@ public abstract class NativeOps extends Pointer {
|
||||||
Pointer dscalar, @Cast("Nd4jLong *") LongPointer dscalarShapeInfo,
|
Pointer dscalar, @Cast("Nd4jLong *") LongPointer dscalarShapeInfo,
|
||||||
Pointer extraParams);
|
Pointer extraParams);
|
||||||
|
|
||||||
public abstract void execScalarBool(PointerPointer extraPointers,
|
void execScalarBool(PointerPointer extraPointers,
|
||||||
int opNum,
|
int opNum,
|
||||||
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
|
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
|
||||||
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
|
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
|
||||||
|
@ -459,7 +415,7 @@ public abstract class NativeOps extends Pointer {
|
||||||
* @param extraParams
|
* @param extraParams
|
||||||
* @param biasCorrected
|
* @param biasCorrected
|
||||||
*/
|
*/
|
||||||
public abstract void execSummaryStatsScalar(PointerPointer extraPointers,
|
void execSummaryStatsScalar(PointerPointer extraPointers,
|
||||||
int opNum,
|
int opNum,
|
||||||
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
|
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
|
||||||
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
|
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
|
||||||
|
@ -477,7 +433,7 @@ public abstract class NativeOps extends Pointer {
|
||||||
* @param resultShapeInfo
|
* @param resultShapeInfo
|
||||||
* @param biasCorrected
|
* @param biasCorrected
|
||||||
*/
|
*/
|
||||||
public abstract void execSummaryStats(PointerPointer extraPointers,
|
void execSummaryStats(PointerPointer extraPointers,
|
||||||
int opNum,
|
int opNum,
|
||||||
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
|
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
|
||||||
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
|
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
|
||||||
|
@ -496,7 +452,7 @@ public abstract class NativeOps extends Pointer {
|
||||||
* @param dimension
|
* @param dimension
|
||||||
* @param dimensionLength
|
* @param dimensionLength
|
||||||
*/
|
*/
|
||||||
public abstract void execSummaryStats(PointerPointer extraPointers,
|
void execSummaryStatsTad(PointerPointer extraPointers,
|
||||||
int opNum,
|
int opNum,
|
||||||
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
|
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
|
||||||
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
|
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
|
||||||
|
@ -519,7 +475,7 @@ public abstract class NativeOps extends Pointer {
|
||||||
* @param resultShapeInfo
|
* @param resultShapeInfo
|
||||||
* @param extraParams
|
* @param extraParams
|
||||||
*/
|
*/
|
||||||
public abstract void execTransformFloat(PointerPointer extraPointers,
|
void execTransformFloat(PointerPointer extraPointers,
|
||||||
int opNum,
|
int opNum,
|
||||||
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
|
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
|
||||||
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
|
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
|
||||||
|
@ -527,7 +483,7 @@ public abstract class NativeOps extends Pointer {
|
||||||
Pointer dresult, @Cast("Nd4jLong *") LongPointer dresultShapeInfo,
|
Pointer dresult, @Cast("Nd4jLong *") LongPointer dresultShapeInfo,
|
||||||
Pointer extraParams);
|
Pointer extraParams);
|
||||||
|
|
||||||
public abstract void execTransformSame(PointerPointer extraPointers,
|
void execTransformSame(PointerPointer extraPointers,
|
||||||
int opNum,
|
int opNum,
|
||||||
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
|
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
|
||||||
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
|
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
|
||||||
|
@ -535,7 +491,7 @@ public abstract class NativeOps extends Pointer {
|
||||||
Pointer dresult, @Cast("Nd4jLong *") LongPointer dresultShapeInfo,
|
Pointer dresult, @Cast("Nd4jLong *") LongPointer dresultShapeInfo,
|
||||||
Pointer extraParams);
|
Pointer extraParams);
|
||||||
|
|
||||||
public abstract void execTransformStrict(PointerPointer extraPointers,
|
void execTransformStrict(PointerPointer extraPointers,
|
||||||
int opNum,
|
int opNum,
|
||||||
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
|
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
|
||||||
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
|
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
|
||||||
|
@ -543,7 +499,7 @@ public abstract class NativeOps extends Pointer {
|
||||||
Pointer dresult, @Cast("Nd4jLong *") LongPointer dresultShapeInfo,
|
Pointer dresult, @Cast("Nd4jLong *") LongPointer dresultShapeInfo,
|
||||||
Pointer extraParams);
|
Pointer extraParams);
|
||||||
|
|
||||||
public abstract void execTransformBool(PointerPointer extraPointers,
|
void execTransformBool(PointerPointer extraPointers,
|
||||||
int opNum,
|
int opNum,
|
||||||
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
|
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
|
||||||
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
|
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
|
||||||
|
@ -551,7 +507,7 @@ public abstract class NativeOps extends Pointer {
|
||||||
Pointer dresult, @Cast("Nd4jLong *") LongPointer dresultShapeInfo,
|
Pointer dresult, @Cast("Nd4jLong *") LongPointer dresultShapeInfo,
|
||||||
Pointer extraParams);
|
Pointer extraParams);
|
||||||
|
|
||||||
public abstract void execTransformAny(PointerPointer extraPointers,
|
void execTransformAny(PointerPointer extraPointers,
|
||||||
int opNum,
|
int opNum,
|
||||||
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
|
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
|
||||||
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
|
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
|
||||||
|
@ -573,7 +529,7 @@ public abstract class NativeOps extends Pointer {
|
||||||
* @param dimension
|
* @param dimension
|
||||||
* @param dimensionLength
|
* @param dimensionLength
|
||||||
*/
|
*/
|
||||||
public abstract void execScalar(PointerPointer extraPointers,
|
void execScalarTad(PointerPointer extraPointers,
|
||||||
int opNum,
|
int opNum,
|
||||||
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
|
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
|
||||||
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
|
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
|
||||||
|
@ -587,7 +543,7 @@ public abstract class NativeOps extends Pointer {
|
||||||
@Cast("Nd4jLong *") LongPointer tadShapeInfo, @Cast("Nd4jLong *") LongPointer tadOffsets,
|
@Cast("Nd4jLong *") LongPointer tadShapeInfo, @Cast("Nd4jLong *") LongPointer tadOffsets,
|
||||||
@Cast("Nd4jLong *") LongPointer tadShapeInfoZ, @Cast("Nd4jLong *") LongPointer tadOffsetsZ);
|
@Cast("Nd4jLong *") LongPointer tadShapeInfoZ, @Cast("Nd4jLong *") LongPointer tadOffsetsZ);
|
||||||
|
|
||||||
public abstract void execScalarBool(PointerPointer extraPointers,
|
void execScalarBoolTad(PointerPointer extraPointers,
|
||||||
int opNum,
|
int opNum,
|
||||||
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
|
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
|
||||||
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
|
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
|
||||||
|
@ -610,7 +566,7 @@ public abstract class NativeOps extends Pointer {
|
||||||
* @param input
|
* @param input
|
||||||
* @param inputShapeInfo
|
* @param inputShapeInfo
|
||||||
*/
|
*/
|
||||||
public abstract void flatten(PointerPointer extraPointers,
|
void flatten(PointerPointer extraPointers,
|
||||||
int offset,
|
int offset,
|
||||||
char order,
|
char order,
|
||||||
Pointer results, @Cast("Nd4jLong *") LongPointer resultShapeInfo,
|
Pointer results, @Cast("Nd4jLong *") LongPointer resultShapeInfo,
|
||||||
|
@ -629,7 +585,7 @@ public abstract class NativeOps extends Pointer {
|
||||||
* @param tadPointers
|
* @param tadPointers
|
||||||
* @param tadOffsets
|
* @param tadOffsets
|
||||||
*/
|
*/
|
||||||
public abstract void concat(PointerPointer extraPointers,
|
void concat(PointerPointer extraPointers,
|
||||||
int dimension,
|
int dimension,
|
||||||
int numArrays,
|
int numArrays,
|
||||||
PointerPointer data, PointerPointer inputShapeInfo,
|
PointerPointer data, PointerPointer inputShapeInfo,
|
||||||
|
@ -639,7 +595,7 @@ public abstract class NativeOps extends Pointer {
|
||||||
PointerPointer tadPointers,
|
PointerPointer tadPointers,
|
||||||
PointerPointer tadOffsets);
|
PointerPointer tadOffsets);
|
||||||
|
|
||||||
public abstract void specialConcat(PointerPointer extraPointers,
|
void specialConcat(PointerPointer extraPointers,
|
||||||
int dimension,
|
int dimension,
|
||||||
int numArrays,
|
int numArrays,
|
||||||
PointerPointer data, PointerPointer inputShapeInfo,
|
PointerPointer data, PointerPointer inputShapeInfo,
|
||||||
|
@ -653,99 +609,99 @@ public abstract class NativeOps extends Pointer {
|
||||||
*
|
*
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public abstract int ompGetMaxThreads();
|
int ompGetMaxThreads();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the number of OpenMP threads
|
* Gets the number of OpenMP threads
|
||||||
*
|
*
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public abstract int ompGetNumThreads();
|
int ompGetNumThreads();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sets the number of OpenMP threads
|
* Sets the number of OpenMP threads
|
||||||
*
|
*
|
||||||
* @param threads
|
* @param threads
|
||||||
*/
|
*/
|
||||||
public abstract void setOmpNumThreads(int threads);
|
void setOmpNumThreads(int threads);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sets the minimal number of openmp threads for variative methods
|
* Sets the minimal number of openmp threads for variative methods
|
||||||
*
|
*
|
||||||
* @param threads
|
* @param threads
|
||||||
*/
|
*/
|
||||||
public abstract void setOmpMinThreads(int threads);
|
void setOmpMinThreads(int threads);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* NEVER EVER USE THIS METHOD OUTSIDE OF CUDA
|
* NEVER EVER USE THIS METHOD OUTSIDE OF CUDA
|
||||||
*/
|
*/
|
||||||
public abstract void initializeDevicesAndFunctions();
|
void initializeDevicesAndFunctions();
|
||||||
|
|
||||||
public abstract void initializeFunctions(PointerPointer functions);
|
void initializeFunctions(PointerPointer functions);
|
||||||
|
|
||||||
public abstract Pointer mallocHost(long memorySize, int flags);
|
Pointer mallocHost(long memorySize, int flags);
|
||||||
|
|
||||||
public abstract Pointer mallocDevice(long memorySize, int ptrToDeviceId, int flags);
|
Pointer mallocDevice(long memorySize, int ptrToDeviceId, int flags);
|
||||||
|
|
||||||
public abstract int freeHost(Pointer pointer);
|
int freeHost(Pointer pointer);
|
||||||
|
|
||||||
public abstract int freeDevice(Pointer pointer, int deviceId);
|
int freeDevice(Pointer pointer, int deviceId);
|
||||||
|
|
||||||
public abstract Pointer createContext();
|
Pointer createContext();
|
||||||
|
|
||||||
public abstract Pointer createStream();
|
Pointer createStream();
|
||||||
|
|
||||||
public abstract Pointer createEvent();
|
Pointer createEvent();
|
||||||
|
|
||||||
public abstract int registerEvent(Pointer event, Pointer stream);
|
int registerEvent(Pointer event, Pointer stream);
|
||||||
|
|
||||||
public abstract int destroyEvent(Pointer event);
|
int destroyEvent(Pointer event);
|
||||||
|
|
||||||
public abstract int setDevice(int ptrToDeviceId);
|
int setDevice(int ptrToDeviceId);
|
||||||
|
|
||||||
public abstract int getDevice();
|
int getDevice();
|
||||||
|
|
||||||
public abstract int streamSynchronize(Pointer stream);
|
int streamSynchronize(Pointer stream);
|
||||||
|
|
||||||
public abstract int eventSynchronize(Pointer event);
|
int eventSynchronize(Pointer event);
|
||||||
|
|
||||||
public abstract long getDeviceFreeMemory(int ptrToDeviceId);
|
long getDeviceFreeMemory(int ptrToDeviceId);
|
||||||
|
|
||||||
public abstract long getDeviceFreeMemory();
|
long getDeviceFreeMemoryDefault();
|
||||||
|
|
||||||
public abstract long getDeviceTotalMemory(int ptrToDeviceId);
|
long getDeviceTotalMemory(int ptrToDeviceId);
|
||||||
|
|
||||||
public abstract int getDeviceMajor(int ptrToDeviceId);
|
int getDeviceMajor(int ptrToDeviceId);
|
||||||
|
|
||||||
public abstract int getDeviceMinor(int ptrToDeviceId);
|
int getDeviceMinor(int ptrToDeviceId);
|
||||||
|
|
||||||
public abstract String getDeviceName(int ptrToDeviceId);
|
String getDeviceName(int ptrToDeviceId);
|
||||||
|
|
||||||
public abstract int memcpy(Pointer dst, Pointer src, long size, int flags, Pointer reserved);
|
int memcpySync(Pointer dst, Pointer src, long size, int flags, Pointer reserved);
|
||||||
|
|
||||||
public abstract int memcpyAsync(Pointer dst, Pointer src, long size, int flags, Pointer reserved);
|
int memcpyAsync(Pointer dst, Pointer src, long size, int flags, Pointer reserved);
|
||||||
|
|
||||||
public abstract int memcpyConstantAsync(long dst, Pointer src, long size, int flags, Pointer reserved);
|
int memcpyConstantAsync(long dst, Pointer src, long size, int flags, Pointer reserved);
|
||||||
|
|
||||||
public abstract int memset(Pointer dst, int value, long size, int flags, Pointer reserved);
|
int memsetSync(Pointer dst, int value, long size, int flags, Pointer reserved);
|
||||||
|
|
||||||
public abstract int memsetAsync(Pointer dst, int value, long size, int flags, Pointer reserved);
|
int memsetAsync(Pointer dst, int value, long size, int flags, Pointer reserved);
|
||||||
|
|
||||||
public abstract Pointer getConstantSpace();
|
Pointer getConstantSpace();
|
||||||
|
|
||||||
public abstract int getAvailableDevices();
|
int getAvailableDevices();
|
||||||
|
|
||||||
public abstract void enableDebugMode(boolean reallyEnable);
|
void enableDebugMode(boolean reallyEnable);
|
||||||
|
|
||||||
public abstract void enableVerboseMode(boolean reallyEnable);
|
void enableVerboseMode(boolean reallyEnable);
|
||||||
|
|
||||||
public abstract void setGridLimit(int gridSize);
|
void setGridLimit(int gridSize);
|
||||||
|
|
||||||
public abstract Pointer tadOnlyShapeInfo(@Cast("Nd4jLong *") LongPointer shapeInfo, IntPointer dimension, int dimensionLength);
|
Pointer tadOnlyShapeInfo(@Cast("Nd4jLong *") LongPointer shapeInfo, IntPointer dimension, int dimensionLength);
|
||||||
|
|
||||||
///////////////
|
///////////////
|
||||||
|
|
||||||
public abstract void pullRows(PointerPointer extraPointers,
|
void pullRows(PointerPointer extraPointers,
|
||||||
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
|
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
|
||||||
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
|
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
|
||||||
Pointer z, @Cast("Nd4jLong *") LongPointer zShapeInfo,
|
Pointer z, @Cast("Nd4jLong *") LongPointer zShapeInfo,
|
||||||
|
@ -760,7 +716,7 @@ public abstract class NativeOps extends Pointer {
|
||||||
|
|
||||||
///////////////////////
|
///////////////////////
|
||||||
|
|
||||||
public abstract void average(PointerPointer extraPointers,
|
void average(PointerPointer extraPointers,
|
||||||
PointerPointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
|
PointerPointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
|
||||||
PointerPointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
|
PointerPointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
|
||||||
Pointer z, @Cast("Nd4jLong *") LongPointer zShapeInfo,
|
Pointer z, @Cast("Nd4jLong *") LongPointer zShapeInfo,
|
||||||
|
@ -771,7 +727,7 @@ public abstract class NativeOps extends Pointer {
|
||||||
|
|
||||||
///////////////////////
|
///////////////////////
|
||||||
|
|
||||||
public abstract void accumulate(PointerPointer extraPointers,
|
void accumulate(PointerPointer extraPointers,
|
||||||
PointerPointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
|
PointerPointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
|
||||||
PointerPointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
|
PointerPointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
|
||||||
Pointer z, @Cast("Nd4jLong *") LongPointer zShapeInfo,
|
Pointer z, @Cast("Nd4jLong *") LongPointer zShapeInfo,
|
||||||
|
@ -781,15 +737,15 @@ public abstract class NativeOps extends Pointer {
|
||||||
|
|
||||||
///////////////////////
|
///////////////////////
|
||||||
|
|
||||||
public abstract void enableP2P(boolean reallyEnable);
|
void enableP2P(boolean reallyEnable);
|
||||||
|
|
||||||
public abstract void checkP2P();
|
void checkP2P();
|
||||||
|
|
||||||
public abstract boolean isP2PAvailable();
|
boolean isP2PAvailable();
|
||||||
|
|
||||||
//
|
//
|
||||||
|
|
||||||
public abstract void shuffle(PointerPointer extraPointers,
|
void shuffle(PointerPointer extraPointers,
|
||||||
PointerPointer x, @Cast("Nd4jLong *") PointerPointer xShapeInfo,
|
PointerPointer x, @Cast("Nd4jLong *") PointerPointer xShapeInfo,
|
||||||
PointerPointer dx, @Cast("Nd4jLong *") PointerPointer dxShapeInfo,
|
PointerPointer dx, @Cast("Nd4jLong *") PointerPointer dxShapeInfo,
|
||||||
PointerPointer z, @Cast("Nd4jLong *") PointerPointer zShapeInfo,
|
PointerPointer z, @Cast("Nd4jLong *") PointerPointer zShapeInfo,
|
||||||
|
@ -802,15 +758,15 @@ public abstract class NativeOps extends Pointer {
|
||||||
|
|
||||||
// opType conversion
|
// opType conversion
|
||||||
|
|
||||||
public abstract void convertTypes(PointerPointer extras, int srcType, Pointer x, long N, int dstType, Pointer z);
|
void convertTypes(PointerPointer extras, int srcType, Pointer x, long N, int dstType, Pointer z);
|
||||||
|
|
||||||
public abstract boolean isExperimentalEnabled();
|
boolean isExperimentalEnabled();
|
||||||
|
|
||||||
// GridOps
|
// GridOps
|
||||||
|
|
||||||
/*
|
/*
|
||||||
// MetaOps
|
// MetaOps
|
||||||
public abstract void execMetaPredicateShape(PointerPointer extras,
|
void execMetaPredicateShape(PointerPointer extras,
|
||||||
int opTypeA, int opNumA,
|
int opTypeA, int opNumA,
|
||||||
int opTypeB, int opNumB,
|
int opTypeB, int opNumB,
|
||||||
long N,
|
long N,
|
||||||
|
@ -826,7 +782,7 @@ public abstract class NativeOps extends Pointer {
|
||||||
*/
|
*/
|
||||||
/////////////////////////
|
/////////////////////////
|
||||||
|
|
||||||
public abstract void execAggregate(PointerPointer extras, int opNum,
|
void execAggregate(PointerPointer extras, int opNum,
|
||||||
PointerPointer arguments,
|
PointerPointer arguments,
|
||||||
int numArguments,
|
int numArguments,
|
||||||
@Cast("Nd4jLong **") PointerPointer shapes,
|
@Cast("Nd4jLong **") PointerPointer shapes,
|
||||||
|
@ -839,20 +795,20 @@ public abstract class NativeOps extends Pointer {
|
||||||
int numRealArguments,
|
int numRealArguments,
|
||||||
@Cast("nd4j::DataType") int dataType);
|
@Cast("nd4j::DataType") int dataType);
|
||||||
|
|
||||||
public abstract void execAggregateBatch(PointerPointer extras, int numAggregates, int opNum, int maxArgs,
|
void execAggregateBatch(PointerPointer extras, int numAggregates, int opNum, int maxArgs,
|
||||||
int maxShapes, int maxIntArrays, int maxIntArraySize, int maxIdx, int maxReals,
|
int maxShapes, int maxIntArrays, int maxIntArraySize, int maxIdx, int maxReals,
|
||||||
Pointer ptrToArguments, @Cast("nd4j::DataType") int dataType);
|
Pointer ptrToArguments, @Cast("nd4j::DataType") int dataType);
|
||||||
|
|
||||||
|
|
||||||
//////////////
|
//////////////
|
||||||
public abstract void execRandom(PointerPointer extraPointers,
|
void execRandom(PointerPointer extraPointers,
|
||||||
int opNum,
|
int opNum,
|
||||||
Pointer state,
|
Pointer state,
|
||||||
Pointer z, @Cast("Nd4jLong *") LongPointer zShapeBuffer,
|
Pointer z, @Cast("Nd4jLong *") LongPointer zShapeBuffer,
|
||||||
Pointer dz, @Cast("Nd4jLong *") LongPointer dzShapeBuffer,
|
Pointer dz, @Cast("Nd4jLong *") LongPointer dzShapeBuffer,
|
||||||
Pointer extraArguments);
|
Pointer extraArguments);
|
||||||
|
|
||||||
public abstract void execRandom(PointerPointer extraPointers,
|
void execRandom3(PointerPointer extraPointers,
|
||||||
int opNum,
|
int opNum,
|
||||||
Pointer state,
|
Pointer state,
|
||||||
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeBuffer,
|
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeBuffer,
|
||||||
|
@ -863,7 +819,7 @@ public abstract class NativeOps extends Pointer {
|
||||||
Pointer dz, @Cast("Nd4jLong *") LongPointer dzShapeBuffer,
|
Pointer dz, @Cast("Nd4jLong *") LongPointer dzShapeBuffer,
|
||||||
Pointer extraArguments);
|
Pointer extraArguments);
|
||||||
|
|
||||||
public abstract void execRandom(PointerPointer extraPointers,
|
void execRandom2(PointerPointer extraPointers,
|
||||||
int opNum,
|
int opNum,
|
||||||
Pointer state,
|
Pointer state,
|
||||||
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeBuffer,
|
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeBuffer,
|
||||||
|
@ -875,13 +831,13 @@ public abstract class NativeOps extends Pointer {
|
||||||
////////////////////
|
////////////////////
|
||||||
|
|
||||||
|
|
||||||
public abstract Pointer initRandom(PointerPointer extraPointers, long seed, long numberOfElements, Pointer pointerToBuffer);
|
Pointer initRandom(PointerPointer extraPointers, long seed, long numberOfElements, Pointer pointerToBuffer);
|
||||||
|
|
||||||
public abstract void refreshBuffer(PointerPointer extraPointers, long seed, Pointer pointer);
|
void refreshBuffer(PointerPointer extraPointers, long seed, Pointer pointer);
|
||||||
|
|
||||||
public abstract void reSeedBuffer(PointerPointer extraPointers, long seed, Pointer pointer);
|
void reSeedBuffer(PointerPointer extraPointers, long seed, Pointer pointer);
|
||||||
|
|
||||||
public abstract void destroyRandom(Pointer pointer);
|
void destroyRandom(Pointer pointer);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -893,7 +849,7 @@ public abstract class NativeOps extends Pointer {
|
||||||
* @param wordSize the word size (4 for float, 8 for doubles)
|
* @param wordSize the word size (4 for float, 8 for doubles)
|
||||||
* @return a pointer to a numpy array
|
* @return a pointer to a numpy array
|
||||||
*/
|
*/
|
||||||
public abstract Pointer numpyFromNd4j(Pointer data, Pointer shapeBuffer, long wordSize);
|
Pointer numpyFromNd4j(Pointer data, Pointer shapeBuffer, long wordSize);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -903,14 +859,14 @@ public abstract class NativeOps extends Pointer {
|
||||||
* to get the length for
|
* to get the length for
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public abstract int elementSizeForNpyArrayHeader(Pointer npyArray);
|
int elementSizeForNpyArrayHeader(Pointer npyArray);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param npyArrayStruct
|
* @param npyArrayStruct
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public abstract Pointer dataPointForNumpyStruct(Pointer npyArrayStruct);
|
Pointer dataPointForNumpyStruct(Pointer npyArrayStruct);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -921,7 +877,7 @@ public abstract class NativeOps extends Pointer {
|
||||||
* @param wordSize the word size
|
* @param wordSize the word size
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public abstract Pointer numpyHeaderForNd4j(Pointer data, Pointer shapeBuffer, long wordSize, LongPointer length);
|
Pointer numpyHeaderForNd4j(Pointer data, Pointer shapeBuffer, long wordSize, LongPointer length);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Load numpy from a header
|
* Load numpy from a header
|
||||||
|
@ -930,13 +886,13 @@ public abstract class NativeOps extends Pointer {
|
||||||
* @param data the header data to parse
|
* @param data the header data to parse
|
||||||
* @return a pointer to a numpy cnpy:NpyArray struct
|
* @return a pointer to a numpy cnpy:NpyArray struct
|
||||||
*/
|
*/
|
||||||
public abstract Pointer loadNpyFromHeader(Pointer data);
|
Pointer loadNpyFromHeader(Pointer data);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param npyArray
|
* @param npyArray
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public abstract Pointer dataPointForNumpyHeader(Pointer npyArray);
|
Pointer dataPointForNumpyHeader(Pointer npyArray);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the shape buffer from a
|
* Get the shape buffer from a
|
||||||
|
@ -946,7 +902,7 @@ public abstract class NativeOps extends Pointer {
|
||||||
* @param npyArray
|
* @param npyArray
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public abstract Pointer shapeBufferForNumpyHeader(Pointer npyArray);
|
Pointer shapeBufferForNumpyHeader(Pointer npyArray);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Used in {@link org.nd4j.linalg.factory.NDArrayFactory#createFromNpyPointer(Pointer)}
|
* Used in {@link org.nd4j.linalg.factory.NDArrayFactory#createFromNpyPointer(Pointer)}
|
||||||
|
@ -956,7 +912,7 @@ public abstract class NativeOps extends Pointer {
|
||||||
* @param npyArray the pointer to the numpy array to use
|
* @param npyArray the pointer to the numpy array to use
|
||||||
* @return the pointer for the numpy array
|
* @return the pointer for the numpy array
|
||||||
*/
|
*/
|
||||||
public abstract Pointer dataPointForNumpy(Pointer npyArray);
|
Pointer dataPointForNumpy(Pointer npyArray);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get a shape buffer for a numpy array.
|
* Get a shape buffer for a numpy array.
|
||||||
|
@ -965,7 +921,7 @@ public abstract class NativeOps extends Pointer {
|
||||||
* @param npyArray the numpy array to get the shape buffer for
|
* @param npyArray the numpy array to get the shape buffer for
|
||||||
* @return a pointer representing the shape buffer for numpy
|
* @return a pointer representing the shape buffer for numpy
|
||||||
*/
|
*/
|
||||||
public abstract Pointer shapeBufferForNumpy(Pointer npyArray);
|
Pointer shapeBufferForNumpy(Pointer npyArray);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This method releases numpy pointer
|
* This method releases numpy pointer
|
||||||
|
@ -974,7 +930,7 @@ public abstract class NativeOps extends Pointer {
|
||||||
*
|
*
|
||||||
* @param npyArray
|
* @param npyArray
|
||||||
*/
|
*/
|
||||||
public abstract void releaseNumpy(Pointer npyArray);
|
void releaseNumpy(Pointer npyArray);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -984,7 +940,7 @@ public abstract class NativeOps extends Pointer {
|
||||||
* @param path the path to the file
|
* @param path the path to the file
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public abstract Pointer numpyFromFile(BytePointer path);
|
Pointer numpyFromFile(BytePointer path);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -994,7 +950,7 @@ public abstract class NativeOps extends Pointer {
|
||||||
* @param buffer the buffer pointer to check
|
* @param buffer the buffer pointer to check
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public abstract int lengthForShapeBufferPointer(Pointer buffer);
|
int lengthForShapeBufferPointer(Pointer buffer);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Calculate the element size
|
* Calculate the element size
|
||||||
|
@ -1004,7 +960,7 @@ public abstract class NativeOps extends Pointer {
|
||||||
* element size for
|
* element size for
|
||||||
* @return the element size for a given array
|
* @return the element size for a given array
|
||||||
*/
|
*/
|
||||||
public abstract int elementSizeForNpyArray(Pointer npyArray);
|
int elementSizeForNpyArray(Pointer npyArray);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1013,31 +969,31 @@ public abstract class NativeOps extends Pointer {
|
||||||
* @param address the address to get the pointer
|
* @param address the address to get the pointer
|
||||||
* @return the pointer for the given address
|
* @return the pointer for the given address
|
||||||
*/
|
*/
|
||||||
public abstract Pointer pointerForAddress(long address);
|
Pointer pointerForAddress(long address);
|
||||||
|
|
||||||
|
|
||||||
////// NPZ ///////
|
////// NPZ ///////
|
||||||
public abstract Pointer mapFromNpzFile(BytePointer path);
|
Pointer mapFromNpzFile(BytePointer path);
|
||||||
|
|
||||||
public abstract int getNumNpyArraysInMap(Pointer map);
|
int getNumNpyArraysInMap(Pointer map);
|
||||||
|
|
||||||
public abstract String getNpyArrayNameFromMap(Pointer map, int index);
|
String getNpyArrayNameFromMap(Pointer map, int index);
|
||||||
|
|
||||||
public abstract Pointer getNpyArrayFromMap(Pointer map, int index);
|
Pointer getNpyArrayFromMap(Pointer map, int index);
|
||||||
|
|
||||||
public abstract Pointer getNpyArrayData(Pointer npArray);
|
Pointer getNpyArrayData(Pointer npArray);
|
||||||
|
|
||||||
public abstract LongPointer getNpyArrayShape(Pointer npArray);
|
LongPointer getNpyArrayShape(Pointer npArray);
|
||||||
|
|
||||||
public abstract int getNpyArrayRank(Pointer npArray);
|
int getNpyArrayRank(Pointer npArray);
|
||||||
|
|
||||||
public abstract char getNpyArrayOrder(Pointer npArray);
|
char getNpyArrayOrder(Pointer npArray);
|
||||||
|
|
||||||
public abstract int getNpyArrayElemSize(Pointer npArray);
|
int getNpyArrayElemSize(Pointer npArray);
|
||||||
///////
|
///////
|
||||||
|
|
||||||
|
|
||||||
public abstract void tear(PointerPointer extras,
|
void tear(PointerPointer extras,
|
||||||
Pointer tensor, @Cast("Nd4jLong *") LongPointer xShapeInfo,
|
Pointer tensor, @Cast("Nd4jLong *") LongPointer xShapeInfo,
|
||||||
Pointer dtensor, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
|
Pointer dtensor, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
|
||||||
PointerPointer targets, @Cast("Nd4jLong *") LongPointer zShapeInfo,
|
PointerPointer targets, @Cast("Nd4jLong *") LongPointer zShapeInfo,
|
||||||
|
@ -1045,26 +1001,26 @@ public abstract class NativeOps extends Pointer {
|
||||||
@Cast("Nd4jLong *") LongPointer tadOffsets);
|
@Cast("Nd4jLong *") LongPointer tadOffsets);
|
||||||
|
|
||||||
|
|
||||||
public abstract long encodeBitmap(PointerPointer extraPointers, Pointer dx, LongPointer xShapeInfo, long N, IntPointer dz, float threshold);
|
long encodeBitmap(PointerPointer extraPointers, Pointer dx, LongPointer xShapeInfo, long N, IntPointer dz, float threshold);
|
||||||
|
|
||||||
public abstract void decodeBitmap(PointerPointer extraPointers, Pointer dx, long N, Pointer dz, LongPointer zShapeInfo);
|
void decodeBitmap(PointerPointer extraPointers, Pointer dx, long N, Pointer dz, LongPointer zShapeInfo);
|
||||||
|
|
||||||
|
|
||||||
public abstract void encodeThresholdP1(PointerPointer extraPointers, Pointer dx, LongPointer xShapeInfo, long N, IntPointer dz, float threshold);
|
void encodeThresholdP1(PointerPointer extraPointers, Pointer dx, LongPointer xShapeInfo, long N, IntPointer dz, float threshold);
|
||||||
|
|
||||||
public abstract void encodeThresholdP2Int(PointerPointer extraPointers, IntPointer dx, long N, IntPointer dz);
|
void encodeThresholdP2Int(PointerPointer extraPointers, IntPointer dx, long N, IntPointer dz);
|
||||||
|
|
||||||
public abstract void encodeThresholdP3(PointerPointer extraPointers, Pointer dx, LongPointer xShapeInfo, IntPointer offsets, long N, IntPointer dz);
|
void encodeThresholdP3(PointerPointer extraPointers, Pointer dx, LongPointer xShapeInfo, IntPointer offsets, long N, IntPointer dz);
|
||||||
|
|
||||||
public abstract void decodeThreshold(PointerPointer extraPointers, Pointer dx, long N, Pointer dz, LongPointer zShapeInfo);
|
void decodeThreshold(PointerPointer extraPointers, Pointer dx, long N, Pointer dz, LongPointer zShapeInfo);
|
||||||
|
|
||||||
public abstract void sort(PointerPointer extraPointers,
|
void sort(PointerPointer extraPointers,
|
||||||
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
|
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
|
||||||
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
|
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
|
||||||
boolean descending);
|
boolean descending);
|
||||||
|
|
||||||
|
|
||||||
public abstract void sortTad(PointerPointer extraPointers,
|
void sortTad(PointerPointer extraPointers,
|
||||||
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
|
Pointer x, @Cast("Nd4jLong *") LongPointer xShapeInfo,
|
||||||
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
|
Pointer dx, @Cast("Nd4jLong *") LongPointer dxShapeInfo,
|
||||||
IntPointer dimension,
|
IntPointer dimension,
|
||||||
|
@ -1074,81 +1030,81 @@ public abstract class NativeOps extends Pointer {
|
||||||
boolean descending);
|
boolean descending);
|
||||||
|
|
||||||
|
|
||||||
public abstract void sortCooIndices(PointerPointer extraPointers, @Cast("Nd4jLong *") LongPointer indices, Pointer values, long length, int rank);
|
void sortCooIndices(PointerPointer extraPointers, @Cast("Nd4jLong *") LongPointer indices, Pointer values, long length, int rank);
|
||||||
|
|
||||||
|
|
||||||
public abstract LongPointer mmapFile(PointerPointer extraPointers, String fileName, long length);
|
LongPointer mmapFile(PointerPointer extraPointers, String fileName, long length);
|
||||||
|
|
||||||
public abstract void munmapFile(PointerPointer extraPointers, LongPointer ptrMap, long length);
|
void munmapFile(PointerPointer extraPointers, LongPointer ptrMap, long length);
|
||||||
|
|
||||||
public abstract ResultWrapperAbstraction executeFlatGraph(PointerPointer extraPointers, Pointer flatBufferPointer);
|
ResultWrapperAbstraction executeFlatGraph(PointerPointer extraPointers, Pointer flatBufferPointer);
|
||||||
|
|
||||||
public abstract String getAllCustomOps();
|
String getAllCustomOps();
|
||||||
|
|
||||||
public abstract String getAllOperations();
|
String getAllOperations();
|
||||||
|
|
||||||
public abstract int execCustomOp(PointerPointer extraPointers, long opHashCode, Pointer context);
|
int execCustomOp2(PointerPointer extraPointers, long opHashCode, Pointer context);
|
||||||
|
|
||||||
public abstract int execCustomOp(PointerPointer extraPointers, long opHashCode, PointerPointer inputBuffers, PointerPointer inputShapes, int numInput, PointerPointer outputBuffers, PointerPointer outputShapes, int numOutputs, DoublePointer tArgs, int numTArgs, @Cast("Nd4jLong *") LongPointer iArgs, int numIArgs, @Cast("bool *") BooleanPointer bArgs, int numBArgs, boolean isInplace);
|
int execCustomOp(PointerPointer extraPointers, long opHashCode, PointerPointer inputBuffers, PointerPointer inputShapes, int numInput, PointerPointer outputBuffers, PointerPointer outputShapes, int numOutputs, DoublePointer tArgs, int numTArgs, @Cast("Nd4jLong *") LongPointer iArgs, int numIArgs, @Cast("bool *") BooleanPointer bArgs, int numBArgs, boolean isInplace);
|
||||||
|
|
||||||
public abstract Pointer calculateOutputShapes(PointerPointer extraPointers, long hash, PointerPointer inputShapes, int numInputShapes, DoublePointer tArgs, int numTArgs, @Cast("Nd4jLong *") LongPointer iArgs, int numIArgs);
|
Pointer calculateOutputShapes(PointerPointer extraPointers, long hash, PointerPointer inputShapes, int numInputShapes, DoublePointer tArgs, int numTArgs, @Cast("Nd4jLong *") LongPointer iArgs, int numIArgs);
|
||||||
|
|
||||||
public abstract Pointer calculateOutputShapes(PointerPointer extraPointers, long hash, PointerPointer inputBunffers, PointerPointer inputShapes, int numInputShapes, DoublePointer tArgs, int numTArgs, @Cast("Nd4jLong *") LongPointer iArgs, int numIArgs, @Cast("bool *") BooleanPointer bArgs, int numBArgs);
|
Pointer calculateOutputShapes2(PointerPointer extraPointers, long hash, PointerPointer inputBunffers, PointerPointer inputShapes, int numInputShapes, DoublePointer tArgs, int numTArgs, @Cast("Nd4jLong *") LongPointer iArgs, int numIArgs, @Cast("bool *") BooleanPointer bArgs, int numBArgs);
|
||||||
|
|
||||||
public abstract int registerGraph(PointerPointer extraPointers, long graphId, Pointer flatBufferPointer);
|
int registerGraph(PointerPointer extraPointers, long graphId, Pointer flatBufferPointer);
|
||||||
|
|
||||||
public abstract Pointer executeStoredGraph(PointerPointer extraPointers, long graphId, PointerPointer inputBuffers, PointerPointer inputShapes, IntPointer inputIndices, int numInputs);
|
Pointer executeStoredGraph(PointerPointer extraPointers, long graphId, PointerPointer inputBuffers, PointerPointer inputShapes, IntPointer inputIndices, int numInputs);
|
||||||
|
|
||||||
public abstract void deleteResultWrapper(Pointer ptr);
|
void deleteResultWrapper(Pointer ptr);
|
||||||
|
|
||||||
public abstract void deleteShapeList(Pointer ptr);
|
void deleteShapeList(Pointer ptr);
|
||||||
|
|
||||||
public abstract int unregisterGraph(PointerPointer extraPointers, long graphId);
|
int unregisterGraph(PointerPointer extraPointers, long graphId);
|
||||||
|
|
||||||
public abstract void deleteIntArray(Pointer pointer);
|
void deleteIntArray(Pointer pointer);
|
||||||
|
|
||||||
public abstract void deleteLongArray(Pointer pointer);
|
void deleteLongArray(Pointer pointer);
|
||||||
|
|
||||||
public abstract void deletePointerArray(Pointer pointer);
|
void deletePointerArray(Pointer pointer);
|
||||||
|
|
||||||
public abstract void deleteNPArrayStruct(Pointer pointer);
|
void deleteNPArrayStruct(Pointer pointer);
|
||||||
|
|
||||||
public abstract void deleteNPArrayMap(Pointer pointer);
|
void deleteNPArrayMap(Pointer pointer);
|
||||||
|
|
||||||
public abstract void deleteVariablesSet(Pointer pointer);
|
void deleteVariablesSet(Pointer pointer);
|
||||||
|
|
||||||
// GraphState creation
|
// GraphState creation
|
||||||
public abstract Pointer getGraphState(long id);
|
Pointer getGraphState(long id);
|
||||||
|
|
||||||
public abstract void deleteShapeBuffer(Pointer state);
|
void deleteShapeBuffer(Pointer state);
|
||||||
|
|
||||||
public abstract void deleteTadPack(Pointer pointer);
|
void deleteTadPack(Pointer pointer);
|
||||||
|
|
||||||
public abstract void deleteGraphState(Pointer state);
|
void deleteGraphState(Pointer state);
|
||||||
|
|
||||||
public abstract int estimateThreshold(PointerPointer extraPointers, Pointer x, LongPointer xShapeInfo, int N, float threshold);
|
int estimateThreshold(PointerPointer extraPointers, Pointer x, LongPointer xShapeInfo, int N, float threshold);
|
||||||
|
|
||||||
// this method executes op that requires scope to be present: if/while/cond/whatever
|
// this method executes op that requires scope to be present: if/while/cond/whatever
|
||||||
public abstract int execCustomOpWithScope(PointerPointer extraPointers, Pointer state, long opHash, long[] scopes, int numScopes, PointerPointer inputBuffers, PointerPointer inputShapes, int numInputs, PointerPointer outputBuffers, PointerPointer outputShapes, int numOutputs);
|
int execCustomOpWithScope(PointerPointer extraPointers, Pointer state, long opHash, long[] scopes, int numScopes, PointerPointer inputBuffers, PointerPointer inputShapes, int numInputs, PointerPointer outputBuffers, PointerPointer outputShapes, int numOutputs);
|
||||||
|
|
||||||
public abstract void scatterUpdate(PointerPointer extraPointers, int opCode, int numOfUpdates,
|
void scatterUpdate(PointerPointer extraPointers, int opCode, int numOfUpdates,
|
||||||
Pointer hX, @Cast("Nd4jLong *") LongPointer hXShapeInfo, @Cast("Nd4jLong *") LongPointer hxOffsets,
|
Pointer hX, @Cast("Nd4jLong *") LongPointer hXShapeInfo, @Cast("Nd4jLong *") LongPointer hxOffsets,
|
||||||
Pointer dX, @Cast("Nd4jLong *") LongPointer dXShapeInfo, @Cast("Nd4jLong *") LongPointer dxOffsets,
|
Pointer dX, @Cast("Nd4jLong *") LongPointer dXShapeInfo, @Cast("Nd4jLong *") LongPointer dxOffsets,
|
||||||
Pointer hY, @Cast("Nd4jLong *") LongPointer hYShapeInfo, @Cast("Nd4jLong *") LongPointer hyOffsets,
|
Pointer hY, @Cast("Nd4jLong *") LongPointer hYShapeInfo, @Cast("Nd4jLong *") LongPointer hyOffsets,
|
||||||
Pointer dY, @Cast("Nd4jLong *") LongPointer dYShapeInfo, @Cast("Nd4jLong *") LongPointer dyOffsets,
|
Pointer dY, @Cast("Nd4jLong *") LongPointer dYShapeInfo, @Cast("Nd4jLong *") LongPointer dyOffsets,
|
||||||
IntPointer hIndices, IntPointer dIndices);
|
IntPointer hIndices, IntPointer dIndices);
|
||||||
|
|
||||||
//public abstract void fillUtf8String(PointerPointer extraPointers, String[] string, int numStrings, Pointer buffer);
|
//void fillUtf8String(PointerPointer extraPointers, String[] string, int numStrings, Pointer buffer);
|
||||||
public abstract Pointer createUtf8String(PointerPointer extraPointers, String string, int length);
|
Pointer createUtf8String(PointerPointer extraPointers, String string, int length);
|
||||||
public abstract void deleteUtf8String(PointerPointer extraPointers, Pointer ptr);
|
void deleteUtf8String(PointerPointer extraPointers, Pointer ptr);
|
||||||
|
|
||||||
|
|
||||||
public abstract void inspectArray(PointerPointer extraPointers, Pointer buffer, @Cast("Nd4jLong *") LongPointer shapeInfo, Pointer specialBuffer, @Cast("Nd4jLong *") LongPointer specialShapeInfo, @Cast("nd4j::DebugInfo *") Pointer debugInfo);
|
void inspectArray(PointerPointer extraPointers, Pointer buffer, @Cast("Nd4jLong *") LongPointer shapeInfo, Pointer specialBuffer, @Cast("Nd4jLong *") LongPointer specialShapeInfo, @Cast("nd4j::DebugInfo *") Pointer debugInfo);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* this method tries to read numBytes bytes from buffer to provoke crash in certain scenarios
|
* this method tries to read numBytes bytes from buffer to provoke crash in certain scenarios
|
||||||
*/
|
*/
|
||||||
public abstract void tryPointer(Pointer extras, Pointer buffer, int numBytesToRead);
|
void tryPointer(Pointer extras, Pointer buffer, int numBytesToRead);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1158,17 +1114,17 @@ public abstract class NativeOps extends Pointer {
|
||||||
* @param numpyHeader
|
* @param numpyHeader
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public abstract int dataTypeFromNpyHeader(Pointer numpyHeader);
|
int dataTypeFromNpyHeader(Pointer numpyHeader);
|
||||||
|
|
||||||
public abstract Pointer shapeBuffer(int rank, @Cast("Nd4jLong *") LongPointer shape, @Cast("Nd4jLong *") LongPointer strides, int dtype, char order, long ews, boolean empty);
|
Pointer shapeBuffer(int rank, @Cast("Nd4jLong *") LongPointer shape, @Cast("Nd4jLong *") LongPointer strides, int dtype, char order, long ews, boolean empty);
|
||||||
|
|
||||||
public abstract Pointer constantBuffer(int dtype, DoublePointer data, int length);
|
Pointer constantBufferDouble(int dtype, DoublePointer data, int length);
|
||||||
|
|
||||||
public abstract Pointer constantBuffer(int dtype, @Cast("Nd4jLong *") LongPointer data, int length);
|
Pointer constantBufferLong(int dtype, @Cast("Nd4jLong *") LongPointer data, int length);
|
||||||
|
|
||||||
public abstract String runLightBenchmarkSuit(boolean printOut);
|
String runLightBenchmarkSuit(boolean printOut);
|
||||||
|
|
||||||
public abstract String runFullBenchmarkSuit(boolean printOut);
|
String runFullBenchmarkSuit(boolean printOut);
|
||||||
|
|
||||||
public abstract long getCachedMemory(int deviceId);
|
long getCachedMemory(int deviceId);
|
||||||
}
|
}
|
||||||
|
|
|
@ -36,6 +36,46 @@ public class NativeOpsHolder {
|
||||||
@Getter
|
@Getter
|
||||||
private final NativeOps deviceNativeOps;
|
private final NativeOps deviceNativeOps;
|
||||||
|
|
||||||
|
public static int getCores(int totals) {
|
||||||
|
// that's special case for Xeon Phi
|
||||||
|
if (totals >= 256)
|
||||||
|
return 64;
|
||||||
|
|
||||||
|
int ht_off = totals / 2; // we count off HyperThreading without any excuses
|
||||||
|
if (ht_off <= 4)
|
||||||
|
return 4; // special case for Intel i5. and nobody likes i3 anyway
|
||||||
|
|
||||||
|
if (ht_off > 24) {
|
||||||
|
int rounds = 0;
|
||||||
|
while (ht_off > 24) { // we loop until final value gets below 24 cores, since that's reasonable threshold as of 2016
|
||||||
|
if (ht_off > 24) {
|
||||||
|
ht_off /= 2; // we don't have any CPUs that have a higher number than 24 physical cores
|
||||||
|
rounds++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// 20 threads is special case in this branch
|
||||||
|
if (ht_off == 20 && rounds < 2)
|
||||||
|
ht_off /= 2;
|
||||||
|
} else { // low-core models are known, but there's a gap, between consumer cpus and xeons
|
||||||
|
if (ht_off <= 6) {
|
||||||
|
// that's more likely consumer-grade cpu, so leave this value alone
|
||||||
|
return ht_off;
|
||||||
|
} else {
|
||||||
|
if (isOdd(ht_off)) // if that's odd number, it's final result
|
||||||
|
return ht_off;
|
||||||
|
|
||||||
|
// 20 threads & 16 threads are special case in this branch, where we go min value
|
||||||
|
if (ht_off == 20 || ht_off == 16)
|
||||||
|
ht_off /= 2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ht_off;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean isOdd(int value) {
|
||||||
|
return (value % 2 != 0);
|
||||||
|
}
|
||||||
|
|
||||||
private NativeOpsHolder() {
|
private NativeOpsHolder() {
|
||||||
try {
|
try {
|
||||||
Properties props = Nd4jContext.getInstance().getConf();
|
Properties props = Nd4jContext.getInstance().getConf();
|
||||||
|
@ -57,7 +97,7 @@ public class NativeOpsHolder {
|
||||||
deviceNativeOps.setOmpNumThreads(Math.max(1, cores / chips));
|
deviceNativeOps.setOmpNumThreads(Math.max(1, cores / chips));
|
||||||
} else
|
} else
|
||||||
deviceNativeOps.setOmpNumThreads(
|
deviceNativeOps.setOmpNumThreads(
|
||||||
deviceNativeOps.getCores(Runtime.getRuntime().availableProcessors()));
|
getCores(Runtime.getRuntime().availableProcessors()));
|
||||||
}
|
}
|
||||||
//deviceNativeOps.setOmpNumThreads(4);
|
//deviceNativeOps.setOmpNumThreads(4);
|
||||||
|
|
||||||
|
|
|
@ -48,7 +48,7 @@ public abstract class Nd4jBlas implements Blas {
|
||||||
if (cores > 0 && chips > 0)
|
if (cores > 0 && chips > 0)
|
||||||
numThreads = Math.max(1, cores / chips);
|
numThreads = Math.max(1, cores / chips);
|
||||||
else
|
else
|
||||||
numThreads = NativeOps.getCores(Runtime.getRuntime().availableProcessors());
|
numThreads = NativeOpsHolder.getCores(Runtime.getRuntime().availableProcessors());
|
||||||
setMaxThreads(numThreads);
|
setMaxThreads(numThreads);
|
||||||
}
|
}
|
||||||
log.info("Number of threads used for BLAS: {}", getMaxThreads());
|
log.info("Number of threads used for BLAS: {}", getMaxThreads());
|
||||||
|
|
|
@ -318,7 +318,7 @@ public class CudaExecutioner extends DefaultOpExecutioner {
|
||||||
|
|
||||||
AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
|
AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
|
||||||
} else {
|
} else {
|
||||||
nativeOps.execSummaryStats(xShapeInfoHostPointer, op.opNum(),
|
nativeOps.execSummaryStatsTad(xShapeInfoHostPointer, op.opNum(),
|
||||||
null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo,
|
null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo,
|
||||||
extraArgs,
|
extraArgs,
|
||||||
null, (LongPointer) hostZShapeInfo, AtomicAllocator.getInstance().getPointer(op.z(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context),
|
null, (LongPointer) hostZShapeInfo, AtomicAllocator.getInstance().getPointer(op.z(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context),
|
||||||
|
@ -360,7 +360,7 @@ public class CudaExecutioner extends DefaultOpExecutioner {
|
||||||
null, (LongPointer) hostZShapeInfo, AtomicAllocator.getInstance().getPointer(op.z(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context));
|
null, (LongPointer) hostZShapeInfo, AtomicAllocator.getInstance().getPointer(op.z(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context));
|
||||||
AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
|
AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
|
||||||
} else {
|
} else {
|
||||||
nativeOps.execReduce3(xShapeInfoHostPointer, op.opNum(),
|
nativeOps.execReduce3Tad(xShapeInfoHostPointer, op.opNum(),
|
||||||
null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo,
|
null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo,
|
||||||
extraArgs,
|
extraArgs,
|
||||||
null, (LongPointer) hostYShapeInfo, AtomicAllocator.getInstance().getPointer(op.y(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context),
|
null, (LongPointer) hostYShapeInfo, AtomicAllocator.getInstance().getPointer(op.y(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context),
|
||||||
|
@ -408,7 +408,7 @@ public class CudaExecutioner extends DefaultOpExecutioner {
|
||||||
} else {
|
} else {
|
||||||
switch (op.getOpType()) {
|
switch (op.getOpType()) {
|
||||||
case REDUCE_FLOAT:
|
case REDUCE_FLOAT:
|
||||||
nativeOps.execReduceFloat(xShapeInfoHostPointer, op.opNum(),
|
nativeOps.execReduceFloat2(xShapeInfoHostPointer, op.opNum(),
|
||||||
null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo,
|
null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo,
|
||||||
extraArgs,
|
extraArgs,
|
||||||
null, (LongPointer) hostZShapeInfo, AtomicAllocator.getInstance().getPointer(op.z(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context),
|
null, (LongPointer) hostZShapeInfo, AtomicAllocator.getInstance().getPointer(op.z(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context),
|
||||||
|
@ -418,7 +418,7 @@ public class CudaExecutioner extends DefaultOpExecutioner {
|
||||||
null);
|
null);
|
||||||
break;
|
break;
|
||||||
case REDUCE_BOOL:
|
case REDUCE_BOOL:
|
||||||
nativeOps.execReduceBool(xShapeInfoHostPointer, op.opNum(),
|
nativeOps.execReduceBool2(xShapeInfoHostPointer, op.opNum(),
|
||||||
null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo,
|
null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo,
|
||||||
extraArgs,
|
extraArgs,
|
||||||
null, (LongPointer) hostZShapeInfo, AtomicAllocator.getInstance().getPointer(op.z(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context),
|
null, (LongPointer) hostZShapeInfo, AtomicAllocator.getInstance().getPointer(op.z(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context),
|
||||||
|
@ -428,7 +428,7 @@ public class CudaExecutioner extends DefaultOpExecutioner {
|
||||||
null);
|
null);
|
||||||
break;
|
break;
|
||||||
case REDUCE_SAME:
|
case REDUCE_SAME:
|
||||||
nativeOps.execReduceSame(xShapeInfoHostPointer, op.opNum(),
|
nativeOps.execReduceSame2(xShapeInfoHostPointer, op.opNum(),
|
||||||
null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo,
|
null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo,
|
||||||
extraArgs,
|
extraArgs,
|
||||||
null, (LongPointer) hostZShapeInfo, AtomicAllocator.getInstance().getPointer(op.z(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context),
|
null, (LongPointer) hostZShapeInfo, AtomicAllocator.getInstance().getPointer(op.z(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context),
|
||||||
|
@ -438,7 +438,7 @@ public class CudaExecutioner extends DefaultOpExecutioner {
|
||||||
null);
|
null);
|
||||||
break;
|
break;
|
||||||
case REDUCE_LONG:
|
case REDUCE_LONG:
|
||||||
nativeOps.execReduceLong(xShapeInfoHostPointer, op.opNum(),
|
nativeOps.execReduceLong2(xShapeInfoHostPointer, op.opNum(),
|
||||||
null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo,
|
null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo,
|
||||||
extraArgs,
|
extraArgs,
|
||||||
null, (LongPointer) hostZShapeInfo, AtomicAllocator.getInstance().getPointer(op.z(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context),
|
null, (LongPointer) hostZShapeInfo, AtomicAllocator.getInstance().getPointer(op.z(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context),
|
||||||
|
@ -1027,7 +1027,7 @@ public class CudaExecutioner extends DefaultOpExecutioner {
|
||||||
if (op.y() != null) {
|
if (op.y() != null) {
|
||||||
val y = AtomicAllocator.getInstance().getPointer(op.y(), context);
|
val y = AtomicAllocator.getInstance().getPointer(op.y(), context);
|
||||||
val yShapeInfo = AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context);
|
val yShapeInfo = AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context);
|
||||||
nativeOps.execReduce3(xShapeInfoHostPointer, op.opNum(),
|
nativeOps.execReduce3Tad(xShapeInfoHostPointer, op.opNum(),
|
||||||
null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo,
|
null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo,
|
||||||
extraArgs,
|
extraArgs,
|
||||||
null, (LongPointer) hostYShapeInfo, y, (LongPointer) yShapeInfo,
|
null, (LongPointer) hostYShapeInfo, y, (LongPointer) yShapeInfo,
|
||||||
|
@ -1037,7 +1037,7 @@ public class CudaExecutioner extends DefaultOpExecutioner {
|
||||||
dimensionPointer, null, (LongPointer) devTadShapeInfo, (LongPointer) devTadOffsets, (LongPointer) yDevTadShapeInfo, (LongPointer) yDevTadOffsets);
|
dimensionPointer, null, (LongPointer) devTadShapeInfo, (LongPointer) devTadOffsets, (LongPointer) yDevTadShapeInfo, (LongPointer) yDevTadOffsets);
|
||||||
} else {
|
} else {
|
||||||
if (op instanceof Variance) {
|
if (op instanceof Variance) {
|
||||||
nativeOps.execSummaryStats(xShapeInfoHostPointer, op.opNum(),
|
nativeOps.execSummaryStatsTad(xShapeInfoHostPointer, op.opNum(),
|
||||||
null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo,
|
null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo,
|
||||||
extraArgs,
|
extraArgs,
|
||||||
null, (LongPointer) hostZShapeInfo, z, (LongPointer) zShapeInfo,
|
null, (LongPointer) hostZShapeInfo, z, (LongPointer) zShapeInfo,
|
||||||
|
@ -1051,7 +1051,7 @@ public class CudaExecutioner extends DefaultOpExecutioner {
|
||||||
} else {
|
} else {
|
||||||
switch (op.getOpType()) {
|
switch (op.getOpType()) {
|
||||||
case REDUCE_FLOAT:
|
case REDUCE_FLOAT:
|
||||||
nativeOps.execReduceFloat(xShapeInfoHostPointer, op.opNum(),
|
nativeOps.execReduceFloat2(xShapeInfoHostPointer, op.opNum(),
|
||||||
null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo,
|
null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo,
|
||||||
extraArgs,
|
extraArgs,
|
||||||
null, (LongPointer) hostZShapeInfo, z, (LongPointer) zShapeInfo,
|
null, (LongPointer) hostZShapeInfo, z, (LongPointer) zShapeInfo,
|
||||||
|
@ -1061,7 +1061,7 @@ public class CudaExecutioner extends DefaultOpExecutioner {
|
||||||
null);
|
null);
|
||||||
break;
|
break;
|
||||||
case REDUCE_SAME:
|
case REDUCE_SAME:
|
||||||
nativeOps.execReduceSame(xShapeInfoHostPointer, op.opNum(),
|
nativeOps.execReduceSame2(xShapeInfoHostPointer, op.opNum(),
|
||||||
null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo,
|
null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo,
|
||||||
extraArgs,
|
extraArgs,
|
||||||
null, (LongPointer) hostZShapeInfo, z, (LongPointer) zShapeInfo,
|
null, (LongPointer) hostZShapeInfo, z, (LongPointer) zShapeInfo,
|
||||||
|
@ -1071,7 +1071,7 @@ public class CudaExecutioner extends DefaultOpExecutioner {
|
||||||
null);
|
null);
|
||||||
break;
|
break;
|
||||||
case REDUCE_BOOL:
|
case REDUCE_BOOL:
|
||||||
nativeOps.execReduceBool(xShapeInfoHostPointer, op.opNum(),
|
nativeOps.execReduceBool2(xShapeInfoHostPointer, op.opNum(),
|
||||||
null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo,
|
null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo,
|
||||||
extraArgs,
|
extraArgs,
|
||||||
null, (LongPointer) hostZShapeInfo, z, (LongPointer) zShapeInfo,
|
null, (LongPointer) hostZShapeInfo, z, (LongPointer) zShapeInfo,
|
||||||
|
@ -1081,7 +1081,7 @@ public class CudaExecutioner extends DefaultOpExecutioner {
|
||||||
null);
|
null);
|
||||||
break;
|
break;
|
||||||
case REDUCE_LONG:
|
case REDUCE_LONG:
|
||||||
nativeOps.execReduceLong(xShapeInfoHostPointer, op.opNum(),
|
nativeOps.execReduceLong2(xShapeInfoHostPointer, op.opNum(),
|
||||||
null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo,
|
null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo,
|
||||||
extraArgs,
|
extraArgs,
|
||||||
null, (LongPointer) hostZShapeInfo, z, (LongPointer) zShapeInfo,
|
null, (LongPointer) hostZShapeInfo, z, (LongPointer) zShapeInfo,
|
||||||
|
@ -1159,7 +1159,7 @@ public class CudaExecutioner extends DefaultOpExecutioner {
|
||||||
|
|
||||||
switch (op.getOpType()) {
|
switch (op.getOpType()) {
|
||||||
case SCALAR:
|
case SCALAR:
|
||||||
nativeOps.execScalar(extraPointers, op.opNum(),
|
nativeOps.execScalarTad(extraPointers, op.opNum(),
|
||||||
null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo,
|
null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo,
|
||||||
null, (LongPointer) hostZShapeInfo, z, (LongPointer) zShapeInfo,
|
null, (LongPointer) hostZShapeInfo, z, (LongPointer) zShapeInfo,
|
||||||
null, (LongPointer) hostYShapeInfo, y, (LongPointer) yShapeInfo,
|
null, (LongPointer) hostYShapeInfo, y, (LongPointer) yShapeInfo,
|
||||||
|
@ -1172,7 +1172,7 @@ public class CudaExecutioner extends DefaultOpExecutioner {
|
||||||
(LongPointer) devTadShapeInfoZ, (LongPointer) devTadOffsetsZ);
|
(LongPointer) devTadShapeInfoZ, (LongPointer) devTadOffsetsZ);
|
||||||
break;
|
break;
|
||||||
case SCALAR_BOOL:
|
case SCALAR_BOOL:
|
||||||
nativeOps.execScalarBool(extraPointers, op.opNum(),
|
nativeOps.execScalarBoolTad(extraPointers, op.opNum(),
|
||||||
null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo,
|
null, (LongPointer) hostXShapeInfo, x, (LongPointer) xShapeInfo,
|
||||||
null, (LongPointer) hostZShapeInfo, z, (LongPointer) zShapeInfo,
|
null, (LongPointer) hostZShapeInfo, z, (LongPointer) zShapeInfo,
|
||||||
null, (LongPointer) hostYShapeInfo, y, (LongPointer) yShapeInfo,
|
null, (LongPointer) hostYShapeInfo, y, (LongPointer) yShapeInfo,
|
||||||
|
@ -1777,7 +1777,7 @@ public class CudaExecutioner extends DefaultOpExecutioner {
|
||||||
|
|
||||||
if (op.x() != null && op.y() != null && op.z() != null) {
|
if (op.x() != null && op.y() != null && op.z() != null) {
|
||||||
// triple arg call
|
// triple arg call
|
||||||
nativeOps.execRandom(extraZZ, op.opNum(), rng.getStatePointer(), // rng state ptr
|
nativeOps.execRandom3(extraZZ, op.opNum(), rng.getStatePointer(), // rng state ptr
|
||||||
null, (LongPointer) hostXShapeInfo, AtomicAllocator.getInstance().getPointer(op.x(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.x().shapeInfoDataBuffer(), context),
|
null, (LongPointer) hostXShapeInfo, AtomicAllocator.getInstance().getPointer(op.x(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.x().shapeInfoDataBuffer(), context),
|
||||||
null, (LongPointer) hostYShapeInfo, AtomicAllocator.getInstance().getPointer(op.y(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context),
|
null, (LongPointer) hostYShapeInfo, AtomicAllocator.getInstance().getPointer(op.y(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context),
|
||||||
null, (LongPointer) hostZShapeInfo, AtomicAllocator.getInstance().getPointer(op.z(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context),
|
null, (LongPointer) hostZShapeInfo, AtomicAllocator.getInstance().getPointer(op.z(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context),
|
||||||
|
@ -1785,7 +1785,7 @@ public class CudaExecutioner extends DefaultOpExecutioner {
|
||||||
|
|
||||||
} else if (op.x() != null && op.z() != null) {
|
} else if (op.x() != null && op.z() != null) {
|
||||||
//double arg call
|
//double arg call
|
||||||
nativeOps.execRandom(extraZZ, op.opNum(), rng.getStatePointer(), // rng state ptr
|
nativeOps.execRandom2(extraZZ, op.opNum(), rng.getStatePointer(), // rng state ptr
|
||||||
null, (LongPointer) hostXShapeInfo, AtomicAllocator.getInstance().getPointer(op.x(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.x().shapeInfoDataBuffer(), context),
|
null, (LongPointer) hostXShapeInfo, AtomicAllocator.getInstance().getPointer(op.x(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.x().shapeInfoDataBuffer(), context),
|
||||||
null, (LongPointer) hostZShapeInfo, AtomicAllocator.getInstance().getPointer(op.z(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context),
|
null, (LongPointer) hostZShapeInfo, AtomicAllocator.getInstance().getPointer(op.z(), context), (LongPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context),
|
||||||
AtomicAllocator.getInstance().getPointer(op.extraArgsDataBuff(op.z().dataType()),context));
|
AtomicAllocator.getInstance().getPointer(op.extraArgsDataBuff(op.z().dataType()),context));
|
||||||
|
@ -2208,7 +2208,7 @@ public class CudaExecutioner extends DefaultOpExecutioner {
|
||||||
for (val t: op.tArgs())
|
for (val t: op.tArgs())
|
||||||
tArgs.put(cnt++, (float) t);
|
tArgs.put(cnt++, (float) t);
|
||||||
|
|
||||||
val ptrptr = (Nd4jCuda.ShapeList) nativeOps.calculateOutputShapes(null, hash, inputBuffers, inputShapes, op.inputArguments().length, tArgs, op.tArgs().length, iArgs, op.iArgs().length, bArgs, op.numBArguments());
|
val ptrptr = (Nd4jCuda.ShapeList) nativeOps.calculateOutputShapes2(null, hash, inputBuffers, inputShapes, op.inputArguments().length, tArgs, op.tArgs().length, iArgs, op.iArgs().length, bArgs, op.numBArguments());
|
||||||
|
|
||||||
if (ptrptr == null)
|
if (ptrptr == null)
|
||||||
throw new RuntimeException();
|
throw new RuntimeException();
|
||||||
|
@ -2539,7 +2539,7 @@ public class CudaExecutioner extends DefaultOpExecutioner {
|
||||||
val ctx = (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext();
|
val ctx = (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext();
|
||||||
((CudaOpContext) context).setCudaStream(ctx.getOldStream(), ctx.getBufferReduction(), ctx.getBufferAllocation());
|
((CudaOpContext) context).setCudaStream(ctx.getOldStream(), ctx.getBufferReduction(), ctx.getBufferAllocation());
|
||||||
|
|
||||||
nativeOps.execCustomOp(null, op.opHash(), context.contextPointer());
|
nativeOps.execCustomOp2(null, op.opHash(), context.contextPointer());
|
||||||
|
|
||||||
if (context.getOutputArrays().isEmpty())
|
if (context.getOutputArrays().isEmpty())
|
||||||
return new INDArray[0];
|
return new INDArray[0];
|
||||||
|
@ -2607,7 +2607,7 @@ public class CudaExecutioner extends DefaultOpExecutioner {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public DataBuffer createConstantBuffer(long[] values, DataType desiredType) {
|
public DataBuffer createConstantBuffer(long[] values, DataType desiredType) {
|
||||||
val dbf = (Nd4jCuda.ConstantDataBuffer) nativeOps.constantBuffer(desiredType.toInt(), new LongPointer(values), values.length);
|
val dbf = (Nd4jCuda.ConstantDataBuffer) nativeOps.constantBufferLong(desiredType.toInt(), new LongPointer(values), values.length);
|
||||||
|
|
||||||
val buffer = Nd4j.createBuffer(dbf.primary(), dbf.special(), values.length, desiredType);
|
val buffer = Nd4j.createBuffer(dbf.primary(), dbf.special(), values.length, desiredType);
|
||||||
buffer.setConstant(true);
|
buffer.setConstant(true);
|
||||||
|
@ -2617,7 +2617,7 @@ public class CudaExecutioner extends DefaultOpExecutioner {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public DataBuffer createConstantBuffer(double[] values, DataType desiredType) {
|
public DataBuffer createConstantBuffer(double[] values, DataType desiredType) {
|
||||||
val dbf = (Nd4jCuda.ConstantDataBuffer) nativeOps.constantBuffer(desiredType.toInt(), new DoublePointer(values), values.length);
|
val dbf = (Nd4jCuda.ConstantDataBuffer) nativeOps.constantBufferDouble(desiredType.toInt(), new DoublePointer(values), values.length);
|
||||||
|
|
||||||
val buffer = Nd4j.createBuffer(dbf.primary(), dbf.special(), values.length, desiredType);
|
val buffer = Nd4j.createBuffer(dbf.primary(), dbf.special(), values.length, desiredType);
|
||||||
buffer.setConstant(true);
|
buffer.setConstant(true);
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,20 @@
|
||||||
|
/*******************************************************************************
|
||||||
|
* Copyright (c) 2015-2019 Skymind, Inc.
|
||||||
|
*
|
||||||
|
* This program and the accompanying materials are made available under the
|
||||||
|
* terms of the Apache License, Version 2.0 which is available at
|
||||||
|
* https://www.apache.org/licenses/LICENSE-2.0.
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
|
******************************************************************************/
|
||||||
|
|
||||||
|
package org.nd4j.nativeblas;
|
||||||
|
|
||||||
|
public abstract class Nd4jCudaHelper extends Nd4jCudaPresets implements NativeOps {
|
||||||
|
}
|
|
@ -26,7 +26,7 @@ import org.bytedeco.javacpp.tools.InfoMapper;
|
||||||
*
|
*
|
||||||
* @author saudet
|
* @author saudet
|
||||||
*/
|
*/
|
||||||
@Properties(target = "org.nd4j.nativeblas.Nd4jCuda",
|
@Properties(target = "org.nd4j.nativeblas.Nd4jCuda", helper = "org.nd4j.nativeblas.Nd4jCudaHelper",
|
||||||
value = {@Platform(define = "LIBND4J_ALL_OPS", include = {
|
value = {@Platform(define = "LIBND4J_ALL_OPS", include = {
|
||||||
"array/DataType.h",
|
"array/DataType.h",
|
||||||
"array/ConstantDescriptor.h",
|
"array/ConstantDescriptor.h",
|
||||||
|
@ -112,7 +112,7 @@ public class Nd4jCudaPresets implements InfoMapper {
|
||||||
public void map(InfoMap infoMap) {
|
public void map(InfoMap infoMap) {
|
||||||
infoMap.put(new Info("thread_local", "ND4J_EXPORT", "INLINEDEF", "CUBLASWINAPI", "FORCEINLINE",
|
infoMap.put(new Info("thread_local", "ND4J_EXPORT", "INLINEDEF", "CUBLASWINAPI", "FORCEINLINE",
|
||||||
"_CUDA_H", "_CUDA_D", "_CUDA_G", "_CUDA_HD", "LIBND4J_ALL_OPS", "NOT_EXCLUDED").cppTypes().annotations())
|
"_CUDA_H", "_CUDA_D", "_CUDA_G", "_CUDA_HD", "LIBND4J_ALL_OPS", "NOT_EXCLUDED").cppTypes().annotations())
|
||||||
.put(new Info("NativeOps").base("org.nd4j.nativeblas.NativeOps"))
|
.put(new Info("NativeOps.h").objectify())
|
||||||
.put(new Info("const char").valueTypes("byte").pointerTypes("@Cast(\"char*\") String",
|
.put(new Info("const char").valueTypes("byte").pointerTypes("@Cast(\"char*\") String",
|
||||||
"@Cast(\"char*\") BytePointer"))
|
"@Cast(\"char*\") BytePointer"))
|
||||||
.put(new Info("char").valueTypes("char").pointerTypes("@Cast(\"char*\") BytePointer",
|
.put(new Info("char").valueTypes("char").pointerTypes("@Cast(\"char*\") BytePointer",
|
||||||
|
|
|
@ -20,7 +20,7 @@ dtype = float
|
||||||
complex.double.class = org.nd4j.linalg.jcublas.complex.ComplexDouble
|
complex.double.class = org.nd4j.linalg.jcublas.complex.ComplexDouble
|
||||||
complex.float.class = org.nd4j.linalg.jcublas.complex.ComplexFloat
|
complex.float.class = org.nd4j.linalg.jcublas.complex.ComplexFloat
|
||||||
blas.ops= org.nd4j.linalg.jcublas.JCublasWrapper
|
blas.ops= org.nd4j.linalg.jcublas.JCublasWrapper
|
||||||
native.ops= org.nd4j.nativeblas.Nd4jCuda$NativeOps
|
native.ops= org.nd4j.nativeblas.Nd4jCuda
|
||||||
ndarrayfactory.class = org.nd4j.linalg.jcublas.JCublasNDArrayFactory
|
ndarrayfactory.class = org.nd4j.linalg.jcublas.JCublasNDArrayFactory
|
||||||
affinitymanager = org.nd4j.jita.concurrency.CudaAffinityManager
|
affinitymanager = org.nd4j.jita.concurrency.CudaAffinityManager
|
||||||
memorymanager = org.nd4j.jita.memory.CudaMemoryManager
|
memorymanager = org.nd4j.jita.memory.CudaMemoryManager
|
||||||
|
|
|
@ -392,7 +392,7 @@ public class NativeOpExecutioner extends DefaultOpExecutioner {
|
||||||
} else {
|
} else {
|
||||||
Variance var = (Variance) op;
|
Variance var = (Variance) op;
|
||||||
try {
|
try {
|
||||||
loop.execSummaryStats(null, op.opNum(),
|
loop.execSummaryStatsTad(null, op.opNum(),
|
||||||
op.x().data().addressPointer(), (LongPointer) op.x().shapeInfoDataBuffer().addressPointer(),
|
op.x().data().addressPointer(), (LongPointer) op.x().shapeInfoDataBuffer().addressPointer(),
|
||||||
null, null,
|
null, null,
|
||||||
getPointerForExtraArgs(op, op.z().dataType()),
|
getPointerForExtraArgs(op, op.z().dataType()),
|
||||||
|
@ -445,7 +445,7 @@ public class NativeOpExecutioner extends DefaultOpExecutioner {
|
||||||
null, null);
|
null, null);
|
||||||
} else {
|
} else {
|
||||||
try {
|
try {
|
||||||
loop.execReduce3(null, op.opNum(),
|
loop.execReduce3Tad(null, op.opNum(),
|
||||||
op.x().data().addressPointer(), (LongPointer) op.x().shapeInfoDataBuffer().addressPointer(),
|
op.x().data().addressPointer(), (LongPointer) op.x().shapeInfoDataBuffer().addressPointer(),
|
||||||
null, null,
|
null, null,
|
||||||
getPointerForExtraArgs(op, op.z().dataType()),
|
getPointerForExtraArgs(op, op.z().dataType()),
|
||||||
|
@ -505,7 +505,7 @@ public class NativeOpExecutioner extends DefaultOpExecutioner {
|
||||||
} else {
|
} else {
|
||||||
switch (op.getOpType()) {
|
switch (op.getOpType()) {
|
||||||
case REDUCE_FLOAT:
|
case REDUCE_FLOAT:
|
||||||
loop.execReduceFloat(null, op.opNum(),
|
loop.execReduceFloat2(null, op.opNum(),
|
||||||
op.x().data().addressPointer(), (LongPointer) op.x().shapeInfoDataBuffer().addressPointer(),
|
op.x().data().addressPointer(), (LongPointer) op.x().shapeInfoDataBuffer().addressPointer(),
|
||||||
null, null,
|
null, null,
|
||||||
getPointerForExtraArgs(op, op.z().dataType()),
|
getPointerForExtraArgs(op, op.z().dataType()),
|
||||||
|
@ -517,7 +517,7 @@ public class NativeOpExecutioner extends DefaultOpExecutioner {
|
||||||
null);
|
null);
|
||||||
break;
|
break;
|
||||||
case REDUCE_LONG:
|
case REDUCE_LONG:
|
||||||
loop.execReduceLong(null, op.opNum(),
|
loop.execReduceLong2(null, op.opNum(),
|
||||||
op.x().data().addressPointer(), (LongPointer) op.x().shapeInfoDataBuffer().addressPointer(),
|
op.x().data().addressPointer(), (LongPointer) op.x().shapeInfoDataBuffer().addressPointer(),
|
||||||
null, null,
|
null, null,
|
||||||
getPointerForExtraArgs(op, op.x().dataType()),
|
getPointerForExtraArgs(op, op.x().dataType()),
|
||||||
|
@ -529,7 +529,7 @@ public class NativeOpExecutioner extends DefaultOpExecutioner {
|
||||||
null);
|
null);
|
||||||
break;
|
break;
|
||||||
case REDUCE_SAME:
|
case REDUCE_SAME:
|
||||||
loop.execReduceSame(null, op.opNum(),
|
loop.execReduceSame2(null, op.opNum(),
|
||||||
op.x().data().addressPointer(), (LongPointer) op.x().shapeInfoDataBuffer().addressPointer(),
|
op.x().data().addressPointer(), (LongPointer) op.x().shapeInfoDataBuffer().addressPointer(),
|
||||||
null, null,
|
null, null,
|
||||||
getPointerForExtraArgs(op, op.z().dataType()),
|
getPointerForExtraArgs(op, op.z().dataType()),
|
||||||
|
@ -541,7 +541,7 @@ public class NativeOpExecutioner extends DefaultOpExecutioner {
|
||||||
null);
|
null);
|
||||||
break;
|
break;
|
||||||
case REDUCE_BOOL:
|
case REDUCE_BOOL:
|
||||||
loop.execReduceBool(null, op.opNum(),
|
loop.execReduceBool2(null, op.opNum(),
|
||||||
op.x().data().addressPointer(), (LongPointer) op.x().shapeInfoDataBuffer().addressPointer(),
|
op.x().data().addressPointer(), (LongPointer) op.x().shapeInfoDataBuffer().addressPointer(),
|
||||||
null, null,
|
null, null,
|
||||||
getPointerForExtraArgs(op, op.x().dataType()),
|
getPointerForExtraArgs(op, op.x().dataType()),
|
||||||
|
@ -604,7 +604,7 @@ public class NativeOpExecutioner extends DefaultOpExecutioner {
|
||||||
|
|
||||||
switch (op.getOpType()) {
|
switch (op.getOpType()) {
|
||||||
case SCALAR:
|
case SCALAR:
|
||||||
loop.execScalar(null, op.opNum(),
|
loop.execScalarTad(null, op.opNum(),
|
||||||
op.x().data().addressPointer(), (LongPointer) op.x().shapeInfoDataBuffer().addressPointer(),
|
op.x().data().addressPointer(), (LongPointer) op.x().shapeInfoDataBuffer().addressPointer(),
|
||||||
null, null,
|
null, null,
|
||||||
op.z().data().addressPointer(), (LongPointer) op.z().shapeInfoDataBuffer().addressPointer(),
|
op.z().data().addressPointer(), (LongPointer) op.z().shapeInfoDataBuffer().addressPointer(),
|
||||||
|
@ -620,7 +620,7 @@ public class NativeOpExecutioner extends DefaultOpExecutioner {
|
||||||
(LongPointer) devTadShapeInfoZ, (LongPointer) devTadOffsetsZ);
|
(LongPointer) devTadShapeInfoZ, (LongPointer) devTadOffsetsZ);
|
||||||
break;
|
break;
|
||||||
case SCALAR_BOOL:
|
case SCALAR_BOOL:
|
||||||
loop.execScalarBool(null, op.opNum(),
|
loop.execScalarBoolTad(null, op.opNum(),
|
||||||
op.x().data().addressPointer(), (LongPointer) op.x().shapeInfoDataBuffer().addressPointer(),
|
op.x().data().addressPointer(), (LongPointer) op.x().shapeInfoDataBuffer().addressPointer(),
|
||||||
null, null,
|
null, null,
|
||||||
op.z().data().addressPointer(), (LongPointer) op.z().shapeInfoDataBuffer().addressPointer(),
|
op.z().data().addressPointer(), (LongPointer) op.z().shapeInfoDataBuffer().addressPointer(),
|
||||||
|
@ -1255,7 +1255,7 @@ public class NativeOpExecutioner extends DefaultOpExecutioner {
|
||||||
|
|
||||||
if (op.x() != null && op.y() != null && op.z() != null) {
|
if (op.x() != null && op.y() != null && op.z() != null) {
|
||||||
// triple arg call
|
// triple arg call
|
||||||
loop.execRandom(null, op.opNum(), rng.getStatePointer(), // rng state ptr
|
loop.execRandom3(null, op.opNum(), rng.getStatePointer(), // rng state ptr
|
||||||
op.x().data().addressPointer(), (LongPointer) op.x().shapeInfoDataBuffer().addressPointer(),
|
op.x().data().addressPointer(), (LongPointer) op.x().shapeInfoDataBuffer().addressPointer(),
|
||||||
null, null,
|
null, null,
|
||||||
op.y().data().addressPointer(), (LongPointer) op.y().shapeInfoDataBuffer().addressPointer(),
|
op.y().data().addressPointer(), (LongPointer) op.y().shapeInfoDataBuffer().addressPointer(),
|
||||||
|
@ -1265,7 +1265,7 @@ public class NativeOpExecutioner extends DefaultOpExecutioner {
|
||||||
op.extraArgsDataBuff(op.z().dataType()).addressPointer());
|
op.extraArgsDataBuff(op.z().dataType()).addressPointer());
|
||||||
} else if (op.x() != null && op.z() != null) {
|
} else if (op.x() != null && op.z() != null) {
|
||||||
//double arg call
|
//double arg call
|
||||||
loop.execRandom(null, op.opNum(), rng.getStatePointer(), // rng state ptr
|
loop.execRandom2(null, op.opNum(), rng.getStatePointer(), // rng state ptr
|
||||||
op.x().data().addressPointer(), (LongPointer) op.x().shapeInfoDataBuffer().addressPointer(),
|
op.x().data().addressPointer(), (LongPointer) op.x().shapeInfoDataBuffer().addressPointer(),
|
||||||
null, null,
|
null, null,
|
||||||
op.z().data().addressPointer(), (LongPointer) op.z().shapeInfoDataBuffer().addressPointer(),
|
op.z().data().addressPointer(), (LongPointer) op.z().shapeInfoDataBuffer().addressPointer(),
|
||||||
|
@ -1862,7 +1862,7 @@ public class NativeOpExecutioner extends DefaultOpExecutioner {
|
||||||
|
|
||||||
Nd4jCpu.ShapeList ptrptr;
|
Nd4jCpu.ShapeList ptrptr;
|
||||||
try {
|
try {
|
||||||
ptrptr = (Nd4jCpu.ShapeList) loop.calculateOutputShapes(null,
|
ptrptr = (Nd4jCpu.ShapeList) loop.calculateOutputShapes2(null,
|
||||||
hash, inputBuffers, inputShapes, op.numInputArguments(), tArgs,
|
hash, inputBuffers, inputShapes, op.numInputArguments(), tArgs,
|
||||||
op.numTArguments(), iArgs, op.numIArguments(), bArgs, op.numBArguments());
|
op.numTArguments(), iArgs, op.numIArguments(), bArgs, op.numBArguments());
|
||||||
} catch (Throwable t){
|
} catch (Throwable t){
|
||||||
|
@ -2070,7 +2070,7 @@ public class NativeOpExecutioner extends DefaultOpExecutioner {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
loop.execCustomOp(null, op.opHash(), context.contextPointer());
|
loop.execCustomOp2(null, op.opHash(), context.contextPointer());
|
||||||
|
|
||||||
if (context.getOutputArrays().isEmpty())
|
if (context.getOutputArrays().isEmpty())
|
||||||
return new INDArray[0];
|
return new INDArray[0];
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,20 @@
|
||||||
|
/*******************************************************************************
|
||||||
|
* Copyright (c) 2015-2019 Skymind, Inc.
|
||||||
|
*
|
||||||
|
* This program and the accompanying materials are made available under the
|
||||||
|
* terms of the Apache License, Version 2.0 which is available at
|
||||||
|
* https://www.apache.org/licenses/LICENSE-2.0.
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
|
******************************************************************************/
|
||||||
|
|
||||||
|
package org.nd4j.nativeblas;
|
||||||
|
|
||||||
|
public abstract class Nd4jCpuHelper extends Nd4jCpuPresets implements NativeOps {
|
||||||
|
}
|
|
@ -31,7 +31,7 @@ import java.util.Scanner;
|
||||||
*
|
*
|
||||||
* @author saudet
|
* @author saudet
|
||||||
*/
|
*/
|
||||||
@Properties(target = "org.nd4j.nativeblas.Nd4jCpu",
|
@Properties(target = "org.nd4j.nativeblas.Nd4jCpu", helper = "org.nd4j.nativeblas.Nd4jCpuHelper",
|
||||||
value = {@Platform(define = "LIBND4J_ALL_OPS", include = {
|
value = {@Platform(define = "LIBND4J_ALL_OPS", include = {
|
||||||
"memory/MemoryType.h",
|
"memory/MemoryType.h",
|
||||||
"array/DataType.h",
|
"array/DataType.h",
|
||||||
|
@ -155,7 +155,7 @@ public class Nd4jCpuPresets implements InfoMapper, BuildEnabled {
|
||||||
public void map(InfoMap infoMap) {
|
public void map(InfoMap infoMap) {
|
||||||
infoMap.put(new Info("thread_local", "ND4J_EXPORT", "INLINEDEF", "CUBLASWINAPI", "FORCEINLINE",
|
infoMap.put(new Info("thread_local", "ND4J_EXPORT", "INLINEDEF", "CUBLASWINAPI", "FORCEINLINE",
|
||||||
"_CUDA_H", "_CUDA_D", "_CUDA_G", "_CUDA_HD", "LIBND4J_ALL_OPS", "NOT_EXCLUDED").cppTypes().annotations())
|
"_CUDA_H", "_CUDA_D", "_CUDA_G", "_CUDA_HD", "LIBND4J_ALL_OPS", "NOT_EXCLUDED").cppTypes().annotations())
|
||||||
.put(new Info("NativeOps").base("org.nd4j.nativeblas.NativeOps"))
|
.put(new Info("NativeOps.h").objectify())
|
||||||
.put(new Info("const char").valueTypes("byte").pointerTypes("@Cast(\"char*\") String",
|
.put(new Info("const char").valueTypes("byte").pointerTypes("@Cast(\"char*\") String",
|
||||||
"@Cast(\"char*\") BytePointer"))
|
"@Cast(\"char*\") BytePointer"))
|
||||||
.put(new Info("char").valueTypes("char").pointerTypes("@Cast(\"char*\") BytePointer",
|
.put(new Info("char").valueTypes("char").pointerTypes("@Cast(\"char*\") BytePointer",
|
||||||
|
|
|
@ -25,7 +25,7 @@ dtype = float
|
||||||
complex.double.class = org.nd4j.linalg.cpu.nativecpu.complex.ComplexDouble
|
complex.double.class = org.nd4j.linalg.cpu.nativecpu.complex.ComplexDouble
|
||||||
blas.ops = org.nd4j.linalg.cpu.nativecpu.BlasWrapper
|
blas.ops = org.nd4j.linalg.cpu.nativecpu.BlasWrapper
|
||||||
sparseblas.ops = org.nd4j.linalg.cpu.nativecpu.SparseBlasWrapper
|
sparseblas.ops = org.nd4j.linalg.cpu.nativecpu.SparseBlasWrapper
|
||||||
native.ops= org.nd4j.nativeblas.Nd4jCpu$NativeOps
|
native.ops= org.nd4j.nativeblas.Nd4jCpu
|
||||||
ndarrayfactory.class = org.nd4j.linalg.cpu.nativecpu.CpuNDArrayFactory
|
ndarrayfactory.class = org.nd4j.linalg.cpu.nativecpu.CpuNDArrayFactory
|
||||||
sparsendarrayfactory.class = org.nd4j.linalg.cpu.nativecpu.CpuSparseNDArrayFactory
|
sparsendarrayfactory.class = org.nd4j.linalg.cpu.nativecpu.CpuSparseNDArrayFactory
|
||||||
ndarray.order = c
|
ndarray.order = c
|
||||||
|
|
4
pom.xml
4
pom.xml
|
@ -288,7 +288,7 @@
|
||||||
<javacpp.platform.extension/> <!-- -Djavacpp.platform.extension=-avx512 -->
|
<javacpp.platform.extension/> <!-- -Djavacpp.platform.extension=-avx512 -->
|
||||||
<javacpp.platform.properties>${javacpp.platform}</javacpp.platform.properties>
|
<javacpp.platform.properties>${javacpp.platform}</javacpp.platform.properties>
|
||||||
|
|
||||||
<javacpp.version>1.5.1</javacpp.version>
|
<javacpp.version>1.5.2-SNAPSHOT</javacpp.version>
|
||||||
<javacpp-presets.version>1.5.1</javacpp-presets.version>
|
<javacpp-presets.version>1.5.1</javacpp-presets.version>
|
||||||
<javacv.version>1.5.1</javacv.version>
|
<javacv.version>1.5.1</javacv.version>
|
||||||
|
|
||||||
|
@ -298,7 +298,7 @@
|
||||||
<openblas.version>0.3.6</openblas.version>
|
<openblas.version>0.3.6</openblas.version>
|
||||||
<mkl.version>2019.4</mkl.version>
|
<mkl.version>2019.4</mkl.version>
|
||||||
<mkl-dnn.version>0.20</mkl-dnn.version>
|
<mkl-dnn.version>0.20</mkl-dnn.version>
|
||||||
<mkl-dnn.javacpp.version>${mkl-dnn.version}-${javacpp.version}</mkl-dnn.javacpp.version>
|
<mkl-dnn.javacpp.version>${mkl-dnn.version}-${javacpp-presets.version}</mkl-dnn.javacpp.version>
|
||||||
<opencv.version>4.1.0</opencv.version>
|
<opencv.version>4.1.0</opencv.version>
|
||||||
<ffmpeg.version>4.1.3</ffmpeg.version>
|
<ffmpeg.version>4.1.3</ffmpeg.version>
|
||||||
<leptonica.version>1.78.0</leptonica.version>
|
<leptonica.version>1.78.0</leptonica.version>
|
||||||
|
|
Loading…
Reference in New Issue