Perf improvements (#242)

* initial commit

Signed-off-by: raver119 <raver119@gmail.com>

* meh

Signed-off-by: raver119 <raver119@gmail.com>

* better ExpandDims impl

Signed-off-by: raver119 <raver119@gmail.com>

* better Squeeze impl

Signed-off-by: raver119 <raver119@gmail.com>

* better Softmax impl

Signed-off-by: raver119 <raver119@gmail.com>

* one test disabled

Signed-off-by: raver119 <raver119@gmail.com>

* more accurate impl

Signed-off-by: raver119 <raver119@gmail.com>

* - GraphProfiler now prints full shapeInfo instead of shape
- softmax typo fix

Signed-off-by: raver119 <raver119@gmail.com>
master
raver119 2020-02-14 16:20:31 +03:00 committed by GitHub
parent 6e6289b6b9
commit 9e3c1b02b1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 822 additions and 172 deletions

View File

@ -1088,8 +1088,23 @@ namespace nd4j {
if (e < node->input()->size() - 1)
nd4j_printf(", ", "");
}
if (node->opType() == OpType_CUSTOM) {
auto ctx = node->protoContext();
if (ctx->getIArguments()->size() > 0) {
printf("]; iArgs: [");
for (int e = 0; e < ctx->getIArguments()->size(); e++) {
printf("%i", ctx->getIArguments()->at(e));
if (e < ctx->getIArguments()->size() - 1)
nd4j_printf(", ", "");
}
}
}
nd4j_printf("]; \n", "");
// printf("\n");
fflush(stdout);
}

View File

@ -117,11 +117,11 @@ namespace nd4j {
}
void NodeProfile::addInputShape(Nd4jLong *shapeInfo) {
_inputShapes.emplace_back(ShapeUtils::shapeAsString(shapeInfo));
_inputShapes.emplace_back(ShapeUtils::shapeInfoAsString(shapeInfo));
}
void NodeProfile::addOutputShape(Nd4jLong *shapeInfo) {
_outputShapes.emplace_back(ShapeUtils::shapeAsString(shapeInfo));
_outputShapes.emplace_back(ShapeUtils::shapeInfoAsString(shapeInfo));
}
void NodeProfile::merge(NodeProfile *other) {

View File

@ -97,6 +97,8 @@ namespace nd4j {
static std::string shapeAsString(const int rank, const Nd4jLong* shapeInfo);
static std::string strideAsString(const NDArray* array);
// returns every element of the given shapeInfo buffer as a bracketed, comma-separated string, throws std::runtime_error if shapeInfo is nullptr
static std::string shapeInfoAsString(const Nd4jLong* shapeInfo);
static std::vector<Nd4jLong> shapeAsVector(const Nd4jLong* shapeInfo);
// evaluate shapeInfo for diagonal array which is made using input arr elements as diagonal

View File

@ -666,6 +666,26 @@ Nd4jLong* ShapeUtils::evalTileShapeInfo(const NDArray& arr, const std::vector<Nd
return result;
}
// Renders a whole shapeInfo buffer as a string of the form "[v0, v1, ..., vN]".
// The number of printed elements is shape::shapeInfoLength() evaluated for the rank stored in shapeInfo[0].
// shapeInfo - buffer to print, must not be nullptr
// returns   - all shapeInfo elements, comma-separated and enclosed in square brackets
// throws    - std::runtime_error if shapeInfo is nullptr
std::string ShapeUtils::shapeInfoAsString(const Nd4jLong* shapeInfo) {

    // the message names this method (it previously pointed at shapeAsString, which misdirected debugging)
    if(!shapeInfo)
        throw std::runtime_error("ShapeUtils::shapeInfoAsString method: input shapeInfo must not be nullptr !");

    const int len = shape::shapeInfoLength(shapeInfo[0]);

    std::string result("[");
    for (int e = 0; e < len; e++) {
        result += flatbuffers::NumToString(shapeInfo[e]);

        // separator goes between elements only, never after the last one
        if (e < len - 1)
            result.append(", ");
    }
    result.append("]");

    return result;
}
std::string ShapeUtils::shapeAsString(const int rank, const Nd4jLong* shapeInfo) {
if(!shapeInfo)

View File

@ -47,11 +47,12 @@ namespace nd4j {
shape.insert(shape.begin() + axis, 1);
auto tmp = input->reshape(input->ordering(), shape);
output->assign(tmp);
STORE_RESULT(output);
if (input->ews() == 1 && output->ews() == 1 && input->ordering() == output->ordering()) {
output->dataBuffer()->copyBufferFrom(*input->dataBuffer().get(), output->lengthOf() * DataTypeUtils::sizeOfElement(output->dataType()), 0, input->bufferOffset());
} else {
auto tmp = input->reshape(input->ordering(), shape);
output->assign(tmp);
}
return Status::OK();
}

View File

@ -25,7 +25,7 @@
namespace nd4j {
namespace ops {
CUSTOM_OP_IMPL(squeeze, 1, 1, true, 0, -2) {
CUSTOM_OP_IMPL(squeeze, 1, 1, false, 0, -2) {
auto input = INPUT_VARIABLE(0);
auto output = OUTPUT_VARIABLE(0);
@ -73,8 +73,12 @@ namespace nd4j {
if (block.isInplace()) {
output->reshapei(input->ordering(), shape, false);
} else {
auto tmp = input->reshape(input->ordering(), shape);
output->assign(tmp);
if (input->ews() == 1 && output->ews() == 1 && input->ordering() == output->ordering()) {
output->dataBuffer()->copyBufferFrom(*input->dataBuffer().get(), output->lengthOf() * DataTypeUtils::sizeOfElement(output->dataType()), 0, input->bufferOffset());
} else {
auto tmp = input->reshape(input->ordering(), shape);
output->assign(tmp);
}
}
return Status::OK();

View File

@ -191,6 +191,70 @@ void softMaxForVector(nd4j::LaunchContext * context, const NDArray& input, NDArr
BUILD_SINGLE_SELECTOR(xType, logSoftMaxForVector_, (input.getBuffer(), input.getShapeInfo(), output.buffer(), output.shapeInfo()), FLOAT_TYPES);
}
// Forward declaration of the per-sub-array softmax kernel; it is declared here so that the explicit
// float specialization can appear before the generic definition.
// input/output - element buffers, offsets - start offset of every sub-array (applied to both buffers),
// numOfSubArrs - number of sub-arrays to process, tadLen - number of elements in each sub-array
template <typename T>
void softmax_loop(T *input, T *output, Nd4jLong *offsets, Nd4jLong numOfSubArrs, uint32_t tadLen);
// float specialization of softmax_loop: uses the built-in OpenMP "max" and "+" reductions.
// For every sub-array (located at offsets[i] in both buffers, elements read consecutively) it
//   1) finds the maximum element,
//   2) writes exp(x - max) to the output while accumulating the sum of those values,
//   3) divides every output element by that sum.
// Sub-arrays are distributed through samediff::Threads::parallel_tad.
template <>
FORCEINLINE void softmax_loop(float *input, float *output, Nd4jLong *offsets, Nd4jLong numOfSubArrs, uint32_t tadLen) {

    auto func = PRAGMA_THREADS_FOR {
        for (auto tad = start; tad < stop; tad++) {
            const auto tadOffset = offsets[tad];
            auto src = input + tadOffset;
            auto dst = output + tadOffset;

            float maxVal = -DataTypeUtils::max<float>();
            float expSum = 0.f;

            // pass 1: maximum of the sub-array, subtracted below before exponentiation
            #pragma omp simd reduction(max:maxVal)
            for (uint k = 0; k < tadLen; ++k)
                maxVal = nd4j::math::nd4j_max<float>(maxVal, src[k]);

            // pass 2: shifted exponentials and their running sum
            #pragma omp simd reduction(+:expSum)
            for (uint k = 0; k < tadLen; ++k) {
                float shifted = nd4j::math::nd4j_exp<float, float>(src[k] - maxVal);
                dst[k] = shifted;
                expSum += shifted;
            }

            // pass 3: normalization
            #pragma omp simd
            for (uint k = 0; k < tadLen; ++k)
                dst[k] /= expSum;
        }
    };

    samediff::Threads::parallel_tad(func,0, numOfSubArrs);
}
// Generic softmax_loop for any element type T.
// Same three passes as the float specialization (max, shifted exp + sum, normalize), but the reductions
// use the identifiers maxT / sumT, which are not defined in this block
// (NOTE(review): presumably provided elsewhere via "#pragma omp declare reduction" - confirm).
// Sub-arrays are distributed through samediff::Threads::parallel_tad.
template <typename T>
FORCEINLINE void softmax_loop(T *input, T *output, Nd4jLong *offsets, Nd4jLong numOfSubArrs, uint32_t tadLen) {

    auto func = PRAGMA_THREADS_FOR {
        for (auto tad = start; tad < stop; tad++) {
            const auto tadOffset = offsets[tad];
            auto src = input + tadOffset;
            auto dst = output + tadOffset;

            T maxVal = -DataTypeUtils::max<T>();
            T expSum(0.f);

            // pass 1: maximum of the sub-array, subtracted below before exponentiation
            #pragma omp simd reduction(maxT:maxVal)
            for (uint k = 0; k < tadLen; ++k)
                maxVal = nd4j::math::nd4j_max<T>(maxVal, src[k]);

            // pass 2: shifted exponentials and their running sum
            #pragma omp simd reduction(sumT:expSum)
            for (uint k = 0; k < tadLen; ++k) {
                T shifted = nd4j::math::nd4j_exp<T, T>(src[k] - maxVal);
                dst[k] = shifted;
                expSum += shifted;
            }

            // pass 3: normalization
            #pragma omp simd
            for (uint k = 0; k < tadLen; ++k)
                dst[k] /= expSum;
        }
    };

    samediff::Threads::parallel_tad(func,0, numOfSubArrs);
}
//////////////////////////////////////////////////////////////////////////
template <typename T>
static void softmax_(nd4j::LaunchContext * context, const NDArray& input, NDArray& output, const int dimension) {
@ -213,31 +277,10 @@ static void softmax_(nd4j::LaunchContext * context, const NDArray& input, NDArra
const uint tadLen = shape::length(tadShapeInfo);
if(shape::elementWiseStride(tadShapeInfo) == 1){
T *inBuff = input.bufferAsT<T>();
T *outBuff = output.bufferAsT<T>();
auto func = PRAGMA_THREADS_FOR {
for (auto i = start; i < stop; i += increment) {
T *inBuff = input.bufferAsT<T>() + tadOffsets[i];
T *outBuff = output.bufferAsT<T>() + tadOffsets[i];
T max = -DataTypeUtils::max<T>();
T sum = 0;
for (uint j = 0; j < tadLen; ++j)
max = nd4j::math::nd4j_max<T>(max, inBuff[j]);
for (uint j = 0; j < tadLen; ++j) {
T temp = nd4j::math::nd4j_exp<T, T>(inBuff[j] - max);
outBuff[j] = temp;
sum += temp;
}
for (uint j = 0; j < tadLen; ++j)
outBuff[j] /= sum;
}
};
samediff::Threads::parallel_tad(func,0, numOfSubArrs);
softmax_loop(inBuff, outBuff, tadOffsets, numOfSubArrs, tadLen);
}
else {

View File

@ -127,6 +127,32 @@ namespace nd4j {
template<typename T, typename Z>
math_def inline Z nd4j_erfc(T num);
// Reinterprets the bit pattern of a float as a 32-bit signed integer (no numeric conversion).
// The value is stored through the float member of a union and read back through the integer member.
math_def inline int32_t floatToRawIntBits(float d) {
    union {
        float asFloat;
        int32_t asInt;
    } pun = {d};

    return pun.asInt;
}
// Reinterprets a 32-bit signed integer bit pattern as a float (no numeric conversion).
// The value is stored through the integer member of a union and read back through the float member.
math_def inline float intBitsToFloat(int32_t i) {
    union {
        int32_t asInt;
        float asFloat;
    } pun = {i};

    return pun.asFloat;
}
// Returns x with its sign bit flipped when the sign bit of y is set, i.e. x "multiplied" by the sign of y.
// Works purely on the bit patterns: bit 31 of y is XOR-ed into the bits of x.
math_def inline float mulsignf(float x, float y) {
    const int32_t signOfY = floatToRawIntBits(y) & (1 << 31);
    return intBitsToFloat(floatToRawIntBits(x) ^ signOfY);
}
// Returns a float made of the magnitude bits of x and the sign bit of y.
// Bit 31 of x is cleared first, then bit 31 of y is merged in.
math_def inline float copysignfk(float x, float y) {
    const int32_t magnitudeOfX = floatToRawIntBits(x) & ~(1 << 31);
    const int32_t signOfY = floatToRawIntBits(y) & (1 << 31);
    return intBitsToFloat(magnitudeOfX ^ signOfY);
}
template<typename T, typename Z>
math_def inline Z nd4j_sigmoid(T val) {
return (Z) 1.0f / ((Z) 1.0f + nd4j_exp<T, Z>(-val));
@ -660,6 +686,11 @@ namespace nd4j {
* @param val2
* @return
*/
// float-only specialization of nd4j_pow: forwards both arguments directly to p_pow<float>,
// so no static_cast of the operands is involved on this path.
template <>
math_def inline float nd4j_pow(float val, float val2) {
return p_pow<float>(val, val2);
}
template <typename X, typename Y, typename Z>
math_def inline Z nd4j_pow(X val, Y val2) {
return p_pow<Z>(static_cast<Z>(val), static_cast<Z>(val2));
@ -767,10 +798,23 @@ namespace nd4j {
}
// Evaluates (1 - p) / (1 + p) with p = e^(-2 * sign * val).
// When sign is the sign of val (as passed by the float nd4j_tanh specialization), sign * val is |val|
// and the result is tanh(|val|); the caller re-applies the sign.
math_def inline float neu_tanh(float val, float sign) {
    const float magnitude = sign * val;
    float base(M_E);
    auto decay = nd4j::math::nd4j_pow<float, float, float>(base, -magnitude * 2.f);
    return (1 - decay) / (1 + decay);
}
// float specialization of nd4j_tanh.
// Takes the sign of val as +1.0f / -1.0f via copysignfk, lets neu_tanh evaluate the function on
// sign * val, and multiplies the sign back into the result.
template <>
math_def inline float nd4j_tanh(float val) {
    const float sgn = copysignfk(1.0f, val);
    const float unsignedResult = neu_tanh(val, sgn);
    return sgn * unsignedResult;
}
// Generic tanh for input type X and result type Z.
// Dispatches on the sign of val: non-positive values are handled by neg_tanh, positive values by pos_tanh
// (both helpers are defined outside this block). The commented-out line is an alternative implementation
// based on p_tanh that is currently not used.
template <typename X, typename Z>
math_def inline Z nd4j_tanh(X val) {
return val <= 0 ? neg_tanh(val) : pos_tanh(val);
//return p_tanh<Z>(static_cast<Z>(val));
}
template <typename X, typename Z>

View File

@ -48,5 +48,16 @@ TEST_F(DeclarableOpsTests18, test_bitcast_1) {
auto status = op.execute({&x}, {&z}, {}, {(Nd4jLong) nd4j::DataType::INT64}, {});
ASSERT_EQ(Status::OK(), status);
ASSERT_EQ(e, z);
}
// Checks the tanh op on float input: small positive/negative pairs must produce symmetric results
// and very large magnitudes must saturate to exactly +1 / -1.
TEST_F(DeclarableOpsTests18, test_tanh_1) {
    auto x = NDArrayFactory::create<float>('c', {8}, {0.23f, -0.23f, 0.35f, -0.35f, 0.64f, -0.64f, 100000.f, -100000.f});
    auto z = x.ulike();
    auto e = NDArrayFactory::create<float>('c', {8}, {0.226028f, -0.226028f, 0.336376f, -0.336376f, 0.564900f, -0.564900f, 1.f, -1.f});

    nd4j::ops::tanh op;

    // fail fast on an execution error instead of reporting it as a value mismatch below
    auto status = op.execute({&x}, {&z});
    ASSERT_EQ(Status::OK(), status);

    ASSERT_EQ(e, z);
}

View File

@ -66,4 +66,14 @@ TEST_F(DeclarableOpsTests19, test_conv1d_bp_1) {
delete result;
}
// Smoke test for squeeze: removing axis 2 of a [3, 4, 1] double array into a preallocated [3, 4] array
// must finish with Status::OK(). Only the status is checked, not the output values.
TEST_F(DeclarableOpsTests19, test_squeeze_1) {
    int squeezedAxis = 2;

    auto input = NDArrayFactory::create<double>('c', {3, 4, 1});
    auto output = NDArrayFactory::create<double>('c', {3, 4});

    nd4j::ops::squeeze squeezeOp;
    auto result = squeezeOp.execute({&input}, {&output}, {squeezedAxis});

    ASSERT_EQ(Status::OK(), result);
}

View File

@ -169,6 +169,53 @@ TEST_F(PlaygroundTests, test_broadcast_1) {
}
*/
/*
TEST_F(PlaygroundTests, test_broadcast_1) {
int pool = 500;
std::vector<NDArray*> aX(pool);
std::vector<NDArray*> aY(pool);
std::vector<NDArray*> aZ(pool);
for (int e = 0; e < pool; e++) {
aX[e] = NDArrayFactory::create_<float>('c', {512, 3072});
aY[e] = NDArrayFactory::create_<float>('c', {768});
aZ[e] = NDArrayFactory::create_<float>('c', {512, 3072});
aX[e]->assign( (e+1) / 119);
aY[e]->assign( (e+3) / 119);
}
std::vector<Nd4jLong> values;
for (int e = 0; e < 1000; e++) {
auto x = aX[e < pool ? e : e % pool];
auto y = aY[e < pool ? e : e % pool];
auto z = aZ[e < pool ? e : e % pool];
auto timeStart = std::chrono::system_clock::now();
//x->applyTrueBroadcast(BroadcastOpsTuple::Multiply(), *y, *z);
x->applyTransform(transform::Tanh, *z, nullptr);
auto timeEnd = std::chrono::system_clock::now();
auto outerTime = std::chrono::duration_cast<std::chrono::microseconds>(timeEnd - timeStart).count();
values.emplace_back(outerTime);
}
std::sort(values.begin(), values.end());
nd4j_printf("Time: %lld us;\n", values[values.size() / 2]);
for (int e = 0; e < pool; e++) {
delete aX[e];
delete aY[e];
delete aZ[e];
}
}
*/
/*
TEST_F(PlaygroundTests, test_s_0) {

View File

@ -4250,14 +4250,20 @@ public native @Cast("bool") boolean isOptimalRequirementsMet();
* set new order and shape in case of suitable array length (in-place operation)
* order - order to set
* shape - shape to set
*
* copyToNewBuff - if true and reshaping requires a new buffer to be allocated, the contents of the old buffer are copied into the new one
* if a permute was applied before or the array has unusual strides, then a new buffer is allocated for the array
*/
public native @Cast("bool") boolean reshapei(byte order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @Cast("const bool") boolean copyToNewBuff/*=true*/);
public native @Cast("bool") boolean reshapei(byte order, @Cast("Nd4jLong*") @StdVector LongPointer shape);
public native @Cast("bool") boolean reshapei(byte order, @Cast("Nd4jLong*") @StdVector LongBuffer shape, @Cast("const bool") boolean copyToNewBuff/*=true*/);
public native @Cast("bool") boolean reshapei(byte order, @Cast("Nd4jLong*") @StdVector LongBuffer shape);
public native @Cast("bool") boolean reshapei(byte order, @Cast("Nd4jLong*") @StdVector long[] shape, @Cast("const bool") boolean copyToNewBuff/*=true*/);
public native @Cast("bool") boolean reshapei(byte order, @Cast("Nd4jLong*") @StdVector long[] shape);
public native @Cast("bool") boolean reshapei(@Cast("Nd4jLong*") @StdVector LongPointer shape, @Cast("const bool") boolean copyToNewBuff/*=true*/);
public native @Cast("bool") boolean reshapei(@Cast("Nd4jLong*") @StdVector LongPointer shape);
public native @Cast("bool") boolean reshapei(@Cast("Nd4jLong*") @StdVector LongBuffer shape, @Cast("const bool") boolean copyToNewBuff/*=true*/);
public native @Cast("bool") boolean reshapei(@Cast("Nd4jLong*") @StdVector LongBuffer shape);
public native @Cast("bool") boolean reshapei(@Cast("Nd4jLong*") @StdVector long[] shape, @Cast("const bool") boolean copyToNewBuff/*=true*/);
public native @Cast("bool") boolean reshapei(@Cast("Nd4jLong*") @StdVector long[] shape);
/**
@ -4267,8 +4273,11 @@ public native @Cast("bool") boolean isOptimalRequirementsMet();
*
* if a permute has been applied before or the array has unusual strides, then a new buffer is allocated for the new array
*/
public native @ByVal NDArray reshape(byte order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @Cast("const bool") boolean copyToNewBuff/*=true*/);
public native @ByVal NDArray reshape(byte order, @Cast("Nd4jLong*") @StdVector LongPointer shape);
public native @ByVal NDArray reshape(byte order, @Cast("Nd4jLong*") @StdVector LongBuffer shape, @Cast("const bool") boolean copyToNewBuff/*=true*/);
public native @ByVal NDArray reshape(byte order, @Cast("Nd4jLong*") @StdVector LongBuffer shape);
public native @ByVal NDArray reshape(byte order, @Cast("Nd4jLong*") @StdVector long[] shape, @Cast("const bool") boolean copyToNewBuff/*=true*/);
public native @ByVal NDArray reshape(byte order, @Cast("Nd4jLong*") @StdVector long[] shape);
/**
@ -6203,6 +6212,7 @@ public native @Cast("bool") boolean isOptimalRequirementsMet();
// #include <pointercast.h>
// #include <dll.h>
// #include <string>
// #include <vector>
@Namespace("nd4j::graph") @NoOffset public static class NodeProfile extends Pointer {
static { Loader.load(); }
/** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */
@ -6235,11 +6245,20 @@ public native @Cast("bool") boolean isOptimalRequirementsMet();
public native void setObjectsSize(@Cast("Nd4jLong") long bytes);
public native void setTotalSize(@Cast("Nd4jLong") long bytes);
public native void addInputShape(@Cast("Nd4jLong*") LongPointer shapeInfo);
public native void addInputShape(@Cast("Nd4jLong*") LongBuffer shapeInfo);
public native void addInputShape(@Cast("Nd4jLong*") long[] shapeInfo);
public native void addOutputShape(@Cast("Nd4jLong*") LongPointer shapeInfo);
public native void addOutputShape(@Cast("Nd4jLong*") LongBuffer shapeInfo);
public native void addOutputShape(@Cast("Nd4jLong*") long[] shapeInfo);
public native @Cast("Nd4jLong") long getActivationsSize();
public native @Cast("Nd4jLong") long getTemporarySize();
public native @Cast("Nd4jLong") long getObjectsSize();
public native @Cast("Nd4jLong") long getTotalSize();
public native @Cast("Nd4jLong") long getExecutionTime();
public native @StdString @ByRef @Cast({"char*", "std::string*"}) BytePointer name();
public native void merge(NodeProfile other);
@ -6835,9 +6854,15 @@ public static final int PREALLOC_SIZE = 33554432;
@Namespace("shape") public static native @Cast("bool") boolean canReshape(int oldRank, @Cast("Nd4jLong*") LongBuffer oldShape, int newRank, @Cast("Nd4jLong*") LongBuffer newShape, @Cast("bool") boolean isFOrder);
@Namespace("shape") public static native @Cast("bool") boolean canReshape(int oldRank, @Cast("Nd4jLong*") long[] oldShape, int newRank, @Cast("Nd4jLong*") long[] newShape, @Cast("bool") boolean isFOrder);
@Namespace("shape") public static native @Cast("bool") boolean reshapeC(int oldRank, @Cast("const Nd4jLong*") LongPointer oldShapeInfo, int newRank, @Cast("const Nd4jLong*") LongPointer newShape, @Cast("Nd4jLong*") LongPointer newShapeInfo);
@Namespace("shape") public static native @Cast("bool") boolean reshapeC(int oldRank, @Cast("const Nd4jLong*") LongBuffer oldShapeInfo, int newRank, @Cast("const Nd4jLong*") LongBuffer newShape, @Cast("Nd4jLong*") LongBuffer newShapeInfo);
@Namespace("shape") public static native @Cast("bool") boolean reshapeC(int oldRank, @Cast("const Nd4jLong*") long[] oldShapeInfo, int newRank, @Cast("const Nd4jLong*") long[] newShape, @Cast("Nd4jLong*") long[] newShapeInfo);
@Namespace("shape") public static native @Cast("bool") boolean reshapeC(@Cast("const Nd4jLong*") LongPointer oldShapeInfo, byte newOrder, int newRank, @Cast("const Nd4jLong*") LongPointer newShape, @Cast("Nd4jLong*") LongPointer newShapeInfo);
@Namespace("shape") public static native @Cast("bool") boolean reshapeC(@Cast("const Nd4jLong*") LongBuffer oldShapeInfo, byte newOrder, int newRank, @Cast("const Nd4jLong*") LongBuffer newShape, @Cast("Nd4jLong*") LongBuffer newShapeInfo);
@Namespace("shape") public static native @Cast("bool") boolean reshapeC(@Cast("const Nd4jLong*") long[] oldShapeInfo, byte newOrder, int newRank, @Cast("const Nd4jLong*") long[] newShape, @Cast("Nd4jLong*") long[] newShapeInfo);
/**
* newShapeInfo contains rank, shape and order only, no strides/ews/type
*/
@Namespace("shape") public static native @Cast("bool") boolean reshapeC(@Cast("const Nd4jLong*") LongPointer oldShapeInfo, @Cast("Nd4jLong*") LongPointer newShapeInfo);
@Namespace("shape") public static native @Cast("bool") boolean reshapeC(@Cast("const Nd4jLong*") LongBuffer oldShapeInfo, @Cast("Nd4jLong*") LongBuffer newShapeInfo);
@Namespace("shape") public static native @Cast("bool") boolean reshapeC(@Cast("const Nd4jLong*") long[] oldShapeInfo, @Cast("Nd4jLong*") long[] newShapeInfo);
/**
* Get the shape info buffer
@ -7145,6 +7170,15 @@ public static final int PREALLOC_SIZE = 33554432;
@Namespace("shape") public static native @Cast("bool") boolean isColumnVector(@Cast("Nd4jLong*") LongPointer shapeInfo);
@Namespace("shape") public static native @Cast("bool") boolean isColumnVector(@Cast("Nd4jLong*") LongBuffer shapeInfo);
@Namespace("shape") public static native @Cast("bool") boolean isColumnVector(@Cast("Nd4jLong*") long[] shapeInfo);
/**
* shape - input inShape is shape only, not shapeInfo
* returns number of non-unity dimensions in inShape
*/
@Namespace("shape") public static native int numOfNonUnitDims(int rank, @Cast("const Nd4jLong*") LongPointer inShape);
@Namespace("shape") public static native int numOfNonUnitDims(int rank, @Cast("const Nd4jLong*") LongBuffer inShape);
@Namespace("shape") public static native int numOfNonUnitDims(int rank, @Cast("const Nd4jLong*") long[] inShape);
/**
* Returns whether the
* given shape is a vector or not
@ -7163,9 +7197,9 @@ public static final int PREALLOC_SIZE = 33554432;
* Returns the shape portion of an information
* buffer
*/
@Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer shapeOf(@Cast("Nd4jLong*") LongPointer buffer);
@Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer shapeOf(@Cast("Nd4jLong*") LongBuffer buffer);
@Namespace("shape") public static native @Cast("Nd4jLong*") long[] shapeOf(@Cast("Nd4jLong*") long[] buffer);
@Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer shapeOf(@Cast("Nd4jLong*") LongPointer shapeInfo);
@Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer shapeOf(@Cast("Nd4jLong*") LongBuffer shapeInfo);
@Namespace("shape") public static native @Cast("Nd4jLong*") long[] shapeOf(@Cast("Nd4jLong*") long[] shapeInfo);
/**
* Return a copy of a buffer.
@ -7903,40 +7937,22 @@ public static final int PREALLOC_SIZE = 33554432;
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") LongBuffer shapeInfo, @Cast("Nd4jLong*") LongBuffer offsets);
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") long[] shapeInfo, @Cast("Nd4jLong*") long[] offsets, byte order/*='c'*/);
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") long[] shapeInfo, @Cast("Nd4jLong*") long[] offsets);
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") LongPointer xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongPointer xOffsets, @Cast("const Nd4jLong*") LongPointer yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongPointer yOffsets, byte order/*='c'*/);
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") LongPointer xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongPointer xOffsets, @Cast("const Nd4jLong*") LongPointer yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongPointer yOffsets);
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") LongBuffer xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer xOffsets, @Cast("const Nd4jLong*") LongBuffer yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer yOffsets, byte order/*='c'*/);
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") LongBuffer xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer xOffsets, @Cast("const Nd4jLong*") LongBuffer yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer yOffsets);
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") long[] xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef long[] xOffsets, @Cast("const Nd4jLong*") long[] yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef long[] yOffsets, byte order/*='c'*/);
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") long[] xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef long[] xOffsets, @Cast("const Nd4jLong*") long[] yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef long[] yOffsets);
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") LongPointer xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongPointer xOffsets, @Cast("const Nd4jLong*") LongPointer yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongPointer yOffsets, @Cast("const Nd4jLong*") LongPointer zShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongPointer zOffsets, byte order/*='c'*/);
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") LongPointer xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongPointer xOffsets, @Cast("const Nd4jLong*") LongPointer yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongPointer yOffsets, @Cast("const Nd4jLong*") LongPointer zShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongPointer zOffsets);
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") LongBuffer xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer xOffsets, @Cast("const Nd4jLong*") LongBuffer yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer yOffsets, @Cast("const Nd4jLong*") LongBuffer zShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer zOffsets, byte order/*='c'*/);
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") LongBuffer xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer xOffsets, @Cast("const Nd4jLong*") LongBuffer yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer yOffsets, @Cast("const Nd4jLong*") LongBuffer zShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer zOffsets);
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") long[] xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef long[] xOffsets, @Cast("const Nd4jLong*") long[] yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef long[] yOffsets, @Cast("const Nd4jLong*") long[] zShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef long[] zOffsets, byte order/*='c'*/);
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") long[] xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef long[] xOffsets, @Cast("const Nd4jLong*") long[] yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef long[] yOffsets, @Cast("const Nd4jLong*") long[] zShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef long[] zOffsets);
// ND4J_EXPORT void calcOffsets(const Nd4jLong *xShapeInfo, Nd4jLong*& xOffsets, const Nd4jLong *yShapeInfo, Nd4jLong*& yOffsets, const char order = 'c');
// ND4J_EXPORT void calcOffsets(const Nd4jLong *xShapeInfo, Nd4jLong*& xOffsets, const Nd4jLong *yShapeInfo, Nd4jLong*& yOffsets, const Nd4jLong* zShapeInfo, Nd4jLong*& zOffsets, const char order = 'c');
@Namespace("shape") public static native void shapeOldScalar(@Cast("nd4j::DataType") int dtype, @Cast("Nd4jLong*const") LongPointer buffer, byte order);
@Namespace("shape") public static native void shapeOldScalar(@Cast("nd4j::DataType") int dtype, @Cast("Nd4jLong*const") LongBuffer buffer, byte order);
@Namespace("shape") public static native void shapeOldScalar(@Cast("nd4j::DataType") int dtype, @Cast("Nd4jLong*const") long[] buffer, byte order);
// deduce element-wise stride
// if array is scalar or unit length vector then ews = 1
// if array is common vector then ews = stride of non-unity dimension
// if strides are normal set ews = 1, otherwise ews = 0
@Namespace("shape") public static native void setEws(@Cast("Nd4jLong*") LongPointer shapeInfo, @Cast("Nd4jLong") long len);
@Namespace("shape") public static native void setEws(@Cast("Nd4jLong*") LongBuffer shapeInfo, @Cast("Nd4jLong") long len);
@Namespace("shape") public static native void setEws(@Cast("Nd4jLong*") long[] shapeInfo, @Cast("Nd4jLong") long len);
// deduce order and element-wise stride
// if array is scalar or unit length vector then ews = 1 and order is preserved
// if array is common vector then ews = stride of non-unity dimension and order is preserved
// if strides are normal/contiguous then ews = 1 and corresponding order is set, otherwise ews = 0 and order is preserved
@Namespace("shape") public static native void setOrderAndEws(@Cast("Nd4jLong*") LongPointer shapeInfo, @Cast("Nd4jLong") long len/*=-1*/);
@Namespace("shape") public static native void setOrderAndEws(@Cast("Nd4jLong*") LongPointer shapeInfo);
@Namespace("shape") public static native void setOrderAndEws(@Cast("Nd4jLong*") LongBuffer shapeInfo, @Cast("Nd4jLong") long len/*=-1*/);
@Namespace("shape") public static native void setOrderAndEws(@Cast("Nd4jLong*") LongBuffer shapeInfo);
@Namespace("shape") public static native void setOrderAndEws(@Cast("Nd4jLong*") long[] shapeInfo, @Cast("Nd4jLong") long len/*=-1*/);
@Namespace("shape") public static native void setOrderAndEws(@Cast("Nd4jLong*") long[] shapeInfo);
@Namespace("shape") public static native void checkStridesSetEwsAndOrder(@Cast("Nd4jLong*") LongPointer shapeInfo, byte proposedOrder, int numOfNonUnitDims, @Cast("const Nd4jLong*") LongPointer shapeNoUnities, @Cast("const Nd4jLong*") LongPointer stridesNoUnities);
@Namespace("shape") public static native void checkStridesSetEwsAndOrder(@Cast("Nd4jLong*") LongBuffer shapeInfo, byte proposedOrder, int numOfNonUnitDims, @Cast("const Nd4jLong*") LongBuffer shapeNoUnities, @Cast("const Nd4jLong*") LongBuffer stridesNoUnities);
@Namespace("shape") public static native void checkStridesSetEwsAndOrder(@Cast("Nd4jLong*") long[] shapeInfo, byte proposedOrder, int numOfNonUnitDims, @Cast("const Nd4jLong*") long[] shapeNoUnities, @Cast("const Nd4jLong*") long[] stridesNoUnities);
@Namespace("shape") public static native void checkStridesSetEwsAndOrder(@Cast("Nd4jLong*") LongPointer shapeInfo);
@Namespace("shape") public static native void checkStridesSetEwsAndOrder(@Cast("Nd4jLong*") LongBuffer shapeInfo);
@Namespace("shape") public static native void checkStridesSetEwsAndOrder(@Cast("Nd4jLong*") long[] shapeInfo);
/**
* processes whole set of sub-arrays
@ -7946,7 +7962,7 @@ public static final int PREALLOC_SIZE = 33554432;
* numOfSubArrs - number of sub-arrays, size of subArrOffsets is equal to numOfSubArrs
* dimsSize - size of dimsToExclude, if dimsSize = array rank or dimsSize = 0 it means sub-array is whole array, copy of wholeShapeInfo and one zero offset will be returned
* dimsToExclude - MUST BE SORTED, dimensions to evaluate sub-array along, i.e. when shape is [2,3,4,5] and dimsToExclude={0,2}, then there will be 8 sub-arrays with shape [3,5]
* subArrShapeInfo - output argument, contains shapeInfo common for all sub-arrays
* subArrShapeInfo - output argument, contains shapeInfo (same for all sub-arrays)
* subArrOffsets - output argument, contains successive sub-arrays offsets from original this-buffer
* keepUnitiesInShape - if false then eliminate unities from sub-array shapeInfo, for example {1,a,1,b} -> {a,b}
*/
@ -7957,6 +7973,24 @@ public static final int PREALLOC_SIZE = 33554432;
@Namespace("shape") public static native void calcSubArrShapeAndOffsets(@Cast("const Nd4jLong*") long[] wholeShapeInfo, @Cast("const Nd4jLong") long numOfSubArrs, int dimsSize, @Const int[] dimsToExclude, @Cast("Nd4jLong*") long[] subArrShapeInfo, @Cast("Nd4jLong*") long[] subArrOffsets, @Cast("bool") boolean keepUnitiesInShape/*=false*/);
@Namespace("shape") public static native void calcSubArrShapeAndOffsets(@Cast("const Nd4jLong*") long[] wholeShapeInfo, @Cast("const Nd4jLong") long numOfSubArrs, int dimsSize, @Const int[] dimsToExclude, @Cast("Nd4jLong*") long[] subArrShapeInfo, @Cast("Nd4jLong*") long[] subArrOffsets);
/**
* for example, if inShapeInfo is {3, 2,1,4, 4,4,1, 16384,1,99}
* then the output shapeNoUnities will contain {2,4, 4,1} - that is, only shape and strides, no rank/type/ews/order
* stridesNoUnities will point to the strides inside shapeNoUnities, that is to {4,1}
* returns the number of non-unity dimensions in inShapeInfo
* if there are no unities in inShapeInfo, then nothing is copied and shapeNoUnities/stridesNoUnities will point to the corresponding places in inShapeInfo
*/
@Namespace("shape") public static native int excludeUnitiesFromShapeInfo(@Cast("const Nd4jLong*") LongPointer inShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongPointer shapeNoUnities, @Cast("Nd4jLong*&") @ByPtrRef LongPointer stridesNoUnities);
@Namespace("shape") public static native int excludeUnitiesFromShapeInfo(@Cast("const Nd4jLong*") LongBuffer inShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer shapeNoUnities, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer stridesNoUnities);
@Namespace("shape") public static native int excludeUnitiesFromShapeInfo(@Cast("const Nd4jLong*") long[] inShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef long[] shapeNoUnities, @Cast("Nd4jLong*&") @ByPtrRef long[] stridesNoUnities);
/**
* for example, if inShapeInfo is {5, 2,1,3,1,4, 12,12,4,4,1, 16384,1,99}, dimsToExclude = {1,3} (positions of the unity dimensions), dimsSize = 2
* then outShapeInfo will contain {3, 2,3,4, 12,4,1, 16384,1,99}
*/
@Namespace("shape") public static native void excludeUnitiesFromShapeInfo(@Cast("const Nd4jLong*") LongPointer inShapeInfo, int dimsSize, @Const IntPointer dimsToExclude, @Cast("Nd4jLong*") LongPointer outShapeInfo);
@Namespace("shape") public static native void excludeUnitiesFromShapeInfo(@Cast("const Nd4jLong*") LongBuffer inShapeInfo, int dimsSize, @Const IntBuffer dimsToExclude, @Cast("Nd4jLong*") LongBuffer outShapeInfo);
@Namespace("shape") public static native void excludeUnitiesFromShapeInfo(@Cast("const Nd4jLong*") long[] inShapeInfo, int dimsSize, @Const int[] dimsToExclude, @Cast("Nd4jLong*") long[] outShapeInfo);
@ -8186,6 +8220,8 @@ public static final int PREALLOC_SIZE = 33554432;
* @param rank the rank of the shape
*/
//////////////////////////////////////////////////////////////////////
/**
* Returns whether the
* given shape is a vector or not
@ -8735,69 +8771,60 @@ public static final int PREALLOC_SIZE = 33554432;
// return true;
// }
// INLINEDEF _CUDA_H bool reshapeC(const int oldRank, const Nd4jLong* oldShapeInfo, const int newRank, const Nd4jLong* newShape, const bool isFOrder, Nd4jLong* newShapeInfo) {
//////////////////////////////////////////////////////////////////////
// INLINEDEF _CUDA_H bool reshapeC(const int oldRank, const Nd4jLong* oldShapeInfo, const int newRank, const Nd4jLong* newShape, Nd4jLong* newShapeInfo) {
// // PLEASE NOTE !: reshaping not-permuted (ews=1) array in f order (except insertion/elimination of unities) will definitely cause allocation of new buffer for array elements
// // also this function takes into account identical shapes automatically, namely in that case oldShapeInfo is completely copied to newShapeInfo
// const int newOrder = isFOrder ? 102 : 99;
// const int oldOrder = oldShapeInfo[2 * oldRank + 3];
// newShapeInfo[0] = newRank;
// memcpy(newShapeInfo + 1, newShape, newRank * sizeof(Nd4jLong));
// Nd4jLong* newStrides = shape::stride(newShapeInfo);
// const Nd4jLong* oldShape = shape::shapeOf(const_cast<Nd4jLong*>(oldShapeInfo));
// Nd4jLong* newStrides = shape::stride(newShapeInfo);
// const Nd4jLong* oldShape = shape::shapeOf(const_cast<Nd4jLong*>(oldShapeInfo));
// const Nd4jLong* oldStrides = shape::stride(const_cast<Nd4jLong*>(oldShapeInfo));
// int oldStart(0), oldStop(1), newStart(0), newStop(1), newDim, oldDim;
// Nd4jLong oldStart(0), oldStop(1), newStart(0), newStop(1), newDim, oldDim;
// while (newStart < newRank && oldStart < oldRank) {
// newDim = newShape[newStart];
// oldDim = oldShape[oldStart];
// while (newDim != oldDim)
// while (newDim != oldDim && newDim > 0 && oldDim > 0)
// if (newDim < oldDim) newDim *= newShape[newStop++];
// else oldDim *= oldShape[oldStop++];
// // ------ Check whether the original axes can be combined ------ //
// for (int i = oldStart; i < oldStop - 1; i++) {
// if(oldShape[i] == 1) { // ignore strides like {...,1,1,...}
// if(oldOrder == 102) ++oldStart;
// for (int step = 1, i = oldStart; i < oldStop - 1; ++i) {
// if(oldShape[i] == 1) // skip unity-dimension and its stride
// continue;
// }
// if(oldOrder == 102 && oldStrides[i + 1] != oldShape[i] * oldStrides[i])
// return false; // not contiguous enough
// if(oldOrder == 99 && oldStrides[i] != oldShape[i + 1] * oldStrides[i + 1])
// return false; // not contiguous enough
// while((i + step) < oldRank && oldShape[i + step] == 1)
// ++step; // skip following unity-dimensions and its strides if such are present
// if((i + step) < oldRank && oldStrides[i] != oldShape[i + step] * oldStrides[i + step])
// return false; // not contiguous enough
// }
// // ------ Calculate new strides for all axes currently worked with ------ //
// if(isFOrder) {
// newStrides[newStart] = oldStrides[oldStart];
// for (int i = newStart + 1; i < newStop; ++i)
// newStrides[i] = newStrides[i - 1] * newShape[i - 1];
// }
// else {
// newStrides[newStop - 1] = oldStrides[oldStop - 1];
// for (int i = newStop - 1; i > newStart; --i)
// newStrides[i - 1] = newStrides[i] * newShape[i];
// }
// newStrides[newStop - 1] = oldStrides[oldStop - 1];
// for (int i = newStop - 1; i > newStart; --i)
// newStrides[i - 1] = newStrides[i] * newShape[i];
// newStart = newStop++;
// oldStart = oldStop++;
// }
// newShapeInfo[2 * newRank + 3] = shape::order(oldShapeInfo); // order
// newShapeInfo[2 * newRank + 2] = shape::elementWiseStride(oldShapeInfo); // ews
// newShapeInfo[2 * newRank + 1] = shape::type(oldShapeInfo); // type
// // rest of strides should be unities (if there is remainder in strides space, that is newStart < newRank)
// for (int i = newStart; i < newRank; ++i)
// newStrides[i] = 1;
// newShapeInfo[2 * newRank + 3] = shape::order(oldShapeInfo); // order
// newShapeInfo[2 * newRank + 2] = shape::elementWiseStride(oldShapeInfo); // ews
// newShapeInfo[2 * newRank + 1] = shape::type(oldShapeInfo); // type
// return true;
// }
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
// this function checks the consistency of dimensions with array rank (negative dimensions, too large dimensions, too many dimensions)
@ -8838,9 +8865,198 @@ public static final int PREALLOC_SIZE = 33554432;
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
// INLINEDEF _CUDA_HD void calcOffsets(const Nd4jLong *xShapeInfo, Nd4jLong*& xOffsets, const Nd4jLong *yShapeInfo, Nd4jLong*& yOffsets, const Nd4jLong* zShapeInfo, Nd4jLong*& zOffsets, const char order) {
// // we assume all array have same length
// const Nd4jLong len = shape::length(xShapeInfo);
// const Nd4jLong xEws = shape::elementWiseStride(xShapeInfo);
// const Nd4jLong yEws = shape::elementWiseStride(yShapeInfo);
// const Nd4jLong zEws = shape::elementWiseStride(zShapeInfo);
// const char xOrder = shape::order(xShapeInfo);
// const char yOrder = shape::order(yShapeInfo);
// const char zOrder = shape::order(zShapeInfo);
// const bool shapesSame = shape::shapeEquals(xShapeInfo, yShapeInfo, zShapeInfo);
// if (xEws == 1 && yEws == 1 && zEws == 1 && xOrder == yOrder && xOrder == zOrder && (xOrder == 'c' || shapesSame)) {
// xOffsets = yOffsets = zOffsets = nullptr;
// }
// else if(xEws == 1 && yEws == 1 && xOrder == yOrder && (xOrder == 'c' || shape::shapeEquals(xShapeInfo, yShapeInfo))) {
// xOffsets = yOffsets = nullptr;
// zOffsets = new Nd4jLong[len];
// shape::calcOffsets(zShapeInfo, zOffsets, xOrder);
// }
// else if(xEws == 1 && zEws == 1 && xOrder == zOrder && (xOrder == 'c' || shape::shapeEquals(xShapeInfo, zShapeInfo))) {
// xOffsets = zOffsets = nullptr;
// yOffsets = new Nd4jLong[len];
// shape::calcOffsets(yShapeInfo, yOffsets, xOrder);
// }
// else if(yEws == 1 && zEws == 1 && yOrder == zOrder && (yOrder == 'c' || shape::shapeEquals(yShapeInfo, zShapeInfo))) {
// yOffsets = zOffsets = nullptr;
// xOffsets = new Nd4jLong[len];
// shape::calcOffsets(xShapeInfo, xOffsets, yOrder);
// }
// else if(xEws == 1) {
// xOffsets = nullptr;
// PRAGMA_OMP_PARALLEL_SECTIONS
// {
// PRAGMA_OMP_SECTION
// {
// yOffsets = new Nd4jLong[len];
// shape::calcOffsets(yShapeInfo, yOffsets, xOrder);
// }
// PRAGMA_OMP_SECTION
// {
// zOffsets = new Nd4jLong[len];
// shape::calcOffsets(zShapeInfo, zOffsets, xOrder);
// }
// }
// }
// else if(yEws == 1) {
// yOffsets = nullptr;
// PRAGMA_OMP_PARALLEL_SECTIONS
// {
// PRAGMA_OMP_SECTION
// {
// xOffsets = new Nd4jLong[len];
// shape::calcOffsets(xShapeInfo, xOffsets, yOrder);
// }
// PRAGMA_OMP_SECTION
// {
// zOffsets = new Nd4jLong[len];
// shape::calcOffsets(zShapeInfo, zOffsets, yOrder);
// }
// }
// }
// else if(zEws == 1) {
// zOffsets = nullptr;
// PRAGMA_OMP_PARALLEL_SECTIONS
// {
// PRAGMA_OMP_SECTION
// {
// xOffsets = new Nd4jLong[len];
// shape::calcOffsets(xShapeInfo, xOffsets, zOrder);
// }
// PRAGMA_OMP_SECTION
// {
// yOffsets = new Nd4jLong[len];
// shape::calcOffsets(yShapeInfo, yOffsets, zOrder);
// }
// }
// }
// else if(shape::haveSameShapeAndStrides(xShapeInfo, yShapeInfo, zShapeInfo)) {
// xOffsets = new Nd4jLong[len];
// shape::calcOffsets(xShapeInfo, xOffsets);
// yOffsets = zOffsets = xOffsets;
// }
// else if(shape::haveSameShapeAndStrides(xShapeInfo, yShapeInfo)) {
// PRAGMA_OMP_PARALLEL_SECTIONS
// {
// PRAGMA_OMP_SECTION
// {
// xOffsets = new Nd4jLong[len];
// shape::calcOffsets(xShapeInfo, xOffsets);
// }
// PRAGMA_OMP_SECTION
// {
// zOffsets = new Nd4jLong[len];
// shape::calcOffsets(zShapeInfo, zOffsets);
// }
// }
// yOffsets = xOffsets;
// }
// else if(shape::haveSameShapeAndStrides(xShapeInfo, zShapeInfo)) {
// PRAGMA_OMP_PARALLEL_SECTIONS
// {
// PRAGMA_OMP_SECTION
// {
// xOffsets = new Nd4jLong[len];
// shape::calcOffsets(xShapeInfo, xOffsets);
// }
// PRAGMA_OMP_SECTION
// {
// yOffsets = new Nd4jLong[len];
// shape::calcOffsets(yShapeInfo, yOffsets);
// }
// }
// zOffsets = xOffsets;
// }
// else {
// PRAGMA_OMP_PARALLEL_SECTIONS
// {
// PRAGMA_OMP_SECTION
// {
// xOffsets = new Nd4jLong[len];
// shape::calcOffsets(xShapeInfo, xOffsets);
// }
// PRAGMA_OMP_SECTION
// {
// yOffsets = new Nd4jLong[len];
// shape::calcOffsets(yShapeInfo, yOffsets);
// }
// PRAGMA_OMP_SECTION
// {
// zOffsets = new Nd4jLong[len];
// shape::calcOffsets(zShapeInfo, zOffsets);
// }
// }
// }
// }
//////////////////////////////////////////////////////////////////////
// INLINEDEF _CUDA_HD void calcOffsets(const Nd4jLong *xShapeInfo, Nd4jLong*& xOffsets, const Nd4jLong *yShapeInfo, Nd4jLong*& yOffsets, const char order) {
// // we assume all array have same length
// const Nd4jLong len = shape::length(xShapeInfo);
// const Nd4jLong xEws = shape::elementWiseStride(xShapeInfo);
// const Nd4jLong yEws = shape::elementWiseStride(yShapeInfo);
// const char xOrder = shape::order(xShapeInfo);
// const char yOrder = shape::order(yShapeInfo);
// const bool shapesSame = shape::shapeEquals(xShapeInfo, yShapeInfo);
// if (xEws == 1 && yEws == 1 && xOrder == yOrder && (xOrder == 'c' || shapesSame)) {
// xOffsets = yOffsets = nullptr;
// }
// else if(xEws == 1) {
// xOffsets = nullptr;
// yOffsets = new Nd4jLong[len];
// shape::calcOffsets(yShapeInfo, yOffsets, xOrder);
// }
// else if(yEws == 1) {
// yOffsets = nullptr;
// xOffsets = new Nd4jLong[len];
// shape::calcOffsets(xShapeInfo, xOffsets, yOrder);
// }
// else if(shape::haveSameShapeAndStrides(xShapeInfo, yShapeInfo)) {
// xOffsets = new Nd4jLong[len];
// shape::calcOffsets(xShapeInfo, xOffsets);
// yOffsets = xOffsets;
// }
// else {
// PRAGMA_OMP_PARALLEL_SECTIONS
// {
// PRAGMA_OMP_SECTION
// {
// xOffsets = new Nd4jLong[len];
// shape::calcOffsets(xShapeInfo, xOffsets);
// }
// PRAGMA_OMP_SECTION
// {
// yOffsets = new Nd4jLong[len];
// shape::calcOffsets(yShapeInfo, yOffsets);
// }
// }
// }
// }
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
@ -9065,6 +9281,9 @@ public static final int PREALLOC_SIZE = 33554432;
// Reports whether this op allows in-place execution.
// Returns true if in-place execution is allowed for this op.
public native @Cast("bool") boolean allowsInplace();
// Enables or disables in-place calls for this op.
// reallyAllow - presumably true enables in-place execution and false disables it (confirm in the native class)
public native void allowInplace(@Cast("bool") boolean reallyAllow);
// Returns the opNum of this op.
// Per the original header note, the value is only applicable to legacy XYZ ops.
public native int getOpNum();

View File

@ -4253,14 +4253,20 @@ public native @Cast("bool") boolean isOptimalRequirementsMet();
* set new order and shape in case of suitable array length (in-place operation)
* order - order to set
* shape - shape to set
*
* copyToNewBuff - if true, the contents of the old buffer are copied into the new buffer whenever reshaping has to allocate one
* if a permute was applied before, or the strides are irregular, a new buffer is allocated for the array
*/
// Overloads taking an explicit order. The variants that omit copyToNewBuff rely on the native default
// argument, which is true (see the /*=true*/ marker).
public native @Cast("bool") boolean reshapei(byte order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @Cast("const bool") boolean copyToNewBuff/*=true*/);
public native @Cast("bool") boolean reshapei(byte order, @Cast("Nd4jLong*") @StdVector LongPointer shape);
public native @Cast("bool") boolean reshapei(byte order, @Cast("Nd4jLong*") @StdVector LongBuffer shape, @Cast("const bool") boolean copyToNewBuff/*=true*/);
public native @Cast("bool") boolean reshapei(byte order, @Cast("Nd4jLong*") @StdVector LongBuffer shape);
public native @Cast("bool") boolean reshapei(byte order, @Cast("Nd4jLong*") @StdVector long[] shape, @Cast("const bool") boolean copyToNewBuff/*=true*/);
public native @Cast("bool") boolean reshapei(byte order, @Cast("Nd4jLong*") @StdVector long[] shape);
// Overloads without an order argument; copyToNewBuff again defaults to true when omitted.
// NOTE(review): presumably the array's current order is kept by these variants - confirm in the native header.
public native @Cast("bool") boolean reshapei(@Cast("Nd4jLong*") @StdVector LongPointer shape, @Cast("const bool") boolean copyToNewBuff/*=true*/);
public native @Cast("bool") boolean reshapei(@Cast("Nd4jLong*") @StdVector LongPointer shape);
public native @Cast("bool") boolean reshapei(@Cast("Nd4jLong*") @StdVector LongBuffer shape, @Cast("const bool") boolean copyToNewBuff/*=true*/);
public native @Cast("bool") boolean reshapei(@Cast("Nd4jLong*") @StdVector LongBuffer shape);
public native @Cast("bool") boolean reshapei(@Cast("Nd4jLong*") @StdVector long[] shape, @Cast("const bool") boolean copyToNewBuff/*=true*/);
public native @Cast("bool") boolean reshapei(@Cast("Nd4jLong*") @StdVector long[] shape);
/**
@ -4270,8 +4276,11 @@ public native @Cast("bool") boolean isOptimalRequirementsMet();
*
* if a permute has been applied before, or the strides are irregular, a new buffer is allocated for the new array
*/
// Returns the reshaped array by value (@ByVal NDArray).
// order - order to use for the result; shape - target shape.
// The variants that omit copyToNewBuff rely on the native default argument, which is true (see the /*=true*/ marker).
public native @ByVal NDArray reshape(byte order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @Cast("const bool") boolean copyToNewBuff/*=true*/);
public native @ByVal NDArray reshape(byte order, @Cast("Nd4jLong*") @StdVector LongPointer shape);
public native @ByVal NDArray reshape(byte order, @Cast("Nd4jLong*") @StdVector LongBuffer shape, @Cast("const bool") boolean copyToNewBuff/*=true*/);
public native @ByVal NDArray reshape(byte order, @Cast("Nd4jLong*") @StdVector LongBuffer shape);
public native @ByVal NDArray reshape(byte order, @Cast("Nd4jLong*") @StdVector long[] shape, @Cast("const bool") boolean copyToNewBuff/*=true*/);
public native @ByVal NDArray reshape(byte order, @Cast("Nd4jLong*") @StdVector long[] shape);
/**
@ -6206,6 +6215,7 @@ public native @Cast("bool") boolean isOptimalRequirementsMet();
// #include <pointercast.h>
// #include <dll.h>
// #include <string>
// #include <vector>
@Namespace("nd4j::graph") @NoOffset public static class NodeProfile extends Pointer {
static { Loader.load(); }
/** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */
@ -6238,11 +6248,20 @@ public native @Cast("bool") boolean isOptimalRequirementsMet();
// Setters for the "objects" and "total" size counters of this profile; each takes a size expressed in bytes.
public native void setObjectsSize(@Cast("Nd4jLong") long bytes);
public native void setTotalSize(@Cast("Nd4jLong") long bytes);
// Appends the textual form of the given shapeInfo to this profile's list of input shapes.
// The native side stores ShapeUtils::shapeInfoAsString(shapeInfo), i.e. the full shapeInfo rather than the shape only.
public native void addInputShape(@Cast("Nd4jLong*") LongPointer shapeInfo);
public native void addInputShape(@Cast("Nd4jLong*") LongBuffer shapeInfo);
public native void addInputShape(@Cast("Nd4jLong*") long[] shapeInfo);
// Same as addInputShape, but the string is appended to the list of output shapes.
public native void addOutputShape(@Cast("Nd4jLong*") LongPointer shapeInfo);
public native void addOutputShape(@Cast("Nd4jLong*") LongBuffer shapeInfo);
public native void addOutputShape(@Cast("Nd4jLong*") long[] shapeInfo);
// Getters for the size counters of this profile.
// NOTE(review): the matching setters take bytes, so these presumably return bytes as well - confirm.
public native @Cast("Nd4jLong") long getActivationsSize();
public native @Cast("Nd4jLong") long getTemporarySize();
public native @Cast("Nd4jLong") long getObjectsSize();
public native @Cast("Nd4jLong") long getTotalSize();
// Execution time stored in this profile; NOTE(review): the time unit is not visible from this declaration.
public native @Cast("Nd4jLong") long getExecutionTime();
// Name of this profile, returned by reference to the native std::string.
public native @StdString @ByRef @Cast({"char*", "std::string*"}) BytePointer name();
// Merges another profile into this one; NOTE(review): the merge rules live in the native implementation - confirm there.
public native void merge(NodeProfile other);
@ -6838,9 +6857,15 @@ public static final int PREALLOC_SIZE = 33554432;
@Namespace("shape") public static native @Cast("bool") boolean canReshape(int oldRank, @Cast("Nd4jLong*") LongBuffer oldShape, int newRank, @Cast("Nd4jLong*") LongBuffer newShape, @Cast("bool") boolean isFOrder);
@Namespace("shape") public static native @Cast("bool") boolean canReshape(int oldRank, @Cast("Nd4jLong*") long[] oldShape, int newRank, @Cast("Nd4jLong*") long[] newShape, @Cast("bool") boolean isFOrder);
// Computes a shapeInfo for viewing the old array with a new shape, without copying data.
// Per the commented-out native body reproduced further down in this file: newShapeInfo receives newRank and
// newShape, new strides are derived from the old strides, and order/ews/type are copied from oldShapeInfo.
// Returns false when the old strides are "not contiguous enough" to express the new shape, true otherwise.
// oldRank/newRank - ranks of the old and new shapes; newShape - shape only (newRank values), not a shapeInfo.
@Namespace("shape") public static native @Cast("bool") boolean reshapeC(int oldRank, @Cast("const Nd4jLong*") LongPointer oldShapeInfo, int newRank, @Cast("const Nd4jLong*") LongPointer newShape, @Cast("Nd4jLong*") LongPointer newShapeInfo);
@Namespace("shape") public static native @Cast("bool") boolean reshapeC(int oldRank, @Cast("const Nd4jLong*") LongBuffer oldShapeInfo, int newRank, @Cast("const Nd4jLong*") LongBuffer newShape, @Cast("Nd4jLong*") LongBuffer newShapeInfo);
@Namespace("shape") public static native @Cast("bool") boolean reshapeC(int oldRank, @Cast("const Nd4jLong*") long[] oldShapeInfo, int newRank, @Cast("const Nd4jLong*") long[] newShape, @Cast("Nd4jLong*") long[] newShapeInfo);
// Variant of reshapeC that takes the requested order explicitly instead of the old rank.
// oldShapeInfo and newShape are const on the native side; newShapeInfo is the only writable argument.
// NOTE(review): the native body is not visible here; presumably the boolean reports whether the reshape
// could be expressed without allocating a new buffer - confirm in the native header.
@Namespace("shape") public static native @Cast("bool") boolean reshapeC(@Cast("const Nd4jLong*") LongPointer oldShapeInfo, byte newOrder, int newRank, @Cast("const Nd4jLong*") LongPointer newShape, @Cast("Nd4jLong*") LongPointer newShapeInfo);
@Namespace("shape") public static native @Cast("bool") boolean reshapeC(@Cast("const Nd4jLong*") LongBuffer oldShapeInfo, byte newOrder, int newRank, @Cast("const Nd4jLong*") LongBuffer newShape, @Cast("Nd4jLong*") LongBuffer newShapeInfo);
@Namespace("shape") public static native @Cast("bool") boolean reshapeC(@Cast("const Nd4jLong*") long[] oldShapeInfo, byte newOrder, int newRank, @Cast("const Nd4jLong*") long[] newShape, @Cast("Nd4jLong*") long[] newShapeInfo);
/**
* Two-argument form of reshapeC.
* newShapeInfo contains rank, shape and order only, no strides/ews/type.
* NOTE(review): presumably the missing strides/ews/type are filled in by this call and the returned boolean
* reports whether that was possible - confirm against the native implementation.
*
* @param oldShapeInfo shapeInfo of the existing array (const on the native side)
* @param newShapeInfo writable shapeInfo buffer carrying the requested rank, shape and order
*/
@Namespace("shape") public static native @Cast("bool") boolean reshapeC(@Cast("const Nd4jLong*") LongPointer oldShapeInfo, @Cast("Nd4jLong*") LongPointer newShapeInfo);
@Namespace("shape") public static native @Cast("bool") boolean reshapeC(@Cast("const Nd4jLong*") LongBuffer oldShapeInfo, @Cast("Nd4jLong*") LongBuffer newShapeInfo);
@Namespace("shape") public static native @Cast("bool") boolean reshapeC(@Cast("const Nd4jLong*") long[] oldShapeInfo, @Cast("Nd4jLong*") long[] newShapeInfo);
/**
* Get the shape info buffer
@ -7148,6 +7173,15 @@ public static final int PREALLOC_SIZE = 33554432;
@Namespace("shape") public static native @Cast("bool") boolean isColumnVector(@Cast("Nd4jLong*") LongPointer shapeInfo);
@Namespace("shape") public static native @Cast("bool") boolean isColumnVector(@Cast("Nd4jLong*") LongBuffer shapeInfo);
@Namespace("shape") public static native @Cast("bool") boolean isColumnVector(@Cast("Nd4jLong*") long[] shapeInfo);
/**
* Counts the non-unity dimensions of a shape.
*
* @param rank    rank of the shape
* @param inShape the shape only (dimension sizes), not a full shapeInfo
* @return number of non-unity dimensions in inShape
*/
@Namespace("shape") public static native int numOfNonUnitDims(int rank, @Cast("const Nd4jLong*") LongPointer inShape);
@Namespace("shape") public static native int numOfNonUnitDims(int rank, @Cast("const Nd4jLong*") LongBuffer inShape);
@Namespace("shape") public static native int numOfNonUnitDims(int rank, @Cast("const Nd4jLong*") long[] inShape);
/**
* Returns whether the
* given shape is a vector or not
@ -7166,9 +7200,9 @@ public static final int PREALLOC_SIZE = 33554432;
* Returns the shape portion of an information
* buffer
*/
@Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer shapeOf(@Cast("Nd4jLong*") LongPointer buffer);
@Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer shapeOf(@Cast("Nd4jLong*") LongBuffer buffer);
@Namespace("shape") public static native @Cast("Nd4jLong*") long[] shapeOf(@Cast("Nd4jLong*") long[] buffer);
@Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer shapeOf(@Cast("Nd4jLong*") LongPointer shapeInfo);
@Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer shapeOf(@Cast("Nd4jLong*") LongBuffer shapeInfo);
@Namespace("shape") public static native @Cast("Nd4jLong*") long[] shapeOf(@Cast("Nd4jLong*") long[] shapeInfo);
/**
* Return a copy of a buffer.
@ -7906,40 +7940,22 @@ public static final int PREALLOC_SIZE = 33554432;
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") LongBuffer shapeInfo, @Cast("Nd4jLong*") LongBuffer offsets);
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") long[] shapeInfo, @Cast("Nd4jLong*") long[] offsets, byte order/*='c'*/);
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") long[] shapeInfo, @Cast("Nd4jLong*") long[] offsets);
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") LongPointer xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongPointer xOffsets, @Cast("const Nd4jLong*") LongPointer yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongPointer yOffsets, byte order/*='c'*/);
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") LongPointer xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongPointer xOffsets, @Cast("const Nd4jLong*") LongPointer yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongPointer yOffsets);
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") LongBuffer xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer xOffsets, @Cast("const Nd4jLong*") LongBuffer yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer yOffsets, byte order/*='c'*/);
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") LongBuffer xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer xOffsets, @Cast("const Nd4jLong*") LongBuffer yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer yOffsets);
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") long[] xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef long[] xOffsets, @Cast("const Nd4jLong*") long[] yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef long[] yOffsets, byte order/*='c'*/);
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") long[] xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef long[] xOffsets, @Cast("const Nd4jLong*") long[] yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef long[] yOffsets);
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") LongPointer xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongPointer xOffsets, @Cast("const Nd4jLong*") LongPointer yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongPointer yOffsets, @Cast("const Nd4jLong*") LongPointer zShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongPointer zOffsets, byte order/*='c'*/);
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") LongPointer xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongPointer xOffsets, @Cast("const Nd4jLong*") LongPointer yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongPointer yOffsets, @Cast("const Nd4jLong*") LongPointer zShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongPointer zOffsets);
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") LongBuffer xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer xOffsets, @Cast("const Nd4jLong*") LongBuffer yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer yOffsets, @Cast("const Nd4jLong*") LongBuffer zShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer zOffsets, byte order/*='c'*/);
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") LongBuffer xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer xOffsets, @Cast("const Nd4jLong*") LongBuffer yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer yOffsets, @Cast("const Nd4jLong*") LongBuffer zShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer zOffsets);
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") long[] xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef long[] xOffsets, @Cast("const Nd4jLong*") long[] yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef long[] yOffsets, @Cast("const Nd4jLong*") long[] zShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef long[] zOffsets, byte order/*='c'*/);
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") long[] xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef long[] xOffsets, @Cast("const Nd4jLong*") long[] yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef long[] yOffsets, @Cast("const Nd4jLong*") long[] zShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef long[] zOffsets);
// ND4J_EXPORT void calcOffsets(const Nd4jLong *xShapeInfo, Nd4jLong*& xOffsets, const Nd4jLong *yShapeInfo, Nd4jLong*& yOffsets, const char order = 'c');
// ND4J_EXPORT void calcOffsets(const Nd4jLong *xShapeInfo, Nd4jLong*& xOffsets, const Nd4jLong *yShapeInfo, Nd4jLong*& yOffsets, const Nd4jLong* zShapeInfo, Nd4jLong*& zOffsets, const char order = 'c');
@Namespace("shape") public static native void shapeOldScalar(@Cast("nd4j::DataType") int dtype, @Cast("Nd4jLong*const") LongPointer buffer, byte order);
@Namespace("shape") public static native void shapeOldScalar(@Cast("nd4j::DataType") int dtype, @Cast("Nd4jLong*const") LongBuffer buffer, byte order);
@Namespace("shape") public static native void shapeOldScalar(@Cast("nd4j::DataType") int dtype, @Cast("Nd4jLong*const") long[] buffer, byte order);
// Deduces the element-wise stride (ews) for shapeInfo:
//  - scalar or unit-length vector: ews = 1
//  - common vector: ews = stride of the non-unity dimension
//  - otherwise: ews = 1 if the strides are normal, else ews = 0
// len - NOTE(review): presumably the length (element count) of the array described by shapeInfo - confirm
@Namespace("shape") public static native void setEws(@Cast("Nd4jLong*") LongPointer shapeInfo, @Cast("Nd4jLong") long len);
@Namespace("shape") public static native void setEws(@Cast("Nd4jLong*") LongBuffer shapeInfo, @Cast("Nd4jLong") long len);
@Namespace("shape") public static native void setEws(@Cast("Nd4jLong*") long[] shapeInfo, @Cast("Nd4jLong") long len);
// Deduces both order and element-wise stride (ews) for shapeInfo:
//  - scalar or unit-length vector: ews = 1, order is preserved
//  - common vector: ews = stride of the non-unity dimension, order is preserved
//  - normal/contiguous strides: ews = 1 and the corresponding order is set
//  - otherwise: ews = 0, order is preserved
// len defaults to -1 on the native side (see the /*=-1*/ marker); the overloads without len rely on that default.
@Namespace("shape") public static native void setOrderAndEws(@Cast("Nd4jLong*") LongPointer shapeInfo, @Cast("Nd4jLong") long len/*=-1*/);
@Namespace("shape") public static native void setOrderAndEws(@Cast("Nd4jLong*") LongPointer shapeInfo);
@Namespace("shape") public static native void setOrderAndEws(@Cast("Nd4jLong*") LongBuffer shapeInfo, @Cast("Nd4jLong") long len/*=-1*/);
@Namespace("shape") public static native void setOrderAndEws(@Cast("Nd4jLong*") LongBuffer shapeInfo);
@Namespace("shape") public static native void setOrderAndEws(@Cast("Nd4jLong*") long[] shapeInfo, @Cast("Nd4jLong") long len/*=-1*/);
@Namespace("shape") public static native void setOrderAndEws(@Cast("Nd4jLong*") long[] shapeInfo);
// NOTE(review): no description accompanies these declarations and the native body is not visible here.
// Judging by the name, they inspect the strides and set ews/order inside shapeInfo, which is the only
// non-const argument - confirm against the native header.
// Long form: takes a proposed order plus the shape/strides with unities removed; numOfNonUnitDims,
// shapeNoUnities and stridesNoUnities match what excludeUnitiesFromShapeInfo(inShapeInfo, shapeNoUnities,
// stridesNoUnities) in this file returns and fills.
@Namespace("shape") public static native void checkStridesSetEwsAndOrder(@Cast("Nd4jLong*") LongPointer shapeInfo, byte proposedOrder, int numOfNonUnitDims, @Cast("const Nd4jLong*") LongPointer shapeNoUnities, @Cast("const Nd4jLong*") LongPointer stridesNoUnities);
@Namespace("shape") public static native void checkStridesSetEwsAndOrder(@Cast("Nd4jLong*") LongBuffer shapeInfo, byte proposedOrder, int numOfNonUnitDims, @Cast("const Nd4jLong*") LongBuffer shapeNoUnities, @Cast("const Nd4jLong*") LongBuffer stridesNoUnities);
@Namespace("shape") public static native void checkStridesSetEwsAndOrder(@Cast("Nd4jLong*") long[] shapeInfo, byte proposedOrder, int numOfNonUnitDims, @Cast("const Nd4jLong*") long[] shapeNoUnities, @Cast("const Nd4jLong*") long[] stridesNoUnities);
// Short form: takes only the shapeInfo to update.
@Namespace("shape") public static native void checkStridesSetEwsAndOrder(@Cast("Nd4jLong*") LongPointer shapeInfo);
@Namespace("shape") public static native void checkStridesSetEwsAndOrder(@Cast("Nd4jLong*") LongBuffer shapeInfo);
@Namespace("shape") public static native void checkStridesSetEwsAndOrder(@Cast("Nd4jLong*") long[] shapeInfo);
/**
* processes whole set of sub-arrays
@ -7949,7 +7965,7 @@ public static final int PREALLOC_SIZE = 33554432;
* numOfSubArrs - number of sub-arrays, size of subArrOffsets is equal to numOfSubArrs
* dimsSize - size of dimsToExclude, if dimsSize = array rank or dimsSize = 0 it means sub-array is whole array, copy of wholeShapeInfo and one zero offset will be returned
* dimsToExclude - MUST BE SORTED, dimensions to evaluate sub-array along, i.e. when shape is [2,3,4,5] and dimsToExclude={0,2}, then there will be 8 sub-arrays with shape [3,5]
* subArrShapeInfo - output argument, contains shapeInfo common for all sub-arrays
* subArrShapeInfo - output argument, contains shapeInfo (same for all sub-arrays)
* subArrOffsets - output argument, contains successive sub-arrays offsets from original this-buffer
* keepUnitiesInShape - if false then eliminate unities from sub-array shapeInfo, for example {1,a,1,b} -> {a,b}
*/
@ -7960,6 +7976,24 @@ public static final int PREALLOC_SIZE = 33554432;
@Namespace("shape") public static native void calcSubArrShapeAndOffsets(@Cast("const Nd4jLong*") long[] wholeShapeInfo, @Cast("const Nd4jLong") long numOfSubArrs, int dimsSize, @Const int[] dimsToExclude, @Cast("Nd4jLong*") long[] subArrShapeInfo, @Cast("Nd4jLong*") long[] subArrOffsets, @Cast("bool") boolean keepUnitiesInShape/*=false*/);
@Namespace("shape") public static native void calcSubArrShapeAndOffsets(@Cast("const Nd4jLong*") long[] wholeShapeInfo, @Cast("const Nd4jLong") long numOfSubArrs, int dimsSize, @Const int[] dimsToExclude, @Cast("Nd4jLong*") long[] subArrShapeInfo, @Cast("Nd4jLong*") long[] subArrOffsets);
/**
* Extracts the shape and strides of inShapeInfo with all unity dimensions removed.
*
* Example: inShapeInfo is {3, 2,1,4, 4,4,1, 16384,1,99};
* then the output shapeNoUnities will contain {2,4, 4,1} - that is, only shape and strides, no rank/type/ews/order -
* and stridesNoUnities will point to the strides inside shapeNoUnities, that is to {4,1}.
*
* If inShapeInfo contains no unities, nothing is copied and shapeNoUnities/stridesNoUnities point to the
* corresponding places inside inShapeInfo.
*
* @param inShapeInfo      source shapeInfo (const on the native side)
* @param shapeNoUnities   pointer passed by reference; set to the shape followed by the strides, unities excluded
* @param stridesNoUnities pointer passed by reference; set to the strides part of shapeNoUnities
* @return number of non-unity dimensions in inShapeInfo
*/
@Namespace("shape") public static native int excludeUnitiesFromShapeInfo(@Cast("const Nd4jLong*") LongPointer inShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongPointer shapeNoUnities, @Cast("Nd4jLong*&") @ByPtrRef LongPointer stridesNoUnities);
@Namespace("shape") public static native int excludeUnitiesFromShapeInfo(@Cast("const Nd4jLong*") LongBuffer inShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer shapeNoUnities, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer stridesNoUnities);
@Namespace("shape") public static native int excludeUnitiesFromShapeInfo(@Cast("const Nd4jLong*") long[] inShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef long[] shapeNoUnities, @Cast("Nd4jLong*&") @ByPtrRef long[] stridesNoUnities);
/**
* Writes into outShapeInfo a version of inShapeInfo with the unity dimensions named by dimsToExclude removed
* (summary derived from the example below; the native body is not visible from this binding).
*
* Example: inShapeInfo is {3, 2,1,3,1,4, 12,12,4,4,1, 16384,1,99}, dimsToExclude = {2,3}, dimsSize = 2;
* then outShapeInfo will contain {3, 2,3,4, 12,4,1, 16384,1,99}
*
* NOTE(review): the input example lists five shape values and five strides, so its leading rank is presumably
* meant to be 5 rather than 3. The unity dimensions also sit at zero-based positions 1 and 3, so
* dimsToExclude = {2,3} looks like a typo - confirm against the native implementation.
*
* @param inShapeInfo   source shapeInfo (const on the native side)
* @param dimsSize      number of entries in dimsToExclude
* @param dimsToExclude dimensions to drop from the shape
* @param outShapeInfo  output buffer receiving the reduced shapeInfo
*/
@Namespace("shape") public static native void excludeUnitiesFromShapeInfo(@Cast("const Nd4jLong*") LongPointer inShapeInfo, int dimsSize, @Const IntPointer dimsToExclude, @Cast("Nd4jLong*") LongPointer outShapeInfo);
@Namespace("shape") public static native void excludeUnitiesFromShapeInfo(@Cast("const Nd4jLong*") LongBuffer inShapeInfo, int dimsSize, @Const IntBuffer dimsToExclude, @Cast("Nd4jLong*") LongBuffer outShapeInfo);
@Namespace("shape") public static native void excludeUnitiesFromShapeInfo(@Cast("const Nd4jLong*") long[] inShapeInfo, int dimsSize, @Const int[] dimsToExclude, @Cast("Nd4jLong*") long[] outShapeInfo);
@ -8189,6 +8223,8 @@ public static final int PREALLOC_SIZE = 33554432;
* @param rank the rank of the shape
*/
//////////////////////////////////////////////////////////////////////
/**
* Returns whether the
* given shape is a vector or not
@ -8738,69 +8774,60 @@ public static final int PREALLOC_SIZE = 33554432;
// return true;
// }
// INLINEDEF _CUDA_H bool reshapeC(const int oldRank, const Nd4jLong* oldShapeInfo, const int newRank, const Nd4jLong* newShape, const bool isFOrder, Nd4jLong* newShapeInfo) {
//////////////////////////////////////////////////////////////////////
// INLINEDEF _CUDA_H bool reshapeC(const int oldRank, const Nd4jLong* oldShapeInfo, const int newRank, const Nd4jLong* newShape, Nd4jLong* newShapeInfo) {
// // PLEASE NOTE !: reshaping not-permuted (ews=1) array in f order (except insertion/elimination of unities) will definitely cause allocation of new buffer for array elements
// // also this function takes into account identical shapes automatically, namely in that case oldShapeInfo is completely copied to newShapeInfo
// const int newOrder = isFOrder ? 102 : 99;
// const int oldOrder = oldShapeInfo[2 * oldRank + 3];
// newShapeInfo[0] = newRank;
// memcpy(newShapeInfo + 1, newShape, newRank * sizeof(Nd4jLong));
// Nd4jLong* newStrides = shape::stride(newShapeInfo);
// const Nd4jLong* oldShape = shape::shapeOf(const_cast<Nd4jLong*>(oldShapeInfo));
// Nd4jLong* newStrides = shape::stride(newShapeInfo);
// const Nd4jLong* oldShape = shape::shapeOf(const_cast<Nd4jLong*>(oldShapeInfo));
// const Nd4jLong* oldStrides = shape::stride(const_cast<Nd4jLong*>(oldShapeInfo));
// int oldStart(0), oldStop(1), newStart(0), newStop(1), newDim, oldDim;
// Nd4jLong oldStart(0), oldStop(1), newStart(0), newStop(1), newDim, oldDim;
// while (newStart < newRank && oldStart < oldRank) {
// newDim = newShape[newStart];
// oldDim = oldShape[oldStart];
// while (newDim != oldDim)
// while (newDim != oldDim && newDim > 0 && oldDim > 0)
// if (newDim < oldDim) newDim *= newShape[newStop++];
// else oldDim *= oldShape[oldStop++];
// // ------ Check whether the original axes can be combined ------ //
// for (int i = oldStart; i < oldStop - 1; i++) {
// if(oldShape[i] == 1) { // ignore strides like {...,1,1,...}
// if(oldOrder == 102) ++oldStart;
// for (int step = 1, i = oldStart; i < oldStop - 1; ++i) {
// if(oldShape[i] == 1) // skip unity-dimension and its stride
// continue;
// }
// if(oldOrder == 102 && oldStrides[i + 1] != oldShape[i] * oldStrides[i])
// return false; // not contiguous enough
// if(oldOrder == 99 && oldStrides[i] != oldShape[i + 1] * oldStrides[i + 1])
// return false; // not contiguous enough
// while((i + step) < oldRank && oldShape[i + step] == 1)
// ++step; // skip the following unity-dimensions and their strides if such are present
// if((i + step) < oldRank && oldStrides[i] != oldShape[i + step] * oldStrides[i + step])
// return false; // not contiguous enough
// }
// // ------ Calculate new strides for all axes currently worked with ------ //
// if(isFOrder) {
// newStrides[newStart] = oldStrides[oldStart];
// for (int i = newStart + 1; i < newStop; ++i)
// newStrides[i] = newStrides[i - 1] * newShape[i - 1];
// }
// else {
// newStrides[newStop - 1] = oldStrides[oldStop - 1];
// for (int i = newStop - 1; i > newStart; --i)
// newStrides[i - 1] = newStrides[i] * newShape[i];
// }
// newStrides[newStop - 1] = oldStrides[oldStop - 1];
// for (int i = newStop - 1; i > newStart; --i)
// newStrides[i - 1] = newStrides[i] * newShape[i];
// newStart = newStop++;
// oldStart = oldStop++;
// }
// newShapeInfo[2 * newRank + 3] = shape::order(oldShapeInfo); // order
// newShapeInfo[2 * newRank + 2] = shape::elementWiseStride(oldShapeInfo); // ews
// newShapeInfo[2 * newRank + 1] = shape::type(oldShapeInfo); // type
// // rest of strides should be unities (if there is remainder in strides space, that is newStart < newRank)
// for (int i = newStart; i < newRank; ++i)
// newStrides[i] = 1;
// newShapeInfo[2 * newRank + 3] = shape::order(oldShapeInfo); // order
// newShapeInfo[2 * newRank + 2] = shape::elementWiseStride(oldShapeInfo); // ews
// newShapeInfo[2 * newRank + 1] = shape::type(oldShapeInfo); // type
// return true;
// }
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
// this function checks the consistency of dimensions with array rank (negative dimensions, too large dimensions, too many dimensions)
@ -8841,9 +8868,198 @@ public static final int PREALLOC_SIZE = 33554432;
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
// INLINEDEF _CUDA_HD void calcOffsets(const Nd4jLong *xShapeInfo, Nd4jLong*& xOffsets, const Nd4jLong *yShapeInfo, Nd4jLong*& yOffsets, const Nd4jLong* zShapeInfo, Nd4jLong*& zOffsets, const char order) {
// // we assume all arrays have the same length
// const Nd4jLong len = shape::length(xShapeInfo);
// const Nd4jLong xEws = shape::elementWiseStride(xShapeInfo);
// const Nd4jLong yEws = shape::elementWiseStride(yShapeInfo);
// const Nd4jLong zEws = shape::elementWiseStride(zShapeInfo);
// const char xOrder = shape::order(xShapeInfo);
// const char yOrder = shape::order(yShapeInfo);
// const char zOrder = shape::order(zShapeInfo);
// const bool shapesSame = shape::shapeEquals(xShapeInfo, yShapeInfo, zShapeInfo);
// if (xEws == 1 && yEws == 1 && zEws == 1 && xOrder == yOrder && xOrder == zOrder && (xOrder == 'c' || shapesSame)) {
// xOffsets = yOffsets = zOffsets = nullptr;
// }
// else if(xEws == 1 && yEws == 1 && xOrder == yOrder && (xOrder == 'c' || shape::shapeEquals(xShapeInfo, yShapeInfo))) {
// xOffsets = yOffsets = nullptr;
// zOffsets = new Nd4jLong[len];
// shape::calcOffsets(zShapeInfo, zOffsets, xOrder);
// }
// else if(xEws == 1 && zEws == 1 && xOrder == zOrder && (xOrder == 'c' || shape::shapeEquals(xShapeInfo, zShapeInfo))) {
// xOffsets = zOffsets = nullptr;
// yOffsets = new Nd4jLong[len];
// shape::calcOffsets(yShapeInfo, yOffsets, xOrder);
// }
// else if(yEws == 1 && zEws == 1 && yOrder == zOrder && (yOrder == 'c' || shape::shapeEquals(yShapeInfo, zShapeInfo))) {
// yOffsets = zOffsets = nullptr;
// xOffsets = new Nd4jLong[len];
// shape::calcOffsets(xShapeInfo, xOffsets, yOrder);
// }
// else if(xEws == 1) {
// xOffsets = nullptr;
// PRAGMA_OMP_PARALLEL_SECTIONS
// {
// PRAGMA_OMP_SECTION
// {
// yOffsets = new Nd4jLong[len];
// shape::calcOffsets(yShapeInfo, yOffsets, xOrder);
// }
// PRAGMA_OMP_SECTION
// {
// zOffsets = new Nd4jLong[len];
// shape::calcOffsets(zShapeInfo, zOffsets, xOrder);
// }
// }
// }
// else if(yEws == 1) {
// yOffsets = nullptr;
// PRAGMA_OMP_PARALLEL_SECTIONS
// {
// PRAGMA_OMP_SECTION
// {
// xOffsets = new Nd4jLong[len];
// shape::calcOffsets(xShapeInfo, xOffsets, yOrder);
// }
// PRAGMA_OMP_SECTION
// {
// zOffsets = new Nd4jLong[len];
// shape::calcOffsets(zShapeInfo, zOffsets, yOrder);
// }
// }
// }
// else if(zEws == 1) {
// zOffsets = nullptr;
// PRAGMA_OMP_PARALLEL_SECTIONS
// {
// PRAGMA_OMP_SECTION
// {
// xOffsets = new Nd4jLong[len];
// shape::calcOffsets(xShapeInfo, xOffsets, zOrder);
// }
// PRAGMA_OMP_SECTION
// {
// yOffsets = new Nd4jLong[len];
// shape::calcOffsets(yShapeInfo, yOffsets, zOrder);
// }
// }
// }
// else if(shape::haveSameShapeAndStrides(xShapeInfo, yShapeInfo, zShapeInfo)) {
// xOffsets = new Nd4jLong[len];
// shape::calcOffsets(xShapeInfo, xOffsets);
// yOffsets = zOffsets = xOffsets;
// }
// else if(shape::haveSameShapeAndStrides(xShapeInfo, yShapeInfo)) {
// PRAGMA_OMP_PARALLEL_SECTIONS
// {
// PRAGMA_OMP_SECTION
// {
// xOffsets = new Nd4jLong[len];
// shape::calcOffsets(xShapeInfo, xOffsets);
// }
// PRAGMA_OMP_SECTION
// {
// zOffsets = new Nd4jLong[len];
// shape::calcOffsets(zShapeInfo, zOffsets);
// }
// }
// yOffsets = xOffsets;
// }
// else if(shape::haveSameShapeAndStrides(xShapeInfo, zShapeInfo)) {
// PRAGMA_OMP_PARALLEL_SECTIONS
// {
// PRAGMA_OMP_SECTION
// {
// xOffsets = new Nd4jLong[len];
// shape::calcOffsets(xShapeInfo, xOffsets);
// }
// PRAGMA_OMP_SECTION
// {
// yOffsets = new Nd4jLong[len];
// shape::calcOffsets(yShapeInfo, yOffsets);
// }
// }
// zOffsets = xOffsets;
// }
// else {
// PRAGMA_OMP_PARALLEL_SECTIONS
// {
// PRAGMA_OMP_SECTION
// {
// xOffsets = new Nd4jLong[len];
// shape::calcOffsets(xShapeInfo, xOffsets);
// }
// PRAGMA_OMP_SECTION
// {
// yOffsets = new Nd4jLong[len];
// shape::calcOffsets(yShapeInfo, yOffsets);
// }
// PRAGMA_OMP_SECTION
// {
// zOffsets = new Nd4jLong[len];
// shape::calcOffsets(zShapeInfo, zOffsets);
// }
// }
// }
// }
//////////////////////////////////////////////////////////////////////
// INLINEDEF _CUDA_HD void calcOffsets(const Nd4jLong *xShapeInfo, Nd4jLong*& xOffsets, const Nd4jLong *yShapeInfo, Nd4jLong*& yOffsets, const char order) {
// // we assume all arrays have the same length
// const Nd4jLong len = shape::length(xShapeInfo);
// const Nd4jLong xEws = shape::elementWiseStride(xShapeInfo);
// const Nd4jLong yEws = shape::elementWiseStride(yShapeInfo);
// const char xOrder = shape::order(xShapeInfo);
// const char yOrder = shape::order(yShapeInfo);
// const bool shapesSame = shape::shapeEquals(xShapeInfo, yShapeInfo);
// if (xEws == 1 && yEws == 1 && xOrder == yOrder && (xOrder == 'c' || shapesSame)) {
// xOffsets = yOffsets = nullptr;
// }
// else if(xEws == 1) {
// xOffsets = nullptr;
// yOffsets = new Nd4jLong[len];
// shape::calcOffsets(yShapeInfo, yOffsets, xOrder);
// }
// else if(yEws == 1) {
// yOffsets = nullptr;
// xOffsets = new Nd4jLong[len];
// shape::calcOffsets(xShapeInfo, xOffsets, yOrder);
// }
// else if(shape::haveSameShapeAndStrides(xShapeInfo, yShapeInfo)) {
// xOffsets = new Nd4jLong[len];
// shape::calcOffsets(xShapeInfo, xOffsets);
// yOffsets = xOffsets;
// }
// else {
// PRAGMA_OMP_PARALLEL_SECTIONS
// {
// PRAGMA_OMP_SECTION
// {
// xOffsets = new Nd4jLong[len];
// shape::calcOffsets(xShapeInfo, xOffsets);
// }
// PRAGMA_OMP_SECTION
// {
// yOffsets = new Nd4jLong[len];
// shape::calcOffsets(yShapeInfo, yOffsets);
// }
// }
// }
// }
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
@ -11382,6 +11598,9 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD();
// returns TRUE if this op allows in-place execution
// (declared native: there is no Java body; the result is annotated with @Cast("bool"))
public native @Cast("bool") boolean allowsInplace();
// this method allows you to enable/disable inplace call for a given op
// reallyAllow: flag handed to the native implementation (annotated with @Cast("bool"));
// presumably true enables and false disables in-place execution - TODO confirm
public native void allowInplace(@Cast("bool") boolean reallyAllow);
// this method returns opNum (applicable for legacy XYZ ops only)
// NOTE(review): what is returned for non-legacy ops is not visible from this declaration - confirm on the native side
public native int getOpNum();
@ -21093,7 +21312,7 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD();
/** Default constructor: passes a null {@link Pointer} to the superclass, then calls the native {@code allocate()}. */
public permute() { super((Pointer)null); allocate(); }
// Native allocation routine invoked by the no-argument constructor.
private native void allocate();
// Native method: receives the input ShapeList and a Context (annotated @ByRef) and returns a ShapeList.
public native ShapeList calculateOutputShape(ShapeList inputShape, @ByRef Context block);
}
}
// #endif
// #if NOT_EXCLUDED(OP_reshapeas)
@ -21111,7 +21330,7 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD();
/** Default constructor: passes a null {@link Pointer} to the superclass, then calls the native {@code allocate()}. */
public reshapeas() { super((Pointer)null); allocate(); }
// Native allocation routine invoked by the no-argument constructor.
private native void allocate();
// Native method: receives the input ShapeList and a Context (annotated @ByRef) and returns a ShapeList.
public native ShapeList calculateOutputShape(ShapeList inputShape, @ByRef Context block);
}
}
// #endif
// #if NOT_EXCLUDED(OP_transpose)
@ -22222,7 +22441,22 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD();
/** Default constructor: passes a null {@link Pointer} to the superclass, then calls the native {@code allocate()}. */
public tensormmul() { super((Pointer)null); allocate(); }
// Native allocation routine invoked by the no-argument constructor.
private native void allocate();
// Native method: receives the input ShapeList and a Context (annotated @ByRef) and returns a ShapeList.
public native ShapeList calculateOutputShape(ShapeList inputShape, @ByRef Context block);
}
}
/**
 * Java-side declaration of the {@code tensormmul_bp} custom op, bound to the native
 * {@code nd4j::ops} namespace through the {@code @Namespace} annotation.
 * The class carries no Java logic of its own: allocation and output-shape calculation
 * are all declared {@code native}.
 * NOTE(review): the {@code _bp} suffix presumably marks the backprop counterpart of
 * {@code tensormmul} - confirm against the native op declaration.
 */
@Namespace("nd4j::ops") public static class tensormmul_bp extends DeclarableCustomOp {
// Static initializer: runs Loader.load() once when the class is initialized
// (presumably makes the native library available before any native method is called - TODO confirm).
static { Loader.load(); }
/** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */
public tensormmul_bp(Pointer p) { super(p); }
/** Native array allocator. Access with {@link Pointer#position(long)}. */
public tensormmul_bp(long size) { super((Pointer)null); allocateArray(size); }
// Native allocation routine used by the (long size) constructor.
private native void allocateArray(long size);
// Covariant override: delegates to super.position(position) and casts the result back to
// tensormmul_bp so callers do not need to cast themselves.
@Override public tensormmul_bp position(long position) {
return (tensormmul_bp)super.position(position);
}
/** Default constructor: passes a null {@link Pointer} to the superclass, then calls the native {@code allocate()}. */
public tensormmul_bp() { super((Pointer)null); allocate(); }
// Native allocation routine used by the no-argument constructor.
private native void allocate();
// Native method: receives the input ShapeList and a Context (annotated @ByRef) and returns a ShapeList.
public native ShapeList calculateOutputShape(ShapeList inputShape, @ByRef Context block);
}
// #endif
/**