Perf improvements (#242)
* initial commit
  Signed-off-by: raver119 <raver119@gmail.com>
* meh
  Signed-off-by: raver119 <raver119@gmail.com>
* better ExpandDims impl
  Signed-off-by: raver119 <raver119@gmail.com>
* better Squeeze impl
  Signed-off-by: raver119 <raver119@gmail.com>
* better Softmax impl
  Signed-off-by: raver119 <raver119@gmail.com>
* one test disabled
  Signed-off-by: raver119 <raver119@gmail.com>
* more accurate impl
  Signed-off-by: raver119 <raver119@gmail.com>
* - GraphProfiler now prints full shapeInfo instead of shape
  - softmax typo fix
  Signed-off-by: raver119 <raver119@gmail.com>
master
parent
6e6289b6b9
commit
9e3c1b02b1
|
@ -1088,8 +1088,23 @@ namespace nd4j {
|
|||
if (e < node->input()->size() - 1)
|
||||
nd4j_printf(", ", "");
|
||||
}
|
||||
|
||||
if (node->opType() == OpType_CUSTOM) {
|
||||
auto ctx = node->protoContext();
|
||||
if (ctx->getIArguments()->size() > 0) {
|
||||
printf("]; iArgs: [");
|
||||
|
||||
for (int e = 0; e < ctx->getIArguments()->size(); e++) {
|
||||
printf("%i", ctx->getIArguments()->at(e));
|
||||
if (e < ctx->getIArguments()->size() - 1)
|
||||
nd4j_printf(", ", "");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
nd4j_printf("]; \n", "");
|
||||
|
||||
|
||||
// printf("\n");
|
||||
fflush(stdout);
|
||||
}
|
||||
|
|
|
@ -117,11 +117,11 @@ namespace nd4j {
|
|||
}
|
||||
|
||||
/**
 * Records the textual form of an input's shapeInfo buffer in this profile.
 *
 * @param shapeInfo shapeInfo buffer of the input; forwarded to ShapeUtils::shapeInfoAsString,
 *                  which throws std::runtime_error when it is nullptr
 */
void NodeProfile::addInputShape(Nd4jLong *shapeInfo) {
    // Exactly one entry per input: the full shapeInfo rendering. The older
    // shapeAsString entry was a stale leftover that duplicated every record.
    _inputShapes.emplace_back(ShapeUtils::shapeInfoAsString(shapeInfo));
}
|
||||
|
||||
/**
 * Records the textual form of an output's shapeInfo buffer in this profile.
 *
 * @param shapeInfo shapeInfo buffer of the output; forwarded to ShapeUtils::shapeInfoAsString,
 *                  which throws std::runtime_error when it is nullptr
 */
void NodeProfile::addOutputShape(Nd4jLong *shapeInfo) {
    // Exactly one entry per output: the full shapeInfo rendering. The older
    // shapeAsString entry was a stale leftover that duplicated every record.
    _outputShapes.emplace_back(ShapeUtils::shapeInfoAsString(shapeInfo));
}
|
||||
|
||||
void NodeProfile::merge(NodeProfile *other) {
|
||||
|
|
|
@ -97,6 +97,8 @@ namespace nd4j {
|
|||
static std::string shapeAsString(const int rank, const Nd4jLong* shapeInfo);
|
||||
static std::string strideAsString(const NDArray* array);
|
||||
|
||||
// returns the elements of the shapeInfo buffer as a string "[v0, v1, ...]"; throws std::runtime_error when shapeInfo is nullptr
static std::string shapeInfoAsString(const Nd4jLong* shapeInfo);
|
||||
|
||||
static std::vector<Nd4jLong> shapeAsVector(const Nd4jLong* shapeInfo);
|
||||
|
||||
// evaluate shapeInfo for diagonal array which is made using input arr elements as diagonal
|
||||
|
|
|
@ -666,6 +666,26 @@ Nd4jLong* ShapeUtils::evalTileShapeInfo(const NDArray& arr, const std::vector<Nd
|
|||
return result;
|
||||
}
|
||||
|
||||
std::string ShapeUtils::shapeInfoAsString(const Nd4jLong* shapeInfo) {
|
||||
|
||||
if(!shapeInfo)
|
||||
throw std::runtime_error("ShapeUtils::shapeAsString method: input shapeInfo must not be nullptr !");
|
||||
|
||||
std::string result;
|
||||
|
||||
int len = shape::shapeInfoLength(shapeInfo[0]);
|
||||
|
||||
result.append("[");
|
||||
for (int e = 0; e < len; e++) {
|
||||
result += flatbuffers::NumToString(shapeInfo[e]);
|
||||
if (e < len - 1)
|
||||
result.append(", ");
|
||||
}
|
||||
result.append("]");
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
std::string ShapeUtils::shapeAsString(const int rank, const Nd4jLong* shapeInfo) {
|
||||
if(!shapeInfo)
|
||||
|
|
|
@ -47,11 +47,12 @@ namespace nd4j {
|
|||
|
||||
shape.insert(shape.begin() + axis, 1);
|
||||
|
||||
if (input->ews() == 1 && output->ews() == 1 && input->ordering() == output->ordering()) {
|
||||
output->dataBuffer()->copyBufferFrom(*input->dataBuffer().get(), output->lengthOf() * DataTypeUtils::sizeOfElement(output->dataType()), 0, input->bufferOffset());
|
||||
} else {
|
||||
auto tmp = input->reshape(input->ordering(), shape);
|
||||
output->assign(tmp);
|
||||
|
||||
STORE_RESULT(output);
|
||||
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
|
|
@ -25,7 +25,7 @@
|
|||
|
||||
namespace nd4j {
|
||||
namespace ops {
|
||||
CUSTOM_OP_IMPL(squeeze, 1, 1, true, 0, -2) {
|
||||
CUSTOM_OP_IMPL(squeeze, 1, 1, false, 0, -2) {
|
||||
auto input = INPUT_VARIABLE(0);
|
||||
auto output = OUTPUT_VARIABLE(0);
|
||||
|
||||
|
@ -72,10 +72,14 @@ namespace nd4j {
|
|||
|
||||
if (block.isInplace()) {
|
||||
output->reshapei(input->ordering(), shape, false);
|
||||
} else {
|
||||
if (input->ews() == 1 && output->ews() == 1 && input->ordering() == output->ordering()) {
|
||||
output->dataBuffer()->copyBufferFrom(*input->dataBuffer().get(), output->lengthOf() * DataTypeUtils::sizeOfElement(output->dataType()), 0, input->bufferOffset());
|
||||
} else {
|
||||
auto tmp = input->reshape(input->ordering(), shape);
|
||||
output->assign(tmp);
|
||||
}
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
|
|
@ -191,6 +191,70 @@ void softMaxForVector(nd4j::LaunchContext * context, const NDArray& input, NDArr
|
|||
BUILD_SINGLE_SELECTOR(xType, logSoftMaxForVector_, (input.getBuffer(), input.getShapeInfo(), output.buffer(), output.shapeInfo()), FLOAT_TYPES);
|
||||
}
|
||||
|
||||
// Primary template declaration of the per-sub-array softmax kernel.
// A float specialization and the generic definition both follow below.
//   input / output - base pointers; sub-array i is addressed as base + offsets[i]
//   numOfSubArrs   - number of sub-arrays to process
//   tadLen         - number of elements in each sub-array
template <typename T>
|
||||
void softmax_loop(T *input, T *output, Nd4jLong *offsets, Nd4jLong numOfSubArrs, uint32_t tadLen);
|
||||
|
||||
template <>
|
||||
FORCEINLINE void softmax_loop(float *input, float *output, Nd4jLong *offsets, Nd4jLong numOfSubArrs, uint32_t tadLen) {
|
||||
auto func = PRAGMA_THREADS_FOR {
|
||||
for (auto i = start; i < stop; i++) {
|
||||
auto inBuff = input + offsets[i];
|
||||
auto outBuff = output + offsets[i];
|
||||
|
||||
float max = -DataTypeUtils::max<float>();
|
||||
float sum = 0.f;
|
||||
|
||||
#pragma omp simd reduction(max:max)
|
||||
for (uint j = 0; j < tadLen; ++j)
|
||||
max = nd4j::math::nd4j_max<float>(max, inBuff[j]);
|
||||
|
||||
#pragma omp simd reduction(+:sum)
|
||||
for (uint j = 0; j < tadLen; ++j) {
|
||||
float temp = nd4j::math::nd4j_exp<float, float>(inBuff[j] - max);
|
||||
outBuff[j] = temp;
|
||||
sum += temp;
|
||||
}
|
||||
|
||||
#pragma omp simd
|
||||
for (uint j = 0; j < tadLen; ++j)
|
||||
outBuff[j] /= sum;
|
||||
}
|
||||
};
|
||||
|
||||
samediff::Threads::parallel_tad(func,0, numOfSubArrs);
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
FORCEINLINE void softmax_loop(T *input, T *output, Nd4jLong *offsets, Nd4jLong numOfSubArrs, uint32_t tadLen) {
|
||||
auto func = PRAGMA_THREADS_FOR {
|
||||
for (auto i = start; i < stop; i++) {
|
||||
auto inBuff = input + offsets[i];
|
||||
auto outBuff = output + offsets[i];
|
||||
|
||||
T max = -DataTypeUtils::max<T>();
|
||||
T sum(0.f);
|
||||
|
||||
#pragma omp simd reduction(maxT:max)
|
||||
for (uint j = 0; j < tadLen; ++j)
|
||||
max = nd4j::math::nd4j_max<T>(max, inBuff[j]);
|
||||
|
||||
#pragma omp simd reduction(sumT:sum)
|
||||
for (uint j = 0; j < tadLen; ++j) {
|
||||
T temp = nd4j::math::nd4j_exp<T, T>(inBuff[j] - max);
|
||||
outBuff[j] = temp;
|
||||
sum += temp;
|
||||
}
|
||||
|
||||
#pragma omp simd
|
||||
for (uint j = 0; j < tadLen; ++j)
|
||||
outBuff[j] /= sum;
|
||||
}
|
||||
};
|
||||
|
||||
samediff::Threads::parallel_tad(func,0, numOfSubArrs);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
template <typename T>
|
||||
static void softmax_(nd4j::LaunchContext * context, const NDArray& input, NDArray& output, const int dimension) {
|
||||
|
@ -213,31 +277,10 @@ static void softmax_(nd4j::LaunchContext * context, const NDArray& input, NDArra
|
|||
const uint tadLen = shape::length(tadShapeInfo);
|
||||
|
||||
if(shape::elementWiseStride(tadShapeInfo) == 1){
|
||||
T *inBuff = input.bufferAsT<T>();
|
||||
T *outBuff = output.bufferAsT<T>();
|
||||
|
||||
auto func = PRAGMA_THREADS_FOR {
|
||||
for (auto i = start; i < stop; i += increment) {
|
||||
|
||||
T *inBuff = input.bufferAsT<T>() + tadOffsets[i];
|
||||
T *outBuff = output.bufferAsT<T>() + tadOffsets[i];
|
||||
|
||||
T max = -DataTypeUtils::max<T>();
|
||||
T sum = 0;
|
||||
|
||||
for (uint j = 0; j < tadLen; ++j)
|
||||
max = nd4j::math::nd4j_max<T>(max, inBuff[j]);
|
||||
|
||||
for (uint j = 0; j < tadLen; ++j) {
|
||||
T temp = nd4j::math::nd4j_exp<T, T>(inBuff[j] - max);
|
||||
outBuff[j] = temp;
|
||||
sum += temp;
|
||||
}
|
||||
|
||||
for (uint j = 0; j < tadLen; ++j)
|
||||
outBuff[j] /= sum;
|
||||
}
|
||||
};
|
||||
|
||||
samediff::Threads::parallel_tad(func,0, numOfSubArrs);
|
||||
softmax_loop(inBuff, outBuff, tadOffsets, numOfSubArrs, tadLen);
|
||||
}
|
||||
else {
|
||||
|
||||
|
|
|
@ -127,6 +127,32 @@ namespace nd4j {
|
|||
template<typename T, typename Z>
|
||||
math_def inline Z nd4j_erfc(T num);
|
||||
|
||||
/**
 * Returns the 32-bit pattern of a float as an int32_t.
 * The value is written to the float member of a union and read back
 * through the int32_t member.
 *
 * @param d value whose bit pattern is wanted
 * @return the bits of d viewed as int32_t
 */
math_def inline int32_t floatToRawIntBits(float d) {
    union {
        float asFloat;
        int32_t asInt;
    } pun;

    pun.asFloat = d;
    return pun.asInt;
}
|
||||
|
||||
/**
 * Returns the float whose 32-bit pattern equals the given int32_t.
 * The value is written to the int32_t member of a union and read back
 * through the float member.
 *
 * @param i bit pattern to view as float
 * @return the float made of those bits
 */
math_def inline float intBitsToFloat(int32_t i) {
    union {
        float asFloat;
        int32_t asInt;
    } pun;

    pun.asInt = i;
    return pun.asFloat;
}
|
||||
|
||||
/**
 * Flips the sign bit of x when the sign bit of y is set, and leaves x
 * unchanged otherwise (x's bits XOR y's sign bit).
 *
 * @param x value whose sign may be flipped
 * @param y value supplying the sign bit
 * @return x with its sign bit XOR-ed with the sign bit of y
 */
math_def inline float mulsignf(float x, float y) {
    // Bit 31 only. Spelled as a constant because `1 << 31` shifts into the
    // sign bit of int, which is undefined under the original C++11 wording.
    const int32_t signMask = -2147483647 - 1;

    return intBitsToFloat(floatToRawIntBits(x) ^ (floatToRawIntBits(y) & signMask));
}
|
||||
|
||||
/**
 * Combines the magnitude bits of x with the sign bit of y.
 *
 * @param x value supplying every bit except the sign bit
 * @param y value supplying the sign bit
 * @return float built from x's low 31 bits and y's sign bit
 */
math_def inline float copysignfk(float x, float y) {
    // Bit 31 only. Spelled as a constant because `1 << 31` shifts into the
    // sign bit of int, which is undefined under the original C++11 wording.
    const int32_t signMask = -2147483647 - 1;

    return intBitsToFloat((floatToRawIntBits(x) & ~signMask) ^ (floatToRawIntBits(y) & signMask));
}
|
||||
|
||||
template<typename T, typename Z>
|
||||
math_def inline Z nd4j_sigmoid(T val) {
|
||||
return (Z) 1.0f / ((Z) 1.0f + nd4j_exp<T, Z>(-val));
|
||||
|
@ -660,6 +686,11 @@ namespace nd4j {
|
|||
* @param val2
|
||||
* @return
|
||||
*/
|
||||
template <>
|
||||
math_def inline float nd4j_pow(float val, float val2) {
|
||||
return p_pow<float>(val, val2);
|
||||
}
|
||||
|
||||
template <typename X, typename Y, typename Z>
|
||||
math_def inline Z nd4j_pow(X val, Y val2) {
|
||||
return p_pow<Z>(static_cast<Z>(val), static_cast<Z>(val2));
|
||||
|
@ -767,10 +798,23 @@ namespace nd4j {
|
|||
}
|
||||
|
||||
|
||||
/**
 * Evaluates (1 - p) / (1 + p) with p = e^(-2 * sign * val).
 * The float nd4j_tanh specialization calls this with sign = copysignfk(1.0f, val),
 * so sign * val is the magnitude of val there.
 *
 * @param val  input value
 * @param sign multiplier applied to val before exponentiation
 * @return (1 - p) / (1 + p)
 */
math_def inline float neu_tanh(float val, float sign) {
    float magnitude = sign * val;
    float eulerE(M_E);

    auto decay = nd4j::math::nd4j_pow<float, float, float>(eulerE, -magnitude * 2.f);

    return (1 - decay) / (1 + decay);
}
|
||||
|
||||
template <>
|
||||
math_def inline float nd4j_tanh(float val) {
|
||||
float sign = copysignfk(1.0f, val);
|
||||
return sign * neu_tanh(val, sign);
|
||||
}
|
||||
|
||||
|
||||
template <typename X, typename Z>
|
||||
math_def inline Z nd4j_tanh(X val) {
|
||||
return val <= 0 ? neg_tanh(val) : pos_tanh(val);
|
||||
//return p_tanh<Z>(static_cast<Z>(val));
|
||||
}
|
||||
|
||||
template <typename X, typename Z>
|
||||
|
|
|
@ -50,3 +50,14 @@ TEST_F(DeclarableOpsTests18, test_bitcast_1) {
|
|||
|
||||
ASSERT_EQ(e, z);
|
||||
}
|
||||
|
||||
// Runs the tanh op on paired positive/negative inputs, including very large
// magnitudes that are expected to give exactly +/-1, and compares with the expected values.
TEST_F(DeclarableOpsTests18, test_tanh_1) {
    auto x = NDArrayFactory::create<float>('c', {8}, {0.23f, -0.23f, 0.35f, -0.35f, 0.64f, -0.64f, 100000.f, -100000.f});
    auto z = x.ulike();
    auto e = NDArrayFactory::create<float>('c', {8}, {0.226028f, -0.226028f, 0.336376f, -0.336376f, 0.564900f, -0.564900f, 1.f, -1.f});

    nd4j::ops::tanh op;
    auto status = op.execute({&x}, {&z});

    // a failed execution should be reported as such, not as an output mismatch
    ASSERT_EQ(Status::OK(), status);

    ASSERT_EQ(e, z);
}
|
|
@ -67,3 +67,13 @@ TEST_F(DeclarableOpsTests19, test_conv1d_bp_1) {
|
|||
|
||||
delete result;
|
||||
}
|
||||
|
||||
// Executes squeeze on a {3, 4, 1} array with axis 2, writing into a pre-allocated {3, 4} array,
// and checks only that the op reports success.
TEST_F(DeclarableOpsTests19, test_squeeze_1) {
    // kept as a plain (non-const) int so the brace-initialised argument list below is unchanged
    int squeezeAxis = 2;

    auto source = NDArrayFactory::create<double>('c', {3, 4, 1});
    auto target = NDArrayFactory::create<double>('c', {3, 4});

    nd4j::ops::squeeze op;
    auto status = op.execute({&source}, {&target}, {squeezeAxis});

    ASSERT_EQ(Status::OK(), status);
}
|
|
@ -169,6 +169,53 @@ TEST_F(PlaygroundTests, test_broadcast_1) {
|
|||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
TEST_F(PlaygroundTests, test_broadcast_1) {
|
||||
int pool = 500;
|
||||
std::vector<NDArray*> aX(pool);
|
||||
std::vector<NDArray*> aY(pool);
|
||||
std::vector<NDArray*> aZ(pool);
|
||||
|
||||
for (int e = 0; e < pool; e++) {
|
||||
aX[e] = NDArrayFactory::create_<float>('c', {512, 3072});
|
||||
aY[e] = NDArrayFactory::create_<float>('c', {768});
|
||||
aZ[e] = NDArrayFactory::create_<float>('c', {512, 3072});
|
||||
|
||||
aX[e]->assign( (e+1) / 119);
|
||||
aY[e]->assign( (e+3) / 119);
|
||||
}
|
||||
|
||||
|
||||
|
||||
std::vector<Nd4jLong> values;
|
||||
|
||||
for (int e = 0; e < 1000; e++) {
|
||||
auto x = aX[e < pool ? e : e % pool];
|
||||
auto y = aY[e < pool ? e : e % pool];
|
||||
auto z = aZ[e < pool ? e : e % pool];
|
||||
|
||||
auto timeStart = std::chrono::system_clock::now();
|
||||
|
||||
//x->applyTrueBroadcast(BroadcastOpsTuple::Multiply(), *y, *z);
|
||||
x->applyTransform(transform::Tanh, *z, nullptr);
|
||||
|
||||
auto timeEnd = std::chrono::system_clock::now();
|
||||
auto outerTime = std::chrono::duration_cast<std::chrono::microseconds>(timeEnd - timeStart).count();
|
||||
values.emplace_back(outerTime);
|
||||
}
|
||||
|
||||
std::sort(values.begin(), values.end());
|
||||
|
||||
nd4j_printf("Time: %lld us;\n", values[values.size() / 2]);
|
||||
|
||||
for (int e = 0; e < pool; e++) {
|
||||
delete aX[e];
|
||||
delete aY[e];
|
||||
delete aZ[e];
|
||||
}
|
||||
}
|
||||
|
||||
*/
|
||||
/*
|
||||
|
||||
TEST_F(PlaygroundTests, test_s_0) {
|
||||
|
|
|
@ -4250,14 +4250,20 @@ public native @Cast("bool") boolean isOptimalRequirementsMet();
|
|||
* set new order and shape in case of suitable array length (in-place operation)
|
||||
* order - order to set
|
||||
* shape - shape to set
|
||||
*
|
||||
* copyToNewBuff - if true, the contents of the old buffer are copied into the new buffer whenever reshaping has to allocate one
|
||||
* if there was permute applied before or there are weird strides, then new buffer is allocated for array
|
||||
*/
|
||||
public native @Cast("bool") boolean reshapei(byte order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @Cast("const bool") boolean copyToNewBuff/*=true*/);
|
||||
public native @Cast("bool") boolean reshapei(byte order, @Cast("Nd4jLong*") @StdVector LongPointer shape);
|
||||
public native @Cast("bool") boolean reshapei(byte order, @Cast("Nd4jLong*") @StdVector LongBuffer shape, @Cast("const bool") boolean copyToNewBuff/*=true*/);
|
||||
public native @Cast("bool") boolean reshapei(byte order, @Cast("Nd4jLong*") @StdVector LongBuffer shape);
|
||||
public native @Cast("bool") boolean reshapei(byte order, @Cast("Nd4jLong*") @StdVector long[] shape, @Cast("const bool") boolean copyToNewBuff/*=true*/);
|
||||
public native @Cast("bool") boolean reshapei(byte order, @Cast("Nd4jLong*") @StdVector long[] shape);
|
||||
public native @Cast("bool") boolean reshapei(@Cast("Nd4jLong*") @StdVector LongPointer shape, @Cast("const bool") boolean copyToNewBuff/*=true*/);
|
||||
public native @Cast("bool") boolean reshapei(@Cast("Nd4jLong*") @StdVector LongPointer shape);
|
||||
public native @Cast("bool") boolean reshapei(@Cast("Nd4jLong*") @StdVector LongBuffer shape, @Cast("const bool") boolean copyToNewBuff/*=true*/);
|
||||
public native @Cast("bool") boolean reshapei(@Cast("Nd4jLong*") @StdVector LongBuffer shape);
|
||||
public native @Cast("bool") boolean reshapei(@Cast("Nd4jLong*") @StdVector long[] shape, @Cast("const bool") boolean copyToNewBuff/*=true*/);
|
||||
public native @Cast("bool") boolean reshapei(@Cast("Nd4jLong*") @StdVector long[] shape);
|
||||
|
||||
/**
|
||||
|
@ -4267,8 +4273,11 @@ public native @Cast("bool") boolean isOptimalRequirementsMet();
|
|||
*
|
||||
* if permute have been applied before or there are weird strides, then new buffer is allocated for new array
|
||||
*/
|
||||
public native @ByVal NDArray reshape(byte order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @Cast("const bool") boolean copyToNewBuff/*=true*/);
|
||||
public native @ByVal NDArray reshape(byte order, @Cast("Nd4jLong*") @StdVector LongPointer shape);
|
||||
public native @ByVal NDArray reshape(byte order, @Cast("Nd4jLong*") @StdVector LongBuffer shape, @Cast("const bool") boolean copyToNewBuff/*=true*/);
|
||||
public native @ByVal NDArray reshape(byte order, @Cast("Nd4jLong*") @StdVector LongBuffer shape);
|
||||
public native @ByVal NDArray reshape(byte order, @Cast("Nd4jLong*") @StdVector long[] shape, @Cast("const bool") boolean copyToNewBuff/*=true*/);
|
||||
public native @ByVal NDArray reshape(byte order, @Cast("Nd4jLong*") @StdVector long[] shape);
|
||||
|
||||
/**
|
||||
|
@ -6203,6 +6212,7 @@ public native @Cast("bool") boolean isOptimalRequirementsMet();
|
|||
// #include <pointercast.h>
|
||||
// #include <dll.h>
|
||||
// #include <string>
|
||||
// #include <vector>
|
||||
@Namespace("nd4j::graph") @NoOffset public static class NodeProfile extends Pointer {
|
||||
static { Loader.load(); }
|
||||
/** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */
|
||||
|
@ -6235,11 +6245,20 @@ public native @Cast("bool") boolean isOptimalRequirementsMet();
|
|||
public native void setObjectsSize(@Cast("Nd4jLong") long bytes);
|
||||
public native void setTotalSize(@Cast("Nd4jLong") long bytes);
|
||||
|
||||
public native void addInputShape(@Cast("Nd4jLong*") LongPointer shapeInfo);
|
||||
public native void addInputShape(@Cast("Nd4jLong*") LongBuffer shapeInfo);
|
||||
public native void addInputShape(@Cast("Nd4jLong*") long[] shapeInfo);
|
||||
public native void addOutputShape(@Cast("Nd4jLong*") LongPointer shapeInfo);
|
||||
public native void addOutputShape(@Cast("Nd4jLong*") LongBuffer shapeInfo);
|
||||
public native void addOutputShape(@Cast("Nd4jLong*") long[] shapeInfo);
|
||||
|
||||
public native @Cast("Nd4jLong") long getActivationsSize();
|
||||
public native @Cast("Nd4jLong") long getTemporarySize();
|
||||
public native @Cast("Nd4jLong") long getObjectsSize();
|
||||
public native @Cast("Nd4jLong") long getTotalSize();
|
||||
|
||||
public native @Cast("Nd4jLong") long getExecutionTime();
|
||||
|
||||
public native @StdString @ByRef @Cast({"char*", "std::string*"}) BytePointer name();
|
||||
|
||||
public native void merge(NodeProfile other);
|
||||
|
@ -6835,9 +6854,15 @@ public static final int PREALLOC_SIZE = 33554432;
|
|||
@Namespace("shape") public static native @Cast("bool") boolean canReshape(int oldRank, @Cast("Nd4jLong*") LongBuffer oldShape, int newRank, @Cast("Nd4jLong*") LongBuffer newShape, @Cast("bool") boolean isFOrder);
|
||||
@Namespace("shape") public static native @Cast("bool") boolean canReshape(int oldRank, @Cast("Nd4jLong*") long[] oldShape, int newRank, @Cast("Nd4jLong*") long[] newShape, @Cast("bool") boolean isFOrder);
|
||||
|
||||
@Namespace("shape") public static native @Cast("bool") boolean reshapeC(int oldRank, @Cast("const Nd4jLong*") LongPointer oldShapeInfo, int newRank, @Cast("const Nd4jLong*") LongPointer newShape, @Cast("Nd4jLong*") LongPointer newShapeInfo);
|
||||
@Namespace("shape") public static native @Cast("bool") boolean reshapeC(int oldRank, @Cast("const Nd4jLong*") LongBuffer oldShapeInfo, int newRank, @Cast("const Nd4jLong*") LongBuffer newShape, @Cast("Nd4jLong*") LongBuffer newShapeInfo);
|
||||
@Namespace("shape") public static native @Cast("bool") boolean reshapeC(int oldRank, @Cast("const Nd4jLong*") long[] oldShapeInfo, int newRank, @Cast("const Nd4jLong*") long[] newShape, @Cast("Nd4jLong*") long[] newShapeInfo);
|
||||
@Namespace("shape") public static native @Cast("bool") boolean reshapeC(@Cast("const Nd4jLong*") LongPointer oldShapeInfo, byte newOrder, int newRank, @Cast("const Nd4jLong*") LongPointer newShape, @Cast("Nd4jLong*") LongPointer newShapeInfo);
|
||||
@Namespace("shape") public static native @Cast("bool") boolean reshapeC(@Cast("const Nd4jLong*") LongBuffer oldShapeInfo, byte newOrder, int newRank, @Cast("const Nd4jLong*") LongBuffer newShape, @Cast("Nd4jLong*") LongBuffer newShapeInfo);
|
||||
@Namespace("shape") public static native @Cast("bool") boolean reshapeC(@Cast("const Nd4jLong*") long[] oldShapeInfo, byte newOrder, int newRank, @Cast("const Nd4jLong*") long[] newShape, @Cast("Nd4jLong*") long[] newShapeInfo);
|
||||
/**
|
||||
* newShapeInfo contains rank, shape and order only, no strides/ews/type
|
||||
*/
|
||||
@Namespace("shape") public static native @Cast("bool") boolean reshapeC(@Cast("const Nd4jLong*") LongPointer oldShapeInfo, @Cast("Nd4jLong*") LongPointer newShapeInfo);
|
||||
@Namespace("shape") public static native @Cast("bool") boolean reshapeC(@Cast("const Nd4jLong*") LongBuffer oldShapeInfo, @Cast("Nd4jLong*") LongBuffer newShapeInfo);
|
||||
@Namespace("shape") public static native @Cast("bool") boolean reshapeC(@Cast("const Nd4jLong*") long[] oldShapeInfo, @Cast("Nd4jLong*") long[] newShapeInfo);
|
||||
|
||||
/**
|
||||
* Get the shape info buffer
|
||||
|
@ -7145,6 +7170,15 @@ public static final int PREALLOC_SIZE = 33554432;
|
|||
@Namespace("shape") public static native @Cast("bool") boolean isColumnVector(@Cast("Nd4jLong*") LongPointer shapeInfo);
|
||||
@Namespace("shape") public static native @Cast("bool") boolean isColumnVector(@Cast("Nd4jLong*") LongBuffer shapeInfo);
|
||||
@Namespace("shape") public static native @Cast("bool") boolean isColumnVector(@Cast("Nd4jLong*") long[] shapeInfo);
|
||||
|
||||
/**
|
||||
* shape - input inShape is shape only, not shapeInfo
|
||||
* returns number of non-unity dimensions in inShape
|
||||
*/
|
||||
@Namespace("shape") public static native int numOfNonUnitDims(int rank, @Cast("const Nd4jLong*") LongPointer inShape);
|
||||
@Namespace("shape") public static native int numOfNonUnitDims(int rank, @Cast("const Nd4jLong*") LongBuffer inShape);
|
||||
@Namespace("shape") public static native int numOfNonUnitDims(int rank, @Cast("const Nd4jLong*") long[] inShape);
|
||||
|
||||
/**
|
||||
* Returns whether the
|
||||
* given shape is a vector or not
|
||||
|
@ -7163,9 +7197,9 @@ public static final int PREALLOC_SIZE = 33554432;
|
|||
* Returns the shape portion of an information
|
||||
* buffer
|
||||
*/
|
||||
@Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer shapeOf(@Cast("Nd4jLong*") LongPointer buffer);
|
||||
@Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer shapeOf(@Cast("Nd4jLong*") LongBuffer buffer);
|
||||
@Namespace("shape") public static native @Cast("Nd4jLong*") long[] shapeOf(@Cast("Nd4jLong*") long[] buffer);
|
||||
@Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer shapeOf(@Cast("Nd4jLong*") LongPointer shapeInfo);
|
||||
@Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer shapeOf(@Cast("Nd4jLong*") LongBuffer shapeInfo);
|
||||
@Namespace("shape") public static native @Cast("Nd4jLong*") long[] shapeOf(@Cast("Nd4jLong*") long[] shapeInfo);
|
||||
|
||||
/**
|
||||
* Return a copy of a buffer.
|
||||
|
@ -7903,40 +7937,22 @@ public static final int PREALLOC_SIZE = 33554432;
|
|||
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") LongBuffer shapeInfo, @Cast("Nd4jLong*") LongBuffer offsets);
|
||||
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") long[] shapeInfo, @Cast("Nd4jLong*") long[] offsets, byte order/*='c'*/);
|
||||
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") long[] shapeInfo, @Cast("Nd4jLong*") long[] offsets);
|
||||
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") LongPointer xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongPointer xOffsets, @Cast("const Nd4jLong*") LongPointer yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongPointer yOffsets, byte order/*='c'*/);
|
||||
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") LongPointer xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongPointer xOffsets, @Cast("const Nd4jLong*") LongPointer yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongPointer yOffsets);
|
||||
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") LongBuffer xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer xOffsets, @Cast("const Nd4jLong*") LongBuffer yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer yOffsets, byte order/*='c'*/);
|
||||
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") LongBuffer xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer xOffsets, @Cast("const Nd4jLong*") LongBuffer yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer yOffsets);
|
||||
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") long[] xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef long[] xOffsets, @Cast("const Nd4jLong*") long[] yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef long[] yOffsets, byte order/*='c'*/);
|
||||
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") long[] xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef long[] xOffsets, @Cast("const Nd4jLong*") long[] yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef long[] yOffsets);
|
||||
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") LongPointer xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongPointer xOffsets, @Cast("const Nd4jLong*") LongPointer yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongPointer yOffsets, @Cast("const Nd4jLong*") LongPointer zShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongPointer zOffsets, byte order/*='c'*/);
|
||||
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") LongPointer xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongPointer xOffsets, @Cast("const Nd4jLong*") LongPointer yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongPointer yOffsets, @Cast("const Nd4jLong*") LongPointer zShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongPointer zOffsets);
|
||||
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") LongBuffer xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer xOffsets, @Cast("const Nd4jLong*") LongBuffer yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer yOffsets, @Cast("const Nd4jLong*") LongBuffer zShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer zOffsets, byte order/*='c'*/);
|
||||
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") LongBuffer xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer xOffsets, @Cast("const Nd4jLong*") LongBuffer yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer yOffsets, @Cast("const Nd4jLong*") LongBuffer zShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer zOffsets);
|
||||
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") long[] xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef long[] xOffsets, @Cast("const Nd4jLong*") long[] yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef long[] yOffsets, @Cast("const Nd4jLong*") long[] zShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef long[] zOffsets, byte order/*='c'*/);
|
||||
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") long[] xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef long[] xOffsets, @Cast("const Nd4jLong*") long[] yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef long[] yOffsets, @Cast("const Nd4jLong*") long[] zShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef long[] zOffsets);
|
||||
// ND4J_EXPORT void calcOffsets(const Nd4jLong *xShapeInfo, Nd4jLong*& xOffsets, const Nd4jLong *yShapeInfo, Nd4jLong*& yOffsets, const char order = 'c');
|
||||
// ND4J_EXPORT void calcOffsets(const Nd4jLong *xShapeInfo, Nd4jLong*& xOffsets, const Nd4jLong *yShapeInfo, Nd4jLong*& yOffsets, const Nd4jLong* zShapeInfo, Nd4jLong*& zOffsets, const char order = 'c');
|
||||
@Namespace("shape") public static native void shapeOldScalar(@Cast("nd4j::DataType") int dtype, @Cast("Nd4jLong*const") LongPointer buffer, byte order);
|
||||
@Namespace("shape") public static native void shapeOldScalar(@Cast("nd4j::DataType") int dtype, @Cast("Nd4jLong*const") LongBuffer buffer, byte order);
|
||||
@Namespace("shape") public static native void shapeOldScalar(@Cast("nd4j::DataType") int dtype, @Cast("Nd4jLong*const") long[] buffer, byte order);
|
||||
|
||||
// deduce element-wise stride
|
||||
// if array is scalar or unit length vector then ews = 1
|
||||
// if array is common vector then ews = stride of non-unity dimension
|
||||
// if strides are normal set ews = 1, otherwise ews = 0
|
||||
@Namespace("shape") public static native void setEws(@Cast("Nd4jLong*") LongPointer shapeInfo, @Cast("Nd4jLong") long len);
|
||||
@Namespace("shape") public static native void setEws(@Cast("Nd4jLong*") LongBuffer shapeInfo, @Cast("Nd4jLong") long len);
|
||||
@Namespace("shape") public static native void setEws(@Cast("Nd4jLong*") long[] shapeInfo, @Cast("Nd4jLong") long len);
|
||||
|
||||
// deduce order and element-wise stride
|
||||
// if array is scalar or unit length vector then ews = 1 and order is preserved
|
||||
// if array is common vector then ews = stride of non-unity dimension and order is preserved
|
||||
// if strides are normal/contiguous then ews = 1 and corresponding order is set, otherwise ews = 0 and order is preserved
|
||||
@Namespace("shape") public static native void setOrderAndEws(@Cast("Nd4jLong*") LongPointer shapeInfo, @Cast("Nd4jLong") long len/*=-1*/);
|
||||
@Namespace("shape") public static native void setOrderAndEws(@Cast("Nd4jLong*") LongPointer shapeInfo);
|
||||
@Namespace("shape") public static native void setOrderAndEws(@Cast("Nd4jLong*") LongBuffer shapeInfo, @Cast("Nd4jLong") long len/*=-1*/);
|
||||
@Namespace("shape") public static native void setOrderAndEws(@Cast("Nd4jLong*") LongBuffer shapeInfo);
|
||||
@Namespace("shape") public static native void setOrderAndEws(@Cast("Nd4jLong*") long[] shapeInfo, @Cast("Nd4jLong") long len/*=-1*/);
|
||||
@Namespace("shape") public static native void setOrderAndEws(@Cast("Nd4jLong*") long[] shapeInfo);
|
||||
@Namespace("shape") public static native void checkStridesSetEwsAndOrder(@Cast("Nd4jLong*") LongPointer shapeInfo, byte proposedOrder, int numOfNonUnitDims, @Cast("const Nd4jLong*") LongPointer shapeNoUnities, @Cast("const Nd4jLong*") LongPointer stridesNoUnities);
|
||||
@Namespace("shape") public static native void checkStridesSetEwsAndOrder(@Cast("Nd4jLong*") LongBuffer shapeInfo, byte proposedOrder, int numOfNonUnitDims, @Cast("const Nd4jLong*") LongBuffer shapeNoUnities, @Cast("const Nd4jLong*") LongBuffer stridesNoUnities);
|
||||
@Namespace("shape") public static native void checkStridesSetEwsAndOrder(@Cast("Nd4jLong*") long[] shapeInfo, byte proposedOrder, int numOfNonUnitDims, @Cast("const Nd4jLong*") long[] shapeNoUnities, @Cast("const Nd4jLong*") long[] stridesNoUnities);
|
||||
@Namespace("shape") public static native void checkStridesSetEwsAndOrder(@Cast("Nd4jLong*") LongPointer shapeInfo);
|
||||
@Namespace("shape") public static native void checkStridesSetEwsAndOrder(@Cast("Nd4jLong*") LongBuffer shapeInfo);
|
||||
@Namespace("shape") public static native void checkStridesSetEwsAndOrder(@Cast("Nd4jLong*") long[] shapeInfo);
|
||||
|
||||
/**
|
||||
* processes whole set of sub-arrays
|
||||
|
@ -7946,7 +7962,7 @@ public static final int PREALLOC_SIZE = 33554432;
|
|||
* numOfSubArrs - number of sub-arrays, size of subArrOffsets is equal to numOfSubArrs
|
||||
* dimsSize - size of dimsToExclude, if dimsSize = array rank or dimsSize = 0 it means sub-array is whole array, copy of wholeShapeInfo and one zero offset will be returned
|
||||
* dimsToExclude - MUST BE SORTED, dimensions to evaluate sub-array along, i.e. when shape is [2,3,4,5] and dimsToExclude={0,2}, then there will be 8 sub-arrays with shape [3,5]
|
||||
* subArrShapeInfo - output argument, contains shapeInfo common for all sub-arrays
|
||||
* subArrShapeInfo - output argument, contains shapeInfo (same for all sub-arrays)
|
||||
* subArrOffsets - output argument, contains successive sub-arrays offsets from original this-buffer
|
||||
* keepUnitiesInShape - if false then eliminate unities from sub-array shapeInfo, for example {1,a,1,b} -> {a,b}
|
||||
*/
|
||||
|
@ -7957,6 +7973,24 @@ public static final int PREALLOC_SIZE = 33554432;
|
|||
@Namespace("shape") public static native void calcSubArrShapeAndOffsets(@Cast("const Nd4jLong*") long[] wholeShapeInfo, @Cast("const Nd4jLong") long numOfSubArrs, int dimsSize, @Const int[] dimsToExclude, @Cast("Nd4jLong*") long[] subArrShapeInfo, @Cast("Nd4jLong*") long[] subArrOffsets, @Cast("bool") boolean keepUnitiesInShape/*=false*/);
|
||||
@Namespace("shape") public static native void calcSubArrShapeAndOffsets(@Cast("const Nd4jLong*") long[] wholeShapeInfo, @Cast("const Nd4jLong") long numOfSubArrs, int dimsSize, @Const int[] dimsToExclude, @Cast("Nd4jLong*") long[] subArrShapeInfo, @Cast("Nd4jLong*") long[] subArrOffsets);
|
||||
|
||||
/**
|
||||
* for example inShapeInfo is {3, 2,1,4, 4,4,1, 16384,1,99}
|
||||
* then output shapeNoUnities will contain {2,4, 4,1} - that is only shape and strides, no rank/type/ews/order
|
||||
* stridesNoUnities will point to the strides inside shapeNoUnities, that is to {4,1}
|
||||
* returns number of non-unity dimensions in inShapeInfo
|
||||
* if there are no unities in inShapeInfo, then no copy is performed and shapeNoUnities/stridesNoUnities will point to the corresponding places in inShapeInfo
|
||||
*/
|
||||
@Namespace("shape") public static native int excludeUnitiesFromShapeInfo(@Cast("const Nd4jLong*") LongPointer inShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongPointer shapeNoUnities, @Cast("Nd4jLong*&") @ByPtrRef LongPointer stridesNoUnities);
|
||||
@Namespace("shape") public static native int excludeUnitiesFromShapeInfo(@Cast("const Nd4jLong*") LongBuffer inShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer shapeNoUnities, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer stridesNoUnities);
|
||||
@Namespace("shape") public static native int excludeUnitiesFromShapeInfo(@Cast("const Nd4jLong*") long[] inShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef long[] shapeNoUnities, @Cast("Nd4jLong*&") @ByPtrRef long[] stridesNoUnities);
|
||||
|
||||
/**
|
||||
* for example inShapeInfo is {3, 2,1,3,1,4, 12,12,4,4,1, 16384,1,99}, dimsToExclude = {2,3}, dimsSize = 2
|
||||
* then outShapeInfo will contain {3, 2,3,4, 12,4,1, 16384,1,99}
|
||||
*/
|
||||
@Namespace("shape") public static native void excludeUnitiesFromShapeInfo(@Cast("const Nd4jLong*") LongPointer inShapeInfo, int dimsSize, @Const IntPointer dimsToExclude, @Cast("Nd4jLong*") LongPointer outShapeInfo);
|
||||
@Namespace("shape") public static native void excludeUnitiesFromShapeInfo(@Cast("const Nd4jLong*") LongBuffer inShapeInfo, int dimsSize, @Const IntBuffer dimsToExclude, @Cast("Nd4jLong*") LongBuffer outShapeInfo);
|
||||
@Namespace("shape") public static native void excludeUnitiesFromShapeInfo(@Cast("const Nd4jLong*") long[] inShapeInfo, int dimsSize, @Const int[] dimsToExclude, @Cast("Nd4jLong*") long[] outShapeInfo);
|
||||
|
||||
|
||||
|
||||
|
@ -8186,6 +8220,8 @@ public static final int PREALLOC_SIZE = 33554432;
|
|||
* @param rank the rank of the shape
|
||||
*/
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
|
||||
/**
|
||||
* Returns whether the
|
||||
* given shape is a vector or not
|
||||
|
@ -8735,62 +8771,51 @@ public static final int PREALLOC_SIZE = 33554432;
|
|||
// return true;
|
||||
// }
|
||||
|
||||
// INLINEDEF _CUDA_H bool reshapeC(const int oldRank, const Nd4jLong* oldShapeInfo, const int newRank, const Nd4jLong* newShape, const bool isFOrder, Nd4jLong* newShapeInfo) {
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// INLINEDEF _CUDA_H bool reshapeC(const int oldRank, const Nd4jLong* oldShapeInfo, const int newRank, const Nd4jLong* newShape, Nd4jLong* newShapeInfo) {
|
||||
|
||||
// // PLEASE NOTE !: reshaping not-permuted (ews=1) array in f order (except insertion/elimination of unities) will definitely cause allocation of new buffer for array elements
|
||||
// // also this function takes into account identical shapes automatically, namely in that case oldShapeInfo is completely copied to newShapeInfo
|
||||
|
||||
// const int newOrder = isFOrder ? 102 : 99;
|
||||
// const int oldOrder = oldShapeInfo[2 * oldRank + 3];
|
||||
|
||||
// newShapeInfo[0] = newRank;
|
||||
// memcpy(newShapeInfo + 1, newShape, newRank * sizeof(Nd4jLong));
|
||||
|
||||
// Nd4jLong* newStrides = shape::stride(newShapeInfo);
|
||||
// const Nd4jLong* oldShape = shape::shapeOf(const_cast<Nd4jLong*>(oldShapeInfo));
|
||||
// const Nd4jLong* oldStrides = shape::stride(const_cast<Nd4jLong*>(oldShapeInfo));
|
||||
// int oldStart(0), oldStop(1), newStart(0), newStop(1), newDim, oldDim;
|
||||
|
||||
// Nd4jLong oldStart(0), oldStop(1), newStart(0), newStop(1), newDim, oldDim;
|
||||
|
||||
// while (newStart < newRank && oldStart < oldRank) {
|
||||
|
||||
// newDim = newShape[newStart];
|
||||
// oldDim = oldShape[oldStart];
|
||||
|
||||
// while (newDim != oldDim)
|
||||
// while (newDim != oldDim && newDim > 0 && oldDim > 0)
|
||||
// if (newDim < oldDim) newDim *= newShape[newStop++];
|
||||
// else oldDim *= oldShape[oldStop++];
|
||||
|
||||
// // ------ Check whether the original axes can be combined ------ //
|
||||
// for (int i = oldStart; i < oldStop - 1; i++) {
|
||||
|
||||
// if(oldShape[i] == 1) { // ignore strides like {...,1,1,...}
|
||||
// if(oldOrder == 102) ++oldStart;
|
||||
// for (int step = 1, i = oldStart; i < oldStop - 1; ++i) {
|
||||
// if(oldShape[i] == 1) // skip unity-dimension and its stride
|
||||
// continue;
|
||||
// }
|
||||
|
||||
// if(oldOrder == 102 && oldStrides[i + 1] != oldShape[i] * oldStrides[i])
|
||||
// return false; // not contiguous enough
|
||||
// if(oldOrder == 99 && oldStrides[i] != oldShape[i + 1] * oldStrides[i + 1])
|
||||
// while((i + step) < oldRank && oldShape[i + step] == 1)
|
||||
// ++step; // skip following unity-dimensions and its strides if such are present
|
||||
// if((i + step) < oldRank && oldStrides[i] != oldShape[i + step] * oldStrides[i + step])
|
||||
// return false; // not contiguous enough
|
||||
// }
|
||||
|
||||
// // ------ Calculate new strides for all axes currently worked with ------ //
|
||||
// if(isFOrder) {
|
||||
// newStrides[newStart] = oldStrides[oldStart];
|
||||
// for (int i = newStart + 1; i < newStop; ++i)
|
||||
// newStrides[i] = newStrides[i - 1] * newShape[i - 1];
|
||||
// }
|
||||
// else {
|
||||
// newStrides[newStop - 1] = oldStrides[oldStop - 1];
|
||||
// for (int i = newStop - 1; i > newStart; --i)
|
||||
// newStrides[i - 1] = newStrides[i] * newShape[i];
|
||||
// }
|
||||
|
||||
// newStart = newStop++;
|
||||
// oldStart = oldStop++;
|
||||
// }
|
||||
|
||||
// // rest of strides should be unities (if there is remainder in strides space, that is newStart < newRank)
|
||||
// for (int i = newStart; i < newRank; ++i)
|
||||
// newStrides[i] = 1;
|
||||
|
||||
// newShapeInfo[2 * newRank + 3] = shape::order(oldShapeInfo); // order
|
||||
// newShapeInfo[2 * newRank + 2] = shape::elementWiseStride(oldShapeInfo); // ews
|
||||
// newShapeInfo[2 * newRank + 1] = shape::type(oldShapeInfo); // type
|
||||
|
@ -8798,6 +8823,8 @@ public static final int PREALLOC_SIZE = 33554432;
|
|||
// return true;
|
||||
// }
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
|
||||
// this function checks the consistency of dimensions with the array rank (negative dimensions, too large dimensions, too many dimensions)
|
||||
|
@ -8838,9 +8865,198 @@ public static final int PREALLOC_SIZE = 33554432;
|
|||
//////////////////////////////////////////////////////////////////////
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// INLINEDEF _CUDA_HD void calcOffsets(const Nd4jLong *xShapeInfo, Nd4jLong*& xOffsets, const Nd4jLong *yShapeInfo, Nd4jLong*& yOffsets, const Nd4jLong* zShapeInfo, Nd4jLong*& zOffsets, const char order) {
|
||||
|
||||
// // we assume all arrays have the same length
|
||||
// const Nd4jLong len = shape::length(xShapeInfo);
|
||||
|
||||
// const Nd4jLong xEws = shape::elementWiseStride(xShapeInfo);
|
||||
// const Nd4jLong yEws = shape::elementWiseStride(yShapeInfo);
|
||||
// const Nd4jLong zEws = shape::elementWiseStride(zShapeInfo);
|
||||
|
||||
// const char xOrder = shape::order(xShapeInfo);
|
||||
// const char yOrder = shape::order(yShapeInfo);
|
||||
// const char zOrder = shape::order(zShapeInfo);
|
||||
|
||||
// const bool shapesSame = shape::shapeEquals(xShapeInfo, yShapeInfo, zShapeInfo);
|
||||
|
||||
// if (xEws == 1 && yEws == 1 && zEws == 1 && xOrder == yOrder && xOrder == zOrder && (xOrder == 'c' || shapesSame)) {
|
||||
// xOffsets = yOffsets = zOffsets = nullptr;
|
||||
// }
|
||||
// else if(xEws == 1 && yEws == 1 && xOrder == yOrder && (xOrder == 'c' || shape::shapeEquals(xShapeInfo, yShapeInfo))) {
|
||||
// xOffsets = yOffsets = nullptr;
|
||||
// zOffsets = new Nd4jLong[len];
|
||||
// shape::calcOffsets(zShapeInfo, zOffsets, xOrder);
|
||||
// }
|
||||
// else if(xEws == 1 && zEws == 1 && xOrder == zOrder && (xOrder == 'c' || shape::shapeEquals(xShapeInfo, zShapeInfo))) {
|
||||
// xOffsets = zOffsets = nullptr;
|
||||
// yOffsets = new Nd4jLong[len];
|
||||
// shape::calcOffsets(yShapeInfo, yOffsets, xOrder);
|
||||
// }
|
||||
// else if(yEws == 1 && zEws == 1 && yOrder == zOrder && (yOrder == 'c' || shape::shapeEquals(yShapeInfo, zShapeInfo))) {
|
||||
// yOffsets = zOffsets = nullptr;
|
||||
// xOffsets = new Nd4jLong[len];
|
||||
// shape::calcOffsets(xShapeInfo, xOffsets, yOrder);
|
||||
// }
|
||||
// else if(xEws == 1) {
|
||||
// xOffsets = nullptr;
|
||||
// PRAGMA_OMP_PARALLEL_SECTIONS
|
||||
// {
|
||||
// PRAGMA_OMP_SECTION
|
||||
// {
|
||||
// yOffsets = new Nd4jLong[len];
|
||||
// shape::calcOffsets(yShapeInfo, yOffsets, xOrder);
|
||||
// }
|
||||
// PRAGMA_OMP_SECTION
|
||||
// {
|
||||
// zOffsets = new Nd4jLong[len];
|
||||
// shape::calcOffsets(zShapeInfo, zOffsets, xOrder);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// else if(yEws == 1) {
|
||||
// yOffsets = nullptr;
|
||||
// PRAGMA_OMP_PARALLEL_SECTIONS
|
||||
// {
|
||||
// PRAGMA_OMP_SECTION
|
||||
// {
|
||||
// xOffsets = new Nd4jLong[len];
|
||||
// shape::calcOffsets(xShapeInfo, xOffsets, yOrder);
|
||||
// }
|
||||
// PRAGMA_OMP_SECTION
|
||||
// {
|
||||
// zOffsets = new Nd4jLong[len];
|
||||
// shape::calcOffsets(zShapeInfo, zOffsets, yOrder);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// else if(zEws == 1) {
|
||||
// zOffsets = nullptr;
|
||||
// PRAGMA_OMP_PARALLEL_SECTIONS
|
||||
// {
|
||||
// PRAGMA_OMP_SECTION
|
||||
// {
|
||||
// xOffsets = new Nd4jLong[len];
|
||||
// shape::calcOffsets(xShapeInfo, xOffsets, zOrder);
|
||||
// }
|
||||
// PRAGMA_OMP_SECTION
|
||||
// {
|
||||
// yOffsets = new Nd4jLong[len];
|
||||
// shape::calcOffsets(yShapeInfo, yOffsets, zOrder);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// else if(shape::haveSameShapeAndStrides(xShapeInfo, yShapeInfo, zShapeInfo)) {
|
||||
// xOffsets = new Nd4jLong[len];
|
||||
// shape::calcOffsets(xShapeInfo, xOffsets);
|
||||
// yOffsets = zOffsets = xOffsets;
|
||||
// }
|
||||
// else if(shape::haveSameShapeAndStrides(xShapeInfo, yShapeInfo)) {
|
||||
// PRAGMA_OMP_PARALLEL_SECTIONS
|
||||
// {
|
||||
// PRAGMA_OMP_SECTION
|
||||
// {
|
||||
// xOffsets = new Nd4jLong[len];
|
||||
// shape::calcOffsets(xShapeInfo, xOffsets);
|
||||
// }
|
||||
// PRAGMA_OMP_SECTION
|
||||
// {
|
||||
// zOffsets = new Nd4jLong[len];
|
||||
// shape::calcOffsets(zShapeInfo, zOffsets);
|
||||
// }
|
||||
// }
|
||||
// yOffsets = xOffsets;
|
||||
// }
|
||||
// else if(shape::haveSameShapeAndStrides(xShapeInfo, zShapeInfo)) {
|
||||
// PRAGMA_OMP_PARALLEL_SECTIONS
|
||||
// {
|
||||
// PRAGMA_OMP_SECTION
|
||||
// {
|
||||
// xOffsets = new Nd4jLong[len];
|
||||
// shape::calcOffsets(xShapeInfo, xOffsets);
|
||||
// }
|
||||
// PRAGMA_OMP_SECTION
|
||||
// {
|
||||
// yOffsets = new Nd4jLong[len];
|
||||
// shape::calcOffsets(yShapeInfo, yOffsets);
|
||||
// }
|
||||
// }
|
||||
// zOffsets = xOffsets;
|
||||
// }
|
||||
// else {
|
||||
// PRAGMA_OMP_PARALLEL_SECTIONS
|
||||
// {
|
||||
// PRAGMA_OMP_SECTION
|
||||
// {
|
||||
// xOffsets = new Nd4jLong[len];
|
||||
// shape::calcOffsets(xShapeInfo, xOffsets);
|
||||
// }
|
||||
// PRAGMA_OMP_SECTION
|
||||
// {
|
||||
// yOffsets = new Nd4jLong[len];
|
||||
// shape::calcOffsets(yShapeInfo, yOffsets);
|
||||
// }
|
||||
// PRAGMA_OMP_SECTION
|
||||
// {
|
||||
// zOffsets = new Nd4jLong[len];
|
||||
// shape::calcOffsets(zShapeInfo, zOffsets);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// INLINEDEF _CUDA_HD void calcOffsets(const Nd4jLong *xShapeInfo, Nd4jLong*& xOffsets, const Nd4jLong *yShapeInfo, Nd4jLong*& yOffsets, const char order) {
|
||||
|
||||
// // we assume all arrays have the same length
|
||||
// const Nd4jLong len = shape::length(xShapeInfo);
|
||||
|
||||
// const Nd4jLong xEws = shape::elementWiseStride(xShapeInfo);
|
||||
// const Nd4jLong yEws = shape::elementWiseStride(yShapeInfo);
|
||||
|
||||
// const char xOrder = shape::order(xShapeInfo);
|
||||
// const char yOrder = shape::order(yShapeInfo);
|
||||
|
||||
// const bool shapesSame = shape::shapeEquals(xShapeInfo, yShapeInfo);
|
||||
|
||||
// if (xEws == 1 && yEws == 1 && xOrder == yOrder && (xOrder == 'c' || shapesSame)) {
|
||||
// xOffsets = yOffsets = nullptr;
|
||||
// }
|
||||
// else if(xEws == 1) {
|
||||
// xOffsets = nullptr;
|
||||
// yOffsets = new Nd4jLong[len];
|
||||
// shape::calcOffsets(yShapeInfo, yOffsets, xOrder);
|
||||
// }
|
||||
// else if(yEws == 1) {
|
||||
// yOffsets = nullptr;
|
||||
// xOffsets = new Nd4jLong[len];
|
||||
// shape::calcOffsets(xShapeInfo, xOffsets, yOrder);
|
||||
// }
|
||||
// else if(shape::haveSameShapeAndStrides(xShapeInfo, yShapeInfo)) {
|
||||
// xOffsets = new Nd4jLong[len];
|
||||
// shape::calcOffsets(xShapeInfo, xOffsets);
|
||||
// yOffsets = xOffsets;
|
||||
// }
|
||||
// else {
|
||||
// PRAGMA_OMP_PARALLEL_SECTIONS
|
||||
// {
|
||||
// PRAGMA_OMP_SECTION
|
||||
// {
|
||||
// xOffsets = new Nd4jLong[len];
|
||||
// shape::calcOffsets(xShapeInfo, xOffsets);
|
||||
// }
|
||||
// PRAGMA_OMP_SECTION
|
||||
// {
|
||||
// yOffsets = new Nd4jLong[len];
|
||||
// shape::calcOffsets(yShapeInfo, yOffsets);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
|
||||
|
@ -9065,6 +9281,9 @@ public static final int PREALLOC_SIZE = 33554432;
|
|||
// returns TRUE if this op allows in-place execution
|
||||
public native @Cast("bool") boolean allowsInplace();
|
||||
|
||||
// this method allows you to enable/disable inplace call for a given op
|
||||
public native void allowInplace(@Cast("bool") boolean reallyAllow);
|
||||
|
||||
// this method returns opNum (applicable for legacy XYZ ops only)
|
||||
public native int getOpNum();
|
||||
|
||||
|
|
|
@ -4253,14 +4253,20 @@ public native @Cast("bool") boolean isOptimalRequirementsMet();
|
|||
* set new order and shape in case of suitable array length (in-place operation)
|
||||
* order - order to set
|
||||
* shape - shape to set
|
||||
*
|
||||
* copyToNewBuff - if true, the contents of the old buffer are copied into the new buffer whenever reshaping has to allocate one
|
||||
* if permute was applied before or there are weird strides, then a new buffer is allocated for the array
|
||||
*/
|
||||
public native @Cast("bool") boolean reshapei(byte order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @Cast("const bool") boolean copyToNewBuff/*=true*/);
|
||||
public native @Cast("bool") boolean reshapei(byte order, @Cast("Nd4jLong*") @StdVector LongPointer shape);
|
||||
public native @Cast("bool") boolean reshapei(byte order, @Cast("Nd4jLong*") @StdVector LongBuffer shape, @Cast("const bool") boolean copyToNewBuff/*=true*/);
|
||||
public native @Cast("bool") boolean reshapei(byte order, @Cast("Nd4jLong*") @StdVector LongBuffer shape);
|
||||
public native @Cast("bool") boolean reshapei(byte order, @Cast("Nd4jLong*") @StdVector long[] shape, @Cast("const bool") boolean copyToNewBuff/*=true*/);
|
||||
public native @Cast("bool") boolean reshapei(byte order, @Cast("Nd4jLong*") @StdVector long[] shape);
|
||||
public native @Cast("bool") boolean reshapei(@Cast("Nd4jLong*") @StdVector LongPointer shape, @Cast("const bool") boolean copyToNewBuff/*=true*/);
|
||||
public native @Cast("bool") boolean reshapei(@Cast("Nd4jLong*") @StdVector LongPointer shape);
|
||||
public native @Cast("bool") boolean reshapei(@Cast("Nd4jLong*") @StdVector LongBuffer shape, @Cast("const bool") boolean copyToNewBuff/*=true*/);
|
||||
public native @Cast("bool") boolean reshapei(@Cast("Nd4jLong*") @StdVector LongBuffer shape);
|
||||
public native @Cast("bool") boolean reshapei(@Cast("Nd4jLong*") @StdVector long[] shape, @Cast("const bool") boolean copyToNewBuff/*=true*/);
|
||||
public native @Cast("bool") boolean reshapei(@Cast("Nd4jLong*") @StdVector long[] shape);
|
||||
|
||||
/**
|
||||
|
@ -4270,8 +4276,11 @@ public native @Cast("bool") boolean isOptimalRequirementsMet();
|
|||
*
|
||||
* if permute has been applied before or there are weird strides, then a new buffer is allocated for the new array
|
||||
*/
|
||||
public native @ByVal NDArray reshape(byte order, @Cast("Nd4jLong*") @StdVector LongPointer shape, @Cast("const bool") boolean copyToNewBuff/*=true*/);
|
||||
public native @ByVal NDArray reshape(byte order, @Cast("Nd4jLong*") @StdVector LongPointer shape);
|
||||
public native @ByVal NDArray reshape(byte order, @Cast("Nd4jLong*") @StdVector LongBuffer shape, @Cast("const bool") boolean copyToNewBuff/*=true*/);
|
||||
public native @ByVal NDArray reshape(byte order, @Cast("Nd4jLong*") @StdVector LongBuffer shape);
|
||||
public native @ByVal NDArray reshape(byte order, @Cast("Nd4jLong*") @StdVector long[] shape, @Cast("const bool") boolean copyToNewBuff/*=true*/);
|
||||
public native @ByVal NDArray reshape(byte order, @Cast("Nd4jLong*") @StdVector long[] shape);
|
||||
|
||||
/**
|
||||
|
@ -6206,6 +6215,7 @@ public native @Cast("bool") boolean isOptimalRequirementsMet();
|
|||
// #include <pointercast.h>
|
||||
// #include <dll.h>
|
||||
// #include <string>
|
||||
// #include <vector>
|
||||
@Namespace("nd4j::graph") @NoOffset public static class NodeProfile extends Pointer {
|
||||
static { Loader.load(); }
|
||||
/** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */
|
||||
|
@ -6238,11 +6248,20 @@ public native @Cast("bool") boolean isOptimalRequirementsMet();
|
|||
public native void setObjectsSize(@Cast("Nd4jLong") long bytes);
|
||||
public native void setTotalSize(@Cast("Nd4jLong") long bytes);
|
||||
|
||||
public native void addInputShape(@Cast("Nd4jLong*") LongPointer shapeInfo);
|
||||
public native void addInputShape(@Cast("Nd4jLong*") LongBuffer shapeInfo);
|
||||
public native void addInputShape(@Cast("Nd4jLong*") long[] shapeInfo);
|
||||
public native void addOutputShape(@Cast("Nd4jLong*") LongPointer shapeInfo);
|
||||
public native void addOutputShape(@Cast("Nd4jLong*") LongBuffer shapeInfo);
|
||||
public native void addOutputShape(@Cast("Nd4jLong*") long[] shapeInfo);
|
||||
|
||||
public native @Cast("Nd4jLong") long getActivationsSize();
|
||||
public native @Cast("Nd4jLong") long getTemporarySize();
|
||||
public native @Cast("Nd4jLong") long getObjectsSize();
|
||||
public native @Cast("Nd4jLong") long getTotalSize();
|
||||
|
||||
public native @Cast("Nd4jLong") long getExecutionTime();
|
||||
|
||||
public native @StdString @ByRef @Cast({"char*", "std::string*"}) BytePointer name();
|
||||
|
||||
public native void merge(NodeProfile other);
|
||||
|
@ -6838,9 +6857,15 @@ public static final int PREALLOC_SIZE = 33554432;
|
|||
@Namespace("shape") public static native @Cast("bool") boolean canReshape(int oldRank, @Cast("Nd4jLong*") LongBuffer oldShape, int newRank, @Cast("Nd4jLong*") LongBuffer newShape, @Cast("bool") boolean isFOrder);
|
||||
@Namespace("shape") public static native @Cast("bool") boolean canReshape(int oldRank, @Cast("Nd4jLong*") long[] oldShape, int newRank, @Cast("Nd4jLong*") long[] newShape, @Cast("bool") boolean isFOrder);
|
||||
|
||||
@Namespace("shape") public static native @Cast("bool") boolean reshapeC(int oldRank, @Cast("const Nd4jLong*") LongPointer oldShapeInfo, int newRank, @Cast("const Nd4jLong*") LongPointer newShape, @Cast("Nd4jLong*") LongPointer newShapeInfo);
|
||||
@Namespace("shape") public static native @Cast("bool") boolean reshapeC(int oldRank, @Cast("const Nd4jLong*") LongBuffer oldShapeInfo, int newRank, @Cast("const Nd4jLong*") LongBuffer newShape, @Cast("Nd4jLong*") LongBuffer newShapeInfo);
|
||||
@Namespace("shape") public static native @Cast("bool") boolean reshapeC(int oldRank, @Cast("const Nd4jLong*") long[] oldShapeInfo, int newRank, @Cast("const Nd4jLong*") long[] newShape, @Cast("Nd4jLong*") long[] newShapeInfo);
|
||||
@Namespace("shape") public static native @Cast("bool") boolean reshapeC(@Cast("const Nd4jLong*") LongPointer oldShapeInfo, byte newOrder, int newRank, @Cast("const Nd4jLong*") LongPointer newShape, @Cast("Nd4jLong*") LongPointer newShapeInfo);
|
||||
@Namespace("shape") public static native @Cast("bool") boolean reshapeC(@Cast("const Nd4jLong*") LongBuffer oldShapeInfo, byte newOrder, int newRank, @Cast("const Nd4jLong*") LongBuffer newShape, @Cast("Nd4jLong*") LongBuffer newShapeInfo);
|
||||
@Namespace("shape") public static native @Cast("bool") boolean reshapeC(@Cast("const Nd4jLong*") long[] oldShapeInfo, byte newOrder, int newRank, @Cast("const Nd4jLong*") long[] newShape, @Cast("Nd4jLong*") long[] newShapeInfo);
|
||||
/**
|
||||
* newShapeInfo contains rank, shape and order only, no strides/ews/type
|
||||
*/
|
||||
@Namespace("shape") public static native @Cast("bool") boolean reshapeC(@Cast("const Nd4jLong*") LongPointer oldShapeInfo, @Cast("Nd4jLong*") LongPointer newShapeInfo);
|
||||
@Namespace("shape") public static native @Cast("bool") boolean reshapeC(@Cast("const Nd4jLong*") LongBuffer oldShapeInfo, @Cast("Nd4jLong*") LongBuffer newShapeInfo);
|
||||
@Namespace("shape") public static native @Cast("bool") boolean reshapeC(@Cast("const Nd4jLong*") long[] oldShapeInfo, @Cast("Nd4jLong*") long[] newShapeInfo);
|
||||
|
||||
/**
|
||||
* Get the shape info buffer
|
||||
|
@ -7148,6 +7173,15 @@ public static final int PREALLOC_SIZE = 33554432;
|
|||
@Namespace("shape") public static native @Cast("bool") boolean isColumnVector(@Cast("Nd4jLong*") LongPointer shapeInfo);
|
||||
@Namespace("shape") public static native @Cast("bool") boolean isColumnVector(@Cast("Nd4jLong*") LongBuffer shapeInfo);
|
||||
@Namespace("shape") public static native @Cast("bool") boolean isColumnVector(@Cast("Nd4jLong*") long[] shapeInfo);
|
||||
|
||||
/**
|
||||
* shape - input inShape is shape only, not shapeInfo
|
||||
* returns number of non-unity dimensions in inShape
|
||||
*/
|
||||
@Namespace("shape") public static native int numOfNonUnitDims(int rank, @Cast("const Nd4jLong*") LongPointer inShape);
|
||||
@Namespace("shape") public static native int numOfNonUnitDims(int rank, @Cast("const Nd4jLong*") LongBuffer inShape);
|
||||
@Namespace("shape") public static native int numOfNonUnitDims(int rank, @Cast("const Nd4jLong*") long[] inShape);
|
||||
|
||||
/**
|
||||
* Returns whether the
|
||||
* given shape is a vector or not
|
||||
|
@ -7166,9 +7200,9 @@ public static final int PREALLOC_SIZE = 33554432;
|
|||
* Returns the shape portion of an information
|
||||
* buffer
|
||||
*/
|
||||
@Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer shapeOf(@Cast("Nd4jLong*") LongPointer buffer);
|
||||
@Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer shapeOf(@Cast("Nd4jLong*") LongBuffer buffer);
|
||||
@Namespace("shape") public static native @Cast("Nd4jLong*") long[] shapeOf(@Cast("Nd4jLong*") long[] buffer);
|
||||
@Namespace("shape") public static native @Cast("Nd4jLong*") LongPointer shapeOf(@Cast("Nd4jLong*") LongPointer shapeInfo);
|
||||
@Namespace("shape") public static native @Cast("Nd4jLong*") LongBuffer shapeOf(@Cast("Nd4jLong*") LongBuffer shapeInfo);
|
||||
@Namespace("shape") public static native @Cast("Nd4jLong*") long[] shapeOf(@Cast("Nd4jLong*") long[] shapeInfo);
|
||||
|
||||
/**
|
||||
* Return a copy of a buffer.
|
||||
|
@ -7906,40 +7940,22 @@ public static final int PREALLOC_SIZE = 33554432;
|
|||
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") LongBuffer shapeInfo, @Cast("Nd4jLong*") LongBuffer offsets);
|
||||
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") long[] shapeInfo, @Cast("Nd4jLong*") long[] offsets, byte order/*='c'*/);
|
||||
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") long[] shapeInfo, @Cast("Nd4jLong*") long[] offsets);
|
||||
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") LongPointer xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongPointer xOffsets, @Cast("const Nd4jLong*") LongPointer yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongPointer yOffsets, byte order/*='c'*/);
|
||||
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") LongPointer xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongPointer xOffsets, @Cast("const Nd4jLong*") LongPointer yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongPointer yOffsets);
|
||||
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") LongBuffer xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer xOffsets, @Cast("const Nd4jLong*") LongBuffer yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer yOffsets, byte order/*='c'*/);
|
||||
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") LongBuffer xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer xOffsets, @Cast("const Nd4jLong*") LongBuffer yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer yOffsets);
|
||||
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") long[] xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef long[] xOffsets, @Cast("const Nd4jLong*") long[] yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef long[] yOffsets, byte order/*='c'*/);
|
||||
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") long[] xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef long[] xOffsets, @Cast("const Nd4jLong*") long[] yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef long[] yOffsets);
|
||||
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") LongPointer xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongPointer xOffsets, @Cast("const Nd4jLong*") LongPointer yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongPointer yOffsets, @Cast("const Nd4jLong*") LongPointer zShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongPointer zOffsets, byte order/*='c'*/);
|
||||
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") LongPointer xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongPointer xOffsets, @Cast("const Nd4jLong*") LongPointer yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongPointer yOffsets, @Cast("const Nd4jLong*") LongPointer zShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongPointer zOffsets);
|
||||
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") LongBuffer xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer xOffsets, @Cast("const Nd4jLong*") LongBuffer yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer yOffsets, @Cast("const Nd4jLong*") LongBuffer zShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer zOffsets, byte order/*='c'*/);
|
||||
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") LongBuffer xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer xOffsets, @Cast("const Nd4jLong*") LongBuffer yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer yOffsets, @Cast("const Nd4jLong*") LongBuffer zShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer zOffsets);
|
||||
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") long[] xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef long[] xOffsets, @Cast("const Nd4jLong*") long[] yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef long[] yOffsets, @Cast("const Nd4jLong*") long[] zShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef long[] zOffsets, byte order/*='c'*/);
|
||||
@Namespace("shape") public static native void calcOffsets(@Cast("const Nd4jLong*") long[] xShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef long[] xOffsets, @Cast("const Nd4jLong*") long[] yShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef long[] yOffsets, @Cast("const Nd4jLong*") long[] zShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef long[] zOffsets);
|
||||
// ND4J_EXPORT void calcOffsets(const Nd4jLong *xShapeInfo, Nd4jLong*& xOffsets, const Nd4jLong *yShapeInfo, Nd4jLong*& yOffsets, const char order = 'c');
|
||||
// ND4J_EXPORT void calcOffsets(const Nd4jLong *xShapeInfo, Nd4jLong*& xOffsets, const Nd4jLong *yShapeInfo, Nd4jLong*& yOffsets, const Nd4jLong* zShapeInfo, Nd4jLong*& zOffsets, const char order = 'c');
|
||||
@Namespace("shape") public static native void shapeOldScalar(@Cast("nd4j::DataType") int dtype, @Cast("Nd4jLong*const") LongPointer buffer, byte order);
|
||||
@Namespace("shape") public static native void shapeOldScalar(@Cast("nd4j::DataType") int dtype, @Cast("Nd4jLong*const") LongBuffer buffer, byte order);
|
||||
@Namespace("shape") public static native void shapeOldScalar(@Cast("nd4j::DataType") int dtype, @Cast("Nd4jLong*const") long[] buffer, byte order);
|
||||
|
||||
// deduce element-wise stride
|
||||
// if array is scalar or unit length vector then ews = 1
|
||||
// if array is common vector then ews = stride of non-unity dimension
|
||||
// if strides are normal set ews = 1, otherwise ews = 0
|
||||
@Namespace("shape") public static native void setEws(@Cast("Nd4jLong*") LongPointer shapeInfo, @Cast("Nd4jLong") long len);
|
||||
@Namespace("shape") public static native void setEws(@Cast("Nd4jLong*") LongBuffer shapeInfo, @Cast("Nd4jLong") long len);
|
||||
@Namespace("shape") public static native void setEws(@Cast("Nd4jLong*") long[] shapeInfo, @Cast("Nd4jLong") long len);
|
||||
|
||||
// deduce order and element-wise stride
|
||||
// if array is scalar or unit length vector then ews = 1 and order is preserved
|
||||
// if array is common vector then ews = stride of non-unity dimension and order is preserved
|
||||
// if strides are normal/contiguous then ews = 1 and corresponding order is set, otherwise ews = 0 and order is preserved
|
||||
@Namespace("shape") public static native void setOrderAndEws(@Cast("Nd4jLong*") LongPointer shapeInfo, @Cast("Nd4jLong") long len/*=-1*/);
|
||||
@Namespace("shape") public static native void setOrderAndEws(@Cast("Nd4jLong*") LongPointer shapeInfo);
|
||||
@Namespace("shape") public static native void setOrderAndEws(@Cast("Nd4jLong*") LongBuffer shapeInfo, @Cast("Nd4jLong") long len/*=-1*/);
|
||||
@Namespace("shape") public static native void setOrderAndEws(@Cast("Nd4jLong*") LongBuffer shapeInfo);
|
||||
@Namespace("shape") public static native void setOrderAndEws(@Cast("Nd4jLong*") long[] shapeInfo, @Cast("Nd4jLong") long len/*=-1*/);
|
||||
@Namespace("shape") public static native void setOrderAndEws(@Cast("Nd4jLong*") long[] shapeInfo);
|
||||
// JavaCPP bindings for the native function shape::checkStridesSetEwsAndOrder.
// Two native signatures are exposed, each in LongPointer / LongBuffer / long[] flavours:
//   - a five-argument form taking the writable shapeInfo, a proposed order byte, the number of
//     non-unit dimensions, and read-only (const Nd4jLong*) shape and stride buffers;
//   - a one-argument form taking only the writable shapeInfo.
// NOTE(review): judging by the names, shapeNoUnities/stridesNoUnities are presumably the shape and
// strides with unit dimensions removed, and the call presumably updates the ews/order fields of
// shapeInfo - confirm against the native header before relying on this.
@Namespace("shape") public static native void checkStridesSetEwsAndOrder(@Cast("Nd4jLong*") LongPointer shapeInfo, byte proposedOrder, int numOfNonUnitDims, @Cast("const Nd4jLong*") LongPointer shapeNoUnities, @Cast("const Nd4jLong*") LongPointer stridesNoUnities);
|
||||
@Namespace("shape") public static native void checkStridesSetEwsAndOrder(@Cast("Nd4jLong*") LongBuffer shapeInfo, byte proposedOrder, int numOfNonUnitDims, @Cast("const Nd4jLong*") LongBuffer shapeNoUnities, @Cast("const Nd4jLong*") LongBuffer stridesNoUnities);
|
||||
@Namespace("shape") public static native void checkStridesSetEwsAndOrder(@Cast("Nd4jLong*") long[] shapeInfo, byte proposedOrder, int numOfNonUnitDims, @Cast("const Nd4jLong*") long[] shapeNoUnities, @Cast("const Nd4jLong*") long[] stridesNoUnities);
|
||||
@Namespace("shape") public static native void checkStridesSetEwsAndOrder(@Cast("Nd4jLong*") LongPointer shapeInfo);
|
||||
@Namespace("shape") public static native void checkStridesSetEwsAndOrder(@Cast("Nd4jLong*") LongBuffer shapeInfo);
|
||||
@Namespace("shape") public static native void checkStridesSetEwsAndOrder(@Cast("Nd4jLong*") long[] shapeInfo);
|
||||
|
||||
/**
|
||||
* processes whole set of sub-arrays
|
||||
|
@ -7949,7 +7965,7 @@ public static final int PREALLOC_SIZE = 33554432;
|
|||
* numOfSubArrs - number of sub-arrays, size of subArrOffsets is equal to numOfSubArrs
|
||||
* dimsSize - size of dimsToExclude, if dimsSize = array rank or dimsSize = 0 it means sub-array is whole array, copy of wholeShapeInfo and one zero offset will be returned
|
||||
* dimsToExclude - MUST BE SORTED, dimensions to evaluate sub-array along, i.e. when shape is [2,3,4,5] and dimsToExclude={0,2}, then there will be 8 sub-arrays with shape [3,5]
|
||||
* subArrShapeInfo - output argument, contains shapeInfo common for all sub-arrays
|
||||
* subArrShapeInfo - output argument, contains shapeInfo (same for all sub-arrays)
|
||||
* subArrOffsets - output argument, contains successive sub-array offsets from the start of the original buffer
|
||||
* keepUnitiesInShape - if false then eliminate unities from sub-array shapeInfo, for example {1,a,1,b} -> {a,b}
|
||||
*/
|
||||
|
@ -7960,6 +7976,24 @@ public static final int PREALLOC_SIZE = 33554432;
|
|||
@Namespace("shape") public static native void calcSubArrShapeAndOffsets(@Cast("const Nd4jLong*") long[] wholeShapeInfo, @Cast("const Nd4jLong") long numOfSubArrs, int dimsSize, @Const int[] dimsToExclude, @Cast("Nd4jLong*") long[] subArrShapeInfo, @Cast("Nd4jLong*") long[] subArrOffsets, @Cast("bool") boolean keepUnitiesInShape/*=false*/);
|
||||
@Namespace("shape") public static native void calcSubArrShapeAndOffsets(@Cast("const Nd4jLong*") long[] wholeShapeInfo, @Cast("const Nd4jLong") long numOfSubArrs, int dimsSize, @Const int[] dimsToExclude, @Cast("Nd4jLong*") long[] subArrShapeInfo, @Cast("Nd4jLong*") long[] subArrOffsets);
|
||||
|
||||
/**
|
||||
* for example inShapeInfo is {3, 2,1,4, 4,4,1, 16384,1,99}
|
||||
* then output shapeNoUnities will contain {2,4, 4,1} - that is only shape and strides, no rank/type/ews/order
|
||||
* stridesNoUnities will point to the strides inside shapeNoUnities, that is to {4,1}
|
||||
* returns number of non-unity dimensions in inShapeInfo
|
||||
* if there are no unities in inShapeInfo, then no copy is performed and shapeNoUnities/stridesNoUnities will point to the corresponding places in inShapeInfo
|
||||
*/
|
||||
@Namespace("shape") public static native int excludeUnitiesFromShapeInfo(@Cast("const Nd4jLong*") LongPointer inShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongPointer shapeNoUnities, @Cast("Nd4jLong*&") @ByPtrRef LongPointer stridesNoUnities);
|
||||
@Namespace("shape") public static native int excludeUnitiesFromShapeInfo(@Cast("const Nd4jLong*") LongBuffer inShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer shapeNoUnities, @Cast("Nd4jLong*&") @ByPtrRef LongBuffer stridesNoUnities);
|
||||
@Namespace("shape") public static native int excludeUnitiesFromShapeInfo(@Cast("const Nd4jLong*") long[] inShapeInfo, @Cast("Nd4jLong*&") @ByPtrRef long[] shapeNoUnities, @Cast("Nd4jLong*&") @ByPtrRef long[] stridesNoUnities);
|
||||
|
||||
/**
|
||||
* for example inShapeInfo is {5, 2,1,3,1,4, 12,12,4,4,1, 16384,1,99}, dimsToExclude = {1,3}, dimsSize = 2
|
||||
* then outShapeInfo will contain {3, 2,3,4, 12,4,1, 16384,1,99}
|
||||
*/
|
||||
@Namespace("shape") public static native void excludeUnitiesFromShapeInfo(@Cast("const Nd4jLong*") LongPointer inShapeInfo, int dimsSize, @Const IntPointer dimsToExclude, @Cast("Nd4jLong*") LongPointer outShapeInfo);
|
||||
@Namespace("shape") public static native void excludeUnitiesFromShapeInfo(@Cast("const Nd4jLong*") LongBuffer inShapeInfo, int dimsSize, @Const IntBuffer dimsToExclude, @Cast("Nd4jLong*") LongBuffer outShapeInfo);
|
||||
@Namespace("shape") public static native void excludeUnitiesFromShapeInfo(@Cast("const Nd4jLong*") long[] inShapeInfo, int dimsSize, @Const int[] dimsToExclude, @Cast("Nd4jLong*") long[] outShapeInfo);
|
||||
|
||||
|
||||
|
||||
|
@ -8189,6 +8223,8 @@ public static final int PREALLOC_SIZE = 33554432;
|
|||
* @param rank the rank of the shape
|
||||
*/
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
|
||||
/**
|
||||
* Returns whether the
|
||||
* given shape is a vector or not
|
||||
|
@ -8738,62 +8774,51 @@ public static final int PREALLOC_SIZE = 33554432;
|
|||
// return true;
|
||||
// }
|
||||
|
||||
// INLINEDEF _CUDA_H bool reshapeC(const int oldRank, const Nd4jLong* oldShapeInfo, const int newRank, const Nd4jLong* newShape, const bool isFOrder, Nd4jLong* newShapeInfo) {
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// INLINEDEF _CUDA_H bool reshapeC(const int oldRank, const Nd4jLong* oldShapeInfo, const int newRank, const Nd4jLong* newShape, Nd4jLong* newShapeInfo) {
|
||||
|
||||
// // PLEASE NOTE !: reshaping not-permuted (ews=1) array in f order (except insertion/elimination of unities) will definitely cause allocation of new buffer for array elements
|
||||
// // also this function takes into account identical shapes automatically, namely in that case oldShapeInfo is completely copied to newShapeInfo
|
||||
|
||||
// const int newOrder = isFOrder ? 102 : 99;
|
||||
// const int oldOrder = oldShapeInfo[2 * oldRank + 3];
|
||||
|
||||
// newShapeInfo[0] = newRank;
|
||||
// memcpy(newShapeInfo + 1, newShape, newRank * sizeof(Nd4jLong));
|
||||
|
||||
// Nd4jLong* newStrides = shape::stride(newShapeInfo);
|
||||
// const Nd4jLong* oldShape = shape::shapeOf(const_cast<Nd4jLong*>(oldShapeInfo));
|
||||
// const Nd4jLong* oldStrides = shape::stride(const_cast<Nd4jLong*>(oldShapeInfo));
|
||||
// int oldStart(0), oldStop(1), newStart(0), newStop(1), newDim, oldDim;
|
||||
|
||||
// Nd4jLong oldStart(0), oldStop(1), newStart(0), newStop(1), newDim, oldDim;
|
||||
|
||||
// while (newStart < newRank && oldStart < oldRank) {
|
||||
|
||||
// newDim = newShape[newStart];
|
||||
// oldDim = oldShape[oldStart];
|
||||
|
||||
// while (newDim != oldDim)
|
||||
// while (newDim != oldDim && newDim > 0 && oldDim > 0)
|
||||
// if (newDim < oldDim) newDim *= newShape[newStop++];
|
||||
// else oldDim *= oldShape[oldStop++];
|
||||
|
||||
// // ------ Check whether the original axes can be combined ------ //
|
||||
// for (int i = oldStart; i < oldStop - 1; i++) {
|
||||
|
||||
// if(oldShape[i] == 1) { // ignore strides like {...,1,1,...}
|
||||
// if(oldOrder == 102) ++oldStart;
|
||||
// for (int step = 1, i = oldStart; i < oldStop - 1; ++i) {
|
||||
// if(oldShape[i] == 1) // skip unity-dimension and its stride
|
||||
// continue;
|
||||
// }
|
||||
|
||||
// if(oldOrder == 102 && oldStrides[i + 1] != oldShape[i] * oldStrides[i])
|
||||
// return false; // not contiguous enough
|
||||
// if(oldOrder == 99 && oldStrides[i] != oldShape[i + 1] * oldStrides[i + 1])
|
||||
// while((i + step) < oldRank && oldShape[i + step] == 1)
|
||||
// ++step; // skip the following unity-dimensions and their strides if such are present
|
||||
// if((i + step) < oldRank && oldStrides[i] != oldShape[i + step] * oldStrides[i + step])
|
||||
// return false; // not contiguous enough
|
||||
// }
|
||||
|
||||
// // ------ Calculate new strides for all axes currently worked with ------ //
|
||||
// if(isFOrder) {
|
||||
// newStrides[newStart] = oldStrides[oldStart];
|
||||
// for (int i = newStart + 1; i < newStop; ++i)
|
||||
// newStrides[i] = newStrides[i - 1] * newShape[i - 1];
|
||||
// }
|
||||
// else {
|
||||
// newStrides[newStop - 1] = oldStrides[oldStop - 1];
|
||||
// for (int i = newStop - 1; i > newStart; --i)
|
||||
// newStrides[i - 1] = newStrides[i] * newShape[i];
|
||||
// }
|
||||
|
||||
// newStart = newStop++;
|
||||
// oldStart = oldStop++;
|
||||
// }
|
||||
|
||||
// // rest of strides should be unities (if there is remainder in strides space, that is newStart < newRank)
|
||||
// for (int i = newStart; i < newRank; ++i)
|
||||
// newStrides[i] = 1;
|
||||
|
||||
// newShapeInfo[2 * newRank + 3] = shape::order(oldShapeInfo); // order
|
||||
// newShapeInfo[2 * newRank + 2] = shape::elementWiseStride(oldShapeInfo); // ews
|
||||
// newShapeInfo[2 * newRank + 1] = shape::type(oldShapeInfo); // type
|
||||
|
@ -8801,6 +8826,8 @@ public static final int PREALLOC_SIZE = 33554432;
|
|||
// return true;
|
||||
// }
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
|
||||
// this function checks the consistency of dimensions with array rank (negative dimensions, too large dimensions, too big number of dimensions)
|
||||
|
@ -8841,9 +8868,198 @@ public static final int PREALLOC_SIZE = 33554432;
|
|||
//////////////////////////////////////////////////////////////////////
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// INLINEDEF _CUDA_HD void calcOffsets(const Nd4jLong *xShapeInfo, Nd4jLong*& xOffsets, const Nd4jLong *yShapeInfo, Nd4jLong*& yOffsets, const Nd4jLong* zShapeInfo, Nd4jLong*& zOffsets, const char order) {
|
||||
|
||||
// // we assume all arrays have the same length
|
||||
// const Nd4jLong len = shape::length(xShapeInfo);
|
||||
|
||||
// const Nd4jLong xEws = shape::elementWiseStride(xShapeInfo);
|
||||
// const Nd4jLong yEws = shape::elementWiseStride(yShapeInfo);
|
||||
// const Nd4jLong zEws = shape::elementWiseStride(zShapeInfo);
|
||||
|
||||
// const char xOrder = shape::order(xShapeInfo);
|
||||
// const char yOrder = shape::order(yShapeInfo);
|
||||
// const char zOrder = shape::order(zShapeInfo);
|
||||
|
||||
// const bool shapesSame = shape::shapeEquals(xShapeInfo, yShapeInfo, zShapeInfo);
|
||||
|
||||
// if (xEws == 1 && yEws == 1 && zEws == 1 && xOrder == yOrder && xOrder == zOrder && (xOrder == 'c' || shapesSame)) {
|
||||
// xOffsets = yOffsets = zOffsets = nullptr;
|
||||
// }
|
||||
// else if(xEws == 1 && yEws == 1 && xOrder == yOrder && (xOrder == 'c' || shape::shapeEquals(xShapeInfo, yShapeInfo))) {
|
||||
// xOffsets = yOffsets = nullptr;
|
||||
// zOffsets = new Nd4jLong[len];
|
||||
// shape::calcOffsets(zShapeInfo, zOffsets, xOrder);
|
||||
// }
|
||||
// else if(xEws == 1 && zEws == 1 && xOrder == zOrder && (xOrder == 'c' || shape::shapeEquals(xShapeInfo, zShapeInfo))) {
|
||||
// xOffsets = zOffsets = nullptr;
|
||||
// yOffsets = new Nd4jLong[len];
|
||||
// shape::calcOffsets(yShapeInfo, yOffsets, xOrder);
|
||||
// }
|
||||
// else if(yEws == 1 && zEws == 1 && yOrder == zOrder && (yOrder == 'c' || shape::shapeEquals(yShapeInfo, zShapeInfo))) {
|
||||
// yOffsets = zOffsets = nullptr;
|
||||
// xOffsets = new Nd4jLong[len];
|
||||
// shape::calcOffsets(xShapeInfo, xOffsets, yOrder);
|
||||
// }
|
||||
// else if(xEws == 1) {
|
||||
// xOffsets = nullptr;
|
||||
// PRAGMA_OMP_PARALLEL_SECTIONS
|
||||
// {
|
||||
// PRAGMA_OMP_SECTION
|
||||
// {
|
||||
// yOffsets = new Nd4jLong[len];
|
||||
// shape::calcOffsets(yShapeInfo, yOffsets, xOrder);
|
||||
// }
|
||||
// PRAGMA_OMP_SECTION
|
||||
// {
|
||||
// zOffsets = new Nd4jLong[len];
|
||||
// shape::calcOffsets(zShapeInfo, zOffsets, xOrder);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// else if(yEws == 1) {
|
||||
// yOffsets = nullptr;
|
||||
// PRAGMA_OMP_PARALLEL_SECTIONS
|
||||
// {
|
||||
// PRAGMA_OMP_SECTION
|
||||
// {
|
||||
// xOffsets = new Nd4jLong[len];
|
||||
// shape::calcOffsets(xShapeInfo, xOffsets, yOrder);
|
||||
// }
|
||||
// PRAGMA_OMP_SECTION
|
||||
// {
|
||||
// zOffsets = new Nd4jLong[len];
|
||||
// shape::calcOffsets(zShapeInfo, zOffsets, yOrder);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// else if(zEws == 1) {
|
||||
// zOffsets = nullptr;
|
||||
// PRAGMA_OMP_PARALLEL_SECTIONS
|
||||
// {
|
||||
// PRAGMA_OMP_SECTION
|
||||
// {
|
||||
// xOffsets = new Nd4jLong[len];
|
||||
// shape::calcOffsets(xShapeInfo, xOffsets, zOrder);
|
||||
// }
|
||||
// PRAGMA_OMP_SECTION
|
||||
// {
|
||||
// yOffsets = new Nd4jLong[len];
|
||||
// shape::calcOffsets(yShapeInfo, yOffsets, zOrder);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// else if(shape::haveSameShapeAndStrides(xShapeInfo, yShapeInfo, zShapeInfo)) {
|
||||
// xOffsets = new Nd4jLong[len];
|
||||
// shape::calcOffsets(xShapeInfo, xOffsets);
|
||||
// yOffsets = zOffsets = xOffsets;
|
||||
// }
|
||||
// else if(shape::haveSameShapeAndStrides(xShapeInfo, yShapeInfo)) {
|
||||
// PRAGMA_OMP_PARALLEL_SECTIONS
|
||||
// {
|
||||
// PRAGMA_OMP_SECTION
|
||||
// {
|
||||
// xOffsets = new Nd4jLong[len];
|
||||
// shape::calcOffsets(xShapeInfo, xOffsets);
|
||||
// }
|
||||
// PRAGMA_OMP_SECTION
|
||||
// {
|
||||
// zOffsets = new Nd4jLong[len];
|
||||
// shape::calcOffsets(zShapeInfo, zOffsets);
|
||||
// }
|
||||
// }
|
||||
// yOffsets = xOffsets;
|
||||
// }
|
||||
// else if(shape::haveSameShapeAndStrides(xShapeInfo, zShapeInfo)) {
|
||||
// PRAGMA_OMP_PARALLEL_SECTIONS
|
||||
// {
|
||||
// PRAGMA_OMP_SECTION
|
||||
// {
|
||||
// xOffsets = new Nd4jLong[len];
|
||||
// shape::calcOffsets(xShapeInfo, xOffsets);
|
||||
// }
|
||||
// PRAGMA_OMP_SECTION
|
||||
// {
|
||||
// yOffsets = new Nd4jLong[len];
|
||||
// shape::calcOffsets(yShapeInfo, yOffsets);
|
||||
// }
|
||||
// }
|
||||
// zOffsets = xOffsets;
|
||||
// }
|
||||
// else {
|
||||
// PRAGMA_OMP_PARALLEL_SECTIONS
|
||||
// {
|
||||
// PRAGMA_OMP_SECTION
|
||||
// {
|
||||
// xOffsets = new Nd4jLong[len];
|
||||
// shape::calcOffsets(xShapeInfo, xOffsets);
|
||||
// }
|
||||
// PRAGMA_OMP_SECTION
|
||||
// {
|
||||
// yOffsets = new Nd4jLong[len];
|
||||
// shape::calcOffsets(yShapeInfo, yOffsets);
|
||||
// }
|
||||
// PRAGMA_OMP_SECTION
|
||||
// {
|
||||
// zOffsets = new Nd4jLong[len];
|
||||
// shape::calcOffsets(zShapeInfo, zOffsets);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// INLINEDEF _CUDA_HD void calcOffsets(const Nd4jLong *xShapeInfo, Nd4jLong*& xOffsets, const Nd4jLong *yShapeInfo, Nd4jLong*& yOffsets, const char order) {
|
||||
|
||||
// // we assume all arrays have the same length
|
||||
// const Nd4jLong len = shape::length(xShapeInfo);
|
||||
|
||||
// const Nd4jLong xEws = shape::elementWiseStride(xShapeInfo);
|
||||
// const Nd4jLong yEws = shape::elementWiseStride(yShapeInfo);
|
||||
|
||||
// const char xOrder = shape::order(xShapeInfo);
|
||||
// const char yOrder = shape::order(yShapeInfo);
|
||||
|
||||
// const bool shapesSame = shape::shapeEquals(xShapeInfo, yShapeInfo);
|
||||
|
||||
// if (xEws == 1 && yEws == 1 && xOrder == yOrder && (xOrder == 'c' || shapesSame)) {
|
||||
// xOffsets = yOffsets = nullptr;
|
||||
// }
|
||||
// else if(xEws == 1) {
|
||||
// xOffsets = nullptr;
|
||||
// yOffsets = new Nd4jLong[len];
|
||||
// shape::calcOffsets(yShapeInfo, yOffsets, xOrder);
|
||||
// }
|
||||
// else if(yEws == 1) {
|
||||
// yOffsets = nullptr;
|
||||
// xOffsets = new Nd4jLong[len];
|
||||
// shape::calcOffsets(xShapeInfo, xOffsets, yOrder);
|
||||
// }
|
||||
// else if(shape::haveSameShapeAndStrides(xShapeInfo, yShapeInfo)) {
|
||||
// xOffsets = new Nd4jLong[len];
|
||||
// shape::calcOffsets(xShapeInfo, xOffsets);
|
||||
// yOffsets = xOffsets;
|
||||
// }
|
||||
// else {
|
||||
// PRAGMA_OMP_PARALLEL_SECTIONS
|
||||
// {
|
||||
// PRAGMA_OMP_SECTION
|
||||
// {
|
||||
// xOffsets = new Nd4jLong[len];
|
||||
// shape::calcOffsets(xShapeInfo, xOffsets);
|
||||
// }
|
||||
// PRAGMA_OMP_SECTION
|
||||
// {
|
||||
// yOffsets = new Nd4jLong[len];
|
||||
// shape::calcOffsets(yShapeInfo, yOffsets);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
|
||||
|
@ -11382,6 +11598,9 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD();
|
|||
// returns TRUE if this op allows in-place execution
|
||||
public native @Cast("bool") boolean allowsInplace();
|
||||
|
||||
// this method allows you to enable/disable inplace call for a given op
|
||||
public native void allowInplace(@Cast("bool") boolean reallyAllow);
|
||||
|
||||
// this method returns opNum (applicable for legacy XYZ ops only)
|
||||
public native int getOpNum();
|
||||
|
||||
|
@ -22223,6 +22442,21 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD();
|
|||
private native void allocate();
|
||||
public native ShapeList calculateOutputShape(ShapeList inputShape, @ByRef Context block);
|
||||
}
|
||||
// JavaCPP peer class for the native type nd4j::ops::tensormmul_bp (see the @Namespace annotation).
// It extends DeclarableCustomOp and adds no Java-side state; every operation is delegated to native code.
// NOTE(review): the "_bp" suffix presumably marks the backprop counterpart of tensormmul - confirm against the native op declaration.
@Namespace("nd4j::ops") public static class tensormmul_bp extends DeclarableCustomOp {
|
||||
// Static initializer: calls Loader.load() when the class is initialized.
static { Loader.load(); }
|
||||
/** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */
|
||||
public tensormmul_bp(Pointer p) { super(p); }
|
||||
/** Native array allocator. Access with {@link Pointer#position(long)}. */
|
||||
public tensormmul_bp(long size) { super((Pointer)null); allocateArray(size); }
|
||||
// Native allocation routine invoked by the (long size) constructor above.
private native void allocateArray(long size);
|
||||
// Covariant override: delegates to super.position(position) and narrows the result to tensormmul_bp.
@Override public tensormmul_bp position(long position) {
|
||||
return (tensormmul_bp)super.position(position);
|
||||
}
|
||||
|
||||
// Default constructor: starts from a null Pointer and then calls the native allocate().
public tensormmul_bp() { super((Pointer)null); allocate(); }
|
||||
// Native allocation routine invoked by the no-argument constructor above.
private native void allocate();
|
||||
// Native method: takes a ShapeList of input shapes and a Context passed by reference (@ByRef), returns a ShapeList.
public native ShapeList calculateOutputShape(ShapeList inputShape, @ByRef Context block);
|
||||
}
|
||||
// #endif
|
||||
|
||||
/**
|
||||
|
|
Loading…
Reference in New Issue