[WIP] Minor fixes (#140)
* Tile java shape fn removed; Tile 0 validation added; scatter_upd test
  Signed-off-by: raver119 <raver119@gmail.com>
* additional tile validation
  Signed-off-by: raver119 <raver119@gmail.com>
* provide vector case in cuda scatter op
  Signed-off-by: Yurii <yurii@skymind.io>
* cpu ismax view fix
  Signed-off-by: raver119 <raver119@gmail.com>
* exp
  Signed-off-by: raver119 <raver119@gmail.com>
* cuda ismax fix
  Signed-off-by: raver119 <raver119@gmail.com>
parent a5867bb527
commit d9ab299759
@@ -231,7 +231,8 @@ void* NDArray::getSpecialBuffer() const {
 // change an array by repeating it the number of times given by reps.
 NDArray NDArray::tile(const std::vector<Nd4jLong>& reps) const {
     const int repsSize = reps.size();
-    int product = 1;
+
+    Nd4jLong product = 1;
     for(const auto& item : reps)
         product *= item;
     if(product == 0)
@@ -286,6 +287,10 @@ NDArray NDArray::tile(const std::vector<Nd4jLong>& reps) const {
 // change an array by repeating it the number of times given by reps.
 void NDArray::tile(const std::vector<Nd4jLong>& reps, NDArray& target) const {
 
+    auto repProd = shape::prodLong(reps.data(), reps.size());
+    if (repProd < 1)
+        throw std::runtime_error("NDArray::tile: reps can't contain 0s");
+
     // evaluate true tile shapeInfo for comparison with target shapeInfo
     auto newShapeInfo = ShapeUtils::evalTileShapeInfo(*this, reps, getContext()->getWorkspace());
     if(!shape::equalsSoft(newShapeInfo, target.getShapeInfo())) {
@@ -312,7 +312,8 @@ NDArray NDArray::tile(const std::vector<Nd4jLong>& reps) const {
     Nd4jLong product = 1;
     for(const auto& item : reps)
         product *= item;
-    if(product == 0)
+
+    if(product < 1)
         throw std::runtime_error("NDArray::tile method: one of the elements in reps array is zero !");
 
     int rankOld = rankOf();
@@ -351,6 +352,10 @@ NDArray NDArray::tile(const std::vector<Nd4jLong>& reps) const {
 // change an array by repeating it the number of times given by reps.
 void NDArray::tile(const std::vector<Nd4jLong>& reps, NDArray& target) const {
 
+    auto repProd = shape::prodLong(reps.data(), reps.size());
+    if (repProd < 1)
+        throw std::runtime_error("NDArray::tile: reps can't contain 0s");
+
     // evaluate true tile shapeInfo for comparison with target shapeInfo
     auto newShapeInfo = ShapeUtils::evalTileShapeInfo(*this, reps, getContext()->getWorkspace());
     if(!shape::equalsSoft(newShapeInfo, target.getShapeInfo())) {
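Both the cpu and cuda copies of NDArray::tile carry the same change: the reps product is widened from int to Nd4jLong and rejected when it is less than 1, so a zero repeat count fails fast instead of silently producing a degenerate array. A minimal sketch of the resulting behavior (hypothetical usage, not part of the diff, assuming the NDArrayFactory helper already used in the tests of this commit):

#include <stdexcept>

void tileValidationSketch() {
    auto x = NDArrayFactory::create<float>('c', {2, 2}, {1.f, 2.f, 3.f, 4.f});

    auto ok = x.tile({2, 2});       // valid: each axis repeated twice -> 4x4 result
    try {
        auto bad = x.tile({2, 0});  // reps product is 0 -> product < 1 -> throws
    } catch (const std::runtime_error& err) {
        // "NDArray::tile method: one of the elements in reps array is zero !"
    }
}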
@@ -48,18 +48,16 @@ namespace nd4j {
 
     for (int r = blockIdx.x; r < numTads; r += gridDim.x) {
         auto tadOffsetForBlock = tadOffsets[r];
 
-        int highestElement = (int) dX[r];
+        auto highestElement = dX[r];
 
         if (dimensionLength > 1 || tadEWS < 1) {
-
-            for (int e = threadIdx.x; e < tadLength; e += blockDim.x) {
-
+            for (Nd4jLong e = threadIdx.x; e < tadLength; e += blockDim.x) {
                 auto xOffset = tadOffsetForBlock + shape::getIndexOffset(e, tadOnlyShapeInfo, tadLength);
                 dZ[xOffset] = (e == highestElement ? (T) 1 : (T) 0);
             }
         } else {
-            for (int e = threadIdx.x; e < tadLength; e += blockDim.x) {
+            for (Nd4jLong e = threadIdx.x; e < tadLength; e += blockDim.x) {
                 // so, we just set dZ[e] for each TAD. Sure, e should be replaced with
                 auto idx = tadOffsetForBlock + (e * tadEWS);
                 dZ[idx] = (e == highestElement ? (T) 1 : (T) 0);
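The cuda ismax kernel change above removes two 32-bit narrowings: the element loops now count with Nd4jLong, and the stored per-TAD argmax position is no longer forced through an (int) cast. A standalone sketch (hypothetical values, not libnd4j code) of the truncation this guards against once a TAD has more elements than INT_MAX:

#include <cstdint>
#include <cstdio>

int main() {
    // Hypothetical argmax position beyond INT_MAX, as a very long TAD could produce.
    int64_t highest   = 3000000000LL;
    int     truncated = static_cast<int>(highest);  // narrows; wraps (implementation-defined before C++20)
    std::printf("64-bit index: %lld, after (int) cast: %d\n",
                (long long) highest, truncated);
    // A comparison like (e == highestElement) can never match the true
    // position once the index has been truncated this way.
    return 0;
}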
@@ -50,6 +50,9 @@ CUSTOM_OP_IMPL(tile, 1, 1, false, 0, -2) {
     else {
         REQUIRE_TRUE(false, 0, "TILE op: this op requires repeats vector, either as IArgs or second array with length equal to rank of input array to be tiled !");
     }
 
+    auto repProd = shape::prodLong(reps.data(), reps.size());
+    REQUIRE_TRUE(repProd > 0, 0, "TILE op: reps can't contain 0s");
+
     input->tile(reps, *output);
 
@@ -81,7 +84,10 @@ DECLARE_SHAPE_FN(tile) {
         }
         else {
             REQUIRE_TRUE(false, 0, "TILE op: this op requires repeats vector, either as IArgs or second array with length equal to rank of input array to be tiled !");
         }
     }
+
+    auto repProd = shape::prodLong(reps.data(), reps.size());
+    REQUIRE_TRUE(repProd > 0, 0, "TILE op: reps can't contain 0s");
     std::vector<Nd4jLong> shape(inRank);
     for (int e = 0; e < shape::rank(inShape); e++)
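With the two REQUIRE_TRUE checks above, both the TILE op implementation and its shape function now reject a reps vector containing zeros instead of inferring an empty output shape. A hypothetical invocation sketch, reusing the execute pattern from the tests in this commit:

void tileOpValidationSketch() {
    auto x = NDArrayFactory::create<float>('c', {1, 3}, {1.f, 2.f, 3.f});

    nd4j::ops::tile op;
    auto result = op.execute({&x}, {}, {2, 0});  // ({inputs}, {TArgs}, {IArgs}); the 0 rep is invalid
    // repProd == 0 trips REQUIRE_TRUE above, so result->status() is not Status::OK()
    delete result;
}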
@@ -125,9 +125,12 @@ static void ismax_(const NDArray* input, NDArray* output, const std::vector<int>
     //to the back.
     //permuted version of the input shape info for setting up the tad problem
     auto tadPack = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), const_cast<int*>(dimensions.data()), dimensionsLength);
+    auto tadPackZ = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), const_cast<int*>(dimensions.data()), dimensionsLength);
+
 
     auto tadShapeShapeInfo = tadPack.primaryShapeInfo();
     auto tadOffsets = tadPack.primaryOffsets();
+    auto zOfsets = tadPackZ.platformOffsets();
 
     int tadLength = shape::length(tadShapeShapeInfo);
     int tads = tadPack.numberOfTads();
@@ -137,7 +140,7 @@ static void ismax_(const NDArray* input, NDArray* output, const std::vector<int>
     num_threads = nd4j::math::nd4j_min<int>(num_threads, omp_get_max_threads());
 
     auto tadEWS = shape::elementWiseStride(tadShapeShapeInfo);
-    auto zEWS = tadEWS;
+    auto zEWS = shape::elementWiseStride(tadPackZ.primaryShapeInfo());
 
     int span = (tads / num_threads) + 8;
 
@@ -151,7 +154,7 @@ static void ismax_(const NDArray* input, NDArray* output, const std::vector<int>
     for (int r = start; r < end; r++) {
         if (tadEWS > 0 && zEWS > 0 && dimensionsLength == 1) {
             auto rX = const_cast<NDArray*>(input)->bufferAsT<X>() + tadOffsets[r];
-            auto rZ = output->bufferAsT<Z>() + tadOffsets[r];
+            auto rZ = output->bufferAsT<Z>() + zOfsets[r];
 
             auto maxValue = rX[0];
             int maxIdx = 0;
@@ -168,7 +171,7 @@ static void ismax_(const NDArray* input, NDArray* output, const std::vector<int>
                     rZ[i] = maxIdx == i ? (Z) 1 : (Z) 0;
                 }
             }
-            else {
+            else if (tadEWS > 1 && zEWS > 1) {
                 for (int i = 0; i < tadLength; i++) {
                     if (rX[i * tadEWS] > maxValue) {
                         maxIdx = i;
@@ -180,6 +183,20 @@ static void ismax_(const NDArray* input, NDArray* output, const std::vector<int>
                 for (int i = 0; i < tadLength; i++) {
                     rZ[i * zEWS] = maxIdx == i ? (Z) 1 : (Z) 0;
                 }
+            } else {
+                for (int i = 0; i < tadLength; i++) {
+                    auto xOffset = shape::getIndexOffset(i, tadShapeShapeInfo, tadLength);
+                    if (rX[xOffset] > maxValue) {
+                        maxIdx = i;
+                        maxValue = rX[xOffset];
+                    }
+                }
+
+                PRAGMA_OMP_SIMD
+                for (int i = 0; i < tadLength; i++) {
+                    auto zOffset = shape::getIndexOffset(i, tadPackZ.primaryShapeInfo(), tadLength);
+                    rZ[zOffset] = maxIdx == i ? (Z) 1 : (Z) 0;
+                }
             }
         }
         else {
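The five cpu ismax hunks above fix the view case: output TAD offsets and the output element-wise stride were previously borrowed from the input (tadOffsets[r] and zEWS = tadEWS), which is wrong whenever the output is a strided view with its own layout. The new tadPackZ/zOfsets pair and the getIndexOffset fallback branch resolve output positions through the output's own shape info; the cuda helper in the next hunk gets the matching one-line fix. A standalone sketch (not libnd4j code) of why a view cannot reuse another array's offsets:

#include <cstdint>
#include <cstdio>

int main() {
    // A dense buffer and a hypothetical 1-D view over every second element.
    double buffer[8] = {0, 1, 2, 3, 4, 5, 6, 7};
    int64_t viewStride = 2;
    // Logical element i of the view lives at i * viewStride, not at i,
    // which is the idea behind resolving offsets via the output shape info.
    for (int64_t i = 0; i < 4; i++)
        std::printf("view[%lld] -> buffer[%lld] = %g\n",
                    (long long) i, (long long)(i * viewStride),
                    buffer[i * viewStride]);
    return 0;
}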
@@ -62,7 +62,7 @@ static void ismax_(nd4j::LaunchContext * context, const NDArray* input, NDArray*
     int dimensionLength = dimensions.size();
     std::vector<int> copy(dimensions);
 
-    auto packZ = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), copy.data(), copy.size());
+    auto packZ = nd4j::ConstantTadHelper::getInstance()->tadForDimensions(output->getShapeInfo(), copy.data(), copy.size());
 
     auto indexMaxArr = input->applyIndexReduce(indexreduce::IndexMax, dimensions);
 
@@ -183,7 +183,7 @@ __global__ static void scatterLockCuda(const int opCode,
 
     __shared__ bool vectorCase;
     if(threadIdx.x == 0)
-        vectorCase = yTadLen == xLen && shape::rank(xShapeInfo) == 1;
+        vectorCase = yTadLen == xLen && shape::rank(xShapeInfo) <= 1;
     __syncthreads();
 
     for (int e = 0; e < xLen; e++) {
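Relaxing rank(xShapeInfo) == 1 to <= 1 lets rank-0 operands (scalar shape info) take the vector path as well, which is exactly what the new scatter_upd test in the next hunk exercises with a scalar index and a scalar update. The semantics it covers, as a plain-C++ sketch (hypothetical, mirroring that test):

#include <cstdio>

int main() {
    float x[3] = {1.f, 1.f, 1.f};
    int   idx  = 0;      // rank-0 index array selects slot 0
    float upd  = 3.f;    // rank-0 update value
    x[idx] = upd;        // scatter_upd semantics: expected {3, 1, 1}
    std::printf("%g %g %g\n", x[0], x[1], x[2]);
    return 0;
}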
@@ -52,3 +52,20 @@ TEST_F(DeclarableOpsTests16, test_repeat_119) {
 
     delete result;
 }
+
+TEST_F(DeclarableOpsTests16, test_scatter_update_119) {
+    auto x = NDArrayFactory::create<float>('c', {3}, {1, 1, 1});
+    auto y = NDArrayFactory::create<int>(0);
+    auto w = NDArrayFactory::create<float>(3.0f);
+    auto e = NDArrayFactory::create<float>('c', {3}, {3.f, 1.f, 1.f});
+
+    nd4j::ops::scatter_upd op;
+    auto result = op.execute({&x, &y, &w}, {}, {});
+    ASSERT_EQ(Status::OK(), result->status());
+
+    auto z = result->at(0);
+
+    ASSERT_EQ(e, *z);
+
+    delete result;
+}
@@ -1161,6 +1161,34 @@ TEST_F(JavaInteropTests, test_bfloat16_rng) {
     ASSERT_TRUE(z.sumNumber().e<float>(0) > 0);
 }
+
+TEST_F(JavaInteropTests, test_ismax_view) {
+    auto original = NDArrayFactory::create<double>('c', {2, 3, 40});
+    auto v = original.subarray({NDIndex::all(), NDIndex::all(), NDIndex::interval(0, 40, 2)});
+    v->assign(1.0);
+
+    auto e = v->ulike();
+    auto t = e.tensorAlongDimension(0, {0, 1});
+    t->assign(1.0);
+
+    auto z = v->ulike();
+
+
+    Nd4jLong iArgs[] = {2L, 0L};
+    Context ctx(1);
+    ctx.setInputArray(0, v->buffer(), v->shapeInfo(), v->specialBuffer(), v->specialShapeInfo());
+    ctx.setOutputArray(0, z.buffer(), z.shapeInfo(), z.specialBuffer(), z.specialShapeInfo());
+    ctx.setIArguments(iArgs, 1);
+
+    nd4j::ops::ismax op;
+    op.execute(&ctx);
+
+    z.printIndexedBuffer("z");
+    ASSERT_EQ(e, z);
+
+    delete v;
+    delete t;
+}
 
 /*
 TEST_F(JavaInteropTests, Test_Results_Conversion_1) {
     auto pl = nd4j::graph::readFlatBuffers("./resources/gru_dynamic_mnist.fb");
@@ -103,38 +103,6 @@ public class Tile extends DynamicCustomOp {
         return ret;
     }
 
-    @Override
-    public List<LongShapeDescriptor> calculateOutputShape() {
-        if(inputArguments.size() == 0)
-            return Collections.emptyList();
-
-        /**
-         * This op is special case: we can't infer its shape before both inputs are available.
-         * So if reps argument is full of 0.0s - we skip shape inference
-         *
-         * And during actual op invocation both inputs should be available due to topo sort
-         */
-        if (is_static_reps)
-            return Nd4j.getExecutioner().calculateOutputShape(this);
-
-        if (inputArguments().length < 2)
-            return Collections.emptyList();
-
-        val array = inputArguments()[1];
-
-        // FIXME: int cast
-        val reps = new long[(int) array.length()];
-
-        for (int e = 0; e < reps.length; e++)
-            reps[e] = (int) array.getDouble(e);
-
-        if (ArrayUtil.prodLong(reps) == 0)
-            return Collections.emptyList();
-        else
-            return Nd4j.getExecutioner().calculateOutputShape(this);
-    }
-
-
     @Override
     public String opName() {
         return "tile";