[WIP] CUDA Java side (#58)

* one crashing test

Signed-off-by: raver119 <raver119@gmail.com>

* stupid issue fixed

Signed-off-by: raver119 <raver119@gmail.com>

* one fix

Signed-off-by: raver119 <raver119@gmail.com>

* don't ensure location for empty arrays

Signed-off-by: raver119 <raver119@gmail.com>

* few more signatures fixed

Signed-off-by: raver119 <raver119@gmail.com>

* few tweaks for DataBuffer creation from java primitives

Signed-off-by: raver119 <raver119@gmail.com>

* get rid of legacy im2col/col2im intercept

Signed-off-by: raver119 <raver119@gmail.com>

* rsubi scalar array fix

Signed-off-by: raver119 <raver119@gmail.com>
master
raver119 2019-07-12 20:39:54 +03:00 committed by AlexDBlack
parent 68b82f3856
commit 6ce458e949
15 changed files with 154 additions and 169 deletions

View File

@ -37,7 +37,6 @@ void DataBuffer::allocateSpecial() {
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
void DataBuffer::syncToPrimary(const LaunchContext* context, const bool forceSync) { void DataBuffer::syncToPrimary(const LaunchContext* context, const bool forceSync) {
if(isPrimaryActual() && !forceSync) if(isPrimaryActual() && !forceSync)
return; return;

View File

@ -93,8 +93,12 @@ namespace helpers {
if (comp != nullptr) if (comp != nullptr)
comp->syncToHost(); comp->syncToHost();
output->syncToHost(); if (output != nullptr)
numResult->syncToHost(); output->syncToHost();
if (numResult != nullptr)
numResult->syncToHost();
compScalar.syncToHost(); compScalar.syncToHost();
BUILD_SINGLE_SELECTOR(arg->dataType(), return processCondition_, (mode, arg, comp, output, numResult, compScalar), FLOAT_TYPES); BUILD_SINGLE_SELECTOR(arg->dataType(), return processCondition_, (mode, arg, comp, output, numResult, compScalar), FLOAT_TYPES);
@ -104,8 +108,12 @@ namespace helpers {
if (comp != nullptr) if (comp != nullptr)
comp->syncToDevice(); comp->syncToDevice();
output->syncToDevice(); if (output != nullptr)
numResult->syncToDevice(); output->syncToDevice();
if (numResult != nullptr)
numResult->syncToDevice();
compScalar.syncToDevice(); compScalar.syncToDevice();
} }

View File

@ -0,0 +1,62 @@
/*******************************************************************************
* Copyright (c) 2015-2018 Skymind, Inc.
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* SPDX-License-Identifier: Apache-2.0
******************************************************************************/
//
// @author raver119@gmail.com
//
#include "testlayers.h"
#include <ops/declarable/CustomOperations.h>
#include <NDArray.h>
#include <ops/ops.h>
#include <GradCheck.h>
using namespace nd4j;
/**
 * Test fixture for the CUDA declarable-op tests in this file.
 *
 * Constructing the fixture writes a single newline to stdout and flushes it.
 */
class DeclarableOpsTestsCuda1 : public testing::Test {
public:
    DeclarableOpsTestsCuda1() {
        // Emit one blank line and push it out immediately.
        fputs("\n", stdout);
        fflush(stdout);
    }
};
// Runs the `choose` op on a 1x149 double array that is exposed to the op only
// through its special buffer (the primary buffer pointer is passed as nullptr),
// then checks the scalar value reported in the op's second output.
TEST_F(DeclarableOpsTestsCuda1, Test_CHOOSE_SCALAR_LARGE) {
// NOTE(review): the array is declared with 150 slots while the shape passed to
// NDArrayFactory::create below is {1,149} - confirm the size mismatch is intentional.
double inputData[150] = {
0, 0.51, 0.68, 0.69, 0.86, 0.91, 0.96, 0.97, 0.97, 1.03, 1.13, 1.16, 1.16, 1.17, 1.19, 1.25, 1.25, 1.26, 1.27, 1.28, 1.29, 1.29, 1.29, 1.30, 1.31, 1.32, 1.33, 1.33, 1.35, 1.35, 1.36, 1.37, 1.38, 1.40, 1.41, 1.42, 1.43, 1.44, 1.44, 1.45, 1.45, 1.47, 1.47, 1.51, 1.51, 1.51, 1.52, 1.53, 1.56, 1.57, 1.58, 1.59, 1.61, 1.62, 1.63, 1.63, 1.64, 1.64, 1.66, 1.66, 1.67, 1.67, 1.70, 1.70, 1.70, 1.72, 1.72, 1.72, 1.72, 1.73, 1.74, 1.74, 1.76, 1.76, 1.77, 1.77, 1.80, 1.80, 1.81, 1.82, 1.83, 1.83, 1.84, 1.84, 1.84, 1.85, 1.85, 1.85, 1.86, 1.86, 1.87, 1.88, 1.89, 1.89, 1.89, 1.89, 1.89, 1.91, 1.91, 1.91, 1.92, 1.94, 1.95, 1.97, 1.98, 1.98, 1.98, 1.98, 1.98, 1.99, 2, 2, 2.01, 2.01, 2.02, 2.03, 2.03, 2.03, 2.04, 2.04, 2.05, 2.06, 2.07, 2.08, 2.08, 2.08, 2.08, 2.09, 2.09, 2.10, 2.10, 2.11, 2.11, 2.11, 2.12, 2.12, 2.13, 2.13, 2.14, 2.14, 2.14, 2.14, 2.15, 2.15, 2.16, 2.16, 2.16, 2.16, 2.16, 2.17
};
auto precursor = NDArrayFactory::create<double>(inputData,'c',{1,149});
// x reuses precursor's special buffer and shape info and gets no primary buffer.
// NOTE(review): presumably x does not take ownership of these, so precursor must
// outlive x - verify against the NDArray constructor.
NDArray x(nullptr, precursor.specialBuffer(), precursor.shapeInfo());
nd4j::ops::choose op;
// greater-than test: scalar argument 0.0 with integer argument 3
// (presumably the mode that selects "greater than" - confirm against the op).
auto result = op.execute({&x}, {0.0},{3});
ASSERT_EQ(Status::OK(), result->status());
// The output at index 1 is expected to hold 148 - presumably the number of
// elements that satisfied the comparison.
auto z = result->at(1);
ASSERT_EQ(148,z->e<double>(0));
//ASSERT_TRUE(exp.isSameShape(z));
// NOTE(review): a failing ASSERT_* above returns from the test body early,
// so this delete is skipped and result leaks in that case.
delete result;
}

View File

@ -4003,7 +4003,7 @@ public abstract class BaseNDArray implements INDArray, Iterable {
public INDArray rsubi(INDArray other, INDArray result) { public INDArray rsubi(INDArray other, INDArray result) {
validateNumericalArray("rsubi", false); validateNumericalArray("rsubi", false);
if (other.isScalar()) { if (other.isScalar()) {
return this.addi(other.getDouble(0), result); return this.rsubi(other.getDouble(0), result);
} }
if (isScalar()) { if (isScalar()) {

View File

@ -379,7 +379,11 @@ public class CudaAffinityManager extends BasicAffinityManager {
@Override @Override
public void ensureLocation(INDArray array, Location location) { public void ensureLocation(INDArray array, Location location) {
AllocationPoint point = AtomicAllocator.getInstance().getAllocationPoint(array); // no location to ensure for empty array
if (array.isEmpty())
return;
val point = AtomicAllocator.getInstance().getAllocationPoint(array);
switch (location) { switch (location) {
case HOST: { case HOST: {
AtomicAllocator.getInstance().synchronizeHostData(array); AtomicAllocator.getInstance().synchronizeHostData(array);
@ -399,7 +403,10 @@ public class CudaAffinityManager extends BasicAffinityManager {
@Override @Override
public Location getActiveLocation(INDArray array) { public Location getActiveLocation(INDArray array) {
AllocationPoint point = AtomicAllocator.getInstance().getAllocationPoint(array); if (array.isEmpty())
return Location.EVERYWHERE;
val point = AtomicAllocator.getInstance().getAllocationPoint(array);
if (point.isActualOnDeviceSide() && point.isActualOnHostSide()) { if (point.isActualOnDeviceSide() && point.isActualOnHostSide()) {
return Location.EVERYWHERE; return Location.EVERYWHERE;

View File

@ -18,6 +18,7 @@ package org.nd4j.jita.flow.impl;
import lombok.Getter; import lombok.Getter;
import lombok.NonNull;
import lombok.val; import lombok.val;
import org.bytedeco.javacpp.DoublePointer; import org.bytedeco.javacpp.DoublePointer;
import org.nd4j.jita.allocator.Allocator; import org.nd4j.jita.allocator.Allocator;
@ -95,7 +96,7 @@ public class SynchronousFlowController implements FlowController {
} }
@Override @Override
public void synchronizeToDevice(AllocationPoint point) { public void synchronizeToDevice(@NonNull AllocationPoint point) {
if (point.isConstant()) if (point.isConstant())
return; return;

View File

@ -567,6 +567,14 @@ public abstract class BaseCudaDataBuffer extends BaseDataBuffer implements JCuda
return allocationPoint.getPointers().getHostPointer().address(); return allocationPoint.getPointers().getHostPointer().address();
} }
/**
 * Returns the pointer provided by the superclass, after first calling
 * {@code lazyAllocateHostPointer()}.
 *
 * @return the result of {@code super.pointer()}
 */
@Override
public Pointer pointer() {
// FIXME: calling lazyAllocateHostPointer() on every pointer() access is a workaround
// the author flagged as a "very bad thing"; revisit once callers no longer need it.
lazyAllocateHostPointer();
return super.pointer();
}
/** /**
* *
@ -672,7 +680,6 @@ public abstract class BaseCudaDataBuffer extends BaseDataBuffer implements JCuda
} }
} }
public void set(long[] data, long length, long srcOffset, long dstOffset) { public void set(long[] data, long length, long srcOffset, long dstOffset) {
// TODO: make sure getPointer returns proper pointer // TODO: make sure getPointer returns proper pointer
@ -698,11 +705,14 @@ public abstract class BaseCudaDataBuffer extends BaseDataBuffer implements JCuda
} }
break; break;
case UBYTE: { case UBYTE: {
data = ArrayUtil.cutBelowZero(data);
for (int e = 0; e < data.length; e++) { for (int e = 0; e < data.length; e++) {
put(e, data[e]); put(e, data[e]);
} }
} }
break; break;
case UINT16:
data = ArrayUtil.cutBelowZero(data);
case SHORT: { case SHORT: {
val pointer = new ShortPointer(ArrayUtil.toShorts(data)); val pointer = new ShortPointer(ArrayUtil.toShorts(data));
val srcPtr = new CudaPointer(pointer.address() + (dstOffset * elementSize)); val srcPtr = new CudaPointer(pointer.address() + (dstOffset * elementSize));
@ -714,6 +724,7 @@ public abstract class BaseCudaDataBuffer extends BaseDataBuffer implements JCuda
} }
break; break;
case UINT32: case UINT32:
data = ArrayUtil.cutBelowZero(data);
case INT: { case INT: {
val pointer = new IntPointer(ArrayUtil.toInts(data)); val pointer = new IntPointer(ArrayUtil.toInts(data));
val srcPtr = new CudaPointer(pointer.address() + (dstOffset * elementSize)); val srcPtr = new CudaPointer(pointer.address() + (dstOffset * elementSize));
@ -725,6 +736,7 @@ public abstract class BaseCudaDataBuffer extends BaseDataBuffer implements JCuda
} }
break; break;
case UINT64: case UINT64:
data = ArrayUtil.cutBelowZero(data);
case LONG: { case LONG: {
val pointer = new LongPointer(data); val pointer = new LongPointer(data);
val srcPtr = new CudaPointer(pointer.address() + (dstOffset * elementSize)); val srcPtr = new CudaPointer(pointer.address() + (dstOffset * elementSize));

View File

@ -187,6 +187,11 @@ public class CudaBfloat16DataBuffer extends BaseCudaDataBuffer {
setData(ArrayUtil.toShorts(data)); setData(ArrayUtil.toShorts(data));
} }
/**
 * Sets this buffer's contents from {@code long} values by converting them with
 * {@code ArrayUtil.toShorts} and delegating to another {@code setData} overload
 * (presumably the {@code short[]} one - verify against ArrayUtil).
 *
 * @param data source values to convert and store
 */
@Override
public void setData(long[] data) {
setData(ArrayUtil.toShorts(data));
}
@Override @Override

View File

@ -414,6 +414,8 @@ public class CudaDataBufferFactory implements DataBufferFactory {
return new CudaFloatDataBuffer(length, initialize, workspace); return new CudaFloatDataBuffer(length, initialize, workspace);
case HALF: case HALF:
return new CudaHalfDataBuffer(length, initialize, workspace); return new CudaHalfDataBuffer(length, initialize, workspace);
case BFLOAT16:
return new CudaBfloat16DataBuffer(length, initialize, workspace);
case BOOL: case BOOL:
return new CudaBoolDataBuffer(length, initialize, workspace); return new CudaBoolDataBuffer(length, initialize, workspace);
default: default:

View File

@ -61,6 +61,7 @@ import org.nd4j.linalg.exception.ND4JIllegalArgumentException;
import org.nd4j.linalg.exception.ND4JIllegalStateException; import org.nd4j.linalg.exception.ND4JIllegalStateException;
import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.jcublas.buffer.AddressRetriever; import org.nd4j.linalg.jcublas.buffer.AddressRetriever;
import org.nd4j.linalg.jcublas.buffer.BaseCudaDataBuffer;
import org.nd4j.linalg.jcublas.buffer.CudaLongDataBuffer; import org.nd4j.linalg.jcublas.buffer.CudaLongDataBuffer;
import org.nd4j.linalg.jcublas.context.CudaContext; import org.nd4j.linalg.jcublas.context.CudaContext;
import org.nd4j.linalg.primitives.AtomicBoolean; import org.nd4j.linalg.primitives.AtomicBoolean;
@ -1495,7 +1496,8 @@ public class CudaExecutioner extends DefaultOpExecutioner {
@Override @Override
public <T extends Aggregate> void exec(Batch<T> batch) { public <T extends Aggregate> void exec(Batch<T> batch) {
DataBuffer surfaceBuffer = getBuffer(batch); val surfaceBuffer = (BaseCudaDataBuffer) getBuffer(batch);
surfaceBuffer.lazyAllocateHostPointer();
CudaContext context = (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext(); CudaContext context = (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext();
@ -2238,152 +2240,6 @@ public class CudaExecutioner extends DefaultOpExecutioner {
} }
} }
if (op.opName().equalsIgnoreCase("im2col")) {
val xArr = op.inputArguments()[0];
val zArr = op.outputArguments()[0];
CudaContext context = AtomicAllocator.getInstance().getFlowController().prepareAction(zArr, xArr);
if (extraz.get() == null)
extraz.set(new PointerPointer(32));
PointerPointer xShapeHost =
extraz.get().put(AddressRetriever.retrieveHostPointer(xArr.shapeInfoDataBuffer()), // 0
context.getOldStream(), // 1
AtomicAllocator.getInstance().getDeviceIdPointer(), // 2
context.getBufferAllocation(), // 3
context.getBufferReduction(), // 4
context.getBufferScalar(), // 5
context.getBufferSpecial(),
null,
AddressRetriever.retrieveHostPointer(zArr.shapeInfoDataBuffer())
);
val x = AtomicAllocator.getInstance().getPointer(xArr, context);
val z = AtomicAllocator.getInstance().getPointer(zArr, context);
val xShape = AtomicAllocator.getInstance().getPointer(xArr.shapeInfoDataBuffer(), context);
val zShape = AtomicAllocator.getInstance().getPointer(zArr.shapeInfoDataBuffer(), context);
val hxShape = AtomicAllocator.getInstance().getHostPointer(xArr.shapeInfoDataBuffer());
val hzShape = AtomicAllocator.getInstance().getHostPointer(zArr.shapeInfoDataBuffer());
double zeroPad = 0.0;
if(op.tArgs() != null && op.tArgs().length > 0){
zeroPad = op.tArgs()[0];
}
val extrass = new double[]{op.iArgs()[0], op.iArgs()[1], op.iArgs()[2], op.iArgs()[3], op.iArgs()[4], op.iArgs()[5], op.iArgs()[6], op.iArgs()[7], op.iArgs()[8], zeroPad};
val extraArgsBuff = Nd4j.getConstantHandler().getConstantBuffer(extrass, xArr.dataType());
val extraArgs = AtomicAllocator.getInstance().getPointer(extraArgsBuff, context);
nativeOps.execTransformSame(xShapeHost, 9,
null, (LongPointer) hxShape, x, (LongPointer) xShape,
null, (LongPointer) hzShape, z, (LongPointer) zShape, extraArgs);
//AtomicAllocator.getInstance().getAllocationPoint(zArr).tickDeviceWrite();
AtomicAllocator.getInstance().getFlowController().registerAction(context, zArr, xArr);
Nd4j.getExecutioner().commit();
return op.outputArguments();
} else if (op.opName().equalsIgnoreCase("col2im")) {
val dtype = Nd4j.dataType();
val xArr = op.inputArguments()[0];
val zArr = op.outputArguments()[0];
CudaContext context = AtomicAllocator.getInstance().getFlowController().prepareAction(zArr, xArr);
if (extraz.get() == null)
extraz.set(new PointerPointer(32));
PointerPointer xShapeHost =
extraz.get().put(AddressRetriever.retrieveHostPointer(xArr.shapeInfoDataBuffer()), // 0
context.getOldStream(), // 1
AtomicAllocator.getInstance().getDeviceIdPointer(), // 2
context.getBufferAllocation(), // 3
context.getBufferReduction(), // 4
context.getBufferScalar(), // 5
context.getBufferSpecial(),
null,
AddressRetriever.retrieveHostPointer(zArr.shapeInfoDataBuffer())
);
val x = AtomicAllocator.getInstance().getPointer(xArr, context);
val z = AtomicAllocator.getInstance().getPointer(zArr, context);
val xShape = AtomicAllocator.getInstance().getPointer(xArr.shapeInfoDataBuffer(), context);
val zShape = AtomicAllocator.getInstance().getPointer(zArr.shapeInfoDataBuffer(), context);
val hxShape = AtomicAllocator.getInstance().getHostPointer(xArr.shapeInfoDataBuffer());
val hzShape = AtomicAllocator.getInstance().getHostPointer(zArr.shapeInfoDataBuffer());
val extrass = new double[]{op.iArgs()[0], op.iArgs()[1], op.iArgs()[2], op.iArgs()[3], op.iArgs()[4], op.iArgs()[5], op.iArgs()[6], op.iArgs()[7]};
val extraArgsBuff = Nd4j.getConstantHandler().getConstantBuffer(extrass, xArr.dataType());
val extraArgs = AtomicAllocator.getInstance().getPointer(extraArgsBuff, context);
nativeOps.execTransformSame(xShapeHost, 8,
null, (LongPointer) hxShape, x, (LongPointer) xShape,
null, (LongPointer) hzShape, z, (LongPointer) zShape, extraArgs);
//AtomicAllocator.getInstance().getAllocationPoint(zArr).tickDeviceWrite();
AtomicAllocator.getInstance().getFlowController().registerAction(context, zArr, xArr);
//Nd4j.getExecutioner().commit();
return op.outputArguments();
} else if (op.opName().equalsIgnoreCase("pooling2d")) {
val dtype = Nd4j.dataType();
val xArr = op.inputArguments()[0];
val zArr = op.outputArguments()[0];
CudaContext context = AtomicAllocator.getInstance().getFlowController().prepareAction(zArr, xArr);
if (extraz.get() == null)
extraz.set(new PointerPointer(32));
PointerPointer xShapeHost =
extraz.get().put(AddressRetriever.retrieveHostPointer(xArr.shapeInfoDataBuffer()), // 0
context.getOldStream(), // 1
AtomicAllocator.getInstance().getDeviceIdPointer(), // 2
context.getBufferAllocation(), // 3
context.getBufferReduction(), // 4
context.getBufferScalar(), // 5
context.getBufferSpecial(),
null,
AddressRetriever.retrieveHostPointer(zArr.shapeInfoDataBuffer())
);
val x = AtomicAllocator.getInstance().getPointer(xArr, context);
val z = AtomicAllocator.getInstance().getPointer(zArr, context);
val xShape = AtomicAllocator.getInstance().getPointer(xArr.shapeInfoDataBuffer(), context);
val zShape = AtomicAllocator.getInstance().getPointer(zArr.shapeInfoDataBuffer(), context);
val hxShape = AtomicAllocator.getInstance().getHostPointer(xArr.shapeInfoDataBuffer());
val hzShape = AtomicAllocator.getInstance().getHostPointer(zArr.shapeInfoDataBuffer());
val extrass = new double[]{op.iArgs()[0], op.iArgs()[1], op.iArgs()[2], op.iArgs()[3], op.iArgs()[4], op.iArgs()[5], op.iArgs()[6], op.iArgs()[7], op.iArgs()[8]};
val extraArgsBuff = Nd4j.getConstantHandler().getConstantBuffer(extrass, zArr.dataType());
val extraArgs = AtomicAllocator.getInstance().getPointer(extraArgsBuff, context);
nativeOps.execTransformFloat(xShapeHost, 23,
null, (LongPointer) hxShape, x, (LongPointer) xShape,
zArr.data().addressPointer(), (LongPointer) hzShape, z, (LongPointer) zShape,
extraArgs);
// AtomicAllocator.getInstance().getAllocationPoint(zArr).tickDeviceWrite();
AtomicAllocator.getInstance().getFlowController().registerAction(context, zArr, xArr);
return op.outputArguments();
}
Nd4j.getExecutioner().commit();
val ctx = (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext(); val ctx = (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext();
val context = (CudaOpContext) buildContext(); val context = (CudaOpContext) buildContext();

View File

@ -15377,7 +15377,7 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD();
* Input arrays: * Input arrays:
* 0: input 3d tensor with shape [bS x K x N], N - number of time steps, bS - batch size, K - number of features * 0: input 3d tensor with shape [bS x K x N], N - number of time steps, bS - batch size, K - number of features
* 1: 2d tensor of weights [3K x K] * 1: 2d tensor of weights [3K x K]
* 2: row of biases with twice length [1 × 2K] * 2: row of biases with twice length [1 x 2K]
* 3: 2d tensor of previous cell state [bS x K] * 3: 2d tensor of previous cell state [bS x K]
* 4: optional, 2d tensor of dropout mask [bS x K] * 4: optional, 2d tensor of dropout mask [bS x K]
* *
@ -15410,7 +15410,7 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD();
* Input arrays: * Input arrays:
* 0: input 3d tensor with shape [N x bS x 2K], N - number of time steps, bS - batch size, K - number of features * 0: input 3d tensor with shape [N x bS x 2K], N - number of time steps, bS - batch size, K - number of features
* 1: 2d tensor of weights [2K x 6K] * 1: 2d tensor of weights [2K x 6K]
* 2: row of biases with twice length [1 × 4K] * 2: row of biases with twice length [1 x 4K]
* 3: 2d tensor of previous cell state [bS x 2K] * 3: 2d tensor of previous cell state [bS x 2K]
* 4: optional, 2d tensor of dropout mask [bS x 2K] * 4: optional, 2d tensor of dropout mask [bS x 2K]
* *
@ -15444,7 +15444,7 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD();
* Input arrays: * Input arrays:
* 0: input 3d tensor with shape [bS x K x N], N - number of time steps, bS - batch size, K - number of features * 0: input 3d tensor with shape [bS x K x N], N - number of time steps, bS - batch size, K - number of features
* 1: 2d tensor of weights [3K x K] * 1: 2d tensor of weights [3K x K]
* 2: row of biases with twice length [1 × 2K] * 2: row of biases with twice length [1 x 2K]
* 3: 2d tensor of previous cell state [bS x K] * 3: 2d tensor of previous cell state [bS x K]
* 4: 3d tensor of cell state [bS x K x N] * 4: 3d tensor of cell state [bS x K x N]
* 5: 2d tensor of cell state gradients [bS x K] * 5: 2d tensor of cell state gradients [bS x K]
@ -15482,7 +15482,7 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD();
* Input arrays: * Input arrays:
* 0: input 3d tensor with shape [N x bS x 2K], N - number of time steps, bS - batch size, K - number of features * 0: input 3d tensor with shape [N x bS x 2K], N - number of time steps, bS - batch size, K - number of features
* 1: 2d tensor of weights [2K x 6K] * 1: 2d tensor of weights [2K x 6K]
* 2: row of biases with twice length [1 × 4K] * 2: row of biases with twice length [1 x 4K]
* 3: 2d tensor of previous cell state [bS x 2K] * 3: 2d tensor of previous cell state [bS x 2K]
* 4: 3d tensor of cell state [N x bS x 2K] * 4: 3d tensor of cell state [N x bS x 2K]
* 5: 2d tensor of cell state gradients [bS x 2K] * 5: 2d tensor of cell state gradients [bS x 2K]
@ -15681,7 +15681,7 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD();
* 0: input with shape [batchSize x inSize], batchSize - batch size, inSize - number of features * 0: input with shape [batchSize x inSize], batchSize - batch size, inSize - number of features
* 1: previous cell state [batchSize x inSize], that is at previous time step t-1 * 1: previous cell state [batchSize x inSize], that is at previous time step t-1
* 2: weights [inSize x 3*inSize] * 2: weights [inSize x 3*inSize]
* 3: biases [1 × 2*inSize] * 3: biases [1 x 2*inSize]
* *
* Output arrays: * Output arrays:
* 0: current cell output [batchSize x inSize], that is at current time step t * 0: current cell output [batchSize x inSize], that is at current time step t

View File

@ -23,6 +23,7 @@ import org.junit.Test;
import org.junit.runner.RunWith; import org.junit.runner.RunWith;
import org.junit.runners.Parameterized; import org.junit.runners.Parameterized;
import org.nd4j.linalg.BaseNd4jTest; import org.nd4j.linalg.BaseNd4jTest;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.ops.aggregates.impl.AggregateCBOW; import org.nd4j.linalg.api.ops.aggregates.impl.AggregateCBOW;
import org.nd4j.linalg.api.ops.aggregates.impl.AggregateSkipGram; import org.nd4j.linalg.api.ops.aggregates.impl.AggregateSkipGram;
@ -95,17 +96,17 @@ public class HierarchicSoftmaxTests extends BaseNd4jTest {
@Test @Test
public void testSGGradient1() { public void testSGGradient1() {
INDArray syn0 = Nd4j.create(10, 10).assign(0.01f); INDArray syn0 = Nd4j.create(DataType.DOUBLE, 10, 10).assign(0.01f);
INDArray syn1 = Nd4j.create(10, 10).assign(0.02f); INDArray syn1 = Nd4j.create(DataType.DOUBLE,10, 10).assign(0.02f);
INDArray syn1Neg = Nd4j.ones(10, 10).assign(0.03f); INDArray syn1Neg = Nd4j.create(DataType.DOUBLE,10, 10).assign(0.03f);
INDArray expTable = Nd4j.create(10000).assign(0.5f); INDArray expTable = Nd4j.create(DataType.DOUBLE,10000).assign(0.5f);
double lr = 0.001; double lr = 0.001;
int idxSyn0 = 0; int idxSyn0 = 0;
INDArray expSyn0 = Nd4j.create(10).assign(0.01001f); INDArray expSyn0 = Nd4j.create(DataType.DOUBLE,10).assign(0.01001f);
INDArray expSyn1_1 = Nd4j.create(10).assign(0.020005); INDArray expSyn1_1 = Nd4j.create(DataType.DOUBLE,10).assign(0.020005);
INDArray syn0row = syn0.getRow(idxSyn0); INDArray syn0row = syn0.getRow(idxSyn0);

View File

@ -24,6 +24,7 @@ import org.junit.Test;
import org.junit.runner.RunWith; import org.junit.runner.RunWith;
import org.junit.runners.Parameterized; import org.junit.runners.Parameterized;
import org.nd4j.linalg.BaseNd4jTest; import org.nd4j.linalg.BaseNd4jTest;
import org.nd4j.linalg.api.concurrency.AffinityManager;
import org.nd4j.linalg.api.memory.MemoryWorkspace; import org.nd4j.linalg.api.memory.MemoryWorkspace;
import org.nd4j.linalg.api.memory.conf.WorkspaceConfiguration; import org.nd4j.linalg.api.memory.conf.WorkspaceConfiguration;
import org.nd4j.linalg.api.memory.enums.AllocationPolicy; import org.nd4j.linalg.api.memory.enums.AllocationPolicy;
@ -288,6 +289,8 @@ public class DataBufferTests extends BaseNd4jTest {
continue; continue;
} }
log.info("Testing source [{}]; target: [{}]", sourceType, dt);
for (boolean useWs : new boolean[]{false, true}) { for (boolean useWs : new boolean[]{false, true}) {
try (MemoryWorkspace ws = (useWs ? workspace.notifyScopeEntered() : null)) { try (MemoryWorkspace ws = (useWs ? workspace.notifyScopeEntered() : null)) {
@ -334,7 +337,6 @@ public class DataBufferTests extends BaseNd4jTest {
assertFalse(db2.isAttached()); assertFalse(db2.isAttached());
if(!sourceType.equals("boolean")){ if(!sourceType.equals("boolean")){
log.info("Testing source [{}]; target: [{}]", sourceType, dt);
testDBOps(db1); testDBOps(db1);
testDBOps(db2); testDBOps(db2);
} }
@ -375,6 +377,8 @@ public class DataBufferTests extends BaseNd4jTest {
bb.position(0); bb.position(0);
bb.put(b); bb.put(b);
Nd4j.getAffinityManager().tagLocation(arr2, AffinityManager.Location.HOST);
assertEquals(arr.toString(), arr2.toString()); assertEquals(arr.toString(), arr2.toString());
assertEquals(arr, arr2); assertEquals(arr, arr2);

View File

@ -44,6 +44,7 @@ import static org.junit.Assert.*;
/** /**
* @author raver119@gmail.com * @author raver119@gmail.com
*/ */
@Ignore
@Slf4j @Slf4j
@RunWith(Parameterized.class) @RunWith(Parameterized.class)
public class CompressionTests extends BaseNd4jTest { public class CompressionTests extends BaseNd4jTest {

View File

@ -1133,6 +1133,33 @@ public class ArrayUtil {
return ret; return ret;
} }
/**
 * Returns a new array of the same length in which every negative element of
 * {@code data} is replaced by 0; non-negative elements are copied unchanged.
 * The input array is not modified.
 *
 * @param data source values
 * @return a fresh array with negatives clamped to zero
 */
public static int[] cutBelowZero(int[] data) {
    final int[] clipped = new int[data.length];
    int idx = 0;
    for (int value : data) {
        // New arrays are zero-filled, so only positive values need to be written.
        if (value > 0) {
            clipped[idx] = value;
        }
        idx++;
    }
    return clipped;
}
/**
 * Returns a new array of the same length in which every negative element of
 * {@code data} is replaced by 0; non-negative elements are copied unchanged.
 * The input array is not modified.
 *
 * @param data source values
 * @return a fresh array with negatives clamped to zero
 */
public static long[] cutBelowZero(long[] data) {
    final long[] clipped = new long[data.length];
    int idx = 0;
    for (long value : data) {
        // New arrays are zero-filled, so only positive values need to be written.
        if (value > 0) {
            clipped[idx] = value;
        }
        idx++;
    }
    return clipped;
}
/**
 * Returns a new array of the same length in which every negative element of
 * {@code data} is replaced by 0; non-negative elements are copied unchanged.
 * The input array is not modified.
 *
 * @param data source values
 * @return a fresh array with negatives clamped to zero
 */
public static short[] cutBelowZero(short[] data) {
    final short[] clipped = new short[data.length];
    int idx = 0;
    for (short value : data) {
        // New arrays are zero-filled, so only positive values need to be written.
        if (value > 0) {
            clipped[idx] = value;
        }
        idx++;
    }
    return clipped;
}
/**
 * Returns a new array of the same length in which every negative element of
 * {@code data} is replaced by 0; non-negative elements are copied unchanged.
 * The input array is not modified.
 *
 * @param data source values
 * @return a fresh array with negatives clamped to zero
 */
public static byte[] cutBelowZero(byte[] data) {
    final byte[] clipped = new byte[data.length];
    int idx = 0;
    for (byte value : data) {
        // New arrays are zero-filled, so only positive values need to be written.
        if (value > 0) {
            clipped[idx] = value;
        }
        idx++;
    }
    return clipped;
}
/** /**
* Return a copy of this array with the * Return a copy of this array with the