[WIP] CUDA Java side (#58)
* one crashing test Signed-off-by: raver119 <raver119@gmail.com> * stupid issue fixed Signed-off-by: raver119 <raver119@gmail.com> * one fix Signed-off-by: raver119 <raver119@gmail.com> * dont ensure location for empty arrays Signed-off-by: raver119 <raver119@gmail.com> * few more signatures fixed Signed-off-by: raver119 <raver119@gmail.com> * few tweaks for DataBuffer creation from java primitives Signed-off-by: raver119 <raver119@gmail.com> * get rid of legacy im2col/col2im intercept Signed-off-by: raver119 <raver119@gmail.com> * rsubi scalar array fix Signed-off-by: raver119 <raver119@gmail.com>master
parent
68b82f3856
commit
6ce458e949
|
@ -37,7 +37,6 @@ void DataBuffer::allocateSpecial() {
|
|||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
void DataBuffer::syncToPrimary(const LaunchContext* context, const bool forceSync) {
|
||||
|
||||
if(isPrimaryActual() && !forceSync)
|
||||
return;
|
||||
|
||||
|
|
|
@ -93,8 +93,12 @@ namespace helpers {
|
|||
if (comp != nullptr)
|
||||
comp->syncToHost();
|
||||
|
||||
if (output != nullptr)
|
||||
output->syncToHost();
|
||||
|
||||
if (numResult != nullptr)
|
||||
numResult->syncToHost();
|
||||
|
||||
compScalar.syncToHost();
|
||||
|
||||
BUILD_SINGLE_SELECTOR(arg->dataType(), return processCondition_, (mode, arg, comp, output, numResult, compScalar), FLOAT_TYPES);
|
||||
|
@ -104,8 +108,12 @@ namespace helpers {
|
|||
if (comp != nullptr)
|
||||
comp->syncToDevice();
|
||||
|
||||
if (output != nullptr)
|
||||
output->syncToDevice();
|
||||
|
||||
if (numResult != nullptr)
|
||||
numResult->syncToDevice();
|
||||
|
||||
compScalar.syncToDevice();
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,62 @@
|
|||
/*******************************************************************************
|
||||
* Copyright (c) 2015-2018 Skymind, Inc.
|
||||
*
|
||||
* This program and the accompanying materials are made available under the
|
||||
* terms of the Apache License, Version 2.0 which is available at
|
||||
* https://www.apache.org/licenses/LICENSE-2.0.
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
******************************************************************************/
|
||||
|
||||
|
||||
//
|
||||
// @author raver119@gmail.com
|
||||
//
|
||||
|
||||
#include "testlayers.h"
|
||||
#include <ops/declarable/CustomOperations.h>
|
||||
#include <NDArray.h>
|
||||
#include <ops/ops.h>
|
||||
#include <GradCheck.h>
|
||||
|
||||
|
||||
using namespace nd4j;
|
||||
|
||||
|
||||
// Test fixture shared by the CUDA-specific declarable-op tests in this file.
class DeclarableOpsTestsCuda1 : public testing::Test {
public:
    // Writes a single newline to stdout and flushes it right away, so the
    // output of each test case begins on a fresh line.
    DeclarableOpsTestsCuda1() {
        fputs("\n", stdout);
        fflush(stdout);
    }
};
|
||||
|
||||
|
||||
// Runs the `choose` op against an NDArray that is built from another array's
// special buffer and shape info only (its first constructor argument is
// nullptr), then checks the op status and the first element of output #1.
TEST_F(DeclarableOpsTestsCuda1, Test_CHOOSE_SCALAR_LARGE) {
|
||||
// 150 literals are listed, but the array below is created with shape {1,149}.
// NOTE(review): presumably only the first 149 values are consumed - confirm.
double inputData[150] = {
|
||||
0, 0.51, 0.68, 0.69, 0.86, 0.91, 0.96, 0.97, 0.97, 1.03, 1.13, 1.16, 1.16, 1.17, 1.19, 1.25, 1.25, 1.26, 1.27, 1.28, 1.29, 1.29, 1.29, 1.30, 1.31, 1.32, 1.33, 1.33, 1.35, 1.35, 1.36, 1.37, 1.38, 1.40, 1.41, 1.42, 1.43, 1.44, 1.44, 1.45, 1.45, 1.47, 1.47, 1.51, 1.51, 1.51, 1.52, 1.53, 1.56, 1.57, 1.58, 1.59, 1.61, 1.62, 1.63, 1.63, 1.64, 1.64, 1.66, 1.66, 1.67, 1.67, 1.70, 1.70, 1.70, 1.72, 1.72, 1.72, 1.72, 1.73, 1.74, 1.74, 1.76, 1.76, 1.77, 1.77, 1.80, 1.80, 1.81, 1.82, 1.83, 1.83, 1.84, 1.84, 1.84, 1.85, 1.85, 1.85, 1.86, 1.86, 1.87, 1.88, 1.89, 1.89, 1.89, 1.89, 1.89, 1.91, 1.91, 1.91, 1.92, 1.94, 1.95, 1.97, 1.98, 1.98, 1.98, 1.98, 1.98, 1.99, 2, 2, 2.01, 2.01, 2.02, 2.03, 2.03, 2.03, 2.04, 2.04, 2.05, 2.06, 2.07, 2.08, 2.08, 2.08, 2.08, 2.09, 2.09, 2.10, 2.10, 2.11, 2.11, 2.11, 2.12, 2.12, 2.13, 2.13, 2.14, 2.14, 2.14, 2.14, 2.15, 2.15, 2.16, 2.16, 2.16, 2.16, 2.16, 2.17
|
||||
};
|
||||
|
||||
// `precursor` holds the data; `x` is then built from precursor's special
// buffer and shape info, with nullptr passed as the first argument.
// NOTE(review): presumably this yields an array without a host-side buffer,
// which is the situation this test is meant to exercise - confirm.
auto precursor = NDArrayFactory::create<double>(inputData,'c',{1,149});
|
||||
NDArray x(nullptr, precursor.specialBuffer(), precursor.shapeInfo());
|
||||
|
||||
nd4j::ops::choose op;
|
||||
//greater than test
|
||||
// Executed with one input (x), one floating-point argument (0.0) and one
// integer argument (3).
// NOTE(review): presumably 3 selects the "greater than" mode and 0.0 is the
// comparison scalar, per the comment above - confirm against the op.
auto result = op.execute({&x}, {0.0},{3});
|
||||
ASSERT_EQ(Status::OK(), result->status());
|
||||
|
||||
// Output at index 1 is inspected; its element 0 must equal 148.
// NOTE(review): presumably this is the number of matching elements - confirm.
auto z = result->at(1);
|
||||
|
||||
ASSERT_EQ(148,z->e<double>(0));
|
||||
//ASSERT_TRUE(exp.isSameShape(z));
|
||||
|
||||
// The result set is released manually. A failing ASSERT_* above returns from
// the test body early, so this line is not reached in that case.
delete result;
|
||||
|
||||
}
|
|
@ -4003,7 +4003,7 @@ public abstract class BaseNDArray implements INDArray, Iterable {
|
|||
public INDArray rsubi(INDArray other, INDArray result) {
|
||||
validateNumericalArray("rsubi", false);
|
||||
if (other.isScalar()) {
|
||||
return this.addi(other.getDouble(0), result);
|
||||
return this.rsubi(other.getDouble(0), result);
|
||||
}
|
||||
|
||||
if (isScalar()) {
|
||||
|
|
|
@ -379,7 +379,11 @@ public class CudaAffinityManager extends BasicAffinityManager {
|
|||
|
||||
@Override
|
||||
public void ensureLocation(INDArray array, Location location) {
|
||||
AllocationPoint point = AtomicAllocator.getInstance().getAllocationPoint(array);
|
||||
// no location to ensure for empty array
|
||||
if (array.isEmpty())
|
||||
return;
|
||||
|
||||
val point = AtomicAllocator.getInstance().getAllocationPoint(array);
|
||||
switch (location) {
|
||||
case HOST: {
|
||||
AtomicAllocator.getInstance().synchronizeHostData(array);
|
||||
|
@ -399,7 +403,10 @@ public class CudaAffinityManager extends BasicAffinityManager {
|
|||
|
||||
@Override
|
||||
public Location getActiveLocation(INDArray array) {
|
||||
AllocationPoint point = AtomicAllocator.getInstance().getAllocationPoint(array);
|
||||
if (array.isEmpty())
|
||||
return Location.EVERYWHERE;
|
||||
|
||||
val point = AtomicAllocator.getInstance().getAllocationPoint(array);
|
||||
|
||||
if (point.isActualOnDeviceSide() && point.isActualOnHostSide()) {
|
||||
return Location.EVERYWHERE;
|
||||
|
|
|
@ -18,6 +18,7 @@ package org.nd4j.jita.flow.impl;
|
|||
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.NonNull;
|
||||
import lombok.val;
|
||||
import org.bytedeco.javacpp.DoublePointer;
|
||||
import org.nd4j.jita.allocator.Allocator;
|
||||
|
@ -95,7 +96,7 @@ public class SynchronousFlowController implements FlowController {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void synchronizeToDevice(AllocationPoint point) {
|
||||
public void synchronizeToDevice(@NonNull AllocationPoint point) {
|
||||
if (point.isConstant())
|
||||
return;
|
||||
|
||||
|
|
|
@ -567,6 +567,14 @@ public abstract class BaseCudaDataBuffer extends BaseDataBuffer implements JCuda
|
|||
return allocationPoint.getPointers().getHostPointer().address();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Pointer pointer() {
|
||||
// FIXME: very bad thing,
|
||||
lazyAllocateHostPointer();
|
||||
|
||||
return super.pointer();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
|
@ -672,7 +680,6 @@ public abstract class BaseCudaDataBuffer extends BaseDataBuffer implements JCuda
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
public void set(long[] data, long length, long srcOffset, long dstOffset) {
|
||||
// TODO: make sure getPointer returns proper pointer
|
||||
|
||||
|
@ -698,11 +705,14 @@ public abstract class BaseCudaDataBuffer extends BaseDataBuffer implements JCuda
|
|||
}
|
||||
break;
|
||||
case UBYTE: {
|
||||
data = ArrayUtil.cutBelowZero(data);
|
||||
for (int e = 0; e < data.length; e++) {
|
||||
put(e, data[e]);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case UINT16:
|
||||
data = ArrayUtil.cutBelowZero(data);
|
||||
case SHORT: {
|
||||
val pointer = new ShortPointer(ArrayUtil.toShorts(data));
|
||||
val srcPtr = new CudaPointer(pointer.address() + (dstOffset * elementSize));
|
||||
|
@ -714,6 +724,7 @@ public abstract class BaseCudaDataBuffer extends BaseDataBuffer implements JCuda
|
|||
}
|
||||
break;
|
||||
case UINT32:
|
||||
data = ArrayUtil.cutBelowZero(data);
|
||||
case INT: {
|
||||
val pointer = new IntPointer(ArrayUtil.toInts(data));
|
||||
val srcPtr = new CudaPointer(pointer.address() + (dstOffset * elementSize));
|
||||
|
@ -725,6 +736,7 @@ public abstract class BaseCudaDataBuffer extends BaseDataBuffer implements JCuda
|
|||
}
|
||||
break;
|
||||
case UINT64:
|
||||
data = ArrayUtil.cutBelowZero(data);
|
||||
case LONG: {
|
||||
val pointer = new LongPointer(data);
|
||||
val srcPtr = new CudaPointer(pointer.address() + (dstOffset * elementSize));
|
||||
|
|
|
@ -187,6 +187,11 @@ public class CudaBfloat16DataBuffer extends BaseCudaDataBuffer {
|
|||
setData(ArrayUtil.toShorts(data));
|
||||
}
|
||||
|
||||
/**
 * Sets the buffer content from an array of long values.
 *
 * The values are first converted with {@code ArrayUtil.toShorts(data)} and
 * the result is forwarded to the {@code setData} overload accepting that type.
 *
 * @param data source values to store in this buffer
 */
@Override
|
||||
public void setData(long[] data) {
|
||||
setData(ArrayUtil.toShorts(data));
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Override
|
||||
|
|
|
@ -414,6 +414,8 @@ public class CudaDataBufferFactory implements DataBufferFactory {
|
|||
return new CudaFloatDataBuffer(length, initialize, workspace);
|
||||
case HALF:
|
||||
return new CudaHalfDataBuffer(length, initialize, workspace);
|
||||
case BFLOAT16:
|
||||
return new CudaBfloat16DataBuffer(length, initialize, workspace);
|
||||
case BOOL:
|
||||
return new CudaBoolDataBuffer(length, initialize, workspace);
|
||||
default:
|
||||
|
|
|
@ -61,6 +61,7 @@ import org.nd4j.linalg.exception.ND4JIllegalArgumentException;
|
|||
import org.nd4j.linalg.exception.ND4JIllegalStateException;
|
||||
import org.nd4j.linalg.factory.Nd4j;
|
||||
import org.nd4j.linalg.jcublas.buffer.AddressRetriever;
|
||||
import org.nd4j.linalg.jcublas.buffer.BaseCudaDataBuffer;
|
||||
import org.nd4j.linalg.jcublas.buffer.CudaLongDataBuffer;
|
||||
import org.nd4j.linalg.jcublas.context.CudaContext;
|
||||
import org.nd4j.linalg.primitives.AtomicBoolean;
|
||||
|
@ -1495,7 +1496,8 @@ public class CudaExecutioner extends DefaultOpExecutioner {
|
|||
|
||||
@Override
|
||||
public <T extends Aggregate> void exec(Batch<T> batch) {
|
||||
DataBuffer surfaceBuffer = getBuffer(batch);
|
||||
val surfaceBuffer = (BaseCudaDataBuffer) getBuffer(batch);
|
||||
surfaceBuffer.lazyAllocateHostPointer();
|
||||
|
||||
CudaContext context = (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext();
|
||||
|
||||
|
@ -2238,152 +2240,6 @@ public class CudaExecutioner extends DefaultOpExecutioner {
|
|||
}
|
||||
}
|
||||
|
||||
if (op.opName().equalsIgnoreCase("im2col")) {
|
||||
val xArr = op.inputArguments()[0];
|
||||
val zArr = op.outputArguments()[0];
|
||||
|
||||
CudaContext context = AtomicAllocator.getInstance().getFlowController().prepareAction(zArr, xArr);
|
||||
|
||||
if (extraz.get() == null)
|
||||
extraz.set(new PointerPointer(32));
|
||||
|
||||
PointerPointer xShapeHost =
|
||||
extraz.get().put(AddressRetriever.retrieveHostPointer(xArr.shapeInfoDataBuffer()), // 0
|
||||
context.getOldStream(), // 1
|
||||
AtomicAllocator.getInstance().getDeviceIdPointer(), // 2
|
||||
context.getBufferAllocation(), // 3
|
||||
context.getBufferReduction(), // 4
|
||||
context.getBufferScalar(), // 5
|
||||
context.getBufferSpecial(),
|
||||
null,
|
||||
AddressRetriever.retrieveHostPointer(zArr.shapeInfoDataBuffer())
|
||||
);
|
||||
|
||||
|
||||
val x = AtomicAllocator.getInstance().getPointer(xArr, context);
|
||||
val z = AtomicAllocator.getInstance().getPointer(zArr, context);
|
||||
|
||||
val xShape = AtomicAllocator.getInstance().getPointer(xArr.shapeInfoDataBuffer(), context);
|
||||
val zShape = AtomicAllocator.getInstance().getPointer(zArr.shapeInfoDataBuffer(), context);
|
||||
|
||||
val hxShape = AtomicAllocator.getInstance().getHostPointer(xArr.shapeInfoDataBuffer());
|
||||
val hzShape = AtomicAllocator.getInstance().getHostPointer(zArr.shapeInfoDataBuffer());
|
||||
|
||||
double zeroPad = 0.0;
|
||||
if(op.tArgs() != null && op.tArgs().length > 0){
|
||||
zeroPad = op.tArgs()[0];
|
||||
}
|
||||
val extrass = new double[]{op.iArgs()[0], op.iArgs()[1], op.iArgs()[2], op.iArgs()[3], op.iArgs()[4], op.iArgs()[5], op.iArgs()[6], op.iArgs()[7], op.iArgs()[8], zeroPad};
|
||||
val extraArgsBuff = Nd4j.getConstantHandler().getConstantBuffer(extrass, xArr.dataType());
|
||||
val extraArgs = AtomicAllocator.getInstance().getPointer(extraArgsBuff, context);
|
||||
|
||||
nativeOps.execTransformSame(xShapeHost, 9,
|
||||
null, (LongPointer) hxShape, x, (LongPointer) xShape,
|
||||
null, (LongPointer) hzShape, z, (LongPointer) zShape, extraArgs);
|
||||
|
||||
//AtomicAllocator.getInstance().getAllocationPoint(zArr).tickDeviceWrite();
|
||||
AtomicAllocator.getInstance().getFlowController().registerAction(context, zArr, xArr);
|
||||
|
||||
Nd4j.getExecutioner().commit();
|
||||
|
||||
return op.outputArguments();
|
||||
} else if (op.opName().equalsIgnoreCase("col2im")) {
|
||||
val dtype = Nd4j.dataType();
|
||||
|
||||
val xArr = op.inputArguments()[0];
|
||||
val zArr = op.outputArguments()[0];
|
||||
|
||||
CudaContext context = AtomicAllocator.getInstance().getFlowController().prepareAction(zArr, xArr);
|
||||
|
||||
if (extraz.get() == null)
|
||||
extraz.set(new PointerPointer(32));
|
||||
|
||||
PointerPointer xShapeHost =
|
||||
extraz.get().put(AddressRetriever.retrieveHostPointer(xArr.shapeInfoDataBuffer()), // 0
|
||||
context.getOldStream(), // 1
|
||||
AtomicAllocator.getInstance().getDeviceIdPointer(), // 2
|
||||
context.getBufferAllocation(), // 3
|
||||
context.getBufferReduction(), // 4
|
||||
context.getBufferScalar(), // 5
|
||||
context.getBufferSpecial(),
|
||||
null,
|
||||
AddressRetriever.retrieveHostPointer(zArr.shapeInfoDataBuffer())
|
||||
);
|
||||
|
||||
|
||||
val x = AtomicAllocator.getInstance().getPointer(xArr, context);
|
||||
val z = AtomicAllocator.getInstance().getPointer(zArr, context);
|
||||
|
||||
val xShape = AtomicAllocator.getInstance().getPointer(xArr.shapeInfoDataBuffer(), context);
|
||||
val zShape = AtomicAllocator.getInstance().getPointer(zArr.shapeInfoDataBuffer(), context);
|
||||
|
||||
val hxShape = AtomicAllocator.getInstance().getHostPointer(xArr.shapeInfoDataBuffer());
|
||||
val hzShape = AtomicAllocator.getInstance().getHostPointer(zArr.shapeInfoDataBuffer());
|
||||
|
||||
val extrass = new double[]{op.iArgs()[0], op.iArgs()[1], op.iArgs()[2], op.iArgs()[3], op.iArgs()[4], op.iArgs()[5], op.iArgs()[6], op.iArgs()[7]};
|
||||
val extraArgsBuff = Nd4j.getConstantHandler().getConstantBuffer(extrass, xArr.dataType());
|
||||
val extraArgs = AtomicAllocator.getInstance().getPointer(extraArgsBuff, context);
|
||||
|
||||
|
||||
nativeOps.execTransformSame(xShapeHost, 8,
|
||||
null, (LongPointer) hxShape, x, (LongPointer) xShape,
|
||||
null, (LongPointer) hzShape, z, (LongPointer) zShape, extraArgs);
|
||||
|
||||
//AtomicAllocator.getInstance().getAllocationPoint(zArr).tickDeviceWrite();
|
||||
AtomicAllocator.getInstance().getFlowController().registerAction(context, zArr, xArr);
|
||||
|
||||
//Nd4j.getExecutioner().commit();
|
||||
return op.outputArguments();
|
||||
} else if (op.opName().equalsIgnoreCase("pooling2d")) {
|
||||
val dtype = Nd4j.dataType();
|
||||
|
||||
val xArr = op.inputArguments()[0];
|
||||
val zArr = op.outputArguments()[0];
|
||||
|
||||
CudaContext context = AtomicAllocator.getInstance().getFlowController().prepareAction(zArr, xArr);
|
||||
|
||||
if (extraz.get() == null)
|
||||
extraz.set(new PointerPointer(32));
|
||||
|
||||
PointerPointer xShapeHost =
|
||||
extraz.get().put(AddressRetriever.retrieveHostPointer(xArr.shapeInfoDataBuffer()), // 0
|
||||
context.getOldStream(), // 1
|
||||
AtomicAllocator.getInstance().getDeviceIdPointer(), // 2
|
||||
context.getBufferAllocation(), // 3
|
||||
context.getBufferReduction(), // 4
|
||||
context.getBufferScalar(), // 5
|
||||
context.getBufferSpecial(),
|
||||
null,
|
||||
AddressRetriever.retrieveHostPointer(zArr.shapeInfoDataBuffer())
|
||||
);
|
||||
|
||||
|
||||
val x = AtomicAllocator.getInstance().getPointer(xArr, context);
|
||||
val z = AtomicAllocator.getInstance().getPointer(zArr, context);
|
||||
|
||||
val xShape = AtomicAllocator.getInstance().getPointer(xArr.shapeInfoDataBuffer(), context);
|
||||
val zShape = AtomicAllocator.getInstance().getPointer(zArr.shapeInfoDataBuffer(), context);
|
||||
|
||||
val hxShape = AtomicAllocator.getInstance().getHostPointer(xArr.shapeInfoDataBuffer());
|
||||
val hzShape = AtomicAllocator.getInstance().getHostPointer(zArr.shapeInfoDataBuffer());
|
||||
|
||||
val extrass = new double[]{op.iArgs()[0], op.iArgs()[1], op.iArgs()[2], op.iArgs()[3], op.iArgs()[4], op.iArgs()[5], op.iArgs()[6], op.iArgs()[7], op.iArgs()[8]};
|
||||
val extraArgsBuff = Nd4j.getConstantHandler().getConstantBuffer(extrass, zArr.dataType());
|
||||
val extraArgs = AtomicAllocator.getInstance().getPointer(extraArgsBuff, context);
|
||||
|
||||
|
||||
nativeOps.execTransformFloat(xShapeHost, 23,
|
||||
null, (LongPointer) hxShape, x, (LongPointer) xShape,
|
||||
zArr.data().addressPointer(), (LongPointer) hzShape, z, (LongPointer) zShape,
|
||||
extraArgs);
|
||||
|
||||
// AtomicAllocator.getInstance().getAllocationPoint(zArr).tickDeviceWrite();
|
||||
AtomicAllocator.getInstance().getFlowController().registerAction(context, zArr, xArr);
|
||||
|
||||
return op.outputArguments();
|
||||
}
|
||||
|
||||
Nd4j.getExecutioner().commit();
|
||||
val ctx = (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext();
|
||||
|
||||
val context = (CudaOpContext) buildContext();
|
||||
|
|
|
@ -15377,7 +15377,7 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD();
|
|||
* Input arrays:
|
||||
* 0: input 3d tensor with shape [bS x K x N], N - number of time steps, bS - batch size, K - number of features
|
||||
* 1: 2d tensor of weights [3K x K]
|
||||
* 2: row of biases with twice length [1 × 2K]
|
||||
* 2: row of biases with twice length [1 x 2K]
|
||||
* 3: 2d tensor of previous cell state [bS x K]
|
||||
* 4: optional, 2d tensor of dropout mask [bS x K]
|
||||
*
|
||||
|
@ -15410,7 +15410,7 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD();
|
|||
* Input arrays:
|
||||
* 0: input 3d tensor with shape [N x bS x 2K], N - number of time steps, bS - batch size, K - number of features
|
||||
* 1: 2d tensor of weights [2K x 6K]
|
||||
* 2: row of biases with twice length [1 × 4K]
|
||||
* 2: row of biases with twice length [1 x 4K]
|
||||
* 3: 2d tensor of previous cell state [bS x 2K]
|
||||
* 4: optional, 2d tensor of dropout mask [bS x 2K]
|
||||
*
|
||||
|
@ -15444,7 +15444,7 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD();
|
|||
* Input arrays:
|
||||
* 0: input 3d tensor with shape [bS x K x N], N - number of time steps, bS - batch size, K - number of features
|
||||
* 1: 2d tensor of weights [3K x K]
|
||||
* 2: row of biases with twice length [1 × 2K]
|
||||
* 2: row of biases with twice length [1 x 2K]
|
||||
* 3: 2d tensor of previous cell state [bS x K]
|
||||
* 4: 3d tensor of cell state [bS x K x N]
|
||||
* 5: 2d tensor of cell state gradients [bS x K]
|
||||
|
@ -15482,7 +15482,7 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD();
|
|||
* Input arrays:
|
||||
* 0: input 3d tensor with shape [N x bS x 2K], N - number of time steps, bS - batch size, K - number of features
|
||||
* 1: 2d tensor of weights [2K x 6K]
|
||||
* 2: row of biases with twice length [1 × 4K]
|
||||
* 2: row of biases with twice length [1 x 4K]
|
||||
* 3: 2d tensor of previous cell state [bS x 2K]
|
||||
* 4: 3d tensor of cell state [N x bS x 2K]
|
||||
* 5: 2d tensor of cell state gradients [bS x 2K]
|
||||
|
@ -15681,7 +15681,7 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD();
|
|||
* 0: input with shape [batchSize x inSize], batchSize - batch size, inSize - number of features
|
||||
* 1: previous cell state [batchSize x inSize], that is at previous time step t-1
|
||||
* 2: weights [inSize x 3*inSize]
|
||||
* 3: biases [1 × 2*inSize]
|
||||
* 3: biases [1 x 2*inSize]
|
||||
*
|
||||
* Output arrays:
|
||||
* 0: current cell output [batchSize x inSize], that is at current time step t
|
||||
|
|
|
@ -23,6 +23,7 @@ import org.junit.Test;
|
|||
import org.junit.runner.RunWith;
|
||||
import org.junit.runners.Parameterized;
|
||||
import org.nd4j.linalg.BaseNd4jTest;
|
||||
import org.nd4j.linalg.api.buffer.DataType;
|
||||
import org.nd4j.linalg.api.ndarray.INDArray;
|
||||
import org.nd4j.linalg.api.ops.aggregates.impl.AggregateCBOW;
|
||||
import org.nd4j.linalg.api.ops.aggregates.impl.AggregateSkipGram;
|
||||
|
@ -95,17 +96,17 @@ public class HierarchicSoftmaxTests extends BaseNd4jTest {
|
|||
|
||||
@Test
|
||||
public void testSGGradient1() {
|
||||
INDArray syn0 = Nd4j.create(10, 10).assign(0.01f);
|
||||
INDArray syn1 = Nd4j.create(10, 10).assign(0.02f);
|
||||
INDArray syn1Neg = Nd4j.ones(10, 10).assign(0.03f);
|
||||
INDArray expTable = Nd4j.create(10000).assign(0.5f);
|
||||
INDArray syn0 = Nd4j.create(DataType.DOUBLE, 10, 10).assign(0.01f);
|
||||
INDArray syn1 = Nd4j.create(DataType.DOUBLE,10, 10).assign(0.02f);
|
||||
INDArray syn1Neg = Nd4j.create(DataType.DOUBLE,10, 10).assign(0.03f);
|
||||
INDArray expTable = Nd4j.create(DataType.DOUBLE,10000).assign(0.5f);
|
||||
|
||||
double lr = 0.001;
|
||||
|
||||
int idxSyn0 = 0;
|
||||
|
||||
INDArray expSyn0 = Nd4j.create(10).assign(0.01001f);
|
||||
INDArray expSyn1_1 = Nd4j.create(10).assign(0.020005);
|
||||
INDArray expSyn0 = Nd4j.create(DataType.DOUBLE,10).assign(0.01001f);
|
||||
INDArray expSyn1_1 = Nd4j.create(DataType.DOUBLE,10).assign(0.020005);
|
||||
|
||||
INDArray syn0row = syn0.getRow(idxSyn0);
|
||||
|
||||
|
|
|
@ -24,6 +24,7 @@ import org.junit.Test;
|
|||
import org.junit.runner.RunWith;
|
||||
import org.junit.runners.Parameterized;
|
||||
import org.nd4j.linalg.BaseNd4jTest;
|
||||
import org.nd4j.linalg.api.concurrency.AffinityManager;
|
||||
import org.nd4j.linalg.api.memory.MemoryWorkspace;
|
||||
import org.nd4j.linalg.api.memory.conf.WorkspaceConfiguration;
|
||||
import org.nd4j.linalg.api.memory.enums.AllocationPolicy;
|
||||
|
@ -288,6 +289,8 @@ public class DataBufferTests extends BaseNd4jTest {
|
|||
continue;
|
||||
}
|
||||
|
||||
log.info("Testing source [{}]; target: [{}]", sourceType, dt);
|
||||
|
||||
for (boolean useWs : new boolean[]{false, true}) {
|
||||
|
||||
try (MemoryWorkspace ws = (useWs ? workspace.notifyScopeEntered() : null)) {
|
||||
|
@ -334,7 +337,6 @@ public class DataBufferTests extends BaseNd4jTest {
|
|||
assertFalse(db2.isAttached());
|
||||
|
||||
if(!sourceType.equals("boolean")){
|
||||
log.info("Testing source [{}]; target: [{}]", sourceType, dt);
|
||||
testDBOps(db1);
|
||||
testDBOps(db2);
|
||||
}
|
||||
|
@ -375,6 +377,8 @@ public class DataBufferTests extends BaseNd4jTest {
|
|||
bb.position(0);
|
||||
bb.put(b);
|
||||
|
||||
Nd4j.getAffinityManager().tagLocation(arr2, AffinityManager.Location.HOST);
|
||||
|
||||
assertEquals(arr.toString(), arr2.toString());
|
||||
assertEquals(arr, arr2);
|
||||
|
||||
|
|
|
@ -44,6 +44,7 @@ import static org.junit.Assert.*;
|
|||
/**
|
||||
* @author raver119@gmail.com
|
||||
*/
|
||||
@Ignore
|
||||
@Slf4j
|
||||
@RunWith(Parameterized.class)
|
||||
public class CompressionTests extends BaseNd4jTest {
|
||||
|
|
|
@ -1133,6 +1133,33 @@ public class ArrayUtil {
|
|||
return ret;
|
||||
}
|
||||
|
||||
public static int[] cutBelowZero(int[] data) {
|
||||
val ret = new int[data.length];
|
||||
for (int i = 0; i < data.length; i++)
|
||||
ret[i] = data[i] < 0 ? 0 : data[i];
|
||||
return ret;
|
||||
}
|
||||
|
||||
public static long[] cutBelowZero(long[] data) {
|
||||
val ret = new long[data.length];
|
||||
for (int i = 0; i < data.length; i++)
|
||||
ret[i] = data[i] < 0 ? 0 : data[i];
|
||||
return ret;
|
||||
}
|
||||
|
||||
public static short[] cutBelowZero(short[] data) {
|
||||
val ret = new short[data.length];
|
||||
for (int i = 0; i < data.length; i++)
|
||||
ret[i] = data[i] < 0 ? 0 : data[i];
|
||||
return ret;
|
||||
}
|
||||
|
||||
public static byte[] cutBelowZero(byte[] data) {
|
||||
val ret = new byte[data.length];
|
||||
for (int i = 0; i < data.length; i++)
|
||||
ret[i] = data[i] < 0 ? 0 : data[i];
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a copy of this array with the
|
||||
|
|
Loading…
Reference in New Issue