From 6ce458e949798561debcc94805bf03d0013f5c24 Mon Sep 17 00:00:00 2001 From: raver119 Date: Fri, 12 Jul 2019 20:39:54 +0300 Subject: [PATCH] [WIP] CUDA Java side (#58) * one crashing test Signed-off-by: raver119 * stupid issue fixed Signed-off-by: raver119 * one fix Signed-off-by: raver119 * dont ensure location for empty arrays Signed-off-by: raver119 * few more signatures fixed Signed-off-by: raver119 * few tweaks for DataBuffer creation from java primitives Signed-off-by: raver119 * get rid of legacy im2col/col2im intercept Signed-off-by: raver119 * rsubi scalar array fix Signed-off-by: raver119 --- libnd4j/include/array/cuda/DataBuffer.cu | 1 - .../ops/declarable/helpers/impl/choose.cpp | 16 +- .../layers_tests/DeclarableOpsTestsCuda1.cu | 62 ++++++++ .../nd4j/linalg/api/ndarray/BaseNDArray.java | 2 +- .../jita/concurrency/CudaAffinityManager.java | 11 +- .../flow/impl/SynchronousFlowController.java | 3 +- .../jcublas/buffer/BaseCudaDataBuffer.java | 14 +- .../buffer/CudaBfloat16DataBuffer.java | 5 + .../buffer/factory/CudaDataBufferFactory.java | 2 + .../ops/executioner/CudaExecutioner.java | 150 +----------------- .../java/org/nd4j/nativeblas/Nd4jCpu.java | 10 +- .../aggregates/HierarchicSoftmaxTests.java | 13 +- .../linalg/api/buffer/DataBufferTests.java | 6 +- .../linalg/compression/CompressionTests.java | 1 + .../java/org/nd4j/linalg/util/ArrayUtil.java | 27 ++++ 15 files changed, 154 insertions(+), 169 deletions(-) create mode 100644 libnd4j/tests_cpu/layers_tests/DeclarableOpsTestsCuda1.cu diff --git a/libnd4j/include/array/cuda/DataBuffer.cu b/libnd4j/include/array/cuda/DataBuffer.cu index 2da83e43b..cf288d507 100644 --- a/libnd4j/include/array/cuda/DataBuffer.cu +++ b/libnd4j/include/array/cuda/DataBuffer.cu @@ -37,7 +37,6 @@ void DataBuffer::allocateSpecial() { //////////////////////////////////////////////////////////////////////// void DataBuffer::syncToPrimary(const LaunchContext* context, const bool forceSync) { - if(isPrimaryActual() && !forceSync) return; diff --git a/libnd4j/include/ops/declarable/helpers/impl/choose.cpp b/libnd4j/include/ops/declarable/helpers/impl/choose.cpp index d18cde269..47ca64d3b 100644 --- a/libnd4j/include/ops/declarable/helpers/impl/choose.cpp +++ b/libnd4j/include/ops/declarable/helpers/impl/choose.cpp @@ -93,8 +93,12 @@ namespace helpers { if (comp != nullptr) comp->syncToHost(); - output->syncToHost(); - numResult->syncToHost(); + if (output != nullptr) + output->syncToHost(); + + if (numResult != nullptr) + numResult->syncToHost(); + compScalar.syncToHost(); BUILD_SINGLE_SELECTOR(arg->dataType(), return processCondition_, (mode, arg, comp, output, numResult, compScalar), FLOAT_TYPES); @@ -104,8 +108,12 @@ namespace helpers { if (comp != nullptr) comp->syncToDevice(); - output->syncToDevice(); - numResult->syncToDevice(); + if (output != nullptr) + output->syncToDevice(); + + if (numResult != nullptr) + numResult->syncToDevice(); + compScalar.syncToDevice(); } diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTestsCuda1.cu b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTestsCuda1.cu new file mode 100644 index 000000000..161b96918 --- /dev/null +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTestsCuda1.cu @@ -0,0 +1,62 @@ +/******************************************************************************* + * Copyright (c) 2015-2018 Skymind, Inc. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + + +// +// @author raver119@gmail.com +// + +#include "testlayers.h" +#include +#include +#include +#include + + +using namespace nd4j; + + +class DeclarableOpsTestsCuda1 : public testing::Test { +public: + + DeclarableOpsTestsCuda1() { + printf("\n"); + fflush(stdout); + } +}; + + +TEST_F(DeclarableOpsTestsCuda1, Test_CHOOSE_SCALAR_LARGE) { + double inputData[150] = { + 0, 0.51, 0.68, 0.69, 0.86, 0.91, 0.96, 0.97, 0.97, 1.03, 1.13, 1.16, 1.16, 1.17, 1.19, 1.25, 1.25, 1.26, 1.27, 1.28, 1.29, 1.29, 1.29, 1.30, 1.31, 1.32, 1.33, 1.33, 1.35, 1.35, 1.36, 1.37, 1.38, 1.40, 1.41, 1.42, 1.43, 1.44, 1.44, 1.45, 1.45, 1.47, 1.47, 1.51, 1.51, 1.51, 1.52, 1.53, 1.56, 1.57, 1.58, 1.59, 1.61, 1.62, 1.63, 1.63, 1.64, 1.64, 1.66, 1.66, 1.67, 1.67, 1.70, 1.70, 1.70, 1.72, 1.72, 1.72, 1.72, 1.73, 1.74, 1.74, 1.76, 1.76, 1.77, 1.77, 1.80, 1.80, 1.81, 1.82, 1.83, 1.83, 1.84, 1.84, 1.84, 1.85, 1.85, 1.85, 1.86, 1.86, 1.87, 1.88, 1.89, 1.89, 1.89, 1.89, 1.89, 1.91, 1.91, 1.91, 1.92, 1.94, 1.95, 1.97, 1.98, 1.98, 1.98, 1.98, 1.98, 1.99, 2, 2, 2.01, 2.01, 2.02, 2.03, 2.03, 2.03, 2.04, 2.04, 2.05, 2.06, 2.07, 2.08, 2.08, 2.08, 2.08, 2.09, 2.09, 2.10, 2.10, 2.11, 2.11, 2.11, 2.12, 2.12, 2.13, 2.13, 2.14, 2.14, 2.14, 2.14, 2.15, 2.15, 2.16, 2.16, 2.16, 2.16, 2.16, 2.17 + }; + + auto precursor = NDArrayFactory::create(inputData,'c',{1,149}); + NDArray x(nullptr, precursor.specialBuffer(), precursor.shapeInfo()); + + nd4j::ops::choose op; + //greater than test + auto result = op.execute({&x}, {0.0},{3}); + ASSERT_EQ(Status::OK(), result->status()); + + auto z = result->at(1); + + ASSERT_EQ(148,z->e(0)); + //ASSERT_TRUE(exp.isSameShape(z)); + + delete result; + +} \ No newline at end of file diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ndarray/BaseNDArray.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ndarray/BaseNDArray.java index 3f3a6469d..e77dd7fd0 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ndarray/BaseNDArray.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ndarray/BaseNDArray.java @@ -4003,7 +4003,7 @@ public abstract class BaseNDArray implements INDArray, Iterable { public INDArray rsubi(INDArray other, INDArray result) { validateNumericalArray("rsubi", false); if (other.isScalar()) { - return this.addi(other.getDouble(0), result); + return this.rsubi(other.getDouble(0), result); } if (isScalar()) { diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/jita/concurrency/CudaAffinityManager.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/jita/concurrency/CudaAffinityManager.java index b412938e2..2b3e57f5c 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/jita/concurrency/CudaAffinityManager.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/jita/concurrency/CudaAffinityManager.java @@ -379,7 +379,11 @@ public class CudaAffinityManager extends BasicAffinityManager { @Override public void ensureLocation(INDArray array, Location location) { - AllocationPoint point = AtomicAllocator.getInstance().getAllocationPoint(array); + // to location to ensure for empty array + if (array.isEmpty()) + return; + + val point = AtomicAllocator.getInstance().getAllocationPoint(array); switch (location) { case HOST: { AtomicAllocator.getInstance().synchronizeHostData(array); @@ -399,7 +403,10 @@ public class CudaAffinityManager extends BasicAffinityManager { @Override public Location getActiveLocation(INDArray array) { - AllocationPoint point = AtomicAllocator.getInstance().getAllocationPoint(array); + if (array.isEmpty()) + return Location.EVERYWHERE; + + val point = AtomicAllocator.getInstance().getAllocationPoint(array); if (point.isActualOnDeviceSide() && point.isActualOnHostSide()) { return Location.EVERYWHERE; diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/jita/flow/impl/SynchronousFlowController.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/jita/flow/impl/SynchronousFlowController.java index 6e5969604..2f84fa54d 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/jita/flow/impl/SynchronousFlowController.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/jita/flow/impl/SynchronousFlowController.java @@ -18,6 +18,7 @@ package org.nd4j.jita.flow.impl; import lombok.Getter; +import lombok.NonNull; import lombok.val; import org.bytedeco.javacpp.DoublePointer; import org.nd4j.jita.allocator.Allocator; @@ -95,7 +96,7 @@ public class SynchronousFlowController implements FlowController { } @Override - public void synchronizeToDevice(AllocationPoint point) { + public void synchronizeToDevice(@NonNull AllocationPoint point) { if (point.isConstant()) return; diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/buffer/BaseCudaDataBuffer.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/buffer/BaseCudaDataBuffer.java index 5ac029ca1..79196d15b 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/buffer/BaseCudaDataBuffer.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/buffer/BaseCudaDataBuffer.java @@ -567,6 +567,14 @@ public abstract class BaseCudaDataBuffer extends BaseDataBuffer implements JCuda return allocationPoint.getPointers().getHostPointer().address(); } + @Override + public Pointer pointer() { + // FIXME: very bad thing, + lazyAllocateHostPointer(); + + return super.pointer(); + } + /** * @@ -672,7 +680,6 @@ public abstract class BaseCudaDataBuffer extends BaseDataBuffer implements JCuda } } - public void set(long[] data, long length, long srcOffset, long dstOffset) { // TODO: make sure getPointer returns proper pointer @@ -698,11 +705,14 @@ public abstract class BaseCudaDataBuffer extends BaseDataBuffer implements JCuda } break; case UBYTE: { + data = ArrayUtil.cutBelowZero(data); for (int e = 0; e < data.length; e++) { put(e, data[e]); } } break; + case UINT16: + data = ArrayUtil.cutBelowZero(data); case SHORT: { val pointer = new ShortPointer(ArrayUtil.toShorts(data)); val srcPtr = new CudaPointer(pointer.address() + (dstOffset * elementSize)); @@ -714,6 +724,7 @@ public abstract class BaseCudaDataBuffer extends BaseDataBuffer implements JCuda } break; case UINT32: + data = ArrayUtil.cutBelowZero(data); case INT: { val pointer = new IntPointer(ArrayUtil.toInts(data)); val srcPtr = new CudaPointer(pointer.address() + (dstOffset * elementSize)); @@ -725,6 +736,7 @@ public abstract class BaseCudaDataBuffer extends BaseDataBuffer implements JCuda } break; case UINT64: + data = ArrayUtil.cutBelowZero(data); case LONG: { val pointer = new LongPointer(data); val srcPtr = new CudaPointer(pointer.address() + (dstOffset * elementSize)); diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/buffer/CudaBfloat16DataBuffer.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/buffer/CudaBfloat16DataBuffer.java index 970c557de..193a9e21c 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/buffer/CudaBfloat16DataBuffer.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/buffer/CudaBfloat16DataBuffer.java @@ -187,6 +187,11 @@ public class CudaBfloat16DataBuffer extends BaseCudaDataBuffer { setData(ArrayUtil.toShorts(data)); } + @Override + public void setData(long[] data) { + setData(ArrayUtil.toShorts(data)); + } + @Override diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/buffer/factory/CudaDataBufferFactory.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/buffer/factory/CudaDataBufferFactory.java index 1d775721f..77093a5bb 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/buffer/factory/CudaDataBufferFactory.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/buffer/factory/CudaDataBufferFactory.java @@ -414,6 +414,8 @@ public class CudaDataBufferFactory implements DataBufferFactory { return new CudaFloatDataBuffer(length, initialize, workspace); case HALF: return new CudaHalfDataBuffer(length, initialize, workspace); + case BFLOAT16: + return new CudaBfloat16DataBuffer(length, initialize, workspace); case BOOL: return new CudaBoolDataBuffer(length, initialize, workspace); default: diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaExecutioner.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaExecutioner.java index a250fd8f9..90bc487df 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaExecutioner.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaExecutioner.java @@ -61,6 +61,7 @@ import org.nd4j.linalg.exception.ND4JIllegalArgumentException; import org.nd4j.linalg.exception.ND4JIllegalStateException; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.jcublas.buffer.AddressRetriever; +import org.nd4j.linalg.jcublas.buffer.BaseCudaDataBuffer; import org.nd4j.linalg.jcublas.buffer.CudaLongDataBuffer; import org.nd4j.linalg.jcublas.context.CudaContext; import org.nd4j.linalg.primitives.AtomicBoolean; @@ -1495,7 +1496,8 @@ public class CudaExecutioner extends DefaultOpExecutioner { @Override public void exec(Batch batch) { - DataBuffer surfaceBuffer = getBuffer(batch); + val surfaceBuffer = (BaseCudaDataBuffer) getBuffer(batch); + surfaceBuffer.lazyAllocateHostPointer(); CudaContext context = (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext(); @@ -2238,152 +2240,6 @@ public class CudaExecutioner extends DefaultOpExecutioner { } } - if (op.opName().equalsIgnoreCase("im2col")) { - val xArr = op.inputArguments()[0]; - val zArr = op.outputArguments()[0]; - - CudaContext context = AtomicAllocator.getInstance().getFlowController().prepareAction(zArr, xArr); - - if (extraz.get() == null) - extraz.set(new PointerPointer(32)); - - PointerPointer xShapeHost = - extraz.get().put(AddressRetriever.retrieveHostPointer(xArr.shapeInfoDataBuffer()), // 0 - context.getOldStream(), // 1 - AtomicAllocator.getInstance().getDeviceIdPointer(), // 2 - context.getBufferAllocation(), // 3 - context.getBufferReduction(), // 4 - context.getBufferScalar(), // 5 - context.getBufferSpecial(), - null, - AddressRetriever.retrieveHostPointer(zArr.shapeInfoDataBuffer()) - ); - - - val x = AtomicAllocator.getInstance().getPointer(xArr, context); - val z = AtomicAllocator.getInstance().getPointer(zArr, context); - - val xShape = AtomicAllocator.getInstance().getPointer(xArr.shapeInfoDataBuffer(), context); - val zShape = AtomicAllocator.getInstance().getPointer(zArr.shapeInfoDataBuffer(), context); - - val hxShape = AtomicAllocator.getInstance().getHostPointer(xArr.shapeInfoDataBuffer()); - val hzShape = AtomicAllocator.getInstance().getHostPointer(zArr.shapeInfoDataBuffer()); - - double zeroPad = 0.0; - if(op.tArgs() != null && op.tArgs().length > 0){ - zeroPad = op.tArgs()[0]; - } - val extrass = new double[]{op.iArgs()[0], op.iArgs()[1], op.iArgs()[2], op.iArgs()[3], op.iArgs()[4], op.iArgs()[5], op.iArgs()[6], op.iArgs()[7], op.iArgs()[8], zeroPad}; - val extraArgsBuff = Nd4j.getConstantHandler().getConstantBuffer(extrass, xArr.dataType()); - val extraArgs = AtomicAllocator.getInstance().getPointer(extraArgsBuff, context); - - nativeOps.execTransformSame(xShapeHost, 9, - null, (LongPointer) hxShape, x, (LongPointer) xShape, - null, (LongPointer) hzShape, z, (LongPointer) zShape, extraArgs); - - //AtomicAllocator.getInstance().getAllocationPoint(zArr).tickDeviceWrite(); - AtomicAllocator.getInstance().getFlowController().registerAction(context, zArr, xArr); - - Nd4j.getExecutioner().commit(); - - return op.outputArguments(); - } else if (op.opName().equalsIgnoreCase("col2im")) { - val dtype = Nd4j.dataType(); - - val xArr = op.inputArguments()[0]; - val zArr = op.outputArguments()[0]; - - CudaContext context = AtomicAllocator.getInstance().getFlowController().prepareAction(zArr, xArr); - - if (extraz.get() == null) - extraz.set(new PointerPointer(32)); - - PointerPointer xShapeHost = - extraz.get().put(AddressRetriever.retrieveHostPointer(xArr.shapeInfoDataBuffer()), // 0 - context.getOldStream(), // 1 - AtomicAllocator.getInstance().getDeviceIdPointer(), // 2 - context.getBufferAllocation(), // 3 - context.getBufferReduction(), // 4 - context.getBufferScalar(), // 5 - context.getBufferSpecial(), - null, - AddressRetriever.retrieveHostPointer(zArr.shapeInfoDataBuffer()) - ); - - - val x = AtomicAllocator.getInstance().getPointer(xArr, context); - val z = AtomicAllocator.getInstance().getPointer(zArr, context); - - val xShape = AtomicAllocator.getInstance().getPointer(xArr.shapeInfoDataBuffer(), context); - val zShape = AtomicAllocator.getInstance().getPointer(zArr.shapeInfoDataBuffer(), context); - - val hxShape = AtomicAllocator.getInstance().getHostPointer(xArr.shapeInfoDataBuffer()); - val hzShape = AtomicAllocator.getInstance().getHostPointer(zArr.shapeInfoDataBuffer()); - - val extrass = new double[]{op.iArgs()[0], op.iArgs()[1], op.iArgs()[2], op.iArgs()[3], op.iArgs()[4], op.iArgs()[5], op.iArgs()[6], op.iArgs()[7]}; - val extraArgsBuff = Nd4j.getConstantHandler().getConstantBuffer(extrass, xArr.dataType()); - val extraArgs = AtomicAllocator.getInstance().getPointer(extraArgsBuff, context); - - - nativeOps.execTransformSame(xShapeHost, 8, - null, (LongPointer) hxShape, x, (LongPointer) xShape, - null, (LongPointer) hzShape, z, (LongPointer) zShape, extraArgs); - - //AtomicAllocator.getInstance().getAllocationPoint(zArr).tickDeviceWrite(); - AtomicAllocator.getInstance().getFlowController().registerAction(context, zArr, xArr); - - //Nd4j.getExecutioner().commit(); - return op.outputArguments(); - } else if (op.opName().equalsIgnoreCase("pooling2d")) { - val dtype = Nd4j.dataType(); - - val xArr = op.inputArguments()[0]; - val zArr = op.outputArguments()[0]; - - CudaContext context = AtomicAllocator.getInstance().getFlowController().prepareAction(zArr, xArr); - - if (extraz.get() == null) - extraz.set(new PointerPointer(32)); - - PointerPointer xShapeHost = - extraz.get().put(AddressRetriever.retrieveHostPointer(xArr.shapeInfoDataBuffer()), // 0 - context.getOldStream(), // 1 - AtomicAllocator.getInstance().getDeviceIdPointer(), // 2 - context.getBufferAllocation(), // 3 - context.getBufferReduction(), // 4 - context.getBufferScalar(), // 5 - context.getBufferSpecial(), - null, - AddressRetriever.retrieveHostPointer(zArr.shapeInfoDataBuffer()) - ); - - - val x = AtomicAllocator.getInstance().getPointer(xArr, context); - val z = AtomicAllocator.getInstance().getPointer(zArr, context); - - val xShape = AtomicAllocator.getInstance().getPointer(xArr.shapeInfoDataBuffer(), context); - val zShape = AtomicAllocator.getInstance().getPointer(zArr.shapeInfoDataBuffer(), context); - - val hxShape = AtomicAllocator.getInstance().getHostPointer(xArr.shapeInfoDataBuffer()); - val hzShape = AtomicAllocator.getInstance().getHostPointer(zArr.shapeInfoDataBuffer()); - - val extrass = new double[]{op.iArgs()[0], op.iArgs()[1], op.iArgs()[2], op.iArgs()[3], op.iArgs()[4], op.iArgs()[5], op.iArgs()[6], op.iArgs()[7], op.iArgs()[8]}; - val extraArgsBuff = Nd4j.getConstantHandler().getConstantBuffer(extrass, zArr.dataType()); - val extraArgs = AtomicAllocator.getInstance().getPointer(extraArgsBuff, context); - - - nativeOps.execTransformFloat(xShapeHost, 23, - null, (LongPointer) hxShape, x, (LongPointer) xShape, - zArr.data().addressPointer(), (LongPointer) hzShape, z, (LongPointer) zShape, - extraArgs); - - // AtomicAllocator.getInstance().getAllocationPoint(zArr).tickDeviceWrite(); - AtomicAllocator.getInstance().getFlowController().registerAction(context, zArr, xArr); - - return op.outputArguments(); - } - - Nd4j.getExecutioner().commit(); val ctx = (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext(); val context = (CudaOpContext) buildContext(); diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java index e6858f8b6..c5c14d034 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java @@ -15377,7 +15377,7 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD(); * Input arrays: * 0: input 3d tensor with shape [bS x K x N], N - number of time steps, bS - batch size, K - number of features * 1: 2d tensor of weights [3K x K] - * 2: row of biases with twice length [1 × 2K] + * 2: row of biases with twice length [1 x 2K] * 3: 2d tensor of previous cell state [bS x K] * 4: optional, 2d tensor of dropout mask [bS x K] * @@ -15410,7 +15410,7 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD(); * Input arrays: * 0: input 3d tensor with shape [N x bS x 2K], N - number of time steps, bS - batch size, K - number of features * 1: 2d tensor of weights [2K x 6K] - * 2: row of biases with twice length [1 × 4K] + * 2: row of biases with twice length [1 x 4K] * 3: 2d tensor of previous cell state [bS x 2K] * 4: optional, 2d tensor of dropout mask [bS x 2K] * @@ -15444,7 +15444,7 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD(); * Input arrays: * 0: input 3d tensor with shape [bS x K x N], N - number of time steps, bS - batch size, K - number of features * 1: 2d tensor of weights [3K x K] - * 2: row of biases with twice length [1 × 2K] + * 2: row of biases with twice length [1 x 2K] * 3: 2d tensor of previous cell state [bS x K] * 4: 3d tensor of cell state [bS x K x N] * 5: 2d tensor of cell state gradients [bS x K] @@ -15482,7 +15482,7 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD(); * Input arrays: * 0: input 3d tensor with shape [N x bS x 2K], N - number of time steps, bS - batch size, K - number of features * 1: 2d tensor of weights [2K x 6K] - * 2: row of biases with twice length [1 × 4K] + * 2: row of biases with twice length [1 x 4K] * 3: 2d tensor of previous cell state [bS x 2K] * 4: 3d tensor of cell state [N x bS x 2K] * 5: 2d tensor of cell state gradients [bS x 2K] @@ -15681,7 +15681,7 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD(); * 0: input with shape [batchSize x inSize], batchSize - batch size, inSize - number of features * 1: previous cell state [batchSize x inSize], that is at previous time step t-1 * 2: weights [inSize x 3*inSize] - * 3: biases [1 × 2*inSize] + * 3: biases [1 x 2*inSize] * * Output arrays: * 0: current cell output [batchSize x inSize], that is at current time step t diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/aggregates/HierarchicSoftmaxTests.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/aggregates/HierarchicSoftmaxTests.java index 46f6fe243..3f07b6a2a 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/aggregates/HierarchicSoftmaxTests.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/aggregates/HierarchicSoftmaxTests.java @@ -23,6 +23,7 @@ import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; import org.nd4j.linalg.BaseNd4jTest; +import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.aggregates.impl.AggregateCBOW; import org.nd4j.linalg.api.ops.aggregates.impl.AggregateSkipGram; @@ -95,17 +96,17 @@ public class HierarchicSoftmaxTests extends BaseNd4jTest { @Test public void testSGGradient1() { - INDArray syn0 = Nd4j.create(10, 10).assign(0.01f); - INDArray syn1 = Nd4j.create(10, 10).assign(0.02f); - INDArray syn1Neg = Nd4j.ones(10, 10).assign(0.03f); - INDArray expTable = Nd4j.create(10000).assign(0.5f); + INDArray syn0 = Nd4j.create(DataType.DOUBLE, 10, 10).assign(0.01f); + INDArray syn1 = Nd4j.create(DataType.DOUBLE,10, 10).assign(0.02f); + INDArray syn1Neg = Nd4j.create(DataType.DOUBLE,10, 10).assign(0.03f); + INDArray expTable = Nd4j.create(DataType.DOUBLE,10000).assign(0.5f); double lr = 0.001; int idxSyn0 = 0; - INDArray expSyn0 = Nd4j.create(10).assign(0.01001f); - INDArray expSyn1_1 = Nd4j.create(10).assign(0.020005); + INDArray expSyn0 = Nd4j.create(DataType.DOUBLE,10).assign(0.01001f); + INDArray expSyn1_1 = Nd4j.create(DataType.DOUBLE,10).assign(0.020005); INDArray syn0row = syn0.getRow(idxSyn0); diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/api/buffer/DataBufferTests.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/api/buffer/DataBufferTests.java index 584f9be9a..4f690ea72 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/api/buffer/DataBufferTests.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/api/buffer/DataBufferTests.java @@ -24,6 +24,7 @@ import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; import org.nd4j.linalg.BaseNd4jTest; +import org.nd4j.linalg.api.concurrency.AffinityManager; import org.nd4j.linalg.api.memory.MemoryWorkspace; import org.nd4j.linalg.api.memory.conf.WorkspaceConfiguration; import org.nd4j.linalg.api.memory.enums.AllocationPolicy; @@ -288,6 +289,8 @@ public class DataBufferTests extends BaseNd4jTest { continue; } + log.info("Testing source [{}]; target: [{}]", sourceType, dt); + for (boolean useWs : new boolean[]{false, true}) { try (MemoryWorkspace ws = (useWs ? workspace.notifyScopeEntered() : null)) { @@ -334,7 +337,6 @@ public class DataBufferTests extends BaseNd4jTest { assertFalse(db2.isAttached()); if(!sourceType.equals("boolean")){ - log.info("Testing source [{}]; target: [{}]", sourceType, dt); testDBOps(db1); testDBOps(db2); } @@ -375,6 +377,8 @@ public class DataBufferTests extends BaseNd4jTest { bb.position(0); bb.put(b); + Nd4j.getAffinityManager().tagLocation(arr2, AffinityManager.Location.HOST); + assertEquals(arr.toString(), arr2.toString()); assertEquals(arr, arr2); diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/compression/CompressionTests.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/compression/CompressionTests.java index 383dafce9..69ac8eacc 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/compression/CompressionTests.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/compression/CompressionTests.java @@ -44,6 +44,7 @@ import static org.junit.Assert.*; /** * @author raver119@gmail.com */ +@Ignore @Slf4j @RunWith(Parameterized.class) public class CompressionTests extends BaseNd4jTest { diff --git a/nd4j/nd4j-common/src/main/java/org/nd4j/linalg/util/ArrayUtil.java b/nd4j/nd4j-common/src/main/java/org/nd4j/linalg/util/ArrayUtil.java index bdeb882f9..e51e75ce4 100644 --- a/nd4j/nd4j-common/src/main/java/org/nd4j/linalg/util/ArrayUtil.java +++ b/nd4j/nd4j-common/src/main/java/org/nd4j/linalg/util/ArrayUtil.java @@ -1133,6 +1133,33 @@ public class ArrayUtil { return ret; } + public static int[] cutBelowZero(int[] data) { + val ret = new int[data.length]; + for (int i = 0; i < data.length; i++) + ret[i] = data[i] < 0 ? 0 : data[i]; + return ret; + } + + public static long[] cutBelowZero(long[] data) { + val ret = new long[data.length]; + for (int i = 0; i < data.length; i++) + ret[i] = data[i] < 0 ? 0 : data[i]; + return ret; + } + + public static short[] cutBelowZero(short[] data) { + val ret = new short[data.length]; + for (int i = 0; i < data.length; i++) + ret[i] = data[i] < 0 ? 0 : data[i]; + return ret; + } + + public static byte[] cutBelowZero(byte[] data) { + val ret = new byte[data.length]; + for (int i = 0; i < data.length; i++) + ret[i] = data[i] < 0 ? 0 : data[i]; + return ret; + } /** * Return a copy of this array with the