[WIP] minor fixes (#447)
* couple of tests disabled Signed-off-by: raver119@gmail.com <raver119@gmail.com> * few syncs removed, some logging added Signed-off-by: raver119@gmail.com <raver119@gmail.com> * some logging added Signed-off-by: raver119@gmail.com <raver119@gmail.com> * some logging added Signed-off-by: raver119@gmail.com <raver119@gmail.com> * fix min num_threads Signed-off-by: raver119@gmail.com <raver119@gmail.com> * fixed wrong release function for scalarPointer Signed-off-by: raver119@gmail.com <raver119@gmail.com>master
parent
b786418c5d
commit
f0adb6f788
|
@ -88,7 +88,7 @@ namespace sd {
|
|||
cudaFree(_allocationPointer);
|
||||
|
||||
if (_scalarPointer != nullptr)
|
||||
cudaFree(_scalarPointer);
|
||||
cudaFreeHost(_scalarPointer);
|
||||
|
||||
if (_allocationPointer != nullptr)
|
||||
cudaFree(_reductionPointer);
|
||||
|
|
|
@ -243,9 +243,6 @@ __host__ void ReduceBoolFunction<X,Z>::intermediateXD(dim3 launchDims, cudaStrea
|
|||
int *dimension, int dimensionLength,
|
||||
void *reductionPointer,
|
||||
const Nd4jLong *tadShapeInfo, const Nd4jLong *tadOffsets) {
|
||||
|
||||
nd4j_printf("Step A%i\n", -1);
|
||||
|
||||
if(shape::isEmpty(hXShapeInfo)) {
|
||||
|
||||
if(shape::isEmpty(hZShapeInfo))
|
||||
|
|
|
@ -515,8 +515,8 @@ BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT cudaDecodeBitmapGeneric, (dim3 &
|
|||
|
||||
template <bool storeSum, bool isNP2>
|
||||
__host__ void prescanLauncher(dim3 &blocks, dim3 &threads, int shmem, cudaStream_t *stream, int *g_odata, const int *g_idata, int *g_blockSums, int n, int blockIndex, int baseIndex) {
|
||||
//printf("Prescan grid: <%i/%i/%i>; threads: <%i/%i/%i>; shareMemSize: %i\n", blocks.x, blocks.y, blocks.z, threads.x, threads.y, threads.z, shmem);
|
||||
prescan<storeSum, isNP2><<<blocks, threads, shmem, *stream>>>(g_odata, g_idata, g_blockSums, n, blockIndex, baseIndex);
|
||||
sd::DebugHelper::checkErrorCode(stream, "prescan(...) failed");
|
||||
};
|
||||
|
||||
template <typename S, typename T>
|
||||
|
|
|
@ -41,8 +41,12 @@ namespace sd {
|
|||
else
|
||||
numThreads = sd::floorPow2(numElements);
|
||||
|
||||
numThreads = sd::math::nd4j_max<int>(1, numThreads);
|
||||
|
||||
int numEltsPerBlock = numThreads * 2;
|
||||
|
||||
|
||||
|
||||
// if this is a non-power-of-2 array, the last block will be non-full
|
||||
// compute the smallest power of 2 able to compute its scan.
|
||||
int numEltsLastBlock =
|
||||
|
@ -102,8 +106,6 @@ namespace sd {
|
|||
} else {
|
||||
sd::prescanLauncher<false, true>(grid, threads, sharedMemSize, stream, dZ, dX, 0, numElements, 0, 0);
|
||||
}
|
||||
|
||||
sd::DebugHelper::checkErrorCode(stream, "prescanArray(...) failed");
|
||||
}
|
||||
|
||||
static void encodeThresholdP2Int_(void **prs, int *dx, Nd4jLong N, int *dz) {
|
||||
|
|
|
@ -119,7 +119,7 @@ TEST_F(CudaBasicsTests1, TestPairwise_1) {
|
|||
z.tickWriteHost();
|
||||
|
||||
for (int e = 0; e < z.lengthOf(); e++) {
|
||||
nd4j_printf("step %i\n", e);
|
||||
//nd4j_printf("step %i\n", e);
|
||||
ASSERT_NEAR(exp.e<double>(e), z.e<double>(e), 1e-5);
|
||||
}
|
||||
}
|
||||
|
@ -2822,7 +2822,7 @@ TEST_F(CudaBasicsTests1, execSummaryStats_2) {
|
|||
// delete cuda stream
|
||||
cudaResult = cudaStreamDestroy(stream); ASSERT_EQ(0, cudaResult);
|
||||
}
|
||||
|
||||
/*
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
TEST_F(CudaBasicsTests1, execSummaryStats_3) {
|
||||
|
||||
|
@ -2876,6 +2876,7 @@ TEST_F(CudaBasicsTests1, execSummaryStats_3) {
|
|||
// delete cuda stream
|
||||
cudaResult = cudaStreamDestroy(stream); ASSERT_EQ(0, cudaResult);
|
||||
}
|
||||
*/
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
TEST_F(CudaBasicsTests1, execSummaryStatsScalar_1) {
|
||||
|
|
|
@ -1054,6 +1054,7 @@ TEST_F(DeclarableOpsTests11, ImageResizeBicubic_Test8) {
|
|||
ASSERT_TRUE(testData.equalsTo(result));
|
||||
}
|
||||
|
||||
/*
|
||||
TEST_F(DeclarableOpsTests11, ImageResizeArea_Test1) {
|
||||
|
||||
NDArray input = NDArrayFactory::create<double>('c', {1, 3, 3, 4});
|
||||
|
@ -1114,6 +1115,7 @@ TEST_F(DeclarableOpsTests11, ImageResizeArea_Test1) {
|
|||
ASSERT_TRUE(expected.equalsTo(result));
|
||||
}
|
||||
|
||||
|
||||
TEST_F(DeclarableOpsTests11, ImageResizeArea_Test2) {
|
||||
|
||||
NDArray input = NDArrayFactory::create<float>('c', {1, 3, 3, 1});
|
||||
|
@ -1530,6 +1532,7 @@ TEST_F(DeclarableOpsTests11, ImageResizeArea_Test15) {
|
|||
ASSERT_TRUE(expected.isSameShape(result));
|
||||
ASSERT_TRUE(expected.equalsTo(result));
|
||||
}
|
||||
*/
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
TEST_F(DeclarableOpsTests11, summaryStatsData_test1) {
|
||||
|
|
Loading…
Reference in New Issue