bump up shared memory (#449)

Signed-off-by: raver119@gmail.com <raver119@gmail.com>
master
raver119 2020-05-11 16:55:23 +03:00 committed by GitHub
parent 0d8b2d65cd
commit 05c0f12c73
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 1 additions and 0 deletions

View File

@ -516,6 +516,7 @@ BUILD_SINGLE_TEMPLATE(template void ND4J_EXPORT cudaDecodeBitmapGeneric, (dim3 &
template <bool storeSum, bool isNP2>
__host__ void prescanLauncher(dim3 &blocks, dim3 &threads, int shmem, cudaStream_t *stream, int *g_odata, const int *g_idata, int *g_blockSums, int n, int blockIndex, int baseIndex) {
//printf("Prescan grid: <%i/%i/%i>; threads: <%i/%i/%i>; shareMemSize: %i\n", blocks.x, blocks.y, blocks.z, threads.x, threads.y, threads.z, shmem);
shmem = sd::math::nd4j_max<int>(shmem, 16384);
prescan<storeSum, isNP2><<<blocks, threads, shmem, *stream>>>(g_odata, g_idata, g_blockSums, n, blockIndex, baseIndex);
};