Allow the native __half atomicAdd only when building with CUDA 10 (and __CUDA_ARCH__ >= 700)
Signed-off-by: raver119 <raver119@gmail.com>
This commit is contained in:
parent
f6f9437a36
commit
9d03bb9425
@@ -1057,7 +1057,7 @@ inline __device__ uint64_t nd4j_atomicAdd<uint64_t>(uint64_t* address, uint64_t
|
||||
|
||||
template <>
|
||||
inline __device__ float16 nd4j_atomicAdd<float16>(float16* address, float16 val) {
|
||||
#if __CUDA_ARCH__ >= 700
|
||||
#if __CUDA_ARCH__ >= 700 && defined(CUDA_10)
|
||||
atomicAdd(reinterpret_cast<__half*>(address), val.data);
|
||||
#else
|
||||
auto address_as_ull = (int*) address;
|
||||
|
Loading…
x
Reference in New Issue
Block a user