621 lines
		
	
	
		
			19 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			621 lines
		
	
	
		
			19 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /*******************************************************************************
 | |
|  * Copyright (c) 2015-2018 Skymind, Inc.
 | |
|  *
 | |
|  * This program and the accompanying materials are made available under the
 | |
|  * terms of the Apache License, Version 2.0 which is available at
 | |
|  * https://www.apache.org/licenses/LICENSE-2.0.
 | |
|  *
 | |
|  * Unless required by applicable law or agreed to in writing, software
 | |
|  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 | |
|  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 | |
|  * License for the specific language governing permissions and limitations
 | |
|  * under the License.
 | |
|  *
 | |
|  * SPDX-License-Identifier: Apache-2.0
 | |
|  ******************************************************************************/
 | |
| 
 | |
| //
 | |
| // @author raver119@gmail.com
 | |
| //
 | |
| 
 | |
| #ifndef LIBND4J_HELPER_GENERATOR_H
 | |
| #define LIBND4J_HELPER_GENERATOR_H
 | |
| 
 | |
| #include <op_boilerplate.h>
 | |
| #include <pointercast.h>
 | |
| #include <array/DataTypeUtils.h>
 | |
| #include <dll.h>
 | |
| 
 | |
| #ifdef _MSC_VER
 | |
| // include for uint64_t on MSVC
 | |
| #include <stdint.h>
 | |
| #elif ANDROID
 | |
| #include <stdint.h>
 | |
| 
 | |
| #ifndef UINT64_C
 | |
| #if defined(__LP64__)
 | |
| #define UINT64_C(c)     c ## UL
 | |
| #else
 | |
| #define UINT64_C(c)     c ## ULL
 | |
| #endif //LP64
 | |
| #endif // UINT64
 | |
| 
 | |
| #endif // MSVC/ANDROID
 | |
| 
 | |
| 
 | |
| #ifdef __GNUC__
 | |
| #include <inttypes.h>
 | |
| #endif
 | |
| 
 | |
| 
 | |
| namespace nd4j {
 | |
|     namespace random {
 | |
| 
 | |
| #ifdef __CUDACC__
 | |
|         class ND4J_EXPORT CudaManaged {
 | |
|         private:
 | |
| 
 | |
|         protected:
 | |
|             void *devHolder;
 | |
| 
 | |
|         public:
 | |
|             void *operator new(size_t len) {
 | |
|                 void *ptr;
 | |
|                 cudaHostAlloc(&ptr, len, cudaHostAllocDefault);
 | |
|                 return ptr;
 | |
|              }
 | |
| 
 | |
|             void operator delete(void *ptr) {
 | |
|                 cudaFreeHost(ptr);
 | |
|             }
 | |
|         };
 | |
| 
 | |
|         class ND4J_EXPORT RandomBuffer : public CudaManaged {
 | |
| #else
 | |
|         class ND4J_EXPORT RandomBuffer {
 | |
| #endif
 | |
|         private:
 | |
|             void *devHolder;
 | |
|             Nd4jLong size;
 | |
|             uint64_t *buffer;
 | |
|             uint64_t *devBuffer;
 | |
|             Nd4jLong offset;
 | |
|             Nd4jLong seed;
 | |
|             Nd4jLong position;
 | |
|             Nd4jLong generation;
 | |
|             Nd4jLong currentPosition;
 | |
|             Nd4jLong amplifier;
 | |
|             unsigned int synchronizer;
 | |
| 
 | |
| #ifdef __CUDACC__
 | |
|             curandGenerator_t gen;
 | |
| #endif
 | |
| 
 | |
|         public:
 | |
|             /**
 | |
|              * This method allocates buffer of size * sizeof(Nd4jLong)
 | |
|              *
 | |
|              * @param size
 | |
|              * @return
 | |
|              */
 | |
| #ifdef __CUDACC__
 | |
|             __host__
 | |
|             RandomBuffer(Nd4jLong seed, Nd4jLong size, uint64_t *hostBuffer, uint64_t *devBuffer) {
 | |
|                 this->buffer = hostBuffer;
 | |
|                 this->seed = seed;
 | |
|                 this->size = size;
 | |
|                 this->generation = 1;
 | |
|                 this->currentPosition = 0;
 | |
|                 this->offset = 0;
 | |
|                 this->amplifier = seed;
 | |
|                 this->synchronizer = 0;
 | |
|                 this->devBuffer = devBuffer;
 | |
| 
 | |
|                 cudaMalloc(&devHolder, sizeof(nd4j::random::RandomBuffer));
 | |
|             }
 | |
| 
 | |
|             __host__
 | |
|             Nd4jPointer getDevicePointer() {
 | |
|                 return reinterpret_cast<Nd4jPointer>(devHolder);
 | |
|             }
 | |
| 
 | |
|             __host__
 | |
|             ~RandomBuffer() {
 | |
|                 cudaFree(devHolder);
 | |
|             }
 | |
| 
 | |
|             __host__
 | |
|             void propagateToDevice(nd4j::random::RandomBuffer *buffer, cudaStream_t stream) {
 | |
|                 cudaMemcpyAsync(devHolder, buffer, sizeof(nd4j::random::RandomBuffer), cudaMemcpyHostToDevice, stream);
 | |
|             }
 | |
| 
 | |
|             __host__ __device__
 | |
| #endif
 | |
|             RandomBuffer(Nd4jLong seed, Nd4jLong size, uint64_t *buffer) {
 | |
|                 this->buffer = buffer;
 | |
|                 this->seed = seed;
 | |
|                 this->size = size;
 | |
|                 this->generation = 1;
 | |
|                 this->currentPosition = 0;
 | |
|                 this->offset = 0;
 | |
|                 this->amplifier = seed;
 | |
|                 this->synchronizer = 0;
 | |
|                 this->devBuffer = buffer;
 | |
|             }
 | |
| 
 | |
|             inline _CUDA_HD uint64_t *getBuffer() {
 | |
|                 return this->buffer;
 | |
|             }
 | |
| 
 | |
|             inline _CUDA_HD uint64_t *getDeviceBuffer() {
 | |
|                 return this->devBuffer;
 | |
|             }
 | |
| 
 | |
| #ifdef __CUDACC__
 | |
|             _CUDA_HD curandGenerator_t *getGeneratorPointer() {
 | |
|                 return &gen;
 | |
|             }
 | |
| 
 | |
|             _CUDA_HD curandGenerator_t getGenerator() {
 | |
|                 return gen;
 | |
|             }
 | |
| 
 | |
| 
 | |
|             _CUDA_H void setBuffer(uint64_t *ptr) {
 | |
|                 this->buffer = ptr;
 | |
|             }
 | |
| #endif
 | |
| 
 | |
|             inline _CUDA_HD Nd4jLong getSize() {
 | |
|                 return this->size;
 | |
|             }
 | |
| 
 | |
|             inline _CUDA_HD Nd4jLong getSeed() {
 | |
|                 return this->seed;
 | |
|             }
 | |
| 
 | |
|             void _CUDA_HD setSeed(Nd4jLong seed) {
 | |
|                 this->seed = seed;
 | |
|                 this->amplifier = seed;
 | |
|                 this->generation = 1;
 | |
|             }
 | |
| 
 | |
|             Nd4jLong _CUDA_HD getAllocatedSize() {
 | |
|                 return this->size * sizeof(double);
 | |
|             }
 | |
| 
 | |
|             inline _CUDA_HD Nd4jLong getOffset() {
 | |
|                 return this->currentPosition;
 | |
|             }
 | |
| 
 | |
|             void _CUDA_HD setOffset(Nd4jLong offset) {
 | |
|                 this->currentPosition = offset;
 | |
|             }
 | |
| 
 | |
|             void _CUDA_HD reSeed(Nd4jLong amplifier) {
 | |
|                 this->amplifier = amplifier;
 | |
|             }
 | |
| 
 | |
|             inline _CUDA_D uint64_t getElement(Nd4jLong position) {
 | |
|                 Nd4jLong actualPosition = this->getOffset() + position;
 | |
|                 Nd4jLong tempGen = generation;
 | |
|                 if (actualPosition >= this->size) {
 | |
|                     tempGen += actualPosition / this->size;
 | |
|                     actualPosition = actualPosition % this->size;
 | |
|                 }
 | |
| #ifdef __CUDACC__
 | |
| //                __syncthreads();
 | |
| 
 | |
|                 auto ret = static_cast<uint64_t>(devBuffer[actualPosition]);
 | |
| #else
 | |
|                 auto ret = static_cast<uint64_t>(buffer[actualPosition]);
 | |
| #endif
 | |
| 
 | |
|                 if (tempGen != generation)
 | |
|                     ret = safeShift(ret, tempGen);
 | |
| 
 | |
|                 if(generation > 1)
 | |
|                     ret = safeShift(ret, generation);
 | |
| 
 | |
|                 if (amplifier != seed)
 | |
|                     ret = safeShift(ret, amplifier);
 | |
| 
 | |
| #ifdef __CUDACC__
 | |
| //                __syncthreads();
 | |
| #endif
 | |
|                 if (amplifier != seed || generation > 1 || tempGen != generation)
 | |
|                     ret = next64(seedConv(static_cast<Nd4jLong>(ret)));
 | |
| 
 | |
|                 return ret;
 | |
|             }
 | |
| 
 | |
|             uint64_t _CUDA_HD next64(uint64_t shiftedSeed) {
 | |
|                 const auto s0 = static_cast<uint64_t>(shiftedSeed);
 | |
|                 auto s1 = static_cast<uint64_t>(shiftedSeed) % nd4j::DataTypeUtils::max<int>() + 11;
 | |
|                 uint64_t r0, r1;
 | |
| 
 | |
|                 s1 ^= s0;
 | |
|                 r0 = rotl(s0, 55) ^ s1 ^ (s1 << 14); // a, b
 | |
|                 r1 = rotl(s1, 36); // c
 | |
| 
 | |
|                 return r0 + r1;
 | |
|             }
 | |
| 
 | |
|             static _CUDA_HD inline uint64_t rotl(const uint64_t x, uint64_t k) {
 | |
|                 return (x << k) | (x >> (64 - k));
 | |
|             }
 | |
| 
 | |
|             uint64_t static _CUDA_HD inline safeShift(uint64_t x, uint64_t y) {
 | |
|                 if (y != 0 && x > nd4j::DataTypeUtils::max<uint64_t>() / y) {
 | |
|                     return x / y + 11;
 | |
|                 } else return (x * y) + 11;
 | |
|             }
 | |
| 
 | |
|             uint64_t _CUDA_HD seedConv(Nd4jLong seed) {
 | |
|                 uint64_t x = static_cast<uint64_t>(seed);
 | |
|                 uint64_t z = (x += UINT64_C(0x9E3779B97F4A7C15));
 | |
|                 z = (z ^ (z >> 30)) * UINT64_C(0xBF58476D1CE4E5B9);
 | |
|                 z = (z ^ (z >> 27)) * UINT64_C(0x94D049BB133111EB);
 | |
|                 return z ^ (z >> 31);
 | |
|             }
 | |
| 
 | |
|             void _CUDA_HD incrementGeneration() {
 | |
|                 this->generation++;
 | |
|             }
 | |
| 
 | |
|             Nd4jLong _CUDA_HD getNextIndex() {
 | |
|                 currentPosition++;
 | |
|                 if (currentPosition >= size) {
 | |
|                     currentPosition = 0;
 | |
|                     generation++;
 | |
|                 }
 | |
|                 Nd4jLong ret = currentPosition;
 | |
| 
 | |
|                 return ret;
 | |
|             }
 | |
| 
 | |
|             uint64_t _CUDA_HD getNextElement() {
 | |
|                 // TODO: proper implementation needed here
 | |
|                 return generation == 1 ? buffer[getNextIndex()] : buffer[getNextIndex()]  * generation;
 | |
|             }
 | |
| 
 | |
| 
 | |
|             /**
 | |
|              * This method skips X elements from buffer
 | |
|              *
 | |
|              * @param numberOfElements number of elements to skip
 | |
|              */
 | |
| #ifdef __CUDACC__
 | |
|             __device__
 | |
|             void rewind(Nd4jLong numberOfElements) {
 | |
|                 if (gridDim.x > 1) {
 | |
|                     __shared__ bool amLast;
 | |
| 
 | |
|                     if (threadIdx.x == 0) {
 | |
| 						unsigned int ticket = atomicInc(&synchronizer, gridDim.x);
 | |
| 						amLast = (ticket == gridDim.x - 1);
 | |
| 					}
 | |
| 					__syncthreads();
 | |
| 
 | |
| 					if (amLast) {
 | |
| 					    if (threadIdx.x == 0) {
 | |
| 					        synchronizer = 0;
 | |
| 
 | |
| 					        Nd4jLong newPos = this->getOffset() + numberOfElements;
 | |
|                             if (newPos > this->getSize()) {
 | |
|                                 generation += newPos / this->size;
 | |
|                                 newPos = newPos % this->size;
 | |
|                             } else if (newPos == this->getSize()) {
 | |
|                                 newPos = 0;
 | |
|                                 generation++;
 | |
|                             }
 | |
| 
 | |
|                             this->setOffset(newPos);
 | |
| 					    }
 | |
| 					}
 | |
|                 } else {
 | |
|                     if (threadIdx.x == 0) {
 | |
|                         Nd4jLong newPos = this->getOffset() + numberOfElements;
 | |
|                         if (newPos > this->getSize()) {
 | |
|                             generation += newPos / this->size;
 | |
|                             newPos = newPos % this->size;
 | |
|                         } else if (newPos == this->getSize()) {
 | |
|                             generation++;
 | |
|                             newPos = 0;
 | |
|                         }
 | |
| 
 | |
|                         this->setOffset(newPos);
 | |
|                     }
 | |
|                 }
 | |
|             }
 | |
| #endif
 | |
|             void rewindH(Nd4jLong numberOfElements) {
 | |
|                 Nd4jLong newPos = this->getOffset() + numberOfElements;
 | |
|                 if (newPos > this->getSize()) {
 | |
|                     generation += newPos / this->size;
 | |
|                     newPos = newPos % this->size;
 | |
|                 }
 | |
|                 else if (newPos == this->getSize()) {
 | |
|                     generation++;
 | |
|                     newPos = 0;
 | |
|                 }
 | |
| 
 | |
|                 this->setOffset(newPos);
 | |
|             }
 | |
| 
 | |
|             /**
 | |
|             * This method returns random int in range [0..MAX_INT]
 | |
|             * @return
 | |
|             */
 | |
|             int _CUDA_D nextInt() {
 | |
|                 auto u = nextUInt64();
 | |
|                 return u <= nd4j::DataTypeUtils::max<int>() ? static_cast<int>(u) : static_cast<int>(u % nd4j::DataTypeUtils::max<int>());
 | |
|             };
 | |
| 
 | |
|             uint64_t _CUDA_D nextUInt64() {
 | |
|                 return getNextElement();
 | |
|             }
 | |
| 
 | |
|             /**
 | |
|              * This method returns random int in range [0..to]
 | |
|              * @param to
 | |
|              * @return
 | |
|              */
 | |
|             int _CUDA_D nextInt(int to) {
 | |
|                 int r = nextInt();
 | |
|                 int m = to - 1;
 | |
|                 if ((to & m) == 0)  // i.e., bound is a power of 2
 | |
|                     r = ((to * (Nd4jLong) r) >> 31);
 | |
|                 else {
 | |
|                     for (int u = r;
 | |
|                          u - (r = u % to) + m < 0;
 | |
|                          u = nextInt());
 | |
|                 }
 | |
|                 return r;
 | |
|             };
 | |
| 
 | |
|             /**
 | |
|              * This method returns random int in range [from..to]
 | |
|              * @param from
 | |
|              * @param to
 | |
|              * @return
 | |
|              */
 | |
|             int _CUDA_D nextInt(int from, int to) {
 | |
|                 if (from == 0)
 | |
|                     return nextInt(to);
 | |
| 
 | |
|                 return from + nextInt(to - from);
 | |
|             };
 | |
| 
 | |
| 
 | |
|             /**
 | |
|              * This method returns random T in range of [0..1]
 | |
|              * @return
 | |
|              */
 | |
|             template<typename T>
 | |
|             _CUDA_D T nextT() {
 | |
|                 auto u = static_cast<float>(nextUInt64());
 | |
|                 auto m = static_cast<float>(nd4j::DataTypeUtils::max<uint64_t>());
 | |
|                 return static_cast<T>(u / m);
 | |
|             }
 | |
| 
 | |
|             /**
 | |
|              * This method returns random T in range of [0..to]
 | |
|              * @param to
 | |
|              * @return
 | |
|              */
 | |
|             template<typename T>
 | |
|             _CUDA_D T nextT(T to) {
 | |
|                 if (to == static_cast<T>(1.0f))
 | |
|                     return nextT<T>();
 | |
| 
 | |
|                 return nextT<T>(static_cast<T>(0.0f), to);
 | |
|             }
 | |
| 
 | |
|             /**
 | |
|              * This method returns random T in range [from..to]
 | |
|              * @param from
 | |
|              * @param to
 | |
|              * @return
 | |
|              */
 | |
|             template<typename T>
 | |
|             _CUDA_D T inline nextT(T from, T to) {
 | |
|                 return from + (nextT<T>() * (to - from));
 | |
|             }
 | |
| 
 | |
|             inline _CUDA_D uint64_t relativeUInt64(Nd4jLong index) {
 | |
|                 return getElement(index);
 | |
|             }
 | |
| 
 | |
|             /**
 | |
|              *  relative methods are made as workaround for lock-free concurrent execution
 | |
|              */
 | |
|             inline int _CUDA_D relativeInt(Nd4jLong index) {
 | |
|                 auto u = relativeUInt64(index);
 | |
|                 return u <= nd4j::DataTypeUtils::max<int>() ? static_cast<int>(u) : static_cast<int>(u % nd4j::DataTypeUtils::max<int>());
 | |
|             }
 | |
| 
 | |
|             /**
 | |
|              * This method returns random int within [0..to]
 | |
|              *
 | |
|              * @param index
 | |
|              * @param to
 | |
|              * @return
 | |
|              */
 | |
|             inline int _CUDA_D relativeInt(Nd4jLong index, int to) {
 | |
|                 auto rel = relativeInt(index);
 | |
|                 return rel % to;
 | |
|             }
 | |
| 
 | |
|             /**
 | |
|              * This method returns random int within [from..to]
 | |
|              *
 | |
|              * @param index
 | |
|              * @param to
 | |
|              * @param from
 | |
|              * @return
 | |
|              */
 | |
|             inline _CUDA_D int relativeInt(Nd4jLong index, int from, int to) {
 | |
|                 if (from == 0)
 | |
|                     return relativeInt(index, to);
 | |
| 
 | |
|                 return from + relativeInt(index, to - from);
 | |
|             }
 | |
| 
 | |
|             /**
 | |
|              * This method returns random T within [0..1]
 | |
|              *
 | |
|              * @param index
 | |
|              * @return
 | |
|              */
 | |
|             template <typename T>
 | |
|             inline _CUDA_D T relativeT(Nd4jLong index) {
 | |
|                 /**
 | |
|                  * Basically we just get float u/m value, and convert into to
 | |
|                  *
 | |
|                  * FIXME: once we add support for additional datatypes this code must be tweaked
 | |
|                  */
 | |
|                 auto u = static_cast<float>(relativeUInt64(index));
 | |
|                 auto m = static_cast<float> (nd4j::DataTypeUtils::max<uint64_t>());
 | |
|                 return static_cast<T>(u / m);
 | |
|             }
 | |
| 
 | |
| /**
 | |
|  * This method returns random T within [0..to]
 | |
|  *
 | |
|  * @param index
 | |
|  * @param to
 | |
|  * @return
 | |
|  */
 | |
| 
 | |
|             template<typename T>
 | |
|             _CUDA_D T relativeT(Nd4jLong index, T to) {
 | |
|                 if (to == static_cast<T>(1.0f))
 | |
|                     return relativeT<T>(index);
 | |
| 
 | |
|                 return relativeT<T>(index, static_cast<T>(0.0f), to);
 | |
|             }
 | |
| 
 | |
| /**
 | |
|  * This method returns random T within [from..to]
 | |
|  *
 | |
|  * @param index
 | |
|  * @param from
 | |
|  * @param to
 | |
|  * @return
 | |
|  */
 | |
|             template<typename T>
 | |
|             _CUDA_D T relativeT(Nd4jLong index, T from, T to) {
 | |
|                 return from + (relativeT<T>(index) * (to - from));
 | |
|             }
 | |
| 
 | |
|         };
 | |
| 
 | |
|         class ND4J_EXPORT IGenerator {
 | |
|         protected:
 | |
|             Nd4jLong limit;
 | |
|             Nd4jLong seed;
 | |
|             uint64_t *buffer;
 | |
|             nd4j::random::RandomBuffer *realBuffer;
 | |
| 
 | |
|         public:
 | |
| 
 | |
|             _CUDA_HD IGenerator(nd4j::random::RandomBuffer *buffer) {
 | |
|                 this->limit = buffer->getSize();
 | |
|                 this->buffer = reinterpret_cast<uint64_t *>(buffer->getBuffer());
 | |
|                 this->realBuffer = buffer;
 | |
|                 this->seed = buffer->getSeed();
 | |
|             }
 | |
| 
 | |
| 
 | |
|             _CUDA_HD RandomBuffer *getBuffer() {
 | |
|                 return realBuffer;
 | |
|             }
 | |
| 
 | |
|             _CUDA_HD void setOffset(Nd4jLong offset) {
 | |
|                 this->realBuffer->setOffset(offset);
 | |
|             }
 | |
| 
 | |
|             _CUDA_HD Nd4jLong getElementAbsolute(Nd4jLong position) {
 | |
|                 return buffer[position];
 | |
|             }
 | |
| 
 | |
|             _CUDA_HD Nd4jLong getElementRelative(Nd4jLong position) {
 | |
|                 return buffer[realBuffer->getOffset() + position];
 | |
|             }
 | |
| 
 | |
|             virtual _CUDA_HD void refreshBuffer() = 0;
 | |
|         };
 | |
| 
 | |
| 
 | |
| 
 | |
|         class ND4J_EXPORT Xoroshiro128 : public IGenerator {
 | |
|         protected:
 | |
|             uint64_t state[2];
 | |
| 
 | |
|             static inline _CUDA_HD uint64_t rotl(const uint64_t x, int k) {
 | |
|                 return (x << k) | (x >> (64 - k));
 | |
|             }
 | |
| 
 | |
|             /**
 | |
|              * This method returns 64 random bits
 | |
|              * @return
 | |
|              */
 | |
|             uint64_t _CUDA_HD next64() {
 | |
|                 const uint64_t s0 = state[0];
 | |
|                 uint64_t s1 = state[1];
 | |
|                 const uint64_t result = s0 + s1;
 | |
| 
 | |
|                 s1 ^= s0;
 | |
|                 state[0] = rotl(s0, 55) ^ s1 ^ (s1 << 14); // a, b
 | |
|                 state[1] = rotl(s1, 36); // c
 | |
| 
 | |
|                 return result;
 | |
|             }
 | |
| 
 | |
|             uint64_t _CUDA_HD seedConv(Nd4jLong seed) {
 | |
|                 uint64_t x = static_cast<uint64_t>(seed);
 | |
|                 uint64_t z = (x += UINT64_C(0x9E3779B97F4A7C15));
 | |
|                 z = (z ^ (z >> 30)) * UINT64_C(0xBF58476D1CE4E5B9);
 | |
|                 z = (z ^ (z >> 27)) * UINT64_C(0x94D049BB133111EB);
 | |
|                 return z ^ (z >> 31);
 | |
|             }
 | |
| 
 | |
|             void _CUDA_H jump(void) {
 | |
|                 static const uint64_t JUMP[] = { 0xbeac0467eba5facb, 0xd86b048b86aa9922 };
 | |
| 
 | |
|                 uint64_t s0 = 0;
 | |
|                 uint64_t s1 = 0;
 | |
|                 for(unsigned int i = 0; i < sizeof JUMP / sizeof *JUMP; i++)
 | |
|                     for(int b = 0; b < 64; b++) {
 | |
|                         if (JUMP[i] & 1ULL << b) {
 | |
|                             s0 ^= state[0];
 | |
|                             s1 ^= state[1];
 | |
|                         }
 | |
|                         next64();
 | |
|                     }
 | |
| 
 | |
|                 state[0] = s0;
 | |
|                 state[1] = s1;
 | |
|             }
 | |
| 
 | |
|         public:
 | |
|             _CUDA_HD Xoroshiro128(nd4j::random::RandomBuffer *buffer) : IGenerator(buffer) {
 | |
|                 //
 | |
|             }
 | |
| 
 | |
|             _CUDA_HD void refreshBuffer() {
 | |
|                 state[0] = seedConv(this->seed);
 | |
|                 state[1] = seedConv(this->seed * 119 + 3);
 | |
| 
 | |
|                 int fd = 3 + 3;
 | |
| 
 | |
|                 for (Nd4jLong i = 0; i < limit; i++) {
 | |
|                     buffer[i] = next64();
 | |
|                 }
 | |
|             }
 | |
|         };
 | |
|     }
 | |
| }
 | |
| #endif //LIBND4J_HELPER_GENERATOR_H
 |