From 966642c1c9b0fd60ef98b8d7a0f6804ca45d7794 Mon Sep 17 00:00:00 2001 From: raver119 Date: Sat, 30 May 2020 21:13:33 +0300 Subject: [PATCH] Rng tweaks (#479) * initial commit Signed-off-by: raver119@gmail.com * Java Random.getFloat()/getDouble() methods mapped to C++ Signed-off-by: raver119@gmail.com * Refactored relativeT for float and double data types. Signed-off-by: shugeo * Refactored float relativeT method. Signed-off-by: shugeo * Refactored relativeT Signed-off-by: shugeo * - additional rng tests - float/double uniform generation methos slightly changed Signed-off-by: raver119@gmail.com * use bitset instead of manual conversion Signed-off-by: raver119@gmail.com * rollback valueBits changes Signed-off-by: raver119@gmail.com * remove unused shapelist Signed-off-by: raver119@gmail.com * update KMeans ground truth test Signed-off-by: raver119@gmail.com * dedicated union to make MSVC happy Signed-off-by: raver119 * minor tweaks Signed-off-by: raver119 * .seh_savexmm workaround? Signed-off-by: raver119 * don't use march=native in tests on windows Signed-off-by: raver119 Co-authored-by: shugeo --- .../clustering/kmeans/KMeansTest.java | 8 +- libnd4j/include/array/NDArray.hXX | 6 +- libnd4j/include/graph/RandomGenerator.h | 103 ++++++++++-------- libnd4j/include/helpers/StringUtils.h | 11 ++ libnd4j/include/helpers/impl/BitwiseUtils.cpp | 22 ++-- libnd4j/include/helpers/impl/StringUtils.cpp | 26 +++++ libnd4j/include/legacy/NativeOps.h | 2 + libnd4j/include/legacy/cpu/NativeOps.cpp | 8 ++ libnd4j/include/legacy/cuda/NativeOps.cu | 8 ++ .../generic/images/resize_images.cpp | 1 - libnd4j/include/types/u32.h | 40 +++++++ libnd4j/tests_cpu/layers_tests/CMakeLists.txt | 4 +- libnd4j/tests_cpu/layers_tests/RNGTests.cpp | 103 +++++++++++++++++- .../tests_cpu/layers_tests/StringTests.cpp | 12 ++ .../java/org/nd4j/nativeblas/NativeOps.java | 2 + .../main/java/org/nd4j/rng/NativeRandom.java | 8 +- .../linalg/jcublas/rng/CudaNativeRandom.java | 10 ++ .../java/org/nd4j/nativeblas/Nd4jCuda.java | 13 ++- .../cpu/nativecpu/rng/CpuNativeRandom.java | 10 ++ .../java/org/nd4j/nativeblas/Nd4jCpu.java | 13 ++- .../java/org/nd4j/linalg/rng/RandomTests.java | 22 ++++ 21 files changed, 350 insertions(+), 82 deletions(-) create mode 100644 libnd4j/include/types/u32.h diff --git a/deeplearning4j/deeplearning4j-nearestneighbors-parent/nearestneighbor-core/src/test/java/org/deeplearning4j/clustering/kmeans/KMeansTest.java b/deeplearning4j/deeplearning4j-nearestneighbors-parent/nearestneighbor-core/src/test/java/org/deeplearning4j/clustering/kmeans/KMeansTest.java index abbfa04bc..e01274a71 100644 --- a/deeplearning4j/deeplearning4j-nearestneighbors-parent/nearestneighbor-core/src/test/java/org/deeplearning4j/clustering/kmeans/KMeansTest.java +++ b/deeplearning4j/deeplearning4j-nearestneighbors-parent/nearestneighbor-core/src/test/java/org/deeplearning4j/clustering/kmeans/KMeansTest.java @@ -273,10 +273,10 @@ public class KMeansTest extends BaseDL4JTest { ClusterSet clusterSet = kMeansClustering.applyTo(points); double[] centroid1 = {2.44e8, 2.71e8, 2.98e8, 3.25e8}; - double[] centroid2 = {5.14e8, 5.41e8, 5.68e8, 5.95e8}; - double[] centroid3 = {1000000.0, 2.8E7, 5.5E7, 8.2E7}; - double[] centroid4 = {7.03E8, 7.3E8, 7.57E8, 7.84E8}; - double[] centroid5 = {3.79E8, 4.06E8, 4.33E8, 4.6E8}; + double[] centroid2 = {1000000.0, 2.8E7, 5.5E7, 8.2E7}; + double[] centroid3 = {5.95E8, 6.22e8, 6.49e8, 6.76e8}; + double[] centroid4 = {3.79E8, 4.06E8, 4.33E8, 4.6E8}; + double[] centroid5 = {5.5E7, 8.2E7, 1.09E8, 1.36E8}; assertArrayEquals(centroid1, clusterSet.getClusters().get(0).getCenter().getArray().toDoubleVector(), 1e-4); assertArrayEquals(centroid2, clusterSet.getClusters().get(1).getCenter().getArray().toDoubleVector(), 1e-4); diff --git a/libnd4j/include/array/NDArray.hXX b/libnd4j/include/array/NDArray.hXX index 773d845ab..9e48b05de 100644 --- a/libnd4j/include/array/NDArray.hXX +++ b/libnd4j/include/array/NDArray.hXX @@ -1671,11 +1671,11 @@ void NDArray::printLinearBuffer() const { } else if(this->dataType() == sd::DataType::FLOAT32) { for(Nd4jLong e = 0; e < len; e++) - printf("%.3f, ", this->bufferAsT()[e * ews]); + printf("%.8f, ", this->bufferAsT()[e * ews]); } else if(this->dataType() == sd::DataType::DOUBLE) { for(Nd4jLong e = 0; e < len; e++) - printf("%.3f, ", this->bufferAsT()[e * ews]); + printf("%.8f, ", this->bufferAsT()[e * ews]); } else throw std::invalid_argument("NDArray::printLinearBuffer: not implemented yet for this data type !"); @@ -1773,7 +1773,7 @@ void NDArray::printIndexedBuffer(const char* msg, Nd4jLong limit) const { if (this->isZ()) printf("%lld\n", this->e(0)); else if (this->isR()) - printf("%f\n", this->e(0)); + printf("%.8f\n", this->e(0)); else if (this->isB()) { printf("%s\n", this->e(0)?"true":"false"); } diff --git a/libnd4j/include/graph/RandomGenerator.h b/libnd4j/include/graph/RandomGenerator.h index ef06c345d..407993a09 100644 --- a/libnd4j/include/graph/RandomGenerator.h +++ b/libnd4j/include/graph/RandomGenerator.h @@ -22,6 +22,7 @@ #define LIBND4J_GRAPH_RNG_H #include +#include #include #include #include @@ -29,6 +30,7 @@ #include #include #include +#include #ifdef __CUDACC__ #include @@ -79,9 +81,9 @@ namespace sd { */ static FORCEINLINE Nd4jLong currentMilliseconds(); - - FORCEINLINE _CUDA_HD uint32_t xoroshiro32(Nd4jLong index); - FORCEINLINE _CUDA_HD uint64_t xoroshiro64(Nd4jLong index); + public: + FORCEINLINE _CUDA_HD uint32_t xoroshiro32(uint64_t index); + FORCEINLINE _CUDA_HD uint64_t xoroshiro64(uint64_t index); /** * This method returns integer value between 0 and MAX_UINT @@ -119,7 +121,7 @@ namespace sd { FORCEINLINE _CUDA_HD int relativeInt(Nd4jLong index); FORCEINLINE _CUDA_HD Nd4jLong relativeLong(Nd4jLong index); - FORCEINLINE _CUDA_HD void rewindH(Nd4jLong steps); + FORCEINLINE _CUDA_HD void rewindH(uint64_t steps); /** * These methods set up only node states, with non-changed root ones @@ -172,6 +174,24 @@ namespace sd { return v; } + template <> + _CUDA_HD FORCEINLINE float RandomGenerator::relativeT(Nd4jLong index) { + u32 u; + u._u32 = (0x3f800000 | (this->xoroshiro32(index) >> 9)); + return u._f32 - 1.0f; + } + + template <> + _CUDA_HD FORCEINLINE double RandomGenerator::relativeT(Nd4jLong index) { +#ifdef __DOUBLE_RNG__ + u64 u; + u._ulong = ((UINT64_C(0x3FF) << 52) | (this->xoroshiro64(index) >> 12)); + return u._double - 1.0; +#else + return (double) relativeT(index); +#endif + } + template <> _CUDA_HD FORCEINLINE uint64_t RandomGenerator::relativeT(Nd4jLong index) { return this->xoroshiro64(index); @@ -184,16 +204,14 @@ namespace sd { template <> _CUDA_HD FORCEINLINE int RandomGenerator::relativeT(Nd4jLong index) { - auto x = this->relativeT(index); - auto r = static_cast(x % DataTypeUtils::max()); - return r; + auto r = relativeT(index); + return r <= DataTypeUtils::max() ? r : r % DataTypeUtils::max(); } template <> _CUDA_HD FORCEINLINE Nd4jLong RandomGenerator::relativeT(Nd4jLong index) { - auto x = this->relativeT(index); - auto r = static_cast(x % DataTypeUtils::max()); - return r; + auto r = relativeT(index); + return r <= DataTypeUtils::max() ? r : r % DataTypeUtils::max(); } template @@ -220,24 +238,18 @@ namespace sd { template _CUDA_HD FORCEINLINE T RandomGenerator::relativeT(Nd4jLong index) { // This is default implementation for floating point types -#ifdef __DOUBLE_RNG__ - auto i = static_cast(this->relativeT(index)); - auto r = i / static_cast(DataTypeUtils::max()); - return static_cast(r); -#else - auto i = static_cast(this->relativeT(index)); - auto r = i / static_cast(DataTypeUtils::max()); - return static_cast(r); -#endif + return static_cast(relativeT(index)); } _CUDA_HD FORCEINLINE int RandomGenerator::relativeInt(Nd4jLong index) { - return relativeT(index); + auto r = relativeT(index); + return r <= DataTypeUtils::max() ? r : r % DataTypeUtils::max(); } _CUDA_HD FORCEINLINE Nd4jLong RandomGenerator::relativeLong(Nd4jLong index) { - return relativeT(index); + auto r = relativeT(index); + return r <= DataTypeUtils::max() ? r : r % DataTypeUtils::max(); } ////// @@ -249,23 +261,12 @@ namespace sd { return (x << k) | (x >> (64 - k)); } - _CUDA_HD FORCEINLINE uint32_t RandomGenerator::xoroshiro32(Nd4jLong index) { - - auto s0 = _rootState._ulong; - auto s1 = _nodeState._ulong; - - // xor by idx - s0 |= ((index + 2) * (s1 + 24243287)); - s1 ^= ((index + 2) * (s0 + 723829)); - - unsigned long val = 0; - val = s1 ^ s0; - int* pHalf = reinterpret_cast(&val); - - return rotl(*pHalf * 0x9E3779BB, 5) * 5; + static FORCEINLINE _CUDA_HD uint32_t next(uint32_t s0, uint32_t s1, uint32_t s2, uint32_t s3) { + const uint32_t result = rotl(s0 + s3, 7) + s0; + return result; } - _CUDA_HD FORCEINLINE uint64_t RandomGenerator::xoroshiro64(Nd4jLong index) { + _CUDA_HD FORCEINLINE uint32_t RandomGenerator::xoroshiro32(uint64_t index) { auto s0 = _rootState._ulong; auto s1 = _nodeState._ulong; @@ -273,23 +274,29 @@ namespace sd { s0 |= ((index + 2) * (s1 + 24243287)); s1 ^= ((index + 2) * (s0 + 723829)); - // since we're not modifying state - do rotl step right here - s1 ^= s0; - s0 = rotl(s0, 55) ^ s1 ^ (s1 << 14); - s1 = rotl(s1, 36); + unsigned long val = 0; + val = s1 ^ s0; + int* pHalf = reinterpret_cast(&val); - return s0 + s1; + return rotl(*pHalf * 0x9E3779BB, 5) * 5; } - _CUDA_HD FORCEINLINE void RandomGenerator::rewindH(Nd4jLong steps) { - auto s0 = _nodeState._du32._v0; - auto s1 = _nodeState._du32._v1; + _CUDA_HD FORCEINLINE uint64_t RandomGenerator::xoroshiro64(uint64_t index) { + uint64_t upper = ((uint64_t) xoroshiro32(index)) << 32; + uint32_t lower = xoroshiro32(sd::math::nd4j_rotl(index, 32)); + return upper + lower; + } - s1 ^= s0; - _nodeState._du32._v0 = rotl(s0, 26) ^ s1 ^ (s1 << 9); // a, b - _nodeState._du32._v1 = rotl(s1, 13); // c + _CUDA_HD FORCEINLINE void RandomGenerator::rewindH(uint64_t steps) { + // we only update node state, if any + auto s0 = _nodeState._du32._v0; + auto s1 = _nodeState._du32._v1; - _nodeState._long ^= (steps ^ 0xdeadbeef); + s1 ^= s0; + _nodeState._du32._v0 = rotl(s0, 26) ^ s1 ^ (s1 << 9); // a, b + _nodeState._du32._v1 = rotl(s1, 13); // c + + _nodeState._long ^= (steps ^ 0xdeadbeef); } } } diff --git a/libnd4j/include/helpers/StringUtils.h b/libnd4j/include/helpers/StringUtils.h index ef9586637..e5f9f2990 100644 --- a/libnd4j/include/helpers/StringUtils.h +++ b/libnd4j/include/helpers/StringUtils.h @@ -44,6 +44,14 @@ namespace sd { return os.str(); } + /** + * These methods convert integer values to string with 0s and 1s + * @param value + * @return + */ + template + static std::string bitsToString(T value); + /** * This method just concatenates error message with a given graphId * @param message @@ -137,6 +145,9 @@ namespace sd { * @return boolean status */ static bool u32StringToU8String(const std::u32string& u32, std::string& u8); + + template + static std::string vectorToString(const std::vector &vec); }; } diff --git a/libnd4j/include/helpers/impl/BitwiseUtils.cpp b/libnd4j/include/helpers/impl/BitwiseUtils.cpp index e3f4ce92a..9bd3fa8cf 100644 --- a/libnd4j/include/helpers/impl/BitwiseUtils.cpp +++ b/libnd4j/include/helpers/impl/BitwiseUtils.cpp @@ -49,31 +49,29 @@ namespace sd { return -1; } - std::vector BitwiseUtils::valueBits(int holder) { std::vector bits; if (holder == 0) { - for (int e = 0; e < 32; e++) - bits.emplace_back(0); + for (int e = 0; e < 32; e++) + bits.emplace_back(0); - return bits; + return bits; } - #ifdef REVERSE_BITS for (int e = 32; e >= 0; e--) { #else for (int e = 0; e < 32; e++) { #endif - bool isOne = (holder & 1 << e) != 0; + bool isOne = (holder & 1 << e) != 0; - if (isOne) - bits.emplace_back(1); - else - bits.emplace_back(0); - } + if (isOne) + bits.emplace_back(1); + else + bits.emplace_back(0); + } - return bits; + return bits; } sd::ByteOrder BitwiseUtils::asByteOrder() { diff --git a/libnd4j/include/helpers/impl/StringUtils.cpp b/libnd4j/include/helpers/impl/StringUtils.cpp index 5ac2fd8cc..757def763 100644 --- a/libnd4j/include/helpers/impl/StringUtils.cpp +++ b/libnd4j/include/helpers/impl/StringUtils.cpp @@ -21,7 +21,9 @@ // #include +#include #include +#include namespace sd { static FORCEINLINE bool match(const uint8_t *haystack, const uint8_t *needle, uint64_t length) { @@ -32,6 +34,17 @@ namespace sd { return true; } + template + std::string StringUtils::bitsToString(T value) { + return std::bitset(value).to_string(); + } + +template std::string StringUtils::bitsToString(int value); +template std::string StringUtils::bitsToString(uint32_t value); +template std::string StringUtils::bitsToString(Nd4jLong value); +template std::string StringUtils::bitsToString(uint64_t value); + + uint64_t StringUtils::countSubarrays(const void *vhaystack, uint64_t haystackLength, const void *vneedle, uint64_t needleLength) { auto haystack = reinterpret_cast(vhaystack); auto needle = reinterpret_cast(vneedle); @@ -155,4 +168,17 @@ namespace sd { return true; } + template + std::string StringUtils::vectorToString(const std::vector &vec) { + std::string result; + for (auto v:vec) + result += valueToString(v); + + return result; + } + + template std::string StringUtils::vectorToString(const std::vector &vec); + template std::string StringUtils::vectorToString(const std::vector &vec); + template std::string StringUtils::vectorToString(const std::vector &vec); + template std::string StringUtils::vectorToString(const std::vector &vec); } diff --git a/libnd4j/include/legacy/NativeOps.h b/libnd4j/include/legacy/NativeOps.h index 17affd1c3..c72b0d535 100755 --- a/libnd4j/include/legacy/NativeOps.h +++ b/libnd4j/include/legacy/NativeOps.h @@ -1606,6 +1606,8 @@ ND4J_EXPORT OpaqueRandomGenerator* createRandomGenerator(Nd4jLong rootSeed = 0, ND4J_EXPORT Nd4jLong getRandomGeneratorRootState(OpaqueRandomGenerator* ptr); ND4J_EXPORT Nd4jLong getRandomGeneratorNodeState(OpaqueRandomGenerator* ptr); ND4J_EXPORT void setRandomGeneratorStates(OpaqueRandomGenerator* ptr, Nd4jLong rootSeed = 0, Nd4jLong nodeSeed = 0); +ND4J_EXPORT float getRandomGeneratorRelativeFloat(OpaqueRandomGenerator* ptr, Nd4jLong index); +ND4J_EXPORT double getRandomGeneratorRelativeDouble(OpaqueRandomGenerator* ptr, Nd4jLong index); ND4J_EXPORT int getRandomGeneratorRelativeInt(OpaqueRandomGenerator* ptr, Nd4jLong index); ND4J_EXPORT Nd4jLong getRandomGeneratorRelativeLong(OpaqueRandomGenerator* ptr, Nd4jLong index); ND4J_EXPORT void deleteRandomGenerator(OpaqueRandomGenerator* ptr); diff --git a/libnd4j/include/legacy/cpu/NativeOps.cpp b/libnd4j/include/legacy/cpu/NativeOps.cpp index 799351ccc..ae8a22a6a 100644 --- a/libnd4j/include/legacy/cpu/NativeOps.cpp +++ b/libnd4j/include/legacy/cpu/NativeOps.cpp @@ -2832,6 +2832,14 @@ void setRandomGeneratorStates(sd::graph::RandomGenerator* ptr, Nd4jLong rootSeed ptr->setStates(rootSeed, nodeSeed); } +float getRandomGeneratorRelativeFloat(sd::graph::RandomGenerator* ptr, Nd4jLong index) { + return ptr->relativeT(index); +} + +double getRandomGeneratorRelativeDouble(sd::graph::RandomGenerator* ptr, Nd4jLong index) { + return ptr->relativeT(index); +} + int getRandomGeneratorRelativeInt(sd::graph::RandomGenerator* ptr, Nd4jLong index) { return ptr->relativeInt(index); } diff --git a/libnd4j/include/legacy/cuda/NativeOps.cu b/libnd4j/include/legacy/cuda/NativeOps.cu index 8be9b3bfd..465029207 100755 --- a/libnd4j/include/legacy/cuda/NativeOps.cu +++ b/libnd4j/include/legacy/cuda/NativeOps.cu @@ -3515,6 +3515,14 @@ void setRandomGeneratorStates(sd::graph::RandomGenerator* ptr, Nd4jLong rootSeed ptr->setStates(rootSeed, nodeSeed); } +float getRandomGeneratorRelativeFloat(sd::graph::RandomGenerator* ptr, Nd4jLong index) { + return ptr->relativeT(index); +} + +double getRandomGeneratorRelativeDouble(sd::graph::RandomGenerator* ptr, Nd4jLong index) { + return ptr->relativeT(index); +} + int getRandomGeneratorRelativeInt(sd::graph::RandomGenerator* ptr, Nd4jLong index) { return ptr->relativeInt(index); } diff --git a/libnd4j/include/ops/declarable/generic/images/resize_images.cpp b/libnd4j/include/ops/declarable/generic/images/resize_images.cpp index c3f9ae8f1..18d048450 100644 --- a/libnd4j/include/ops/declarable/generic/images/resize_images.cpp +++ b/libnd4j/include/ops/declarable/generic/images/resize_images.cpp @@ -81,7 +81,6 @@ namespace sd { } DECLARE_SHAPE_FN(resize_images) { - auto shapeList = SHAPELIST(); auto in = inputShape->at(0); Nd4jLong* outputShape; diff --git a/libnd4j/include/types/u32.h b/libnd4j/include/types/u32.h new file mode 100644 index 000000000..115b207cb --- /dev/null +++ b/libnd4j/include/types/u32.h @@ -0,0 +1,40 @@ +/******************************************************************************* + * Copyright (c) 2020 Konduit K.K. + * + * This program and the accompanying materials are made available under the + * terms of the Apache License, Version 2.0 which is available at + * https://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + * + * SPDX-License-Identifier: Apache-2.0 + ******************************************************************************/ + +// +// @author raver119@protonmail.com +// +#ifndef SD_U32_H +#define SD_U32_H + +#include +#include + + +namespace sd { + union u32 { + bool _bool; + int8_t _s8; + uint8_t _u8; + int16_t _s16; + uint16_t _u16; + int32_t _s32; + uint32_t _u32; + float _f32; + }; +} + +#endif \ No newline at end of file diff --git a/libnd4j/tests_cpu/layers_tests/CMakeLists.txt b/libnd4j/tests_cpu/layers_tests/CMakeLists.txt index 5ae202542..563bf58f6 100644 --- a/libnd4j/tests_cpu/layers_tests/CMakeLists.txt +++ b/libnd4j/tests_cpu/layers_tests/CMakeLists.txt @@ -45,7 +45,7 @@ if (APPLE) set(CMAKE_CXX_FLAGS " -fPIC -D__APPLE_OS__=true") elseif(WIN32) if (SD_CPU) - set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fPIC -march=native -mtune=native -O3") + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fPIC -mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 -mavx -mavx2 -O3") endif() if (SD_CPU AND LINUX) @@ -61,7 +61,7 @@ else() endif() if (SD_CPU AND SD_SANITIZE) - set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS} -fsanitize=address") + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=address") else() # CUDA? endif() diff --git a/libnd4j/tests_cpu/layers_tests/RNGTests.cpp b/libnd4j/tests_cpu/layers_tests/RNGTests.cpp index 37facc43c..469cc77be 100644 --- a/libnd4j/tests_cpu/layers_tests/RNGTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/RNGTests.cpp @@ -49,8 +49,8 @@ public: //_bufferB = new Nd4jLong[100000]; //_rngA = (sd::random::RandomBuffer *) initRandom(nullptr, _seed, 100000, (Nd4jPointer) _bufferA); //_rngB = (sd::random::RandomBuffer *) initRandom(nullptr, _seed, 100000, (Nd4jPointer) _bufferB); - _rngA.setStates(_seed, _seed); - _rngB.setStates(_seed, _seed); + _rngA.setStates(_seed * 0xDEADBEEF * 13, _seed * 0xDEADBEEF * 7); + _rngB.setStates(_seed * 0xDEADBEEF * 13, _seed * 0xDEADBEEF * 7); nexp0->assign(-1.0f); nexp1->assign(-2.0f); nexp2->assign(-3.0f); @@ -204,6 +204,9 @@ TEST_F(RNGTests, Test_Uniform_1) { RandomLauncher::fillUniform(LaunchContext::defaultContext(), _rngA, &x0, 1.0f, 2.0f); RandomLauncher::fillUniform(LaunchContext::defaultContext(), _rngB, &x1, 1.0f, 2.0f); + x0.printLinearBuffer(); + x1.printLinearBuffer(); + ASSERT_TRUE(x0.equalsTo(&x1)); ASSERT_FALSE(x0.equalsTo(nexp0)); @@ -212,10 +215,82 @@ TEST_F(RNGTests, Test_Uniform_1) { for (int e = 0; e < x0.lengthOf(); e++) { float v = x0.e(e); + nd4j_printf("%f\n", v); ASSERT_TRUE(v >= 1.0f && v <= 2.0f); } } +TEST_F(RNGTests, Test_Uniform_10) { + auto x = NDArrayFactory::create('c', {10000, 10000}); + auto z = NDArrayFactory::create(0.0f); + + RandomLauncher::fillUniform(LaunchContext::defaultContext(), _rngA, &x, 0.0f, 1.0f); + + sd::ops::reduce_max op; + auto status = op.execute({&x}, {&z}); + ASSERT_EQ(Status::OK(), status); + + ASSERT_LT(z.t(0), 1.0f); +} + +TEST_F(RNGTests, Test_Uniform_10_double) { + auto x = NDArrayFactory::create('c', {10000, 10000}); + auto z = NDArrayFactory::create(0.0f); + + RandomLauncher::fillUniform(LaunchContext::defaultContext(), _rngA, &x, 0.0f, 1.0f); + + sd::ops::reduce_max op; + auto status = op.execute({&x}, {&z}); + ASSERT_EQ(Status::OK(), status); + + ASSERT_LT(z.t(0), 1.0); +} + +TEST_F(RNGTests, Test_Uniform_11) { + uint32_t max = 0; + for (int e = 0; e < 100000000; e++) { + auto v = _rngA.xoroshiro32(e) >> 8; + if (v > max) + max = v; + } + + nd4j_printf("Max value: %i\n", (int) max); +} + +TEST_F(RNGTests, Test_Uniform_12) { + float max = -std::numeric_limits::infinity(); + float min = std::numeric_limits::infinity(); + for (int e = 0; e < 100000000; e++) { + auto v = _rngA.relativeT(e); + if (v > max) + max = v; + + if (v < min) + min = v; + } + + nd4j_printf("Max value: %.8f; Min value: %.8f\n", (float) max, (float) min); + ASSERT_LT(max, 1.0f); + ASSERT_GE(min, 0.0); +} + +TEST_F(RNGTests, Test_Uniform_13) { + double max = -std::numeric_limits::infinity(); + double min = std::numeric_limits::infinity(); + for (int e = 0; e < 100000000; e++) { + auto v = _rngA.relativeT(e); + if (v > max) + max = v; + + if (v < min) + min = v; + } + + nd4j_printf("Max value: %.8f; Min value: %.8f\n", (float) max, (float) min); + ASSERT_LT(max, 1.0); + ASSERT_GE(min, 0.0); +} + TEST_F(RNGTests, Test_Uniform_3) { auto x0 = NDArrayFactory::create('c', {1000000}); @@ -258,8 +333,8 @@ TEST_F(RNGTests, Test_Gaussian_1) { } TEST_F(RNGTests, Test_Gaussian_21) { - auto x0 = NDArrayFactory::create('c', {10, 10}); - auto x1 = NDArrayFactory::create('c', {10, 10}); + auto x0 = NDArrayFactory::create('c', {1000, 1000}); + auto x1 = NDArrayFactory::create('c', {1000, 1000}); RandomLauncher::fillGaussian(LaunchContext::defaultContext(), _rngA, &x0, 0.0f, 1.0f); RandomLauncher::fillGaussian(LaunchContext::defaultContext(), _rngB, &x1, 0.0f, 1.0f); @@ -983,6 +1058,26 @@ TEST_F(RNGTests, Test_UniformDistribution_04) { } +TEST_F(RNGTests, Test_UniformDistribution_05) { + auto x = NDArrayFactory::create('c', {2}, {10000, 10000}); + auto al = NDArrayFactory::create(0.f); + auto be = NDArrayFactory::create(1.f); + auto exp0 = NDArrayFactory::create('c', {10000, 10000}); + + + sd::ops::randomuniform op; + auto result = op.evaluate({&x, &al, &be}, {}, {},{}, {DataType::FLOAT32}); + ASSERT_EQ(Status::OK(), result.status()); + + auto z = result.at(0); + ASSERT_TRUE(exp0.isSameShape(z)); + ASSERT_FALSE(exp0.equalsTo(z)); + + sd::ops::reduce_max checkOp; + auto checkResult = checkOp.evaluate({z}); + checkResult[0]->printIndexedBuffer("Max on uniform with 0 to 1 on 100M cases is"); +} + namespace sd { namespace tests { static void fillList(Nd4jLong seed, int numberOfArrays, std::vector &shape, std::vector &list, sd::graph::RandomGenerator *rng) { diff --git a/libnd4j/tests_cpu/layers_tests/StringTests.cpp b/libnd4j/tests_cpu/layers_tests/StringTests.cpp index 272c410c7..41352246e 100644 --- a/libnd4j/tests_cpu/layers_tests/StringTests.cpp +++ b/libnd4j/tests_cpu/layers_tests/StringTests.cpp @@ -25,6 +25,8 @@ #include #include "testlayers.h" #include +#include +#include using namespace sd; @@ -863,3 +865,13 @@ TEST_F(StringTests, Basic_cast_UTF8toUTF32) { ASSERT_EQ(u8, z0); ASSERT_EQ(u32, z1); } + +TEST_F(StringTests, test_bit_string_1) { + // check bits -> vector conversion first + auto vec = BitwiseUtils::valueBits(1); + + // check bits -> string conversion next; + auto str = StringUtils::bitsToString(1); + ASSERT_EQ(32, str.length()); + ASSERT_EQ(std::string("00000000000000000000000000000001"), str); +} \ No newline at end of file diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/nativeblas/NativeOps.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/nativeblas/NativeOps.java index c7789d7dc..ae9ff1e94 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/nativeblas/NativeOps.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/nativeblas/NativeOps.java @@ -1154,6 +1154,8 @@ public interface NativeOps { long getRandomGeneratorRootState(OpaqueRandomGenerator ptr); long getRandomGeneratorNodeState(OpaqueRandomGenerator ptr); void setRandomGeneratorStates(OpaqueRandomGenerator ptr, @Cast("Nd4jLong") long rootSeed/*=0*/, @Cast("Nd4jLong") long nodeSeed/*=0*/); + float getRandomGeneratorRelativeFloat(OpaqueRandomGenerator ptr, @Cast("Nd4jLong") long index); + double getRandomGeneratorRelativeDouble(OpaqueRandomGenerator ptr, @Cast("Nd4jLong") long index); int getRandomGeneratorRelativeInt(OpaqueRandomGenerator ptr, @Cast("Nd4jLong") long index); long getRandomGeneratorRelativeLong(OpaqueRandomGenerator ptr, @Cast("Nd4jLong") long index); void deleteRandomGenerator(OpaqueRandomGenerator ptr); diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/rng/NativeRandom.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/rng/NativeRandom.java index 563fe2e45..04f9c7499 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/rng/NativeRandom.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/rng/NativeRandom.java @@ -107,14 +107,10 @@ public abstract class NativeRandom implements Random { } @Override - public float nextFloat() { - return (float) nextInt() / (float) Integer.MAX_VALUE; - } + public abstract float nextFloat(); @Override - public double nextDouble() { - return (double) nextInt() / (double) Integer.MAX_VALUE; - } + public abstract double nextDouble(); @Override public double nextGaussian() { diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/rng/CudaNativeRandom.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/rng/CudaNativeRandom.java index edb5d291a..e5067c9c9 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/rng/CudaNativeRandom.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/rng/CudaNativeRandom.java @@ -81,6 +81,16 @@ public class CudaNativeRandom extends NativeRandom { return seed; } + @Override + public float nextFloat() { + return nativeOps.getRandomGeneratorRelativeFloat((OpaqueRandomGenerator)statePointer, currentPosition.getAndIncrement()); + } + + @Override + public double nextDouble() { + return nativeOps.getRandomGeneratorRelativeDouble((OpaqueRandomGenerator)statePointer, currentPosition.getAndIncrement()); + } + @Override public int nextInt() { return nativeOps.getRandomGeneratorRelativeInt((OpaqueRandomGenerator)statePointer, currentPosition.getAndIncrement()); diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java index 59496d780..ad9503849 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java @@ -3098,6 +3098,8 @@ public native @Cast("Nd4jLong") long getRandomGeneratorRootState(OpaqueRandomGen public native @Cast("Nd4jLong") long getRandomGeneratorNodeState(OpaqueRandomGenerator ptr); public native void setRandomGeneratorStates(OpaqueRandomGenerator ptr, @Cast("Nd4jLong") long rootSeed/*=0*/, @Cast("Nd4jLong") long nodeSeed/*=0*/); public native void setRandomGeneratorStates(OpaqueRandomGenerator ptr); +public native float getRandomGeneratorRelativeFloat(OpaqueRandomGenerator ptr, @Cast("Nd4jLong") long index); +public native double getRandomGeneratorRelativeDouble(OpaqueRandomGenerator ptr, @Cast("Nd4jLong") long index); public native int getRandomGeneratorRelativeInt(OpaqueRandomGenerator ptr, @Cast("Nd4jLong") long index); public native @Cast("Nd4jLong") long getRandomGeneratorRelativeLong(OpaqueRandomGenerator ptr, @Cast("Nd4jLong") long index); public native void deleteRandomGenerator(OpaqueRandomGenerator ptr); @@ -5048,6 +5050,7 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); // #include // #include // #include +// #include // #ifdef __CUDACC__ // #endif @@ -5064,6 +5067,8 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); return (RandomGenerator)super.position(position); } + public native @Cast("uint32_t") int xoroshiro32(@Cast("uint64_t") long index); + public native @Cast("uint64_t") long xoroshiro64(@Cast("uint64_t") long index); public RandomGenerator(@Cast("Nd4jLong") long rootSeed/*=0*/, @Cast("Nd4jLong") long nodeSeed/*=0*/) { super((Pointer)null); allocate(rootSeed, nodeSeed); } private native void allocate(@Cast("Nd4jLong") long rootSeed/*=0*/, @Cast("Nd4jLong") long nodeSeed/*=0*/); public RandomGenerator() { super((Pointer)null); allocate(); } @@ -5094,7 +5099,7 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); public native int relativeInt(@Cast("Nd4jLong") long index); public native @Cast("Nd4jLong") long relativeLong(@Cast("Nd4jLong") long index); - public native void rewindH(@Cast("Nd4jLong") long steps); + public native void rewindH(@Cast("uint64_t") long steps); /** * These methods set up only node states, with non-changed root ones @@ -5126,6 +5131,10 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); + + + + @@ -5141,6 +5150,8 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); @Namespace("sd::graph") public static native @Cast("uint64_t") long rotl(@Cast("const uint64_t") long x, int k); + @Namespace("sd::graph") public static native @Cast("uint32_t") int next(@Cast("uint32_t") int s0, @Cast("uint32_t") int s1, @Cast("uint32_t") int s2, @Cast("uint32_t") int s3); + diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/linalg/cpu/nativecpu/rng/CpuNativeRandom.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/linalg/cpu/nativecpu/rng/CpuNativeRandom.java index 2a2cff200..96219e8ff 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/linalg/cpu/nativecpu/rng/CpuNativeRandom.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/linalg/cpu/nativecpu/rng/CpuNativeRandom.java @@ -75,6 +75,16 @@ public class CpuNativeRandom extends NativeRandom { return nativeOps.getRandomGeneratorRelativeInt((OpaqueRandomGenerator)statePointer, currentPosition.getAndIncrement()); } + @Override + public float nextFloat() { + return nativeOps.getRandomGeneratorRelativeFloat((OpaqueRandomGenerator)statePointer, currentPosition.getAndIncrement()); + } + + @Override + public double nextDouble() { + return nativeOps.getRandomGeneratorRelativeDouble((OpaqueRandomGenerator)statePointer, currentPosition.getAndIncrement()); + } + @Override public long nextLong() { return nativeOps.getRandomGeneratorRelativeLong((OpaqueRandomGenerator)statePointer, currentPosition.getAndIncrement()); diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java index b9e4adb5a..402b096c6 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java @@ -3102,6 +3102,8 @@ public native @Cast("Nd4jLong") long getRandomGeneratorRootState(OpaqueRandomGen public native @Cast("Nd4jLong") long getRandomGeneratorNodeState(OpaqueRandomGenerator ptr); public native void setRandomGeneratorStates(OpaqueRandomGenerator ptr, @Cast("Nd4jLong") long rootSeed/*=0*/, @Cast("Nd4jLong") long nodeSeed/*=0*/); public native void setRandomGeneratorStates(OpaqueRandomGenerator ptr); +public native float getRandomGeneratorRelativeFloat(OpaqueRandomGenerator ptr, @Cast("Nd4jLong") long index); +public native double getRandomGeneratorRelativeDouble(OpaqueRandomGenerator ptr, @Cast("Nd4jLong") long index); public native int getRandomGeneratorRelativeInt(OpaqueRandomGenerator ptr, @Cast("Nd4jLong") long index); public native @Cast("Nd4jLong") long getRandomGeneratorRelativeLong(OpaqueRandomGenerator ptr, @Cast("Nd4jLong") long index); public native void deleteRandomGenerator(OpaqueRandomGenerator ptr); @@ -5052,6 +5054,7 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); // #include // #include // #include +// #include // #ifdef __CUDACC__ // #endif @@ -5068,6 +5071,8 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); return (RandomGenerator)super.position(position); } + public native @Cast("uint32_t") int xoroshiro32(@Cast("uint64_t") long index); + public native @Cast("uint64_t") long xoroshiro64(@Cast("uint64_t") long index); public RandomGenerator(@Cast("Nd4jLong") long rootSeed/*=0*/, @Cast("Nd4jLong") long nodeSeed/*=0*/) { super((Pointer)null); allocate(rootSeed, nodeSeed); } private native void allocate(@Cast("Nd4jLong") long rootSeed/*=0*/, @Cast("Nd4jLong") long nodeSeed/*=0*/); public RandomGenerator() { super((Pointer)null); allocate(); } @@ -5098,7 +5103,7 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); public native int relativeInt(@Cast("Nd4jLong") long index); public native @Cast("Nd4jLong") long relativeLong(@Cast("Nd4jLong") long index); - public native void rewindH(@Cast("Nd4jLong") long steps); + public native void rewindH(@Cast("uint64_t") long steps); /** * These methods set up only node states, with non-changed root ones @@ -5130,6 +5135,10 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); + + + + @@ -5145,6 +5154,8 @@ public native @Cast("bool") boolean isOptimalRequirementsMet(); @Namespace("sd::graph") public static native @Cast("uint64_t") long rotl(@Cast("const uint64_t") long x, int k); + @Namespace("sd::graph") public static native @Cast("uint32_t") int next(@Cast("uint32_t") int s0, @Cast("uint32_t") int s1, @Cast("uint32_t") int s2, @Cast("uint32_t") int s3); + diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/rng/RandomTests.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/rng/RandomTests.java index d784fb390..4e885db96 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/rng/RandomTests.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/rng/RandomTests.java @@ -1514,6 +1514,28 @@ public class RandomTests extends BaseNd4jTest { assertEquals(res[0], res1[0]); } + + @Test + public void testRandom() { + val r1 = new java.util.Random(119); + val r2 = Nd4j.getRandom(); + r2.setSeed(119); + float jmax = 0.0f; + float nmax = 0.0f; + for (int e = 0; e < 100_000_000; e++) { + val f = r1.nextFloat(); + val n = r2.nextFloat(); + if (f > jmax) + jmax = f; + + if (n > nmax) + nmax = n; + } + + assertTrue(jmax < 1.0); + assertTrue(nmax < 1.0); + } + @Override public char ordering() { return 'c';